In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Make the project folder the current working directory.
# Presumably this is what lets the local modules used below (MovieLens.py, NCF.py, ...)
# be found by relative name - confirm.
# NOTE(review): this is an absolute Google Drive path, and the drive.mount() cell
# above is commented out, so on a fresh kernel os.chdir raises unless Drive has
# already been mounted some other way.
import os
os.chdir('/content/drive/MyDrive/TLCN/Code/Full Code/hybrid_movie_v2')

In [None]:
# !pip install scikit-surprise

In [None]:
# !pip install tensorrt

### Load file

In [None]:
# Run each project module once as a standalone script through the shell.
# NOTE(review): the same modules are imported in the next cell; presumably this is
# only a smoke check that every file executes - confirm whether it is still needed.
!python "MovieLens.py"
!python "NCF.py"
!python "HybridAlgorithm.py"
!python "SGD.py"

### Import libraries

In [None]:
from MovieLens import MovieLens
from HybridAlgorithm import HybridAlgorithm
from surprise import SVD, KNNWithMeans
from NCF import NCF
from SGD import SGD
import random
import numpy as np
import pandas as pd
from surprise.model_selection import train_test_split
from surprise import accuracy
from ContentKNNAlgorithm import ContentKNNAlgorithm

### Load data

In [None]:
def LoadMovieLensData():
    """Create a MovieLens helper and load everything the notebook needs from it.

    Returns:
        A 3-tuple ``(ml, data, rankings)``: the ``MovieLens`` instance itself,
        the result of its ``loadMovieLensLatestSmall()`` call, and the result
        of its ``getPopularityRanks()`` call (invoked in that order).
    """
    loader = MovieLens()
    # The dataset is loaded before the ranks are requested, as in the original flow.
    ratings = loader.loadMovieLensLatestSmall()
    popularity = loader.getPopularityRanks()
    return loader, ratings, popularity

# Seed NumPy's and Python's global RNGs so the stochastic steps below are repeatable.
np.random.seed(29)
random.seed(29)

(ml, evaluationData, rankings) = LoadMovieLensData()

# Hold out 30% of the ratings for evaluation; random_state pins the split itself.
trainset, testset = train_test_split(evaluationData, test_size=.3, random_state=0)

# Start from an empty results table with the columns every later cell writes to.
# (The previous `result_df = pd.DataFrame` bound the class object, not a frame, so
# any `.loc` / `len()` use before the first real assignment would have failed.)
result_df = pd.DataFrame(columns=['Algorithm', 'RMSE', 'MAE'])

### Content based

In [None]:
# Content-based recommender from the project's ContentKNNAlgorithm module.
# The value returned by fit() is kept as `content_model`; it is used for .test()
# below and reused as a member of the hybrid models later on.
ContentKNN = ContentKNNAlgorithm()
content_model = ContentKNN.fit(trainset)

In [None]:
content_test = content_model.test(testset)

In [None]:
# Score the content-based predictions on the held-out test set.
content_rmse, content_mae = (
    accuracy.rmse(content_test, verbose=False),
    accuracy.mae(content_test, verbose=False),
)

# (Re)create the results table with the content-based scores as its first row.
result_df = pd.DataFrame(
    dict(Algorithm=['Content'], RMSE=[content_rmse], MAE=[content_mae])
)

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {content_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {content_rmse}')

In [None]:
result_df

### Collaborative filtering

#### User - User Collaborative Filtering (User based)

In [None]:
# User-user KNN-with-means: neighbours are users, compared by cosine similarity.
UserKNNmeans = KNNWithMeans(
    sim_options={
        'name': 'cosine',
        'user_based': True,
    }
)
# Keep whatever fit() returns; it is used for .test() and in the hybrids below.
userBased_model = UserKNNmeans.fit(trainset)

In [None]:
userBased_test = userBased_model.test(testset)

In [None]:
# Score the user-based KNN predictions on the held-out test set.
userBased_rmse, userBased_mae = (
    accuracy.rmse(userBased_test, verbose=False),
    accuracy.mae(userBased_test, verbose=False),
)

result_userBased = dict(Algorithm='User Based', RMSE=userBased_rmse, MAE=userBased_mae)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_userBased

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {userBased_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {userBased_rmse}')

In [None]:
result_df

#### Item - Item Collaborative Filtering (Item based)

In [None]:
# Item-item KNN-with-means: neighbours are items, compared by cosine similarity.
ItemKNNmeans = KNNWithMeans(
    sim_options={
        'name': 'cosine',
        'user_based': False,
    }
)
# Keep whatever fit() returns; it is used for .test() and in the hybrids below.
itemBased_model = ItemKNNmeans.fit(trainset)

In [None]:
itemBased_test = itemBased_model.test(testset)

In [None]:
# Score the item-based KNN predictions on the held-out test set.
itemBased_rmse, itemBased_mae = (
    accuracy.rmse(itemBased_test, verbose=False),
    accuracy.mae(itemBased_test, verbose=False),
)

result_itemBased = dict(Algorithm='Item Based', RMSE=itemBased_rmse, MAE=itemBased_mae)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_itemBased

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {itemBased_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {itemBased_rmse}')

In [None]:
result_df

#### SVD Untuned

In [None]:
# Baseline SVD: constructed with no arguments, i.e. the library's default settings.
svdUntuned_model = SVD()
# fit() is the last expression of the cell, so its return value is displayed.
svdUntuned_model.fit(trainset)

In [None]:
svdUntuned_test = svdUntuned_model.test(testset)

In [None]:
# Score the default-parameter SVD predictions on the held-out test set.
svdUntuned_rmse, svdUntuned_mae = (
    accuracy.rmse(svdUntuned_test, verbose=False),
    accuracy.mae(svdUntuned_test, verbose=False),
)

result_svdUntuned = dict(Algorithm='SVD Untuned', RMSE=svdUntuned_rmse, MAE=svdUntuned_mae)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_svdUntuned

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {svdUntuned_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {svdUntuned_rmse}')

In [None]:
result_df

#### SVD Tuned

In [None]:
# SVD with hand-picked hyper-parameters.
# NOTE(review): the search that produced these values is not part of this notebook.
svdTuned_model = SVD(
    n_factors=90,
    n_epochs=11,
    lr_all=0.034,
    reg_all=0.06,
)
# fit() is the last expression of the cell, so its return value is displayed.
svdTuned_model.fit(trainset)

In [None]:
svdTuned_test = svdTuned_model.test(testset)

In [None]:
# Score the tuned SVD predictions on the held-out test set.
svdTuned_rmse, svdTuned_mae = (
    accuracy.rmse(svdTuned_test, verbose=False),
    accuracy.mae(svdTuned_test, verbose=False),
)

result_svdTuned = dict(Algorithm='SVD Tuned', RMSE=svdTuned_rmse, MAE=svdTuned_mae)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_svdTuned

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {svdTuned_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {svdTuned_rmse}')

In [None]:
result_df

#### SGD

In [None]:
# Project-local SGD model (from SGD.py) with 10 factors, 10 epochs, learning rate 0.01.
sgd_model = SGD(
    n_factors=10,
    n_epochs=10,
    learning_rate=0.01,
)
# fit() is the last expression of the cell, so its return value is displayed.
sgd_model.fit(trainset)

In [None]:
sgd_test = sgd_model.test(testset)

In [None]:
# Score the SGD model's predictions on the held-out test set.
sgd_rmse, sgd_mae = (
    accuracy.rmse(sgd_test, verbose=False),
    accuracy.mae(sgd_test, verbose=False),
)

result_sgd = dict(Algorithm='SGD', RMSE=sgd_rmse, MAE=sgd_mae)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_sgd

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {sgd_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {sgd_rmse}')

In [None]:
result_df

#### Neural Collaborative Filtering (NCF)

##### Transform data

In [None]:
# NCF is fed pandas DataFrames here rather than Surprise objects, so both splits
# are converted to three-column frames.
# Presumably build_testset() yields (user, movie, rating) triples like `testset`
# - verify against the Surprise docs.
trainset_test = trainset.build_testset()
train_data = pd.DataFrame(
    trainset_test,
    columns=['userId', 'movieId', 'rating'],
)
test_data = pd.DataFrame(
    testset,
    columns=['userId', 'movieId', 'rating'],
)

In [None]:
# Two sizes taken from the MovieLens helper and passed on to the network.
# Presumably these are the numbers of distinct users and movies - confirm in MovieLens.py.
n_users, n_movies = ml.get_number()

# Train the project's NCF model on the DataFrame form of the training split.
ncf_model = NCF(n_users,n_movies)
ncf_model.fit(train_data)

In [None]:
# NCF reports its own metrics instead of going through surprise.accuracy.
# NOTE(review): this unpacking assumes NCF.test returns (MAE, RMSE) in that order
# - confirm against NCF.py, otherwise the two columns are swapped.
ncf_mae, ncf_rmse = ncf_model.test(test_data)

result_ncf = dict(Algorithm='Neural', RMSE=ncf_rmse, MAE=ncf_mae)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_ncf

In [None]:
result_df

In [None]:
ncf_model.estimate(438,116823)

### Hybrid

##### Content - Userbased

In [None]:
# Hybrid of the user-based KNN and the content model.
# Weights are presumably matched to the models by position
# (0.7 user-based, 0.3 content) - confirm in HybridAlgorithm.py.
content_userBased_model = HybridAlgorithm(
    [userBased_model, content_model],
    [0.7, 0.3],
)
content_userBased_model.fit(trainset)

In [None]:
content_userBased_test = content_userBased_model.test(testset)

In [None]:
# Score the content + user-based hybrid on the held-out test set.
content_userBased_rmse, content_userBased_mae = (
    accuracy.rmse(content_userBased_test, verbose=False),
    accuracy.mae(content_userBased_test, verbose=False),
)

result_content_userBased = dict(
    Algorithm='Hybrid Content Based - Userbased',
    RMSE=content_userBased_rmse,
    MAE=content_userBased_mae,
)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_content_userBased

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {content_userBased_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {content_userBased_rmse}')

In [None]:
result_df

##### Content - Itembased

In [None]:
# Hybrid of the content model and the item-based KNN, equally weighted.
content_itemBased_model = HybridAlgorithm(
    [content_model, itemBased_model],
    [0.5, 0.5],
)
content_itemBased_model.fit(trainset)

In [None]:
content_itemBased_test = content_itemBased_model.test(testset)

In [None]:
# Score the content + item-based hybrid on the held-out test set.
content_itemBased_rmse, content_itemBased_mae = (
    accuracy.rmse(content_itemBased_test, verbose=False),
    accuracy.mae(content_itemBased_test, verbose=False),
)

result_content_itemBased = dict(
    Algorithm='Hybrid Content Based - Itembased',
    RMSE=content_itemBased_rmse,
    MAE=content_itemBased_mae,
)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_content_itemBased

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {content_itemBased_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {content_itemBased_rmse}')

In [None]:
result_df

##### Content - SVD Untuned

In [None]:
# Hybrid of the content model and the default-parameter SVD, equally weighted.
content_svdUntuned_model = HybridAlgorithm(
    [content_model, svdUntuned_model],
    [0.5, 0.5],
)
content_svdUntuned_model.fit(trainset)

In [None]:
content_svdUntuned_test = content_svdUntuned_model.test(testset)

In [None]:
# Score the content + untuned-SVD hybrid on the held-out test set.
content_svdUntuned_rmse, content_svdUntuned_mae = (
    accuracy.rmse(content_svdUntuned_test, verbose=False),
    accuracy.mae(content_svdUntuned_test, verbose=False),
)

result_content_svdUntuned = dict(
    Algorithm='Hybrid Content Based - SVD Untuned',
    RMSE=content_svdUntuned_rmse,
    MAE=content_svdUntuned_mae,
)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_content_svdUntuned

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {content_svdUntuned_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {content_svdUntuned_rmse}')

In [None]:
result_df

##### Content - SVD Tuned

In [None]:
# Hybrid of the content model and the tuned SVD, equally weighted.
content_svdTuned_model = HybridAlgorithm(
    [content_model, svdTuned_model],
    [0.5, 0.5],
)
content_svdTuned_model.fit(trainset)

In [None]:
content_svdTuned_test = content_svdTuned_model.test(testset)

In [None]:
# Score the content + tuned-SVD hybrid on the held-out test set.
content_svdTuned_rmse, content_svdTuned_mae = (
    accuracy.rmse(content_svdTuned_test, verbose=False),
    accuracy.mae(content_svdTuned_test, verbose=False),
)

result_content_svdTuned = dict(
    Algorithm='Hybrid Content Based - SVD Tuned',
    RMSE=content_svdTuned_rmse,
    MAE=content_svdTuned_mae,
)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_content_svdTuned

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {content_svdTuned_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {content_svdTuned_rmse}')

In [None]:
result_df

##### Content - Neural

In [None]:
# Hybrid of the content model and the NCF network, equally weighted.
# NOTE(review): the saved output of this cell shows the similarity matrix being
# recomputed and ten training epochs running, i.e. fit() appears to retrain the
# shared content_model / ncf_model instances - confirm that this is intended.
content_neural_model = HybridAlgorithm(
    [content_model, ncf_model],
    [0.5, 0.5],
)
content_neural_model.fit(trainset)

[<ContentKNNAlgorithm.ContentKNNAlgorithm object at 0x7c36119c9870>, <NCF.NCF object at 0x7c36119e78e0>]
Computing content-based similarity matrix...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<HybridAlgorithm.HybridAlgorithm at 0x7c35e4438490>

In [None]:
content_neural_test = content_neural_model.test(testset)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [None]:
# Score the content + NCF hybrid on the held-out test set.
# NOTE(review): the saved outputs report exactly the same RMSE/MAE
# (0.899176 / 0.692745) for this hybrid and for the Content-Userbased and
# Content-Itembased hybrids - check that HybridAlgorithm really blends its members.
content_neural_rmse, content_neural_mae = (
    accuracy.rmse(content_neural_test, verbose=False),
    accuracy.mae(content_neural_test, verbose=False),
)

result_content_neural = dict(
    Algorithm='Hybrid Content Based - Neural',
    RMSE=content_neural_rmse,
    MAE=content_neural_mae,
)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_content_neural

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {content_neural_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {content_neural_rmse}')

Mean Absolute Error (MAE) on test data: 0.6927453035780557
Root Mean Squared Error (RMSE) on test data: 0.8991763178148051


In [None]:
result_df

Unnamed: 0,Algorithm,RMSE,MAE
0,Content,0.90161,0.694323
1,User Based,0.902766,0.689302
2,Item Based,0.905951,0.691571
3,SVD Untuned,0.875188,0.673068
4,SVD Tuned,0.862806,0.661852
5,SGD,1.437396,1.142435
6,Neural,0.872952,0.66603
7,Hybrid Content Based - Userbased,0.899176,0.692745
8,Hybrid Content Based - Itembased,0.899176,0.692745
9,Hybrid Content Based - SVD Untuned,0.862621,0.66357


##### SVD Untuned - SVD Tuned

In [None]:
# Hybrid of the two SVD variants (default and tuned), equally weighted.
svdUntuned_svdTuned_model = HybridAlgorithm(
    [svdUntuned_model, svdTuned_model],
    [0.5, 0.5],
)
svdUntuned_svdTuned_model.fit(trainset)

[<surprise.prediction_algorithms.matrix_factorization.SVD object at 0x7c36119e4a90>, <surprise.prediction_algorithms.matrix_factorization.SVD object at 0x7c3694261d20>]


<HybridAlgorithm.HybridAlgorithm at 0x7c360b549030>

In [None]:
svdUntuned_svdTuned_test = svdUntuned_svdTuned_model.test(testset)

In [None]:
# Score the untuned-SVD + tuned-SVD hybrid on the held-out test set.
svdUntuned_svdTuned_rmse, svdUntuned_svdTuned_mae = (
    accuracy.rmse(svdUntuned_svdTuned_test, verbose=False),
    accuracy.mae(svdUntuned_svdTuned_test, verbose=False),
)

result_svdUntuned_svdTuned = dict(
    Algorithm='Hybrid SVD Untuned - SVD Tuned',
    RMSE=svdUntuned_svdTuned_rmse,
    MAE=svdUntuned_svdTuned_mae,
)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_svdUntuned_svdTuned

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {svdUntuned_svdTuned_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {svdUntuned_svdTuned_rmse}')

Mean Absolute Error (MAE) on test data: 0.6602026312483243
Root Mean Squared Error (RMSE) on test data: 0.8598840920083561


In [None]:
result_df

Unnamed: 0,Algorithm,RMSE,MAE
0,Content,0.90161,0.694323
1,User Based,0.902766,0.689302
2,Item Based,0.905951,0.691571
3,SVD Untuned,0.875188,0.673068
4,SVD Tuned,0.862806,0.661852
5,SGD,1.437396,1.142435
6,Neural,0.872952,0.66603
7,Hybrid Content Based - Userbased,0.899176,0.692745
8,Hybrid Content Based - Itembased,0.899176,0.692745
9,Hybrid Content Based - SVD Untuned,0.862621,0.66357


##### SVD Tuned - User Based

In [None]:
# Hybrid of the tuned SVD and the user-based KNN, equally weighted.
svdTuned_userBased_model = HybridAlgorithm(
    [svdTuned_model, userBased_model],
    [0.5, 0.5],
)
svdTuned_userBased_model.fit(trainset)

[<surprise.prediction_algorithms.matrix_factorization.SVD object at 0x7c3694261d20>, <surprise.prediction_algorithms.knns.KNNWithMeans object at 0x7c36119c9e70>]
Computing the cosine similarity matrix...
Done computing similarity matrix.


<HybridAlgorithm.HybridAlgorithm at 0x7c361282b7c0>

In [None]:
svdTuned_userBased_test = svdTuned_userBased_model.test(testset)

In [None]:
# Score the tuned-SVD + user-based hybrid on the held-out test set.
svdTuned_userBased_rmse, svdTuned_userBased_mae = (
    accuracy.rmse(svdTuned_userBased_test, verbose=False),
    accuracy.mae(svdTuned_userBased_test, verbose=False),
)

result_svdTuned_userBased = dict(
    Algorithm='Hybrid SVD Tuned - User Based',
    RMSE=svdTuned_userBased_rmse,
    MAE=svdTuned_userBased_mae,
)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_svdTuned_userBased

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {svdTuned_userBased_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {svdTuned_userBased_rmse}')

Mean Absolute Error (MAE) on test data: 0.6681122050632664
Root Mean Squared Error (RMSE) on test data: 0.8716515998358848


In [None]:
result_df

Unnamed: 0,Algorithm,RMSE,MAE
0,Content,0.90161,0.694323
1,User Based,0.902766,0.689302
2,Item Based,0.905951,0.691571
3,SVD Untuned,0.875188,0.673068
4,SVD Tuned,0.862806,0.661852
5,SGD,1.437396,1.142435
6,Neural,0.872952,0.66603
7,Hybrid Content Based - Userbased,0.899176,0.692745
8,Hybrid Content Based - Itembased,0.899176,0.692745
9,Hybrid Content Based - SVD Untuned,0.862621,0.66357


##### SVD Tuned - Item Based

In [None]:
# Hybrid of the tuned SVD and the item-based KNN, equally weighted.
svdTuned_itemBased_model = HybridAlgorithm(
    [svdTuned_model, itemBased_model],
    [0.5, 0.5],
)
svdTuned_itemBased_model.fit(trainset)

[<surprise.prediction_algorithms.matrix_factorization.SVD object at 0x7c3694261d20>, <surprise.prediction_algorithms.knns.KNNWithMeans object at 0x7c36119e5990>]
Computing the cosine similarity matrix...
Done computing similarity matrix.


<HybridAlgorithm.HybridAlgorithm at 0x7c360b7d1c60>

In [None]:
svdTuned_itemBased_test = svdTuned_itemBased_model.test(testset)

In [None]:
# Score the tuned-SVD + item-based hybrid on the held-out test set.
svdTuned_itemBased_rmse, svdTuned_itemBased_mae = (
    accuracy.rmse(svdTuned_itemBased_test, verbose=False),
    accuracy.mae(svdTuned_itemBased_test, verbose=False),
)

result_svdTuned_itemBased = dict(
    Algorithm='Hybrid SVD Tuned - Item Based',
    RMSE=svdTuned_itemBased_rmse,
    MAE=svdTuned_itemBased_mae,
)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_svdTuned_itemBased

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {svdTuned_itemBased_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {svdTuned_itemBased_rmse}')

Mean Absolute Error (MAE) on test data: 0.6670598668347821
Root Mean Squared Error (RMSE) on test data: 0.8718262186314139


In [None]:
result_df

Unnamed: 0,Algorithm,RMSE,MAE
0,Content,0.90161,0.694323
1,User Based,0.902766,0.689302
2,Item Based,0.905951,0.691571
3,SVD Untuned,0.875188,0.673068
4,SVD Tuned,0.862806,0.661852
5,SGD,1.437396,1.142435
6,Neural,0.872952,0.66603
7,Hybrid Content Based - Userbased,0.899176,0.692745
8,Hybrid Content Based - Itembased,0.899176,0.692745
9,Hybrid Content Based - SVD Untuned,0.862621,0.66357


##### SVD Tuned - Neural

In [None]:
# Hybrid of the tuned SVD and the NCF network, equally weighted.
svdTuned_neural_model = HybridAlgorithm(
    [svdTuned_model, ncf_model],
    [0.5, 0.5],
)
svdTuned_neural_model.fit(trainset)

[<surprise.prediction_algorithms.matrix_factorization.SVD object at 0x7c3694261d20>, <NCF.NCF object at 0x7c36119e78e0>]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<HybridAlgorithm.HybridAlgorithm at 0x7c360b7d15a0>

In [None]:
svdTuned_neural_test = svdTuned_neural_model.test(testset)

In [None]:
# Score the tuned-SVD + NCF hybrid on the held-out test set.
svdTuned_neural_rmse, svdTuned_neural_mae = (
    accuracy.rmse(svdTuned_neural_test, verbose=False),
    accuracy.mae(svdTuned_neural_test, verbose=False),
)

result_svdTuned_neural = dict(
    Algorithm='Hybrid SVD Tuned - Neural',
    RMSE=svdTuned_neural_rmse,
    MAE=svdTuned_neural_mae,
)

# Append as a new row labelled with the current row count.
# Note: re-running this cell appends the same row again.
result_df.loc[len(result_df.index)] = result_svdTuned_neural

In [None]:
print(f'Mean Absolute Error (MAE) on test data: {svdTuned_neural_mae}')
print(f'Root Mean Squared Error (RMSE) on test data: {svdTuned_neural_rmse}')

In [None]:
result_df

In [None]:
# Write the comparison table to 'result2.xlsx' (relative to the current working
# directory), leaving out the DataFrame index column.
result_df.to_excel('result2.xlsx', index=False)