In [None]:
# Install the required packages
pip install google

In [None]:
# Import drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install the required packages for the collab code to run
!pip install numpy
!pip install scikit-surprise
import os
import pandas as pd
from surprise import KNNBasic
from surprise import SVD
from surprise import SVDpp
from surprise import NMF
from surprise import accuracy
from surprise import AlgoBase
from surprise import Reader
from surprise import Dataset
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV
from surprise.accuracy import rmse
from collections import defaultdict


In [None]:
# Get the ratings data from ratings.csv file
ratings_data = pd.read_csv('/content/drive/MyDrive/CSE573-SWM-Movie-Recommender/DATA/ratings.csv')
ratings_data.head()

In [11]:
# Map the relative data into the variable
ratings_map_data = {}
ratings_map_data['itemID'] = list(ratings_data.movieId)
ratings_map_data['userID'] = list(ratings_data.userId)
ratings_map_data['rating'] = list(ratings_data.rating)

# Contruct a data frame for the ratings data
df = pd.DataFrame(ratings_map_data)
df.shape

(100836, 3)

In [12]:
from surprise.model_selection import KFold

# Set the rating scale
rr = Reader(rating_scale=(0.5, 5.0))

# Categorize the rating keys
rating_map_keys = ['userID', 'itemID', 'rating']
filter = df[rating_map_keys]

# Get the rating data according the key filters
data = Dataset.load_from_df(filter, rr)

# Use 5-fold split
kf = KFold(n_splits=5)
kf.split(data)

<generator object KFold.split at 0x7f6c557623d0>

In [13]:
# Split the training and test set data 
trainset, testset = train_test_split(data, test_size=0.2)

# Apply the SVD to factorize the matrix 
model = SVD(n_factors=30, n_epochs=20, lr_all=0.008, reg_all=0.08)

# Find the predictions and accuracy
predictions = model.fit(trainset).test(testset)
accuracy.rmse(predictions)

RMSE: 0.8641


0.8641432622800898

In [15]:
# Use KNN to get the predictions
model1 = KNNBasic(n_factors=30, n_epochs=20)
predictions2 = model1.fit(trainset).test(testset)

# Find accuracy using RMSE
accuracy.rmse(predictions2)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9421


0.9420790684480527

In [18]:
def calculate_precision_recall_for_k(predictions, k=10, threshold=3.5):

    # Construct user id value to estimate
    user_id_to_estimate_map = defaultdict(list)
    for user_id, _, ratings_true, estimate_value, _ in predictions:
        user_id_to_estimate_map[user_id].append((estimate_value, ratings_true))

    # Precision and Recall variables definition
    precisions = dict()
    recalls = dict()

    # Return precision and recall for each user id 
    for user_id, user_ratings in user_id_to_estimate_map.items():

        user_ratings.sort(key=lambda x: x[0], reverse=True)

        n_rel = sum((ratings_true >= threshold) for (_, ratings_true) in user_ratings)

        n_rec_k = sum((estimate_value >= threshold) for (estimate_value, _) in user_ratings[:k])

        n_rel_and_rec_k = sum(((ratings_true >= threshold) and (estimate_value >= threshold))
                              for (estimate_value, ratings_true) in user_ratings[:k])

        # Calculate precision and recall
        precisions[user_id] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1

        recalls[user_id] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1

    return precisions, recalls

In [19]:
# Using K fold technique with 4 splits
kf = KFold(n_splits=4)

i = 1

# Calculating the final results with split
for trainset, testset in kf.split(data):
    print("Split:", i)
    predictions = model1.fit(trainset).test(testset)

    # Calculate the accuracy using RMSE
    accuracy.rmse(predictions, verbose=True)
    precisions, recalls = calculate_precision_recall_for_k(predictions, k=5, threshold=4)

    print("Precision:", sum(prec for prec in precisions.values()) / len(precisions))
    print("Recall:", sum(rec for rec in recalls.values()) / len(recalls))
    i+=1

Split: 1
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9490
Precision: 0.7934699453551924
Recall: 0.25461574366041145
Split: 1
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9518
Precision: 0.7769398907103835
Recall: 0.26155487184094406
Split: 1
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9511
Precision: 0.7766393442622963
Recall: 0.2665051902172836
Split: 1
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9531
Precision: 0.7621038251366131
Recall: 0.26986408655957206


In [20]:
# Calculate the predictions
def getpreds(predictions):
    
    pred_value = defaultdict(list)    
    for user_id, id, ratings_true, estimate_value, _ in predictions:
        pred_value[user_id].append((id, estimate_value))

    for user_id, user_ratings in pred_value.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)

    return pred_value

In [22]:
trainset = data.build_full_trainset()
model1.fit(trainset)

# Get the predictions 
testset = trainset.build_anti_testset()
predictions = model1.test(testset)
final_pred = getpreds(predictions)

Computing the msd similarity matrix...
Done computing similarity matrix.


In [23]:
# Adjusting the recommendation quantity to 10.
n = 10

for user_id, user_ratings in final_pred.items():
    user_ratings.sort(key=lambda x: x[1], reverse=True)
    final_pred[user_id] = user_ratings[:n]

In [24]:
# Prepare the data
tmp = pd.DataFrame.from_dict(final_pred)
tmp_transpose = tmp.transpose()

In [25]:
# Compile the result values
response = []
for user_id,user_ratings in final_pred.items():
  response.append(tmp_transpose.loc[user_id])

In [26]:
recommendation = []
# Compile the recommendation
for i in response:
  recommended_movie_ids=[]
  for x in range(0, n):
    recommended_movie_ids.append(i[x][0])
  recommendation.append(recommended_movie_ids)

In [28]:
movies = pd.read_csv('/content/drive/MyDrive/CSE573-SWM-Movie-Recommender/DATA/movies.csv')
final_value = []

# Get final results for the recommendation
for i in recommendation:
  df = movies[movies['movieId'].isin(i)]
  temp = df['title'].tolist()
  final_value.append(temp)

In [29]:
# Construct dataframe from the results
final_df = pd.DataFrame(final_value)

In [30]:
# Convert the results to csv file
final_df.to_csv('/content/drive/MyDrive/CSE573-SWM-Movie-Recommender/DATA/file1.csv',index = False)

In [32]:
# Print the compiled csv results
result = pd.read_csv('/content/drive/MyDrive/CSE573-SWM-Movie-Recommender/DATA/file1.csv')
result

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Lesson Faust (1994),"Assignment, The (1997)",Mephisto (1981),I'm the One That I Want (2000),Galaxy of Terror (Quest) (1981),Alien Contamination (1980),Dylan Moran: Monster (2004),Bill Hicks: Revelations (1993),The Jinx: The Life and Deaths of Robert Durst ...,Black Mirror
1,Far From Home: The Adventures of Yellow Dog (1...,Lassie (1994),It's My Party (1996),Children of the Corn IV: The Gathering (1996),Lesson Faust (1994),I'm the One That I Want (2000),Elling (2001),Dylan Moran: Monster (2004),Bill Hicks: Revelations (1993),Black Mirror
2,It's My Party (1996),"Assignment, The (1997)",Mephisto (1981),I'm the One That I Want (2000),Elling (2001),My Sassy Girl (Yeopgijeogin geunyeo) (2001),Strictly Sexual (2008),Dylan Moran: Monster (2004),Bill Hicks: Revelations (1993),Black Mirror
3,Lesson Faust (1994),"Assignment, The (1997)",Mephisto (1981),Galaxy of Terror (Quest) (1981),Alien Contamination (1980),My Sassy Girl (Yeopgijeogin geunyeo) (2001),Dylan Moran: Monster (2004),Bill Hicks: Revelations (1993),The Jinx: The Life and Deaths of Robert Durst ...,Black Mirror
4,Lesson Faust (1994),"Assignment, The (1997)",Mephisto (1981),I'm the One That I Want (2000),Galaxy of Terror (Quest) (1981),Elling (2001),Alien Contamination (1980),Dylan Moran: Monster (2004),The Jinx: The Life and Deaths of Robert Durst ...,Black Mirror
...,...,...,...,...,...,...,...,...,...,...
605,Lesson Faust (1994),"Assignment, The (1997)",Mephisto (1981),I'm the One That I Want (2000),Galaxy of Terror (Quest) (1981),Alien Contamination (1980),Dylan Moran: Monster (2004),Bill Hicks: Revelations (1993),The Jinx: The Life and Deaths of Robert Durst ...,Black Mirror
606,Lesson Faust (1994),"Assignment, The (1997)",Mephisto (1981),I'm the One That I Want (2000),Galaxy of Terror (Quest) (1981),Alien Contamination (1980),Dylan Moran: Monster (2004),Bill Hicks: Revelations (1993),The Jinx: The Life and Deaths of Robert Durst ...,Black Mirror
607,Lesson Faust (1994),"Assignment, The (1997)",Mephisto (1981),I'm the One That I Want (2000),Galaxy of Terror (Quest) (1981),Alien Contamination (1980),Dylan Moran: Monster (2004),Bill Hicks: Revelations (1993),The Jinx: The Life and Deaths of Robert Durst ...,Black Mirror
608,Lesson Faust (1994),"Assignment, The (1997)",Bent (1997),Mephisto (1981),I'm the One That I Want (2000),Elling (2001),Dylan Moran: Monster (2004),Bill Hicks: Revelations (1993),The Jinx: The Life and Deaths of Robert Durst ...,Black Mirror
