In [13]:
import pandas as pd
import numpy as np


# Locations of the two input CSV files.
# NOTE(review): these are absolute paths on one machine; the notebook will not
# run elsewhere until they are pointed at a local copy of the data.
ratings_path = "/Users/sonu/Documents/aiml/assignments/c5/C5-mini-project2/movie_ratings.csv"
titles_path = "/Users/sonu/Documents/aiml/assignments/c5/C5-mini-project2/movie_titles.csv"

# Ratings table; preview the first rows
# (columns shown in the output: userId, movieId, rating, timestamp).
df = pd.read_csv(ratings_path)
print(df.head())

# Movie metadata table.
df_movie = pd.read_csv(titles_path)

   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931


In [14]:
import surprise

# Custom recommender: extends surprise.AlgoBase with an SGD-trained matrix factorization
class SGD(surprise.AlgoBase):
    """Matrix-factorization recommender trained with plain stochastic gradient descent.

    Every user ``u`` and item ``i`` gets a latent vector (``P[u]`` and ``Q[i]``)
    and a rating is predicted as their dot product.  The update rule uses no
    bias terms and no regularization.

    Parameters
    ----------
    learning_rate : float
        SGD step size (stored as ``self.alpha``).
    num_epochs : int
        Number of full passes over the training ratings.
    num_factors : int
        Length of each latent vector.
    """

    def __init__(self, learning_rate, num_epochs, num_factors):
        # Let the base class initialise its own state before adding ours.
        surprise.AlgoBase.__init__(self)

        self.alpha = learning_rate
        self.num_epochs = num_epochs
        self.num_factors = num_factors

    def fit(self, train):
        """Learn the user/item factor matrices from a Surprise trainset.

        Parameters
        ----------
        train : surprise.Trainset
            Training data; only ``n_users``, ``n_items`` and ``all_ratings()``
            are read here.

        Returns
        -------
        SGD
            ``self``, so that calls can be chained as with built-in algorithms.
        """
        # The base-class fit records the trainset on the instance
        # (``self.trainset``), which ``estimate`` relies on.
        surprise.AlgoBase.fit(self, train)

        # Small random initial factors, drawn from N(0, 0.1).
        P = np.random.normal(0, .1, (train.n_users, self.num_factors))
        Q = np.random.normal(0, .1, (train.n_items, self.num_factors))

        for epoch in range(self.num_epochs):
            for u, i, r_ui in train.all_ratings():
                residual = r_ui - np.dot(P[u], Q[i])
                # Snapshot the user vector BEFORE updating it.  ``P[u]`` alone
                # is a view into P, so without the copy the item update below
                # would see the already-modified user vector.
                p_u_old = P[u].copy()
                P[u] += self.alpha * residual * Q[i]
                Q[i] += self.alpha * residual * p_u_old

        # Save model parameters
        self.P = P
        self.Q = Q

        return self

    def estimate(self, u, i):
        """Predict the rating of inner user id ``u`` for inner item id ``i``.

        Falls back to the trainset's global mean rating when the user or the
        item was not seen during training, or when the learned factors
        produce NaN (e.g. after the SGD updates diverged).
        """
        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
            return self.trainset.global_mean

        prediction = np.dot(self.P[u], self.Q[i])
        if np.isnan(prediction):
            return self.trainset.global_mean
        return prediction

In [19]:
from surprise import Dataset
from surprise import Reader

# Declare a 1-to-5 rating scale, then wrap the (userId, movieId, rating)
# columns of the ratings DataFrame as a Surprise Dataset.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[["userId", "movieId", "rating"]], reader)
# Trainset built from the full dataset; the grid search below does not use it.
data1 = data.build_full_trainset()

# Single-point grid (learning rate 0.01, 5 epochs, 10 latent factors),
# evaluated with cv=2 on RMSE and MAE.
mymodel = surprise.model_selection.GridSearchCV(
    SGD,
    param_grid=dict(learning_rate=[0.01], num_epochs=[5], num_factors=[10]),
    measures=['rmse', 'mae'],
    cv=2,
)
mymodel.fit(data)

# Best cross-validated score for each measure.
print('rsme: ', mymodel.best_score['rmse'], 'mae: ', mymodel.best_score['mae'])

# Parameter combination behind the best score for each measure.
best_params = mymodel.best_params['rmse']
print('rsme: ', mymodel.best_params['rmse'], 'mae: ', mymodel.best_params['mae'])

rsme:  1.1944957516067378 mae:  0.9141405274226972
rsme:  {'learning_rate': 0.01, 'num_epochs': 5, 'num_factors': 10} mae:  {'learning_rate': 0.01, 'num_epochs': 5, 'num_factors': 10}


In [20]:
# Same experiment with 7 latent factors
# (learning rate 0.01, 5 epochs, cv=2, scored on RMSE and MAE).
mymodel = surprise.model_selection.GridSearchCV(
    SGD,
    param_grid=dict(learning_rate=[0.01], num_epochs=[5], num_factors=[7]),
    measures=['rmse', 'mae'],
    cv=2,
)
mymodel.fit(data)

# Best cross-validated score for each measure.
print('rsme: ', mymodel.best_score['rmse'], 'mae: ', mymodel.best_score['mae'])

# Parameter combination behind the best score for each measure.
best_params = mymodel.best_params['rmse']
print('rsme: ', mymodel.best_params['rmse'], 'mae: ', mymodel.best_params['mae'])

rsme:  1.1942029895109094 mae:  0.9146001280572134
rsme:  {'learning_rate': 0.01, 'num_epochs': 5, 'num_factors': 7} mae:  {'learning_rate': 0.01, 'num_epochs': 5, 'num_factors': 7}


In [21]:
# Same experiment with 5 latent factors
# (learning rate 0.01, 5 epochs, cv=2, scored on RMSE and MAE).
mymodel = surprise.model_selection.GridSearchCV(
    SGD,
    param_grid=dict(learning_rate=[0.01], num_epochs=[5], num_factors=[5]),
    measures=['rmse', 'mae'],
    cv=2,
)
mymodel.fit(data)

# Best cross-validated score for each measure.
print('rsme: ', mymodel.best_score['rmse'], 'mae: ', mymodel.best_score['mae'])

# Parameter combination behind the best score for each measure.
best_params = mymodel.best_params['rmse']
print('rsme: ', mymodel.best_params['rmse'], 'mae: ', mymodel.best_params['mae'])

rsme:  1.2169800223595706 mae:  0.9325579867346411
rsme:  {'learning_rate': 0.01, 'num_epochs': 5, 'num_factors': 5} mae:  {'learning_rate': 0.01, 'num_epochs': 5, 'num_factors': 5}
