In [1]:
import pandas as pd
import numpy as np
from surprise import Reader, Dataset, SVD
from surprise.model_selection import GridSearchCV


In [2]:
# Load the ratings file and keep only its first 45,000 rows (positional slice).
ratings = pd.read_csv('dataset/FinalRatings.csv').iloc[:45000]

In [3]:
def collaborative(ratings, user_id):
    """Predict ratings for every book `user_id` has not rated yet.

    An SVD model is tuned with a 3-fold grid search over `n_epochs`,
    `lr_all` and `reg_all`, the best-RMSE configuration is refit on the
    full data, and that model scores each item of the training set that
    the user has no rating for.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Must contain the columns 'user_id', 'book_id' and 'rating'.
        Ratings are declared to Surprise as lying on a 0-10 scale.
    user_id
        Raw user id, as it appears in ``ratings['user_id']``.

    Returns
    -------
    pandas.DataFrame
        Columns 'uid', 'iid' and 'est', one row per unrated item, in the
        training set's item order. 'est' holds numeric estimates, so
        sorting by it orders by value. The frame is empty (same columns)
        when the user is not in `ratings` or has rated every item.

    Side effects
    ------------
    Prints the best cross-validated RMSE and the parameter combination
    that achieved it.
    """
    reader = Reader(rating_scale=(0, 10))
    data = Dataset.load_from_df(ratings[['user_id', 'book_id', 'rating']], reader)
    trainset = data.build_full_trainset()

    param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005], 'reg_all': [0.4, 0.6]}
    gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
    gs.fit(data)

    # best RMSE score
    print(gs.best_score['rmse'])

    # combination of parameters that gave the best RMSE score
    print(gs.best_params['rmse'])

    # best_estimator holds an unfitted model configured with the best
    # parameters; train it on all available ratings before predicting.
    algo = gs.best_estimator['rmse']
    algo.fit(trainset)

    columns = ['uid', 'iid', 'est']

    try:
        inner_uid = trainset.to_inner_uid(user_id)
    except ValueError:
        # The user has no ratings in the training data: nothing to rank.
        return pd.DataFrame([], columns=columns)

    raw_uid = trainset.to_raw_uid(inner_uid)
    rated_items = {inner_iid for inner_iid, _ in trainset.ur[inner_uid]}

    # Score only this user's unrated items rather than building the
    # anti-testset for every user and filtering afterwards. Iterating
    # all_items() in order keeps the row order the anti-testset would give.
    rows = []
    for inner_iid in trainset.all_items():
        if inner_iid in rated_items:
            continue
        raw_iid = trainset.to_raw_iid(inner_iid)
        rows.append((raw_uid, raw_iid, algo.predict(raw_uid, raw_iid).est))

    # Build the frame straight from the tuples: a detour through
    # np.array would coerce the mixed int/str/float rows to strings and
    # make 'est' sort lexicographically.
    return pd.DataFrame(rows, columns=columns)

In [4]:
# Predicted ratings for user 276796; the call also prints the grid
# search's best RMSE and the matching parameters.
rec = collaborative(ratings=ratings, user_id=276796)

3.78137918061023
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}


In [5]:
# Display the 20 rows that come first when ordering by 'est', descending.
rec.sort_values('est', ascending=False).iloc[:20]

Unnamed: 0,uid,iid,est
126,276796,0156528207,5.094100090389718
763,276796,2253007102,4.80826739018412
446,276796,8433914545,4.73124269586881
826,276796,0743222229,4.691741838744048
248,276796,0446677450,4.608296393104767
6,276796,0385504209,4.607729145596828
671,276796,0060558865,4.577485036556086
240,276796,8445071408,4.575356089173368
753,276796,193156146X,4.542671102975697
468,276796,0590481371,4.506124684934446
