# Book Recommendation System - Part 2

In [1]:
#import the libraries needed to build the recommendation system
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the tab-separated training ratings into a pandas DataFrame
df = pd.read_csv('train.csv/train.csv', sep='\t')

In [3]:
# Preview the first ten rows of the training data
df.head(n=10)

Unnamed: 0,user_id,book_id,rating
0,12726,7784,5
1,23770,104293,4
2,15669,29291,4
3,649,420180,0
4,10980,7089179,0
5,23252,30119,4
6,30950,27414384,0
7,28472,34268,5
8,20976,231850,0
9,2259,718473,0


In [4]:
# Show the (rows, columns) dimensions of the training DataFrame
df.shape

(700000, 3)

In [7]:
#import surprise library algorithms 
from surprise import KNNBasic, BaselineOnly,Dataset, Reader

In [14]:
# Surprise's Reader needs the rating scale in order to parse the ratings properly
reader = Reader(rating_scale=(0, 5))
# load_from_df expects exactly three columns in the order user, item, rating,
# so select them by name instead of relying on the DataFrame's column layout
data = Dataset.load_from_df(df[['user_id', 'book_id', 'rating']], reader)

In [16]:
#import test train split to randomly split the data. I have used split size of 0.25
from surprise.model_selection import train_test_split
# hold out 25% of the ratings as a test set, the remaining 75% form the train set
trainset, testset = train_test_split(data, test_size=.25)

In [20]:
#Algorithm 6 - SVD with GridSearchCV parameter tuning, using 5 fold cross validation
from surprise import SVD
from surprise.model_selection import GridSearchCV

# candidate hyper-parameter values; every combination is cross-validated
param_grid = {
    'n_epochs': [40, 50, 60],
    'lr_all': [0.005, 0.05],
    'reg_all': [0.1, 0.5, 0.01],
}

# refit=True so that gs_algo.predict() can be used once the search is done
gs_algo = GridSearchCV(
    SVD,
    param_grid,
    measures=['rmse'],
    cv=5,
    refit=True,
)
gs_algo.fit(data)

# best cross-validated RMSE and the parameter combination that achieved it
print(gs_algo.best_score['rmse'])
print(gs_algo.best_params['rmse'])

1.557814968387207
{'lr_all': 0.005, 'n_epochs': 60, 'reg_all': 0.1}


In [21]:
# Load the tab-separated test set into a pandas DataFrame and preview five rows
test = pd.read_csv('test.csv/test.csv', sep='\t')
test.head()

Unnamed: 0,user_id,book_id
0,20989,1832332
1,37040,191139
2,36167,28449164
3,9398,24693869
4,29848,8127


In [22]:
# Make predictions for the test data and save them to a file
res = ['user_id-book_id,rating']  # header row of the output file
# Walk the two id columns in lockstep instead of doing a label lookup per row,
# which is slow and only works when the index is the default 0..n-1 range
for user_id, book_id in zip(test['user_id'], test['book_id']):
    pred = gs_algo.predict(user_id, book_id)
    # pred.est is the estimated rating; round it to the nearest whole rating
    res.append('{}-{},{}'.format(user_id, book_id, int(round(pred.est))))
np.savetxt('pred_gs_60', res, fmt='%s', newline='\n')

In [23]:
#More fine tuning of parameters, using 10 fold cross validation
# single-point grid: only one parameter combination is scored, with 10 folds
param_grid = {
    'n_epochs': [60],
    'lr_all': [0.005],
    'reg_all': [0.1],
}
gs_algo = GridSearchCV(
    SVD,
    param_grid,
    measures=['rmse'],
    cv=10,
    refit=True,
)
gs_algo.fit(data)

# cross-validated RMSE and the parameters it was obtained with
print(gs_algo.best_score['rmse'])
print(gs_algo.best_params['rmse'])

1.5473713195503291
{'lr_all': 0.005, 'n_epochs': 60, 'reg_all': 0.1}


In [26]:
# Make predictions for the test data and save them to a file
res1 = ['user_id-book_id,rating']  # header row of the output file
# Walk the two id columns in lockstep instead of doing a label lookup per row,
# which is slow and only works when the index is the default 0..n-1 range
for user_id, book_id in zip(test['user_id'], test['book_id']):
    pred1 = gs_algo.predict(user_id, book_id)
    # pred1.est is the estimated rating; round it to the nearest whole rating
    res1.append('{}-{},{}'.format(user_id, book_id, int(round(pred1.est))))
np.savetxt('pred_gs_cv10', res1, fmt='%s', newline='\n')

In [27]:
#more fine tuning of parameters, using 10 fold cross validation
# single-point grid: 50 epochs this time, scored with 10 folds
param_grid = {
    'n_epochs': [50],
    'lr_all': [0.005],
    'reg_all': [0.1],
}
gs1_algo = GridSearchCV(
    SVD,
    param_grid,
    measures=['rmse'],
    cv=10,
    refit=True,
)
gs1_algo.fit(data)

# cross-validated RMSE and the parameters it was obtained with
print(gs1_algo.best_score['rmse'])
print(gs1_algo.best_params['rmse'])

1.5469738803204738
{'lr_all': 0.005, 'n_epochs': 50, 'reg_all': 0.1}


In [None]:
#Make predictions and save to file
res2 = ['user_id-book_id,rating']  # header row of the output file
# Walk the two id columns in lockstep instead of doing a label lookup per row,
# which is slow and only works when the index is the default 0..n-1 range
for user_id, book_id in zip(test['user_id'], test['book_id']):
    pred2 = gs1_algo.predict(user_id, book_id)
    # pred2.est is the estimated rating; round it to the nearest whole rating
    res2.append('{}-{},{}'.format(user_id, book_id, int(round(pred2.est))))
# Use a distinct file name: 'pred_gs_cv10' already holds the predictions of the
# 60-epoch model and would otherwise be silently overwritten by this cell
np.savetxt('pred_gs_cv10_50', res2, fmt='%s', newline='\n')

In [29]:
#More fine tuning, using 7 fold cross validation
# single-point grid: one parameter combination, scored with 7 folds
param_grid = {
    'n_epochs': [50],
    'lr_all': [0.005],
    'reg_all': [0.1],
}
gs2_algo = GridSearchCV(
    SVD,
    param_grid,
    measures=['rmse'],
    cv=7,
    refit=True,
)
gs2_algo.fit(data)

# cross-validated RMSE and the parameters it was obtained with
print(gs2_algo.best_score['rmse'])
print(gs2_algo.best_params['rmse'])

1.5514734969581478
{'lr_all': 0.005, 'n_epochs': 50, 'reg_all': 0.1}


In [30]:
#Make predictions and save to file
res3 = ['user_id-book_id,rating']  # header row of the output file
# Walk the two id columns in lockstep instead of doing a label lookup per row,
# which is slow and only works when the index is the default 0..n-1 range
for user_id, book_id in zip(test['user_id'], test['book_id']):
    pred3 = gs2_algo.predict(user_id, book_id)
    # pred3.est is the estimated rating; round it to the nearest whole rating
    res3.append('{}-{},{}'.format(user_id, book_id, int(round(pred3.est))))
np.savetxt('pred_gs_cv7', res3, fmt='%s', newline='\n')

In [31]:
#More fine tuning using 8 fold cross validation
# single-point grid: one parameter combination, scored with 8 folds
param_grid = {
    'n_epochs': [60],
    'lr_all': [0.005],
    'reg_all': [0.1],
}
gs3_algo = GridSearchCV(
    SVD,
    param_grid,
    measures=['rmse'],
    cv=8,
    refit=True,
)
gs3_algo.fit(data)

# cross-validated RMSE and the parameters it was obtained with
print(gs3_algo.best_score['rmse'])
print(gs3_algo.best_params['rmse'])

1.5496330271897638
{'lr_all': 0.005, 'n_epochs': 60, 'reg_all': 0.1}


In [32]:
#Make predictions and save to file
res4 = ['user_id-book_id,rating']  # header row of the output file
# Walk the two id columns in lockstep instead of doing a label lookup per row,
# which is slow and only works when the index is the default 0..n-1 range
for user_id, book_id in zip(test['user_id'], test['book_id']):
    pred4 = gs3_algo.predict(user_id, book_id)
    # pred4.est is the estimated rating; round it to the nearest whole rating
    res4.append('{}-{},{}'.format(user_id, book_id, int(round(pred4.est))))
np.savetxt('pred_gs_2', res4, fmt='%s', newline='\n')

In [37]:
#Algorithm 7 - NMF
#import libraries
from surprise import NMF, evaluate
nmf_algo = NMF()
#split data into 5 folds
data.split(n_folds=5)


In [38]:
#Perform evaluation in validation data using RMSE
evaluate(nmf_algo, data, measures=['RMSE'])



Evaluating RMSE of algorithm NMF.

------------
Fold 1
RMSE: 1.7487
------------
Fold 2
RMSE: 1.7400
------------
Fold 3
RMSE: 1.7465
------------
Fold 4
RMSE: 1.7522
------------
Fold 5
RMSE: 1.7494
------------
------------
Mean RMSE: 1.7473
------------
------------


CaseInsensitiveDefaultDict(list,
                           {'rmse': [1.748704805587569,
                             1.7399722441485956,
                             1.7464998505662706,
                             1.7521828400590813,
                             1.7493552538690902]})