# Contents:

- User based prediction

- Item based prediction

In [1]:
import os
import pandas as pd
import numpy as np
import surprise

##### Reading data and converting using surprise file

--------

In [2]:
df = pd.read_csv('sample_data.csv')
df.head()

Unnamed: 0,user,rating,item
0,1,2,1
1,2,2,1
2,3,3,2
3,4,3,2
4,5,1,1


In [3]:
reader = surprise.dataset.Reader(line_format='user rating item',rating_scale=(1,5))

#line format is the format in which the data coloumns are present 

In [4]:
data = surprise.dataset.Dataset.load_from_df(df,reader=reader)

In [5]:
data.raw_ratings

[(1, 2, 1.0, None),
 (2, 2, 1.0, None),
 (3, 3, 2.0, None),
 (4, 3, 2.0, None),
 (5, 1, 1.0, None)]

##### Directly reading the csv file

In [6]:
#if we directly read from csv it should be present in the specified format

reader = surprise.dataset.Reader(line_format='user rating item',sep=',',
                                rating_scale=(1,5),skip_lines=1)

In [7]:
data1= surprise.dataset.Dataset.load_from_file('sample_data.csv',reader=reader)

In [8]:
data1.raw_ratings

[('1', '1', 2.0, None),
 ('2', '1', 2.0, None),
 ('3', '2', 3.0, None),
 ('4', '2', 3.0, None),
 ('5', '1', 1.0, None)]

In [9]:
data_dir=r'C:\Users\tsharmili\Downloads\Machine-Learning-main\Recommendation_Systems\ml-latest-small'

In [10]:
os.chdir(data_dir)

In [11]:
os.getcwd()

'C:\\Users\\tsharmili\\Downloads\\Machine-Learning-main\\Recommendation_Systems\\ml-latest-small'

In [12]:
mr = pd.read_csv('ratings.csv')
mr.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [13]:
mr.drop('timestamp',axis =1,inplace=True)

In [14]:
mr.columns

Index(['userId', 'movieId', 'rating'], dtype='object')

In [15]:
mr.rename(columns={'userId':'user', 'movieId':'item', 'rating':'rating'},inplace=True)

In [17]:
reader = surprise.dataset.Reader(line_format='user item rating',rating_scale=(1,5))

In [18]:
mr_train = surprise.dataset.Dataset.load_from_df(mr,reader= reader)

##### Building Training data

In [19]:
mr_trainset = mr_train.build_full_trainset()

In [20]:
import surprise.prediction_algorithms.knns as knns

## Building a smilarity model

##### Building USER BASED COLLABORATIVE MODEL

In [21]:
knnbasic = knns.KNNBasic(k=40,min_k=1,sim_options={'name':'cosine',
                                                  'user_based':True})

In [22]:
knnbasic.fit(mr_trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x2408918a970>

In [23]:
mr.head()

Unnamed: 0,user,item,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [24]:
knnbasic.predict(uid=1,iid=31,r_ui=2.5)

Prediction(uid=1, iid=31, r_ui=2.5, est=3.1834796860227086, details={'actual_k': 40, 'was_impossible': False})

In [25]:
knnbasic = knns.KNNBasic(k=40,min_k=1,sim_options={'name':'cosine',
                                                  'user_based':False})

In [27]:
knnbasic.fit(mr_trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x24089474580>

In [28]:
knnbasic.predict(uid=1,iid=31,r_ui=2.5)

Prediction(uid=1, iid=31, r_ui=2.5, est=2.547471538910294, details={'actual_k': 20, 'was_impossible': False})

### Consider the average effects 

Building the similarity model using the average effects of item based and user based models

Here we specify the KNNWithMeans

In [29]:
knnbasic = knns.KNNWithMeans(k=40,min_k=1,sim_options={'name':'pearson',
                                                  'user_based':False})

In [30]:
knnbasic.fit(mr_trainset)

Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x240895d83d0>

In [31]:
knnbasic.predict(uid=1,iid=31,r_ui=2.5)

Prediction(uid=1, iid=31, r_ui=2.5, est=2.18133813941489, details={'actual_k': 17, 'was_impossible': False})

### Dividing data into batches

Kfold is used to divide the data into batches and evaluate the performance


In [None]:
# split data into batches (3)
# train and test taking 2 batches each and testing the 3rd


In [32]:
from surprise.model_selection import KFold
from surprise import accuracy

In [33]:
#number of folds

kf= KFold(n_splits=3)

knnbasic = knns.KNNBasic(k=40,min_k=1,sim_options={'name':'cosine',
                                                  'user_based':False})

for trainset ,testset in kf.split(mr_train):
    knnbasic.fit(trainset)
    predictions = knnbasic.test(testset)
    
    accuracy.rmse(predictions,verbose=True)
    accuracy.mae(predictions,verbose= True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 1.0003
MAE:  0.7763
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9909
MAE:  0.7730
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9897
MAE:  0.7691


In [34]:
#number of folds

kf= KFold(n_splits=3)

knnbasic = knns.KNNWithMeans(k=40,min_k=1,sim_options={'name':'cosine',
                                                  'user_based':False})

for trainset ,testset in kf.split(mr_train):
    knnbasic.fit(trainset)
    predictions = knnbasic.test(testset)
    
    accuracy.rmse(predictions,verbose=True)
    accuracy.mae(predictions,verbose= True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9326
MAE:  0.7159
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9319
MAE:  0.7134
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9367
MAE:  0.7191


In [35]:
knnbasic1 = knns.KNNWithMeans(k=40,min_k=1,sim_options={'name':'cosine',
                                                  'user_based':False})
knnbasic1.fit(trainset)
predictions1 = knnbasic.test(testset)
    
accuracy.rmse(predictions1,verbose=True)
accuracy.mae(predictions1,verbose= True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9367
MAE:  0.7191


0.7191342992092398

In [36]:
param_grid = {'k':[10,20],
             'sim_options':{'name':['msd','cosine'],'user_based':[False]}}

In [37]:
algo = knns.KNNWithMeans

In [38]:
from surprise.model_selection import GridSearchCV

In [39]:
grid_search = GridSearchCV(algo,param_grid=param_grid,
                          measures=['RMSE','MAE'])

In [40]:
grid_search.fit(mr_train)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix.

In [41]:
mr_trainset.to_inner_iid(1061)

2

In [None]:
print()

# SVD

In [42]:
from surprise import SVD

In [43]:
modelsvd= SVD(n_factors=20)

In [44]:
modelsvd.fit(mr_trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2408bc99280>

In [45]:
mr.head()

Unnamed: 0,user,item,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [46]:
modelsvd.predict(uid=1,iid=31)

Prediction(uid=1, iid=31, r_ui=None, est=2.4131981882879603, details={'was_impossible': False})

In [54]:
#number of folds

kf= KFold(n_splits=3)

model = SVD(n_factors=20)

for trainset ,testset in kf.split(mr_train):
    model.fit(trainset)
    predictions = model.test(testset)
    
    accuracy.rmse(predictions,verbose=True)
    accuracy.mae(predictions,verbose= True)

RMSE: 0.8948
MAE:  0.6899
RMSE: 0.9028
MAE:  0.6956
RMSE: 0.8956
MAE:  0.6898


# NMF

In [47]:
from surprise import NMF

In [51]:
modelnmf = NMF(n_factors=20,biased= True)

In [52]:
modelnmf.fit(mr_trainset)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x24089474be0>

In [53]:
modelnmf.predict(uid=1,iid=31)

Prediction(uid=1, iid=31, r_ui=None, est=1.5282941377376014, details={'was_impossible': False})

In [55]:
#number of folds

kf= KFold(n_splits=3)

model = NMF(n_factors=20,biased=True)

for trainset ,testset in kf.split(mr_train):
    model.fit(trainset)
    predictions = model.test(testset)
    
    accuracy.rmse(predictions,verbose=True)
    accuracy.mae(predictions,verbose= True)

RMSE: 2.0084
MAE:  1.6521
RMSE: 1.3509
MAE:  1.0284
RMSE: 1.3698
MAE:  1.0393


In [56]:
param_grid={'n_factors':[15,20,25,30]}
algo= SVD

In [57]:
gridsearch = GridSearchCV(SVD, param_grid=param_grid,
                         measures=['rmse','mae'])

In [58]:
gridsearch.fit(mr_train)

In [59]:
print(gridsearch.best_params['rmse'])
print(gridsearch.best_params['mae'])

{'n_factors': 25}
{'n_factors': 15}


In [60]:
print(gridsearch.best_score['rmse'])
print(gridsearch.best_score['mae'])

0.8906138037568553
0.6854107082309696
