In [1]:
import pandas as pd
import numpy as np
from surprise import Reader, Dataset, SVD, KNNWithMeans
from surprise.model_selection import GridSearchCV, cross_validate

In [2]:
ratings = pd.read_csv('dataset/FinalRatings.csv')[0:45000]

In [3]:
def collaborative(ratings, user_id):

        reader = Reader(rating_scale=(0, 10))
        temp_ratings = ratings

        data = Dataset.load_from_df(temp_ratings[['user_id', 'book_id', 'rating']], reader)
        trainset = data.build_full_trainset()
        
        param_grid = {'n_epochs': [10, 20], 'lr_all': [0.005, 0.01], 'reg_all': [0.2, 0.4, 0.6]}
        gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=5)

        gs.fit(data)

        # best RMSE score
        print(gs.best_score['rmse'])

        # combination of parameters that gave the best RMSE score
        print(gs.best_params['rmse'])
        
        algo = gs.best_estimator['rmse']
        algo.fit(trainset)

        unknown_ratings = trainset.build_anti_testset()
        predictions = algo.test(unknown_ratings)
        
        result = [ [p.uid, p.iid, p.est] for p in predictions if p.uid == user_id]
        result = np.array(result)
        result_dict = {'user_id': result[:,0], 'book_id': result[:,1], 'rating': result[:,2]}
        result_df = pd.DataFrame.from_dict(result_dict)

        return result_df

In [4]:
user_id = 11676
rec = collaborative(ratings, user_id)

3.7558720386945663
{'n_epochs': 10, 'lr_all': 0.01, 'reg_all': 0.4}


In [12]:
user_ratings = rec.sort_values(by=['rating'], ascending=False)
final_data=pd.read_csv('dataset/FinalData.csv')
merged = pd.merge(user_ratings, final_data, on=['book_id','book_id'])
merged.to_csv('colab_filter_11676.csv')

In [6]:
def compare_metric(ratings, metric):
        reader = Reader(rating_scale=(0, 10))
        temp_ratings = ratings

        data = Dataset.load_from_df(temp_ratings[['user_id', 'book_id', 'rating']], reader)
        
        sim_options = {
            "name": metric,
            "min_support": 5,
            "user_based": True,
        }
        algo = KNNWithMeans(sim_options=sim_options)
        
        print("#########  " + metric)
        validate = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

        return validate

In [7]:
metrics = ["msd", "cosine", "pearson"]
rmse = []
mae = []
for m in metrics:
    metric_score = compare_metric(ratings, metrics[0])
    rmse.append(round(np.mean(metric_score['test_rmse']), 4))
    mae.append(round(np.mean(metric_score['test_mae']), 4))

score_dict = {'metric': metrics, 'rmse': rmse, 'mae': mae}
score_df = pd.DataFrame.from_dict(score_dict)

#########  msd
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    4.1305  4.2465  4.1213  4.1970  4.1121  4.1615  0.0520  
MAE (testset)     3.2668  3.3713  3.2688  3.3254  3.2541  3.2973  0.0445  
Fit time          4.34    4.50    4.40    4.41    4.36    4.40    0.05    
Test time         0.74    0.75    0.74    0.75    0.74    0.75    0.00    
#########  msd
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd sim

In [9]:
score_df

Unnamed: 0,metric,rmse,mae
0,msd,4.1615,3.2973
1,cosine,4.1389,3.2724
2,pearson,4.1339,3.2657


In [3]:
def collaborative_knn(ratings, user_id):

        reader = Reader(rating_scale=(0, 10))
        temp_ratings = ratings

        data = Dataset.load_from_df(temp_ratings[['user_id', 'book_id', 'rating']], reader)
        trainset = data.build_full_trainset()
        
        sim_options = {
            "name": ["msd", "cosine"],
            "min_support": [3, 4, 5],
            "user_based": [False, True],
        }

        param_grid = {"sim_options": sim_options}
        gs = GridSearchCV(KNNWithMeans, param_grid, measures=['rmse', 'mae'], cv=5)

        gs.fit(data)

        # best RMSE score
        print(gs.best_score['rmse'])

        # combination of parameters that gave the best RMSE score
        print(gs.best_params['rmse'])
        
        algo = gs.best_estimator['rmse']
        algo.fit(trainset)

        unknown_ratings = trainset.build_anti_testset()
        predictions = algo.test(unknown_ratings)
        
        result = [ [p.uid, p.iid, p.est] for p in predictions if p.uid == user_id]
        result = np.array(result)
        result_dict = {'user_id': result[:,0], 'book_id': result[:,1], 'rating': result[:,2]}
        result_df = pd.DataFrame.from_dict(result_dict)

        return result_df

In [4]:
user_id = 11676
rec_knn = collaborative_knn(ratings, user_id)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

  sim = construction_func[name](*args)


Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing th

In [5]:
user_ratings = rec_knn.sort_values(by=['rating'], ascending=False)
final_data=pd.read_csv('dataset/FinalData.csv')
merged = pd.merge(user_ratings, final_data, on=['book_id','book_id'])
merged.to_csv('colab_filter_knn_11676.csv')