In [None]:
from surprise import Dataset
from surprise import Reader
import os
from surprise import SVD
from surprise.model_selection import cross_validate
from surprise.model_selection import split
from surprise import NMF
from surprise import KNNBasic
from surprise.model_selection import KFold
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Seed both Python's and NumPy's global RNGs before any cross-validation
# (presumably so the fold shuffling and model initialisation are repeatable).
my_seed = 0
random.seed(my_seed)
np.random.seed(my_seed)

# Load the ratings from a tab-separated file with the columns
# user, item, rating, timestamp.
file_path = os.path.expanduser('/Users/QUAN/Documents/restaurant_ratings.txt')
reader = Reader(line_format='user item rating timestamp', sep='\t')
data = Dataset.load_from_file(file_path, reader=reader)

# Each entry is (banner to print, algorithm class, constructor kwargs).
# The entries are evaluated in this order with 3-fold cross-validation.
experiments = [
    ("\n--------- SVD----------\n",
     SVD, {}),
    # SVD without bias terms is the Probabilistic Matrix Factorization model.
    ("\n------- Probabilistic Matrix Factorization----------\n",
     SVD, {'biased': False}),
    ("\n------ Non-negative Matrix Factorization----------\n",
     NMF, {}),
    ("\n-----------User based Collaborative Filtering----------\n",
     KNNBasic, {'sim_options': {'user_based': True}}),
    ("\n--------Item based Collaborative Filtering----------\n",
     KNNBasic, {'sim_options': {'user_based': False}}),
    ("\n---------User based Collaborative Filtering - MSD ----------\n",
     KNNBasic, {'sim_options': {'name': 'MSD', 'user_based': True}}),
    ("\n-----User based Collaborative Filtering - Cosine----------\n",
     KNNBasic, {'sim_options': {'name': 'cosine', 'user_based': True}}),
    ("\n-------User based Collaborative Filtering - pearson----------\n",
     KNNBasic, {'sim_options': {'name': 'pearson', 'user_based': True}}),
    # Item-based variants must pass user_based=False; with True they would
    # merely repeat the three user-based runs above.
    ("\n-----------Item based Collaborative Filtering - MSD----------\n",
     KNNBasic, {'sim_options': {'name': 'MSD', 'user_based': False}}),
    ("\n----Item based Collaborative Filtering - Cosine----------\n",
     KNNBasic, {'sim_options': {'name': 'cosine', 'user_based': False}}),
    ("\n----------Item based Collaborative Filtering-pearson---------",
     KNNBasic, {'sim_options': {'name': 'pearson', 'user_based': False}}),
]

for banner, algo_class, algo_kwargs in experiments:
    print(banner)
    algo = algo_class(**algo_kwargs)
    cross_validate(algo, data, cv=3, verbose=True)

def PlotsSimilarity():
    """
    Plot how the MSD (Mean Squared Difference), Cosine and Pearson similarities
    impact the performance of User based Collaborative Filtering and
    Item based Collaborative Filtering.

    For every (similarity, user/item based) combination a KNNBasic model is
    evaluated with 3-fold cross-validation on the module-level ``data``.
    The mean test RMSE and mean test MAE of each run are collected, the
    collected rows are printed, and one grouped bar chart per metric is shown.

    :return: Nothing
    """
    # (banner, filtering label, algorithm label, Surprise similarity name,
    #  user_based flag) -- evaluated in exactly this order.
    runs = [
        ("\n----User based Collaborative Filtering---MSD------\n",
         "User based Collaborative Filtering", "MSD", 'MSD', True),
        ("\n---Item based Collaborative Filtering--MSD--------\n",
         "Item based Collaborative Filtering", "MSD", 'MSD', False),
        ("\n---------User based Collaborative Filtering--Cosine--------\n",
         "User based Collaborative Filtering", "Cosine", 'cosine', True),
        ("\n----Item based Collaborative Filtering--Cosine--------\n",
         "Item based Collaborative Filtering", "Cosine", 'cosine', False),
        ("\n---------User based Collaborative Filtering--pearson--------\n",
         "User based Collaborative Filtering", "Pearson", 'pearson', True),
        ("\n----Item based Collaborative Filtering--pearson--------\n",
         "Item based Collaborative Filtering", "Pearson", 'pearson', False),
    ]

    rmse_rows = []
    mae_rows = []
    for banner, method_label, algo_label, sim_name, user_based in runs:
        print(banner)
        algo = KNNBasic(sim_options={'name': sim_name, 'user_based': user_based})
        scores = cross_validate(algo, data, cv=3, verbose=True)
        rmse_rows.append([method_label, algo_label, scores["test_rmse"].mean()])
        mae_rows.append([method_label, algo_label, scores["test_mae"].mean()])

    # One chart per metric; the y-axis is clipped to a narrow window
    # (presumably so that small differences between bars stay visible).
    for metric, rows, (y_low, y_high) in (
        ("RMSE", rmse_rows, (.9, 1.1)),
        ("MAE", mae_rows, (.7, .9)),
    ):
        print(rows)
        frame = pd.DataFrame(
            data=rows,
            columns=["Filtering Method Used", "Algorithm", metric],
        )
        # Keyword arguments: pandas >= 2.0 rejects positional pivot arguments.
        frame.pivot(
            index="Algorithm",
            columns="Filtering Method Used",
            values=metric,
        ).plot(kind="bar")
        plt.ylim(y_low, y_high)
        plt.show()


PlotsSimilarity()

def NeighborsFiltering(neighbors=None):
    """
    Plot how the number of neighbors affects User based and Item based
    Collaborative Filtering.

    For every neighbor count ``k`` a KNNBasic model with MSD similarity is
    evaluated with 3-fold cross-validation on the module-level ``data``,
    first user based and then item based. The mean test RMSE and mean test
    MAE of each run are collected, the collected rows are printed, and one
    grouped bar chart per metric is shown.

    :param neighbors: iterable of neighbor counts to evaluate; defaults to
        1 through 20 inclusive when omitted.
    :return: Nothing
    """
    if neighbors is None:
        neighbors = range(1, 21)

    methods = (
        ("User based Collaborative Filtering", True),
        ("Item based Collaborative Filtering", False),
    )

    rmse_rows = []
    mae_rows = []
    for k in neighbors:
        for method_label, user_based in methods:
            algo = KNNBasic(k=k, sim_options={'name': 'MSD', 'user_based': user_based})
            scores = cross_validate(algo, data, cv=3, verbose=False)
            rmse_rows.append([method_label, k, scores["test_rmse"].mean()])
            mae_rows.append([method_label, k, scores["test_mae"].mean()])

    # (metric, x-axis column name, rows, y-limits). The two x-axis captions
    # differ in capitalisation and are kept exactly as they were.
    charts = (
        ("RMSE", "Number of Neighbors", rmse_rows, (.9, 1.1)),
        ("MAE", "Number of neighbors", mae_rows, (.7, .9)),
    )
    for metric, k_column, rows, (y_low, y_high) in charts:
        print(rows)
        frame = pd.DataFrame(
            data=rows,
            columns=["Filtering Method Used", k_column, metric],
        )
        # Keyword arguments: pandas >= 2.0 rejects positional pivot arguments.
        frame.pivot(
            index=k_column,
            columns="Filtering Method Used",
            values=metric,
        ).plot(kind="bar")
        plt.ylim(y_low, y_high)
        plt.show()


NeighborsFiltering()
    