In [1]:
# Import Required packages
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


In [4]:
# Import the Surprise algorithms, dataset loaders and cross-validation helpers
from surprise import SVD
from surprise.model_selection import cross_validate
from surprise.model_selection import split
from surprise import NMF
from surprise import KNNBasic
from surprise.model_selection import KFold
from surprise import Dataset
from surprise import Reader


ModuleNotFoundError: No module named 'surprise'

In [5]:
# Seed both NumPy's global generator and Python's `random` module with the
# same fixed value so that repeated runs draw the same random numbers.
my_seed = 0
np.random.seed(my_seed)
random.seed(my_seed)


In [6]:
# Load the ratings from a tab-separated file whose columns are, in order:
# user, item, rating, timestamp.
# The location can be overridden by setting the RESTAURANT_RATINGS_PATH
# environment variable, so the notebook also runs on machines where the
# original author's directory does not exist; when the variable is unset the
# original path is used unchanged.
file_path = os.path.expanduser(
    os.environ.get(
        'RESTAURANT_RATINGS_PATH',
        r'C:\Users\Nathan Campbell\Documents\Machine Leaning\Homeworks\Homework 7\restaurant_ratings.txt',
    )
)
reader = Reader(line_format='user item rating timestamp', sep='\t')
data = Dataset.load_from_file(file_path, reader=reader)


NameError: name 'Reader' is not defined

In [7]:
def compareRmseAndMaeForSvdPmfNmfUserBasedItemBased():
    """
    Run 3-fold cross validation (verbose output enabled) on the module-level
    ``data`` for each algorithm mentioned in the function name: SVD, PMF, NMF,
    user based and item based collaborative filtering.
    :return: Nothing
    """
    # Each entry pairs the banner printed before a run with a factory that
    # builds the algorithm. The factory is only invoked after the banner has
    # been printed, so every run keeps the order: print, construct, validate.
    experiments = (
        # SVD (Singular Value Decomposition)
        ("\n-----------3-folds cross validation SVD----------\n",
         lambda: SVD()),
        # PMF (Probabilistic Matrix Factorization), run here as SVD with biased=False
        ("\n-----------3-folds cross validation for PMF----------\n",
         lambda: SVD(biased=False)),
        # NMF (Non-negative Matrix Factorization)
        ("\n-----------3-folds cross validation for NMF----------\n",
         lambda: NMF()),
        # User based Collaborative Filtering
        ("\n-----------3-folds cross validation for User based Collaborative Filtering----------\n",
         lambda: KNNBasic(sim_options={'user_based': True})),
        # Item based Collaborative Filtering
        ("\n-----------3-folds cross validation for Item based Collaborative Filtering----------\n",
         lambda: KNNBasic(sim_options={'user_based': False})),
    )

    for banner, build_algo in experiments:
        print(banner)
        cross_validate(build_algo(), data, cv=3, verbose=True)


In [8]:
def createPlotsDueToSimilarityUsed():
    """
    Plot how the MSD (Mean Squared Difference), Cosine and Pearson similarities impact the
    performance of User based Collaborative Filtering and Item based Collaborative Filtering.

    Every (similarity, filtering method) pair is evaluated with a KNNBasic model and 3-fold
    cross validation on the module-level ``data``. The mean test RMSE and the mean test MAE
    over the folds are then drawn as two grouped bar charts, one bar group per similarity.
    :return: Nothing
    """
    # (label used in the printed banner and on the plot, value passed as sim_options['name'])
    similarities = [("MSD", "MSD"), ("Cosine", "cosine"), ("pearson", "pearson")]
    # (label used in the printed banner and on the plot, value passed as sim_options['user_based'])
    filteringMethods = [
        ("User based Collaborative Filtering", True),
        ("Item based Collaborative Filtering", False),
    ]

    plotArrayRMSE = []
    plotArrayMAE = []
    for simLabel, simName in similarities:
        for methodLabel, userBased in filteringMethods:
            print("\n-----------3-folds cross validation for " + methodLabel + "----------")
            print("-----------" + simLabel + "----------\n")
            # The similarity actually requested must match the banner; the item based
            # Cosine and Pearson runs previously reused 'MSD' by mistake.
            algo = KNNBasic(sim_options={'name': simName, 'user_based': userBased})
            scores = cross_validate(algo, data, cv=3, verbose=True)
            plotArrayRMSE.append([methodLabel, simLabel, scores["test_rmse"].mean()])
            plotArrayMAE.append([methodLabel, simLabel, scores["test_mae"].mean()])

    # pivot() sorts its index, so restore the order in which the similarities were evaluated.
    similarityOrder = [simLabel for simLabel, _ in similarities]
    for metric, rows, yLimits in (("RMSE", plotArrayRMSE, (.9, 1.1)), ("MAE", plotArrayMAE, (.7, .9))):
        plotDF = pd.DataFrame(data=rows, columns=["Filtering Method Used", "Similarity", metric])
        # Keyword arguments are required: pandas >= 2.0 rejects positional arguments to pivot().
        pivoted = plotDF.pivot(index="Similarity", columns="Filtering Method Used", values=metric)
        pivoted.reindex(similarityOrder).plot(kind="bar")
        plt.title("User vs Item Based Collaboration ({})".format(metric))
        plt.ylabel(metric)
        plt.ylim(*yLimits)
        plt.show()
 
# Call the function by the name it is defined under in this cell; the previous
# name `PlotsSimilarity` is not defined in this notebook and raises NameError
# on a fresh kernel.
createPlotsDueToSimilarityUsed()


In [9]:
def NeighborsFiltering(neighbors=None):
    """
    Plot how the number of neighbors impacts the performance of User based Collaborative
    Filtering and Item based Collaborative Filtering.

    For every neighbor count k, a KNNBasic model using the MSD similarity is evaluated with
    3-fold cross validation on the module-level ``data``, once user based and once item based.
    The mean test RMSE and the mean test MAE over the folds are printed as raw lists and drawn
    as two grouped bar charts, one bar group per k.
    :param neighbors: iterable of neighbor counts to evaluate; defaults to 1 through 20
    :return: Nothing
    """
    if neighbors is None:
        neighbors = range(1, 21)

    # (label used on the plot, value passed as sim_options['user_based'])
    filteringMethods = (
        ("User based Collaborative Filtering", True),
        ("Item based Collaborative Filtering", False),
    )

    plotRMSE = []
    plotMAE = []
    for k in neighbors:
        for methodLabel, userBased in filteringMethods:
            algo = KNNBasic(k=k, sim_options={'name': 'MSD', 'user_based': userBased})
            scores = cross_validate(algo, data, cv=3, verbose=False)
            plotRMSE.append([methodLabel, k, scores["test_rmse"].mean()])
            plotMAE.append([methodLabel, k, scores["test_mae"].mean()])

    for metric, rows, yLimits in (("RMSE", plotRMSE, (.9, 1.1)), ("MAE", plotMAE, (.7, .9))):
        print(rows)
        plotDF = pd.DataFrame(data=rows, columns=["Filtering Method Used", "Number of Neighbors", metric])
        # Keyword arguments are required: pandas >= 2.0 rejects positional arguments to pivot().
        plotDF.pivot(index="Number of Neighbors", columns="Filtering Method Used", values=metric).plot(kind="bar")
        plt.title("User vs Item Based Collaboration by Number of Neighbors ({})".format(metric))
        plt.ylabel(metric)
        plt.ylim(*yLimits)
        plt.show()
    
# Call the function by the name it is defined under in this cell; the previous
# name `Neighbors_Filtering` is not defined in this notebook and raises
# NameError on a fresh kernel.
NeighborsFiltering()


NameError: name 'KNNBasic' is not defined