In [None]:
# Load the `autotime` IPython extension.
# NOTE(review): presumably the ipython-autotime package, which reports the run
# time of every cell -- confirm it is installed in the kernel environment.
%load_ext autotime

In [None]:
import pickle
import random
import time

import numpy as np
import pandas as pd
import scipy.sparse as sp
import surprise as srp
from scipy.sparse.linalg import svds

In [None]:
# Number of top-ranked recommendations that are scored per user.
precisionAt = 5

# Seed both global random generators so that a fresh Restart & Run All
# reproduces the same numbers. The notebook is stochastic: the selected
# algorithm is a random baseline and the folds are produced by a split step.
# NOTE(review): Surprise is assumed to draw from Python's `random` and NumPy's
# global generator -- confirm against the installed version.
randomSeed = 42
random.seed(randomSeed)
np.random.seed(randomSeed)

# Input files: training ratings, item catalogue and held-out test ratings.
dataFilePath = 'data/training.dat'
itemFilePath = 'ml-1m/movies.dat'
testFilePath = 'data/test.csv'

# Field separators of the three files above (same order).
dataFileSep = ','
itemFileSep = '::'
testFileSep = ','

In [None]:
# Each line of the training file holds: user, item, rating, timestamp,
# separated by `dataFileSep`.
reader = srp.Reader(line_format='user item rating timestamp', sep=dataFileSep)

# Load the ratings and partition them into 3 folds for cross-validation.
# NOTE(review): `Dataset.split` belongs to an older Surprise API; newer
# releases appear to have replaced it with `surprise.model_selection` --
# confirm against the installed version before upgrading.
data = srp.Dataset.load_from_file(dataFilePath, reader=reader)
data.split(n_folds=3)

In [None]:
# Choose the prediction algorithm. Exactly one `algo = ...` line should be
# active; the commented-out lines are alternatives that were tried.

# Alternative 1: matrix factorisation with 16 latent factors.
#algo = srp.SVD(n_factors=16)

# Alternative 2: item-based k-NN on top of baseline estimates.
#sim_options = {'name': 'pearson_baseline', 'user_based': False}
#algo = srp.KNNBaseline(sim_options=sim_options)

# Active choice.
# NOTE(review): NormalPredictor is presumably Surprise's random baseline, so
# its predictions vary between runs unless the RNG is seeded -- confirm.
algo = srp.NormalPredictor()

In [None]:
# Cross-validation: fit `algo` on each fold's training part and report the
# RMSE on the matching held-out part (printed because verbose=True).
# `rmse` and `predictions` are overwritten on every iteration.
# NOTE(review): after the loop `algo` holds the model fitted on the LAST fold
# only (not the full training data); predict() further down uses that model.
# NOTE(review): `folds()` / `train()` belong to an older Surprise API --
# confirm against the installed version before upgrading.
for trainset, testset in data.folds():
    algo.train(trainset)
    predictions = algo.test(testset)

    rmse = srp.accuracy.rmse(predictions, verbose=True)

In [None]:
# Item catalogue: header-less file split on `itemFileSep`. The python engine
# is requested explicitly for both reads.
item_file = pd.read_table(
    itemFilePath,
    sep=itemFileSep,
    header=None,
    engine='python',
)
print("Item data shape :", item_file.shape)

# Held-out test ratings: header-less file split on `testFileSep`.
test_file = pd.read_table(
    testFilePath,
    sep=testFileSep,
    header=None,
    engine='python',
)
print("Test data shape :", test_file.shape)

In [None]:
# Distinct ids taken from column 0 of each file; np.unique returns them
# sorted, so every id gets a stable position.
testUsers = np.unique(test_file[0])
items = np.unique(item_file[0])

testNumberOfUsers, numberOfItems = len(testUsers), len(items)

# Lookup tables: raw id -> row (user) / column (item) position in the
# test rating matrix.
itemIndices = {itemId: position for position, itemId in enumerate(items)}
testUserIndices = {userId: position for position, userId in enumerate(testUsers)}

In [None]:
# Sparse user x item matrix of the held-out ratings; cells without a rating
# stay at 0.
testV = sp.lil_matrix((testNumberOfUsers, numberOfItems))
for record in test_file.values:
    # Each record is (user, item, rating, timestamp); the timestamp is unused.
    userId, itemId, rating, _timestamp = map(int, record)
    testV[testUserIndices[userId], itemIndices[itemId]] = rating

In [None]:
def predict(index):
    """Predict ratings for the items the test user at row `index` has rated.

    Only columns that are non-zero in `testV[index]` are scored; every other
    column of the result stays 0.

    Returns a dense 1 x numberOfItems matrix of predicted ratings.
    """
    ratedColumns = np.nonzero(testV[index, :])[1]
    scores = sp.lil_matrix((1, numberOfItems))
    for column in ratedColumns:
        # Ids are passed as strings; position 3 of the returned prediction is
        # presumably the estimated rating (`est`) -- confirm for the installed
        # Surprise version.
        prediction = algo.predict(
            str(testUsers[index]),
            str(items[column]),
            r_ui=0,
            verbose=False,
        )
        scores[0, column] = prediction[3]
    return scores.todense()

In [None]:
def recommend(index):
    """Return the item ids scored for test user row `index`, best first.

    Items are ordered by descending predicted rating; columns whose predicted
    score is 0 are left out. The result is an empty list when nothing was
    scored.
    """
    scores = predict(index)
    scoredColumns = np.nonzero(scores[0, :])[1]
    scoredValues = np.asarray(scores[0, scoredColumns])
    # argsort is ascending; reversing the columns gives highest score first.
    ranking = scoredValues.argsort()[:, ::-1]
    return [items[scoredColumns[position]] for position in ranking[0]]

In [None]:
def computeUserAccuracy(index):
    """Rank-weighted accuracy of the top recommendations for one test user.

    The first `precisionAt` recommendations (or fewer if not that many exist)
    are weighted n, n-1, ..., 1 from best to worst rank. Each weight is
    multiplied by the user's actual rating in `testV`, and the total is
    divided by the weight sum times 5.

    Returns 0 when there are no recommendations, otherwise a float.
    """
    ranked = recommend(index)
    if not ranked:
        return 0

    topN = min(precisionAt, len(ranked))
    weightTotal = (topN * (topN + 1)) / 2

    weightedRatings = 0
    # zip stops after topN pairs, so only the top of the ranking is read.
    for weight, itemId in zip(range(topN, 0, -1), ranked):
        weightedRatings = weightedRatings + testV[index, itemIndices[itemId]] * weight

    # NOTE(review): 5 is presumably the maximum rating, scaling the score
    # into [0, 1] -- confirm against the rating scale of the data.
    return float(weightedRatings / (weightTotal * 5))

In [None]:
def computeAccuracy():
    """Print per-user accuracies, the count of zero scores and their mean.

    For every row of `testV` the user's accuracy is computed and printed.
    Users whose accuracy is 0 are counted as "empty" and excluded from the
    mean. The function then prints the number of empty users followed by the
    mean accuracy over the remaining users.

    When no user has a non-zero accuracy the mean is undefined; `nan` is
    printed instead of raising ZeroDivisionError.

    Returns None; all results are written to standard output.
    """
    numberOfUsers = testV.shape[0]
    empty = 0
    sumUserAccuracy = 0.0
    for user in range(numberOfUsers):
        userAccuracy = computeUserAccuracy(user)
        if userAccuracy == 0:
            empty += 1
        sumUserAccuracy += userAccuracy
        print(userAccuracy)
    print(empty)

    scoredUsers = numberOfUsers - empty
    if scoredUsers == 0:
        # Nothing to average over: avoid dividing by zero.
        print(float('nan'))
        return
    print(float(sumUserAccuracy / scoredUsers))

In [None]:
# Evaluate the fitted model on the held-out test ratings and print the results.
computeAccuracy()