In [None]:
# Load the `autotime` IPython extension for the rest of the session.
# NOTE(review): presumably provided by the ipython-autotime package and used to
# report each cell's run time -- confirm it is installed in the kernel environment.
%load_ext autotime

In [None]:
import pandas as pd
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import svds
import pickle
import surprise as srp
import time

In [None]:
# Path to a stock dataset file (disabled; the custom file loaded below is used instead).
#file_path = os.path.expanduser('~/.surprise_data/ml-100k/ml-100k/u.data')

# As we're loading a custom dataset, we need to define a reader. Each line of
# the ratings file is expected to have the following format:
# 'user item rating timestamp', with the fields separated by ',' characters.
reader = srp.Reader(line_format='user item rating timestamp', sep=',')

# Load the training ratings and split them into 3 folds for cross-validation.
# NOTE(review): `Dataset.split()` looks like the legacy Surprise cross-validation
# API -- confirm it still exists in the installed Surprise version.
data = srp.Dataset.load_from_file('data/training.dat', reader=reader)
data.split(n_folds=3)

In [None]:
# Choose the prediction algorithm. The SVD and KNNBaseline variants below are
# alternatives that are currently disabled.
#algo = srp.SVD(n_factors=16)

#sim_options = {'name': 'pearson_baseline', 'user_based': False}
#algo = srp.KNNBaseline(sim_options=sim_options)

# Active choice: Surprise's NormalPredictor.
# NOTE(review): presumably a random baseline, so results may differ between
# runs unless a seed is fixed -- confirm against the Surprise documentation.
algo = srp.NormalPredictor()

for trainset, testset in data.folds():

    # Train on this fold's training part, then predict its held-out part.
    # `algo` is re-trained on every iteration, so later cells see whatever
    # state the last `train` call left behind.
    algo.train(trainset)
    predictions = algo.test(testset)

    # RMSE reporting is disabled, so `predictions` is currently never used.
    #rmse = srp.accuracy.rmse(predictions, verbose=True)

In [None]:
# Raw user and item ids are passed exactly as they appear in the ratings file,
# i.e. as **strings**, not integers.
uid, iid = "1", "48"

# Ask the trained algorithm for one (user, item) estimate; r_ui=4 is the rating
# supplied for comparison and verbose=True makes Surprise print the prediction.
pred = algo.predict(uid, iid, r_ui=4, verbose=True)

In [None]:
# Held-out ratings, comma separated and without a header row. Each row is later
# unpacked as four integer fields (user, item, rating, timestamp).
test_file = pd.read_csv('data/test.csv', header=None, sep=',', engine='python')
print(test_file.shape)

# Item catalogue with '::' as field separator; the multi-character separator
# is why the python parsing engine is requested.
item_file = pd.read_csv('ml-1m/movies.dat', header=None, sep='::', engine='python')
print(item_file.shape)

In [None]:
# Two-way lookup between raw ids and dense matrix positions:
#   items[k] / testUsers[k]   -> raw id stored at position k
#   itemIndices[raw_id] / testUserIndices[raw_id] -> position k of that raw id
# Example from the original notes: items[3666] is raw id 3952, so
# itemIndices[3952] is 3666; user ids run from 1 (index 0) to 6040 (index 6039).
testUsers = np.unique(test_file[0])  # sorted unique raw user ids in the test file
items = np.unique(item_file[0])  # sorted unique raw item ids in the item file

testNumberOfUsers = len(testUsers)
numberOfItems = len(items)

# raw item id -> column position
itemIndices = {raw_id: position for position, raw_id in enumerate(items)}
# raw user id -> row position
testUserIndices = {raw_id: position for position, raw_id in enumerate(testUsers)}

print(len(itemIndices))

In [None]:
# Sparse user x item matrix holding the ratings from the test file.
testV = sp.lil_matrix((testNumberOfUsers, numberOfItems))
for row in test_file.values:
    user_id, item_id, rating, _timestamp = (int(field) for field in row)
    # Translate the raw user/item ids to their matrix positions and store the
    # rating in that cell.
    testV[testUserIndices[user_id], itemIndices[item_id]] = rating

In [None]:
def predict(index):
    """Estimate ratings for every item the given test user has rated.

    Parameters
    ----------
    index : int
        Row position of the user in `testV` / `testUsers`.

    Returns
    -------
    numpy.matrix
        Dense 1 x `numberOfItems` row. Columns with a non-zero entry in
        `testV[index]` hold the estimate returned by `algo.predict`; all
        other columns stay 0.
    """
    estimates = sp.lil_matrix((1, numberOfItems))
    # Only the columns this user actually rated in the test matrix are scored.
    _, rated_columns = testV[index, :].nonzero()
    for column in rated_columns:
        # Surprise is queried with raw ids as strings.
        prediction = algo.predict(
            str(testUsers[index]), str(items[column]), r_ui=0, verbose=False
        )
        estimates[0, column] = prediction.est
    return estimates.todense()

In [None]:
def recommend(index):
    """Rank the items scored by `predict(index)` from best to worst.

    Parameters
    ----------
    index : int
        Row position of the user, forwarded to `predict`.

    Returns
    -------
    list
        Raw item ids (taken from `items`) whose estimate is non-zero, ordered
        by descending estimate. Empty when no item has a non-zero estimate.
    """
    # Flatten the 1 x n prediction row to a plain 1-D array.
    scores = np.asarray(predict(index))[0]
    scored_columns = np.nonzero(scores)[0]
    # Ascending argsort reversed gives descending order of the estimates.
    descending = scores[scored_columns].argsort()[::-1]
    return [items[scored_columns[rank]] for rank in descending]

In [None]:
# Smoke test: ranked raw item ids for the test user at row index 0.
result = recommend(0)
print(result)

In [None]:
precisionAt = 5  # number of top-ranked recommendations that are scored


def computeUserAccuracy(index, maxRating=5):
    """Rank-weighted precision@k of the recommendations for one test user.

    The first `precisionAt` items returned by `recommend(index)` (fewer if the
    list is shorter) receive the weights k, k-1, ..., 1 by rank. Each weight is
    multiplied by the rating stored in `testV` for that item, and the total is
    divided by the total that would result if every scored item had the rating
    `maxRating`.

    Parameters
    ----------
    index : int
        Row position of the user in `testV`, also forwarded to `recommend`.
    maxRating : number, optional
        Highest possible rating, used for normalisation. Defaults to 5, the
        value that was previously hard-coded.

    Returns
    -------
    float
        Normalised score; within [0, 1] provided the stored ratings lie in
        [0, maxRating]. 0.0 when there is nothing to score (no
        recommendations, or `precisionAt` is not positive).
    """
    computeditems = recommend(index)
    # Always return a float, and avoid dividing by a zero weight sum.
    if not computeditems or precisionAt <= 0:
        return 0.0

    topK = min(precisionAt, len(computeditems))
    # Sum of the rank weights topK + (topK - 1) + ... + 1.
    sumWeight = (topK * (topK + 1)) / 2

    weightedSum = 0
    # zip stops after topK items, so the tail of the list is never visited.
    for weight, recommendation in zip(range(topK, 0, -1), computeditems):
        weightedSum = weightedSum + testV[index, itemIndices[recommendation]] * weight
    return float(weightedSum / (sumWeight * maxRating))

In [None]:
def computeAccuracy():
    """Print per-user accuracies, the count of zero scores, and their mean.

    Calls `computeUserAccuracy` for every row of `testV` and prints each
    score on its own line. It then prints how many users scored exactly 0,
    followed by the mean score over the remaining users (zero-score users are
    excluded from the denominator).

    When no user has a non-zero score (including an empty `testV`), the mean
    is printed as 0.0 instead of raising ZeroDivisionError.

    Returns
    -------
    None
        All results are reported via `print`.
    """
    numberOfUsers = testV.shape[0]
    empty = 0
    sumUserAccuracy = 0.0
    for user in range(numberOfUsers):
        userAccuracy = computeUserAccuracy(user)
        if userAccuracy == 0:
            empty = empty + 1
        sumUserAccuracy = sumUserAccuracy + userAccuracy
        print(userAccuracy)
    print(empty)

    scoredUsers = numberOfUsers - empty
    if scoredUsers == 0:
        # Nothing to average over; every score (if any) was 0.
        print(0.0)
    else:
        print(float(sumUserAccuracy / scoredUsers))

In [None]:
# Run the evaluation over every row of testV; prints one score per user, then
# the number of zero-score users and the mean over the remaining users.
computeAccuracy()

In [None]:
# Spot-check: accuracy score for the test user at row index 0.
computeUserAccuracy(0)