In [None]:
# Load the `autotime` IPython extension (presumably ipython-autotime, which
# reports the run time of every executed cell -- confirm it is installed).
%load_ext autotime

In [None]:
import pandas as pd
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import svds
import pickle
import surprise as srp
import time

In [None]:
# Leftover path from the Surprise ml-100k example; not used below.
#file_path = os.path.expanduser('~/.surprise_data/ml-100k/ml-100k/u.data')

# We load a custom ratings file, so a Reader has to describe its layout.
# Each line is expected to hold 'user item rating timestamp' separated by
# commas (sep=','), unlike the tab-separated ml-100k file that the example
# this cell was adapted from uses.
reader = srp.Reader(line_format='user item rating timestamp', sep=',')

# Load the training ratings and split them into 3 folds for cross-validation.
# NOTE(review): Dataset.split looks like the older Surprise API -- confirm the
# installed version still provides it.
data = srp.Dataset.load_from_file('data/training.dat', reader=reader)
data.split(n_folds=3)


In [None]:
# We'll use the famous SVD algorithm.
algo = srp.SVD()

# Cross-validation over the folds of `data`. The same `algo` object is trained
# on every fold in turn and `predictions`/`rmse` are overwritten each time, so
# after the loop they hold the results of the last fold only (presumably
# train() refits from scratch -- confirm against the Surprise docs).
for trainset, testset in data.folds():

    # train and test algorithm.
    algo.train(trainset)
    predictions = algo.test(testset)

    # Compute and print Root Mean Squared Error
    rmse = srp.accuracy.rmse(predictions, verbose=True)

In [None]:
# Raw ids exactly as they appear in the ratings file; both are passed as strings.
uid = str(1)  # raw user id (as in the ratings file). They are **strings**!
iid = str(48)  # raw item id (as in the ratings file). They are **strings**!

# get a prediction for specific users and items.
# r_ui is the reference ("true") rating handed to predict(); presumably it is
# only echoed in the output and does not change the estimate -- confirm.
pred = algo.predict(uid, iid, r_ui=4, verbose=True)

In [None]:
# Show the second entry of `predictions` (the output of the last algo.test call).
predictions[1]

In [None]:
# Same (uid, iid) query with r_ui=5; presumably only the reported reference
# rating changes, not the estimate -- compare the printed output.
pred = algo.predict(uid, iid, r_ui=5, verbose=True)

In [None]:
# Same (uid, iid) query with r_ui=1; presumably only the reported reference
# rating changes, not the estimate -- compare the printed output.
pred = algo.predict(uid, iid, r_ui=1, verbose=True)

In [None]:
# Same (uid, iid) query with r_ui=0, the placeholder value that predict() in
# this notebook also passes when no true rating is supplied.
pred = algo.predict(uid, iid, r_ui=0, verbose=True)

In [None]:
# Plain-text form of the second entry of `predictions`.
print(predictions[1])

In [None]:
# First field of the second prediction entry (presumably the raw user id -- confirm).
print(predictions[1][0])

In [None]:
# Held-out ratings: comma-separated, no header row.
test_file = pd.read_csv('data/test.csv', sep=',', header=None, engine='python')
print(test_file.shape)

# Movie catalogue: '::' is a multi-character separator, which is why the
# python parsing engine is requested.
movie_file = pd.read_csv('ml-1m/movies.dat', sep='::', header=None, engine='python')
print(movie_file.shape)

In [None]:
# Two-way mapping between raw ids and dense matrix positions:
#   movies[position]      -> raw movie id   (author's note: movies[3666] == 3952)
#   movie_indices[raw id] -> position       (author's note: movie_indices[3952] == 3666)
# np.unique returns the ids sorted, so a position is the rank of the raw id.
test_users = np.unique(test_file[0])  # author's note: ids 1 (index 0) through 6040 (index 6039)
movies = np.unique(movie_file[0])

test_number_of_rows = len(test_users)  # author's note: 6040
number_of_columns = len(movies)  # author's note: 3667

# raw movie id -> column position
movie_indices = {movie_id: position for position, movie_id in enumerate(movies)}

# raw user id -> row position
test_user_indices = {user_id: position for position, user_id in enumerate(test_users)}
print(len(movie_indices))

In [None]:
# Sparse matrix of held-out ratings: one row per entry of test_users, one
# column per entry of movies.
test_V = sp.lil_matrix((test_number_of_rows, number_of_columns))
for line in test_file.values:
    # Row fields cast to int: user id, movie id, rating, and a fourth value
    # that is not used here (presumably the timestamp).
    u, i , r , t = map(int,line)
    test_V[test_user_indices[u], movie_indices[i]] = r # look up the matrix indices of the raw user and movie ids and store the rating at that cell

In [None]:
#P = sp.lil_matrix((test_number_of_rows, number_of_columns))
#for user in range(test_number_of_rows):
#    for movie in np.nonzero(test_V[user,:])[1]:
#        pred = algo.predict(str(test_users[user]), str(movies[movie]), r_ui=0, verbose=False)
#        P[user, movie] = pred[3]
#P = P.todense()
#print(P.shape)

In [None]:
def predict(index):
    """Estimate ratings for the movies that the user at row `index` has in test_V.

    Only the columns that are non-zero in ``test_V[index, :]`` are scored;
    every other column of the result stays 0.

    Args:
        index: row position in ``test_V`` / ``test_users`` (not a raw user id).

    Returns:
        A dense 1 x ``number_of_columns`` matrix holding the model's value for
        each scored column.
    """
    estimates = sp.lil_matrix((1, number_of_columns))
    rated_columns = np.nonzero(test_V[index, :])[1]
    raw_user_id = str(test_users[index])  # the model is queried with string ids
    for column in rated_columns:
        raw_movie_id = str(movies[column])
        prediction = algo.predict(raw_user_id, raw_movie_id, r_ui=0, verbose=False)
        # Field 3 of the returned tuple is stored (presumably the estimated rating).
        estimates[0, column] = prediction[3]
    return estimates.todense()

In [None]:
def recommend(index):
    """Return the raw movie ids scored for a user, highest estimate first.

    Args:
        index: row position of the user, forwarded to ``predict``.

    Returns:
        A list of entries of ``movies`` (raw movie ids), one per non-zero
        column of ``predict(index)``, ordered by descending estimate. The
        list is empty when nothing was scored.
    """
    scores = predict(index)
    scored_columns = np.nonzero(scores[0, :])[1]
    scored_values = np.asarray(scores[0, scored_columns])
    # argsort is ascending; reversing its single row puts the best estimate first.
    ranking = scored_values.argsort()[0][::-1]
    return [movies[scored_columns[position]] for position in ranking]

In [None]:
# Ranked raw movie ids for the user stored at row 0 (the first entry of test_users).
result = recommend(0)
print(result)

In [None]:
precisionAt = 5  # default number of top-ranked recommendations that get scored


def computeUserAccuracy(index, k=None, max_rating=5):
    """Position-weighted score of the top-ranked recommendations for one user.

    The first ``k`` movies returned by ``recommend(index)`` are weighted
    ``k, k-1, ..., 1`` (best rank gets the largest weight). Each weight is
    multiplied by the rating stored in ``test_V`` for that movie, and the sum
    is normalised by ``max_rating`` times the sum of the weights.

    Args:
        index: row position of the user in ``test_V``.
        k: how many top recommendations to score. ``None`` (the default)
            uses the module-level ``precisionAt``. When the user has fewer
            recommendations than ``k``, all of them are scored.
        max_rating: rating value used for normalisation (5 by default).

    Returns:
        The score as a float; ``0.0`` when the user has no recommendations.

    Raises:
        ValueError: if the cutoff is not positive (the original code ended
            in a division by zero in that case).
    """
    computedMovies = recommend(index)
    if not computedMovies:
        return 0.0

    cutoff = precisionAt if k is None else k
    if cutoff <= 0:
        raise ValueError("k must be a positive integer, got %r" % (cutoff,))

    # Only the head of the ranking contributes, so slice instead of walking
    # the whole list with a countdown.
    top_movies = computedMovies[:cutoff]
    depth = len(top_movies)
    weight_total = depth * (depth + 1) / 2.0  # depth + (depth - 1) + ... + 1

    weighted_sum = 0.0
    for rank, movie_id in enumerate(top_movies):
        weighted_sum += test_V[index, movie_indices[movie_id]] * (depth - rank)
    return float(weighted_sum / (weight_total * max_rating))

In [None]:
def computeAccuracy(verbose=True):
    """Average ``computeUserAccuracy`` over every row of ``test_V``.

    Users whose score is 0 are counted as "empty" and left out of the
    average. The number of empty users and the resulting mean are printed.

    Args:
        verbose: when true (the default, matching the original behaviour)
            each user's score is printed as it is computed.

    Returns:
        The mean score over the non-empty users as a float, or NaN when
        every user is empty (the original code divided by zero in that
        case).
    """
    user_count = test_V.shape[0]
    empty = 0
    sumUserAccuracy = 0.0
    for user in range(user_count):
        userAccuracy = computeUserAccuracy(user)
        if userAccuracy == 0:
            empty += 1
        sumUserAccuracy += userAccuracy
        if verbose:
            print(userAccuracy)
    print(empty)

    scored_users = user_count - empty
    if scored_users:
        meanAccuracy = float(sumUserAccuracy / scored_users)
    else:
        # No user produced a score, so the mean is undefined.
        meanAccuracy = float('nan')
    print(meanAccuracy)
    return meanAccuracy

In [None]:
# Evaluate every row of test_V: prints one score per user, then the number of
# zero-score users and the mean over the remaining users.
computeAccuracy()

In [None]:
# Score of the user at row 0 only.
computeUserAccuracy(0)