In [17]:
import glob

read_files = glob.glob("*.rating")

with open("test_data.rating", "w") as outfile:
    for f in read_files:
        with open(f, "r") as infile:
            outfile.write(infile.read())

In [2]:
from collections import defaultdict
import numpy as np
import scipy, scipy.sparse, scipy.sparse.linalg
import matplotlib.pyplot as plt

In [13]:
graph = defaultdict(list)
users_set = set()
with open('test_data.links') as file:
    for line in file:
        node_1, node_2, _ = map(int, line.strip().split('\t'))
        graph[node_1].append(node_2)
        graph[node_2].append(node_1)
        users_set.update([node_1, node_2])
num_users = len(users_set)

In [15]:
print("For this dataset, there are:", num_users, "users.")

For this dataset, there are: 3 users.


In [18]:
items_set = set()
with open('test_data.rating') as file:
    for line in file:
        user, item, _ = map(int, line.strip().split('\t'))
        items_set.add(item)
num_items = len(items_set)
item_idx_encode = dict(zip(sorted(list(items_set)), 
                           range(num_users, num_users+num_items)))
with open('test_data.rating') as file:
    for line in file:
        user, item, _ = map(int, line.strip().split('\t'))
        item = item_idx_encode[item]
        graph[user].append(item)
        graph[item].append(user)

In [19]:
print("For this dataset, there are:", num_items, "items.")

For this dataset, there are: 5 items.


In [20]:
A = np.zeros((num_users+num_items, num_users+num_items))
for node in graph:
    for neighbor in graph[node]:
        A[node][neighbor] = 1
        A[neighbor][node] = 1

In [21]:
A_sparse = scipy.sparse.csc_matrix(A)

In [22]:
eig_val, eig_vec = scipy.sparse.linalg.eigs(A_sparse, k=1)

In [23]:
eig_vec = np.abs(eig_vec)

In [24]:
import pickle

pickle.dump(eig_vec.real, open('eigen_vector.p', 'wb'))
pickle.dump(item_idx_encode, open('item_idx_encode.p', 'wb'))

In [49]:
tmp_hr_list, tmp_ndcg_list = [], []

# sample testcase
evaluate_index_dict = {2:[0,1]}
evaluate_real_rating_matrix = [[0.5], [0.5]] # (positive prediction)
evaluate_predict_rating_matrix = {2:[0.5673024, 0.5, 0.5673024, 0.5673024, 0.5, 0.5673024, 
                                     0.5673024, 0.48266914, 0.5673024, 0.48266914]} # (negative prediction)

In [50]:
topK = 3
num_procs = 1

user_list = list(evaluate_index_dict.keys())
batch_size = len(user_list) // num_procs

hr_list, ndcg_list = [], []
index = 0

In [53]:
for _ in range(num_procs):
    if index + batch_size < len(user_list):
        batch_user_list = user_list[index:index+batch_size]
        index = index + batch_size
    else:
        batch_user_list = user_list[index:len(user_list)]
        
    for u in user_list:
        real_item_index_list = evaluate_index_dict[u]
        real_item_rating_list = list(np.concatenate(evaluate_real_rating_matrix[real_item_index_list]))
        positive_length = len(real_item_rating_list)
        target_length = min(positive_length, topK)

        predict_rating_list = evaluate_predict_rating_matrix[u]
        real_item_rating_list.extend(predict_rating_list)
        sort_index = np.argsort(real_item_rating_list)
        sort_index = sort_index[::-1]

        user_hr_list = []
        user_ndcg_list = []
        hits_num = 0
        for idx in range(topK):
            ranking = sort_index[idx]
            if ranking < positive_length:
                hits_num += 1
                user_hr_list.append(self.getHr(idx))
                user_ndcg_list.append(self.getDcg(idx))

        idcg = self.getIdcg(target_length)

        tmp_hr = np.sum(user_hr_list) / target_length
        tmp_ndcg = np.sum(user_ndcg_list) / idcg
        tmp_hr_list.append(tmp_hr)
        tmp_ndcg_list.append(tmp_ndcg)
    
    hr_list.extend(tmp_hr_list)
    ndcg_list.extend(tmp_ndcg_list)

TypeError: list indices must be integers or slices, not list