# Evaluation of User based CF

In [1]:
import numpy as np
import scipy
import pickle
import random
import sklearn.metrics

In [2]:
SEED = 256
random.seed(SEED)

In [3]:
with open("utility_matrix.pkl", "rb") as f:
    utility = pickle.load(f)
    
U_csc = utility.tocsc()
U_csr = utility.tocsr()

In [2]:
with open("eval_data.pkl", "rb") as f:
    eval_data = pickle.load(f)

In [10]:
s = 0
for k,v in eval_data.items():
    s += len(v['false_idx']) + len(v['true_idx'])

In [11]:
s

563636

In [5]:
def user_cf(user_vec, user_neighborhood_k=30):
    
    # calculate similarities with all the other users
    sims = sklearn.metrics.pairwise.cosine_similarity(user_vec, U_csr)
    sims = np.squeeze(sims)

    # get users most similar to the queried user
    asort = sims.argsort()
    k = user_neighborhood_k
    k_most_similar_idx = asort[::-1][1:k+1]
    k_sims = sims[k_most_similar_idx]

    # calculate missing utility matrix entries based on the neighborhood
    ratings_k_similar = np.squeeze(np.array([U_csr.getrow(r).todense() for r in k_most_similar_idx]))
    ratings_k_similar_weighted = (k_sims[:,np.newaxis] * ratings_k_similar)
    predicted_ratings = ratings_k_similar_weighted.mean(axis=0)
    
    return predicted_ratings

In [6]:
all_negative_scores = []
all_positive_scores = []

for i, (row_idx, v) in enumerate(eval_data.items()):
    
    false_idx = v['false_idx']
    true_idx = v['true_idx']
    
    uv =  U_csr[row_idx,:]
#     truth = uv.copy()
    true_idx_val = np.random.choice(true_idx, int(len(true_idx)/2))
    false_idx_val = np.random.choice(false_idx, int(len(false_idx)/2)) 
    uv[:,true_idx_val] = False

    scores = user_cf(uv)
    
    all_negative_scores += list(scores[false_idx_val])
    all_positive_scores += list(scores[true_idx_val])

In [7]:
THR = 0.5
pred = np.array([all_positive_scores, all_negative_scores]) > THR
pred = np.ravel(pred)

truth = np.array([np.ones_like(all_positive_scores), np.zeros_like(all_negative_scores)])
truth = np.ravel(truth)

In [8]:
result = sklearn.metrics.classification_report(pred, truth, output_dict=True)

# Results for negative class

In [9]:
print(f"Precision: {result['False']['precision']}")
print(f"Recall: {result['False']['recall']}")

Precision: 1.0
Recall: 0.7224011713030747


# Results for positive class

In [12]:
print(f"Precision: {result['True']['precision']}")
print(f"Recall: {result['True']['recall']}")

Precision: 0.6157276043777867
Recall: 1.0


In [13]:
result['accuracy']

0.8078638021888934