# Evaluation of item based CF

In [2]:
import numpy as np
import scipy
import pickle
import random
import sklearn.metrics

In [3]:
SEED = 256
random.seed(SEED)

In [4]:
with open("utility_matrix.pkl", "rb") as f:
    utility = pickle.load(f)
    
U_csc = utility.tocsc()
U_csr = utility.tocsr()

In [5]:
with open("eval_data.pkl", "rb") as f:
    eval_data = pickle.load(f)

In [53]:
def item_cf(user_vec, item_neighborhood_k=30):
    
    _, cited_item_idxs = user_vec.nonzero()
    cited_item_idxs = cited_item_idxs
    pred_rtgs = []
    
    for idx in cited_item_idxs:
        curr_item = U_csr.getcol(idx).T

        # calculate similarities
        sims = sklearn.metrics.pairwise.cosine_similarity(curr_item, U_csc.T)
        sims = np.squeeze(sims) 

        # get the most similar items
        asort = sims.argsort()
        asort = asort[::-1] # to descending order

        # get the neighborhood
        k = item_neighborhood_k
        k_most_similar_idx = asort[1:k+1] # the highest-similarity idx will be the user herself, so we skip that
        k_sims = sims[k_most_similar_idx]
        cols_k_similar = np.squeeze(np.array([U_csr.getcol(r).todense() for r in k_most_similar_idx]))
        pred_from_one_item = cols_k_similar.mean(axis=0)
        pred_rtgs.append(pred_from_one_item)

    predicted_ratings = np.array(pred_rtgs).mean(axis=0)
    
    return predicted_ratings

In [80]:
all_negative_scores = []
all_positive_scores = []

for i, (row_idx, v) in enumerate(eval_data.items()):
    
    false_idx = v['false_idx']
    true_idx = v['true_idx']
    
    uv =  U_csr[row_idx,:]
    
    true_idx_val = np.random.choice(true_idx, int(len(true_idx)/2))
    false_idx_val = np.random.choice(false_idx, int(len(false_idx)/2)) 
    
    uv[:,true_idx_val] = False

    scores = item_cf(uv)
    
    all_negative_scores += list(scores[false_idx_val])
    all_positive_scores += list(scores[true_idx_val])

In [81]:
pred = np.array([all_positive_scores, all_negative_scores]) > THR
pred = np.ravel(pred)

truth = np.array([np.ones_like(all_positive_scores), np.zeros_like(all_negative_scores)])
truth = np.ravel(truth)

In [82]:
result = sklearn.metrics.classification_report(pred, truth, output_dict=True)

# Results for negative class

In [83]:
print(f"Precision: {result['False']['precision']}")

Precision: 1.0


# Results for positive class

In [84]:
print(f"Precision: {result['True']['precision']}")

Precision: 0.7764227642276422


In [85]:
result['accuracy']

0.8882113821138211