In [4]:
import numpy as np
import pandas as pd
import scipy.sparse as sp
from time import time
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_distances

from data_loader import load_ratings, load_user_item_matrix

### User/item matrix loading

In [5]:
user_item = load_user_item_matrix()
print('User item matrix shape: ', user_item.shape)

User item matrix shape:  (138493, 26744)


### User-user collaborative filtering

In [8]:
def calculate_distances(U, UI):
    return np.array(U.dot(UI.T) / np.sqrt(np.sum(U.power(2), axis=1).reshape(-1, 1) * np.sum(UI.power(2), axis=1).reshape(1, -1)))

def predict_user_item_values(U, UI, k=5, report_progress=True):
    start_time = time()
    D = calculate_distances(U, UI)
    res = np.zeros(shape=(U.shape[0], UI.shape[1]))
    for movie_id in range(UI.shape[1]):
        movie_mask = (UI[:, movie_id] != 0).T.toarray()
        masked_distances = np.where(movie_mask, D, 0)
        topk = np.argpartition(masked_distances, -k, axis=1)[:, -k:]
        predicted_ratings = np.mean(UI[topk, movie_id].toarray(), axis=1)
        res[:, movie_id] = predicted_ratings
        if report_progress and movie_id % 1000 == 0:
            percentage = float(100) * movie_id // UI.shape[1]
            time_elapsed = time() - start_time
            total_time = float(100) * time_elapsed / percentage if percentage > 0 else float('nan')
            total_min = total_time / 60
            total_sec = total_time % 60
            time_left = total_time - time_elapsed
            min_left = time_left / 60
            sec_left = time_left % 60
            print('{}% after {:.2f} s, estimated total time: {:.2f} min {:.2f} sec, etl: {:.2f} min {:.2f}'.format(percentage, time_elapsed, total_min, total_sec, min_left, sec_left))
    return res

In [9]:
predicted = predict_user_item_values(user_item[:10], user_item[10:])

0.0% after 0.73 s, estimated total time: nan min nan sec, etl: nan min nan
3.0% after 44.79 s, estimated total time: 24.88 min 52.87 sec, etl: 24.13 min 8.09


KeyboardInterrupt: 