### Info
This code is inspired by: 
https://github.com/rposhala/Recommender-System-on-MovieLens-dataset/blob/main/Item_based_Collaborative_Recommender_System_using_KNN.ipynb


In [35]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

from libreco.algorithms import UserCF, ItemCF
from libreco.data import DatasetPure
from libreco.utils import save_knn
from serving.flask import sim2redis, user_consumed2redis
from libreco.data import split_by_ratio_chrono, DatasetPure
import tensorflow as tf

import time


In [36]:
# Directory holding the MovieLens "ml-latest-small" CSV files. The trailing slash is
# deliberate: other cells build file paths with plain concatenation (data + 'ratings.csv').
data = "../data/ml-latest-small/"

In [37]:
# Column labels applied to each CSV in place of whatever its first line contains.
column_names1 = ['userId', 'movieId', 'rating', 'timestamp']  # ratings.csv
column_names2 = ['movieId', 'title', 'genres']                # movies.csv

# skiprows=1 discards the first line of each file (presumably its own header row);
# header=None together with names= then labels the remaining rows' columns.
ratings = pd.read_csv(
    data + 'ratings.csv',
    names=column_names1,
    header=None,
    skiprows=1,
)
movies = pd.read_csv(
    data + 'movies.csv',
    names=column_names2,
    header=None,
    skiprows=1,
)

In [38]:
# Preview the first five rating rows.
ratings.head(n=5)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [39]:
# Preview the first five movie rows.
movies.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [40]:
# Attach each movie's title and genres to its ratings. The inner join keeps only
# ratings whose movieId also appears in `movies`.
df_merged = ratings.merge(movies, on='movieId', how='inner')
df_merged.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [41]:
# One row per (userId, title) pair holding the mean rating, so a pair that occurs
# more than once in df_merged collapses to a single averaged value.
refined_dataset = (
    df_merged
    .groupby(['userId', 'title'], as_index=False)['rating']
    .mean()
)

refined_dataset.head()

Unnamed: 0,userId,title,rating
0,1,"13th Warrior, The (1999)",4.0
1,1,20 Dates (1998),4.0
2,1,"Abyss, The (1989)",4.0
3,1,"Adventures of Robin Hood, The (1938)",5.0
4,1,Alice in Wonderland (1951),5.0


#### KNN model


In [42]:
# pivot and create movie-user matrix
# Pivot to a wide user-by-movie table: one row per userId, one column per title,
# each cell holding that user's rating. Pairs with no rating are filled with 0.
user_to_movie_df = (
    refined_dataset
    .pivot(index='userId', columns='title', values='rating')
    .fillna(0)
)

user_to_movie_df.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
# transform matrix to scipy sparse matrix
# Convert the dense user-by-movie table into a SciPy compressed-sparse-row matrix.
# Despite the `_df` suffix, the result is a csr_matrix, not a DataFrame.
user_to_movie_sparse_df = csr_matrix(user_to_movie_df.to_numpy())
user_to_movie_sparse_df

<610x9719 sparse matrix of type '<class 'numpy.float64'>'
	with 100832 stored elements in Compressed Sparse Row format>

In [44]:
# Re-read ratings.csv with the same arguments as the initial load cell, rebinding
# `ratings` to a freshly parsed frame.
ratings = pd.read_csv(
    data + 'ratings.csv',
    names=column_names1,
    header=None,
    skiprows=1,
)

In [45]:
# Quieten console noise before training: set the TensorFlow native-log level and
# the KMP warning switch via environment variables, then raise the TF v1 logger
# threshold to ERROR.
# NOTE(review): tensorflow is already imported in the notebook's import cell, so
# TF_CPP_MIN_LOG_LEVEL is set after TensorFlow has loaded — confirm it still has
# an effect here, or move it ahead of the import.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',
    "KMP_WARNINGS": "FALSE",
})
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [46]:
# Load the ratings, split them into train/eval parts, fit a user-based
# collaborative-filtering model and print a sample prediction and recommendation.

# Not read in this cell; presumably consumed by a later timing cell — confirm.
start_time = time.perf_counter()

# Read into a dedicated name instead of rebinding `data`. `data` is the dataset
# directory string; overwriting it with a DataFrame made this cell fail when
# re-run (DataFrame + 'ratings.csv') and broke any later `data + '<file>'` path.
# skiprows=1 drops the file's first line; names= supplies the column labels
# passed on to the libreco helpers below.
interactions = pd.read_csv(
    data + 'ratings.csv',
    names=["user", "item", "label", "time"],
    skiprows=1,
)

# Hold out 20% of the interactions for evaluation (test_size=0.2). The raw split
# frames get their own names so train_data / eval_data only ever refer to the
# built libreco datasets.
train_df, eval_df = split_by_ratio_chrono(interactions, test_size=0.2)
train_data, data_info = DatasetPure.build_trainset(train_df)
eval_data = DatasetPure.build_evalset(eval_df)
print(data_info)

# User-based CF on explicit ratings: cosine similarity, k=20.
user_cf = UserCF(task="rating", data_info=data_info, k=20, sim_type="cosine")
user_cf.fit(
    train_data,
    verbose=2,
    mode="invert",
    num_threads=4,
    min_common=1,
    eval_data=eval_data,
    metrics=["rmse", "mae", "r2"],
)

# Spot checks: one predicted rating and a 7-item recommendation list for user 1.
print("prediction: ", user_cf.predict(user=1, item=2333))
print("recommendation: ", user_cf.recommend_user(user=1, n_rec=7))

n_users: 610, n_items: 8237, data sparsity: 1.6056 %
Training start time: [35m2022-06-22 12:47:06[0m
Final block size and num: (610, 1)
sim_matrix elapsed: 0.009s
sim_matrix, shape: (610, 610), num_elements: 309208, sparsity: 83.0981 %


top_k: 100%|██████████| 610/610 [00:00<00:00, 15271.77it/s]
eval_pred:   0%|          | 0/3 [00:00<?, ?it/s]

[31mNo common interaction or similar neighbor for user 6 and item 5513, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 79 and item 6770, proceed with default prediction[0m


eval_pred:  33%|███▎      | 1/3 [00:00<00:00,  2.09it/s]

[31mNo common interaction or similar neighbor for user 310 and item 36, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 332 and item 2663, proceed with default prediction[0m


eval_pred: 100%|██████████| 3/3 [00:01<00:00,  2.85it/s]

	 eval rmse: 1.0033
	 eval mae: 0.7708
	 eval r2: 0.0992
prediction:  4.215054988861084
recommendation:  [(852, 5.0), (3388, 5.0), (5851, 5.0), (7844, 5.0), (860, 5.0), (1951, 5.0), (3446, 5.0)]





In [47]:
# Further spot checks against the fitted model: a single predicted rating
# (user 2, item 2000) and a 10-item recommendation list for user 15.
predicted_rating = user_cf.predict(user=2, item=2000)
print("prediction: ", predicted_rating)

recommended_items = user_cf.recommend_user(user=15, n_rec=10)
print("recommendation: ", recommended_items)

prediction:  3.577197551727295
recommendation:  [(6354, 5.0), (1462, 5.0), (1659, 5.0), (5625, 5.0), (831, 5.0), (920, 5.0), (948, 5.0), (1378, 5.0), (5999, 5.0), (6144, 5.0)]


In [51]:
# NOTE(review): this import sits outside the notebook's import cell; consider
# moving it there so all dependencies are visible up front.
import redis

# Client for a Redis server on this machine (127.0.0.1, port 6379).
r = redis.Redis(
    host="127.0.0.1",
    port=6379,
    decode_responses=True,
)
# Connectivity check; the saved cell output shows True.
r.ping()

True