## Importing libraries

In [93]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from PIL import Image
import urllib.request
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse
import warnings
warnings.filterwarnings("ignore")

## Importing Datasets

In [94]:
df = pd.read_csv('../crawled_data/user_rating.csv')
movies_df = pd.read_csv('../preprocessor/data_cleaned.csv', usecols=['movie_id', 'title', 'img_url'])


In [95]:
df.head()

Unnamed: 0,user_id,movie_id,rating
0,ur2326544,tt0377092,9
1,ur34845698,tt0377092,7
2,ur61691810,tt0377092,8
3,ur3032446,tt0377092,8
4,ur1406078,tt0377092,7


In [96]:
movies_df.head()

Unnamed: 0,movie_id,title,img_url
0,tt0068646,The Godfather,https://m.media-amazon.com/images/M/MV5BM2MyNj...
1,tt0816692,Interstellar,https://m.media-amazon.com/images/M/MV5BZjdkOT...
2,tt10638522,Talk to Me,https://m.media-amazon.com/images/M/MV5BMmY5ZG...
3,tt0120791,Practical Magic,https://m.media-amazon.com/images/M/MV5BMzU4Mm...
4,tt3774694,Love,https://m.media-amazon.com/images/M/MV5BZGQxZT...


## Functions to return dataset's features

In [97]:
def get_movie_url(movie_id):
    return movies_df[movies_df.movie_id == movie_id].img_url.values[0]

def get_user_id(user_id_number):
    return df[df.user_id_number == user_id_number].user_id.values[0]

def get_movie_id(movie_id_number):
    return df[df.movie_id_number == movie_id_number].movie_id.values[0]

In [98]:
df['user_id_number'] = df['user_id'].astype('category').cat.codes.values
df['movie_id_number'] = df['movie_id'].astype('category').cat.codes.values
# df = df.groupby('user_id').filter(lambda x : len(x) >= 20)
# df = df.groupby('movie_id').filter(lambda x : len(x) >= 10)
Y_data = df[['user_id_number', 'movie_id_number', 'rating']].values
print(Y_data)

[[171677  48478      9]
 [215055  48478      7]
 [282153  48478      8]
 ...
 [141152  22559      7]
 [316613  22559     10]
 [ 22009  22559      9]]


## Colaborative filtering model creation

In [104]:
class Colaborative_Filtering(object):
    def __init__(self, Y_data, k, dist_func = cosine_similarity):
        self.Y_data = Y_data
        self.k = k # number of neighbor points
        self.dist_func = dist_func
        self.Ybar_data = None
        # number of users and items. Remember to add 1 since id starts from 0
        self.n_users = int(np.max(self.Y_data[:, 0])) + 1
        self.n_items = int(np.max(self.Y_data[:, 1])) + 1 
    def create_model(self):
        from sklearn.neighbors import NearestNeighbors
        self.model = NearestNeighbors(n_neighbors=20,algorithm='brute',metric='cosine')
        self.model.fit(self.interaction_matrix)
        print('success')
    def create_matrix(self):
        #Create a sparse interaction matrix
        self.interaction_matrix = sparse.coo_matrix((self.Y_data[:, 2],
            (self.Y_data[:, 1], self.Y_data[:, 0])), (self.n_items, self.n_users))
        self.interaction_matrix= self.interaction_matrix.tocsr()
    def get_rated_movies(self,user_id):
        self.movies_rated = df[['user_id_number','movie_id_number','rating']]
        self.movies_rated = df.loc[df['user_id_number'] == user_id, ['user_id_number', 'movie_id_number', 'rating']]
        self.movies_rated = pd.DataFrame(self.movies_rated, columns=['user_id_number', 'movie_id_number', 'rating'])
        self.movies_rated= self.movies_rated[['movie_id_number','rating']].reset_index(drop=True)
        return self.movies_rated
    def calculate_score(self,user_id):
        similar_candidates_rating= pd.Series(dtype='float64')
        similar_candidates_score= pd.Series(dtype='float64')
        # print(self.movies_rated)
        self.movies_list=self.interaction_matrix.getcol(user_id).toarray()
        self.movies_rated= self.movies_list
        self.movies_list = np.where(self.movies_list != 0)[0]
        
        for movie in self.movies_list:
            similar = self.model.kneighbors(
                [self.interaction_matrix.getrow(movie).toarray().squeeze()],
                return_distance=True
            )
            sim_score=similar[0]
            sim_id=similar[1]
            sim_id=np.array(list(map(lambda x: x,sim_id[0])))
            similar=pd.Series(data=sim_score[0],index=sim_id)
            similar=similar[similar!=0]
            # similar=similar[similar.index.isin(stats.index)]
            similar_candidates_score=pd.concat([similar_candidates_score,similar])
            similar=similar.map(lambda x: x*self.movies_rated[movie])
            similar_candidates_rating = pd.concat([similar_candidates_rating,similar])
        filtered_candidates_rating_sum= similar_candidates_rating.groupby(similar_candidates_rating.index).sum()
        filtered_candidates_score_sum= similar_candidates_score.groupby(similar_candidates_score.index).sum()
        similar_movies=filtered_candidates_rating_sum.index
        print(similar_movies)
        pred_rating= pd.Series(dtype='float64',index=similar_movies)
        for i in range(0,len(similar_movies)):
            pred_rating[similar_movies[i]]= filtered_candidates_rating_sum[similar_movies[i]]/filtered_candidates_score_sum[similar_movies[i]]
        return pred_rating

## Item-Based Colaborative filtering

In [105]:
# user_based_cf = CF(Y_data, k = 5, uuCF = 1)
# user_based_cf.fit()
recommender = Colaborative_Filtering(Y_data, k=5)
recommender.create_matrix()
recommender.create_model()
user_id='ur3032446'
user_id="ur61691810"

# Check if there are any rows with the specified user_id
matching_rows = df[df.user_id == user_id]

if not matching_rows.empty:
    # If there are matching rows, retrieve the user_id_number
    user_id_number =matching_rows.values[0][3]
    movies_rated=recommender.get_rated_movies(user_id_number)
    pred_rating = recommender.calculate_score(user_id_number)
else:
    print(f"No rows found for user_id: {user_id}")
print(pred_rating)

success
Index([  7288,  11282,  14459,  14479,  24830,  25660,  25689,  27206,  32141,
        34635,  48478,  50046,  55937,  60981,  74554,  76534,  90776,  94142,
       106239, 114491],
      dtype='int64')
7288      8.0
11282     8.0
14459     8.0
14479     8.0
24830     8.0
25660     8.0
25689     8.0
27206     8.0
32141     8.0
34635     8.0
48478     8.0
50046     8.0
55937     8.0
60981     8.0
74554     8.0
76534     8.0
90776     8.0
94142     8.0
106239    8.0
114491    8.0
dtype: float64


## Convert movie_id_number to movie title

In [106]:
movie_id_convert =df[['movie_id','movie_id_number']]
movie_id_convert= movie_id_convert.drop_duplicates()
movie_title = movies_df[['movie_id','title']]
movie_title_convert = pd.merge(movie_id_convert,movie_title)
movie_title_convert.head(10)

Unnamed: 0,movie_id,movie_id_number,title
0,tt0377092,48478,Mean Girls
1,tt6710474,110955,Everything Everywhere All at Once
2,tt11127680,63791,Boiling Point
3,tt0095444,18286,Killer Klowns from Outer Space
4,tt15474916,77198,Smile
5,tt0477348,54561,No Country for Old Men
6,tt0091251,16539,Come and See
7,tt0083929,13748,Fast Times at Ridgemont High
8,tt0114814,25085,The Usual Suspects
9,tt0093177,17328,Hellraiser


## Print recommendations

In [112]:
print("List of recommendations for user {}".format(user_id))
pred_rating.sort_values(inplace=True,ascending=False)
pred_rating_df = pd.DataFrame(pred_rating).reset_index()
pred_rating_df.columns = ['movie_id_number', 'predicted_rating']
final_pred_df = pd.merge(pred_rating_df,movie_id_convert)
final_pred_df = pd.merge(final_pred_df,movies_df)
# final_pred_df = final_pred_df[['movie_id','title','predicted_rating']]
final_pred_df.head(10)

List of recommendations for user ur61691810


Unnamed: 0,movie_id_number,predicted_rating,movie_id,title,img_url
0,7288,8.0,tt0068267,Betragen ungenügend!,https://m.media-amazon.com/images/M/MV5BYmFiZT...
1,94142,8.0,tt3073172,Das Pferd auf dem Balkon,https://m.media-amazon.com/images/M/MV5BMTg5NT...
2,48478,8.0,tt0377092,Mean Girls,https://m.media-amazon.com/images/M/MV5BMjE1MD...
3,34635,8.0,tt0199601,Hochwürden drückt ein Auge zu,https://m.media-amazon.com/images/M/MV5BNjIwMD...
4,27206,8.0,tt0120625,Campus,https://m.media-amazon.com/images/M/MV5BMTk3Mz...
5,25689,8.0,tt0116796,Das Zauberbuch,https://m.media-amazon.com/images/M/MV5BMjA3Nz...
6,14479,8.0,tt0085882,Die Macht der Gefühle,https://m.media-amazon.com/images/M/MV5BZTE3MD...
7,114491,8.0,tt7709664,Erik & Erika,https://m.media-amazon.com/images/M/MV5BZDFmMj...
