In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
import pickle

In [2]:
movies = pd.read_csv('movie_data_with_urls.csv')
ratings = pd.read_csv('ratings.csv')

In [3]:
movies.isna().sum()

movieId       0
title         0
genres        0
Poster_URL    0
dtype: int64

### Data Preprocessing

In [4]:
movList = list(movies['genres'])
sp_list = []

for mov in movList:
    sp_list.append(mov.split('|'))

movies['genres'] = sp_list
movies

Unnamed: 0,movieId,title,genres,Poster_URL
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]",https://image.tmdb.org/t/p/original/vgpXmVaVyU...
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",https://image.tmdb.org/t/p/original/1FSXpj5e8l...
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",https://image.tmdb.org/t/p/original/4wjGMwPsdl...
4,5,Father of the Bride Part II (1995),[Comedy],https://image.tmdb.org/t/p/original/rj4LBtwQ0u...
...,...,...,...,...
10328,146684,Cosmic Scrat-tastrophe (2015),"[Animation, Children, Comedy]",https://image.tmdb.org/t/p/original/vHzbxEcwN1...
10329,146878,Le Grand Restaurant (1966),[Comedy],https://image.tmdb.org/t/p/original/gTr2HfSlRp...
10330,148238,A Very Murray Christmas (2015),[Comedy],https://image.tmdb.org/t/p/original/2ir2DjNNXm...
10331,148626,The Big Short (2015),[Drama],https://image.tmdb.org/t/p/original/isuQWbJPbj...


In [5]:
movies['genres'] = movies['genres'].apply(lambda genre_list: [genre.lower() for genre in genre_list])
movies['title'] = movies['title'].str.lower()
movies

Unnamed: 0,movieId,title,genres,Poster_URL
0,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...
1,2,jumanji (1995),"[adventure, children, fantasy]",https://image.tmdb.org/t/p/original/vgpXmVaVyU...
2,3,grumpier old men (1995),"[comedy, romance]",https://image.tmdb.org/t/p/original/1FSXpj5e8l...
3,4,waiting to exhale (1995),"[comedy, drama, romance]",https://image.tmdb.org/t/p/original/4wjGMwPsdl...
4,5,father of the bride part ii (1995),[comedy],https://image.tmdb.org/t/p/original/rj4LBtwQ0u...
...,...,...,...,...
10328,146684,cosmic scrat-tastrophe (2015),"[animation, children, comedy]",https://image.tmdb.org/t/p/original/vHzbxEcwN1...
10329,146878,le grand restaurant (1966),[comedy],https://image.tmdb.org/t/p/original/gTr2HfSlRp...
10330,148238,a very murray christmas (2015),[comedy],https://image.tmdb.org/t/p/original/2ir2DjNNXm...
10331,148626,the big short (2015),[drama],https://image.tmdb.org/t/p/original/isuQWbJPbj...


In [6]:
display(movies.head(), ratings.head())

Unnamed: 0,movieId,title,genres,Poster_URL
0,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...
1,2,jumanji (1995),"[adventure, children, fantasy]",https://image.tmdb.org/t/p/original/vgpXmVaVyU...
2,3,grumpier old men (1995),"[comedy, romance]",https://image.tmdb.org/t/p/original/1FSXpj5e8l...
3,4,waiting to exhale (1995),"[comedy, drama, romance]",https://image.tmdb.org/t/p/original/4wjGMwPsdl...
4,5,father of the bride part ii (1995),[comedy],https://image.tmdb.org/t/p/original/rj4LBtwQ0u...


Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [7]:
mov_ratings = movies.merge(ratings, how='inner', on='movieId')
mov_ratings.drop('timestamp', axis=1, inplace=True)
mov_ratings.head()

Unnamed: 0,movieId,title,genres,Poster_URL,userId,rating
0,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...,2,5.0
1,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...,5,4.0
2,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...,8,5.0
3,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...,11,4.0
4,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...,14,4.0


### Get Top Movies

In [8]:
df_getTopMov = mov_ratings.drop(['userId', 'movieId', 'genres'], axis=1)
df_getTopMov.drop_duplicates(inplace=True)
df_getTopMov

Unnamed: 0,title,Poster_URL,rating
0,toy story (1995),https://image.tmdb.org/t/p/original/uXDfjJbdP4...,5.0
1,toy story (1995),https://image.tmdb.org/t/p/original/uXDfjJbdP4...,4.0
6,toy story (1995),https://image.tmdb.org/t/p/original/uXDfjJbdP4...,3.0
8,toy story (1995),https://image.tmdb.org/t/p/original/uXDfjJbdP4...,4.5
12,toy story (1995),https://image.tmdb.org/t/p/original/uXDfjJbdP4...,1.5
...,...,...,...
105374,le grand restaurant (1966),https://image.tmdb.org/t/p/original/gTr2HfSlRp...,2.5
105375,a very murray christmas (2015),https://image.tmdb.org/t/p/original/2ir2DjNNXm...,3.0
105376,the big short (2015),https://image.tmdb.org/t/p/original/isuQWbJPbj...,4.0
105377,the big short (2015),https://image.tmdb.org/t/p/original/isuQWbJPbj...,4.5


In [44]:
num_rating_df = df_getTopMov.groupby('title').count()['rating'].reset_index()
num_rating_df

Unnamed: 0,title,rating
0,'71 (2014),1
1,"'burbs, the (1989)",7
2,'hellboy': the seeds of creation (2004),1
3,'night mother (1986),1
4,'round midnight (1986),1
...,...,...
10318,zozo (2005),1
10319,zu: warriors from the magic mountain (xin shu ...,1
10320,zulu (1964),5
10321,¡three amigos! (1986),9


In [45]:
avg_rating_df = df_getTopMov.groupby('title')['rating'].mean().reset_index()
avg_rating_df.rename(columns={'rating':'avg_rating'},inplace=True)
avg_rating_df

Unnamed: 0,title,avg_rating
0,'71 (2014),3.500000
1,"'burbs, the (1989)",2.928571
2,'hellboy': the seeds of creation (2004),3.000000
3,'night mother (1986),3.000000
4,'round midnight (1986),2.500000
...,...,...
10318,zozo (2005),3.000000
10319,zu: warriors from the magic mountain (xin shu ...,4.000000
10320,zulu (1964),4.000000
10321,¡three amigos! (1986),2.888889


In [46]:
popular_df = num_rating_df.merge(avg_rating_df,on='title')
popular_df

Unnamed: 0,title,rating,avg_rating
0,'71 (2014),1,3.500000
1,"'burbs, the (1989)",7,2.928571
2,'hellboy': the seeds of creation (2004),1,3.000000
3,'night mother (1986),1,3.000000
4,'round midnight (1986),1,2.500000
...,...,...,...
10318,zozo (2005),1,3.000000
10319,zu: warriors from the magic mountain (xin shu ...,1,4.000000
10320,zulu (1964),5,4.000000
10321,¡three amigos! (1986),9,2.888889


In [47]:
popular_df = popular_df[popular_df['rating'] > 0].sort_values('avg_rating', ascending=False)
popular_df.size

30969

In [48]:
popular_df = popular_df.merge(movies,on='title').drop_duplicates('title')[['title', 'Poster_URL', 'avg_rating']]
popular_df

Unnamed: 0,title,Poster_URL,avg_rating
0,gentlemen of fortune (dzhentlmeny udachi) (1972),https://image.tmdb.org/t/p/original/owZ2KQ2x99...,5.0
1,50 children: the rescue mission of mr. and mrs...,https://image.tmdb.org/t/p/original/fR8Key3XJu...,5.0
2,school for scoundrels (1960),https://image.tmdb.org/t/p/original/6SPeiQfuEC...,5.0
3,schizopolis (1996),https://image.tmdb.org/t/p/original/9ASmg9EvWk...,5.0
4,my sassy girl (2008),https://image.tmdb.org/t/p/original/nVVxhtBj4l...,5.0
...,...,...,...
10324,hercules in new york (1970),https://image.tmdb.org/t/p/original/2Rad0CXcWS...,0.5
10325,we are what we are (somos lo que hay) (2010),https://image.tmdb.org/t/p/original/3Q0rtUfNHF...,0.5
10326,new year's eve (2011),https://image.tmdb.org/t/p/original/nNh8PDzkx4...,0.5
10327,chained heat (1983),https://image.tmdb.org/t/p/original/iS3XhJEEmK...,0.5


In [49]:
with open('../PKL_Files/popular_movies_df', 'wb') as file:
    pickle.dump(popular_df, file)

### Content Based Filtering

In [50]:
mov_ratings[mov_ratings['title'] == 'Cocoon (1985)']

Unnamed: 0,movieId,title,genres,Poster_URL,userId,rating


In [12]:
new_df = mov_ratings[['movieId', 'title', 'genres', 'Poster_URL']]
new_df

Unnamed: 0,movieId,title,genres,Poster_URL
0,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...
1,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...
2,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...
3,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...
4,1,toy story (1995),"[adventure, animation, children, comedy, fantasy]",https://image.tmdb.org/t/p/original/uXDfjJbdP4...
...,...,...,...,...
105375,148238,a very murray christmas (2015),[comedy],https://image.tmdb.org/t/p/original/2ir2DjNNXm...
105376,148626,the big short (2015),[drama],https://image.tmdb.org/t/p/original/isuQWbJPbj...
105377,148626,the big short (2015),[drama],https://image.tmdb.org/t/p/original/isuQWbJPbj...
105378,148626,the big short (2015),[drama],https://image.tmdb.org/t/p/original/isuQWbJPbj...


In [52]:
# remove square brackets
new_df['genres'] = new_df['genres'].apply(lambda x: ' '.join(x))
new_df.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['genres'] = new_df['genres'].apply(lambda x: ' '.join(x))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df.drop_duplicates(inplace=True)


In [53]:
new_df.shape

(10325, 4)

In [54]:
new_df = new_df.reset_index()
new_df.head()

Unnamed: 0,index,movieId,title,genres,Poster_URL
0,0,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4...
1,232,2,jumanji (1995),adventure children fantasy,https://image.tmdb.org/t/p/original/vgpXmVaVyU...
2,324,3,grumpier old men (1995),comedy romance,https://image.tmdb.org/t/p/original/1FSXpj5e8l...
3,382,4,waiting to exhale (1995),comedy drama romance,https://image.tmdb.org/t/p/original/4wjGMwPsdl...
4,393,5,father of the bride part ii (1995),comedy,https://image.tmdb.org/t/p/original/rj4LBtwQ0u...


In [55]:
# To transform given text into a vector on the basis of frequency count
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=23, stop_words='english')

In [56]:
vectors = cv.fit_transform(new_df['genres']).toarray()
vectors

array([[0, 1, 1, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [14]:
# each column represents a unique word and 10325 are the no of documents.
# this represents the count of every tokenized genre word in each document 
vectors.shape

(10325, 23)

In [22]:
from nltk.stem.porter import PorterStemmer
pt = PorterStemmer()

In [23]:
# function to stem all the words in the genres column
def stem(text):
    y = []
    for i in text.split():
        y.append(pt.stem(i))
    
    return " ".join(y)

In [24]:
new_df['genres'] = new_df['genres'].apply(stem)
new_df['genres'][0]

'adventur anim children comedi fantasi'

In [25]:
# Save the array to a pickle file
with open('../PKL_Files/stemmed_df_content_based', 'wb') as file:
    pickle.dump(new_df, file)

In [26]:
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(vectors)

In [27]:
# Save the array to a pickle file
with open('../PKL_Files/similarity_content_based', 'wb') as file:
    pickle.dump(similarity, file)

In [28]:
def recommend(movie):
    # load files
    # Load the array from the pickle file
    with open('../PKL_Files/stemmed_df_content_based', 'rb') as file:
        new_df = pickle.load(file)

        # Load the array from the pickle file
    with open('../PKL_Files/similarity_content_based', 'rb') as file:
        similarity = pickle.load(file)

    mov_list = []
    movie_index = new_df[new_df['title'] == movie].index[0]
    distances = similarity[movie_index]
    movie_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x:x[1])[1:6]

    for i in movie_list:
        d = dict()
        d['title'] = new_df.iloc[i[0]].title
        d['url'] = new_df.iloc[i[0]].Poster_URL
        mov_list.append(d)
    
    return mov_list

In [29]:
recommended_mov_list = recommend('toy story (1995)')
for m in recommended_mov_list:
    print(m)

{'title': 'antz (1998)', 'url': 'https://image.tmdb.org/t/p/original/lWPjxbUMpAHFkJpZHHNWhQaRsax.jpg'}
{'title': 'toy story 2 (1999)', 'url': 'https://image.tmdb.org/t/p/original/2MFIhZAW0CVlEQrFyqwa4U6zqJP.jpg'}
{'title': 'adventures of rocky and bullwinkle, the (2000)', 'url': 'https://image.tmdb.org/t/p/original/xCFSsftt2rglC81I6QLWcZSTCBM.jpg'}
{'title': "emperor's new groove, the (2000)", 'url': 'https://image.tmdb.org/t/p/original/wwbgkXQBEKtnyIJapk6gUgWkVw8.jpg'}
{'title': 'monsters, inc. (2001)', 'url': 'https://image.tmdb.org/t/p/original/sgheSKxZkttIe8ONsf2sWXPgip3.jpg'}


### Colaborative filtering

In [30]:
mov_ratings[mov_ratings['title'] == 'nothing in common (1986)']

Unnamed: 0,movieId,title,genres,Poster_URL,userId,rating
51756,2418,nothing in common (1986),[comedy],https://image.tmdb.org/t/p/original/l9XCfmB2aY...,560,3.5
51757,2418,nothing in common (1986),[comedy],https://image.tmdb.org/t/p/original/l9XCfmB2aY...,575,3.0


In [31]:
# remove square brackets
pd.set_option('display.max_colwidth', None)

mov_ratings['genres'] = mov_ratings['genres'].apply(lambda x: ' '.join(x))
mov_ratings.drop_duplicates(inplace=True)

In [32]:
mov_ratings.head()

Unnamed: 0,movieId,title,genres,Poster_URL,userId,rating
0,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg,2,5.0
1,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg,5,4.0
2,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg,8,5.0
3,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg,11,4.0
4,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg,14,4.0


In [33]:
# Save the array to a pickle file
with open('../PKL_Files/movie_rating_collaborative', 'wb') as file:
    pickle.dump(mov_ratings, file)

In [34]:
# users who have given more than 100 ratings are considered

x = mov_ratings.groupby('userId').count()['rating'] > 1
users = x[x].index
users

Index([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,
       ...
       659, 660, 661, 662, 663, 664, 665, 666, 667, 668],
      dtype='int64', name='userId', length=668)

In [35]:
filtered_rating = mov_ratings[mov_ratings['userId'].isin(users)]
filtered_rating


Unnamed: 0,movieId,title,genres,Poster_URL,userId,rating
0,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg,2,5.0
1,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg,5,4.0
2,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg,8,5.0
3,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg,11,4.0
4,1,toy story (1995),adventure animation children comedy fantasy,https://image.tmdb.org/t/p/original/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg,14,4.0
...,...,...,...,...,...,...
105375,148238,a very murray christmas (2015),comedy,https://image.tmdb.org/t/p/original/2ir2DjNNXmMePsyc4inFa8oI23r.jpg,475,3.0
105376,148626,the big short (2015),drama,https://image.tmdb.org/t/p/original/isuQWbJPbjybBEWdcCaBUPmU0XO.jpg,458,4.0
105377,148626,the big short (2015),drama,https://image.tmdb.org/t/p/original/isuQWbJPbjybBEWdcCaBUPmU0XO.jpg,576,4.5
105378,148626,the big short (2015),drama,https://image.tmdb.org/t/p/original/isuQWbJPbjybBEWdcCaBUPmU0XO.jpg,668,4.5


In [36]:
# movies with more than 50 ratings
y = filtered_rating.groupby('title').count() > 50
famous_movies = y[y].index
famous_movies

Index([''71 (2014)', ''burbs, the (1989)',
       ''hellboy': the seeds of creation (2004)', ''night mother (1986)',
       ''round midnight (1986)', ''til there was you (1997)',
       '(500) days of summer (2009)', '*batteries not included (1987)',
       '...and justice for all (1979)', '10 (1979)',
       ...
       'zone 39 (1997)', 'zookeeper (2011)', 'zoolander (2001)',
       'zorba the greek (alexis zorbas) (1964)', 'zorro, the gay blade (1981)',
       'zozo (2005)',
       'zu: warriors from the magic mountain (xin shu shan jian ke) (1983)',
       'zulu (1964)', '¡three amigos! (1986)',
       'à nous la liberté (freedom for us) (1931)'],
      dtype='object', name='title', length=10323)

In [37]:
final_ratings = filtered_rating[filtered_rating['title'].isin(famous_movies)]



In [38]:
pt = final_ratings.pivot_table(index='title',columns='userId',values='rating')
pt.fillna(0,inplace=True)
pt

userId,1,2,3,4,5,6,7,8,9,10,...,659,660,661,662,663,664,665,666,667,668
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"'burbs, the (1989)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'hellboy': the seeds of creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'night mother (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'round midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zozo (2005),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0
zu: warriors from the magic mountain (xin shu shan jian ke) (1983),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zulu (1964),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
¡three amigos! (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0


In [39]:
# Save the array to a pickle file
with open('../PKL_Files/pivot_table_collaborative', 'wb') as file:
    pickle.dump(pt, file)

In [40]:
similarity_scores = cosine_similarity(pt)
similarity_scores

array([[1.        , 0.        , 0.        , ..., 0.        , 0.09832433,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.        , 0.16181996,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.14748649,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.17252901,
        0.        ],
       [0.09832433, 0.16181996, 0.14748649, ..., 0.17252901, 1.        ,
        0.09832433],
       [0.        , 0.        , 0.        , ..., 0.        , 0.09832433,
        1.        ]])

In [41]:
# Save the array to a pickle file
with open('../PKL_Files/similarity_scores_collaborative', 'wb') as file:
    pickle.dump(similarity_scores, file)

In [42]:
def collaborative_recommend(movie_name):
    # Load files
    with open('../PKL_Files/movie_rating_collaborative', 'rb') as file:
        mov_ratings = pickle.load(file)

    with open('../PKL_Files/similarity_scores_collaborative', 'rb') as file:
        similarity_scores = pickle.load(file)

    with open('../PKL_Files/pivot_table_collaborative', 'rb') as file:
        pt = pickle.load(file)

    # index fetch
    index = np.where(pt.index==movie_name)[0][0]

    similar_items = sorted(list(enumerate(similarity_scores[index])),key=lambda x:x[1],reverse=True)[1:5]
    # simillar items from 1 to 4
    
    data = []
    for i in similar_items:
        item = []
        d = dict()
        temp_df = mov_ratings[mov_ratings['title'] == pt.index[i[0]]]
        item.extend(list(temp_df.drop_duplicates('title')['title'].values))

        # urls = temp_df['Poster_URL'].str.replace(r'\d+', '', regex=True) # to remove the index value before the RL

        d['title'] = item[0]
        d['url'] = temp_df['Poster_URL'].values.tolist()[0]
        data.append(d)
    
    return data

In [43]:
collaborative_recommend('zulu (1964)')

[{'title': 'nothing in common (1986)',
  'url': 'https://image.tmdb.org/t/p/original/l9XCfmB2aYWpkio8ltuGDTm8s1S.jpg'},
 {'title': 'undercover blues (1993)',
  'url': 'https://image.tmdb.org/t/p/original/r9yKccGLLzpPwRCyfCMjG2oNcbF.jpg'},
 {'title': 'zorro, the gay blade (1981)',
  'url': 'https://image.tmdb.org/t/p/original/qNFtu1TyoFbmG1hCdQkD8ENOjum.jpg'},
 {'title': 'black book (zwartboek) (2006)',
  'url': 'https://image.tmdb.org/t/p/original/kn28W24slBLyGr8ZIZnxNE5YZrY.jpg'}]

In [50]:
def collaborative_recommend_for_user(user_id, top_n=5):
    # Load files
    with open('../PKL_Files/movie_rating_collaborative', 'rb') as file:
        mov_ratings = pickle.load(file)

    with open('../PKL_Files/similarity_scores_collaborative', 'rb') as file:
        similarity_scores = pickle.load(file)

    with open('../PKL_Files/pivot_table_collaborative', 'rb') as file:
        pt = pickle.load(file)

    # Get the user's ratings from the user-movie rating matrix
    user_ratings = pt[user_id]

    # Create an empty list to store recommended movies
    recommended_movies = []

    for movie_index, similarity in enumerate(similarity_scores):
        item = []

        # Skip movies the user has already rated
        if user_ratings[movie_index] > 0:
            continue

        temp_df = mov_ratings[mov_ratings['title'] == pt.index[movie_index]]
        item.extend(list(temp_df.drop_duplicates('title')['title'].values))

        d = dict()
        d['title'] = item[0]
        d['url'] = temp_df['Poster_URL'].values.tolist()[0]

        recommended_movies.append(d)

    return recommended_movies[:5]

In [52]:
# Example usage:
user_id = 200
user_recommendations = collaborative_recommend_for_user(user_id)
user_recommendations

[{'title': "'71 (2014)",
  'url': 'https://image.tmdb.org/t/p/original/xjorsS84euahsmGlnEEeE3LFSVZ.jpg'},
 {'title': "'burbs, the (1989)",
  'url': 'https://image.tmdb.org/t/p/original/58cLiwJE71yLtUZjMsQwbG5yfya.jpg'},
 {'title': "'hellboy': the seeds of creation (2004)",
  'url': 'https://image.tmdb.org/t/p/original/358FTzyn2TusjvdqoW0lLMr7KTY.jpg'},
 {'title': "'night mother (1986)",
  'url': 'https://image.tmdb.org/t/p/original/5khUZ1QWNUNd9Ryq56kRbDU1959.jpg'},
 {'title': "'round midnight (1986)",
  'url': 'https://image.tmdb.org/t/p/original/8aNGnEsvLBldNvaBkfYkGGgZDhe.jpg'}]