In [1]:
# importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from fuzzywuzzy import fuzz
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
import pickle

In [2]:
# importing the dataset
movies_df = pd.read_csv('movies.csv')
tags_df = pd.read_csv('tags.csv')
ratings_df = pd.read_csv('ratings.csv')
links_df = pd.read_csv('links.csv')

# merging all the datasets
movieLens = pd.merge(left=movies_df, right=ratings_df, on='movieId')

---

## Popular Movies

In [3]:
# popular movies with highest average rating
ratings_movies_df = pd.merge(movies_df, ratings_df, how='inner', on='movieId')
def popularMovies():
    X = ratings_movies_df.groupby('title').rating.count()
    Y = ratings_movies_df.groupby('title').rating.mean()
    rating_title = pd.DataFrame(data=X)
    rating_title['ratingAvg'] = pd.DataFrame(Y)
    rating_title.sort_values('rating', ascending=False)
    filtering_df = rating_title[rating_title['rating']>50]
    filtering_df.sort_values('ratingAvg', ascending=False, inplace=True)
    col_arr = []
    # for i in range(437):
    #     col_arr.append(i)
    filtering_df['index'] = filtering_df.index
    # print(col_arr)
    return filtering_df

filtering = popularMovies()
filtering

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtering_df.sort_values('ratingAvg', ascending=False, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtering_df['index'] = filtering_df.index


Unnamed: 0_level_0,rating,ratingAvg,index
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Shawshank Redemption, The (1994)",317,4.429022,"Shawshank Redemption, The (1994)"
"Godfather, The (1972)",192,4.289062,"Godfather, The (1972)"
Fight Club (1999),218,4.272936,Fight Club (1999)
Cool Hand Luke (1967),57,4.271930,Cool Hand Luke (1967)
Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964),97,4.268041,Dr. Strangelove or: How I Learned to Stop Worr...
...,...,...,...
Johnny Mnemonic (1995),53,2.679245,Johnny Mnemonic (1995)
Judge Dredd (1995),62,2.669355,Judge Dredd (1995)
City Slickers II: The Legend of Curly's Gold (1994),55,2.645455,City Slickers II: The Legend of Curly's Gold (...
Coneheads (1993),63,2.420635,Coneheads (1993)


---

## Item Based Collaborative Filtering

#### For collaborative filtering, we do not need the 'genres' and 'tag' columns so we will drop them

In [4]:
# creating the pivot table
user_ratings_item = ratings_df.pivot_table(index='movieId',columns='userId',values='rating')
user_ratings_item

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,,,4.0,,4.5,,,,...,4.0,,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,,,,,,4.0,,4.0,,,...,,4.0,,5.0,3.5,,,2.0,,
3,4.0,,,,,5.0,,,,,...,,,,,,,,2.0,,
4,,,,,,3.0,,,,,...,,,,,,,,,,
5,,,,,,5.0,,,,,...,,,,3.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,,,,,,,,,,,...,,,,,,,,,,
193583,,,,,,,,,,,...,,,,,,,,,,
193585,,,,,,,,,,,...,,,,,,,,,,
193587,,,,,,,,,,,...,,,,,,,,,,


In [5]:
# dropping users who have rated less than 50 movies
user_ratings_item = user_ratings_item.dropna(thresh=50, axis=1).fillna(0)
user_ratings_item

userId,1,4,6,7,10,11,15,16,17,18,...,600,601,602,603,604,605,606,607,608,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,4.5,0.0,0.0,2.5,0.0,4.5,3.5,...,2.5,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,5.0
2,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,...,4.0,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0
3,4.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
4,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.5,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193583,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193585,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193587,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# creating a csr matrix to reduce the computations
csr_data = csr_matrix(user_ratings_item.values)
user_ratings_item.reset_index(inplace=True)

In [7]:
# using cosine similarity method in KNN
model = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=10, n_jobs=-1)
model.fit(csr_data)

NearestNeighbors(algorithm='brute', metric='cosine', n_jobs=-1, n_neighbors=10)

In [8]:
# recommender function
def item_based_recommend(title):
    n_movie_to_recommend = 10
    movie_list = movies_df[movies_df['title'].str.contains(title, case=False)]  
    if len(movie_list):        
        movie_idx = movie_list.iloc[0]['movieId']
        movie_idx = user_ratings_item[user_ratings_item['movieId'] == movie_idx].index[0]
        distances, indices = model.kneighbors(csr_data[movie_idx], n_neighbors=n_movie_to_recommend+1)    
        rec_movies = sorted(list(zip(indices.squeeze().tolist(), distances.squeeze().tolist())), key=lambda x: x[1])[:0:-1]
        recommend_frame = []
        for val in rec_movies:
            movie_idx = user_ratings_item.iloc[val[0]]['movieId']
            idx = movies_df[movies_df['movieId'] == movie_idx].index
            recommend_frame.append({'title':movies_df.iloc[idx]['title'].values[0],'Distance':val[1],'movieId':int(movie_idx)})
        df = pd.DataFrame(recommend_frame, index=range(1, n_movie_to_recommend+1))
        df.sort_values(by=['Distance'], inplace=True)
        # df['movieId'] = 
        return df.reset_index(drop=True)
    else:
        return "No similar movies found :("


In [9]:
movie_input = input("Enter a movie you liked: ")

In [10]:
print("Top 10 movies similar to", str(movie_input), "and that were liked by other users: ")
item_based_recommend(movie_input)

Top 10 movies similar to inception and that were liked by other users: 


Unnamed: 0,title,Distance,movieId
0,"Dark Knight, The (2008)",0.216082,58559
1,Inglourious Basterds (2009),0.305288,68157
2,"Dark Knight Rises, The (2012)",0.335075,91529
3,"Avengers, The (2012)",0.340302,89745
4,Shutter Island (2010),0.345888,74458
5,Django Unchained (2012),0.362976,99114
6,Sherlock Holmes (2009),0.366418,73017
7,Iron Man (2008),0.369175,59315
8,"Hangover, The (2009)",0.369214,69122
9,Up (2009),0.370812,68954


---

## Content Based Recommendation
- This will be used for a cold start

In [13]:
# the function to extract titles 
def extract_title(title): 
   year = title[len(title)-5:len(title)-1]
   
   # some movies do not have the info about year in the column title. So, we should take care of the case as well.
   
   if year.isnumeric():
      title_no_year = title[:len(title)-7]
      return title_no_year
   else:
      return title
      
# the function to extract years
def extract_year(title):
   year = title[len(title)-5:len(title)-1]
   # some movies do not have the info about year in the column title. So, we should take care of the case as well.
   if year.isnumeric():
      return int(year)
   else:
      return np.nan
# change the column name from title to title_year
movies_df.rename(columns={'title':'title_year'}, inplace=True) 

# remove leading and ending whitespaces in title_year
movies_df['title_year'] = movies_df['title_year'].apply(lambda x: x.strip()) 

# create the columns for title and year
movies_df['title'] = movies_df['title_year'].apply(extract_title) 
movies_df['year'] = movies_df['title_year'].apply(extract_year) 

In [14]:
# removing the unnecessary characters in the 'genres' column
movies_df['genres'] = movies_df['genres'].str.replace('|',' ')
movies_df['genres'] = movies_df['genres'].str.replace('Sci-Fi','SciFi')
movies_df['genres'] = movies_df['genres'].str.replace('Film-Noir','Noir')

  movies_df['genres'] = movies_df['genres'].str.replace('|',' ')


In [15]:
# intialising a TfidfVectorizer object with stop_word as 'English' as our data was taken in English
tfidf_vector = TfidfVectorizer(stop_words='english')

# apply the object to the 'genres' column
tfidf_matrix = tfidf_vector.fit_transform(movies_df['genres'])

# printing the vectorized 'genres' column
print(list(enumerate(tfidf_vector.get_feature_names())))

[(0, 'action'), (1, 'adventure'), (2, 'animation'), (3, 'children'), (4, 'comedy'), (5, 'crime'), (6, 'documentary'), (7, 'drama'), (8, 'fantasy'), (9, 'genres'), (10, 'horror'), (11, 'imax'), (12, 'listed'), (13, 'musical'), (14, 'mystery'), (15, 'noir'), (16, 'romance'), (17, 'scifi'), (18, 'thriller'), (19, 'war'), (20, 'western')]




In [16]:
# create the cosine similarity matrix
sim_matrix = linear_kernel(tfidf_matrix,tfidf_matrix) 
print(sim_matrix)

[[1.         0.81357774 0.15276924 ... 0.         0.4210373  0.26758648]
 [0.81357774 1.         0.         ... 0.         0.         0.        ]
 [0.15276924 0.         1.         ... 0.         0.         0.57091541]
 ...
 [0.         0.         0.         ... 1.         0.         0.        ]
 [0.4210373  0.         0.         ... 0.         1.         0.        ]
 [0.26758648 0.         0.57091541 ... 0.         0.         1.        ]]


In [17]:
# function to find the closest title
def matching_score(a,b):
   return fuzz.ratio(a,b)

In [18]:
# a function to convert index to title_year
def get_title_year_from_index(index):
   return movies_df[movies_df.index == index]['title_year'].values[0]

# a function to convert index to title
def get_title_from_index(index):
   return movies_df[movies_df.index == index]['title'].values[0]

# a function to convert title to index
def get_index_from_title(title):
   return movies_df[movies_df.title == title].index.values[0]
   
# a function to return the most similar title to the words a user type
def find_closest_title(title):
   leven_scores = list(enumerate(movies_df['title'].apply(matching_score, b=title)))
   sorted_leven_scores = sorted(leven_scores, key=lambda x: x[1], reverse=True)
   closest_title = get_title_from_index(sorted_leven_scores[0][0])
   distance_score = sorted_leven_scores[0][1]
   return closest_title, distance_score

In [19]:
def contents_based_recommender(movie_user_likes):
   return_mov = []
   closest_title, distance_score = find_closest_title(movie_user_likes)
   # When a user does not make misspellings
   if distance_score == 100:
      movie_index = get_index_from_title(closest_title)
      movie_list = list(enumerate(sim_matrix[int(movie_index)]))
      # remove the typed movie itself
      similar_movies = list(filter(lambda x:x[0] != int(movie_index), sorted(movie_list,key=lambda x:x[1], reverse=True))) 
      
      print('Here\'s the list of movies similar to '+'\033[1m'+str(closest_title)+'\033[0m'+'.\n')
      for i,s in similar_movies[:20]:
         print(get_title_year_from_index(i))
   # When a user makes misspellings    
   else:
      print('Did you mean '+'\033[1m'+str(closest_title)+'\033[0m'+'?','\n')
      movie_index = get_index_from_title(closest_title)
      movie_list = list(enumerate(sim_matrix[int(movie_index)]))
      similar_movies = list(filter(lambda x:x[0] != int(movie_index), sorted(movie_list,key=lambda x:x[1], reverse=True)))
      print('Here\'s the list of movies similar to: '+'\033[1m'+str(closest_title)+'\033[0m'+'.\n')
      for i,s in similar_movies[:20]:
         return_mov.append(get_title_year_from_index(i))
   return return_mov

In [20]:
contents_based_recommender('Toy Story')

Here's the list of movies similar to [1mToy Story[0m.

Antz (1998)
Toy Story 2 (1999)
Adventures of Rocky and Bullwinkle, The (2000)
Emperor's New Groove, The (2000)
Monsters, Inc. (2001)
Wild, The (2006)
Shrek the Third (2007)
Tale of Despereaux, The (2008)
Asterix and the Vikings (Astérix et les Vikings) (2006)
Turbo (2013)
The Good Dinosaur (2015)
Moana (2016)
Inside Out (2015)
Black Cauldron, The (1985)
Lord of the Rings, The (1978)
We're Back! A Dinosaur's Story (1993)
Atlantis: The Lost Empire (2001)
Land Before Time, The (1988)
Pokemon 4 Ever (a.k.a. Pokémon 4: The Movie) (2002)
Sinbad: Legend of the Seven Seas (2003)


[]

---

## User Based Collaborative Filtering

In [25]:
# merging the datasets
user_rating_user = pd.merge(movies_df, ratings_df, on='movieId').drop('timestamp', axis=1)
user_rating_user

Unnamed: 0,movieId,title_year,genres,title,year,userId,rating
0,1,Toy Story (1995),Adventure Animation Children Comedy Fantasy,Toy Story,1995.0,1,4.0
1,1,Toy Story (1995),Adventure Animation Children Comedy Fantasy,Toy Story,1995.0,5,4.0
2,1,Toy Story (1995),Adventure Animation Children Comedy Fantasy,Toy Story,1995.0,7,4.5
3,1,Toy Story (1995),Adventure Animation Children Comedy Fantasy,Toy Story,1995.0,15,2.5
4,1,Toy Story (1995),Adventure Animation Children Comedy Fantasy,Toy Story,1995.0,17,4.5
...,...,...,...,...,...,...,...
100831,193581,Black Butler: Book of the Atlantic (2017),Action Animation Comedy Fantasy,Black Butler: Book of the Atlantic,2017.0,184,4.0
100832,193583,No Game No Life: Zero (2017),Animation Comedy Fantasy,No Game No Life: Zero,2017.0,184,3.5
100833,193585,Flint (2017),Drama,Flint,2017.0,184,3.5
100834,193587,Bungo Stray Dogs: Dead Apple (2018),Action Animation,Bungo Stray Dogs: Dead Apple,2018.0,184,3.5


In [26]:
# making a pivot table
user_pivot_table = user_rating_user.pivot_table(index='userId', columns='movieId', values='rating')
user_pivot_table

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


In [27]:
# normalizing the ratings
user_pivot_norm = user_pivot_table.subtract(user_pivot_table.mean(axis=1), axis = 'rows')
user_pivot_norm.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.366379,,-0.366379,,,-0.366379,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,0.363636,,,,,,,,,,...,,,,,,,,,,


In [28]:
# using pearson correlation to get similar users
user_sim_corr = user_pivot_norm.T.corr()
user_sim_corr

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,,0.079819,0.207983,0.268749,-0.291636,-0.118773,0.469668,0.918559,-0.037987,...,9.157371e-02,-5.222581e-18,-0.061503,-0.407556,-0.164871,0.066378,0.174557,0.268070,-0.175412,-0.032086
2,,1.000000,,,,,-0.991241,,,0.037796,...,-3.873468e-01,,-1.000000,,,0.583333,,-0.125000,,0.623288
3,0.079819,,1.000000,,,,,,,,...,,,0.433200,,,-0.791334,-0.333333,-0.395092,,0.569562
4,0.207983,,,1.000000,-0.336525,0.148498,0.542861,0.117851,,0.485794,...,-2.221127e-01,3.966413e-01,0.090090,-0.080296,0.400124,0.144603,0.116518,-0.170501,-0.277350,-0.043786
5,0.268749,,,-0.336525,1.000000,0.043166,0.158114,0.028347,,-0.777714,...,-4.532467e-17,1.533034e-01,0.234743,0.067791,-0.364156,0.244321,0.231080,-0.020546,0.384111,0.040582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.066378,0.583333,-0.791334,0.144603,0.244321,-0.049192,0.137771,0.253582,0.572700,-0.382955,...,2.904896e-01,1.406134e-01,0.318473,0.682949,0.167062,1.000000,0.114191,0.240842,0.533002,0.389185
607,0.174557,,-0.333333,0.116518,0.231080,0.255639,0.402792,0.251280,,-0.241121,...,6.982411e-01,2.172105e-01,0.192787,0.035806,-0.299641,0.114191,1.000000,0.200814,0.190117,0.106605
608,0.268070,-0.125000,-0.395092,-0.170501,-0.020546,0.125428,0.008081,0.434423,0.336625,-0.571043,...,4.739665e-01,2.976461e-01,0.086423,0.053986,-0.075673,0.240842,0.200814,1.000000,0.488929,0.147606
609,-0.175412,,,-0.277350,0.384111,0.193649,0.420288,0.141860,,,...,1.000000e+00,1.885115e-01,0.343303,0.641624,-0.550000,0.533002,0.190117,0.488929,1.000000,-0.521773


In [29]:
def get_similar_user(pick_user_id,n):
    user_similarity_threshold = 0.3
    similar_users = user_sim_corr[user_sim_corr[pick_user_id]>user_similarity_threshold][pick_user_id].sort_values(ascending=False)[:n]
    return similar_users

In [30]:
def user_based_recommend(user_id,m):

    # getting the top n similar users
    n=10
    sim_users = get_similar_user(user_id,n)

    # remove movies that have been watched
    picked_userid_watched = user_pivot_norm[user_pivot_norm.index == user_id].dropna(axis=1, how='all')

    # remove movies that none of the similar users have watched
    similar_user_movies = user_pivot_norm[user_pivot_norm.index.isin(sim_users.index)].dropna(axis=1, how='all')

    # remove the already watched movies by the user from the movie list
    similar_user_movies.drop(picked_userid_watched.columns,axis=1, inplace=True, errors='ignore')

    # dictionary to store item scores
    item_score = {}
    
    # loop through items
    for i in similar_user_movies.columns:
        # get the ratings for movie i
        movie_rating = similar_user_movies[i]
        # variable to store the score
        total = 0
        # variable to store the number of scores
        count = 0
        # loop through similar users
        for j in sim_users.index:
            # if the movie has rating
            if pd.isna(movie_rating[j]) == False:
                # score is the sum of user similarity score multiply by the movie rating
                score = sim_users[j] * movie_rating[j]
                # add the score to the total score for the movie so far
                total += score
                # add 1 to the count
                count +=1
        # get the average score for the item
        item_score[i] = total / count
    # convert dictionary to pandas dataframe
    item_score = pd.DataFrame(item_score.items(), columns=['movieId', 'movie_score'])

    # sort the movies by score
    ranked_item_score = item_score.sort_values(by='movie_score', ascending=False)
    ranked_item_score = pd.merge(ranked_item_score, movies_df, on='movieId').drop(['genres','title_year','year'],axis=1)
    return ranked_item_score.head(m)

In [31]:
recommend = user_based_recommend(79,10)
recommend

Unnamed: 0,movieId,movie_score,title
0,4993,1.85567,"Lord of the Rings: The Fellowship of the Ring,..."
1,106489,1.85567,"Hobbit: The Desolation of Smaug, The"
2,91529,1.85567,"Dark Knight Rises, The"
3,1198,1.85567,Raiders of the Lost Ark (Indiana Jones and the...
4,4223,1.85567,Enemy at the Gates
5,98809,1.85567,"Hobbit: An Unexpected Journey, The"
6,60684,1.85567,Watchmen
7,122886,1.85567,Star Wars: Episode VII - The Force Awakens
8,1019,1.477273,"20,000 Leagues Under the Sea"
9,222,1.477273,Circle of Friends
