In [36]:
# ! wget https://raw.githubusercontent.com/oscar-defelice/Recommender-Systems-Course/main/requirements.txt
# ! pip install -r requirements.txt -q

# 1. Data Collection and Preprocessing

In [37]:
import pandas as pd

movie_columns = ['MovieID', 'Title', 'Genres']
rating_columns = ['UserID', 'MovieID', 'Rating', 'Timestamp']
user_columns = ['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code']

df_movies = pd.read_csv('/kaggle/input/movies/ml-1m/ml-1m/movies.dat', delimiter='::', engine='python', names=movie_columns, encoding='ISO-8859-1')
df_ratings = pd.read_csv('/kaggle/input/movies/ml-1m/ml-1m/ratings.dat', delimiter='::', engine='python', names=rating_columns, encoding='ISO-8859-1')
df_users = pd.read_csv('/kaggle/input/movies/ml-1m/ml-1m/users.dat', delimiter='::', engine='python', names=user_columns, encoding='ISO-8859-1')

display(df_movies)
display(df_ratings)
display(df_users)

Unnamed: 0,MovieID,Title,Genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama


Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291
...,...,...,...,...
1000204,6040,1091,1,956716541
1000205,6040,1094,5,956704887
1000206,6040,562,5,956704746
1000207,6040,1096,4,956715648


Unnamed: 0,UserID,Gender,Age,Occupation,Zip-code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,02460
4,5,M,25,20,55455
...,...,...,...,...,...
6035,6036,F,25,15,32603
6036,6037,F,45,1,76006
6037,6038,F,56,1,14706
6038,6039,F,45,0,01060


In [38]:
df_name_basics = pd.read_csv('/kaggle/input/movies/name.basics.tsv', delimiter='\t')
df_title_basics = pd.read_csv('/kaggle/input/movies/title.basics.tsv', delimiter='\t')

display(df_name_basics, df_title_basics)

  df_title_basics = pd.read_csv('/kaggle/input/movies/title.basics.tsv', delimiter='\t')


Unnamed: 0,nconst,primaryName,birthYear,deathYear,primaryProfession,knownForTitles
0,nm0000001,Fred Astaire,1899,1987,"actor,miscellaneous,producer","tt0072308,tt0050419,tt0053137,tt0027125"
1,nm0000002,Lauren Bacall,1924,2014,"actress,soundtrack,archive_footage","tt0037382,tt0075213,tt0117057,tt0038355"
2,nm0000003,Brigitte Bardot,1934,\N,"actress,music_department,producer","tt0057345,tt0049189,tt0056404,tt0054452"
3,nm0000004,John Belushi,1949,1982,"actor,writer,music_department","tt0072562,tt0077975,tt0080455,tt0078723"
4,nm0000005,Ingmar Bergman,1918,2007,"writer,director,actor","tt0050986,tt0083922,tt0050976,tt0069467"
...,...,...,...,...,...,...
13575521,nm9993714,Romeo del Rosario,\N,\N,"animation_department,art_department","tt11657662,tt14069590,tt2455546"
13575522,nm9993716,Essias Loberg,\N,\N,\N,\N
13575523,nm9993717,Harikrishnan Rajan,\N,\N,cinematographer,tt8736744
13575524,nm9993718,Aayush Nair,\N,\N,cinematographer,tt8736744


Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,\N,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,\N,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,\N,5,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,\N,12,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,\N,1,"Comedy,Short"
...,...,...,...,...,...,...,...,...,...
10856090,tt9916848,tvEpisode,Episode #3.17,Episode #3.17,0,2009,\N,\N,"Action,Drama,Family"
10856091,tt9916850,tvEpisode,Episode #3.19,Episode #3.19,0,2010,\N,\N,"Action,Drama,Family"
10856092,tt9916852,tvEpisode,Episode #3.20,Episode #3.20,0,2010,\N,\N,"Action,Drama,Family"
10856093,tt9916856,short,The Wind,The Wind,0,2015,\N,27,Short


## Movie Data Processing: Extracting Years and Cleaning Titles

Before merging the two datasets we need to do a little preprocessing 

We need to extract the year from the movie titles in `df_movies` and ensure the `Year` column is properly formatted in both datasets.

The function `remove_year_from_title` uses a regular expression to strip the year from the end of the movie titles, which are in the format `Title (Year)`. 

The function `extract_year` extracts the year from the title if it exists.

In [39]:
import re

def remove_year_from_title(title):
    return re.sub(r'\s*\(\d{4}\)$', '', title)

def extract_year(title):
    return int(title.split('(')[-1].split(')')[0]) if '(' in title else None

df_movies['Year'] = df_movies['Title'].apply(extract_year)
df_movies['Title'] = df_movies['Title'].apply(remove_year_from_title)
df_title_basics['Year'] = pd.to_numeric(df_title_basics['startYear'], errors='coerce')
df_title_basics = df_title_basics.dropna(subset=['Year', 'endYear']).copy()
df_title_basics['Year'] = df_title_basics['Year'].astype('int')

display(df_movies, df_title_basics)

Unnamed: 0,MovieID,Title,Genres,Year
0,1,Toy Story,Animation|Children's|Comedy,1995
1,2,Jumanji,Adventure|Children's|Fantasy,1995
2,3,Grumpier Old Men,Comedy|Romance,1995
3,4,Waiting to Exhale,Comedy|Drama,1995
4,5,Father of the Bride Part II,Comedy,1995
...,...,...,...,...
3878,3948,Meet the Parents,Comedy,2000
3879,3949,Requiem for a Dream,Drama,2000
3880,3950,Tigerland,Drama,2000
3881,3951,Two Family House,Drama,2000


Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,Year
0,tt0000001,short,Carmencita,Carmencita,0,1894,\N,1,"Documentary,Short",1894
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,\N,5,"Animation,Short",1892
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,\N,5,"Animation,Comedy,Romance",1892
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,\N,12,"Animation,Short",1892
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,\N,1,"Comedy,Short",1893
...,...,...,...,...,...,...,...,...,...,...
10856090,tt9916848,tvEpisode,Episode #3.17,Episode #3.17,0,2009,\N,\N,"Action,Drama,Family",2009
10856091,tt9916850,tvEpisode,Episode #3.19,Episode #3.19,0,2010,\N,\N,"Action,Drama,Family",2010
10856092,tt9916852,tvEpisode,Episode #3.20,Episode #3.20,0,2010,\N,\N,"Action,Drama,Family",2010
10856093,tt9916856,short,The Wind,The Wind,0,2015,\N,27,Short,2015


## Merging Movie Data with Title Basics

Now that both our dataset have a column year and title we can merge them

We perform a left merge on `df_movies` and `df_title_basics` using the `Title` and `Year` columns from `df_movies` and the `primaryTitle` and `Year` columns from `df_title_basics`. This combines the datasets, keeping all row from `df_movies` and adding matching rows from `df_title_basics`.

In [40]:
df_title_basics = df_title_basics.drop_duplicates(subset=['primaryTitle', 'Year'])
merged_movies = pd.merge(df_movies, df_title_basics, left_on=['Title', 'Year'], right_on=['primaryTitle', 'Year'], how='left')

In [41]:
display(merged_movies)

Unnamed: 0,MovieID,Title,Genres,Year,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,1,Toy Story,Animation|Children's|Comedy,1995,tt0114709,movie,Toy Story,Toy Story,0,1995,\N,81,"Adventure,Animation,Comedy"
1,2,Jumanji,Adventure|Children's|Fantasy,1995,tt0113497,movie,Jumanji,Jumanji,0,1995,\N,104,"Adventure,Comedy,Family"
2,3,Grumpier Old Men,Comedy|Romance,1995,tt0113228,movie,Grumpier Old Men,Grumpier Old Men,0,1995,\N,101,"Comedy,Romance"
3,4,Waiting to Exhale,Comedy|Drama,1995,tt0114885,movie,Waiting to Exhale,Waiting to Exhale,0,1995,\N,124,"Comedy,Drama,Romance"
4,5,Father of the Bride Part II,Comedy,1995,tt0113041,movie,Father of the Bride Part II,Father of the Bride Part II,0,1995,\N,106,"Comedy,Family,Romance"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3878,3948,Meet the Parents,Comedy,2000,tt0212338,movie,Meet the Parents,Meet the Parents,0,2000,\N,108,"Comedy,Romance"
3879,3949,Requiem for a Dream,Drama,2000,tt0180093,movie,Requiem for a Dream,Requiem for a Dream,0,2000,\N,102,Drama
3880,3950,Tigerland,Drama,2000,tt0170691,movie,Tigerland,Tigerland,0,2000,\N,101,"Drama,War"
3881,3951,Two Family House,Drama,2000,tt0202641,movie,Two Family House,Two Family House,0,2000,\N,108,"Comedy,Drama,Romance"


Now we have a single dataset with all the metadata

# 2. Feature Engineering (genres, year, isAdult, runtimeMinutes, and ratings)

In this section, We modified the dataframe to ensure every column has the correct data type, merged the two genre columns, and removed unnecessary columns.

In [42]:
def merge_genres(row):
    genres = str(row['Genres']) + '|' + str(row['genres'])
    genres_list = list(set(genres.replace(',', '|').split('|')))
    return ','.join([genre for genre in genres_list if genre != 'nan'])

merged_movies['merged_genres'] = merged_movies.apply(merge_genres, axis=1)

merged_movies['isAdult'] = merged_movies['isAdult'].astype(bool)
merged_movies['isAdult'] = merged_movies['isAdult'].fillna(False)

merged_movies = merged_movies.drop(columns=['primaryTitle', 'endYear', 'Genres', 'tconst', 'titleType', 'primaryTitle', 'originalTitle', 'genres', 'startYear'])

merged_movies.rename(columns={'merged_genres': 'Genres'}, inplace=True)

In [43]:
display(merged_movies)

Unnamed: 0,MovieID,Title,Year,isAdult,runtimeMinutes,Genres
0,1,Toy Story,1995,False,81,"Adventure,Children's,Animation,Comedy"
1,2,Jumanji,1995,False,104,"Fantasy,Comedy,Children's,Adventure,Family"
2,3,Grumpier Old Men,1995,False,101,"Romance,Comedy"
3,4,Waiting to Exhale,1995,False,124,"Romance,Drama,Comedy"
4,5,Father of the Bride Part II,1995,False,106,"Family,Romance,Comedy"
...,...,...,...,...,...,...
3878,3948,Meet the Parents,2000,False,108,"Romance,Comedy"
3879,3949,Requiem for a Dream,2000,False,102,Drama
3880,3950,Tigerland,2000,False,101,"War,Drama"
3881,3951,Two Family House,2000,False,108,"Romance,Drama,Comedy"


Now we have 4 features: Year, isAdult, runtimeMinutes and Genres

In [44]:
df_matrix = df_ratings.pivot(index="UserID", columns="MovieID", values="Rating")
display(df_matrix)

MovieID,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
UserID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,2.0,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,,,,2.0,,3.0,,,,,...,,,,,,,,,,
6037,,,,,,,,,,,...,,,,,,,,,,
6038,,,,,,,,,,,...,,,,,,,,,,
6039,,,,,,,,,,,...,,,,,,,,,,


# 3. Model Development

We've extracted several features, but We'll only be utilizing the ratings, genres and year as features for my model.

To predict user recommendations, I used matrix factorization using an SVD model:
- SVD is effective with sparsity (many users having rated only a small amount of movies)
- SVD is relatively straightforward to implement
- A KNN model can struggle with high-dimensional and sparse data, leading to less accurate recommendations.
- A deep learning model require large amounts of data (we only have ~4000 movies) and they are more complex to implement and tune compared to SVD.


In [49]:
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df_ratings[['UserID', 'MovieID', 'Rating']], reader)

trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

svd_model = SVD(n_factors=100, n_epochs=20, lr_all=0.005, reg_all=0.02)
svd_model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7aea391b6c80>

# 4. Recommendation Algorithm

To include the features in my algorithm, I computed 2 score:
 - The collaborative filetering with the SVD model I created
 - The content-based by using the Year and Genres as features

First, we create a `Features` column in the `merged_movies` dataframe by combining the `Year` and `Genres` columns as strings, TF-IDF make textual information in a numerical format suitable for similarity calculations.

Using TF-IDF vectors, we compute a cosine similarity matrixbetween all pairs of movies. This matrix quantify the similarity between movies based on their features.

The function (`get_user_profile`) function constructs a user profile vector based on movies rated by a specific user, it calculates a weighted average of its TF-IDF vector. 

Finally in the (`get_user_recommendations`) we combines CF and CB methods to generate movie recommendations for a user

This function (`get_couple_recommendations`) recommends movies for pairs of users (`user1` and `user2`). It gathers top movie suggestions for each user using `get_user_recommendations`, combines them into a single list (`couple_recs`), averages the scores for movies recommended by either user, and returns the top `n` suggestions based on these averaged scores.

In [50]:
merged_movies['Features'] = merged_movies['Year'].astype(str) + ' ' + merged_movies['Genres']

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(merged_movies['Features'])

cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# The get_user_profile function constructs a user profile vector based on their rated movies
# It iterates through the movies rated by the user and calculates a weighted average of TF-IDF vectors of those movies.
def get_user_profile(user, svd_model, merged_movies):
    user_ratings = df_ratings[df_ratings['UserID'] == user]
    user_profile = np.zeros(tfidf_matrix.shape[1])
    rated_movies_count = 0
    
    for _, row in user_ratings.iterrows():
        movie_index = merged_movies[merged_movies['MovieID'] == row['MovieID']].index[0]
        user_profile += tfidf_matrix[movie_index].toarray()[0] * (row['Rating'] - 2.5)
        rated_movies_count += 1
    
    if rated_movies_count > 0:
        user_profile /= rated_movies_count
    
    return user_profile

def get_user_recommendations(user, svd_model, merged_movies, cosine_sim, n=10, cf_weight=0.7):
    all_movies = merged_movies['MovieID'].unique()
    user_ratings = []
    
    user_profile = get_user_profile(user, svd_model, merged_movies)
    
    for movie in all_movies:
        cf_rating = svd_model.predict(str(user), str(movie)).est
        
        movie_index = merged_movies[merged_movies['MovieID'] == movie].index[0]
        cb_score = cosine_similarity(user_profile.reshape(1, -1), tfidf_matrix[movie_index])[0][0]
        
        score = cf_weight * cf_rating + (1 - cf_weight) * cb_score
        user_ratings.append((movie, score))
    
    user_ratings.sort(key=lambda x: x[1], reverse=True)
    top_recommendations = user_ratings[:n]
    
    recommended_movies = merged_movies[merged_movies['MovieID'].isin([movie for movie, _ in top_recommendations])]
    recommended_movies['Score'] = [score for _, score in top_recommendations]
    
    return recommended_movies.sort_values('Score', ascending=False)

def get_couple_recommendations(user1, user2, svd_model, merged_movies, cosine_sim, n=10, cf_weight=0.7):
    user1_recs = get_user_recommendations(user1, svd_model, merged_movies, cosine_sim, n=n*2, cf_weight=cf_weight)
    user2_recs = get_user_recommendations(user2, svd_model, merged_movies, cosine_sim, n=n*2, cf_weight=cf_weight)
    
    couple_recs = pd.concat([user1_recs, user2_recs])
    couple_recs = couple_recs.groupby('MovieID').agg({
        'Title': 'first',
        'Year': 'first',
        'Genres': 'first',
        'Score': 'mean'
    }).reset_index()
    
    return couple_recs.sort_values('Score', ascending=False).head(n)

# 5. Evaluation

In [54]:
cross_validate(svd_model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8745  0.8718  0.8713  0.8745  0.8732  0.8731  0.0013  
MAE (testset)     0.6865  0.6841  0.6835  0.6869  0.6855  0.6853  0.0013  
Fit time          12.68   12.92   12.86   13.14   12.84   12.89   0.15    
Test time         2.13    2.13    2.65    2.31    2.59    2.36    0.22    


{'test_rmse': array([0.8745082 , 0.87181214, 0.87127387, 0.87452965, 0.87319819]),
 'test_mae': array([0.68647372, 0.68412   , 0.68351948, 0.68689493, 0.6855347 ]),
 'fit_time': (12.681369304656982,
  12.918066263198853,
  12.860503435134888,
  13.14331603050232,
  12.844359397888184),
 'test_time': (2.1271257400512695,
  2.129542827606201,
  2.654052495956421,
  2.3103244304656982,
  2.587702751159668)}

In [None]:
Now, we can compare the top movies recommended for two users individually as well as together.

In [53]:
user1 = 1
user2 = 2

user1_recommendations = get_user_recommendations(user1, svd_model, merged_movies, cosine_sim)
print("Recommendations for User 1:")
print(user1_recommendations[['Title', 'Year', 'Genres', 'Score']])

user2_recommendations = get_user_recommendations(user, svd_model, merged_movies, cosine_sim)
print("Recommendations for User 2:")
print(user2_recommendations[['Title', 'Year', 'Genres', 'Score']])

couple_recommendations = get_couple_recommendations(user1, user2, svd_model, merged_movies, cosine_sim)
print("\nRecommendations for User 1 and User 2 as a couple:")
print(couple_recommendations[['Title', 'Year', 'Genres', 'Score']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_movies['Score'] = [score for _, score in top_recommendations]


Recommendations for User 1:
                          Title  Year  \
12                        Balto  1995   
47                   Pocahontas  1995   
626     All Dogs Go to Heaven 2  1996   
655   James and the Giant Peach  1996   
1526                   Hercules  1997   
1642                  Anastasia  1997   
1812          Quest for Camelot  1998   
1838                      Mulan  1998   
1851             Small Soldiers  1998   
2225                       Antz  1998   

                                                 Genres     Score  
12                 Adventure,Children's,Animation,Drama  2.737575  
47    Drama,Romance,Children's,Animation,Musical,Adv...  2.732756  
626       Children's,Animation,Musical,Adventure,Family  2.729060  
655       Children's,Animation,Musical,Adventure,Family  2.729060  
1526  Comedy,Action,Musical,Children's,Animation,Adv...  2.727903  
1642       Drama,Children's,Animation,Musical,Adventure  2.725882  
1812      Fantasy,Comedy,Children's,Animatio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_movies['Score'] = [score for _, score in top_recommendations]


Recommendations for User 2:
                         Title  Year  \
345   Clear and Present Danger  1994   
788                   Daylight  1996   
825             Chain Reaction  1996   
1583           Fire Down Below  1997   
1592                      Stag  1997   
1843              Out of Sight  1998   
1849           Lethal Weapon 4  1998   
1933           Lethal Weapon 3  1992   
2253                   Soldier  1998   
3188            Bodyguard, The  1992   

                                          Genres     Score  
345        Drama,Action,Thriller,Crime,Adventure  2.698658  
788              Action,Adventure,Thriller,Drama  2.693753  
825       Drama,Action,Thriller,Sci-Fi,Adventure  2.692900  
1583                       Action,Thriller,Drama  2.691334  
1592                       Action,Thriller,Drama  2.688962  
1843           Drama,Comedy,Action,Romance,Crime  2.686923  
1849          Drama,Comedy,Action,Thriller,Crime  2.686700  
1933          Drama,Comedy,Action,Thriller,

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_movies['Score'] = [score for _, score in top_recommendations]



Recommendations for User 1 and User 2 as a couple:
                              Title  Year  \
0                         Toy Story  1995   
1                             Balto  1995   
2                        Pocahontas  1995   
5                  Gumby: The Movie  1995   
8                           Aladdin  1992   
9           All Dogs Go to Heaven 2  1996   
11                        Space Jam  1996   
10        James and the Giant Peach  1996   
15  Aladdin and the King of Thieves  1996   
17                         Hercules  1997   

                                               Genres     Score  
0               Adventure,Children's,Animation,Comedy  2.737575  
1                Adventure,Children's,Animation,Drama  2.732756  
2   Drama,Romance,Children's,Animation,Musical,Adv...  2.729060  
5               Adventure,Children's,Animation,Comedy  2.729060  
8       Comedy,Musical,Children's,Animation,Adventure  2.727903  
9       Children's,Animation,Musical,Adventure,Family  2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recommended_movies['Score'] = [score for _, score in top_recommendations]
