# Hybrid Recommendation System: Combine Collaborative & Content based 

In [1]:
import pandas as pd
import numpy as np

# Collaborative Fitering: Item based Model

## Load Movies Data

In [2]:
movies = pd.read_csv('movies_metadata_final.csv')
print(movies.columns)
movies.head()

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage',
       'movie_id', 'imdb_id', 'original_language', 'title', 'keywords',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title.1', 'video',
       'vote_average', 'vote_count'],
      dtype='object')


Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,movie_id,imdb_id,original_language,title,keywords,...,release_date,revenue,runtime,spoken_languages,status,tagline,title.1,video,vote_average,vote_count
0,False,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",,2,tt0094675,fi,Ariel,Taisto Kasurinen is a Finnish coal miner whose...,...,1988-10-21,0.0,69.0,"[{'iso_639_1': 'fi', 'name': 'suomi'}, {'iso_6...",Released,,Ariel,False,7.1,44.0
1,False,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 35, 'name...",,3,tt0092149,fi,Varjoja paratiisissa,"An episode in the life of Nikander, a garbage ...",...,1986-10-16,0.0,76.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,,Shadows in Paradise,False,7.1,35.0
2,False,,4000000,"[{'id': 80, 'name': 'Crime'}, {'id': 35, 'name...",,5,tt0113101,en,Four Rooms,It's Ted the Bellhop's first night on the job....,...,1995-12-09,4300000.0,98.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Twelve outrageous guests. Four scandalous requ...,Four Rooms,False,6.5,539.0
3,False,,0,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",,6,tt0107286,en,Judgment Night,"While racing to a boxing match, Frank, Mike, J...",...,1993-10-15,12136938.0,110.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Don't move. Don't whisper. Don't even breathe.,Judgment Night,False,6.4,79.0
4,False,"{'id': 10, 'name': 'Star Wars Collection', 'po...",11000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.starwars.com/films/star-wars-episod...,11,tt0076759,en,Star Wars,Princess Leia is captured and held hostage by ...,...,1977-05-25,775398007.0,121.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"A long time ago in a galaxy far, far away...",Star Wars,False,8.1,6778.0


In [3]:
movies = movies[['movie_id', 'title']]
movies.shape, movies.head()

((45462, 2),
    movie_id                 title
 0         2                 Ariel
 1         3  Varjoja paratiisissa
 2         5            Four Rooms
 3         6        Judgment Night
 4        11             Star Wars)

In [4]:
movies.nunique(), movies.dtypes

(movie_id    45432
 title       43369
 dtype: int64,
 movie_id     int64
 title       object
 dtype: object)

## Load Ratings Data

In [5]:
ratings = pd.read_csv('ratings_small.csv')
ratings.shape, ratings.head()

((100004, 4),
    user_id  movie_id  rating   timestamp
 0        1        31     2.5  1260759144
 1        1      1029     3.0  1260759179
 2        1      1061     3.0  1260759182
 3        1      1129     2.0  1260759185
 4        1      1172     4.0  1260759205)

In [6]:
ratings.nunique()

user_id        671
movie_id      9066
rating          10
timestamp    78141
dtype: int64

In [7]:
ratings.drop(['timestamp'], axis=1, inplace=True)

In [8]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [9]:
ratings.describe()

Unnamed: 0,user_id,movie_id,rating
count,100004.0,100004.0,100004.0
mean,347.01131,12548.664363,3.543608
std,195.163838,26369.198969,1.058064
min,1.0,1.0,0.5
25%,182.0,1028.0,3.0
50%,367.0,2406.5,4.0
75%,520.0,5418.0,4.0
max,671.0,163949.0,5.0


In [10]:
n_users = ratings.user_id.nunique()
n_items = ratings.movie_id.nunique()
n_users, n_items

(671, 9066)

## Create Pivot Matrix from rating values

In [11]:
data_matrix = ratings.pivot(
    index='user_id',
    columns='movie_id',
    values='rating'
).fillna(0)
data_matrix.shape, data_matrix.head()

((671, 9066),
 movie_id  1       2       3       4       5       6       7       8       \
 user_id                                                                    
 1            0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 2            0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 3            0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 4            0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
 5            0.0     0.0     4.0     0.0     0.0     0.0     0.0     0.0   
 
 movie_id  9       10      ...  161084  161155  161594  161830  161918  161944  \
 user_id                   ...                                                   
 1            0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
 2            0.0     4.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
 3            0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
 4            0.0     4.0  ...     

# Pairwise Distance

## Movies pairwise similarity distance

In [12]:
data_matrix.T.shape, data_matrix.T

((9066, 671),
 user_id   1    2    3    4    5    6    7    8    9    10   ...  662  663  \
 movie_id                                                    ...             
 1         0.0  0.0  0.0  0.0  0.0  0.0  3.0  0.0  4.0  0.0  ...  0.0  4.0   
 2         0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  5.0  0.0   
 3         0.0  0.0  0.0  0.0  4.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
 4         0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
 5         0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
 ...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...   
 161944    0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
 162376    0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
 162542    0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
 162672    0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  0.0  0.0   
 163949    0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0 

In [43]:
from sklearn.metrics.pairwise import pairwise_distances 
item_similarity = 1- pairwise_distances(data_matrix.T, metric='cosine')
item_similarity.shape, item_similarity

((9066, 9066),
 array([[1.        , 0.39451145, 0.30651588, ..., 0.        , 0.        ,
         0.05582876],
        [0.39451145, 1.        , 0.21749153, ..., 0.        , 0.        ,
         0.        ],
        [0.30651588, 0.21749153, 1.        , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
         0.        ],
        [0.05582876, 0.        , 0.        , ..., 0.        , 0.        ,
         1.        ]]))

## Get similar recommended movies for a movie_id

In [14]:
movie_index = 5
movie_prediction = pd.DataFrame(item_similarity)
print(movie_prediction.shape)
movie_prediction.head()

(9066, 9066)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9056,9057,9058,9059,9060,9061,9062,9063,9064,9065
0,1.0,0.394511,0.306516,0.133614,0.245102,0.377086,0.278629,0.063031,0.117499,0.310689,...,0.055829,0.031902,0.079755,0.079755,0.079755,0.079755,0.079755,0.0,0.0,0.055829
1,0.394511,1.0,0.217492,0.164651,0.278476,0.222003,0.207299,0.223524,0.113669,0.418124,...,0.0,0.055038,0.068797,0.082557,0.082557,0.137594,0.068797,0.0,0.0,0.0
2,0.306516,0.217492,1.0,0.177012,0.370732,0.247499,0.435648,0.127574,0.306717,0.191255,...,0.0,0.0,0.0,0.116226,0.116226,0.0,0.0,0.0,0.0,0.0
3,0.133614,0.164651,0.177012,1.0,0.179556,0.072518,0.184626,0.501513,0.25463,0.111447,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.245102,0.278476,0.370732,0.179556,1.0,0.272645,0.388476,0.194113,0.367941,0.246846,...,0.0,0.176845,0.0,0.117897,0.117897,0.0,0.0,0.0,0.0,0.0


In [15]:
movie_prediction.iloc[movie_index]

0       0.377086
1       0.222003
2       0.247499
3       0.072518
4       0.272645
          ...   
9061    0.000000
9062    0.111103
9063    0.000000
9064    0.000000
9065    0.061724
Name: 5, Length: 9066, dtype: float64

In [16]:
item_recommendation = pd.DataFrame(movie_prediction.iloc[movie_index].sort_values(ascending=False))
item_recommendation.head()

Unnamed: 0,5
5,1.0
615,0.525758
24,0.504869
31,0.491099
650,0.467446


In [17]:
item_recommendation.reset_index(inplace=True)
item_recommendation.head()

Unnamed: 0,index,5
0,5,1.0
1,615,0.525758
2,24,0.504869
3,31,0.491099
4,650,0.467446


In [18]:
item_recommendation.columns = ['movie_id', 'score']
item_recommendation.head()

Unnamed: 0,movie_id,score
0,5,1.0
1,615,0.525758
2,24,0.504869
3,31,0.491099
4,650,0.467446


In [19]:
item_recommendation.dtypes, item_recommendation.describe()

(movie_id      int64
 score       float64
 dtype: object,
           movie_id        score
 count  9066.000000  9066.000000
 mean   4532.500000     0.078832
 std    2617.273104     0.074483
 min       0.000000     0.000000
 25%    2266.250000     0.000000
 50%    4532.500000     0.069438
 75%    6798.750000     0.113789
 max    9065.000000     1.000000)

### Merge movie_id with movie title

In [20]:
merged = pd.merge(item_recommendation, movies, on='movie_id', how='left')
merged.shape, merged.head()

((9068, 3),
    movie_id     score                      title
 0         5  1.000000                 Four Rooms
 1       615  0.525758  The Passion of the Christ
 2        24  0.504869          Kill Bill: Vol. 1
 3        31  0.491099                        NaN
 4       650  0.467446            Boyz n the Hood)

In [21]:
collab_output = merged[merged['title'].notna()]
collab_output.shape, collab_output.head(10)

((3140, 3),
     movie_id     score                                 title
 0          5  1.000000                            Four Rooms
 1        615  0.525758             The Passion of the Christ
 2         24  0.504869                     Kill Bill: Vol. 1
 4        650  0.467446                       Boyz n the Hood
 5        644  0.465175          A.I. Artificial Intelligence
 6         87  0.461647  Indiana Jones and the Temple of Doom
 7        535  0.460525                            Flashdance
 8        617  0.457149                           Wild Things
 9        561  0.452249                           Constantine
 11        15  0.430175                          Citizen Kane)

In [22]:
collab_output.describe()

Unnamed: 0,movie_id,score
count,3140.0,3140.0
mean,3571.053185,0.090171
std,2841.833636,0.084577
min,2.0,0.0
25%,985.75,0.018243
50%,2673.0,0.078298
75%,5744.75,0.128906
max,9065.0,1.0


### Min Max Normalization

In [23]:
collab_output['collaborative_score_normalized'] = (collab_output['score']-min(collab_output['score']))/(max(collab_output['score'])-min(collab_output['score']))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  collab_output['collaborative_score_normalized'] = (collab_output['score']-min(collab_output['score']))/(max(collab_output['score'])-min(collab_output['score']))


In [24]:
collab_output.head()

Unnamed: 0,movie_id,score,title,collaborative_score_normalized
0,5,1.0,Four Rooms,1.0
1,615,0.525758,The Passion of the Christ,0.525758
2,24,0.504869,Kill Bill: Vol. 1,0.504869
4,650,0.467446,Boyz n the Hood,0.467446
5,644,0.465175,A.I. Artificial Intelligence,0.465175


In [25]:
collab_output.describe()

Unnamed: 0,movie_id,score,collaborative_score_normalized
count,3140.0,3140.0,3140.0
mean,3571.053185,0.090171,0.090171
std,2841.833636,0.084577,0.084577
min,2.0,0.0,0.0
25%,985.75,0.018243,0.018243
50%,2673.0,0.078298,0.078298
75%,5744.75,0.128906,0.128906
max,9065.0,1.0,1.0


In [26]:
collab_output.shape

(3140, 4)

In [27]:
collab_output['title'][:10]

0                               Four Rooms
1                The Passion of the Christ
2                        Kill Bill: Vol. 1
4                          Boyz n the Hood
5             A.I. Artificial Intelligence
6     Indiana Jones and the Temple of Doom
7                               Flashdance
8                              Wild Things
9                              Constantine
11                            Citizen Kane
Name: title, dtype: object

# Content Based Recommendations

In [28]:
movies_keywords = pd.read_csv('movies_metadata_final.csv')
print(movies_keywords.columns)
movies_keywords.head()

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage',
       'movie_id', 'imdb_id', 'original_language', 'title', 'keywords',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title.1', 'video',
       'vote_average', 'vote_count'],
      dtype='object')


Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,movie_id,imdb_id,original_language,title,keywords,...,release_date,revenue,runtime,spoken_languages,status,tagline,title.1,video,vote_average,vote_count
0,False,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",,2,tt0094675,fi,Ariel,Taisto Kasurinen is a Finnish coal miner whose...,...,1988-10-21,0.0,69.0,"[{'iso_639_1': 'fi', 'name': 'suomi'}, {'iso_6...",Released,,Ariel,False,7.1,44.0
1,False,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 35, 'name...",,3,tt0092149,fi,Varjoja paratiisissa,"An episode in the life of Nikander, a garbage ...",...,1986-10-16,0.0,76.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,,Shadows in Paradise,False,7.1,35.0
2,False,,4000000,"[{'id': 80, 'name': 'Crime'}, {'id': 35, 'name...",,5,tt0113101,en,Four Rooms,It's Ted the Bellhop's first night on the job....,...,1995-12-09,4300000.0,98.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Twelve outrageous guests. Four scandalous requ...,Four Rooms,False,6.5,539.0
3,False,,0,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",,6,tt0107286,en,Judgment Night,"While racing to a boxing match, Frank, Mike, J...",...,1993-10-15,12136938.0,110.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Don't move. Don't whisper. Don't even breathe.,Judgment Night,False,6.4,79.0
4,False,"{'id': 10, 'name': 'Star Wars Collection', 'po...",11000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.starwars.com/films/star-wars-episod...,11,tt0076759,en,Star Wars,Princess Leia is captured and held hostage by ...,...,1977-05-25,775398007.0,121.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,"A long time ago in a galaxy far, far away...",Star Wars,False,8.1,6778.0


In [29]:
movies_keywords = movies_keywords[['title', 'keywords']]
movies_keywords.head()

Unnamed: 0,title,keywords
0,Ariel,Taisto Kasurinen is a Finnish coal miner whose...
1,Varjoja paratiisissa,"An episode in the life of Nikander, a garbage ..."
2,Four Rooms,It's Ted the Bellhop's first night on the job....
3,Judgment Night,"While racing to a boxing match, Frank, Mike, J..."
4,Star Wars,Princess Leia is captured and held hostage by ...


## convert to numpy arrays

In [30]:
keywords_array = movies_keywords['keywords'].to_numpy()
len(keywords_array), movies_keywords.shape, keywords_array

(45462,
 (45462, 2),
 array(["Taisto Kasurinen is a Finnish coal miner whose father has just committed suicide and who is framed for a crime he did not commit. In jail, he starts to dream about leaving the country and starting a new life. He escapes from prison but things don't go as planned...",
        'An episode in the life of Nikander, a garbage man, involving the death of a co-worker, an affair and much more.',
        "It's Ted the Bellhop's first night on the job...and the hotel's very unusual guests are about to place him in some outrageous predicaments. It seems that this evening's room service is serving up one unbelievable happening after another.",
        ...,
        "In the 1910s, beautiful young Silja loses both parents and ends up working as a maid at wealthier farms around her village. She is courted by a number of bachelors, ranging from simple farmhand Väinö to sophisticated young student Armas whom she meets when working for a charming old professor. However, Silj

In [31]:
keywords_array[0]

"Taisto Kasurinen is a Finnish coal miner whose father has just committed suicide and who is framed for a crime he did not commit. In jail, he starts to dream about leaving the country and starting a new life. He escapes from prison but things don't go as planned..."

## Split keywords & create word list

In [32]:
words_list = []

for keyword in keywords_array:
    
    splitted_words = str(keyword).lower().split()
    
    words_list.append(splitted_words)

print(len(words_list), len(words_list[0]), len(words_list[1]))

45462 49 21


In [33]:
print(words_list[:10])

[['taisto', 'kasurinen', 'is', 'a', 'finnish', 'coal', 'miner', 'whose', 'father', 'has', 'just', 'committed', 'suicide', 'and', 'who', 'is', 'framed', 'for', 'a', 'crime', 'he', 'did', 'not', 'commit.', 'in', 'jail,', 'he', 'starts', 'to', 'dream', 'about', 'leaving', 'the', 'country', 'and', 'starting', 'a', 'new', 'life.', 'he', 'escapes', 'from', 'prison', 'but', 'things', "don't", 'go', 'as', 'planned...'], ['an', 'episode', 'in', 'the', 'life', 'of', 'nikander,', 'a', 'garbage', 'man,', 'involving', 'the', 'death', 'of', 'a', 'co-worker,', 'an', 'affair', 'and', 'much', 'more.'], ["it's", 'ted', 'the', "bellhop's", 'first', 'night', 'on', 'the', 'job...and', 'the', "hotel's", 'very', 'unusual', 'guests', 'are', 'about', 'to', 'place', 'him', 'in', 'some', 'outrageous', 'predicaments.', 'it', 'seems', 'that', 'this', "evening's", 'room', 'service', 'is', 'serving', 'up', 'one', 'unbelievable', 'happening', 'after', 'another.'], ['while', 'racing', 'to', 'a', 'boxing', 'match,', 'f

## Create Dictionary, BOW, tfidf model & Similarity Matrix

In [34]:
from gensim.corpora.dictionary import Dictionary

# create a dictionary from words list
dictionary = Dictionary(words_list) 

#create corpus where the corpus is a bag of words for each document
corpus = [dictionary.doc2bow(doc) for doc in words_list] 

from gensim.models.tfidfmodel import TfidfModel
#create tfidf model of the corpus
tfidf = TfidfModel(corpus) 

from gensim.similarities import MatrixSimilarity
# Create the similarity matrix. This is the most important part where we get the similarities between the movies.
sims = MatrixSimilarity(tfidf[corpus], num_features=len(dictionary))
print(sims)


MatrixSimilarity<45462 docs, 157066 features>


## For an input movie, Run the Model & return Recommended Movies

In [35]:
def movie_recommendation(movie_title):
    # get the movie row
    movie = movies_keywords.loc[movies_keywords.title==movie_title] 
    
    #split the keywords
    keywords = movie['keywords'].iloc[0].split() 

    #set the list of words to query_doc
    query_doc = keywords 
    
    # get a bag of words from the query_doc
    query_doc_bow = dictionary.doc2bow(query_doc) 

    #convert the regular bag of words model to a tf-idf model
    query_doc_tfidf = tfidf[query_doc_bow] 

    # get similarity values between input movie and all other movies
    similarity_array = sims[query_doc_tfidf] 

    #Convert to a Series
    similarity_series = pd.Series(similarity_array.tolist(), index=movies_keywords.title.values) 

    #get the most similar movies 
    similarity_output = similarity_series.sort_values(ascending=False)
    return similarity_output

In [36]:
content_output = movie_recommendation('Four Rooms')
type(content_output), content_output

(pandas.core.series.Series,
 Four Rooms                   0.907315
 The Innkeepers               0.141339
 Enter Nowhere                0.124108
 Tic Tac                      0.114426
 Little Miss Broadway         0.099070
                                ...   
 Gamer                        0.000000
 Elsk meg i morgen            0.000000
 Lo scapolo                   0.000000
 El vals de los inútiles      0.000000
 Tutto tutto niente niente    0.000000
 Length: 45462, dtype: float64)

In [38]:
content_df = pd.DataFrame(content_output)
content_df.reset_index(inplace=True)
content_df.columns = ['title', 'score']
content_df.head()

Unnamed: 0,title,score
0,Four Rooms,0.907315
1,The Innkeepers,0.141339
2,Enter Nowhere,0.124108
3,Tic Tac,0.114426
4,Little Miss Broadway,0.09907


In [39]:
content_df['content_score_normalized'] = (content_df['score']-min(content_df['score'])) / (max(content_df['score']) - min(content_df['score']))
content_df.shape, content_df.head()

((45462, 3),
                   title     score  content_score_normalized
 0            Four Rooms  0.907315                  1.000000
 1        The Innkeepers  0.141339                  0.155777
 2         Enter Nowhere  0.124108                  0.136786
 3               Tic Tac  0.114426                  0.126116
 4  Little Miss Broadway  0.099070                  0.109191)

In [40]:
content_df.describe()

Unnamed: 0,score,content_score_normalized
count,45462.0,45462.0
mean,0.008451,0.009314
std,0.010153,0.01119
min,0.0,0.0
25%,0.001806,0.001991
50%,0.006123,0.006748
75%,0.011399,0.012563
max,0.907315,1.0


# Hybrid: Combine two Models

In [41]:
hybrid_output = pd.merge(collab_output, content_df, how='left', on='title')
print(hybrid_output.shape)
hybrid_output.head()

(3626, 6)


Unnamed: 0,movie_id,score_x,title,collaborative_score_normalized,score_y,content_score_normalized
0,5,1.0,Four Rooms,1.0,0.907315,1.0
1,615,0.525758,The Passion of the Christ,0.525758,0.014243,0.015698
2,24,0.504869,Kill Bill: Vol. 1,0.504869,0.011371,0.012532
3,650,0.467446,Boyz n the Hood,0.467446,0.013678,0.015075
4,644,0.465175,A.I. Artificial Intelligence,0.465175,0.013917,0.015339


In [42]:
hybrid_output['final_score'] = (hybrid_output['collaborative_score_normalized'] + hybrid_output['content_score_normalized'])/2
hybrid_output = hybrid_output[['title','final_score']]
hybrid_output.sort_values(by='final_score', ascending=False)[:10]
print(hybrid_output.shape)
hybrid_output.head()

(3626, 2)


Unnamed: 0,title,final_score
0,Four Rooms,1.0
1,The Passion of the Christ,0.270728
2,Kill Bill: Vol. 1,0.258701
3,Boyz n the Hood,0.241261
4,A.I. Artificial Intelligence,0.240257
