In [61]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

import warnings 

warnings.filterwarnings('ignore')

In [2]:
# Load the pre-processed IMDB and Netflix tables from the working directory.
imdb, netflix = (
    pd.read_csv(csv_path)
    for csv_path in ('processed_imdb.csv', 'processed_netflix.csv')
)

In [4]:
# Table pairing IMDB titles with Netflix titles. The preview below shows the
# columns 'IMDB Title', 'Matched Netflix Title' and 'Match Score'; scores below
# 100 appear for non-identical pairs, so these are presumably fuzzy string
# matches — TODO confirm how match_list.csv was produced.
match_list = pd.read_csv('match_list.csv')

In [6]:
# (rows, columns) of each loaded table, in load order.
tuple(frame.shape for frame in (imdb, netflix, match_list))

((28041, 783), (7787, 781), (1348, 3))

In [5]:
# Preview the first five matched title pairs.
match_list.iloc[:5]

Unnamed: 0,IMDB Title,Matched Netflix Title,Match Score
0,The Great Train Robbery,the great train robbery,100
1,By the Sea,by the sea,100
2,The Captive,the captive,100
3,In the Park,in the dark,91
4,The Tramp,the trap,94


In [139]:
def get_recommendation(input_title, top_k=10, use_genre=True):
    """Recommend the IMDB titles most similar to ``input_title``.

    Candidates come from the module-level ``imdb`` frame. They are ranked by
    cosine similarity between the input row and each candidate row over the
    columns whose name contains ``'Semantic'``. The input title's details and
    the ranked table are both shown with ``display``.

    Parameters
    ----------
    input_title : str
        Exact value to look up in ``imdb['IMDB Title']``.
    top_k : int, default 10
        Maximum number of recommendations to return.
    use_genre : bool, default True
        When True, only titles sharing the input's ``'Genre Group'`` value are
        considered.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_k`` rows, sorted by descending ``'Similarity Score'``, with
        a fresh 0-based index and the display columns listed below.

    Raises
    ------
    ValueError
        If ``input_title`` matches zero rows or more than one row of ``imdb``.
    """
    # Columns shown for each recommended title; the score is computed below.
    columns_to_show = [
        'IMDB Title',
        'Release Year',
        'averageRating',
        'numVotes',
        'plot',
        'genres',
        'country',
        'Similarity Score'
    ]
    info_columns = columns_to_show[:-1]

    # Select the feature columns by name rather than by a hard-coded
    # positional slice, so the lookup survives a change in column order/count.
    # NOTE(review): assumes every column containing 'Semantic' is a numeric
    # feature column — confirm against processed_imdb.csv.
    semantic_columns = imdb.columns[imdb.columns.str.contains('Semantic')].tolist()

    # Look the title up without overwriting the ``input_title`` argument.
    query = imdb[imdb['IMDB Title'] == input_title]
    if len(query) != 1:
        raise ValueError(
            f"Expected exactly one IMDB title equal to {input_title!r}, "
            f"found {len(query)}."
        )

    # First, keep only titles in the same Genre Group if use_genre is True.
    if use_genre:
        candidates = imdb[imdb['Genre Group'] == query['Genre Group'].item()]
    else:
        candidates = imdb

    # Second, exclude the input title itself, and third, exclude titles that
    # are listed in match_list (i.e. may already exist in the Netflix dataset).
    candidate_titles = candidates['IMDB Title']
    keep = (candidate_titles != input_title) & ~candidate_titles.isin(match_list['IMDB Title'])
    candidates = candidates[keep]

    # Compare the single input row against the candidates only. This yields
    # the same scores as row 0 of the full pairwise matrix without building an
    # N x N array, and never relies on the input sorting first among ties.
    if candidates.empty:
        scores = pd.Series(dtype='float64')
    else:
        scores = cosine_similarity(query[semantic_columns], candidates[semantic_columns])[0]

    # ``assign`` builds a new frame, so no filtered slice of ``imdb`` is
    # mutated; mergesort is stable, keeping tied scores in ``imdb`` order.
    recommended = (
        candidates[info_columns]
        .assign(**{'Similarity Score': scores})
        .sort_values('Similarity Score', ascending=False, kind='mergesort')
        .reset_index(drop=True)
        .iloc[:top_k]
    )

    print('The original title information')

    display(query[info_columns])

    print()

    display(recommended)

    return recommended

As an example, the movie *Fast & Furious* (2009) will be used as the input title.

In [141]:
# Literal substring search: regex=False avoids interpreting the pattern as a
# regular expression, and na=False treats missing titles as non-matches so the
# boolean mask is always valid.
imdb[imdb['IMDB Title'].str.contains('Fast &', regex=False, na=False)]

Unnamed: 0,IMDB ID,titleType,IMDB Title,originalTitle,isAdult,Release Year,runtimeMinutes,genres,averageRating,numVotes,...,Semantic 759,Semantic 760,Semantic 761,Semantic 762,Semantic 763,Semantic 764,Semantic 765,Semantic 766,Semantic 767,Semantic 768
28036,tt1013752,movie,Fast & Furious,Fast & Furious,0,2009,107,"Action,Thriller",6.6,264960,...,-0.583039,-0.363652,-0.208181,-0.398622,-0.009654,0.449042,-0.085996,-0.281167,0.036825,-0.253933


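The `use_genre` argument controls whether genre is used when recommending titles: when it is `True`, only titles in the same Genre Group as the input are considered. The two cells below show the resulting lists without and with the genre filter, respectively.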

In [148]:
# Number of recommendations requested by the get_recommendation() calls below.
# NOTE(review): this cell's execution count (148) is later than the cells that
# use it (146, 147), and their saved outputs list 10 rows — re-run the notebook
# top to bottom to confirm the outputs match this value.
top_k = 15

In [146]:
# Rank against every IMDB title (no genre filter) and save the result.
# get_recommendation() already displays the ranked table, so the former
# mid-cell `recommended.head(top_k)` (whose value was discarded) is removed.
recommended = get_recommendation('Fast & Furious', top_k=top_k, use_genre=False)
recommended.to_csv('Recommended_titles_without_genres.csv', index=False)

The original title information


Unnamed: 0,IMDB Title,Release Year,averageRating,numVotes,plot,genres,country
28036,Fast & Furious,2009,6.6,264960,"Brian O'Conner, now working for the FBI in LA,...","Action,Thriller",USA





Unnamed: 0,IMDB Title,Release Year,averageRating,numVotes,plot,genres,country,Similarity Score
0,Clay Pigeon,1971,5.1,119,An exsoldier is recruited by the FBI to go und...,"Action,Drama",USA,0.903187
1,Raw Deal,1986,5.6,31207,A former FBI agent turned small town sheriff a...,"Action,Crime,Thriller","USA, Netherlands",0.898819
2,A Walk Among the Tombstones,2014,6.5,113383,Private investigator Matthew Scudder is hired ...,"Action,Crime,Drama",USA,0.897465
3,Johnny Stool Pigeon,1949,6.7,405,US Treasury agent George Morton persuades conv...,"Crime,Drama,Film-Noir",USA,0.89726
4,Another 48 Hrs.,1990,5.9,39084,Jack Cates once again enlists the aid of excon...,"Action,Comedy,Crime",USA,0.896218
5,Universal Soldier II: Brothers in Arms,1998,3.0,1631,The Universal Soldiers are used to smuggle dia...,"Action,Sci-Fi",Canada,0.891819
6,St. Ives,1976,6.3,2024,"Abner Procane, top L.A. burglar, finds that so...","Action,Crime,Drama",USA,0.891666
7,Top Dog,1995,4.3,3188,With his cop companion shot and killed by terr...,"Action,Comedy,Crime",USA,0.891374
8,The Italian Job,2003,7.0,344390,After being betrayed and left for dead in Ital...,"Action,Crime,Thriller","USA, France, UK",0.890442
9,Breach,2007,7.0,58101,FBI upstart Eric O'Neill enters into a power g...,"Biography,Crime,Drama",USA,0.890333


In [147]:
# Rank only within the input's Genre Group (use_genre defaults to True) and
# save the result. get_recommendation() already displays the ranked table, so
# the former mid-cell `recommended.head(top_k)` (value discarded) is removed.
recommended = get_recommendation('Fast & Furious', top_k=top_k)
recommended.to_csv('Recommended_titles_with_genres.csv', index=False)

The original title information


Unnamed: 0,IMDB Title,Release Year,averageRating,numVotes,plot,genres,country
28036,Fast & Furious,2009,6.6,264960,"Brian O'Conner, now working for the FBI in LA,...","Action,Thriller",USA





Unnamed: 0,IMDB Title,Release Year,averageRating,numVotes,plot,genres,country,Similarity Score
0,Raw Deal,1986,5.6,31207,A former FBI agent turned small town sheriff a...,"Action,Crime,Thriller","USA, Netherlands",0.898819
1,Another 48 Hrs.,1990,5.9,39084,Jack Cates once again enlists the aid of excon...,"Action,Comedy,Crime",USA,0.896218
2,Top Dog,1995,4.3,3188,With his cop companion shot and killed by terr...,"Action,Comedy,Crime",USA,0.891374
3,The Italian Job,2003,7.0,344390,After being betrayed and left for dead in Ital...,"Action,Crime,Thriller","USA, France, UK",0.890442
4,The Killer Elite,1975,6.0,5687,"Mike Locke, who works for a private security f...","Action,Crime,Thriller",USA,0.889039
5,Brannigan,1975,6.2,5134,Chicago police lieutenant Jim Brannigan is sen...,"Action,Comedy,Crime","UK, USA",0.888295
6,Direct Action,2004,5.0,1418,"Frank Gannon, a veteran cop, is being hunted b...","Action,Crime,Thriller","USA, Canada",0.886208
7,Once Upon a Time in Mexico,2003,6.4,154488,Hitman El Mariachi becomes involved in interna...,"Action,Crime,Thriller",USA,0.884975
8,High Sierra,1941,7.5,15297,"After being released from prison, notorious th...","Action,Adventure,Crime",USA,0.884683
9,Transporter 2,2005,6.3,192478,"Mercenary Frank Martin, who specializes moving...","Action,Crime,Thriller","France, USA",0.884678


As the search above shows, the IMDB dataset contains only one *Fast & Furious* title. If other installments of the *Fast & Furious* series were present in the dataset, the model might well include them among the recommended titles.