In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the cleaned score table and discard the leftover 'Unnamed: 0' column in one step
df = pd.read_csv('cleaned_score.csv').drop(columns=['Unnamed: 0'])

In [3]:
# Disabled experiment: would build df_score from df, keeping only rows whose
# 'score' is present and at least 5.5.
# NOTE(review): df_score is not referenced in the visible cells — confirm before deleting.
#df_score = df.copy()
#df_score = df_score.dropna(subset=['score'])
#df_score = df_score.loc[df_score['score'] >=5.5 ]

In [4]:
# Text normalisation helpers: lower-case a value, optionally removing spaces
def lower_space(x):
    """Return `x` lower-cased with every space character (" ") removed."""
    without_spaces = x.replace(" ", "")
    return without_spaces.lower()
    
def lower_only(x):
    """Return the string `x` lower-cased; spaces are kept as they are."""
    lowered = str.lower(x)
    return lowered

In [5]:
# Columns carried into the normalised frame. Only the text columns listed in the
# loop below go through lower_space(); 'title' keeps its spaces, and
# 'release_year' / 'score' are left untouched.
features=['type','title','director','cast','country','release_year','rating','listed_in','score']
# Select first, then copy, so df_lower is an independent frame and the
# assignments below never write into a slice of df.
df_lower = df[features].copy()

# Map each rating label to a text token (not an integer). Labels that share a
# token are treated as the same rating group, and a token written several times
# is counted several times once the text is fed to a bag-of-words vectorizer.
# The dict is deliberately NOT called `ord`: that name would shadow the builtin
# ord() for every later cell in the kernel.
rating_tokens = {'TV-Y':'ratt', 'TV-Y7':'ratt', 'TV-Y7-FV':'ratt',
                 'G':'ratt ratt', 'TV-G':'ratt ratt', 'PG':'ratt ratt','TV-PG':'ratt ratt',
                 'PG-13':'ratt ratt ratt', 'TV-14':'ratt ratt ratt',
                 'R':'rattt', 'NC-17':'rattt', 'TV-MA':'rattt',
                 'NR':'norated', 'UR':'norated', 'No Rated':'norated'}
# NOTE(review): Series.map leaves NaN for any label missing from the dict —
# confirm every rating value in the data is covered.
df_lower['rating'] = df_lower['rating'].map(rating_tokens)

# Titles are only lower-cased so that individual title words stay separate tokens.
df_lower['title'] = df_lower['title'].apply(lower_only)

# Names and categories are squashed into single tokens ("kirstenjohnson").
for feature in ('type', 'director', 'cast', 'country', 'listed_in'):
    df_lower[feature] = df_lower[feature].apply(lower_space)

# Keep at most the first three comma-separated cast entries.
df_lower['cast'] = df_lower['cast'].apply(lambda names: ','.join(names.strip().split(',')[:3]))
df_lower.head(2)

Unnamed: 0,type,title,director,cast,country,release_year,rating,listed_in,score
0,movie,dick johnson is dead,kirstenjohnson,nodata,unitedstates,2020,ratt ratt ratt,documentaries,7.4
1,tvshow,blood & water,nodata,"amaqamata,khosingema,gailmabalane",southafrica,2021,rattt,"internationaltvshows,tvdramas,tvmysteries",6.6


In [6]:
# Build the bag-of-words string for one row
def create_soup(x):
    """Join the row's text fields into one space-separated string.

    `x` is indexed by column name and every field used must already be a
    string. An earlier variant also appended 'description' and 'score';
    both are left out here.
    """
    soup_columns = ('type', 'title', 'director', 'cast', 'country',
                    'release_year', 'rating', 'listed_in')
    return ' '.join(x[column] for column in soup_columns)

In [7]:
# create bag-of-words
# Every cell is converted with str() so create_soup() can concatenate the fields.
# The conversion goes column by column through Series.map because
# DataFrame.applymap is deprecated since pandas 2.1; the element-wise result is
# the same, and df_lower itself is not modified.
df_str = df_lower.apply(lambda column: column.map(str))
df_str['soup'] = df_str.apply(create_soup, axis=1)
df_str['soup']

0       movie dick johnson is dead kirstenjohnson noda...
1       tvshow blood & water nodata amaqamata,khosinge...
2       tvshow ganglands julienleclercq samibouajila,t...
3       tvshow jailbirds new orleans nodata nodata nod...
4       tvshow kota factory nodata mayurmore,jitendrak...
                              ...                        
8789    movie zodiac davidfincher markruffalo,jakegyll...
8790    tvshow zombie dumb nodata nodata nodata 2018 r...
8791    movie zombieland rubenfleischer jesseeisenberg...
8792    movie zoom peterhewitt timallen,courteneycox,c...
8793    movie zubaan mozezsingh vickykaushal,sarah-jan...
Name: soup, Length: 8794, dtype: object

In [8]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Token counts for every soup string, with English stop words removed
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(df_str['soup'])

# All-pairs cosine similarity: with a single argument the rows of the matrix
# are compared against each other
cosine_sim = cosine_similarity(count_matrix)

In [9]:
# use movie titles as indices
# drop=True keeps this cell safe to re-run: without it every run inserts the
# old index as one more column and a later run fails on the column-name clash.
df_str = df_str.reset_index(drop=True)
# Maps lower-cased title -> row position.
# NOTE(review): titles are not checked for uniqueness here, so a repeated title
# maps to several positions — confirm how callers should treat that.
indices = pd.Series(df_str.index, index=df_str['title'])
indices

title
dick johnson is dead        0
blood & water               1
ganglands                   2
jailbirds new orleans       3
kota factory                4
                         ... 
zodiac                   8789
zombie dumb              8790
zombieland               8791
zoom                     8792
zubaan                   8793
Length: 8794, dtype: int64

In [10]:
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return up to 10 titles most similar to `title` that pass the quality filter.

    Parameters
    ----------
    title : str
        Title to look up. It is lower-cased before the lookup in `indices`.
    cosine_sim : array-like
        Pairwise similarity matrix, indexed by row position. Row `i` is
        assumed to describe row `i` of `df` (NOTE(review): confirm the
        matrix and `df` share the same row order).

    Returns
    -------
    pandas.DataFrame
        Rows of `df`, most similar first, restricted to entries whose score
        is at least 7.0 and whose director, cast and country are not
        "No Data". The looked-up title itself is never returned.

    Raises
    ------
    KeyError
        If the lower-cased title is not present in `indices`.
    """
    title = title.lower()
    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series
    # rather than a scalar, which would select several similarity rows at
    # once. Use the first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # (row position, similarity) pairs, most similar first. sorted() is stable,
    # so equal similarities keep their original row order.
    sim_scores = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Quality filter computed once for the whole frame instead of row by row.
    keep = (
        (df['score'] >= 7.0)
        & (df['director'] != "No Data")
        & (df['cast'] != "No Data")
        & (df['country'] != "No Data")
    ).to_numpy()

    # Exclude the query row explicitly. Slicing off the first filtered hit is
    # wrong whenever the query itself fails the filter, because the entry that
    # gets discarded is then the best real recommendation.
    top_recom = [row for row, _ in sim_scores if row != idx and keep[row]][:10]

    # Return the top 10 most similar movies
    features=['type','title','director','cast','country','release_year','rating','listed_in','description','score']
    return df[features].iloc[top_recom]

In [11]:
# Example lookup; get_recommendations lower-cases the title, so mixed case is accepted
get_recommendations('Women Behind Bars', cosine_sim)

Unnamed: 0,type,title,director,cast,country,release_year,rating,listed_in,description,score
676,TV Show,Riverdale,Rob Seidenglanz,"K.J. Apa, Lili Reinhart, Camila Mendes, Cole S...",United States,2019,TV-14,"Crime TV Shows, TV Dramas, TV Mysteries","While navigating the troubled waters of sex, r...",7.5
2947,TV Show,Arrow,James Bamford,"Stephen Amell, Katie Cassidy, David Ramsey, Wi...",United States,2019,TV-14,"Crime TV Shows, TV Action & Adventure","Based on DC Comics' Green Arrow, an affluent p...",7.5
6657,Movie,Easy A,Will Gluck,"Emma Stone, Penn Badgley, Amanda Bynes, Dan By...",United States,2010,PG-13,"Comedies, Romantic Movies",When a lie about Olive's reputation transforms...,7.1
7842,Movie,Remember Me,Allen Coulter,"Robert Pattinson, Emilie de Ravin, Chris Coope...",United States,2010,PG-13,"Dramas, Romantic Movies",Tyler's still feeling lost following the trage...,7.1
8498,Movie,The Social Network,David Fincher,"Jesse Eisenberg, Andrew Garfield, Justin Timbe...",United States,2010,PG-13,Dramas,Director David Fincher's biographical drama ch...,7.7
380,TV Show,The Flash,Glen Winter,"Grant Gustin, Candice Patton, Danielle Panabak...",United States,2021,TV-14,"Crime TV Shows, TV Action & Adventure, TV Sci-...",A forensics expert who wakes from a coma with ...,7.6
1387,Movie,The Next Three Days,Paul Haggis,"Russell Crowe, Elizabeth Banks, Brian Dennehy,...","United States, France",2010,PG-13,"Dramas, Thrillers",When his wife becomes a murder suspect and is ...,7.3
5096,TV Show,Fullmetal Alchemist: Brotherhood,Yasuhiro Irie,"Romi Park, Rie Kugimiya, Megumi Takamoto, Shin...",Japan,2010,TV-14,"Anime Series, International TV Shows",After both suffer physical damage – brothers E...,9.1
797,Movie,I Am Sam,Jessie Nelson,"Sean Penn, Michelle Pfeiffer, Dakota Fanning, ...",United States,2001,PG-13,Dramas,When the legal system declares him unfit to be...,7.6
3278,Movie,SunGanges,Valli Bindana,Naseeruddin Shah,"India, United States",2018,TV-14,"Documentaries, International Movies",A trio of filmmakers treks across India to exp...,7.8


In [12]:
# Second example lookup, passing the similarity matrix explicitly
get_recommendations('Thor: Ragnarok', cosine_sim)

Unnamed: 0,type,title,director,cast,country,release_year,rating,listed_in,description,score
7431,Movie,Men in Black,Barry Sonnenfeld,"Tommy Lee Jones, Will Smith, Linda Fiorentino,...",United States,1997,PG-13,"Action & Adventure, Comedies, Sci-Fi & Fantasy",A streetwise cop teams with a veteran governme...,7.3
6167,Movie,Ant-Man and the Wasp,Peyton Reed,"Paul Rudd, Evangeline Lilly, Michael Douglas, ...",United States,2018,PG-13,"Action & Adventure, Comedies, Sci-Fi & Fantasy","Problems big and small pop up when Scott Lang,...",7.1
946,Movie,Stargate,Roland Emmerich,"Kurt Russell, James Spader, Jaye Davidson, Viv...","United States, France",1994,PG-13,"Action & Adventure, Sci-Fi & Fantasy",An Egyptologist joins a mission into the unkno...,7.1
5955,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,2009,PG-13,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi...",7.7
6498,Movie,Cloverfield,Matt Reeves,"Lizzy Caplan, Jessica Lucas, T.J. Miller, Mich...",United States,2008,PG-13,"Action & Adventure, Horror Movies, Sci-Fi & Fa...",A going-away party in Manhattan is interrupted...,7.0
594,Movie,Star Trek,J.J. Abrams,"Chris Pine, Zachary Quinto, Karl Urban, Zoe Sa...","United States, Germany",2009,PG-13,"Action & Adventure, Sci-Fi & Fantasy",On their first voyage aboard the starship Ente...,7.9
6320,Movie,Black Panther,Ryan Coogler,"Chadwick Boseman, Michael B. Jordan, Lupita Ny...",United States,2018,PG-13,"Action & Adventure, Sci-Fi & Fantasy","T'Challa, the superpowered new leader of the h...",7.3
8518,Movie,The Time Machine,Simon Wells,"Guy Pearce, Samantha Mumba, Jeremy Irons, Mark...","United States, United Arab Emirates",2002,PG-13,"Action & Adventure, Sci-Fi & Fantasy","Based on the H.G. Wells classic, this adventur...",7.6
340,Movie,Inception,Christopher Nolan,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ellio...","United States, United Kingdom",2010,PG-13,"Action & Adventure, Sci-Fi & Fantasy, Thrillers",A troubled thief who extracts secrets from peo...,8.8
1957,Movie,Real Steel,Shawn Levy,"Hugh Jackman, Dakota Goyo, Evangeline Lilly, A...","United States, India",2011,PG-13,"Action & Adventure, Sci-Fi & Fantasy, Sports M...",A struggling fighter-turned-promoter reconnect...,7.1


In [13]:
# Third example lookup, passing the similarity matrix explicitly
get_recommendations('Sankofa', cosine_sim)

Unnamed: 0,type,title,director,cast,country,release_year,rating,listed_in,description,score
6511,Movie,Colonia,Florian Gallenberger,"Emma Watson, Daniel Brühl, Michael Nyqvist, Ri...","Germany, France, Luxembourg, United Kingdom, U...",2015,R,"Dramas, Independent Movies, International Movies",When her boyfriend is caught up in a Chilean c...,7.1
3125,Movie,The Danish Girl,Tom Hooper,"Eddie Redmayne, Alicia Vikander, Ben Whishaw, ...","United Kingdom, United States, Germany, Denmar...",2015,R,"Dramas, Independent Movies, International Movies","In 1920s Denmark, married artists embark on a ...",7.1
2895,Movie,System Crasher,Nora Fingscheidt,"Helena Zengel, Albrecht Schuch, Gabriela Maria...",Germany,2020,TV-MA,"Dramas, Independent Movies, International Movies","Traumatized, violent and yearning for love, 9-...",7.8
7317,Movie,Locke,Steven Knight,"Tom Hardy, Olivia Colman, Ruth Wilson, Andrew ...","United Kingdom, United States",2013,R,"Dramas, Independent Movies","Over the space of 90 minutes, Ivan Locke's lif...",7.1
2467,Movie,I'm No Longer Here,Fernando Frías de la Parra,"Juan Daniel García, Angelina Chen, Jonathan Es...","Mexico, United States",2019,TV-MA,"Dramas, Independent Movies, International Movies",A terrible misunderstanding with a local gang ...,7.3
4626,Movie,In Darkness,Anthony Byrne,"Natalie Dormer, Ed Skrein, Emily Ratajkowski, ...","United Kingdom, United States",2018,TV-MA,"Independent Movies, International Movies, Thri...",A blind pianist fears for her life after the s...,7.3
6140,Movie,American Honey,Andrea Arnold,"Sasha Lane, Shia LaBeouf, Riley Keough, McCaul...","United Kingdom, United States",2016,R,"Dramas, Independent Movies",A teenage girl leaves her dull life in Oklahom...,7.0
6711,Movie,Ex Machina,Alex Garland,"Domhnall Gleeson, Alicia Vikander, Oscar Isaac...",United Kingdom,2015,R,"Dramas, Independent Movies, International Movies",A coder at a tech company wins a week-long ret...,7.7
8072,Movie,Starred Up,David Mackenzie,"Jack O'Connell, Ben Mendelsohn, Rupert Friend,...",United Kingdom,2013,TV-MA,"Dramas, Independent Movies, International Movies",A British juvenile offender's violent temper g...,7.4
8565,Movie,Thithi,Raam Reddy,"Thammegowda S., Channegowda, Abhishek H.N., Po...","India, United States",2015,TV-MA,"Dramas, Independent Movies, International Movies","In a South Indian village, a centenarian's fun...",8.0
