In [2]:
import pandas as pd

In [8]:
data = pd.read_csv('netflix_titles.csv')

In [9]:
data.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [10]:
data.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,81145628,Movie,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China","September 9, 2019",2019,TV-PG,90 min,"Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...
1,80117401,Movie,Jandino: Whatever it Takes,,Jandino Asporaat,United Kingdom,"September 9, 2016",2016,TV-MA,94 min,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...
2,70234439,TV Show,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,"September 8, 2018",2013,TV-Y7-FV,1 Season,Kids' TV,"With the help of three human allies, the Autob..."
3,80058654,TV Show,Transformers: Robots in Disguise,,"Will Friedle, Darren Criss, Constance Zimmer, ...",United States,"September 8, 2018",2016,TV-Y7,1 Season,Kids' TV,When a prison ship crash unleashes hundreds of...
4,80125979,Movie,#realityhigh,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,"September 8, 2017",2017,TV-14,99 min,Comedies,When nerdy high schooler Dani finally attracts...


In [15]:
#converting date 
data['date_added'] = pd.to_datetime(data['date_added'])
data['Year_added'] = data['date_added'].dt.year
data['Month_added'] = data['date_added'].dt.month

In [14]:
data_TV = data[data['type']=='TV Show']
data_Movies = data[data['type']=='Movie']

In [17]:
#Let us find out how the number of shows added has changed over time
data_TV['Year_added'].value_counts()

2019.0    803
2018.0    492
2017.0    387
2016.0    192
2020.0     37
2015.0     32
2014.0      6
2013.0      6
2012.0      3
2008.0      1
Name: Year_added, dtype: int64

In [18]:
#Increase in number of movies added over time.
data_Movies['Year_added'].value_counts()

2019.0    1546
2018.0    1290
2017.0     913
2016.0     264
2020.0     147
2015.0      58
2014.0      19
2011.0      13
2013.0       6
2012.0       4
2009.0       2
2010.0       1
2008.0       1
Name: Year_added, dtype: int64

In [19]:
#Let us remove the irrelvant attributes in order to make it simple
#The attributes we are interested in are, type,genre,director,cast,country,rating,title and description
new_data = data[['type','listed_in','director','cast','country','rating','title','description']]
new_data.head()

Unnamed: 0,type,listed_in,director,cast,country,rating,title,description
0,Movie,"Children & Family Movies, Comedies","Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China",TV-PG,Norm of the North: King Sized Adventure,Before planning an awesome wedding for his gra...
1,Movie,Stand-Up Comedy,,Jandino Asporaat,United Kingdom,TV-MA,Jandino: Whatever it Takes,Jandino Asporaat riffs on the challenges of ra...
2,TV Show,Kids' TV,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",United States,TV-Y7-FV,Transformers Prime,"With the help of three human allies, the Autob..."
3,TV Show,Kids' TV,,"Will Friedle, Darren Criss, Constance Zimmer, ...",United States,TV-Y7,Transformers: Robots in Disguise,When a prison ship crash unleashes hundreds of...
4,Movie,Comedies,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,TV-14,#realityhigh,When nerdy high schooler Dani finally attracts...


In [20]:
#rake-nltk is a library for keyword extraction RAKE stands for Rapid Automatic Keyword Extraction
!pip install rake-nltk
from rake_nltk import Rake
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

Collecting rake-nltk
  Downloading https://files.pythonhosted.org/packages/8e/c4/b4ff57e541ac5624ad4b20b89c2bafd4e98f29fd83139f3a81858bdb3815/rake_nltk-1.0.4.tar.gz
Building wheels for collected packages: rake-nltk
  Building wheel for rake-nltk (setup.py) ... [?25ldone
[?25h  Created wheel for rake-nltk: filename=rake_nltk-1.0.4-py2.py3-none-any.whl size=7818 sha256=cc9babfb767c346be7f7d964d8731118b426b4b852c2aa59b5df688f66455316
  Stored in directory: /Users/mayank/Library/Caches/pip/wheels/ef/92/fc/271b3709e71a96ffe934b27818946b795ac6b9b8ff8682483f
Successfully built rake-nltk
Installing collected packages: rake-nltk
Successfully installed rake-nltk-1.0.4
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [29]:
#The data is already clean, all we need to do is take care of NaN values if there are any
new_data.dropna(inplace=True)
new_data.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,type,listed_in,director,cast,country,rating,title,description
0,Movie,"Children & Family Movies, Comedies","Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China",TV-PG,Norm of the North: King Sized Adventure,Before planning an awesome wedding for his gra...
4,Movie,Comedies,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,TV-14,#realityhigh,When nerdy high schooler Dani finally attracts...
6,Movie,"International Movies, Sci-Fi & Fantasy, Thrillers",Gabe Ibáñez,"Antonio Banderas, Dylan McDermott, Melanie Gri...","Bulgaria, United States, Spain, Canada",R,Automata,"In a dystopian future, an insurance adjuster f..."
7,Movie,Stand-Up Comedy,"Rodrigo Toro, Francisco Schultz",Fabrizio Copano,Chile,TV-MA,Fabrizio Copano: Solo pienso en mi,Fabrizio Copano takes audience participation t...
9,Movie,"Action & Adventure, Thrillers",Henrik Ruben Genz,"James Franco, Kate Hudson, Tom Wilkinson, Omar...","United States, United Kingdom, Denmark, Sweden",R,Good People,A struggling couple can't believe their luck w...


In [32]:
blanks = []  # start with an empty list

col=['type','listed_in','director','cast','country','rating']
for i,col in new_data.iterrows():  # iterate over the DataFrame
    if type(col)==str:            # avoid NaN values
        if col.isspace():         # test 'review' for whitespace
            blanks.append(i)     # add matching index numbers to the list

new_data.drop(blanks, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [33]:
#Now we extract keywords from the description using RAKE keyword extractor and add those keywords in our dataframe
new_data['Key_words/desc'] = ''

for i,n in new_data.iterrows():
    desc = n['description']
    r = Rake()
    r.extract_keywords_from_text(desc)
    score_for_keyword = r.get_word_degrees()
    n['Key_words/desc']=list(score_for_keyword.keys())
    
new_data.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,type,listed_in,director,cast,country,rating,title,description,Key_words/desc
0,Movie,"Children & Family Movies, Comedies","Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","United States, India, South Korea, China",TV-PG,Norm of the North: King Sized Adventure,Before planning an awesome wedding for his gra...,"[grandfather, evil, archaeologist, first, awes..."
4,Movie,Comedies,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",United States,TV-14,#realityhigh,When nerdy high schooler Dani finally attracts...,"[social, media, celebrity, longtime, crush, ex..."
6,Movie,"International Movies, Sci-Fi & Fantasy, Thrillers",Gabe Ibáñez,"Antonio Banderas, Dylan McDermott, Melanie Gri...","Bulgaria, United States, Spain, Canada",R,Automata,"In a dystopian future, an insurance adjuster f...","[global, conspiracy, tech, company, investigat..."
7,Movie,Stand-Up Comedy,"Rodrigo Toro, Francisco Schultz",Fabrizio Copano,Chile,TV-MA,Fabrizio Copano: Solo pienso en mi,Fabrizio Copano takes audience participation t...,"[stand, sperm, banks, set, reflecting, family,..."
9,Movie,"Action & Adventure, Thrillers",Henrik Ruben Genz,"James Franco, Kate Hudson, Tom Wilkinson, Omar...","United States, United Kingdom, Denmark, Sweden",R,Good People,A struggling couple can't believe their luck w...,"[neighbor, stash, luck, money, believe, find, ..."
11,Movie,"Action & Adventure, Dramas, International Movies",Daniel Alfredson,"Jim Sturgess, Sam Worthington, Ryan Kwanten, A...","Netherlands, Belgium, United Kingdom, United S...",R,Kidnapping Mr. Heineken,"When beer magnate Alfred ""Freddy"" Heineken is ...","[freddy, kidnapped, 1983, abductors, make, lar..."
19,Movie,"Cult Movies, Dramas, Independent Movies",Gaspar Noé,"Karl Glusman, Klara Kristin, Aomi Muyock, Ugo ...","France, Belgium",NR,Love,A man in an unsatisfying marriage recalls the ...,"[may, missing, ex, man, intense, past, relatio..."
20,Movie,"Comedies, Independent Movies, Romantic Movies",Tom O'Brien,"Tom O'Brien, Katherine Waterston, Caitlin Fitz...",United States,TV-14,Manhattan Romance,A filmmaker working on a documentary about lov...,"[subjects, documentary, love, filmmaker, worki..."
21,Movie,"Action & Adventure, Comedies, International Mo...",Antoine Bardou-Jacquet,"Ron Perlman, Rupert Grint, Robert Sheehan, Ste...","France, Belgium",R,Moonwalkers,"A brain-addled war vet, a failing band manager...","[faking, cia, construct, brain, epic, scam, st..."
23,Movie,"Horror Movies, Thrillers",Brad Anderson,"Kate Beckinsale, Jim Sturgess, David Thewlis, ...",United States,PG-13,Stonehearst Asylum,"In 1899, a young doctor arrives at an asylum f...","[becomes, suspicious, mentor, female, patient,..."


In [34]:
#now let us convert all the columns into lists so that we can iterate easily over them.
new_data['cast'] = new_data['cast'].map(lambda x:x.split(',')[:3])
new_data['listed_in'] = new_data['listed_in'].map(lambda x:x.lower().split(','))
new_data['type'] = new_data['type'].map(lambda x:x.lower().split(','))
new_data['country'] = new_data['country'].map(lambda x:x.lower().split(','))
new_data['rating'] = new_data['rating'].map(lambda x:x.lower().split(','))
new_data['director'] = new_data['director'].map(lambda x:x.split(','))

new_data.drop('description',axis=1, inplace=True)

new_data.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cavea

Unnamed: 0,type,listed_in,director,cast,country,rating,title,Key_words/desc
0,[movie],"[children & family movies, comedies]","[Richard Finn, Tim Maltby]","[Alan Marriott, Andrew Toth, Brian Dobson]","[united states, india, south korea, china]",[tv-pg],Norm of the North: King Sized Adventure,"[grandfather, evil, archaeologist, first, awes..."
4,[movie],[comedies],[Fernando Lebrija],"[Nesta Cooper, Kate Walsh, John Michael Higg...",[united states],[tv-14],#realityhigh,"[social, media, celebrity, longtime, crush, ex..."
6,[movie],"[international movies, sci-fi & fantasy, thr...",[Gabe Ibáñez],"[Antonio Banderas, Dylan McDermott, Melanie ...","[bulgaria, united states, spain, canada]",[r],Automata,"[global, conspiracy, tech, company, investigat..."
7,[movie],[stand-up comedy],"[Rodrigo Toro, Francisco Schultz]",[Fabrizio Copano],[chile],[tv-ma],Fabrizio Copano: Solo pienso en mi,"[stand, sperm, banks, set, reflecting, family,..."
9,[movie],"[action & adventure, thrillers]",[Henrik Ruben Genz],"[James Franco, Kate Hudson, Tom Wilkinson]","[united states, united kingdom, denmark, sw...",[r],Good People,"[neighbor, stash, luck, money, believe, find, ..."
11,[movie],"[action & adventure, dramas, international m...",[Daniel Alfredson],"[Jim Sturgess, Sam Worthington, Ryan Kwanten]","[netherlands, belgium, united kingdom, unit...",[r],Kidnapping Mr. Heineken,"[freddy, kidnapped, 1983, abductors, make, lar..."
19,[movie],"[cult movies, dramas, independent movies]",[Gaspar Noé],"[Karl Glusman, Klara Kristin, Aomi Muyock]","[france, belgium]",[nr],Love,"[may, missing, ex, man, intense, past, relatio..."
20,[movie],"[comedies, independent movies, romantic movies]",[Tom O'Brien],"[Tom O'Brien, Katherine Waterston, Caitlin F...",[united states],[tv-14],Manhattan Romance,"[subjects, documentary, love, filmmaker, worki..."
21,[movie],"[action & adventure, comedies, international...",[Antoine Bardou-Jacquet],"[Ron Perlman, Rupert Grint, Robert Sheehan]","[france, belgium]",[r],Moonwalkers,"[faking, cia, construct, brain, epic, scam, st..."
23,[movie],"[horror movies, thrillers]",[Brad Anderson],"[Kate Beckinsale, Jim Sturgess, David Thewlis]",[united states],[pg-13],Stonehearst Asylum,"[becomes, suspicious, mentor, female, patient,..."


In [35]:
for i,n in new_data.iterrows():
    n['cast'] = [x.lower().replace(' ','') for x in n['cast']]
    n['type'] = [x.lower().replace(' ','') for x in n['type']]
    n['rating'] = [x.lower().replace(' ','') for x in n['rating']]
    n['country'] = [x.lower().replace(' ','') for x in n['country']]
    n['director'] = ''.join(n['director']).lower()
    
new_data = new_data.set_index('title')
new_data.head(10)

Unnamed: 0_level_0,type,listed_in,director,cast,country,rating,Key_words/desc
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Norm of the North: King Sized Adventure,[movie],"[children & family movies, comedies]",richard finn tim maltby,"[alanmarriott, andrewtoth, briandobson]","[unitedstates, india, southkorea, china]",[tv-pg],"[grandfather, evil, archaeologist, first, awes..."
#realityhigh,[movie],[comedies],fernando lebrija,"[nestacooper, katewalsh, johnmichaelhiggins]",[unitedstates],[tv-14],"[social, media, celebrity, longtime, crush, ex..."
Automata,[movie],"[international movies, sci-fi & fantasy, thr...",gabe ibáñez,"[antoniobanderas, dylanmcdermott, melaniegriff...","[bulgaria, unitedstates, spain, canada]",[r],"[global, conspiracy, tech, company, investigat..."
Fabrizio Copano: Solo pienso en mi,[movie],[stand-up comedy],rodrigo toro francisco schultz,[fabriziocopano],[chile],[tv-ma],"[stand, sperm, banks, set, reflecting, family,..."
Good People,[movie],"[action & adventure, thrillers]",henrik ruben genz,"[jamesfranco, katehudson, tomwilkinson]","[unitedstates, unitedkingdom, denmark, sweden]",[r],"[neighbor, stash, luck, money, believe, find, ..."
Kidnapping Mr. Heineken,[movie],"[action & adventure, dramas, international m...",daniel alfredson,"[jimsturgess, samworthington, ryankwanten]","[netherlands, belgium, unitedkingdom, unitedst...",[r],"[freddy, kidnapped, 1983, abductors, make, lar..."
Love,[movie],"[cult movies, dramas, independent movies]",gaspar noé,"[karlglusman, klarakristin, aomimuyock]","[france, belgium]",[nr],"[may, missing, ex, man, intense, past, relatio..."
Manhattan Romance,[movie],"[comedies, independent movies, romantic movies]",tom o'brien,"[tomo'brien, katherinewaterston, caitlinfitzge...",[unitedstates],[tv-14],"[subjects, documentary, love, filmmaker, worki..."
Moonwalkers,[movie],"[action & adventure, comedies, international...",antoine bardou-jacquet,"[ronperlman, rupertgrint, robertsheehan]","[france, belgium]",[r],"[faking, cia, construct, brain, epic, scam, st..."
Stonehearst Asylum,[movie],"[horror movies, thrillers]",brad anderson,"[katebeckinsale, jimsturgess, davidthewlis]",[unitedstates],[pg-13],"[becomes, suspicious, mentor, female, patient,..."


In [37]:
new_data['bag_of_words'] = ''

cols = new_data.columns
for i,j in new_data.iterrows():
    words = ''
    for k in cols:
        if k!='director':
            words = words + ' '.join(j[k])+ ' '
        else:
            words = words + j[k] + ' '
            
    j['bag_of_words'] = words
new_data.head(10)

Unnamed: 0_level_0,type,listed_in,director,cast,country,rating,Key_words/desc,bag_of_words
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Norm of the North: King Sized Adventure,[movie],"[children & family movies, comedies]",richard finn tim maltby,"[alanmarriott, andrewtoth, briandobson]","[unitedstates, india, southkorea, china]",[tv-pg],"[grandfather, evil, archaeologist, first, awes...",movie children & family movies comedies richa...
#realityhigh,[movie],[comedies],fernando lebrija,"[nestacooper, katewalsh, johnmichaelhiggins]",[unitedstates],[tv-14],"[social, media, celebrity, longtime, crush, ex...",movie comedies fernando lebrija nestacooper ka...
Automata,[movie],"[international movies, sci-fi & fantasy, thr...",gabe ibáñez,"[antoniobanderas, dylanmcdermott, melaniegriff...","[bulgaria, unitedstates, spain, canada]",[r],"[global, conspiracy, tech, company, investigat...",movie international movies sci-fi & fantasy ...
Fabrizio Copano: Solo pienso en mi,[movie],[stand-up comedy],rodrigo toro francisco schultz,[fabriziocopano],[chile],[tv-ma],"[stand, sperm, banks, set, reflecting, family,...",movie stand-up comedy rodrigo toro francisco s...
Good People,[movie],"[action & adventure, thrillers]",henrik ruben genz,"[jamesfranco, katehudson, tomwilkinson]","[unitedstates, unitedkingdom, denmark, sweden]",[r],"[neighbor, stash, luck, money, believe, find, ...",movie action & adventure thrillers henrik rub...
Kidnapping Mr. Heineken,[movie],"[action & adventure, dramas, international m...",daniel alfredson,"[jimsturgess, samworthington, ryankwanten]","[netherlands, belgium, unitedkingdom, unitedst...",[r],"[freddy, kidnapped, 1983, abductors, make, lar...",movie action & adventure dramas internationa...
Love,[movie],"[cult movies, dramas, independent movies]",gaspar noé,"[karlglusman, klarakristin, aomimuyock]","[france, belgium]",[nr],"[may, missing, ex, man, intense, past, relatio...",movie cult movies dramas independent movies ...
Manhattan Romance,[movie],"[comedies, independent movies, romantic movies]",tom o'brien,"[tomo'brien, katherinewaterston, caitlinfitzge...",[unitedstates],[tv-14],"[subjects, documentary, love, filmmaker, worki...",movie comedies independent movies romantic m...
Moonwalkers,[movie],"[action & adventure, comedies, international...",antoine bardou-jacquet,"[ronperlman, rupertgrint, robertsheehan]","[france, belgium]",[r],"[faking, cia, construct, brain, epic, scam, st...",movie action & adventure comedies internatio...
Stonehearst Asylum,[movie],"[horror movies, thrillers]",brad anderson,"[katebeckinsale, jimsturgess, davidthewlis]",[unitedstates],[pg-13],"[becomes, suspicious, mentor, female, patient,...",movie horror movies thrillers brad anderson k...


In [38]:
clean_data = new_data.drop(columns = [cols for cols in new_data.columns if cols!='bag_of_words'])
clean_data.head(10)

Unnamed: 0_level_0,bag_of_words
title,Unnamed: 1_level_1
Norm of the North: King Sized Adventure,movie children & family movies comedies richa...
#realityhigh,movie comedies fernando lebrija nestacooper ka...
Automata,movie international movies sci-fi & fantasy ...
Fabrizio Copano: Solo pienso en mi,movie stand-up comedy rodrigo toro francisco s...
Good People,movie action & adventure thrillers henrik rub...
Kidnapping Mr. Heineken,movie action & adventure dramas internationa...
Love,movie cult movies dramas independent movies ...
Manhattan Romance,movie comedies independent movies romantic m...
Moonwalkers,movie action & adventure comedies internatio...
Stonehearst Asylum,movie horror movies thrillers brad anderson k...


In [39]:
data[data.country=='India'][:5]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,Year_added,Month_added
35,81154455,Movie,Article 15,Anubhav Sinha,"Ayushmann Khurrana, Nassar, Manoj Pahwa, Kumud...",India,2019-09-06,2019,TV-MA,125 min,"Dramas, International Movies, Thrillers",The grim realities of caste discrimination com...,2019.0,9.0
37,81052275,Movie,Ee Nagaraniki Emaindi,Tharun Bhascker,"Vishwaksen Naidu, Sushanth Reddy, Abhinav Goma...",India,2019-09-06,2018,TV-14,133 min,"Comedies, International Movies","In Goa and in desperate need of cash, four chi...",2019.0,9.0
41,70303496,Movie,PK,Rajkumar Hirani,"Aamir Khan, Anuskha Sharma, Sanjay Dutt, Saura...",India,2018-09-06,2014,TV-14,146 min,"Comedies, Dramas, International Movies",Aamir Khan teams with director Rajkumar Hirani...,2018.0,9.0
58,81155784,Movie,Watchman,A. L. Vijay,"G.V. Prakash Kumar, Samyuktha Hegde, Suman, Ra...",India,2019-09-04,2019,TV-14,93 min,"Comedies, Dramas, International Movies","Rushing to pay off a loan shark, a young man b...",2019.0,9.0
99,80225885,TV Show,Bard of Blood,,"Emraan Hashmi, Viineet Kumar, Sobhita Dhulipal...",India,2019-09-27,2019,TV-MA,1 Season,"International TV Shows, TV Action & Adventure,...","Years after a disastrous job in Balochistan, a...",2019.0,9.0


In [40]:
clean_data.loc['PK']['bag_of_words']

'movie comedies  dramas  international movies rajkumar hirani aamirkhan anuskhasharma sanjaydutt india tv-14 aamir khan teams director rajkumar hirani state political satire corruption social crusader india play  '

In [41]:
count = CountVectorizer()
lol = count.fit_transform(clean_data['bag_of_words'])

In [42]:

similarity = cosine_similarity(lol,lol)

In [43]:
listy = pd.Series(clean_data.index)

In [45]:
def recommendations(Title, cosine_sim = similarity):
    
    recommended_movies = []
    
    # gettin the index of the movie that matches the title
    idx = listy[listy == Title].index[0]

    # creating a Series with the similarity scores in descending order
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)

    # getting the indexes of the 10 most similar movies
    top_10_indexes = list(score_series.iloc[1:11].index)
    
    # populating the list with the titles of the best 10 matching movies
    for i in top_10_indexes:
        recommended_movies.append(list(clean_data.index)[i])
        
    return recommended_movies

In [46]:
recommendations('Article 15')

['I Am',
 'Merku Thodarchi Malai',
 'W/O Ram',
 'Kacche Dhaagey',
 'Vanjagar Ulagam',
 'Saavat',
 'LSD: Love, Sex Aur Dhokha',
 'Brij Mohan Amar Rahe',
 'Hazaaron Khwaishein Aisi',
 'Made in China']

In [50]:
recommendations('PK')

['3 Idiots',
 'Merku Thodarchi Malai',
 'Sanju',
 'Hattrick',
 'Harishchandrachi Factory',
 'English Babu Desi Mem',
 'Dil Chahta Hai',
 'War Chhod Na Yaar',
 'Kacche Dhaagey',
 'Mahabharat']

In [53]:
recommendations('Lincoln')

['War Horse',
 'Catch Me If You Can',
 "Schindler's List",
 'Indiana Jones and the Kingdom of the Crystal Skull',
 'Indiana Jones and the Temple of Doom',
 'Indiana Jones and the Last Crusade',
 'The Last Face',
 'Indiana Jones and the Raiders of the Lost Ark',
 'The Adventures of Tintin',
 'Flash of Genius']