# Hybrid Filtering

In [360]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

import seaborn as sns
import matplotlib.pyplot as plt
import json

In [361]:
tmdb_movies = pd.read_csv("tmdb-5000-movie-dataset/tmdb_5000_movies.csv")
lens_movies = pd.read_csv("movie-lens-dataset/movies.csv")
credits = pd.read_csv("tmdb-5000-movie-dataset/tmdb_5000_credits.csv")

## add release year to title in tmdb_movies
year = tmdb_movies["release_date"].apply(lambda x: str(x)[:4])
tmdb_movies["title"] = tmdb_movies["title"]+" ("+year+")"
tmdb_movies = tmdb_movies.rename(columns={'id': "movie_id"})
# movies = movies.merge(lens_movies, on="title")

## exchange comma in title in lens_movies
def title_comma_exchange(obj):
    title, year = obj[:-7], obj[-6:]
    title = title.split(",")
    if title[-1]!= title[0]:
        new_title = title[-1]+" "+title[0]
        return new_title[1:]+" "+year
    else:
        return obj

lens_movies["title"] = lens_movies["title"].apply(title_comma_exchange)

In [364]:
movies = pd.merge(tmdb_movies, lens_movies, on="title")
movies = movies.merge(credits, on="movie_id")

In [365]:
movies

Unnamed: 0,budget,genres_x,homepage,movie_id,keywords,original_language,original_title,overview,popularity,production_companies,...,status,tagline,title_x,vote_average,vote_count,movieId,genres_y,title_y,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,Released,Enter the World of Pandora.,Avatar (2009),7.2,11800,72998,Action|Adventure|Sci-Fi|IMAX,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End (2007),6.9,4500,53125,Action|Adventure|Comedy|Fantasy,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,Released,A Plan No One Escapes,Spectre (2015),6.3,4466,136020,Action|Adventure|Crime,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.312950,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...",...,Released,The Legend Ends,The Dark Knight Rises (2012),7.6,9106,91529,Action|Adventure|Crime|IMAX,The Dark Knight Rises,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]",...,Released,"Lost in our world, found in another.",John Carter (2012),6.1,2124,93363,Action|Adventure|Sci-Fi|IMAX,John Carter,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3150,0,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 18, ""nam...",,14585,"[{""id"": 1438, ""name"": ""office""}, {""id"": 9673, ...",en,In the Company of Men,Two business executives--one an avowed misogyn...,2.634007,"[{""name"": ""Alliance Atlantis Communications"", ...",...,Released,Are all men bastards...or just misunderstood?,In the Company of Men (1997),6.8,44,1594,Comedy|Drama,In the Company of Men,"[{""cast_id"": 1, ""character"": ""Chad"", ""credit_i...","[{""credit_id"": ""52fe46049251416c7506a36b"", ""de..."
3151,12000,"[{""id"": 27, ""name"": ""Horror""}, {""id"": 35, ""nam...",,692,"[{""id"": 237, ""name"": ""gay""}, {""id"": 900, ""name...",en,Pink Flamingos,Notorious Baltimore criminal and underground f...,4.553644,"[{""name"": ""Dreamland Productions"", ""id"": 407}]",...,Released,An exercise in poor taste.,Pink Flamingos (1972),6.2,110,2361,Comedy,Pink Flamingos,"[{""cast_id"": 8, ""character"": ""Divine / Babs Jo...","[{""credit_id"": ""52fe426bc3a36847f801d203"", ""de..."
3152,20000,"[{""id"": 80, ""name"": ""Crime""}, {""id"": 27, ""name...",,36095,"[{""id"": 233, ""name"": ""japan""}, {""id"": 549, ""na...",ja,キュア,A wave of gruesome murders is sweeping Tokyo. ...,0.212443,"[{""name"": ""Daiei Studios"", ""id"": 881}]",...,Released,Madness. Terror. Murder.,Cure (1997),7.4,63,4645,Crime|Horror|Thriller,Cure,"[{""cast_id"": 3, ""character"": ""Kenichi Takabe"",...","[{""credit_id"": ""52fe45cc9251416c9103eb7b"", ""de..."
3153,7000,"[{""id"": 878, ""name"": ""Science Fiction""}, {""id""...",http://www.primermovie.com,14337,"[{""id"": 1448, ""name"": ""distrust""}, {""id"": 2101...",en,Primer,Friends/fledgling entrepreneurs invent a devic...,23.307949,"[{""name"": ""Thinkfilm"", ""id"": 446}]",...,Released,What happens if it actually works?,Primer (2004),6.9,658,8914,Drama|Sci-Fi,Primer,"[{""cast_id"": 1, ""character"": ""Aaron"", ""credit_...","[{""credit_id"": ""52fe45e79251416c75066791"", ""de..."


## *Feature Selection and Preprocessing*

### *Important Features*

1. id
2. title
3. genres
5. keywords
6. release_date
7. overview
9. cast
10. crew

In [366]:
movies = movies[["movie_id", "title_x", "release_date", "overview", "genres_x", "keywords", "cast", "crew"]]
movies = movies.rename(columns={"title_x": "title"})
movies = movies.rename(columns={"genres_x": "genres"})

In [367]:
movies

Unnamed: 0,movie_id,title,release_date,overview,genres,keywords,cast,crew
0,19995,Avatar (2009),2009-12-10,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End (2007),2007-05-19,"Captain Barbossa, long believed to be dead, ha...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre (2015),2015-10-26,A cryptic message from Bond’s past sends him o...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises (2012),2012-07-16,Following the death of District Attorney Harve...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter (2012),2012-03-07,"John Carter is a war-weary, former military ca...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."
...,...,...,...,...,...,...,...,...
3150,14585,In the Company of Men (1997),1997-01-19,Two business executives--one an avowed misogyn...,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 18, ""nam...","[{""id"": 1438, ""name"": ""office""}, {""id"": 9673, ...","[{""cast_id"": 1, ""character"": ""Chad"", ""credit_i...","[{""credit_id"": ""52fe46049251416c7506a36b"", ""de..."
3151,692,Pink Flamingos (1972),1972-03-12,Notorious Baltimore criminal and underground f...,"[{""id"": 27, ""name"": ""Horror""}, {""id"": 35, ""nam...","[{""id"": 237, ""name"": ""gay""}, {""id"": 900, ""name...","[{""cast_id"": 8, ""character"": ""Divine / Babs Jo...","[{""credit_id"": ""52fe426bc3a36847f801d203"", ""de..."
3152,36095,Cure (1997),1997-11-06,A wave of gruesome murders is sweeping Tokyo. ...,"[{""id"": 80, ""name"": ""Crime""}, {""id"": 27, ""name...","[{""id"": 233, ""name"": ""japan""}, {""id"": 549, ""na...","[{""cast_id"": 3, ""character"": ""Kenichi Takabe"",...","[{""credit_id"": ""52fe45cc9251416c9103eb7b"", ""de..."
3153,14337,Primer (2004),2004-10-08,Friends/fledgling entrepreneurs invent a devic...,"[{""id"": 878, ""name"": ""Science Fiction""}, {""id""...","[{""id"": 1448, ""name"": ""distrust""}, {""id"": 2101...","[{""cast_id"": 1, ""character"": ""Aaron"", ""credit_...","[{""credit_id"": ""52fe45e79251416c75066791"", ""de..."


In [368]:
movies.isnull().sum()

movie_id        0
title           0
release_date    0
overview        0
genres          0
keywords        0
cast            0
crew            0
dtype: int64

In [369]:
movies.dropna(inplace=True)

In [370]:
movies.duplicated().sum()

2

### *Convert genres and keywords*

In [371]:
movies.iloc[0].genres

'[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 878, "name": "Science Fiction"}]'

In [372]:
def convert_genres(obj):
    L = []
    for i in json.loads(obj):
        L.append(i["name"])
    return L

In [373]:
# convert_to_list(movies.iloc[0].genres)

In [374]:
movies["genres"] = movies["genres"].apply(convert_genres)
movies["keywords"] = movies["keywords"].apply(convert_genres)

In [375]:
movies.head()

Unnamed: 0,movie_id,title,release_date,overview,genres,keywords,cast,crew
0,19995,Avatar (2009),2009-12-10,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End (2007),2007-05-19,"Captain Barbossa, long believed to be dead, ha...","[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre (2015),2015-10-26,A cryptic message from Bond’s past sends him o...,"[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises (2012),2012-07-16,Following the death of District Attorney Harve...,"[Action, Crime, Drama, Thriller]","[dc comics, crime fighter, terrorist, secret i...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter (2012),2012-03-07,"John Carter is a war-weary, former military ca...","[Action, Adventure, Science Fiction]","[based on novel, mars, medallion, space travel...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


### *Convert cast*

In [376]:
def convert_cast(obj):
    L = []
    counter = 0
    for i in json.loads(obj):
        if counter == 3:
            break
        else:
            counter+=1
            L.append(i["name"])
    return L

In [377]:
movies["cast"] = movies["cast"].apply(convert_cast)

In [378]:
movies.head()

Unnamed: 0,movie_id,title,release_date,overview,genres,keywords,cast,crew
0,19995,Avatar (2009),2009-12-10,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weaver]","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End (2007),2007-05-19,"Captain Barbossa, long believed to be dead, ha...","[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ...","[Johnny Depp, Orlando Bloom, Keira Knightley]","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre (2015),2015-10-26,A cryptic message from Bond’s past sends him o...,"[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi...","[Daniel Craig, Christoph Waltz, Léa Seydoux]","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises (2012),2012-07-16,Following the death of District Attorney Harve...,"[Action, Crime, Drama, Thriller]","[dc comics, crime fighter, terrorist, secret i...","[Christian Bale, Michael Caine, Gary Oldman]","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter (2012),2012-03-07,"John Carter is a war-weary, former military ca...","[Action, Adventure, Science Fiction]","[based on novel, mars, medallion, space travel...","[Taylor Kitsch, Lynn Collins, Samantha Morton]","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


### *Convert crew*

In [379]:
def convert_crew(obj):
    L = []
    for i in json.loads(obj):
        if i["job"] == "Director":
            L.append(i["name"])
    return L

In [380]:
movies["crew"] = movies["crew"].apply(convert_crew)

In [381]:
movies.head()

Unnamed: 0,movie_id,title,release_date,overview,genres,keywords,cast,crew
0,19995,Avatar (2009),2009-12-10,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weaver]",[James Cameron]
1,285,Pirates of the Caribbean: At World's End (2007),2007-05-19,"Captain Barbossa, long believed to be dead, ha...","[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ...","[Johnny Depp, Orlando Bloom, Keira Knightley]",[Gore Verbinski]
2,206647,Spectre (2015),2015-10-26,A cryptic message from Bond’s past sends him o...,"[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi...","[Daniel Craig, Christoph Waltz, Léa Seydoux]",[Sam Mendes]
3,49026,The Dark Knight Rises (2012),2012-07-16,Following the death of District Attorney Harve...,"[Action, Crime, Drama, Thriller]","[dc comics, crime fighter, terrorist, secret i...","[Christian Bale, Michael Caine, Gary Oldman]",[Christopher Nolan]
4,49529,John Carter (2012),2012-03-07,"John Carter is a war-weary, former military ca...","[Action, Adventure, Science Fiction]","[based on novel, mars, medallion, space travel...","[Taylor Kitsch, Lynn Collins, Samantha Morton]",[Andrew Stanton]


### *Convert Overview*

In [382]:
movies["overview"] = movies["overview"].apply(lambda x:x.split())

In [383]:
movies.head()

Unnamed: 0,movie_id,title,release_date,overview,genres,keywords,cast,crew
0,19995,Avatar (2009),2009-12-10,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weaver]",[James Cameron]
1,285,Pirates of the Caribbean: At World's End (2007),2007-05-19,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ...","[Johnny Depp, Orlando Bloom, Keira Knightley]",[Gore Verbinski]
2,206647,Spectre (2015),2015-10-26,"[A, cryptic, message, from, Bond’s, past, send...","[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi...","[Daniel Craig, Christoph Waltz, Léa Seydoux]",[Sam Mendes]
3,49026,The Dark Knight Rises (2012),2012-07-16,"[Following, the, death, of, District, Attorney...","[Action, Crime, Drama, Thriller]","[dc comics, crime fighter, terrorist, secret i...","[Christian Bale, Michael Caine, Gary Oldman]",[Christopher Nolan]
4,49529,John Carter (2012),2012-03-07,"[John, Carter, is, a, war-weary,, former, mili...","[Action, Adventure, Science Fiction]","[based on novel, mars, medallion, space travel...","[Taylor Kitsch, Lynn Collins, Samantha Morton]",[Andrew Stanton]


## *Remove spaces*

In [384]:
movies["overview"] = movies["overview"].apply(lambda x:[i.replace(" ","") for i in x])
movies["genres"] = movies["genres"].apply(lambda x:[i.replace(" ","") for i in x])
movies["keywords"] = movies["keywords"].apply(lambda x:[i.replace(" ","") for i in x])
movies["cast"] = movies["cast"].apply(lambda x:[i.replace(" ","") for i in x])
movies["crew"] = movies["crew"].apply(lambda x:[i.replace(" ","") for i in x])

In [385]:
movies.head()

Unnamed: 0,movie_id,title,release_date,overview,genres,keywords,cast,crew
0,19995,Avatar (2009),2009-12-10,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron]
1,285,Pirates of the Caribbean: At World's End (2007),2007-05-19,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski]
2,206647,Spectre (2015),2015-10-26,"[A, cryptic, message, from, Bond’s, past, send...","[Action, Adventure, Crime]","[spy, basedonnovel, secretagent, sequel, mi6, ...","[DanielCraig, ChristophWaltz, LéaSeydoux]",[SamMendes]
3,49026,The Dark Knight Rises (2012),2012-07-16,"[Following, the, death, of, District, Attorney...","[Action, Crime, Drama, Thriller]","[dccomics, crimefighter, terrorist, secretiden...","[ChristianBale, MichaelCaine, GaryOldman]",[ChristopherNolan]
4,49529,John Carter (2012),2012-03-07,"[John, Carter, is, a, war-weary,, former, mili...","[Action, Adventure, ScienceFiction]","[basedonnovel, mars, medallion, spacetravel, p...","[TaylorKitsch, LynnCollins, SamanthaMorton]",[AndrewStanton]


## *Combine columns: overview, genres, keywords, cast & crew*

In [386]:
# movies.head()

In [387]:
movies["tags"] = movies["overview"] + movies["genres"] + movies["keywords"] + movies["cast"] + movies["crew"]

In [388]:
new_df = movies[["movie_id", "title", "tags"]]
new_df.head()

Unnamed: 0,movie_id,title,tags
0,19995,Avatar (2009),"[In, the, 22nd, century,, a, paraplegic, Marin..."
1,285,Pirates of the Caribbean: At World's End (2007),"[Captain, Barbossa,, long, believed, to, be, d..."
2,206647,Spectre (2015),"[A, cryptic, message, from, Bond’s, past, send..."
3,49026,The Dark Knight Rises (2012),"[Following, the, death, of, District, Attorney..."
4,49529,John Carter (2012),"[John, Carter, is, a, war-weary,, former, mili..."


In [389]:
new_df["tags"] = new_df["tags"].apply(lambda x:" ".join(x))
new_df["tags"] = new_df["tags"].apply(lambda x: x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df["tags"] = new_df["tags"].apply(lambda x:" ".join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df["tags"] = new_df["tags"].apply(lambda x: x.lower())


In [390]:
new_df.head()

Unnamed: 0,movie_id,title,tags
0,19995,Avatar (2009),"in the 22nd century, a paraplegic marine is di..."
1,285,Pirates of the Caribbean: At World's End (2007),"captain barbossa, long believed to be dead, ha..."
2,206647,Spectre (2015),a cryptic message from bond’s past sends him o...
3,49026,The Dark Knight Rises (2012),following the death of district attorney harve...
4,49529,John Carter (2012),"john carter is a war-weary, former military ca..."


In [391]:
new_df["tags"][0]

'in the 22nd century, a paraplegic marine is dispatched to the moon pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. action adventure fantasy sciencefiction cultureclash future spacewar spacecolony society spacetravel futuristic romance space alien tribe alienplanet cgi marine soldier battle loveaffair antiwar powerrelations mindandsoul 3d samworthington zoesaldana sigourneyweaver jamescameron'

## *Stem contents of tags with NLTK*

In [392]:
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

In [393]:
def stem(text):
    ret = []
    words = text.split(" ")
    for word in words:
        ret.append(stemmer.stem(word))
    return " ".join(ret)

In [394]:
new_df["tags"] = new_df["tags"].apply(stem)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df["tags"] = new_df["tags"].apply(stem)


## *Vectorize movie tags data*

In [395]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000, stop_words="english")

In [396]:
vectors = cv.fit_transform(new_df["tags"]).toarray()

In [397]:
vectors.shape

(3155, 5000)

In [398]:
# for i in cv.get_feature_names_out():
    # print(i)

## *Measure Cosine Similarity*

In [399]:
from sklearn.metrics.pairwise import cosine_similarity

In [400]:
similarity_matrix = cosine_similarity(vectors)

In [401]:
similarity_matrix = pd.DataFrame(similarity_matrix, index=movies['title'], columns=movies['title'])

In [402]:
similarity_matrix

title,Avatar (2009),Pirates of the Caribbean: At World's End (2007),Spectre (2015),The Dark Knight Rises (2012),John Carter (2012),Spider-Man 3 (2007),Tangled (2010),Avengers: Age of Ultron (2015),Harry Potter and the Half-Blood Prince (2009),Batman v Superman: Dawn of Justice (2016),...,The Signal (2014),George Washington (2000),The Last Waltz (1978),Down Terrace (2009),Clerks (1994),In the Company of Men (1997),Pink Flamingos (1972),Cure (1997),Primer (2004),El Mariachi (1992)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Avatar (2009),1.000000,0.079057,0.083853,0.069007,0.186501,0.107583,0.039528,0.141421,0.056796,0.093250,...,0.026726,0.000000,0.000000,0.055902,0.000000,0.000000,0.000000,0.000000,0.050637,0.018765
Pirates of the Caribbean: At World's End (2007),0.079057,1.000000,0.058926,0.036370,0.073721,0.113402,0.020833,0.124226,0.059868,0.098295,...,0.028172,0.000000,0.000000,0.029463,0.037268,0.032075,0.000000,0.023810,0.026688,0.039559
Spectre (2015),0.083853,0.058926,1.000000,0.057864,0.078193,0.072169,0.022097,0.131762,0.063500,0.104257,...,0.000000,0.000000,0.000000,0.093750,0.000000,0.000000,0.063500,0.075761,0.000000,0.041959
The Dark Knight Rises (2012),0.069007,0.036370,0.057864,1.000000,0.032174,0.059391,0.054554,0.065060,0.039193,0.193047,...,0.055328,0.054554,0.000000,0.096440,0.000000,0.020998,0.039193,0.093522,0.052414,0.064744
John Carter (2012),0.186501,0.073721,0.078193,0.032174,1.000000,0.100322,0.055291,0.197814,0.079444,0.108696,...,0.024922,0.000000,0.000000,0.078193,0.000000,0.056750,0.000000,0.042126,0.023610,0.157483
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
In the Company of Men (1997),0.000000,0.032075,0.000000,0.020998,0.056750,0.104757,0.024056,0.028689,0.034565,0.028375,...,0.032530,0.096225,0.000000,0.068041,0.129099,1.000000,0.069130,0.000000,0.061633,0.091358
Pink Flamingos (1972),0.000000,0.000000,0.063500,0.039193,0.000000,0.024441,0.000000,0.000000,0.000000,0.000000,...,0.060718,0.044901,0.000000,0.127000,0.040161,0.069130,1.000000,0.128290,0.028760,0.021315
Cure (1997),0.000000,0.023810,0.075761,0.093522,0.042126,0.019440,0.017857,0.000000,0.000000,0.000000,...,0.048295,0.000000,0.031174,0.176777,0.000000,0.000000,0.128290,1.000000,0.022875,0.084770
Primer (2004),0.050637,0.026688,0.000000,0.052414,0.023610,0.043581,0.020016,0.071611,0.028760,0.000000,...,0.108266,0.080064,0.000000,0.028307,0.035806,0.061633,0.028760,0.022875,1.000000,0.057011


## *Make Recommendations*

In [403]:
def make_rocommendation(movie_name):
    recommended_movies = []
    similar_scores = similarity_matrix[movie_name]
    similar_scores = pd.DataFrame(similarity_matrix[movie_name])
    similar_scores = similar_scores.sort_values(movie_name, ascending=False)[1:11]
    # similar_scores = similar_scores.rename(columns={movie_name: "similarity_index"})
    similar_scores = similar_scores.merge(new_df, on="title")
    for m in similar_scores["title"]:
        recommended_movies.append(m)
    return recommended_movies

In [411]:
make_rocommendation("The Godfather (1972)")

['The Godfather: Part II (1974)',
 'The Godfather: Part III (1990)',
 'Sexy Beast (2000)',
 'Road to Perdition (2002)',
 'Donnie Brasco (1997)',
 'The Usual Suspects (1995)',
 'J. Edgar (2011)',
 'Only God Forgives (2013)',
 'Idlewild (2006)',
 'Auto Focus (2002)']

In [410]:
for mov in movies.title:
    if "God" in mov:
        print(mov)

Shin Godzilla (2016)
Exodus: Gods and Kings (2014)
Gods of Egypt (2016)
Gods and Generals (2003)
The Godfather: Part III (1990)
Godsend (2004)
Son of God (2014)
The Godfather: Part II (1974)
The Godfather (1972)
Only God Forgives (2013)
Gods and Monsters (1998)


In [406]:
import pickle

pickle.dump(similarity_matrix, open("similarity_matrix.pkl", "wb"))
pickle.dump(new_df, open("movies.pkl", "wb"))