# Movie Recommendation System

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
##Step 1: Read CSV File
df = pd.read_csv("movie_dataset.csv")

##Step 2: Select Features
features = ['keywords','cast','genres','director']

##Step 3: Replace missing values in the selected features with an empty string
## so that every selected column holds text that can be concatenated later.
for feature in features:
    # The fill value must be a single-line literal; a quoted string broken
    # across two lines is a syntax error in Python.
    df[feature] = df[feature].fillna('')



In [3]:
def combine_features(row):
    """Join the keywords, cast, genres and director of one row into one string.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the keys 'keywords', 'cast', 'genres' and
        'director' (for example a DataFrame row passed by ``df.apply``).

    Returns
    -------
    str
        The four values separated by single spaces, in the order
        keywords, cast, genres, director.

    Raises
    ------
    KeyError
        If one of the four keys is absent from ``row``. This is surfaced
        instead of being swallowed so that a missing column is noticed here
        rather than as a ``None`` entry further down the notebook.
    """
    parts = []
    for column in ('keywords', 'cast', 'genres', 'director'):
        value = row[column]
        # Treat missing values (None or float NaN, which is the only float
        # that is not equal to itself) as empty text instead of failing.
        if value is None or (isinstance(value, float) and value != value):
            value = ""
        # str() keeps non-string values from breaking the concatenation.
        parts.append(str(value))
    return " ".join(parts)

# Call combine_features once per row (axis="columns" hands each row to the
# function) and store the results in a new "combined_features" column.
df["combined_features"] = df.apply(combine_features, axis="columns")


In [4]:
# Preview the first five rows, including the new combined_features column.
df.head(n=5)

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director,combined_features
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron,culture clash future space war space colony so...
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski,ocean drug abuse exotic island east india trad...
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes,spy based on novel secret agent sequel mi6 Dan...
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan,dc comics crime fighter terrorist secret ident...
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton,based on novel mars medallion space travel pri...


In [5]:
##Step 4: Build a count matrix from the combined_features column
cv = CountVectorizer()
# Fit the vectorizer on the combined text and transform it in a single call.
count_matrix = cv.fit_transform(df.combined_features)

In [6]:
##Step 5: Compute the pairwise cosine similarity between the rows of count_matrix
cosine_sim = cosine_similarity(X=count_matrix)

In [7]:
# Wrap the similarity matrix in a DataFrame labelled by title on both axes,
# so that similarities can be looked up by movie name.
sim_df = pd.DataFrame(
    data=cosine_sim,
    index=df["title"],
    columns=df["title"],
)

In [8]:
# Preview the first five rows of the title-by-title similarity table.
sim_df.head(n=5)

title,Avatar,Pirates of the Caribbean: At World's End,Spectre,The Dark Knight Rises,John Carter,Spider-Man 3,Tangled,Avengers: Age of Ultron,Harry Potter and the Half-Blood Prince,Batman v Superman: Dawn of Justice,...,On The Downlow,Sanctuary: Quite a Conundrum,Bang,Primer,Cavite,El Mariachi,Newlyweds,"Signed, Sealed, Delivered",Shanghai Calling,My Date with Drew
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Avatar,1.0,0.105409,0.120386,0.037743,0.23094,0.19245,0.0,0.140546,0.082061,0.117851,...,0.0,0.0,0.0,0.070273,0.0,0.075485,0.0,0.0,0.0,0.0
Pirates of the Caribbean: At World's End,0.105409,1.0,0.076139,0.035806,0.07303,0.255604,0.0,0.066667,0.116775,0.111803,...,0.0,0.0,0.0,0.0,0.0,0.035806,0.0,0.036515,0.0,0.0
Spectre,0.120386,0.076139,1.0,0.163572,0.208514,0.125109,0.0,0.190347,0.088911,0.170251,...,0.0,0.0,0.0,0.0,0.0,0.081786,0.0,0.0,0.111456,0.0
The Dark Knight Rises,0.037743,0.035806,0.163572,1.0,0.039223,0.078446,0.0,0.035806,0.083624,0.120096,...,0.089984,0.050637,0.125436,0.107417,0.087706,0.153846,0.0,0.039223,0.0,0.0
John Carter,0.23094,0.07303,0.208514,0.039223,1.0,0.2,0.0,0.219089,0.04264,0.163299,...,0.0,0.05164,0.0,0.109545,0.0,0.039223,0.0,0.0,0.0,0.0


In [13]:
movie_user_likes = "Avatar"
print("Movies Similar to "+ movie_user_likes + ":")
# Remove the liked movie by label before ranking. Skipping position 0 after the
# sort is only correct if the movie itself sorts first, which a tie with another
# movie or floating-point rounding can break.
(
    sim_df[movie_user_likes]
    .drop(movie_user_likes)
    .sort_values(ascending=False)
    .head(9)  # same number of recommendations as before
)

Movies Similar to Avatar:


title
Guardians of the Galaxy            0.423390
Aliens                             0.377426
Star Wars: Clone Wars: Volume 1    0.346410
Star Trek Into Darkness            0.344265
Star Trek Beyond                   0.335968
Alien                              0.333333
Lockout                            0.314270
Jason X                            0.307920
The Helix... Loaded                0.288675
Name: Avatar, dtype: float64