Importing the dependencies


In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
#loading the data from csv to pandas dataframe
movies_data = pd.read_csv('/content/movies.csv')

In [4]:
movies_data.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [5]:
movies_data.shape

(4803, 24)

In [6]:
#selecting the relevant features for feature extraction
selected_features = ['genres','keywords','tagline','cast','director']
print(selected_features)

['genres', 'keywords', 'tagline', 'cast', 'director']


In [9]:
#replacing the nullvalues with nullstring
for feature in selected_features:
  movies_data[feature] = movies_data[feature].fillna('')

In [11]:
# combining all the 5 selected features

combined_features = movies_data['genres']+' '+movies_data['keywords']+' '+movies_data['tagline']+' '+movies_data['cast']+' '+movies_data['director']

In [12]:
#converting the textdata into feature vectors
vectorizer = TfidfVectorizer()

In [13]:
feature_vectors = vectorizer.fit_transform(combined_features)

Cosine similarity

In [14]:
# getting the similarity scores using cosine similarity

similarity = cosine_similarity(feature_vectors)

Getting the movie names from users

In [15]:
# getting the movie name from the user

movie_name = input(' Enter your favourite movie name : ')

 Enter your favourite movie name : hallo


In [16]:
# creating a list with all the movie names given in the dataset

list_of_all_titles = movies_data['title'].tolist()
print(list_of_all_titles)

['Avatar', "Pirates of the Caribbean: At World's End", 'Spectre', 'The Dark Knight Rises', 'John Carter', 'Spider-Man 3', 'Tangled', 'Avengers: Age of Ultron', 'Harry Potter and the Half-Blood Prince', 'Batman v Superman: Dawn of Justice', 'Superman Returns', 'Quantum of Solace', "Pirates of the Caribbean: Dead Man's Chest", 'The Lone Ranger', 'Man of Steel', 'The Chronicles of Narnia: Prince Caspian', 'The Avengers', 'Pirates of the Caribbean: On Stranger Tides', 'Men in Black 3', 'The Hobbit: The Battle of the Five Armies', 'The Amazing Spider-Man', 'Robin Hood', 'The Hobbit: The Desolation of Smaug', 'The Golden Compass', 'King Kong', 'Titanic', 'Captain America: Civil War', 'Battleship', 'Jurassic World', 'Skyfall', 'Spider-Man 2', 'Iron Man 3', 'Alice in Wonderland', 'X-Men: The Last Stand', 'Monsters University', 'Transformers: Revenge of the Fallen', 'Transformers: Age of Extinction', 'Oz: The Great and Powerful', 'The Amazing Spider-Man 2', 'TRON: Legacy', 'Cars 2', 'Green Lant

In [17]:
# finding the close match for the movie name given by the user

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

['Shalako', 'The Gallows', 'Shallow Hal']


In [18]:
close_match = find_close_match[0]
print(close_match)

Shalako


In [19]:
# finding the index of the movie with title

index_of_the_movie = movies_data[movies_data.title == close_match]['index'].values[0]
print(index_of_the_movie)

4261


In [20]:
# getting a list of similar movies

similarity_score = list(enumerate(similarity[index_of_the_movie]))
print(similarity_score)

[(0, 0.025781015193219), (1, 0.007873941939354116), (2, 0.009230099874041338), (3, 0.01007309491070678), (4, 0.008784616470661496), (5, 0.00934551800347005), (6, 0.0), (7, 0.009105985691667162), (8, 0.0), (9, 0.008896551517604635), (10, 0.01014705662774264), (11, 0.008014182598869676), (12, 0.034535384145800055), (13, 0.026396033705709084), (14, 0.008962666984297507), (15, 0.0), (16, 0.008634410094588279), (17, 0.008811738235458015), (18, 0.0086192844056957), (19, 0.019111409875255475), (20, 0.008984009111667003), (21, 0.007571210439036569), (22, 0.011756146554243039), (23, 0.0), (24, 0.037181891077348886), (25, 0.0), (26, 0.009398169147782218), (27, 0.02007560478347008), (28, 0.016450891243750106), (29, 0.009438875749408106), (30, 0.009092266182047006), (31, 0.00886581145881286), (32, 0.0), (33, 0.009049407472676541), (34, 0.0), (35, 0.01732082658366286), (36, 0.03209294624969744), (37, 0.007809761630571699), (38, 0.009207057083897944), (39, 0.00849607053706172), (40, 0.0), (41, 0.021

In [21]:
# sorting the movies based on their similarity score

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)
print(sorted_similar_movies)

[(4261, 1.0), (1343, 0.2000858170986255), (1175, 0.15397987411921138), (1546, 0.12642261756362275), (505, 0.11389377358670322), (3616, 0.11259311289656797), (4339, 0.1090879790334199), (1984, 0.10790536944761059), (1112, 0.10135318133379158), (2490, 0.1002263908438245), (625, 0.1001606705836706), (1787, 0.09997840087235138), (3336, 0.09764264038891264), (3162, 0.09681564326000412), (1006, 0.0957365146123957), (812, 0.0949053300144436), (535, 0.09226934716005152), (2486, 0.08883930276042563), (3098, 0.0880299415229903), (2826, 0.08576709876795423), (1828, 0.08512291064582327), (833, 0.08368609481638464), (3143, 0.08347917871333621), (4071, 0.08296885312780994), (1962, 0.08280488636783154), (2503, 0.08273098050905395), (513, 0.08268089573249505), (3715, 0.07873085554836678), (3884, 0.07843182265568391), (3349, 0.07830688353924553), (4469, 0.07595783845889617), (4042, 0.07431954882381732), (1212, 0.07408340601716915), (2351, 0.07392789305195217), (3247, 0.07232353665727592), (2550, 0.0719

In [22]:
# print the name of similar movies based on the index

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_from_index = movies_data[movies_data.index==index]['title'].values[0]
  if (i<30):
    print(i, '.',title_from_index)
    i+=1

Movies suggested for you : 

1 . Shalako
2 . Never Say Never Again
3 . The Last of the Mohicans
4 . The Hunt for Red October
5 . The League of Extraordinary Gentlemen
6 . Robin and Marian
7 . Dr. No
8 . The Thief and the Cobbler
9 . Finding Forrester
10 . Highlander
11 . Entrapment
12 . A Bridge Too Far
13 . Diamonds Are Forever
14 . Thunderball
15 . Indiana Jones and the Last Crusade
16 . Pocahontas
17 . First Knight
18 . Meteor
19 . War, Inc.
20 . Time Bandits
21 . The Untouchables
22 . Shanghai Noon
23 . You Only Live Twice
24 . From Russia with Love
25 . Jane Got a Gun
26 . The Homesman
27 . The Rock
28 . Blackthorn
29 . Goldfinger


**Movie recommendation system**

In [25]:
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = movies_data['title'].tolist()

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_movie = movies_data[movies_data.title == close_match]['index'].values[0]

similarity_score = list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_from_index = movies_data[movies_data.index==index]['title'].values[0]
  if (i<30):
    print(i, '.',title_from_index)
    i+=1

 Enter your favourite movie name :  Mission: Impossible
Movies suggested for you : 

1 . Mission: Impossible
2 . Mission: Impossible II
3 . Raising Cain
4 . Clear and Present Danger
5 . Agent Cody Banks 2: Destination London
6 . Dr. No
7 . Mission: Impossible - Rogue Nation
8 . RED 2
9 . Femme Fatale
10 . The Da Vinci Code
11 . Flushed Away
12 . The Ghost Writer
13 . Ronin
14 . Jack Ryan: Shadow Recruit
15 . London Has Fallen
16 . Body Double
17 . A View to a Kill
18 . The Spy Who Loved Me
19 . Live and Let Die
20 . Scarface
21 . Around the World in 80 Days
22 . Tomorrow Never Dies
23 . The Untouchables
24 . National Treasure
25 . Dressed to Kill
26 . 8 Women
27 . Agent Cody Banks
28 . Mission to Mars
29 . Mission: Impossible III
