In [42]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [43]:
# loading the data from the csv file to apandas dataframe
movies_data = pd.read_csv('/content/Hydra-Movie-Scrape.csv')
# printing the first 5 rows of the dataframe
movies_data.head()

Unnamed: 0,Title,Year,Summary,Short Summary,Genres,IMDB ID,Runtime,YouTube Trailer,Rating,Movie Poster,Director,Writers,Cast
0,Patton Oswalt: Annihilation,2017,"Patton Oswald, despite a personal tragedy, pro...","Patton Oswalt, despite a personal tragedy, pro...",Uncategorized,tt7026230,66.0,4hZi5QaMBFc,7.4,https://hydramovies.com/wp-content/uploads/201...,Bobcat Goldthwait,Patton Oswalt,Patton Oswalt
1,New York Doll,2005,A recovering alcoholic and recently converted ...,A recovering alcoholic and recently converted ...,Documentary|Music,tt0436629,75.0,jwD04NsnLLg,7.9,https://hydramovies.com/wp-content/uploads/201...,Greg Whiteley,Arthur Kane,Sylvain Sylvain
2,Mickey's Magical Christmas: Snowed in at the H...,2001,After everyone is snowed in at the House of Mo...,Mickey and all his friends hold their own Chri...,Adventure|Animation|Comedy|Family|Fantasy,tt0300195,65.0,uCKwHHftrU4,6.8,https://hydramovies.com/wp-content/uploads/201...,Tony Craig,Thomas Hart,Carlos Alazraqui|Wayne Allwine
3,Mickey's House of Villains,2001,The villains from the popular animated Disney ...,The villains from the popular animated Disney ...,Animation|Comedy|Family|Fantasy|Horror,tt0329374,0.0,JA03ciYt-Ek,6.6,https://hydramovies.com/wp-content/uploads/201...,Jamie Mitchell,Thomas Hart,Tony Anselmo|Wayne Allwine
4,And Then I Go,2017,"In the cruel world of junior high, Edwin suffe...","In the cruel world of junior high, Edwin suffe...",Drama,tt2018111,99.0,8CdIiD6-iF0,7.6,https://hydramovies.com/wp-content/uploads/201...,Vincent Grashaw,Brett Haley,Arman Darbo|Sawyer Barth


In [44]:

# number of rows and columns in the data frame

movies_data.shape


(2266, 13)

In [45]:
# selecting the relevant features for recommendation

selected_features = ['Title','Cast','Director','Genres']
print(selected_features)

['Title', 'Cast', 'Director', 'Genres']


In [46]:
# replacing the null valuess with null string

for feature in selected_features:
  movies_data[feature] = movies_data[feature].fillna('')

In [47]:
# combining all the 5 selected features

combined_features = movies_data['Genres']+' '+movies_data['Title']+' '+movies_data['Cast']+' '+movies_data['Director']
print(combined_features)

0       Uncategorized Patton Oswalt: Annihilation Patt...
1       Documentary|Music New York Doll Sylvain Sylvai...
2       Adventure|Animation|Comedy|Family|Fantasy Mick...
3       Animation|Comedy|Family|Fantasy|Horror Mickey'...
4       Drama And Then I Go Arman Darbo|Sawyer Barth V...
                              ...                        
2261    Action|Adventure|Drama|Sci-Fi|Thriller Gravity...
2262    Action|Horror|Mystery|Thriller Grave Encounter...
2263    Action|Comedy Grandma's Boy Allen Covert|Linda...
2264    Action|Drama Gran Torino Bee Vang|Clint Eastwo...
2265                                      Grace Is Gone  
Length: 2266, dtype: object


In [48]:
# converting the text data to feature vectors

vectorizer = TfidfVectorizer()

In [49]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [50]:
print(feature_vectors)


  (0, 2579)	0.30835527452519124
  (0, 780)	0.30835527452519124
  (0, 307)	0.30835527452519124
  (0, 4978)	0.5934507346346591
  (0, 5086)	0.548204399815973
  (0, 6860)	0.2490767664826929
  (1, 7144)	0.3354812492546271
  (1, 2649)	0.2542971002876796
  (1, 6474)	0.6709624985092542
  (1, 1780)	0.35331463518787837
  (1, 7307)	0.31301384007183114
  (1, 4789)	0.30499487463870584
  (1, 4684)	0.18038295123169013
  (1, 1772)	0.15564659846157963
  (2, 1469)	0.203565568261719
  (2, 6710)	0.21542222238226838
  (2, 203)	0.2805916156363919
  (2, 7077)	0.24959124062329463
  (2, 154)	0.30708936119690156
  (2, 1100)	0.24191996794277804
  (2, 4641)	0.30708936119690156
  (2, 4910)	0.12810007087470737
  (2, 3062)	0.2275961245153726
  (2, 6588)	0.08031050673691294
  (2, 434)	0.23553379664410448
  :	:
  (2262, 4704)	0.15495661175282457
  (2262, 6630)	0.11521483076496165
  (2262, 79)	0.060998896965022205
  (2262, 3044)	0.1758646584544373
  (2263, 2600)	0.3778814549489091
  (2263, 4809)	0.3778814549489091
  (2

In [51]:
# getting the similarity scores using cosine similarity

similarity = cosine_similarity(feature_vectors)

In [52]:
print(similarity)

[[1.         0.         0.         ... 0.         0.         0.        ]
 [0.         1.         0.         ... 0.         0.         0.        ]
 [0.         0.         1.         ... 0.00862097 0.         0.        ]
 ...
 [0.         0.         0.00862097 ... 1.         0.00293097 0.        ]
 [0.         0.         0.         ... 0.00293097 1.         0.        ]
 [0.         0.         0.         ... 0.         0.         1.        ]]


In [53]:
# getting the movie name from the user

movie_name = input(' Enter your favourite movie name : ')

 Enter your favourite movie name : 89


In [54]:
# creating a list with all the movie names given in the dataset

list_of_all_titles = movies_data['Title'].tolist()
print(list_of_all_titles)

['Patton Oswalt: Annihilation', 'New York Doll', "Mickey's Magical Christmas: Snowed in at the House of Mouse", "Mickey's House of Villains", 'And Then I Go', 'An Extremely Goofy Movie', 'Peter Rabbit', 'Love Songs', '89', 'The Foster Boy', 'Forever My Girl', 'Tom Segura: Disgraceful', 'The Secret Rules of Modern Living: Algorithms', 'Secrets in the Fall', 'Silent Night', 'Suicide Squad: Hell to Pay', 'Wildling', 'The Humanity Bureau', 'Farewell Ferris Wheel', "Don't Talk to Irene", 'Blood Road', 'Andre the Giant', 'Dead on Arrival', 'Big Time', 'Adventures in Babysitting', 'Banana in a Nutshell', 'Hostiles', 'Maze Runner: The Death Cure', 'Den of Thieves', 'V.I.P.', 'Walk Hard: The Dewey Cox Story', 'Freaky Friday', 'Perfect Strangers', 'Paterno', 'Shirley: Visions of Reality', '5 Centimeters Per Second', 'Faces Places', 'The Post', 'The Anthem of the Heart', 'My Teacher', 'You Were Never Really Here', 'Petals on the Wind', 'Jesus Christ Superstar Live in Concert', 'Dare to Be Wild', 

In [55]:
# finding the close match for the movie name given by the user

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

['89']


In [56]:
close_match = find_close_match[0]
print(close_match)

89


In [57]:
# finding the Runtime of the movie with title

index_of_the_movie = movies_data[movies_data.Title == close_match]['Runtime'].values[0]
print(index_of_the_movie)

91.0


In [64]:
# getting a list of similar movies

similarity_score = list(enumerate(similarity[index_of_the_movie]))
print(similarity_score)

IndexError: ignored

In [None]:
len(similarity_score)

NameError: ignored

In [None]:
# sorting the movies based on their similarity score

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)
print(sorted_similar_movies)

[(91, 1.0000000000000002), (17, 0.1659536766728308), (510, 0.12557819600742648), (1967, 0.12554846009927162), (1031, 0.11019946039042386), (3182, 0.10077025350586073), (3911, 0.09642674251566738), (573, 0.09556607210906284), (3398, 0.0928217349112489), (1643, 0.08760163271733348), (1180, 0.08311301598191306), (1827, 0.07913817238664286), (167, 0.0786229576198461), (1473, 0.07796266879040994), (1974, 0.07748127933869473), (1679, 0.0774385996024145), (1142, 0.07716465602220232), (1273, 0.07643489551700768), (3562, 0.07541266454398408), (1882, 0.07424653329915917), (491, 0.07412599244827359), (1811, 0.07405664671513529), (3337, 0.07378322647049067), (1699, 0.07378204679465714), (1394, 0.07278547849343672), (1066, 0.07270745583175334), (2284, 0.07253422563881112), (728, 0.07245968483137322), (1252, 0.07245576799597495), (533, 0.07221963466620349), (547, 0.07200001061797887), (2462, 0.07166597419722975), (2672, 0.07070755476592622), (1660, 0.07038240034710432), (2800, 0.06979116325147591), 

In [None]:
# print the name of similar movies based on the index

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_from_index = movies_data[movies_data.index==index]['Title'].values[0]
  if (i<30):
    print(i, '.',title_from_index)
    i+=1

Movies suggested for you : 

1 . Centre of My World
2 . The Humanity Bureau
3 . The 9th Life of Louis Drax
4 . Meet the Robinsons
5 . Little Boy
6 . Clash of the Titans
7 . My Teacher, My Obsession
8 . The Secret Life of Pets
9 . The Substance: Albert Hofmann's LSD
10 . Soul Boys of the Western World
11 . Transporter 2
12 . Pirates of the Caribbean: At World's End
13 . In This Corner of the World
14 . The Dreamers
15 . Master and Commander: The Far Side of the World
16 . She's Out of My League
17 . Unleashed
18 . The Secret World of Arrietty
19 . Google and the World Brain
20 . Now You See Me
21 . The Eyes of My Mother
22 . Predestination
23 . New World OrdeRx
24 . Seeking a Friend for the End of the World
25 . The Incredible Hulk
26 . World Trade Center
27 . Ghost World
28 . The Brothers Grimsby
29 . The Transporter


MOVIE RECOMMENDATION SYSTEM

In [None]:
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = movies_data['Title'].tolist()

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_movie = movies_data[movies_data.Title == close_match]['Runtime'].values[0]

similarity_score = list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_from_index = movies_data[movies_data.index==index]['Title'].values[0]
  if (i<30):
    print(i, '.',title_from_index)
    i+=1

 Enter your favourite movie name : lucky
Movies suggested for you : 

1 . Of Mind and Music
2 . Get on Up
3 . Music and Lyrics
4 . The von Trapp Family: A Life of Music
5 . Love & Mercy
6 . Dreamgirls
7 . Contemporary Color
8 . Thunder and the House of Magic
9 . Final Destination 2
10 . Score: A Film Music Documentary
11 . Pirate Radio
12 . Into the Mind
13 . Eternal Sunshine of the Spotless Mind
14 . Fist Fight
15 . Filmage: The Story of Descendents/All
16 . A Beautiful Mind
17 . Rememory
18 . Confessions of a Dangerous Mind
19 . Anthropoid
20 . Man of Steel
21 . Nick and Norah's Infinite Playlist
22 . Battle of the Year
23 . I'll Follow You Down
24 . All Things Must Pass: The Rise and Fall of Tower Records
25 . Beauty and the Beast
26 . Robin Williams: Come Inside My Mind
27 . Frailty
28 . Summer Holiday
29 . Soul Boys of the Western World
