In [2]:
#Recommendation Systems can be classified into 3 main categories
#content based     popularity based   collaborative based

#Here we will implement popularity based recommendation system

In [3]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
df = pd.read_csv('movies.csv')

In [5]:
df.head(5)

Unnamed: 0,name,rating,genre,year,released,score,votes,director,writer,star,country,budget,gross,company,runtime
0,The Shining,R,Drama,1980,"June 13, 1980 (United States)",8.4,927000.0,Stanley Kubrick,Stephen King,Jack Nicholson,United Kingdom,19000000.0,46998772.0,Warner Bros.,146.0
1,The Blue Lagoon,R,Adventure,1980,"July 2, 1980 (United States)",5.8,65000.0,Randal Kleiser,Henry De Vere Stacpoole,Brooke Shields,United States,4500000.0,58853106.0,Columbia Pictures,104.0
2,Star Wars: Episode V - The Empire Strikes Back,PG,Action,1980,"June 20, 1980 (United States)",8.7,1200000.0,Irvin Kershner,Leigh Brackett,Mark Hamill,United States,18000000.0,538375067.0,Lucasfilm,124.0
3,Airplane!,PG,Comedy,1980,"July 2, 1980 (United States)",7.7,221000.0,Jim Abrahams,Jim Abrahams,Robert Hays,United States,3500000.0,83453539.0,Paramount Pictures,88.0
4,Caddyshack,R,Comedy,1980,"July 25, 1980 (United States)",7.3,108000.0,Harold Ramis,Brian Doyle-Murray,Chevy Chase,United States,6000000.0,39846344.0,Orion Pictures,98.0


In [6]:
df.shape

(7668, 15)

In [7]:
selected_features = ['rating','genre','name','director','score','votes']
print(selected_features)

['rating', 'genre', 'name', 'director', 'score', 'votes']


In [9]:
df.isnull().sum()

name           0
rating        77
genre          0
year           0
released       2
score          3
votes          3
director       0
writer         3
star           1
country        3
budget      2171
gross        189
company       17
runtime        4
dtype: int64

In [12]:
for feature in selected_features:
    df[feature] = df[feature].fillna('')

In [13]:
df.isnull().sum()

name           0
rating         0
genre          0
year           0
released       2
score          0
votes          0
director       0
writer         3
star           1
country        3
budget      2171
gross        189
company       17
runtime        4
dtype: int64

In [16]:
combined_features = df['genre']+' '+df['director']
print(combined_features)

0          Drama Stanley Kubrick
1       Adventure Randal Kleiser
2          Action Irvin Kershner
3            Comedy Jim Abrahams
4            Comedy Harold Ramis
                  ...           
7663         Drama Joseph Ebanks
7664         Comedy Dusty Dukatz
7665          Drama Nkanya Nkwai
7666         Drama James Randall
7667         Horror Pereko Mosia
Length: 7668, dtype: object


In [17]:
vectorizer = TfidfVectorizer()

In [19]:
feature_vectors = vectorizer.fit_transform(combined_features)
print(feature_vectors)

  (0, 1843)	0.7310326090490687
  (0, 3038)	0.6446272179738416
  (0, 893)	0.2237120344420853
  (1, 1801)	0.6664377411257473
  (1, 2640)	0.6664377411257473
  (1, 34)	0.3342476243841129
  (2, 1767)	0.7326236054535132
  (2, 1539)	0.6460301643996345
  (2, 20)	0.21426077433369853
  (3, 12)	0.7729778050078491
  (3, 1628)	0.5942167178304802
  (3, 653)	0.22228766321147675
  (4, 2632)	0.720923162061851
  (4, 1368)	0.6603754320048295
  (4, 653)	0.21017631457225772
  (5, 724)	0.7205020082012832
  (5, 2881)	0.5875857088048887
  (5, 1488)	0.368266060051934
  (6, 1877)	0.8342708669997094
  (6, 1647)	0.47321066567016445
  (6, 20)	0.2829554494466448
  (7, 2877)	0.7043639015524631
  (7, 2106)	0.5930098162372259
  (7, 351)	0.3901420921101488
  (8, 1937)	0.8011848300027395
  :	:
  (7659, 1771)	0.5027771048530024
  (7659, 1488)	0.35500788833953284
  (7660, 2842)	0.7072038395120096
  (7660, 3042)	0.684205614919884
  (7660, 653)	0.17811627070982355
  (7661, 1831)	0.7817201754786246
  (7661, 1691)	0.583043311

In [20]:
similarity = cosine_similarity(feature_vectors)
print(similarity)

[[1.         0.         0.         ... 0.04391112 0.06166047 0.        ]
 [0.         1.         0.         ... 0.         0.         0.        ]
 [0.         0.         1.         ... 0.         0.         0.        ]
 ...
 [0.04391112 0.         0.         ... 1.         0.05410068 0.        ]
 [0.06166047 0.         0.         ... 0.05410068 1.         0.        ]
 [0.         0.         0.         ... 0.         0.         1.        ]]


In [21]:
print(similarity.shape)

(7668, 7668)


In [27]:
movie_name = input("Enter your favourite movie name ")
list_of_titles = df['name'].tolist()

find_close_match = difflib.get_close_matches(movie_name,list_of_titles)
print(find_close_match)

close_match = find_close_match[0]
print(close_match)
movie_index = df[df['name'] == close_match].index.values[0]

Enter your favourite movie name Iron Man
['Iron Man', 'Iron Man 3', 'Iron Man 2']
Iron Man


In [28]:
similarity_score = list(enumerate(similarity[movie_index]))
sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)
top_n = 10  
print(f"Movies similar to '{close_match}':")
for i in range(1, top_n + 1):
    index = sorted_similar_movies[i][0]
    print(df['name'][index])


Movies similar to 'Iron Man':
Iron Man
Iron Man 2
Cowboys & Aliens
Made
Elf
Chef
The Jungle Book
The Lion King
3 Ninjas
National Treasure


# Recommendation System


In [29]:
movie_name = input("Enter your favourite movie name ")
list_of_titles = df['name'].tolist()
find_close_match = difflib.get_close_matches(movie_name,list_of_titles)
close_match = find_close_match[0]
movie_index = df[df['name'] == close_match].index.values[0]

similarity_score = list(enumerate(similarity[movie_index]))
sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)
top_n = 10  
print(f"Movies similar to '{close_match}':")
for i in range(1, top_n + 1):
    index = sorted_similar_movies[i][0]
    print(df['name'][index])


Enter your favourite movie name avatar
Movies similar to 'Avatar':
Aliens
Terminator 2: Judgment Day
True Lies
Avatar
Titanic
The Abyss
Fast Talking
Say Anything
Singles
Jerry Maguire
