<h1 align="center", style="color:red;font-familly:bold">Recommender System</h1>

# **I- Importing the dependencies**

In [3]:
import numpy as np
import pandas as pd
import difflib

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# **II- Data Collection and Pre-Preocessing**

In [5]:
#Loading the data from the csv file to a pandas dataframe 
movies_data = pd.read_csv('rms/bd/movies1.csv')

In [6]:
#Printing the first 5 rows of the dataframe
movies_data.head(3)

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444


In [7]:
# number of rows and columns in the data frame 
movies_data.shape

(1000, 16)

In [8]:
#selecting all the features 
columns = movies_data.columns
columns

Index(['Poster_Link', 'Series_Title', 'Released_Year', 'Certificate',
       'Runtime', 'Genre', 'IMDB_Rating', 'Overview', 'Meta_score', 'Director',
       'Star1', 'Star2', 'Star3', 'Star4', 'No_of_Votes', 'Gross'],
      dtype='object')

In [9]:
#selecting the relevant features for recommendation
selected_features = ['Poster_Link', 'Series_Title', 'Released_Year', 'Certificate', 'Runtime', 'Genre', 'IMDB_Rating',
                     'Overview', 'Meta_score', 'Director', 'Star1', 'Star2', 'Star3', 'Star4', 'No_of_Votes', 'Gross']
print(selected_features)

['Poster_Link', 'Series_Title', 'Released_Year', 'Certificate', 'Runtime', 'Genre', 'IMDB_Rating', 'Overview', 'Meta_score', 'Director', 'Star1', 'Star2', 'Star3', 'Star4', 'No_of_Votes', 'Gross']


In [10]:
#Replacing the null values with null string 
for feature in selected_features:
    movies_data[feature] = movies_data[feature].fillna('')

In [11]:
#combining all the 5 selected features 
combined_features = movies_data['Poster_Link']+' '+movies_data['Series_Title']+' '+movies_data['Released_Year']
print(combined_features)

0      https://m.media-amazon.com/images/M/MV5BMDFkYT...
1      https://m.media-amazon.com/images/M/MV5BM2MyNj...
2      https://m.media-amazon.com/images/M/MV5BMTMxNT...
3      https://m.media-amazon.com/images/M/MV5BMWMwMG...
4      https://m.media-amazon.com/images/M/MV5BMWU4N2...
                             ...                        
995    https://m.media-amazon.com/images/M/MV5BNGEwMT...
996    https://m.media-amazon.com/images/M/MV5BODk3Yj...
997    https://m.media-amazon.com/images/M/MV5BM2U3Yz...
998    https://m.media-amazon.com/images/M/MV5BZTBmMj...
999    https://m.media-amazon.com/images/M/MV5BMTY5OD...
Length: 1000, dtype: object


In [12]:
#converting the next data to feature vectors
vectorizer = TfidfVectorizer()

In [13]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [14]:
print(feature_vectors)

  (0, 77)	0.3732754284001163
  (0, 2306)	0.5111126479843474
  (0, 2413)	0.5111126479843474
  (0, 2561)	0.16967128752961222
  (0, 891)	0.0708343186611917
  (0, 117)	0.0708343186611917
  (0, 115)	0.0708343186611917
  (0, 118)	0.0896029371755012
  (0, 1133)	0.5111126479843474
  (0, 827)	0.0708343186611917
  (0, 409)	0.0708343186611917
  (0, 162)	0.0708343186611917
  (0, 1050)	0.0708343186611917
  (0, 800)	0.0708343186611917
  (1, 55)	0.46237348611875717
  (1, 697)	0.5280218531607563
  (1, 120)	0.27596909729772895
  (1, 1114)	0.5841351655192976
  (1, 2561)	0.1939121757519442
  (1, 891)	0.08095439746752076
  (1, 117)	0.08095439746752076
  (1, 115)	0.08095439746752076
  (1, 827)	0.08095439746752076
  (1, 409)	0.08095439746752076
  (1, 162)	0.08095439746752076
  :	:
  (998, 2070)	0.5825643802340742
  (998, 27)	0.5085860859570162
  (998, 891)	0.08073670474189439
  (998, 117)	0.08073670474189439
  (998, 115)	0.08073670474189439
  (998, 118)	0.10212910944124616
  (998, 827)	0.08073670474189439
 

# **III- Cosine Similarity**

In [16]:
#getting the similarity scores using cosine similarity
similarity = cosine_similarity(feature_vectors)

In [17]:
print(similarity)

[[1.         0.07877613 0.08270874 ... 0.04170919 0.0549025  0.07425427]
 [0.07877613 1.         0.08466378 ... 0.03972291 0.05228793 0.07600947]
 [0.08270874 0.08466378 1.         ... 0.0448265  0.05900587 0.07980397]
 ...
 [0.04170919 0.03972291 0.0448265  ... 1.         0.04753999 0.04024435]
 [0.0549025  0.05228793 0.05900587 ... 0.04753999 1.         0.0529743 ]
 [0.07425427 0.07600947 0.07980397 ... 0.04024435 0.0529743  1.        ]]


In [18]:
print(similarity.shape)

(1000, 1000)


# IV- Movie Recommendation Sytem

In [20]:
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = movies_data['Series_Title'].tolist()

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_movie = movies_data[movies_data.Series_Title == close_match].index.values[0]

similarity_score = list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True) 

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_from_index = movies_data[movies_data.index==index]['Series_Title'].values[0]
  if (i<30):
    print(i, '.',title_from_index)
    i+=1

 Enter your favourite movie name :  Batman


Movies suggested for you : 

1 . Batman Begins
2 . Batman: Mask of the Phantasm
3 . C.R.A.Z.Y.
4 . Serenity
5 . Black
6 . Cinderella Man
7 . V for Vendetta
8 . Lord of War
9 . Sin City
10 . Pride & Prejudice
11 . Joyeux Noël
12 . Brokeback Mountain
13 . Walk the Line
14 . Adams æbler
15 . Match Point
16 . Babam ve Oglum
17 . The World's Fastest Indian
18 . Harry Potter and the Goblet of Fire
19 . Once
20 . Lion
21 . WALL·E
22 . Pride
23 . Heat
24 . Up
25 . Moon
26 . Rush
27 . Her
28 . Deadpool
29 . Amour
