**Types of Recommendation Systems**

1.   Content Based Recommendation System
2.   Popularity Based Recommendation System
3.   Collaborative Recommendation System



**Importing the libraries**

In [5]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
# Load the movie metadata CSV into a DataFrame (the path is relative to the working directory)
movies = pd.read_csv('./data/movies.csv')

In [7]:
# (number of rows, number of columns) of the loaded frame
movies.shape

(4803, 24)

In [13]:
# Lift the column display limit so no column is elided, then show the first two rows
pd.set_option('display.max_columns', None)
movies.head(2)

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski


In [14]:
# Text columns used to describe each movie for content-based matching
selected_features = ['genres','keywords','original_title','overview','tagline','title','cast','director']

In [15]:
# Count the missing values in each selected column
movies[selected_features].isna().sum()

genres             28
keywords          412
original_title      0
overview            3
tagline           844
title               0
cast               43
director           30
dtype: int64

In [17]:
# Keep only the selected columns and replace missing values with empty strings.
# NOTE: this rebinds `movies`, so the other columns of the loaded frame are no
# longer available from this name afterwards.
movies = movies[selected_features].fillna('')

In [19]:
# Build one text document per movie: every selected feature column of the row,
# converted to str and joined with single spaces.
combined_features = pd.Series(
    [
        ' '.join(str(value) for value in row_values)
        for row_values in movies[selected_features].itertuples(index=False, name=None)
    ],
    index=movies.index,
)

In [21]:
# Inspect the combined text of the row labelled 0
print(combined_features[0])

Action Adventure Fantasy Science Fiction culture clash future space war space colony society Avatar In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. Enter the World of Pandora. Avatar Sam Worthington Zoe Saldana Sigourney Weaver Stephen Lang Michelle Rodriguez James Cameron


In [22]:
# TF-IDF vectorizer (default settings) used to turn the combined text into numerical features
vectorizer = TfidfVectorizer()

In [23]:
# Fit the vectorizer on the combined text and transform it into a sparse
# TF-IDF matrix with one row per movie
feature_vectors = vectorizer.fit_transform(combined_features)

In [25]:
# Print the sparse matrix summary followed by its stored (row, column) -> value entries
print(feature_vectors)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 314452 stored elements and shape (4803, 31674)>
  Coords	Values
  (0, 576)	0.05552287249490444
  (0, 721)	0.06361667418861082
  (0, 10053)	0.07921109292531174
  (0, 24720)	0.07389117037591678
  (0, 10333)	0.07397626570741078
  (0, 6822)	0.13944649029207198
  (0, 5464)	0.14412534728090148
  (0, 11141)	0.10293144610710211
  (0, 26211)	0.2249184223444861
  (0, 30414)	0.07997028866714566
  (0, 5784)	0.16116678982694885
  (0, 26020)	0.12400247973187499
  (0, 2141)	0.3772968432669652
  (0, 13904)	0.03370327087232612
  (0, 27972)	0.0757439774177829
  (0, 241)	0.1834455854038367
  (0, 4929)	0.1160534030595115
  (0, 20746)	0.17919455867442288
  (0, 17554)	0.13273886777572982
  (0, 14473)	0.03871367606514675
  (0, 8076)	0.1672840999284348
  (0, 28275)	0.02873959179822235
  (0, 18823)	0.13382353287729085
  (0, 20677)	0.34497387231616145
  (0, 20161)	0.045290151056417934
  :	:
  (4802, 22067)	0.08065544619548977
  (4802, 9456)	0.0703543

In [26]:
# Pairwise cosine similarity between all movie feature vectors.
# The result is a dense square matrix with one row and one column per movie.
similarity = cosine_similarity(feature_vectors)

In [27]:
# Print an abbreviated view of the similarity matrix
print(similarity)

[[1.         0.05176078 0.02708462 ... 0.020484   0.02434631 0.00590704]
 [0.05176078 1.         0.03598704 ... 0.0511643  0.03780307 0.01469859]
 [0.02708462 0.03598704 1.         ... 0.01877076 0.03625338 0.01071917]
 ...
 [0.020484   0.0511643  0.01877076 ... 1.         0.0243797  0.03974862]
 [0.02434631 0.03780307 0.03625338 ... 0.0243797  1.         0.02560517]
 [0.00590704 0.01469859 0.01071917 ... 0.03974862 0.02560517 1.        ]]


In [28]:
# Print the dimensions of the similarity matrix
print(similarity.shape)

(4803, 4803)


In [30]:
# Preview the similarity rows of the first five movies, each followed by a dashed separator line
for row_idx in range(5):
    print(similarity[row_idx], "--------------------------------------------------------------------------------------", sep="\n")

[1.         0.05176078 0.02708462 ... 0.020484   0.02434631 0.00590704]
--------------------------------------------------------------------------------------
[0.05176078 1.         0.03598704 ... 0.0511643  0.03780307 0.01469859]
--------------------------------------------------------------------------------------
[0.02708462 0.03598704 1.         ... 0.01877076 0.03625338 0.01071917]
--------------------------------------------------------------------------------------
[0.03170259 0.03866084 0.0337337  ... 0.03864237 0.03867962 0.02161863]
--------------------------------------------------------------------------------------
[0.07390828 0.08088013 0.02881022 ... 0.03352541 0.02391006 0.01575113]
--------------------------------------------------------------------------------------


**Get movie recommendations based on the user input**

In [107]:
def get_recommended_movies(movie_name, count):
    """Print the ``count`` movies most similar to the closest title match.

    The user's text is fuzzily matched against the titles in the global
    ``movies`` frame with ``difflib.get_close_matches``. The row of the global
    ``similarity`` matrix belonging to the best match is then ranked from the
    highest to the lowest score. The matched movie itself takes part in the
    ranking, so it is listed alongside the other movies.

    Parameters
    ----------
    movie_name : str
        Free-text movie name typed by the user.
    count : int
        Maximum number of rows to print. When it exceeds the number of
        movies, every movie is listed instead of raising an error.

    Returns
    -------
    None
        The ranking is printed as a DataFrame with the columns "Movies" and
        "Scores" and a 1-based rank index. A message is printed instead when
        no title is close enough to ``movie_name``.

    Raises
    ------
    ValueError
        If ``count`` is negative.
    """
    if count < 0:
        raise ValueError("count must be a non-negative integer")

    # all titles in row order, so list positions line up with similarity rows
    list_of_all_titles = movies['title'].tolist()

    # find the closest title(s) to the name given by the user
    close_matches = difflib.get_close_matches(movie_name, list_of_all_titles)
    if not close_matches:
        print("No recommended movies found!!")
        return

    # position (not index label) of the first movie carrying the best-matching
    # title; `similarity` is a plain array and must be indexed by position
    index_of_movie = list_of_all_titles.index(close_matches[0])

    # rank every movie by its similarity to the matched one, best first;
    # sorted() is stable, so equally-scored movies keep their dataset order
    sorted_scores = sorted(
        enumerate(similarity[index_of_movie]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_scores = sorted_scores[:count]

    # the rank index is derived from the rows actually collected, so asking for
    # more rows than there are movies no longer causes a length mismatch
    recommendations = pd.DataFrame(
        data={
            "Movies": [list_of_all_titles[index] for index, _ in top_scores],
            "Scores": [score for _, score in top_scores],
        },
        index=[rank + 1 for rank in range(len(top_scores))],
    )
    print(recommendations)

In [109]:
# Ask the user for a movie name and print the 20 highest-scoring matches for it
user_input = input('Enter your favorite movie name: ')
get_recommended_movies(user_input, 20)

                            Movies    Scores
1                           Avatar  1.000000
2                            Alien  0.156733
3                    Lost in Space  0.150333
4                        Moonraker  0.140175
5                        Lifeforce  0.139452
6       Zathura: A Space Adventure  0.136417
7                           Aliens  0.133426
8                     Space Chimps  0.129606
9                    Pandora's Box  0.127774
10                Star Trek Beyond  0.125437
11         Guardians of the Galaxy  0.121904
12                         Gattaca  0.121580
13                         Gravity  0.119641
14  Transformers: Dark of the Moon  0.114111
15                The Book of Life  0.114066
16                   Space Cowboys  0.114056
17                 Treasure Planet  0.109218
18                       Apollo 18  0.109165
19                 Men in Black II  0.108887
20                            Moon  0.108685
