In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate
import pandastable as pt
import warnings; warnings.simplefilter('ignore')

In [2]:
import tkinter as tk
import tkinter.ttk
from tkinter import *
from tkinter import ttk
import tkinter.messagebox as tkm
from PIL import Image, ImageTk
import pyglet


# Simple Recommendation System #


This is done by sorting the movies on the basis of popularity and ratings and then display on the top. This type of system gives generalized movie recommendations only. 

In [3]:
md = pd. read_csv(r'C:\Users\saumy\Downloads\Dataset\movies_metadata.csv')
md.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [4]:
md['genres'] = md['genres'].fillna('[]').apply(literal_eval).apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])

We will use IMDB's Weighted Rating formula to construct the recommendation chart. 

Weighted Rating (WR) =  (v/v+m.R)+(m/v+m.C) 
where,

v is the number of votes for the movie
m is the minimum votes required to be listed in the chart
R is the average rating of the movie
C is the mean vote across the whole report


Now a movie should have a minimum number of votes to qualify for the recommendation chart. Setting this cutoff as 93 percentile. 

In [5]:
vote_counts = md[md['vote_count'].notnull()]['vote_count'].astype('int')
vote_averages = md[md['vote_average'].notnull()]['vote_average'].astype('int')
C = vote_averages.mean()
C

5.244896612406511

In [6]:
m = vote_counts.quantile(0.93)
m

281.0

In [7]:
md['year'] = pd.to_datetime(md['release_date'], errors='coerce').apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)

In [8]:
qualified = md[(md['vote_count'] >= m) & (md['vote_count'].notnull()) & (md['vote_average'].notnull())][['title', 'year', 'vote_count', 'vote_average', 'popularity', 'genres']]
qualified['vote_count'] = qualified['vote_count'].astype('int')
qualified['vote_average'] = qualified['vote_average'].astype('int')
qualified.shape

(3189, 6)

Therefore, the average rating for a movie on TMDB is **5.245**
Minimum number of votes a movie should have to be considered on the charts = **281**
Number of movies that qualify to be on the charts = __3189__

Next, we will calculate the Weighted Rating and sort the movies according to the ratings

In [9]:
def weighted_rating(x):
    v = x['vote_count']
    R = x['vote_average']
    return (v/(v+m) * R) + (m/(m+v) * C)

In [10]:
qualified['wr'] = qualified.apply(weighted_rating, axis=1)

In [11]:
qualified = qualified.sort_values('wr', ascending=False).head(250)

## Top Movie Charts #

In [12]:
qualified.head(15)

Unnamed: 0,title,year,vote_count,vote_average,popularity,genres,wr
15480,Inception,2010,14075,8,29.108149,"[Action, Thriller, Science Fiction, Mystery, A...",7.946072
12481,The Dark Knight,2008,12269,8,123.167259,"[Drama, Action, Crime, Thriller]",7.938312
22879,Interstellar,2014,11187,8,32.213481,"[Adventure, Drama, Science Fiction]",7.932492
2843,Fight Club,1999,9678,8,63.869599,[Drama],7.922263
4863,The Lord of the Rings: The Fellowship of the Ring,2001,8892,8,32.070725,"[Adventure, Fantasy, Action]",7.915602
292,Pulp Fiction,1994,8670,8,140.950236,"[Thriller, Crime]",7.913509
314,The Shawshank Redemption,1994,8358,8,51.645403,"[Drama, Crime]",7.910385
7000,The Lord of the Rings: The Return of the King,2003,8226,8,29.324358,"[Adventure, Fantasy, Action]",7.908994
351,Forrest Gump,1994,8147,8,48.307194,"[Comedy, Drama, Romance]",7.908141
5814,The Lord of the Rings: The Two Towers,2002,7641,8,29.423537,"[Adventure, Fantasy, Action]",7.902274


## Genre Specific Charts

Now constructing charts for particular genres like Comedy, Action, Romance, etc,. For genre specific charts we will take the minimum votes required to be 85 percentile. 

In [13]:
s = md.apply(lambda x: pd.Series(x['genres']),axis=1).stack().reset_index(level=1, drop=True)
s.name = 'genre'
gen_md = md.drop('genres', axis=1).join(s)

In [14]:
def build_chart(genre, percentile=0.85):
    df = gen_md[gen_md['genre'] == genre]
    vote_counts = df[df['vote_count'].notnull()]['vote_count'].astype('int')
    vote_averages = df[df['vote_average'].notnull()]['vote_average'].astype('int')
    C = vote_averages.mean()
    m = vote_counts.quantile(percentile)
    
    qualified = df[(df['vote_count'] >= m) & (df['vote_count'].notnull()) & (df['vote_average'].notnull())][['title', 'year', 'vote_count', 'vote_average', 'popularity']]
    qualified['vote_count'] = qualified['vote_count'].astype('int')
    qualified['vote_average'] = qualified['vote_average'].astype('int')
    
    qualified['wr'] = qualified.apply(lambda x: (x['vote_count']/(x['vote_count']+m) * x['vote_average']) + (m/(m+x['vote_count']) * C), axis=1)
    qualified = qualified.sort_values('wr', ascending=False).head(15)
   
    #Returning the list
    lll = qualified['title'].tolist()
    return lll

### Top Comedy Movies ###

In [15]:
build_chart('Comedy')

['Dilwale Dulhania Le Jayenge',
 'Forrest Gump',
 'Back to the Future',
 'The Intouchables',
 'The Grand Budapest Hotel',
 'Life Is Beautiful',
 'Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb',
 'Modern Times',
 'Some Like It Hot',
 'The Great Dictator',
 'The Apartment',
 'City Lights',
 'Feast',
 'The Kid',
 'The General']

### Top Romance Movies ###

In [16]:
build_chart('Romance')

['Dilwale Dulhania Le Jayenge',
 'Forrest Gump',
 'Vertigo',
 'Your Name.',
 'Some Like It Hot',
 'Cinema Paradiso',
 'Paperman',
 'Sing Street',
 'The Apartment',
 'The Handmaiden',
 'City Lights',
 'The Way He Looks',
 'In a Heartbeat',
 'Titanic',
 'Silver Linings Playbook']

### Top Thriller Movies ###

In [17]:
build_chart('Thriller')

['Inception',
 'The Dark Knight',
 'Pulp Fiction',
 'Se7en',
 'The Imitation Game',
 'The Silence of the Lambs',
 'The Prestige',
 'Leon: The Professional',
 'Memento',
 'The Shining',
 'Reservoir Dogs',
 'The Usual Suspects',
 'Scarface',
 'Room',
 'Psycho']

# Content Based Recommendation System #

To personalize the recommendations we will now build a content based recomendation system taking into account the parameters like the movie taglines, cast, crew, actors, etc.

To build the recommendation system we can take two separate parameters which will include
1) Movie description 
2) Movie cast, crew and genre

In [18]:
links_small = pd.read_csv(r'C:\Users\saumy\Downloads\Dataset\links_small.csv')
links_small = links_small[links_small['tmdbId'].notnull()]['tmdbId'].astype('int')

In [19]:
md = md.drop([19730, 29503, 35587])

In [20]:
md['id'] = md['id'].astype('int')

In [21]:
smd = md[md['id'].isin(links_small)]
smd.shape

(9099, 25)

## Movie Description Based Recommender ##

In [22]:
smd['tagline'] = smd['tagline'].fillna('')
smd['description'] = smd['overview'] + smd['tagline']
smd['description'] = smd['description'].fillna('')

In [23]:
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(smd['description'])

In [24]:
tfidf_matrix.shape

(9099, 268124)

We will now use the cosine similarity to find the simarilities between two movies quantitatively. 

In [25]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [26]:
cosine_sim[0]

array([1.        , 0.00680476, 0.        , ..., 0.        , 0.00344913,
       0.        ])

The next step is to return the most similar movies based on the cosine similarity score.

In [27]:
smd = smd.reset_index()
titles = smd['title']
indices = pd.Series(smd.index, index=smd['title'])

In [28]:
def get_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:51]
    movie_indices = [i[0] for i in sim_scores]
    return titles.iloc[movie_indices]

In [29]:
get_recommendations('Shutter Island').head(10)

6971                    The Rape of Europa
8691                         Inherent Vice
4540                                 Basic
5741                       Titicut Follies
578       Some Folks Call It a Sling Blade
5182                     Soldier of Orange
3929                          Carmen Jones
7156                             Nuremberg
2324                             Psycho II
7823    Captain America: The First Avenger
Name: title, dtype: object

In [30]:
get_recommendations('JFK').head(10)

7242     The File on Thelma Jordon
5987    A Love Song for Bobby Long
1135      Night Falls on Manhattan
4489                         Q & A
8680             The Young Savages
7344           Law Abiding Citizen
3537                  Criminal Law
6667                      Fracture
6560                         Bobby
5425                   Pretty Baby
Name: title, dtype: object

In [31]:
get_recommendations('Batman Returns').head(10)

7565                 Batman: Under the Red Hood
7931                      The Dark Knight Rises
8227    Batman: The Dark Knight Returns, Part 2
2579               Batman: Mask of the Phantasm
6900                            The Dark Knight
132                              Batman Forever
8165    Batman: The Dark Knight Returns, Part 1
912                          The Wrong Trousers
6144                              Batman Begins
7901                           Batman: Year One
Name: title, dtype: object

The recommendation system only identifies the keywords and hence recommends all the batman movies. But this is not what we want. We want a recommendation system that that gives personalized recommendations as well as diversify the movies across all genres. For this, we will take into consideration the genre, cast and crew.

## Movie Data Based Recommendation System ##

In [32]:
credits = pd.read_csv(r'C:\Users\saumy\Downloads\Dataset\keywords.csv')
keywords = pd.read_csv(r'C:\Users\saumy\Downloads\Dataset\credits.csv')

In [33]:
keywords['id'] = keywords['id'].astype('int')
credits['id'] = credits['id'].astype('int')
md['id'] = md['id'].astype('int')

In [34]:
md.shape

(45463, 25)

Merging the cast, crew, genres and credits from the keywords and credits files. 

In [35]:
md = md.merge(credits, on='id')
md = md.merge(keywords, on='id')
smd = md[md['id'].isin(links_small)]
smd.shape

(9219, 28)

We will consider only the director from the crew and the top 5 actors that appear in the credit list. 

In [36]:
smd['cast'] = smd['cast'].apply(literal_eval)
smd['crew'] = smd['crew'].apply(literal_eval)
smd['keywords'] = smd['keywords'].apply(literal_eval)
smd['cast_size'] = smd['cast'].apply(lambda x: len(x))
smd['crew_size'] = smd['crew'].apply(lambda x: len(x))

In [37]:
def get_director(x):
    for i in x:
        if i['job'] == 'Director':
            return i['name']
    return np.nan

In [38]:
smd['director'] = smd['crew'].apply(get_director)

In [39]:
smd['cast'] = smd['cast'].apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
smd['cast'] = smd['cast'].apply(lambda x: x[:5] if len(x) >=3 else x)

In [40]:
smd['keywords'] = smd['keywords'].apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])

We will now use the Count Vectorizer to build our count matrix and calculate the cosine similarities and return the similar movies based on the similarity score. 

In [41]:
smd['cast'] = smd['cast'].apply(lambda x: [str.lower(i.replace(" ", "")) for i in x])

In [42]:
smd['director'] = smd['director'].astype('str').apply(lambda x: str.lower(x.replace(" ", "")))
smd['director'] = smd['director'].apply(lambda x: [x,x])

In [43]:
s = smd.apply(lambda x: pd.Series(x['keywords']),axis=1).stack().reset_index(level=1, drop=True)
s.name = 'keyword'
s = s.value_counts()
s[:5]

independent film        610
woman director          550
murder                  399
duringcreditsstinger    327
based on novel          318
Name: keyword, dtype: int64

In [44]:
s = s[s > 1]
stemmer = SnowballStemmer('english')
stemmer.stem('ants')

'ant'

In [45]:
def filter_keywords(x):
    words = []
    for i in x:
        if i in s:
            words.append(i)
    return words

In [46]:
smd['keywords'] = smd['keywords'].apply(filter_keywords)
smd['keywords'] = smd['keywords'].apply(lambda x: [stemmer.stem(i) for i in x])
smd['keywords'] = smd['keywords'].apply(lambda x: [str.lower(i.replace(" ", "")) for i in x])

In [47]:
smd['soup'] = smd['keywords'] + smd['cast'] + smd['director'] + smd['genres']
smd['soup'] = smd['soup'].apply(lambda x: ' '.join(x))

In [48]:
count = CountVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
count_matrix = count.fit_transform(smd['soup'])

In [49]:
cosine_sim = cosine_similarity(count_matrix, count_matrix)

In [50]:
smd = smd.reset_index()
titles = smd['title']
indices = pd.Series(smd.index, index=smd['title'])

In [51]:
get_recommendations('Shutter Island').head(10)

7934                   A Letter to Elia
6599                       The Departed
5625    Alice Doesn't Live Here Anymore
5627     Who's That Knocking at My Door
6047                        The Aviator
4072                      Boxcar Bertha
1997                 The Color of Money
15                               Casino
5376                       Mean Streets
5848                 New York, New York
Name: title, dtype: object

In [52]:
get_recommendations('JFK').head(10)

13                                Nixon
413                      Heaven & Earth
4340                           The Hand
7125                                 W.
7698    Wall Street: Money Never Sleeps
5512                         Comandante
2566                   Any Given Sunday
6539                 World Trade Center
3571                       Criminal Law
3248                      Thirteen Days
Name: title, dtype: object

In [53]:
get_recommendations('Batman Returns').head(10)

524                                 Batman
2492                         Sleepy Hollow
1260                        Batman & Robin
6981                       The Dark Knight
9024    Batman v Superman: Dawn of Justice
6218                         Batman Begins
1745                           Beetlejuice
6174                               Vincent
1146                         Mars Attacks!
8123                          Dark Shadows
Name: title, dtype: object

We have not included the ratings and reviews previously. Therefore, we will also add ratings as another parameter besides the cast, crew and genre. We will take top 25 movies and calculate the vote of the 60th percentile movie. Then we will calculate the weighted rating of each movie using IMDB's formula. 

In [54]:
def improved_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:26]
    movie_indices = [i[0] for i in sim_scores]
    
    movies = smd.iloc[movie_indices][['title', 'vote_count', 'vote_average', 'year']]
    vote_counts = movies[movies['vote_count'].notnull()]['vote_count'].astype('int')
    vote_averages = movies[movies['vote_average'].notnull()]['vote_average'].astype('int')
    C = vote_averages.mean()
    m = vote_counts.quantile(0.60)
    qualified = movies[(movies['vote_count'] >= m) & (movies['vote_count'].notnull()) & (movies['vote_average'].notnull())]
    qualified['vote_count'] = qualified['vote_count'].astype('int')
    qualified['vote_average'] = qualified['vote_average'].astype('int')
    qualified['wr'] = qualified.apply(weighted_rating, axis=1)
    qualified = qualified.sort_values('wr', ascending=False).head(10)
    #return qualified
    lll = qualified['title'].tolist()
    return lll

In [55]:
improved_recommendations('Shutter Island')

['GoodFellas',
 'The Wolf of Wall Street',
 'The Departed',
 'Gangs of New York',
 'The Aviator',
 'Casino',
 'Raging Bull',
 'Mean Streets',
 'The Color of Money',
 'Murder on the Orient Express']

In [56]:
improved_recommendations('JFK')

['Platoon',
 'Wall Street',
 'Natural Born Killers',
 'Savages',
 'Any Given Sunday',
 'Born on the Fourth of July',
 'The Doors',
 'World Trade Center',
 'Wall Street: Money Never Sleeps',
 'Alexander']

In [57]:
improved_recommendations('Batman Returns')

['The Dark Knight',
 'The Dark Knight Rises',
 'Batman Begins',
 'Edward Scissorhands',
 'Batman',
 'Alice in Wonderland',
 'Charlie and the Chocolate Factory',
 'Dark Shadows',
 'Batman v Superman: Dawn of Justice',
 'Suicide Squad']

Hence the improved system removes the movies that don't have a good rating. However, it does not provide recommendations across genres. It is not personalised yet since it recommends same movies to everyone and fails to take into account the users' taste. Therefore, we will use **Collaborative Filtering** to improve personalised recommendations.

# Collaborative Filtering #

We will use the **Surprise** library to implement the **Singular Value Decomposition(SVD)** to minimise RMSE and MAE

In [58]:
reader = Reader()

In [59]:
ratings = pd.read_csv(r'C:\Users\saumy\Downloads\Dataset\ratings_small.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [60]:
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
svd = SVD()
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=10, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8927  0.8719  0.8917  0.8825  0.8898  0.9033  0.8805  0.8995  0.8994  0.9037  0.8915  0.0100  
MAE (testset)     0.6877  0.6723  0.6873  0.6768  0.6851  0.6972  0.6806  0.6881  0.6940  0.6945  0.6864  0.0076  
Fit time          1.24    0.91    0.91    0.90    0.91    0.90    0.92    0.90    0.97    1.01    0.96    0.10    
Test time         0.05    0.04    0.04    0.18    0.04    0.04    0.04    0.04    0.15    0.04    0.07    0.05    


{'test_rmse': array([0.89268795, 0.87189994, 0.89169526, 0.88250463, 0.88983363,
        0.90330385, 0.88053904, 0.89947026, 0.89940677, 0.90371175]),
 'test_mae': array([0.68765508, 0.67226457, 0.68727517, 0.67682824, 0.68512618,
        0.69722989, 0.68058215, 0.68810384, 0.6939762 , 0.69447214]),
 'fit_time': (1.2356054782867432,
  0.9072010517120361,
  0.905097484588623,
  0.8950996398925781,
  0.9111056327819824,
  0.9000613689422607,
  0.9231240749359131,
  0.8982741832733154,
  0.9736611843109131,
  1.0132179260253906),
 'test_time': (0.04801011085510254,
  0.0410001277923584,
  0.040029048919677734,
  0.18237686157226562,
  0.04055023193359375,
  0.039994239807128906,
  0.03953838348388672,
  0.04199934005737305,
  0.1540844440460205,
  0.0410153865814209)}

Hence, we get  
            **Root Mean Squared Error(RMSE) = 0.8904**  
            **Mean Absolute Error(MAE) = 0.6845**

Now, training our dataset

In [61]:
trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x12b24ccd4d0>

In [62]:
ratings[ratings['userId'] == 1]

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
5,1,1263,2.0,1260759151
6,1,1287,2.0,1260759187
7,1,1293,2.0,1260759148
8,1,1339,3.5,1260759125
9,1,1343,2.0,1260759131


In [63]:
svd.predict(1, 451, 3)


Prediction(uid=1, iid=451, r_ui=3, est=2.480160954461496, details={'was_impossible': False})

For a movie with ID = 451, we get an estimation of 2.655. Now we will combine all the recommendation systems above and build a **Personalised Hybrid Recommendation System**

# Hybrid Recommendation System #

The recommendation system will take the userID and movie name as input and will yield an output predicting the ratings given by that user for that movie. 

In [64]:
def convert_int(x):
    try:
        return int(x)
    except:
        return np.nan

In [65]:
id_map = pd.read_csv(r'C:\Users\saumy\Downloads\Dataset\links_small.csv')[['movieId', 'tmdbId']]
id_map['tmdbId'] = id_map['tmdbId'].apply(convert_int)
id_map.columns = ['movieId', 'id']
id_map = id_map.merge(smd[['title', 'id']], on='id').set_index('title')

In [66]:
indices_map = id_map.set_index('id')

In [67]:
def hybrid(userId, title):
    idx = indices[title]
    tmdbId = id_map.loc[title]['id']
    movie_id = id_map.loc[title]['movieId']
    
    sim_scores = list(enumerate(cosine_sim[int(idx)]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:26]
    movie_indices = [i[0] for i in sim_scores]
    
    movies = smd.iloc[movie_indices][['title', 'vote_count', 'vote_average', 'year', 'id']]
    movies['est'] = movies['id'].apply(lambda x: svd.predict(userId, indices_map.loc[x]['movieId']).est)
    movies = movies.sort_values('est', ascending=False).head(10)
    #return movies.head(10)
    lll = movies['title'].tolist()
    return lll

In [68]:
hybrid(1, 'Shutter Island')

['Raging Bull',
 'GoodFellas',
 'The Departed',
 'The Wolf of Wall Street',
 'Casino',
 'Kundun',
 "Alice Doesn't Live Here Anymore",
 'Confessions',
 'Silent Fall',
 'The Color of Money']

In [69]:
hybrid(500, 'Shutter Island')

['Death and the Maiden',
 'The Departed',
 'No Direction Home: Bob Dylan',
 'Casino',
 'Raging Bull',
 'GoodFellas',
 'The Aviator',
 'Murder on the Orient Express',
 'A Personal Journey with Martin Scorsese Through American Movies',
 'The Wolf of Wall Street']

In [70]:
hybrid(133, 'Shutter Island')

['Raging Bull',
 'The Departed',
 'Death and the Maiden',
 'The Age of Innocence',
 'The Aviator',
 'The Wolf of Wall Street',
 'Murder on the Orient Express',
 'Kundun',
 'Gangs of New York',
 'GoodFellas']

Thus we get a personalised recommendations which are different for different users and give much more accurate suggestions. 

In [71]:
hybrid(1, 'Casino')

['Raging Bull',
 'GoodFellas',
 'The Departed',
 'The Wolf of Wall Street',
 'Taxi Driver',
 'Once Upon a Time in America',
 'Shutter Island',
 'Kundun',
 'The King of Comedy',
 "Alice Doesn't Live Here Anymore"]

# Creating a GUI Using Tkinter #

In [76]:
l = [None for i in range(10)]
def rmovies():
    event1 = menu.get()
    event2 = e3_var.get()
    event3 = e1_var.get()
    event4 = e2_var.get()
    
    if event2 == "Enter Movie Name" and event3 == 0:
        qualify = build_chart(event1)
        for i in range(10):
            l[i] = Label(root ,  text = qualify[i], font=("TTCommons-Italic",14),bg="#2D283E", fg ="#802BB1",wraplength=300)
            l[i].grid(row = 2+i, column = 6, pady = 5)
    elif event3 == 0 and event1 == "Select Genre":
        qualify = improved_recommendations(event2)
        for i in range(10):
            l[i] = Label(root ,  text = qualify[i], font=("TTCommons-Italic",14),bg="#2D283E", fg ="#802BB1",wraplength=300)
            l[i].grid(row = 2+i, column = 6, pady = 5)
    elif event1 == "Select Genre" and event2 == "Enter Movie Name":
        qualify = hybrid(event3, event4)
        #return qualify
        for i in range(10):
            l[i] = Label(root,  text = qualify[i], font=("TTCommons-Italic",14),bg="#2D283E", fg ="#802BB1",wraplength=300)
            l[i].grid(row = 2+i, column = 6, pady = 5)
            
    else:
        tkm.showinfo("Error", "Please select only one system")
        #drop.config(text="Select Genre")
        #drop.config(text="Select Genre")
    

In [85]:

    


# create root window
root = Tk()

# root window title and dimension4
root.title("AZ Movie Recommender System")
#photo = PhotoImage(file = r'C:\Users\saumy\cinema')
#root.iconphoto(False, photo)
root.iconbitmap(r'Downloads\Camcorder.ico')
#setting bg colour of root window
root.configure(bg="#2D283E")

#root.configure(bg='PapayaWhip')

# Set geometry (widthxheight)
root.geometry('1600x700')



# Dropdown menu options
options = [
    'Select Genre',
    'Action',
 'Adventure',
 'Animation',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Family',
 'Fantasy',
 'History',
 'Horror',
 'Music',
 'Mystery',
 'Romance',
 'Science Fiction',
 'TV Movie',
 'Thriller',
 'War',
 'Western'
]

# adding a label to the root window
l1 = Label(root, text = "MOVIE RECOMMENDATION SYSTEM", fg ="#802BB1",bg = "#2D283E" ,font= ("Take A Shot Personal Use",42))
l1.grid(row = 0,column = 0, columnspan=10, pady = 20)
l2 = Label(root, text = "Select genre of the movie you want to watch : ",font= ("CoreSansA35Light",14),bg = "#2D283E" ,fg ="#D1D7E0", wraplength = 200)
l2.grid(row = 1,column = 0, pady = 2, padx = 2, sticky='s')

# datatype of menu text and entry widgets
menu = StringVar()
e1_var = IntVar()
e2_var = StringVar()
e3_var = StringVar()
#pd_variable = Variable(second_frame)

# initial menu text
menu.set( "Select Genre" )
e1_var.set(0)
e2_var.set("Enter Movie")
e3_var.set("Enter Movie Name")

# Create Dropdown menu
drop = OptionMenu( root , menu , *options)
drop.grid(row = 3,column = 0, padx = 10, sticky='n')

# Creating seperators for better UI 
x1 = tkinter.ttk.Separator(root, orient=VERTICAL).grid(column=1, row=1, rowspan=12, sticky='ns')

l3 = Label(root, text = "Enter movie name to get generalised recommendations", font= ("CoreSansA35Light",14), wraplength=200,bg = "#2D283E", fg ="#D1D7E0")
l3.grid(row = 1,column = 2, pady = 2, padx = 2, sticky='s')

e3 = Entry(root, textvariable = e3_var, width=30,font= ("CoreSansA35Light",10))
e3.grid(row = 3, column=2, pady = 2, padx = 10)


# button widget with green color text
button = Button(root, text = "SEE RECOMMENDATIONS" , fg = "white",bg = "#802BB1", command = rmovies)
button.grid(row = 15,column = 3, sticky = 's', pady = 30)

# Creating seperators for better UI 
x2 = tkinter.ttk.Separator(root, orient=VERTICAL).grid(column=3, row=1, rowspan=12, sticky='ns')

# Creating label for taking movie as input for content based recommendation
l16 = Label(root, text = "Enter the User ID and movie name to get personalised recommendations",fg="#D1D7E0", bg ="#2D283E",font= ("CoreSansA35Light",12), wraplength=200)
l16.grid(row = 1,column = 4, pady = 2, padx = 2,sticky='s')

#Creating textbox widget to take movie input from user and display recommendations accordingly
e1 = Entry(root, textvariable = e1_var, width=30,font= ("CoreSansA35Light",10))
e1.grid(row = 3, column=4, pady = 2, padx = 10)
e2 = Entry(root, textvariable = e2_var, width=30,font= ("CoreSansA35Light",10))
e2.grid(row = 4, column=4, pady = 2, padx = 10)

# Creating seperators for better UI 
x3 = tkinter.ttk.Separator(root, orient=VERTICAL).grid(column=5, row=1, rowspan=12, sticky='ns')

l17 = Label(root, text="Results",bg = "#2D283E",fg="#D1D7E0", font= ("CoreSansA35Light",14)).grid(row=1, column= 6, sticky ='n', pady=2, padx=2)

# Creating seperators for better UI 
x4 = tkinter.ttk.Separator(root, orient=HORIZONTAL).grid(column=0, row=16, columnspan=8, sticky='ew', padx = 10) 

#Section for all time blockbuster hits
l4 = Label(root, text = "All Time Favourites", font= ("TTCommons-DemiBoldItalic",20), wraplength=300,bg = "#2D283E", fg ="#D1D7E0")
l4.grid(row = 17,column = 0, pady = 2, padx = 10, sticky='s')

# Read the Image
inception = Image.open(r'Downloads/inception.jpeg')
# Resize the image using resize() method
resize_inception = inception.resize((150, 225))
img = ImageTk.PhotoImage(resize_inception)
# create label and add resize image
l5 = Label(image=img)
l5.image = img
l5.grid(row= 19, column =0,sticky='s',pady=25)

# Read the Image
tdk = Image.open(r'Downloads/i2.jpg')
# Resize the image using resize() method
resize_tdk = tdk.resize((150, 225))
img = ImageTk.PhotoImage(resize_tdk)
# create label and add resize image
l6 = Label(image=img)
l6.image = img
l6.grid(row= 19, column =1,sticky='s',pady=25)

# Read the Image
tdk = Image.open(r'Downloads/is.jpg')
# Resize the image using resize() method
resize_tdk = tdk.resize((150, 225))
img = ImageTk.PhotoImage(resize_tdk)
# create label and add resize image
l7 = Label(image=img)
l7.image = img
l7.grid(row= 19, column =2,sticky='s',pady=25)

# Read the Image
tdk = Image.open(r'Downloads/lr.png')
# Resize the image using resize() method
resize_tdk = tdk.resize((150, 225))
img = ImageTk.PhotoImage(resize_tdk)
# create label and add resize image
l8 = Label(image=img)
l8.image = img
l8.grid(row= 19, column =3,sticky='s',pady=25)

# Read the Image
tdk = Image.open(r'Downloads/pf.png')
# Resize the image using resize() method
resize_tdk = tdk.resize((150, 225))
img = ImageTk.PhotoImage(resize_tdk)
# create label and add resize image
l9 = Label(image=img)
l9.image = img
l9.grid(row= 19, column =4,sticky='s',pady=25)

# Read the Image
tdk = Image.open(r'Downloads/sr.jpg')
# Resize the image using resize() method
resize_tdk = tdk.resize((150, 225))
img = ImageTk.PhotoImage(resize_tdk)
# create label and add resize image
l10 = Label(image=img)
l10.image = img
l10.grid(row= 19, column =5,sticky='s',pady=25)

# Read the Image
tdk = Image.open(r'Downloads/fg.jpg')
# Resize the image using resize() method
resize_tdk = tdk.resize((150, 225))
img = ImageTk.PhotoImage(resize_tdk)
# create label and add resize image
l11 = Label(image=img)
l11.image = img
l11.grid(row= 19, column =7,sticky='s',pady=25)


# start the program
root.mainloop()