In [1]:
import pandas as pd
import numpy as np
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from surprise import Reader, Dataset, SVD, KNNBasic
from surprise.model_selection import cross_validate, KFold

import warnings; warnings.simplefilter('ignore')

In [2]:
data = pd.read_csv('emotionalData.csv')
data = data.rename(columns={'tconst': 'imdb_id'})

In [3]:
theMovie = pd.read_csv('movies_metadata.csv')

In [4]:
 merge = pd.merge(data,theMovie, on ='imdb_id')

In [5]:
merge.to_csv('mergedData.csv')

**ast.literal_eval:** Safely evaluate an expression node or a string containing a Python literal or container display. The string or node provided may only consist of the following Python literal structures: strings, bytes, numbers, tuples, lists, dicts, sets, booleans, None, bytes and sets.

**TfidfVectorizer, CountVectorizer:** The TfidfVectorizer will tokenize documents, learn the vocabulary and inverse document frequency weightings, and allow you to encode new documents. ... A vocabulary of 8 words is learned from the documents and each word is assigned a unique integer index in the output vector. 

The only difference is that the TfidfVectorizer() returns floats while the CountVectorizer() returns ints. And that’s to be expected – as explained in the documentation quoted above, TfidfVectorizer() assigns a score while CountVectorizer() counts.

**cosine_similarity** computes the L2-normalized dot product of vectors. That is, if  and  are row vectors, their cosine similarity  is defined as:
                                                    K(X, Y) = XY / (||X||*||Y||)
 
This is called cosine similarity, because Euclidean (L2) normalization projects the vectors onto the unit sphere, and their dot product is then the cosine of the angle between the points denoted by the vectors.

**Snowball Stemmer:** It is a stemming algorithm which is also known as the Porter2 stemming algorithm as it is a better version of the Porter Stemmer since some issues of it were fixed in this stemmer.

input------> output <br>
cared ----> care <br>
university ----> univers <br>
fairly ----> fair <br>
easily ----> easili <br>
singing ----> sing <br>
sings ----> sing <br>
sung ----> sung <br>
singer ----> singer <br>
sportingly ----> sport <br>

In [6]:
md = pd. read_csv('mergedData.csv')
md = md.rename(columns={'Score': 'IMDB-rating'})
md.head(5)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,imdb_id,Title,IMDB-rating,NormalisedScore,Year,wordcount,totalemotion,Emotional Expression,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,0,1,tt0047478,1954 Seven Samurai,8.6,80.851064,1954,3563.0,1617.0,68.210889,...,1954-04-26,271841.0,207.0,"[{'iso_639_1': 'ja', 'name': '日本語'}]",Released,The Mighty Warriors Who Became the Seven Natio...,Seven Samurai,False,8.2,892.0
1,1,3,tt0062622,2001 A Space Odyssey 1968 REMASTERED 720p Blu...,8.3,74.468085,1968,2153.0,800.0,45.706531,...,1968-04-10,68700000.0,149.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,An epic drama of adventure and exploration,2001: A Space Odyssey,False,7.9,3075.0
2,2,4,tt1187043,3 Idiots 2009 Hindi 1080p Blu Ray x264 DD 5 1...,8.3,74.468085,2009,6748.0,2981.0,64.908545,...,2009-12-23,70000000.0,170.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Don't BE A STUPID BE AN I.D.I.O.T.,3 Idiots,False,7.8,850.0
3,3,5,tt0066921,A Clockwork Orange (1971) 1080p H 264 Multi (...,8.3,74.468085,1971,5070.0,2154.0,60.282602,...,1971-12-18,26589000.0,136.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Being the adventures of a young man whose prin...,A Clockwork Orange,False,8.0,3432.0
4,4,7,tt1360860,About Elly 2009 1080p BluRay Remux AVC FLAC2 ...,8.0,68.085106,2009,3480.0,1307.0,46.800927,...,2009-02-10,0.0,119.0,"[{'iso_639_1': 'de', 'name': 'Deutsch'}, {'iso...",Released,,About Elly,False,7.3,98.0


In [7]:
def get_EmotionScore(df):
    df1 = df[["Agressiveness", "Anger","Anticipation", "Awe", "Contempt", "Disgust", "Dissaproval", "Fear", "Joy", "Love", "Negative", "Optimism", "Positive", "Remorse", "Sadness", "Submission", "Surprise", "Trust", "AFINN(-4&-5)", "Ero"]]
    df1 = df1.astype(int)
    df3 = df1.idxmax(axis = 1)
    df2 = list(df3)
    return df2[0]

In [8]:
md['genres'] = md['genres'].fillna('[]').apply(literal_eval).apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
# to clean unwanted data in genres and keep genres keywords like( qction, drama, comedy)

**Weighted Rating (WR) =  (vv+m.R)+(mv+m.C)** <br>
where,

v is the number of votes for the movie <br>
m is the minimum votes required to be listed in the chart <br>
R is the average rating of the movie <br>
C is the mean vote across the whole report <br>

In [9]:
def weighted_rating(x):
    v = x['vote_count']
    R = x['vote_average']
    return (v/(v+m) * R) + (m/(m+v) * C)

In [10]:
C = md['vote_average'].mean()
m = md['vote_count'].quantile(0.85) # I used 85% as cutoff
md['weighted_Rating'] = md.apply(weighted_rating, axis=1)


# Simple Recommender based on Emotional score

In [11]:
def emotionalData_build(emotion):
    hh = md.sort_values(by=[emotion], ascending=False)
    hh['vote_count'] = md['vote_count'].astype('int')
    hh = hh[['title', 'Year', 'IMDB-rating', 'weighted_Rating', 'vote_count', 'vote_average', 'popularity', emotion]]
    return hh

In [12]:
emotionalData_build('Agressiveness').head(5)

Unnamed: 0,title,Year,IMDB-rating,weighted_Rating,vote_count,vote_average,popularity,Agressiveness
17,City Lights,1931,8.5,7.962115,444,8.2,10.891524,47.37069
7,Amadeus,1984,8.3,7.923136,1107,7.8,12.677592,40.008172
56,Scarface,1983,8.2,7.963056,3017,8.0,11.299673,26.732432
89,The Usual Suspects,1995,8.5,7.999228,3334,8.1,16.302466,26.341695
88,The Sting,1973,8.2,7.940888,639,7.9,12.016821,26.294434


# Content Based Recommender

I will build two Content Based Recommenders based on:

- Movie Overviews and Taglines
- Movie Cast, Crew, Keywords and Genre

 **1. Content Based: Movie Description Based Recommender**
 
 Let us first try to build a recommender using movie descriptions and taglines.

In [13]:
links_small = pd.read_csv('links_small.csv')
links_small = links_small[links_small['tmdbId'].notnull()]['tmdbId'].astype('int')

In [14]:
smd = md[md['id'].isin(links_small)]
smd.shape

(96, 55)

In [15]:
smd['tagline'] = smd['tagline'].fillna('')
smd['description'] = smd['overview'] + smd['tagline']
smd['description'] = smd['description'].fillna('')

The TfidfVectorizer will tokenize documents, learn the vocabulary and inverse document frequency weightings, and allow you to encode new documents. ... A vocabulary of 8 words is learned from the documents and each word is assigned a unique integer index in the output vector.

In [16]:
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(smd['description'])

In [17]:
tfidf_matrix.shape


(96, 4866)

In [18]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)


In [19]:
cosine_sim[0]


array([1.        , 0.00805253, 0.        , 0.        , 0.        ,
       0.        , 0.01238417, 0.        , 0.        , 0.        ,
       0.        , 0.00609925, 0.        , 0.03148928, 0.        ,
       0.02409024, 0.        , 0.00885412, 0.        , 0.        ,
       0.00903198, 0.        , 0.        , 0.0176804 , 0.        ,
       0.        , 0.        , 0.01465997, 0.01249017, 0.        ,
       0.00848464, 0.        , 0.        , 0.        , 0.02448305,
       0.        , 0.        , 0.        , 0.00673802, 0.00828134,
       0.02040472, 0.        , 0.        , 0.        , 0.        ,
       0.02240047, 0.        , 0.        , 0.        , 0.00525777,
       0.01242957, 0.        , 0.01194161, 0.        , 0.        ,
       0.02721096, 0.00654227, 0.        , 0.01978169, 0.        ,
       0.        , 0.02214331, 0.        , 0.        , 0.        ,
       0.00586742, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.01243

We now have a pairwise cosine similarity matrix for all the movies in our dataset. The next step is to write a function that returns the 30 most similar movies based on the cosine similarity score.

In [20]:
smd = smd.reset_index()
titles = smd['title']
indices = pd.Series(smd.index, index=smd['title'])

In [21]:
def get_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:31]
    movie_indices = [i[0] for i in sim_scores]
    return titles.iloc[movie_indices]

In [22]:
get_recommendations('Avatar').head(10)


84                  The Matrix
45              Paths of Glory
26           Full Metal Jacket
35          Lawrence of Arabia
54            Schindler's List
77    The Silence of the Lambs
6                       Aliens
36      Leon: The Professional
52         Saving Private Ryan
58               Spirited Away
Name: title, dtype: object

We see that for The Avatar, our system is able to identify it as a other species movie  and subsequently recommend other movie which has alien, time travel as its top recommendations.

In [23]:
get_recommendations('The Dark Knight').head(10)


66                    The Dark Knight Rises
78                             The Departed
20                         Django Unchained
1                     2001: A Space Odyssey
19                                 Das Boot
92              Witness for the Prosecution
37                        Life Is Beautiful
2                                  3 Idiots
77                 The Silence of the Lambs
72    The Lord of the Rings: The Two Towers
Name: title, dtype: object

**Content based: Metadata Based Recommender**

We now have our cast, crew, genres and credits, all in one dataframe. Let us wrangle this a little more using the following intuitions:

**Crew:** From the crew, we will only pick the director as our feature since the others don't contribute that much to the feel of the movie. <br>
**Cast:** Lesser known actors and minor roles do not really affect people's opinion of a movie. Therefore, we must only select the major characters and their respective actors. Arbitrarily we will choose the top 3 actors that appear in the credits list.

In [24]:
credits = pd.read_csv('credits.csv')
keywords = pd.read_csv('keywords.csv')

In [25]:
keywords['id'] = keywords['id'].astype('int')
credits['id'] = credits['id'].astype('int')
md['id'] = md['id'].astype('int')

In [26]:
md.shape


(102, 55)

In [27]:
md = md.merge(credits, on='id')
md = md.merge(keywords, on='id')

In [28]:
links_small = pd.read_csv('links_small.csv')
links_small = links_small[links_small['tmdbId'].notnull()]['tmdbId'].astype('int')

In [29]:
smd = md[md['id'].isin(links_small)]
smd.shape

(96, 58)

In [30]:
smd['cast'] = smd['cast'].apply(literal_eval)
smd['crew'] = smd['crew'].apply(literal_eval)
smd['keywords'] = smd['keywords'].apply(literal_eval)
smd['cast_size'] = smd['cast'].apply(lambda x: len(x))
smd['crew_size'] = smd['crew'].apply(lambda x: len(x))

In [31]:
def get_director(x):
    for i in x:
        if i['job'] == 'Director':
            return i['name']
    return np.nan

In [32]:
smd['director'] = smd['crew'].apply(get_director)


In [33]:
smd['cast'] = smd['cast'].apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
smd['cast'] = smd['cast'].apply(lambda x: x[:3] if len(x) >=3 else x)


In [34]:
smd['keywords'] = smd['keywords'].apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])


I plan on doing is creating a metadata dump for every movie which consists of genres, director, main actors and keywords. I then use a Count Vectorizer to create our count matrix as we did in the Description Recommender. The remaining steps are similar to what we did earlier: we calculate the cosine similarities and return movies that are most similar.

These are steps I follow in the preparation of my genres and credits data:

- Strip Spaces and Convert to Lowercase from all our features. This way, our engine will not confuse between Johnny Depp and Johnny Galecki.
- Mention Director 3 times to give it more weight relative to the entire cast.

In [35]:
smd['cast'] = smd['cast'].apply(lambda x: [str.lower(i.replace(" ", "")) for i in x])


In [36]:
smd['cast']

0           [toshirōmifune, takashishimura, yoshioinaba]
1           [keirdullea, garylockwood, williamsylvester]
2                   [aamirkhan, kareenakapoor, madhavan]
3         [malcolmmcdowell, patrickmagee, adriennecorri]
4      [golshiftehfarahani, shahabhosseini, taranehal...
                             ...                        
94            [milesteller, j.k.simmons, melissabenoist]
95       [tyronepower, marlenedietrich, charleslaughton]
97         [samworthington, zoesaldana, sigourneyweaver]
100              [hughgrant, andiemacdowell, jamesfleet]
101                                  [davidattenborough]
Name: cast, Length: 96, dtype: object

In [37]:
smd['director'] = smd['director'].astype('str').apply(lambda x: str.lower(x.replace(" ", "")))
smd['director'] = smd['director'].apply(lambda x: [x,x, x])

**Keywords** <br>
We will do a small amount of pre-processing of our keywords before putting them to any use. As a first step, we calculate the frequenct counts of every keyword that appears in the dataset.

In [38]:
s = smd.apply(lambda x: pd.Series(x['keywords']),axis=1).stack().reset_index(level=1, drop=True)
s.name = 'keyword'

In [39]:
s = s.value_counts()
s[:10]

based on novel    11
violence           8
world war ii       7
dystopia           7
love               5
corruption         5
revenge            5
nazis              5
friends            5
friendship         5
Name: keyword, dtype: int64

In [40]:
s = s[s > 1]


In [41]:
stemmer = SnowballStemmer('english')
stemmer.stem('humans')
# stemmer convert words like dogs into dog, such that we get keywords count accurately.

'human'

In [42]:
def filter_keywords(x):
    words = []
    for i in x:
        if i in s:
            words.append(i)
    return words

In [43]:
smd['keywords'] = smd['keywords'].apply(filter_keywords)
smd['keywords'] = smd['keywords'].apply(lambda x: [stemmer.stem(i) for i in x])
smd['keywords'] = smd['keywords'].apply(lambda x: [str.lower(i.replace(" ", "")) for i in x])

In [44]:
smd['keywords'].head(2)

0                 [japan, peasant, moralambigu, battl]
1    [artificialintellig, manvsmachin, spacetravel,...
Name: keywords, dtype: object

In [45]:
smd['soup'] = smd['keywords'] + smd['cast'] + smd['director'] + smd['genres']
smd['soup'] = smd['soup'].apply(lambda x: ' '.join(x))

In [46]:
smd['soup'].head(2)

0    japan peasant moralambigu battl toshirōmifune ...
1    artificialintellig manvsmachin spacetravel ast...
Name: soup, dtype: object

In [47]:
count = CountVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
count_matrix = count.fit_transform(smd['soup'])

In [48]:
cosine_sim = cosine_similarity(count_matrix, count_matrix)


In [49]:
smd = smd.reset_index()
titles = smd['title']
indices = pd.Series(smd.index, index=smd['title'])

In [50]:
get_recommendations('Avatar').head(10)

6                                        Aliens
64                   Terminator 2: Judgment Day
87                                      Titanic
5                                         Alien
60                      The Empire Strikes Back
59    Star Wars: Episode I - The Phantom Menace
1                         2001: A Space Odyssey
32                                 Interstellar
61                           Return of the Jedi
3                            A Clockwork Orange
Name: title, dtype: object

In [51]:
get_recommendations('The Dark Knight').head(10)


66       The Dark Knight Rises
74                The Prestige
38                     Memento
32                Interstellar
86          The Usual Suspects
53                    Scarface
51              Reservoir Dogs
77    The Silence of the Lambs
63                 Taxi Driver
78                The Departed
Name: title, dtype: object

If we compair our resent results with our another recommended system(Movie Overviews and Taglines), The results are lot different.This one have lot more similar movies

# Collaborative Filtering
- using K-nearest neighbours
- using singular-Value Decomposition

In [52]:
reader = Reader()

In [53]:
ratings = pd.read_csv('ratings_small.csv')
ratings.head()


Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [54]:
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
kf = KFold(n_splits=5)
kf.split(data)

<generator object KFold.split at 0x000001A05644D190>

**Using K-nearest neighbor (KNNBasic)**

In [55]:
# We'll use the k-nearest neighbor algorithm.
knn = KNNBasic()
# Run 5-fold cross-validation and print results
cross_validate(knn, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9585  0.9624  0.9696  0.9684  0.9734  0.9665  0.0053  
MAE (testset)     0.7372  0.7399  0.7473  0.7425  0.7475  0.7429  0.0041  
Fit time          0.19    0.52    0.52    0.53    0.51    0.45    0.13    
Test time         1.71    4.20    4.45    4.31    4.32    3.80    1.05    


{'test_rmse': array([0.95854337, 0.96238106, 0.96960783, 0.96839379, 0.97335441]),
 'test_mae': array([0.7371735 , 0.73994904, 0.74730864, 0.74245913, 0.74751347]),
 'fit_time': (0.18525147438049316,
  0.5172545909881592,
  0.5154478549957275,
  0.5340359210968018,
  0.51409912109375),
 'test_time': (1.70637845993042,
  4.201348304748535,
  4.445612668991089,
  4.307009696960449,
  4.323373556137085)}

In [56]:
trainset = data.build_full_trainset()
knn.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x1a058e4f280>

In [57]:
ratings[ratings['userId'] == 1]

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
5,1,1263,2.0,1260759151
6,1,1287,2.0,1260759187
7,1,1293,2.0,1260759148
8,1,1339,3.5,1260759125
9,1,1343,2.0,1260759131


**Root Mean Sqaure Error** using KNNBasic is  0.9681

In [58]:
knn.predict(1, 2, 3)

Prediction(uid=1, iid=2, r_ui=3, est=3.2584390074346286, details={'actual_k': 40, 'was_impossible': False})

For movie with ID **2**, we get an estimated prediction of **3.2584** by user **1**

In [59]:
knn.predict(2, 2, 3)

Prediction(uid=2, iid=2, r_ui=3, est=3.3101434365546045, details={'actual_k': 40, 'was_impossible': False})

For **same movie**, we get an estimated prediction of **3.31014** by user **2**

**Using singular-Value Decomposition(SVD)**

In [60]:
# We'll use the famous SVD algorithm.
svd = SVD()
# Run 5-fold cross-validation and print results
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8914  0.8925  0.9005  0.8999  0.9029  0.8974  0.0046  
MAE (testset)     0.6838  0.6866  0.6920  0.6942  0.6968  0.6907  0.0048  
Fit time          16.28   15.44   16.25   15.26   16.01   15.85   0.42    
Test time         0.40    0.49    0.37    0.51    0.38    0.43    0.06    


{'test_rmse': array([0.89138738, 0.89253021, 0.90054564, 0.89987824, 0.90287449]),
 'test_mae': array([0.68377244, 0.68663138, 0.69201596, 0.6942273 , 0.6967859 ]),
 'fit_time': (16.282403469085693,
  15.441909313201904,
  16.250487327575684,
  15.255274057388306,
  16.0091872215271),
 'test_time': (0.397996187210083,
  0.4933156967163086,
  0.37157559394836426,
  0.510810136795044,
  0.37914419174194336)}

**Root Mean Sqaure Error** using SVD is  0.8964

In [61]:
trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1a058821640>

In [62]:
svd.predict(1, 2, 3)

Prediction(uid=1, iid=2, r_ui=3, est=2.57727381324865, details={'was_impossible': False})

For movie with ID **2**, we get an estimated prediction of **2.794324** by user **1**

In [63]:
svd.predict(2, 2, 3)

Prediction(uid=2, iid=2, r_ui=3, est=3.233694236120992, details={'was_impossible': False})

For **same movie**, we get an estimated prediction of **3.44728** by user **2**

# Hybrid Recommender

In [64]:
def convert_int(x):
    try:
        return int(x)
    except:
        return np.nan

In [65]:
id_map = pd.read_csv('links_small.csv')[['movieId', 'tmdbId']]
id_map['tmdbId'] = id_map['tmdbId'].apply(convert_int)
id_map.columns = ['movieId', 'id']
id_map = id_map.merge(smd[['title', 'id']], on='id').set_index('title')

In [66]:
indices_map = id_map.set_index('id')


In [67]:
def hybrid(userId, title):
    idx = indices[title]
    tmdbId = id_map.loc[title]['id']
    #print(idx)
    movie_id = id_map.loc[title]['movieId']
    
    sim_scores = list(enumerate(cosine_sim[int(idx)]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:26]
    movie_indices = [i[0] for i in sim_scores]
    emotion = get_EmotionScore(md.loc[md['title']== title])
    movies = smd.iloc[movie_indices][['title', 'vote_count', 'vote_average', 'Year', 'id', emotion]]
    movies['est'] = movies['id'].apply(lambda x: svd.predict(userId, indices_map.loc[x]['movieId']).est)
    movies = movies.sort_values('est', ascending=False)
    return movies.head(5)

In [68]:
hybrid(1, 'The Dark Knight')


Unnamed: 0,title,vote_count,vote_average,Year,id,Trust,est
76,The Shawshank Redemption,8358.0,8.5,1994,278,41.551177,3.70407
86,The Usual Suspects,3334.0,8.1,1995,629,38.776227,3.597704
67,The Godfather,6024.0,8.5,1972,238,60.690334,3.570446
49,Rear Window,1531.0,8.2,1954,567,27.561833,3.505309
29,GoodFellas,3211.0,8.2,1990,769,39.990811,3.435524


In the above output we can see we can see recommandations for user **1** for Avatar movie and **est** is **predicted ratings** using **collabarative filter**

In [69]:
hybrid(2, 'The Dark Knight')


Unnamed: 0,title,vote_count,vote_average,Year,id,Trust,est
76,The Shawshank Redemption,8358.0,8.5,1994,278,41.551177,4.444154
67,The Godfather,6024.0,8.5,1972,238,60.690334,4.389689
88,To Kill a Mockingbird,676.0,7.9,1962,595,54.418776,4.23495
51,Reservoir Dogs,3821.0,8.1,1992,500,33.492096,4.189413
86,The Usual Suspects,3334.0,8.1,1995,629,38.776227,4.122201


for same movie as input we see different recommandations for user **2**