# <center> Recommendation System on Movie Lens Dataset

## 1. Reading Data

In [1]:
# Jupyter Notebook with Matplotlib Inline
%matplotlib notebook

# Importing necessary modules
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as plt
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from subprocess import check_output

In [2]:
# Set height, width, maximum rows and columns
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [3]:
# List of files in the data directory
print(os.listdir("data/"))

['data', 'desktop.ini', 'ml-100k.zip', 'Read Me.txt', 'u.data', 'u.genre', 'u.info', 'u.item', 'u.occupation', 'u.user']


In [4]:
# Reading ratings data
rating_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv('data/u.data', sep='\t', names = rating_cols) 

In [5]:
print(ratings.shape)
ratings.head(5)

(100000, 4)


Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [6]:
# Reading genre data
genre_cols = ['genre', 'genre_id']
genres = pd.read_csv('data/u.genre', sep='|', names = genre_cols) 

In [7]:
print(genres.shape)
genres.head(5)

(19, 2)


Unnamed: 0,genre,genre_id
0,unknown,0
1,Action,1
2,Adventure,2
3,Animation,3
4,Children's,4


In [8]:
# Reading movies data
movies_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url','unknown', 'Action', 'Adventure',
'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy','Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
movies = pd.read_csv('data/u.item', sep = '|', names = movies_cols,  encoding='latin-1')

In [9]:
print(movies.shape)
movies.head(2).transpose()

(1682, 24)


Unnamed: 0,0,1
movie_id,1,2
title,Toy Story (1995),GoldenEye (1995)
release_date,01-Jan-1995,01-Jan-1995
video_release_date,,
imdb_url,http://us.imdb.com/M/title-exact?Toy%20Story%2...,http://us.imdb.com/M/title-exact?GoldenEye%20(...
unknown,0,0
Action,0,1
Adventure,0,1
Animation,1,0
Children's,1,0


In [10]:
# Reading users data
user_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv('data/u.user', sep='|', names = user_cols) 

In [11]:
print(users.shape)
users.head()

(943, 5)


Unnamed: 0,user_id,age,sex,occupation,zip_code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [12]:
# Shape of all the data
for i in [ratings, genres, movies, users]:
    print(i.shape)

(100000, 4)
(19, 2)
(1682, 24)
(943, 5)


In [13]:
movies.head(3).transpose()

Unnamed: 0,0,1,2
movie_id,1,2,3
title,Toy Story (1995),GoldenEye (1995),Four Rooms (1995)
release_date,01-Jan-1995,01-Jan-1995,01-Jan-1995
video_release_date,,,
imdb_url,http://us.imdb.com/M/title-exact?Toy%20Story%2...,http://us.imdb.com/M/title-exact?GoldenEye%20(...,http://us.imdb.com/M/title-exact?Four%20Rooms%...
unknown,0,0,0
Action,0,1,0
Adventure,0,1,0
Animation,1,0,0
Children's,1,0,0


In [14]:
ratings.head(3).transpose()

Unnamed: 0,0,1,2
user_id,196,186,22
movie_id,242,302,377
rating,3,3,1
timestamp,881250949,891717742,878887116


In [15]:
users.head(3).transpose()

Unnamed: 0,0,1,2
user_id,1,2,3
age,24,53,23
sex,M,F,M
occupation,technician,other,writer
zip_code,85711,94043,32067


## 2. Merging Data

In [16]:
# Merging ratings and movies
z = ratings.merge(movies, on = 'movie_id', how = "inner")

In [17]:
z.columns

Index(['user_id', 'movie_id', 'rating', 'timestamp', 'title', 'release_date', 'video_release_date', 'imdb_url', 'unknown', 'Action', 'Adventure', 'Animation', 'Children's', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'], dtype='object')

In [18]:
# Merge combined data and users
z = users.merge(z, on = 'user_id', how = "inner")

In [19]:
# Merge combined data and users
zz = z.copy(deep = True)

In [20]:
# New combined data
zz.head(3).transpose()

Unnamed: 0,0,1,2
user_id,1,1,1
age,24,24,24
sex,M,M,M
occupation,technician,technician,technician
zip_code,85711,85711,85711
movie_id,242,51,265
rating,5,4,4
timestamp,889751633,878543275,878542441
title,Kolya (1996),Legends of the Fall (1994),"Hunt for Red October, The (1990)"
release_date,24-Jan-1997,01-Jan-1994,01-Jan-1990


In [21]:
zz.shape

(100000, 31)

## 2.1 Data Merge - New Version

In [22]:
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
m_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url','unknown', 'Action', 'Adventure',
'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy','Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

In [23]:
users_1 = pd.read_csv("data/u.user",sep='|',names=u_cols)
ratings_1 = pd.read_csv('data/u.data',sep='\t', names=r_cols)
movies_1 = pd.read_csv('data/u.item', sep='|', names=m_cols, encoding='latin-1')
movielens=pd.merge(users_1 , ratings_1)
movielens=pd.merge(movielens,movies_1)
movielens.head(3)

Unnamed: 0,user_id,age,sex,occupation,zip_code,movie_id,rating,timestamp,title,release_date,video_release_date,imdb_url,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,24,M,technician,85711,61,4,878542420,Three Colors: White (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Trzy%20kolory...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,13,47,M,educator,29206,61,4,882140552,Three Colors: White (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Trzy%20kolory...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,18,35,F,other,37212,61,4,880130803,Three Colors: White (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?Trzy%20kolory...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


## 3. Data Pre-Processing

In [24]:
# Format 'title' i.e. remove 'year' from title
zz['title'] = zz['title'].astype(str).str[:-7]

In [25]:
zz.title.head()

0                        Kolya
1          Legends of the Fall
2    Hunt for Red October, The
3      Remains of the Day, The
4                 Men in Black
Name: title, dtype: object

## 4. Data Inspection

In [26]:
# Checking for null values in the dataframe
zz.isnull().values.any()

True

In [27]:
# Checking for null values in the columns
zz.isnull().any()

user_id               False
age                   False
sex                   False
occupation            False
zip_code              False
movie_id              False
rating                False
timestamp             False
title                 False
release_date           True
video_release_date     True
imdb_url               True
unknown               False
Action                False
Adventure             False
Animation             False
Children's            False
Comedy                False
Crime                 False
Documentary           False
Drama                 False
Fantasy               False
Film-Noir             False
Horror                False
Musical               False
Mystery               False
Romance               False
Sci-Fi                False
Thriller              False
War                   False
Western               False
dtype: bool

** Note:** Null values are in `release_date`, `video_release_date`, `imdb_url`, the features which are not significant for this project

In [28]:
zz.describe()

Unnamed: 0,user_id,age,movie_id,rating,timestamp,video_release_date,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
count,100000.0,100000.0,100000.0,100000.0,100000.0,0.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,462.48475,32.96985,425.53013,3.52986,883528900.0,,0.0001,0.25589,0.13753,0.03605,0.07182,0.29832,0.08055,0.00758,0.39895,0.01352,0.01733,0.05317,0.04954,0.05245,0.19461,0.1273,0.21872,0.09398,0.01854
std,266.61442,11.562623,330.798356,1.125674,5343856.0,,0.01,0.436362,0.344408,0.186416,0.258191,0.457523,0.272144,0.086733,0.489685,0.115487,0.130498,0.224373,0.216994,0.222934,0.395902,0.33331,0.41338,0.291802,0.134894
min,1.0,7.0,1.0,1.0,874724700.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,254.0,24.0,175.0,3.0,879448700.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,447.0,30.0,322.0,4.0,882826900.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,682.0,40.0,631.0,4.0,888260000.0,,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,943.0,73.0,1682.0,5.0,893286600.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## 5. EDA 

### 5.1 Univariate Analysis 

In [29]:
zz.head()

Unnamed: 0,user_id,age,sex,occupation,zip_code,movie_id,rating,timestamp,title,release_date,video_release_date,imdb_url,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,24,M,technician,85711,242,5,889751633,Kolya,24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,24,M,technician,85711,51,4,878543275,Legends of the Fall,01-Jan-1994,,http://us.imdb.com/M/title-exact?Legends%20of%...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1
2,1,24,M,technician,85711,265,4,878542441,"Hunt for Red October, The",01-Jan-1990,,http://us.imdb.com/M/title-exact?Hunt+for+Red+...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,1,24,M,technician,85711,86,5,878543541,"Remains of the Day, The",01-Jan-1993,,http://us.imdb.com/M/title-exact?Remains%20of%...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,1,24,M,technician,85711,257,4,874965954,Men in Black,04-Jul-1997,,http://us.imdb.com/M/title-exact?Men+in+Black+...,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0


<div class="alert alert-warning">
  <strong>user_id</strong> 
</div>

In [30]:
# Total number of users
len(zz.user_id.unique())

943

<div class="alert alert-warning">
  <strong>age</strong> 
</div>

In [32]:
np.mean(zz['age'])

32.96985

In [33]:
np.sqrt(np.sqrt(len(zz)))

17.782794100389228

In [34]:
# Plotting distribution of 'age' with sqrt(sqrt(100000)) bins
zz['age'].hist(bins = int(np.sqrt(np.sqrt(len(zz)))))

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0xc42c780>

**Interpretation:** We see from the above graph that distribution is slightly skewed to the right and the data is majorly distributed between 20 and 40.

<div class="alert alert-warning">
  <strong>gender</strong> 
</div>

In [35]:
# Number of observations by gender 
zz.sex.value_counts()

M    74260
F    25740
Name: sex, dtype: int64

** Interpretation:** From the above table of value counts, it is apparent that the data is highly comprised of 'Males' than 'Females' 

In [36]:
sns.countplot(x="sex", data=zz)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0xc4a65f8>

<div class="alert alert-warning">
  <strong>occupation</strong> 
</div>

In [37]:
# Number of observations by gender 
zz.occupation.value_counts()

student          21957
other            10663
educator          9442
engineer          8175
programmer        7801
administrator     7479
writer            5536
librarian         5273
technician        3506
executive         3403
healthcare        2804
artist            2308
entertainment     2095
scientist         2058
marketing         1950
retired           1609
lawyer            1345
none               901
salesman           856
doctor             540
homemaker          299
Name: occupation, dtype: int64

In [38]:
plt.gcf().subplots_adjust(bottom=0.25)
pd.value_counts(zz['occupation']).plot.bar()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0xc4a66d8>

<div class="alert alert-warning">
  <strong>movie_id</strong> 
</div>

In [39]:
# Total number of movies
len(zz.movie_id.unique())

1682

<div class="alert alert-warning">
  <strong>rating</strong> 
</div>

In [40]:
np.mean(z['rating'])

3.52986

In [41]:
# Ratings and their respective counts
sns.countplot(x = zz.rating, data = zz)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0xc834ef0>

**Interpretation:** From the above plot, it is apparent that most of the user ratings were either 3 or 4.

** Transformation 1 - Replacing ratings with `below_avg`, `avg` and `above_avg`**

In [42]:
# Function to categorize 'rating'
def transformation_1(df):
    df['rating'].replace([1, 2, 3, 4, 5],
                      ['below_avg', 'below_avg', 'avg', 'above_avg', 'above_avg'], 
                      inplace = True)

In [43]:
transformation_1(zz)

In [44]:
zz.rating.head(10)

0    above_avg
1    above_avg
2    above_avg
3    above_avg
4    above_avg
5    above_avg
6          avg
7    below_avg
8    above_avg
9          avg
Name: rating, dtype: object

<div class="alert alert-warning">
  <strong>title</strong> 
</div>

In [45]:
# Top 5 mostly rated movies
zz.title.value_counts().head(5)

Star Wars             583
Contact               509
Fargo                 508
Return of the Jedi    507
Liar Liar             485
Name: title, dtype: int64

In [46]:
# Top 5 least rated movies
zz.title.value_counts().tail(5)

Next Step, The            1
He Walked by Night        1
Good Morning              1
Witness                   1
Someone Else's America    1
Name: title, dtype: int64

In [48]:
sns.countplot(x="title", data=zz)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1569c0f0>

** Note:** Movie count is too huge to fit them in one graph

In [49]:
top_movies = zz.groupby('title').size().sort_values(ascending = False)[:10]

In [50]:
top_movies

title
Star Wars                 583
Contact                   509
Fargo                     508
Return of the Jedi        507
Liar Liar                 485
English Patient, The      481
Scream                    478
Toy Story                 452
Air Force One             431
Independence Day (ID4)    429
dtype: int64

**Most rated movies (by count)**

In [51]:
from matplotlib import rcParams
rcParams.update({'figure.autolayout': True})

In [52]:
top_movies.plot(kind = 'barh')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x19de9940>

In [53]:
# Movies with overall number of ratings = 1 
zz.groupby('title').size().sort_values(ascending = False)[-370:]

title
Loaded                                                                        5
Locusts, The                                                                  5
Hearts and Minds                                                              5
Getting Even with Dad                                                         5
Stonewall                                                                     5
Panther                                                                       5
Heavy                                                                         5
Story of Xinghua, The                                                         5
Of Human Bondage                                                              5
Old Lady Who Walked in the Sea, The (Vieille qui marchait dans la mer, La)    5
Stripes                                                                       5
Brother Minister: The Assassination of Malcolm X                              5
Above the Rim                     

In [54]:
# Movies with overall number of ratings = 1 
zz.groupby('title').size().sort_values(ascending = False)[-134:]

title
Wedding Bell Blues                                    1
They Made Me a Criminal                               1
You So Crazy                                          1
War at Home, The                                      1
The Courtyard                                         1
Wings of Courage                                      1
Touki Bouki (Journey of the Hyena)                    1
T-Men                                                 1
Symphonie pastorale, La                               1
Three Lives and Only One Death                        1
Vermont Is For Lovers                                 1
Sweet Nothing                                         1
Tainted                                               1
Window to Paris                                       1
Witness                                               1
Walk in the Sun, A                                    1
Woman in Question, The                                1
Terror in a Texas Town                    

<div class="alert alert-warning">
  <strong>genre</strong> 
</div>

In [55]:
# Dataframe with all genres
all_genres = z.iloc[:, 6: ].drop(['timestamp', 'title', 'release_date', 'video_release_date', 'imdb_url'], axis = 1)

In [56]:
all_genres.head()

Unnamed: 0,rating,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,5,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,4,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1
2,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,5,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,4,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0


In [57]:
all_genres.shape

(100000, 20)

In [58]:
# Multiplying each genre with ratings 
result = all_genres.mul(all_genres['rating'], axis=0)

In [59]:
result = result.drop(['rating'], axis = 1)

In [60]:
result.describe()

Unnamed: 0,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
count,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,0.00032,0.89056,0.48184,0.12894,0.24083,1.01252,0.29258,0.02784,1.47108,0.04347,0.06796,0.17495,0.17445,0.19082,0.70482,0.45328,0.76749,0.35861,0.06699
std,0.03435,1.621767,1.277046,0.698498,0.918683,1.67361,1.037703,0.334703,1.930016,0.393449,0.52819,0.787406,0.803632,0.848678,1.513179,1.253799,1.540128,1.161664,0.507627
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0


In [61]:
result.describe().ix['mean'].plot(kind='bar')

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x19f3e278>

## 5.2 Bivariate Analysis

### 5.2.1 Genre Count

In [62]:
all_genres.head()

Unnamed: 0,rating,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,5,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,4,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1
2,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,5,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,4,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0


In [63]:
all_genres.columns

Index(['rating', 'unknown', 'Action', 'Adventure', 'Animation', 'Children's', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'], dtype='object')

In [64]:
genre_count = pd.DataFrame(columns=['col','entries'])

In [65]:
col_list = []

In [66]:
list((all_genres.columns))

['rating',
 'unknown',
 'Action',
 'Adventure',
 'Animation',
 "Children's",
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Fantasy',
 'Film-Noir',
 'Horror',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Thriller',
 'War',
 'Western']

In [67]:
genre_sum = list()

In [68]:
for col in list((all_genres.columns)):
    genre_sum.append(all_genres[col].sum())

In [69]:
genres = list((all_genres.columns))

In [70]:
genre_df = pd.DataFrame({'genres': genres, 'entries': genre_sum})

In [71]:
genre_df.head()

Unnamed: 0,genres,entries
0,rating,352986
1,unknown,10
2,Action,25589
3,Adventure,13753
4,Animation,3605


In [72]:
sns.barplot(y='genres', x='entries', data = genre_df.iloc[1:])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x19eaf828>

### 5.2.2 Multiplying each genre with ratings 

In [73]:
t = z.iloc[:, 1: ].drop(['occupation', 'zip_code', 'movie_id', 'timestamp', 'title', 'release_date', 'video_release_date', 'imdb_url'], axis = 1)

In [74]:
t.iloc[:,3:] = t.iloc[:,3:].mul(t['rating'], axis=0)

In [75]:
t.head()

Unnamed: 0,age,sex,rating,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,24,M,5,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0
1,24,M,4,0,0,0,0,0,0,0,0,4,0,0,0,0,0,4,0,0,4,4
2,24,M,4,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0
3,24,M,5,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0
4,24,M,4,0,4,4,0,0,4,0,0,0,0,0,0,0,0,0,4,0,0,0


### 5.2.3 Gender vs Age

In [76]:
# Gender vs Age
sns.boxplot(x="sex", y="age", data=zz)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a2456d8>

### 5.2.3 Gender vs Rating

In [77]:
# Gender vs Rating
sns.boxplot(x="sex", y="rating", data=z)
plt.show()

<IPython.core.display.Javascript object>

In [78]:
# Gender vs Rating
sns.violinplot(x="sex", y="rating", data=z)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a466c18>

### 5.2.4 Gender vs Rating vs Title

In [79]:
zz.head()

Unnamed: 0,user_id,age,sex,occupation,zip_code,movie_id,rating,timestamp,title,release_date,video_release_date,imdb_url,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,24,M,technician,85711,242,above_avg,889751633,Kolya,24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,24,M,technician,85711,51,above_avg,878543275,Legends of the Fall,01-Jan-1994,,http://us.imdb.com/M/title-exact?Legends%20of%...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1
2,1,24,M,technician,85711,265,above_avg,878542441,"Hunt for Red October, The",01-Jan-1990,,http://us.imdb.com/M/title-exact?Hunt+for+Red+...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,1,24,M,technician,85711,86,above_avg,878543541,"Remains of the Day, The",01-Jan-1993,,http://us.imdb.com/M/title-exact?Remains%20of%...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,1,24,M,technician,85711,257,above_avg,874965954,Men in Black,04-Jul-1997,,http://us.imdb.com/M/title-exact?Men+in+Black+...,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0


In [80]:
gen = z[['sex', 'title', 'rating']]

In [81]:
gen.head()

Unnamed: 0,sex,title,rating
0,M,Kolya (1996),5
1,M,Legends of the Fall (1994),4
2,M,"Hunt for Red October, The (1990)",4
3,M,"Remains of the Day, The (1993)",5
4,M,Men in Black (1997),4


In [82]:
new_gen = gen.pivot_table(index = 'title', columns = 'sex', values = 'rating')

In [83]:
new_gen.head()

sex,F,M
title,Unnamed: 1_level_1,Unnamed: 2_level_1
'Til There Was You (1997),2.2,2.5
1-900 (1994),1.0,3.0
101 Dalmatians (1996),3.116279,2.772727
12 Angry Men (1957),4.269231,4.363636
187 (1997),3.5,2.870968


In [84]:
new_gen['diff'] = new_gen['M'] - new_gen['F']

In [85]:
new_gen.head()

sex,F,M,diff
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
'Til There Was You (1997),2.2,2.5,0.3
1-900 (1994),1.0,3.0,2.0
101 Dalmatians (1996),3.116279,2.772727,-0.343552
12 Angry Men (1957),4.269231,4.363636,0.094406
187 (1997),3.5,2.870968,-0.629032


In [86]:
# Top 10 movies highly rated by Females but not by Males
new_gen.sort_values('diff').head(10)

sex,F,M,diff
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Loch Ness (1995),4.0,1.0,-3.0
Love and Death on Long Island (1997),4.0,1.0,-3.0
"Visitors, The (Visiteurs, Les) (1993)",5.0,2.0,-3.0
"Lay of the Land, The (1997)",4.0,1.0,-3.0
Rough Magic (1995),4.0,1.0,-3.0
Faster Pussycat! Kill! Kill! (1965),5.0,2.666667,-2.333333
Chairman of the Board (1998),4.0,1.714286,-2.285714
Two Much (1996),4.0,1.8,-2.2
"Simple Wish, A (1997)",3.0,1.0,-2.0
Mina Tannenbaum (1994),5.0,3.0,-2.0


In [87]:
new_gen.sort_values('diff').head(10)['diff'].plot(kind='bar')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1aa3e6a0>

In [88]:
# Top 10 movies highly rated by Males but not by Females 
new_gen.sort_values('diff', ascending=False).head(10)

sex,F,M,diff
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Delta of Venus (1994),1.0,5.0,4.0
Two or Three Things I Know About Her (1966),1.0,4.666667,3.666667
Paths of Glory (1957),1.0,4.419355,3.419355
"Magic Hour, The (1998)",1.0,4.25,3.25
So Dear to My Heart (1949),1.0,4.0,3.0
Spirits of the Dead (Tre passi nel delirio) (1968),1.0,4.0,3.0
Little City (1998),2.0,5.0,3.0
Aparajito (1956),1.0,4.0,3.0
Killer (Bulletproof Heart) (1994),1.0,4.0,3.0
Stalker (1979),1.0,3.8,2.8


In [89]:
new_gen.sort_values('diff', ascending=False).head(10)['diff'].plot(kind = 'bar')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1c6365f8>

### 5.2.5 Gender vs Genre

In [258]:
t = z.iloc[:, 1: ].drop(['occupation', 'zip_code', 'movie_id', 'timestamp', 'release_date', 'video_release_date', 'imdb_url'], axis = 1)

In [260]:
# Format 'title' i.e. remove 'year' from title
t['title'] = t['title'].astype(str).str[:-7]

In [262]:
t['title'] = t['title'].fillna("").astype('str')

In [263]:
t.head()

Unnamed: 0,age,sex,rating,title,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,24,M,5,Kolya,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,24,M,4,Legends of the Fall,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1
2,24,M,4,"Hunt for Red October, The",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,24,M,5,"Remains of the Day, The",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,24,M,4,Men in Black,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0


In [264]:
t.iloc[:,3:].head()

Unnamed: 0,title,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,Kolya,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Legends of the Fall,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1
2,"Hunt for Red October, The",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,"Remains of the Day, The",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,Men in Black,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0


In [265]:
for col in ['unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']:
    t[[col]] = t[[col]]*col

In [266]:
t.head()

Unnamed: 0,age,sex,rating,title,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,24,M,5,Kolya,,,,,,Comedy,,,,,,,,,,,,,
1,24,M,4,Legends of the Fall,,,,,,,,,Drama,,,,,,Romance,,,War,Western
2,24,M,4,"Hunt for Red October, The",,Action,,,,,,,,,,,,,,,Thriller,,
3,24,M,5,"Remains of the Day, The",,,,,,,,,Drama,,,,,,,,,,
4,24,M,4,Men in Black,,Action,Adventure,,,Comedy,,,,,,,,,,Sci-Fi,,,


In [267]:
# Function to consolidate all 'genre'
def transformation_2(df):
    df['genre'] = df.apply(lambda x: 
                            x ['unknown'] + ' ' + 
                            x ['Action'] + ' ' + 
                            x ['Adventure'] + ' ' + 
                            x ['Animation'] + ' ' + 
                            x ['Children\'s'] + ' ' + 
                            x ['Comedy'] + ' ' + 
                            x ['Crime'] + ' ' + 
                            x ['Documentary'] + ' ' + 
                            x ['Drama'] + ' ' + 
                            x ['Fantasy'] + ' ' + 
                            x ['Film-Noir'] + ' ' + 
                            x ['Horror'] + ' ' + 
                            x ['Musical'] + ' ' + 
                            x ['Mystery'] + ' ' + 
                            x ['Romance'] + ' ' + 
                            x ['Sci-Fi'] + ' ' + 
                            x ['Thriller'] + ' ' + 
                            x ['War'] + ' ' + 
                            x ['Western'], axis = 1)

In [268]:
transformation_2(t)

In [269]:
t[['genre']].head()

Unnamed: 0,genre
0,Comedy
1,Drama Romance War Western
2,Action Thriller
3,Drama
4,Action Adventure Comedy Sci-Fi


In [270]:
t['genre'].apply(lambda x: x.lstrip() if type(x) is str else x).head()

0                             Comedy             
1                Drama      Romance   War Western
2                 Action               Thriller  
3                                 Drama          
4    Action Adventure   Comedy          Sci-Fi   
Name: genre, dtype: object

In [271]:
td = t.copy(deep = True)

In [101]:
td['genre'] = td['genre'].str.lstrip()

In [102]:
td['genre'].head()

0                             Comedy             
1                Drama      Romance   War Western
2                 Action               Thriller  
3                                 Drama          
4    Action Adventure   Comedy          Sci-Fi   
Name: genre, dtype: object

In [277]:
td['genre'].value_counts()

        Drama                                                      13257
     Comedy                                                         9828
     Comedy         Romance                                         5055
        Drama      Romance                                          4767
 Action               Thriller                                      3550
        Drama        Thriller                                       2627
     Comedy   Drama                                                 2422
        Drama         War                                           2012
 Action Adventure             Sci-Fi                                1865
           Horror                                                   1558
 Action Adventure                                                   1532
   Animation Children's        Musical                              1489
 Action Adventure              Thriller                             1342
             Mystery   Thriller                    

In [278]:
td = td.drop(['unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'], axis = 1)

In [279]:
td.head()

Unnamed: 0,age,sex,rating,title,genre
0,24,M,5,Kolya,Comedy
1,24,M,4,Legends of the Fall,Drama Romance War Western
2,24,M,4,"Hunt for Red October, The",Action Thriller
3,24,M,5,"Remains of the Day, The",Drama
4,24,M,4,Men in Black,Action Adventure Comedy Sci-Fi


### 5.2.5 Gender vs Genre vs Rating

In [107]:
pd.pivot_table(td, index = ['genre', 'sex'])

Unnamed: 0_level_0,Unnamed: 1_level_0,age,rating
genre,sex,Unnamed: 2_level_1,Unnamed: 3_level_1
Action,F,28.651515,2.795455
Action,M,28.886212,2.852744
Action Western,F,33.25,3.1875
Action Western,M,35.628099,3.950413
Action Thriller,F,31.798726,3.472611
Action Thriller,M,31.864014,3.407233
Action Thriller War,F,32.428571,3.642857
Action Thriller War,M,33.12037,3.175926
Action Sci-Fi,F,29.053571,3.107143
Action Sci-Fi,M,29.596825,3.177778


### 5.2.6 Genre vs Age vs Gender

In [108]:
td.head()

Unnamed: 0,age,sex,rating,genre
0,24,M,5,Comedy
1,24,M,4,Drama Romance War Western
2,24,M,4,Action Thriller
3,24,M,5,Drama
4,24,M,4,Action Adventure Comedy Sci-Fi


In [109]:
t_gen = td.pivot_table(index = 'genre', columns = 'sex', values = 'rating')

In [110]:
t_gen.head()

sex,F,M
genre,Unnamed: 1_level_1,Unnamed: 2_level_1
Action,2.795455,2.852744
Action Western,3.1875,3.950413
Action Thriller,3.472611,3.407233
Action Thriller War,3.642857,3.175926
Action Sci-Fi,3.107143,3.177778


In [111]:
t_gen['diff'] = t_gen['M'] - t_gen['F']

In [112]:
t_gen.head()

sex,F,M,diff
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Action,2.795455,2.852744,0.05729
Action Western,3.1875,3.950413,0.762913
Action Thriller,3.472611,3.407233,-0.065378
Action Thriller War,3.642857,3.175926,-0.466931
Action Sci-Fi,3.107143,3.177778,0.070635


In [113]:
# Top 10 genres highly rated by Males but not by Females 
t_gen.sort_values('diff', ascending=False).head(10)

sex,F,M,diff
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mystery Sci-Fi,1.0,3.8,2.8
War,1.714286,3.470588,1.756303
Comedy Crime Drama,2.0,3.666667,1.666667
Action Crime Romance,2.882353,3.7,0.817647
Action Western,3.1875,3.950413,0.762913
Action Adventure Crime Thriller,2.0,2.736842,0.736842
Film-Noir Sci-Fi Thriller,3.0,3.727273,0.727273
Mystery Romance Thriller,2.857143,3.580645,0.723502
Drama Western,2.666667,3.333333,0.666667
Drama Mystery Sci-Fi Thriller,3.491228,4.10396,0.612732


In [114]:
# Top 10 genres highly rated by Males but not by Females 
t_gen.sort_values('diff', ascending=False).head(10)['diff'].plot(kind = 'bar')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x258ba588>

In [115]:
# Top 10 genres highly rated by Females but not by Males
t_gen.sort_values('diff').head(10)

sex,F,M,diff
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Action Adventure Children's,3.0,1.0,-2.0
Action Crime,4.25,3.0,-1.25
Comedy Fantasy,3.230769,2.181818,-1.048951
Action Adventure Drama Romance,3.782609,2.746032,-1.036577
Action Adventure Children's Fantasy,3.666667,2.714286,-0.952381
Adventure Children's Drama,2.782609,1.865385,-0.917224
Action Adventure Comedy War,3.0,2.210526,-0.789474
Action Adventure Romance,3.0,2.285714,-0.714286
Comedy Western,3.2,2.5,-0.7
Children's Comedy Mystery,2.857143,2.25641,-0.600733


In [116]:
# Top 10 genres highly rated by Females but not by Males
t_gen.sort_values('diff').head(10)['diff'].plot(kind = 'bar')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x23033940>

### 5.2.7 Rating vs Title

In [117]:
dff = z[['movie_id', 'title', 'rating']]
dff.head()

Unnamed: 0,movie_id,title,rating
0,242,Kolya (1996),5
1,51,Legends of the Fall (1994),4
2,265,"Hunt for Red October, The (1990)",4
3,86,"Remains of the Day, The (1993)",5
4,257,Men in Black (1997),4


In [118]:
dff_1 = dff.groupby('title').agg([np.mean, np.size])

In [119]:
dff_1.head()

Unnamed: 0_level_0,movie_id,movie_id,rating,rating
Unnamed: 0_level_1,mean,size,mean,size
title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
'Til There Was You (1997),1300.0,9,2.333333,9
1-900 (1994),1353.0,5,2.6,5
101 Dalmatians (1996),225.0,109,2.908257,109
12 Angry Men (1957),178.0,125,4.344,125
187 (1997),330.0,41,3.02439,41


In [120]:
dff_1.drop('movie_id', axis = 1).head()

Unnamed: 0_level_0,rating,rating
Unnamed: 0_level_1,mean,size
title,Unnamed: 1_level_2,Unnamed: 2_level_2
'Til There Was You (1997),2.333333,9
1-900 (1994),2.6,5
101 Dalmatians (1996),2.908257,109
12 Angry Men (1957),4.344,125
187 (1997),3.02439,41


** 5.2.6.1 High rated movies (by rating)**

In [121]:
dff_1[dff_1['rating']['size']> 200]['rating']['mean'].sort_values(ascending = True).head(35).plot(kind = 'barh')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x230785c0>

** 5.2.6.2 Low rated movies (by rating)**

In [122]:
dff_1[dff_1['rating']['size'] < 50]['rating']['mean'].sort_values(ascending = False).head(35).plot(kind = 'barh')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x25c41860>

### 5.2.8 Ratings vs User - Cumulative Density Function 

In [123]:
movies_per_user = zz.groupby(by='user_id')['rating'].count()

In [124]:
movies_per_user = movies_per_user.sort_values(ascending=False)
movies_per_user.head()

user_id
405    737
655    685
13     636
450    540
276    518
Name: rating, dtype: int64

In [125]:
# Cumulative Density Function
sns.kdeplot(movies_per_user, cumulative = True)
plt.xlabel('Ratings per user')

<IPython.core.display.Javascript object>

Text(0.5,0,'Ratings per user')

** Interpretation:** 82% of the users have made less than 200 ratings while 18% of the users have rated more than 200 of them. 

## 6. Transformations

In [126]:
zz.head()

Unnamed: 0,user_id,age,sex,occupation,zip_code,movie_id,rating,timestamp,title,release_date,video_release_date,imdb_url,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,24,M,technician,85711,242,above_avg,889751633,Kolya,24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,24,M,technician,85711,51,above_avg,878543275,Legends of the Fall,01-Jan-1994,,http://us.imdb.com/M/title-exact?Legends%20of%...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1
2,1,24,M,technician,85711,265,above_avg,878542441,"Hunt for Red October, The",01-Jan-1990,,http://us.imdb.com/M/title-exact?Hunt+for+Red+...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,1,24,M,technician,85711,86,above_avg,878543541,"Remains of the Day, The",01-Jan-1993,,http://us.imdb.com/M/title-exact?Remains%20of%...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,1,24,M,technician,85711,257,above_avg,874965954,Men in Black,04-Jul-1997,,http://us.imdb.com/M/title-exact?Men+in+Black+...,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0


In [127]:
# Function to categorize 'rating'
def transformation_3(df):
    df['occupation'].replace(['student', 'other', 'educator', 'engineer', 'programmer', 'administrator', 'writer', 'librarian', 'technician', 'executive', 'healthcare', 'artist', 'entertainment', 'scientist', 'marketing', 'retired', 'lawyer', 'none', 'salesman', 'doctor', 'homemaker'],
                         ['category_1', 'category_2', 'category_2', 'category_2', 'category_2', 'category_2', 'category_3', 'category_3', 'category_4', 'category_4', 'category_4', 'category_4', 'category_4', 'category_4', 'category_5', 'category_5', 'category_5', 'category_5', 'category_5', 'category_5', 'category_5'], 
                      inplace = True)

In [128]:
transformation_3(zz)

In [129]:
zz['occupation'].head()

0    category_4
1    category_4
2    category_4
3    category_4
4    category_4
Name: occupation, dtype: object

## 7. Content Based Recommendation System

### Code - Version 1

In [285]:
# Build a 1-dimensional array with movie titles
titles = td['title']
indices = pd.Series(td.index, index=td['title'])

In [286]:
td.head()

Unnamed: 0,age,sex,rating,title,genre
0,24,M,5,Kolya,Comedy
1,24,M,4,Legends of the Fall,Drama Romance War Western
2,24,M,4,"Hunt for Red October, The",Action Thriller
3,24,M,5,"Remains of the Day, The",Drama
4,24,M,4,Men in Black,Action Adventure Comedy Sci-Fi


In [287]:
td['genre'] = td['genre'].fillna(" ").astype('str')

In [288]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer().fit_transform(td['genre'])
tfidf.shape

(100000, 21)

In [289]:
tfidf

<100000x21 sparse matrix of type '<class 'numpy.float64'>'
	with 227058 stored elements in Compressed Sparse Row format>

In [290]:
from sklearn.metrics.pairwise import linear_kernel
cosine_similarities = linear_kernel(tfidf)

MemoryError: 

## Collaborative Filtering Recommendation Model

In [159]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [162]:
# Randomly sample 1% of the ratings dataset
sample_data = ratings.sample(frac=0.02)

In [163]:
sample_data.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
73905,8,510,4,879362233
88606,58,223,5,884305150
95609,936,825,4,886832502
43519,608,702,1,880406862
27636,317,748,5,891446843


In [165]:
sample_data.shape

(2000, 4)

In [168]:
from sklearn import model_selection

In [171]:
from sklearn import cross_validation as cv
train_data, test_data = cv.train_test_split(small_data, test_size=0.2)

In [193]:
# Create two user-item matrices, one for training and another for testing
train_data_matrix = train_data[['user_id', 'movie_id', 'rating']].values
test_data_matrix = test_data[['user_id', 'movie_id', 'rating']].values

In [179]:
# Check their shape
print(train_data_matrix.shape)
print(test_data_matrix.shape)

(1600, 3)
(400, 3)


In [204]:
from sklearn.metrics.pairwise import pairwise_distances

In [210]:
pairwise_distances(train_data, metric='correlation')[:2][:2]

array([[  0.00000000e+00,   1.15463195e-14,   1.70974346e-14, ...,
          1.67532654e-13,   6.22835117e-13,   2.57571742e-14],
       [  1.15463195e-14,   0.00000000e+00,   5.01820807e-14, ...,
          1.23789867e-13,   6.40709708e-13,   3.47499807e-14]])

In [211]:
1 - pairwise_distances(train_data, metric='correlation')[:2][:2]

array([[ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.]])

In [212]:
# User Similarity Matrix
user_correlation = 1 - pairwise_distances(train_data, metric='correlation')

In [213]:
print(user_correlation[:4, :4])

[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]


In [207]:
user_correlation[np.isnan(user_correlation)] = 0

In [208]:
print(user_correlation[:4, :4])

[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]


In [180]:
# User Similarity Matrix
user_correlation = 1 - pairwise_distances(train_data, metric='correlation')
user_correlation[np.isnan(user_correlation)] = 0
print(user_correlation[:4, :4])

[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]


In [300]:
train_data_matrix

array([[ 345,   12,    5],
       [ 500,   49,    4],
       [ 230,   97,    5],
       ..., 
       [ 864,  578,    3],
       [ 634, 1162,    1],
       [ 432,  255,    5]], dtype=int64)

In [299]:
train_data_matrix.T

array([[ 345,  500,  230, ...,  864,  634,  432],
       [  12,   49,   97, ...,  578, 1162,  255],
       [   5,    4,    5, ...,    3,    1,    5]], dtype=int64)

In [181]:
# Item Similarity Matrix
item_correlation = 1 - pairwise_distances(train_data_matrix.T, metric='correlation')
item_correlation[np.isnan(item_correlation)] = 0
print(item_correlation[:4, :4])

[[ 1.          0.00288541 -0.01119268]
 [ 0.00288541  1.         -0.18799895]
 [-0.01119268 -0.18799895  1.        ]]


### Code - Version 2

In [308]:
# Using Collabrative Filtering
movies_cateogary = movies.columns[6:]

In [309]:
movies_cateogary

Index(['Action', 'Adventure', 'Animation', 'Children's', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'], dtype='object')

In [303]:
movies.loc[0]

movie_id                                                              1
title                                                  Toy Story (1995)
release_date                                                01-Jan-1995
video_release_date                                                  NaN
imdb_url              http://us.imdb.com/M/title-exact?Toy%20Story%2...
unknown                                                               0
Action                                                                0
Adventure                                                             0
Animation                                                             1
Children's                                                            1
Comedy                                                                1
Crime                                                                 0
Documentary                                                           0
Drama                                                           

In [310]:
def dot_product(vector1,vector2):
    return sum([ i*j for i,j in zip(vector1, vector2)])

In [311]:
def movie_score(movie_features,user_preferences):
    return dot_product(movie_features, user_preferences)

In [312]:
# Toy Story Movie Featrues
ts_f = movies.loc[0][6:]
ts_f

Action         0
Adventure      0
Animation      1
Children's     1
Comedy         1
Crime          0
Documentary    0
Drama          0
Fantasy        0
Film-Noir      0
Horror         0
Musical        0
Mystery        0
Romance        0
Sci-Fi         0
Thriller       0
War            0
Western        0
Name: 0, dtype: object

In [313]:
from collections import OrderedDict
##Manually using preferences for some user , we can change it according to our needs 
user_preferences = OrderedDict(zip(movies_cateogary, []))

In [None]:
ts_user_predicted_score = dot_product(ts_f, user_preferences.values())  
ts_user_predicted_score

In [316]:
ratings.head(1)

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949


In [317]:
td.head(1)

Unnamed: 0,age,sex,rating,title,genre
0,24,M,5,Kolya,Comedy


In [322]:
new_z = z[['user_id', 'rating', 'title']]

In [323]:
# Format 'title' i.e. remove 'year' from title
new_z['title'] = new_z['title'].astype(str).str[:-7]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [324]:
new_z

Unnamed: 0,user_id,rating,title
0,1,5,Kolya
1,1,4,Legends of the Fall
2,1,4,"Hunt for Red October, The"
3,1,5,"Remains of the Day, The"
4,1,4,Men in Black
5,1,4,Star Trek: First Contact
6,1,3,"To Wong Foo, Thanks for Everything! Julie Newmar"
7,1,1,Batman Forever
8,1,4,Die Hard
9,1,3,Twister


In [325]:
ratings_mtx = new_z.pivot_table(values='rating', index='user_id', columns='title')

In [None]:

ratings_mtx_df.fillna(0, inplace=True)

movie_index = ratings_mtx_df.columns

ratings_mtx_df.head() 

### Version - 3

In [328]:
ratings_matrix = ratings.pivot_table(index=['movie_id'],columns=['user_id'],values='rating').reset_index(drop=True)

In [329]:
ratings_matrix.head()

user_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,...,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,834,835,836,837,838,839,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,859,860,861,862,863,864,865,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,887,888,889,890,891,892,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943
0,5.0,4.0,,,4.0,4.0,,,,4.0,,,3.0,,1.0,5.0,4.0,5.0,,3.0,5.0,,5.0,,5.0,3.0,,,,,,,,,,,,5.0,,,4.0,5.0,5.0,4.0,5.0,,,,2.0,,,,,4.0,,4.0,5.0,5.0,2.0,,,2.0,3.0,4.0,3.0,3.0,3.0,,,4.0,,4.0,2.0,,4.0,,5.0,,4.0,,4.0,4.0,4.0,2.0,,,,,5.0,,,4.0,5.0,4.0,5.0,5.0,4.0,,4.0,,3.0,3.0,,,,4.0,,4.0,4.0,,,,,,,,4.0,,,4.0,4.0,,,3.0,4.0,,,4.0,,5.0,4.0,,,5.0,,,3.0,4.0,,,3.0,,,4.0,3.0,,,4.0,,4.0,5.0,,,,,,5.0,4.0,,4.0,,4.0,,,,,,5.0,,,,,,3.0,,,3.0,4.0,,,3.0,4.0,,4.0,,,,,5.0,,,,4.0,4.0,,,,4.0,1.0,5.0,3.0,3.0,3.0,2.0,,,,,5.0,5.0,,,2.0,,,4.0,,,,,,4.0,4.0,,,,,,,5.0,3.0,4.0,,3.0,4.0,,,,,,,4.0,4.0,4.0,,4.0,4.0,3.0,4.0,4.0,...,,,,5.0,4.0,3.0,,4.0,,4.0,,5.0,4.0,,5.0,4.0,4.0,,,,3.0,5.0,5.0,,,,,5.0,,3.0,,,4.0,3.0,,,4.0,2.0,,2.0,,4.0,,,5.0,,,,4.0,,4.0,2.0,4.0,5.0,4.0,4.0,,3.0,,,,,4.0,4.0,,5.0,,1.0,,4.0,4.0,,,5.0,5.0,4.0,5.0,5.0,,3.0,,,,4.0,,4.0,,,,,,4.0,4.0,,3.0,3.0,3.0,,4.0,4.0,4.0,4.0,2.0,,4.0,,4.0,,,,5.0,4.0,4.0,4.0,,,,,,,,5.0,,4.0,,,,5.0,4.0,4.0,,,4.0,,,4.0,4.0,4.0,,,,3.0,,,5.0,4.0,,,,3.0,,,,3.0,,,,,4.0,,3.0,,,,,,,,,,5.0,1.0,,4.0,4.0,,5.0,,3.0,,,,,,,4.0,4.0,4.0,5.0,3.0,,5.0,4.0,5.0,,3.0,4.0,,5.0,5.0,4.0,4.0,4.0,5.0,,3.0,,5.0,5.0,3.0,,,,5.0,,,4.0,,,2.0,,,4.0,3.0,3.0,4.0,,3.0,5.0,3.0,5.0,,,5.0,,3.0,3.0,,4.0,3.0,2.0,3.0,4.0,,4.0,,,5.0,,
1,3.0,,,,3.0,,,,,,,,3.0,,,,,,,,,2.0,,,,,,,,3.0,,,,,,,,,,,,5.0,,,,,,,1.0,,,,,,,,,,,,,,,3.0,,,,,,,,3.0,,,,,,,,,,,4.0,,,,4.0,,,,,3.0,,,2.0,,,,,,,2.0,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,3.0,,,,3.0,,,4.0,2.0,,,,,,3.0,,,,,,4.0,,,,3.0,,,,,3.0,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,3.0,4.0,...,,,,,,,,,,,,3.0,,,,4.0,,,,,,3.0,,,,,,,,,,,,4.0,,,,,,,,,,,3.0,,,,,,,,3.0,,,4.0,,4.0,,,,,,3.0,,,,,,,3.0,,,,,,,,,3.0,1.0,,,,,,,,,,,,,,,,3.0,,,,,3.0,5.0,,4.0,,,,,,4.0,,3.0,4.0,,,,,,,,3.0,,,,,,,,,,,3.0,,,,3.0,,,,,,,,,,,,,,4.0,,5.0,,,,,,,,,,,,,,,,,,4.0,,,,2.0,,2.0,,,,,,,,,,3.0,,,,,,4.0,,,3.0,,,4.0,,,,3.0,,,3.0,,,,,,,,,,,,,,,,,3.0,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,5.0
2,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,,,3.0,,,,,,,,,,4.0,,,3.0,2.0,,,,,,,,,,,,,,,,,,4.0,2.0,,,,,,,,,,,,,1.0,,,,3.0,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,,,,,,,,,,,,,,,3.0,,,,,,,,,,,,3.0,,,3.0,,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,,,4.0,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,5.0,,2.0,,,,,...,,,,,,3.0,,,,,,,,,,,,,,,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,3.0,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0,,2.0,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,5.0,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,3.0,,,4.0,,,,,,,,,,,,,,,,,,,,,2.0,,,,,,3.0,1.0,,,,,,4.0,,,,,,,,,,,,,4.0,,,,,,,
3,3.0,,,,,,5.0,,,4.0,,5.0,5.0,,,5.0,,3.0,4.0,,,5.0,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,2.0,,,,,,,,,,4.0,,,4.0,,3.0,,,,,,,,,,,,,3.0,,,,,,2.0,3.0,,,5.0,,,,,4.0,,4.0,,,,,5.0,,,2.0,,,,,,,2.0,,,,,,4.0,,,,,,,,,,,,,,,2.0,,,,,,,,,,,,,,4.0,,,,,,,5.0,,,,,,,4.0,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,,,,,4.0,,,3.0,3.0,,,4.0,,,,,,4.0,,,4.0,,,,,,5.0,,3.0,4.0,,3.0,3.0,,,,,,,,,,4.0,3.0,4.0,,,,,,,,,,,,,,,4.0,,...,,,,,,,,,,,,,,3.0,,3.0,,,4.0,,,4.0,2.0,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,4.0,4.0,4.0,,,,,,,,5.0,4.0,,,,,5.0,3.0,,,,,,,1.0,,,2.0,,,,,,3.0,,,,,,4.0,,3.0,,3.0,,,,,4.0,5.0,,,,,,,,4.0,2.0,,,,,,,,,,,,,,,,,,5.0,,,4.0,,,,,,,3.0,,,,,,,,,,,,,5.0,,,,,5.0,,,2.0,,,,,,4.0,,,,4.0,,,,,,2.0,3.0,,,,3.0,,,,,4.0,3.0,4.0,4.0,,,3.0,,,3.0,,,,,,,3.0,,,,,,,4.0,,,,,,,,,,4.0,,,4.0,,,1.0,,,,,,,,,,,,,,3.0,5.0,,,,,,2.0,,,
4,3.0,,,,,,,,,,,,1.0,,,,,,,,2.0,,,,,,,3.0,,,,,,,,,,,,,,,4.0,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,3.0,,,,,,,3.0,,,,,,,,,2.0,,,,,,,,,,,,4.0,,,,,3.0,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,,3.0,,,,,,3.0,,,,,,4.0,,,,,,,,,,,,,,,5.0,,,,,,,,,3.0,,,4.0,,,,,,4.0,,,,,,,,,,,,,,,,,,


In [330]:
ratings_matrix.fillna(0, inplace = True)

In [331]:
ratings_matrix.head()

user_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,...,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,834,835,836,837,838,839,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,859,860,861,862,863,864,865,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,887,888,889,890,891,892,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943
0,5.0,4.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,3.0,0.0,1.0,5.0,4.0,5.0,0.0,3.0,5.0,0.0,5.0,0.0,5.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,4.0,5.0,5.0,4.0,5.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,5.0,5.0,2.0,0.0,0.0,2.0,3.0,4.0,3.0,3.0,3.0,0.0,0.0,4.0,0.0,4.0,2.0,0.0,4.0,0.0,5.0,0.0,4.0,0.0,4.0,4.0,4.0,2.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,4.0,5.0,4.0,5.0,5.0,4.0,0.0,4.0,0.0,3.0,3.0,0.0,0.0,0.0,4.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,4.0,4.0,0.0,0.0,3.0,4.0,0.0,0.0,4.0,0.0,5.0,4.0,0.0,0.0,5.0,0.0,0.0,3.0,4.0,0.0,0.0,3.0,0.0,0.0,4.0,3.0,0.0,0.0,4.0,0.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,5.0,4.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,3.0,4.0,0.0,0.0,3.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,4.0,1.0,5.0,3.0,3.0,3.0,2.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0,0.0,2.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,3.0,4.0,0.0,3.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,4.0,0.0,4.0,4.0,3.0,4.0,4.0,...,0.0,0.0,0.0,5.0,4.0,3.0,0.0,4.0,0.0,4.0,0.0,5.0,4.0,0.0,5.0,4.0,4.0,0.0,0.0,0.0,3.0,5.0,5.0,0.0,0.0,0.0,0.0,5.0,0.0,3.0,0.0,0.0,4.0,3.0,0.0,0.0,4.0,2.0,0.0,2.0,0.0,4.0,0.0,0.0,5.0,0.0,0.0,0.0,4.0,0.0,4.0,2.0,4.0,5.0,4.0,4.0,0.0,3.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,5.0,0.0,1.0,0.0,4.0,4.0,0.0,0.0,5.0,5.0,4.0,5.0,5.0,0.0,3.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,3.0,3.0,3.0,0.0,4.0,4.0,4.0,4.0,2.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,5.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,0.0,5.0,4.0,4.0,0.0,0.0,4.0,0.0,0.0,4.0,4.0,4.0,0.0,0.0,0.0,3.0,0.0,0.0,5.0,4.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,4.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,1.0,0.0,4.0,4.0,0.0,5.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,4.0,5.0,3.0,0.0,5.0,4.0,5.0,0.0,3.0,4.0,0.0,5.0,5.0,4.0,4.0,4.0,5.0,0.0,3.0,0.0,5.0,5.0,3.0,0.0,0.0,0.0,5.0,0.0,0.0,4.0,0.0,0.0,2.0,0.0,0.0,4.0,3.0,3.0,4.0,0.0,3.0,5.0,3.0,5.0,0.0,0.0,5.0,0.0,3.0,3.0,0.0,4.0,3.0,2.0,3.0,4.0,0.0,4.0,0.0,0.0,5.0,0.0,0.0
1,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,5.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,3.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,3.0,0.0,0.0,4.0,0.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,4.0,0.0,5.0,5.0,0.0,0.0,5.0,0.0,3.0,4.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,3.0,3.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,3.0,4.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0,4.0,0.0,0.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,4.0,0.0,0.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,4.0,3.0,4.0,4.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,5.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
4,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


<div class="alert alert-warning">
  <strong>Why do we have to subtract 1 from  `pairwise-distances`! <br> We can use the pair-wise distances and sort them ascendingly, isn't it?</strong> 
</div>

** Cosine Similarity **

In [338]:
movie_similarity_1 = 1 - pairwise_distances(ratings_matrix.values, metric="cosine")
np.fill_diagonal(movie_similarity_1, 0) 
ratings_matrix_1 = pd.DataFrame(movie_similarity_1)

In [339]:
ratings_matrix_1.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,...,1432,1433,1434,1435,1436,1437,1438,1439,1440,1441,1442,1443,1444,1445,1446,1447,1448,1449,1450,1451,1452,1453,1454,1455,1456,1457,1458,1459,1460,1461,1462,1463,1464,1465,1466,1467,1468,1469,1470,1471,1472,1473,1474,1475,1476,1477,1478,1479,1480,1481,1482,1483,1484,1485,1486,1487,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,1515,1516,1517,1518,1519,1520,1521,1522,1523,1524,1525,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535,1536,1537,1538,1539,1540,1541,1542,1543,1544,1545,1546,1547,1548,1549,1550,1551,1552,1553,1554,1555,1556,1557,1558,1559,1560,1561,1562,1563,1564,1565,1566,1567,1568,1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590,1591,1592,1593,1594,1595,1596,1597,1598,1599,1600,1601,1602,1603,1604,1605,1606,1607,1608,1609,1610,1611,1612,1613,1614,1615,1616,1617,1618,1619,1620,1621,1622,1623,1624,1625,1626,1627,1628,1629,1630,1631,1632,1633,1634,1635,1636,1637,1638,1639,1640,1641,1642,1643,1644,1645,1646,1647,1648,1649,1650,1651,1652,1653,1654,1655,1656,1657,1658,1659,1660,1661,1662,1663,1664,1665,1666,1667,1668,1669,1670,1671,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,0.0,0.997039,0.997689,0.99791,0.996733,0.962146,0.998568,0.99734,0.997408,0.993006,0.998487,0.998082,0.995471,0.98997,0.998601,0.992954,0.996548,0.949126,0.982047,0.985209,0.997361,0.998327,0.994417,0.997996,0.998633,0.994888,0.995405,0.997991,0.994498,0.976923,0.998385,0.990111,0.997163,0.996416,0.970524,0.982128,0.983863,0.995084,0.997878,0.995919,0.990529,0.997945,0.996788,0.997911,0.987819,0.98885,0.995609,0.995682,0.99712,0.998614,0.99754,0.992133,0.997212,0.997036,0.99809,0.998082,0.986917,0.997246,0.987911,0.987988,0.988447,0.995565,0.993664,0.997997,0.997466,0.997393,0.993814,0.996412,0.997997,0.997329,0.997837,0.997084,0.996435,0.993298,0.972525,0.995848,0.998125,0.990529,0.998279,0.993985,0.996593,0.997825,0.997049,0.992497,0.996666,0.993286,0.997831,0.997696,0.997187,0.995474,0.997547,0.997495,0.994384,0.99489,0.997921,0.998234,0.997695,0.997865,0.997122,0.997762,0.996335,0.995132,0.995975,0.985784,0.994165,0.997959,0.997566,0.997585,0.998061,0.991268,0.998449,0.99369,0.947385,0.996344,0.996776,0.991138,0.99845,0.998171,0.974817,0.995303,0.998707,0.997329,0.997212,0.99408,0.99816,0.996411,0.997549,0.996225,0.993892,0.992979,0.990589,0.996785,0.994317,0.993024,0.995721,0.992942,0.991888,0.987568,0.994517,0.995029,0.99546,0.995188,0.997498,0.998384,0.996416,0.980029,0.997636,0.997784,0.969232,0.997145,0.998782,0.993196,0.997339,0.997127,0.997631,0.997844,0.997607,0.99556,0.99775,0.995062,0.997779,0.99424,0.995504,0.998142,0.98582,0.986805,0.996755,0.997714,0.996867,0.988785,0.992067,0.998104,0.998008,0.997985,0.9951,0.998005,0.996792,0.993393,0.995978,0.996113,0.998757,0.997254,0.997612,0.996844,0.996065,0.997349,0.996029,0.997507,0.996492,0.992644,0.996552,0.993341,0.996346,0.995845,0.997974,0.997734,0.99367,0.994572,0.993624,0.997683,0.997139,0.99805,0.997535,0.998011,0.993634,0.997432,0.991495,0.996371,0.996084,0.998017,0.995372,0.98964,0.99041,0.996665,0.997575,0.99766,0.997323,0.997731,0.996138,0.995661,0.983722,0.998825,0.997089,0.984219,0.99733,0.997512,0.996979,0.997956,0.997493,0.997447,0.996765,0.996132,0.996631,0.997691,0.99862,0.991443,0.998465,0.996758,0.997687,0.997306,0.997867,0.983573,0.991717,0.986485,0.994071,0.99558,0.971332,0.997544,0.997032,0.997484,...,0.737154,0.899328,0.982111,0.854071,0.97394,0.987626,0.992256,0.986507,0.989725,0.969517,0.995516,0.987912,0.919915,0.992597,0.918818,0.906734,0.961293,0.951452,0.970833,0.918818,0.918818,0.96695,0.941299,0.968952,0.918818,0.918818,0.99133,0.918818,0.918818,0.920307,0.927585,0.918703,0.82709,0.939939,0.994827,0.980613,0.979231,0.952115,0.984822,0.979031,0.983997,0.980667,0.920309,0.991843,0.721453,0.995431,0.977166,0.975525,0.944217,0.989911,0.993584,0.935004,0.961876,0.894102,0.968043,0.950038,0.970831,0.960938,0.901272,0.894102,0.894102,0.894102,0.983147,0.97058,0.97126,0.894102,0.929328,0.902634,0.902687,0.986046,0.982118,0.982328,0.977667,0.883013,0.971161,0.993631,0.958775,0.995203,0.977348,0.965032,0.857235,0.906595,0.983779,0.950433,0.970588,0.992,0.896657,0.954778,0.993368,0.986179,0.993889,0.968915,0.979068,0.987139,0.877763,0.790646,0.926025,0.974823,0.982717,0.809471,0.979882,0.802944,0.86725,0.932297,0.893319,0.827179,0.959704,0.986206,0.988199,0.986151,0.982517,0.934984,0.920574,0.762415,0.885872,0.762415,0.835879,0.874012,0.91281,0.964926,0.894808,0.843118,0.974888,0.838432,0.762415,0.949933,0.762415,0.920155,0.762415,0.762415,0.762415,0.762415,0.762415,0.762415,0.762415,0.762415,0.762415,0.762415,0.762415,0.762415,0.93222,0.762415,0.762415,0.762415,0.762415,0.822947,0.762415,0.762415,0.762415,0.762415,0.762415,0.762415,0.81378,0.762415,0.762415,0.690186,0.906016,0.690186,0.93314,0.941776,0.931772,0.987215,0.972777,0.972777,0.989503,0.835365,0.987054,0.829581,0.943129,0.790605,0.987332,0.981176,0.839184,0.932677,0.813497,0.816124,0.833841,0.7362,0.624644,0.707056,0.703239,0.992294,0.983209,0.989779,0.588453,0.992328,0.989935,0.934485,0.991142,0.985684,0.850254,0.776674,0.776674,0.988803,0.987918,0.896701,0.885041,0.770361,0.839379,0.770361,0.770361,0.770361,0.770361,0.770361,0.770361,0.770361,0.848562,0.770361,0.770361,0.826312,0.830969,0.719565,0.770361,0.789284,0.770361,0.770361,0.770361,0.770361,0.770361,0.656807,0.863123,0.956327,0.987848,0.969451,0.984167,0.796927,0.973975,0.973975,0.995625,0.480541,0.475309,0.9088,0.475309,0.475309,0.475309,0.475309,0.475309,0.475309,0.730885,0.988919,0.981689,0.962383,0.951715,0.951715,0.987154,0.640047,0.640047,0.640047,0.993776,0.992843
1,0.997039,0.0,0.996853,0.997374,0.998149,0.952847,0.996165,0.995496,0.993385,0.987632,0.997726,0.996343,0.990777,0.982864,0.996077,0.98756,0.997779,0.941519,0.972518,0.977199,0.998344,0.997852,0.991249,0.997509,0.996093,0.992697,0.997848,0.997385,0.997953,0.970976,0.998124,0.985676,0.998636,0.995693,0.97852,0.984892,0.984184,0.997999,0.997686,0.998132,0.995407,0.996885,0.997593,0.996918,0.982261,0.98682,0.993953,0.992837,0.998555,0.996462,0.998262,0.988554,0.997991,0.99864,0.997635,0.996502,0.98132,0.995019,0.982712,0.982933,0.983755,0.998305,0.997431,0.996557,0.995944,0.99822,0.9977,0.998535,0.997147,0.995274,0.997917,0.998342,0.998157,0.991345,0.979292,0.995302,0.997931,0.995387,0.997494,0.997801,0.994937,0.998207,0.995126,0.996159,0.997978,0.989396,0.996204,0.997677,0.995559,0.998287,0.99724,0.996646,0.989517,0.998019,0.998425,0.997997,0.996461,0.99594,0.997056,0.994089,0.998099,0.997235,0.995302,0.983343,0.996585,0.995693,0.993664,0.99658,0.997762,0.995461,0.995611,0.994865,0.938835,0.994449,0.993514,0.984732,0.997053,0.99749,0.968311,0.996537,0.997496,0.996808,0.996621,0.988503,0.996759,0.991774,0.99433,0.997969,0.988417,0.987132,0.986631,0.99526,0.99152,0.98884,0.993355,0.990371,0.985845,0.992234,0.997706,0.997349,0.995831,0.997222,0.996957,0.997866,0.998402,0.973126,0.99652,0.996498,0.962093,0.99382,0.996977,0.990759,0.995829,0.995883,0.997571,0.996821,0.996443,0.998175,0.99823,0.992309,0.998396,0.991655,0.994625,0.997982,0.980495,0.982201,0.997407,0.996619,0.994687,0.983455,0.98775,0.997398,0.997339,0.996734,0.992389,0.997055,0.995888,0.990578,0.993344,0.993769,0.997288,0.995161,0.996192,0.998375,0.993749,0.996993,0.993763,0.99663,0.994984,0.98919,0.994321,0.99028,0.994491,0.993459,0.997568,0.99634,0.990079,0.991521,0.990471,0.996472,0.997353,0.997638,0.995649,0.997276,0.991496,0.997597,0.988496,0.995448,0.99392,0.997744,0.993052,0.985235,0.985725,0.996849,0.997027,0.995896,0.998337,0.997947,0.997966,0.992883,0.975437,0.997243,0.994199,0.978062,0.997332,0.998679,0.998499,0.997831,0.99825,0.998295,0.998668,0.998429,0.998802,0.996654,0.997483,0.987056,0.995528,0.994836,0.998563,0.997321,0.997184,0.975866,0.989393,0.978415,0.990695,0.991977,0.971352,0.994456,0.995708,0.996213,...,0.722352,0.898745,0.988543,0.847487,0.982379,0.991803,0.995584,0.992187,0.992809,0.973902,0.997123,0.992021,0.914809,0.995886,0.911035,0.898879,0.953843,0.945082,0.96492,0.911035,0.911035,0.961196,0.934518,0.962351,0.911035,0.911035,0.991591,0.911035,0.911035,0.910719,0.918766,0.926428,0.819484,0.933829,0.995878,0.987721,0.986765,0.960147,0.990553,0.971583,0.985902,0.980491,0.915789,0.995018,0.711354,0.997896,0.976161,0.983756,0.953466,0.9933,0.993825,0.947686,0.966436,0.905849,0.973976,0.958834,0.978177,0.963431,0.912795,0.905849,0.905849,0.905849,0.985957,0.97746,0.973464,0.905849,0.930915,0.91362,0.895464,0.985135,0.987709,0.975682,0.984141,0.875321,0.962442,0.996389,0.969265,0.996445,0.971924,0.957307,0.846686,0.894706,0.979606,0.945829,0.978129,0.994397,0.910058,0.962084,0.993023,0.99124,0.992723,0.960755,0.969246,0.983568,0.86728,0.776318,0.924163,0.976446,0.989741,0.802486,0.971962,0.789422,0.863103,0.924451,0.88112,0.817886,0.970321,0.99078,0.985471,0.992272,0.988711,0.946975,0.927695,0.779265,0.896302,0.779265,0.836102,0.888065,0.921243,0.973103,0.891389,0.851938,0.980636,0.851886,0.779265,0.945261,0.779265,0.914638,0.779265,0.779265,0.779265,0.779265,0.779265,0.779265,0.779265,0.779265,0.779265,0.779265,0.779265,0.779265,0.93621,0.779265,0.779265,0.779265,0.779265,0.821229,0.779265,0.779265,0.779265,0.779265,0.779265,0.779265,0.810961,0.779265,0.779265,0.687498,0.900479,0.687498,0.929742,0.934754,0.918824,0.987588,0.97385,0.97385,0.986439,0.82042,0.983652,0.819996,0.930776,0.783178,0.987157,0.978061,0.831132,0.916057,0.80619,0.805382,0.817068,0.719119,0.608095,0.693946,0.689555,0.993304,0.982907,0.982766,0.575247,0.987938,0.987546,0.929826,0.992977,0.981973,0.841626,0.762437,0.762437,0.993192,0.988831,0.888129,0.880118,0.763633,0.832442,0.763633,0.763633,0.763633,0.763633,0.763633,0.763633,0.763633,0.840872,0.763633,0.763633,0.818063,0.823258,0.707885,0.763633,0.78151,0.763633,0.763633,0.763633,0.763633,0.763633,0.642614,0.844971,0.947394,0.992709,0.960712,0.99075,0.778525,0.967346,0.967346,0.994694,0.466307,0.46112,0.90355,0.46112,0.46112,0.46112,0.46112,0.46112,0.46112,0.715502,0.986034,0.974448,0.953896,0.94666,0.94666,0.97722,0.62552,0.62552,0.62552,0.994732,0.990269
2,0.997689,0.996853,0.0,0.996117,0.997037,0.957826,0.997599,0.994353,0.995232,0.989078,0.997559,0.996326,0.992847,0.985622,0.997257,0.991316,0.997806,0.945098,0.97802,0.981824,0.997053,0.99672,0.9906,0.998525,0.997416,0.992001,0.996602,0.995762,0.994964,0.97088,0.997191,0.986219,0.997277,0.997128,0.970755,0.982268,0.982548,0.995638,0.996616,0.995183,0.993179,0.99686,0.995929,0.995864,0.982426,0.986938,0.992668,0.992317,0.996529,0.996768,0.996374,0.987319,0.996936,0.996718,0.997529,0.996074,0.982588,0.994336,0.98334,0.983399,0.984455,0.995692,0.993557,0.995706,0.994972,0.996026,0.994594,0.99671,0.995666,0.994388,0.995925,0.995759,0.995,0.990337,0.973477,0.993784,0.99666,0.99101,0.996469,0.994634,0.994061,0.996217,0.993895,0.99444,0.995993,0.988754,0.995242,0.995552,0.994353,0.995478,0.995325,0.996559,0.992868,0.994455,0.996721,0.996759,0.994904,0.995342,0.994521,0.995678,0.995391,0.993057,0.996135,0.988501,0.995527,0.997955,0.995964,0.997944,0.998252,0.990284,0.997484,0.993615,0.943515,0.993195,0.993764,0.987494,0.998414,0.99809,0.965931,0.997039,0.998144,0.998585,0.998587,0.991154,0.998137,0.994926,0.995189,0.997336,0.992506,0.989645,0.984508,0.993328,0.989368,0.988137,0.991906,0.987343,0.988374,0.987931,0.993916,0.993797,0.992007,0.99314,0.994884,0.997015,0.996338,0.981754,0.998464,0.998235,0.966615,0.996584,0.997272,0.988761,0.994429,0.994312,0.996232,0.996762,0.995265,0.996247,0.997623,0.992159,0.996468,0.989411,0.991783,0.996827,0.979909,0.980902,0.995116,0.99521,0.994028,0.98321,0.988876,0.996043,0.995888,0.995555,0.991422,0.995944,0.993707,0.988482,0.992661,0.99255,0.997292,0.994663,0.995055,0.997303,0.992607,0.994955,0.992648,0.995341,0.993948,0.987955,0.993024,0.989382,0.992839,0.991653,0.996087,0.994991,0.988807,0.991218,0.988372,0.99554,0.997066,0.996083,0.99489,0.995776,0.988681,0.996067,0.987156,0.993107,0.992896,0.996068,0.991231,0.984632,0.984982,0.994777,0.995072,0.994856,0.996833,0.996613,0.996059,0.995064,0.979319,0.997939,0.994482,0.980656,0.996883,0.996933,0.996237,0.996135,0.99608,0.996126,0.996467,0.996114,0.996706,0.995373,0.998384,0.990936,0.997491,0.993807,0.996607,0.998526,0.99565,0.980946,0.992471,0.985952,0.994959,0.996099,0.974247,0.997184,0.998173,0.998311,...,0.739157,0.902294,0.983004,0.852589,0.977418,0.987283,0.991901,0.989073,0.98905,0.969314,0.995343,0.985464,0.913925,0.991993,0.905578,0.900879,0.951282,0.939631,0.960436,0.905578,0.905578,0.956197,0.928281,0.95952,0.905578,0.905578,0.987104,0.905578,0.905578,0.914835,0.915421,0.918277,0.823994,0.936607,0.99625,0.983536,0.981129,0.952891,0.988552,0.974301,0.982697,0.975543,0.917016,0.99441,0.727069,0.995554,0.973247,0.976188,0.949804,0.993496,0.994022,0.94005,0.964087,0.900473,0.968189,0.952918,0.974839,0.961274,0.907557,0.900473,0.900473,0.900473,0.984613,0.975575,0.96999,0.900473,0.929786,0.908537,0.901487,0.987235,0.982887,0.980877,0.984022,0.879188,0.964717,0.993196,0.960614,0.997241,0.976937,0.956022,0.860044,0.907579,0.976826,0.947171,0.972612,0.990407,0.900303,0.959988,0.990666,0.986117,0.989306,0.96242,0.974579,0.981988,0.880614,0.793749,0.92379,0.96997,0.984054,0.807125,0.972713,0.807025,0.864419,0.926076,0.892457,0.827531,0.962338,0.986162,0.984648,0.98707,0.985471,0.936296,0.919353,0.764473,0.887426,0.764473,0.835258,0.876301,0.914203,0.966698,0.892728,0.843847,0.975268,0.838899,0.764473,0.940334,0.764473,0.917621,0.764473,0.764473,0.764473,0.764473,0.764473,0.764473,0.764473,0.764473,0.764473,0.764473,0.764473,0.764473,0.924693,0.764473,0.764473,0.764473,0.764473,0.82203,0.764473,0.764473,0.764473,0.764473,0.764473,0.764473,0.812697,0.764473,0.764473,0.695473,0.908656,0.695473,0.932853,0.939986,0.920866,0.987894,0.976526,0.976526,0.988934,0.840886,0.981078,0.829713,0.947655,0.788908,0.982283,0.981589,0.839338,0.935027,0.812454,0.817973,0.837898,0.741822,0.630379,0.710765,0.707745,0.98976,0.984434,0.988352,0.592553,0.988223,0.9859,0.938989,0.993966,0.981978,0.848578,0.780422,0.780422,0.989325,0.982548,0.892754,0.883807,0.768696,0.836313,0.768696,0.768696,0.768696,0.768696,0.768696,0.768696,0.768696,0.844135,0.768696,0.768696,0.822981,0.830722,0.721785,0.768696,0.787217,0.768696,0.768696,0.768696,0.768696,0.768696,0.66194,0.857348,0.95583,0.991794,0.964057,0.987589,0.799193,0.965848,0.965848,0.992127,0.486502,0.481305,0.912924,0.481305,0.481305,0.481305,0.481305,0.481305,0.481305,0.733056,0.987093,0.97283,0.950896,0.958501,0.958501,0.984153,0.642587,0.642587,0.642587,0.992964,0.991763
3,0.99791,0.997374,0.996117,0.0,0.997121,0.963887,0.997821,0.99844,0.996942,0.993899,0.998558,0.998643,0.995517,0.990396,0.997209,0.991982,0.996168,0.952424,0.981647,0.985174,0.997059,0.998666,0.996497,0.997099,0.997612,0.996792,0.995769,0.998755,0.994992,0.980935,0.998492,0.992586,0.997527,0.995566,0.973964,0.985027,0.988557,0.99517,0.998705,0.996975,0.989942,0.998581,0.997422,0.998711,0.990375,0.991779,0.997785,0.997241,0.997229,0.998371,0.998028,0.994679,0.998127,0.997295,0.998364,0.998736,0.989103,0.998245,0.990418,0.990568,0.990953,0.995749,0.99429,0.998765,0.998513,0.998064,0.994584,0.996578,0.998677,0.998376,0.99821,0.998052,0.997541,0.995733,0.977286,0.997967,0.998534,0.991343,0.998833,0.994761,0.998236,0.998361,0.998198,0.991295,0.997877,0.995227,0.99859,0.998448,0.998437,0.996353,0.998631,0.998405,0.994102,0.995815,0.998118,0.99871,0.998583,0.998566,0.998496,0.997379,0.997617,0.996503,0.99338,0.982399,0.992524,0.995983,0.995828,0.996876,0.997331,0.992353,0.99687,0.991513,0.950157,0.997688,0.997297,0.991603,0.996697,0.996442,0.978221,0.992993,0.997412,0.995841,0.995099,0.994123,0.996101,0.994285,0.997702,0.996311,0.993245,0.993263,0.99325,0.998254,0.996206,0.994952,0.99755,0.995224,0.992414,0.986595,0.995988,0.99572,0.997522,0.996254,0.998562,0.998642,0.996716,0.976597,0.995436,0.995475,0.970731,0.996295,0.998258,0.995826,0.998493,0.998447,0.99803,0.998419,0.99861,0.995207,0.997694,0.996767,0.998089,0.996265,0.997537,0.998524,0.988714,0.989851,0.998073,0.998636,0.998024,0.991312,0.993987,0.99881,0.998742,0.998726,0.997023,0.998668,0.998273,0.99588,0.997562,0.997732,0.998417,0.998323,0.99857,0.997087,0.997771,0.998755,0.997768,0.998687,0.997978,0.995154,0.997986,0.995866,0.998052,0.997388,0.998796,0.99859,0.995734,0.996504,0.99568,0.998708,0.997816,0.998794,0.998484,0.998793,0.996173,0.998429,0.994385,0.998283,0.997843,0.998735,0.997283,0.992476,0.992826,0.998532,0.998651,0.998533,0.997956,0.998628,0.997488,0.993427,0.984455,0.997752,0.997824,0.985699,0.995762,0.99805,0.997373,0.998614,0.99809,0.997986,0.997376,0.996881,0.99706,0.998792,0.997673,0.990892,0.996924,0.998283,0.998366,0.996106,0.998721,0.982923,0.987721,0.983205,0.990319,0.993724,0.967003,0.996022,0.995276,0.995718,...,0.727242,0.900164,0.984096,0.857895,0.976828,0.991009,0.994168,0.985068,0.990891,0.972758,0.995382,0.991561,0.926848,0.992537,0.924435,0.911914,0.966775,0.956222,0.975969,0.924435,0.924435,0.971664,0.946705,0.97399,0.924435,0.924435,0.993161,0.924435,0.924435,0.924485,0.931883,0.92843,0.832414,0.943045,0.99459,0.980971,0.979014,0.957552,0.985116,0.980358,0.986296,0.986659,0.926636,0.99157,0.712036,0.996689,0.981785,0.979473,0.945834,0.989696,0.99176,0.940165,0.964443,0.896539,0.974338,0.954996,0.971759,0.964499,0.90355,0.896539,0.896539,0.896539,0.984558,0.969082,0.974585,0.896539,0.935772,0.904978,0.905483,0.983584,0.982208,0.980597,0.976708,0.888512,0.974112,0.993894,0.96208,0.994828,0.974943,0.970006,0.849734,0.903653,0.98781,0.953187,0.970942,0.994193,0.903846,0.950983,0.993315,0.989016,0.996321,0.972855,0.978965,0.990614,0.870319,0.782193,0.933393,0.981665,0.98342,0.814806,0.981548,0.793406,0.875327,0.938715,0.88932,0.827801,0.96276,0.987883,0.989515,0.987029,0.981876,0.93953,0.930485,0.777494,0.896851,0.777494,0.845091,0.885071,0.92247,0.970166,0.901182,0.855858,0.98055,0.851621,0.777494,0.958006,0.777494,0.92514,0.777494,0.777494,0.777494,0.777494,0.777494,0.777494,0.777494,0.777494,0.777494,0.777494,0.777494,0.777494,0.943589,0.777494,0.777494,0.777494,0.777494,0.83124,0.777494,0.777494,0.777494,0.777494,0.777494,0.777494,0.821532,0.777494,0.777494,0.688428,0.904055,0.688428,0.935929,0.943817,0.935283,0.984706,0.971668,0.971668,0.991115,0.823746,0.9906,0.829683,0.935757,0.79551,0.989226,0.982852,0.839735,0.921426,0.817406,0.80864,0.823883,0.723347,0.612432,0.696456,0.690252,0.993654,0.980044,0.987648,0.578663,0.990856,0.993106,0.926825,0.989394,0.988683,0.854009,0.764165,0.764165,0.988016,0.991144,0.901225,0.889286,0.776071,0.84491,0.776071,0.776071,0.776071,0.776071,0.776071,0.776071,0.776071,0.85451,0.776071,0.776071,0.831615,0.832849,0.716659,0.776071,0.79471,0.776071,0.776071,0.776071,0.776071,0.776071,0.645911,0.862241,0.949715,0.986762,0.971726,0.983052,0.788113,0.977876,0.977876,0.996194,0.468749,0.463502,0.902598,0.463502,0.463502,0.463502,0.463502,0.463502,0.463502,0.719699,0.990727,0.983644,0.967553,0.942079,0.942079,0.984677,0.631616,0.631616,0.631616,0.995213,0.99418
4,0.996733,0.998149,0.997037,0.997121,0.0,0.95575,0.996229,0.99532,0.993806,0.987841,0.997736,0.996468,0.991266,0.983646,0.99609,0.987993,0.997469,0.944591,0.974168,0.978832,0.997751,0.997523,0.99156,0.997344,0.996039,0.993427,0.997761,0.996923,0.997576,0.972021,0.997672,0.986339,0.998082,0.995869,0.980837,0.988119,0.986794,0.998008,0.997446,0.997548,0.994875,0.996841,0.997693,0.996858,0.982786,0.988391,0.994042,0.992915,0.997994,0.996358,0.997969,0.988381,0.998197,0.998352,0.997493,0.996331,0.982689,0.994816,0.98336,0.98356,0.98451,0.997273,0.996466,0.996403,0.995701,0.997942,0.997268,0.997729,0.996684,0.995052,0.997048,0.997785,0.997486,0.991256,0.983032,0.995899,0.997642,0.995392,0.997188,0.996655,0.994873,0.997578,0.994469,0.995418,0.99824,0.989462,0.995835,0.997062,0.995013,0.997917,0.996551,0.996776,0.990653,0.99734,0.997765,0.997497,0.99595,0.995752,0.996528,0.99423,0.997196,0.996129,0.995253,0.984893,0.995419,0.996277,0.993958,0.996089,0.997325,0.99462,0.995792,0.994393,0.942256,0.993255,0.992987,0.985545,0.99691,0.99722,0.967373,0.996044,0.997223,0.997022,0.996809,0.989132,0.996764,0.992555,0.994691,0.997407,0.989602,0.987381,0.98584,0.994775,0.990772,0.988533,0.993117,0.989209,0.986542,0.990993,0.997119,0.997037,0.994908,0.996113,0.996503,0.997279,0.997773,0.975036,0.996559,0.99662,0.965685,0.994443,0.996501,0.990078,0.995069,0.995003,0.997351,0.996497,0.996028,0.997107,0.997962,0.992528,0.99768,0.991021,0.99332,0.997565,0.980443,0.982115,0.996879,0.995828,0.993979,0.983551,0.988445,0.996922,0.996733,0.996243,0.991959,0.996432,0.99511,0.990355,0.993157,0.993375,0.997099,0.995019,0.995716,0.99786,0.993735,0.996436,0.993842,0.996238,0.994219,0.989093,0.993952,0.990844,0.994329,0.992722,0.99709,0.995805,0.989878,0.99174,0.989704,0.996421,0.997181,0.9971,0.995305,0.996793,0.99094,0.997132,0.98837,0.994891,0.99361,0.997156,0.992402,0.98568,0.985877,0.996693,0.9967,0.995485,0.997979,0.998135,0.998447,0.99362,0.976896,0.996935,0.994309,0.979213,0.996881,0.998239,0.997546,0.997047,0.997322,0.997408,0.998129,0.998232,0.998111,0.996535,0.997232,0.988216,0.995828,0.99467,0.997991,0.997318,0.996716,0.97794,0.99023,0.980698,0.991707,0.993156,0.973355,0.994847,0.996023,0.996428,...,0.730032,0.907431,0.987473,0.853458,0.981934,0.993682,0.996683,0.990819,0.993576,0.978677,0.996508,0.99134,0.920001,0.994381,0.910525,0.902997,0.95331,0.943851,0.963637,0.910525,0.910525,0.959759,0.93309,0.961702,0.910525,0.910525,0.990396,0.910525,0.910525,0.914691,0.919027,0.93439,0.82578,0.937337,0.995346,0.988394,0.986238,0.965174,0.992578,0.972538,0.986755,0.982659,0.921499,0.993874,0.718688,0.998221,0.978427,0.983961,0.949362,0.993475,0.993734,0.94722,0.963142,0.899845,0.976387,0.957891,0.975144,0.962992,0.906963,0.899845,0.899845,0.899845,0.983868,0.974657,0.970109,0.899845,0.933754,0.907784,0.899993,0.985335,0.988079,0.97698,0.984742,0.880096,0.963187,0.995817,0.971131,0.996399,0.974148,0.956832,0.852603,0.901152,0.978633,0.948844,0.979755,0.995461,0.918239,0.963156,0.992263,0.993162,0.991771,0.961397,0.9703,0.982747,0.872442,0.783816,0.930306,0.97905,0.989549,0.809148,0.972059,0.796612,0.870149,0.925452,0.88623,0.825736,0.972348,0.992224,0.985302,0.990721,0.987918,0.950387,0.935579,0.792212,0.906001,0.792212,0.845929,0.896988,0.929887,0.977163,0.897941,0.863907,0.983692,0.862491,0.792212,0.947381,0.792212,0.921509,0.792212,0.792212,0.792212,0.792212,0.792212,0.792212,0.792212,0.792212,0.792212,0.792212,0.792212,0.792212,0.940302,0.792212,0.792212,0.792212,0.792212,0.830442,0.792212,0.792212,0.792212,0.792212,0.792212,0.792212,0.819802,0.792212,0.792212,0.70047,0.908281,0.70047,0.937728,0.941528,0.919389,0.98756,0.975271,0.975271,0.987614,0.827819,0.982888,0.827636,0.936243,0.790436,0.985784,0.980229,0.838422,0.920253,0.813312,0.811126,0.82493,0.727979,0.617761,0.701373,0.696355,0.991381,0.983149,0.984065,0.583928,0.986521,0.986986,0.933216,0.993187,0.982371,0.848008,0.768951,0.768951,0.991072,0.986767,0.893199,0.885247,0.771031,0.838409,0.771031,0.771031,0.771031,0.771031,0.771031,0.771031,0.771031,0.846409,0.771031,0.771031,0.824508,0.830553,0.717606,0.771031,0.788692,0.771031,0.771031,0.771031,0.771031,0.771031,0.651384,0.850053,0.948888,0.992151,0.962193,0.988711,0.787014,0.966636,0.966636,0.992649,0.476078,0.470912,0.907687,0.470912,0.470912,0.470912,0.470912,0.470912,0.470912,0.722474,0.98695,0.973784,0.952554,0.949225,0.949225,0.97833,0.63317,0.63317,0.63317,0.994379,0.99041


In [382]:
# Input Movie
input_movie = movies.title[1]
print("Input Provided - ", input_movie)

# Get the index of the movie
inp = movies[movies['title'] == input_movie].index.tolist()[0]
print()
print("Index of the movie - ",inp)

# Create a column 'similarity' with the each similarity ratings of every other movie with the 'input-movie'
mv['similarity'] = ratings_matrix_1.iloc[inp]
print()
print("Most Similar Movies")
print(mv.sort_values(["similarity"], ascending = False )[1:10])

Input Provided -  GoldenEye (1995)

Index of the movie -  1

Most Similar Movies
     movie_id                                 title  similarity
549       550     Die Hard: With a Vengeance (1995)    0.998772
745       746                    Real Genius (1985)    0.998741
225       226                     Die Hard 2 (1990)    0.998679
230       231                 Batman Returns (1992)    0.998668
53         54                       Outbreak (1995)    0.998640
32         33                      Desperado (1995)    0.998636
575       576                    Cliffhanger (1993)    0.998633
448       449  Star Trek: The Motion Picture (1979)    0.998618
384       385                      True Lies (1994)    0.998595


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


** Euclidean Similarity **

In [389]:
movie_similarity_2 = 1 - pairwise_distances(ratings_matrix.values, metric="euclidean")
np.fill_diagonal(movie_similarity_2, 0) 
ratings_matrix_2 = pd.DataFrame(movie_similarity_2)

In [390]:
# Input Movie
input_movie = movies.title[1]
print("Input Provided - ", input_movie)

# Get the index of the movie
inp = movies[movies['title'] == input_movie].index.tolist()[0]
print()
print("Index of the movie - ",inp)

# Create a column 'similarity' with the each similarity ratings of every other movie with the 'input-movie'
mv['similarity'] = ratings_matrix_2.iloc[inp]
print()
print("Most Similar Movies")
print(mv.sort_values(["similarity"], ascending = False )[1:10])

Input Provided -  GoldenEye (1995)

Index of the movie -  1

Most Similar Movies
     movie_id                              title  similarity
48         49                        I.Q. (1994)   -0.412591
32         33                   Desperado (1995)   -0.412714
53         54                    Outbreak (1995)   -0.412820
549       550  Die Hard: With a Vengeance (1995)   -0.412933
232       233                 Under Siege (1992)   -0.412975
568       569                        Wolf (1994)   -0.413204
183       184            Army of Darkness (1993)   -0.413298
225       226                  Die Hard 2 (1990)   -0.413608
216       217       Bram Stoker's Dracula (1992)   -0.413742


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


** Manhattan Distance **

In [391]:
movie_similarity_3 = 1 - pairwise_distances(ratings_matrix.values, metric="manhattan")
np.fill_diagonal(movie_similarity_3, 0) 
ratings_matrix_3 = pd.DataFrame(movie_similarity_3)

In [392]:
# Input Movie
input_movie = movies.title[1]
print("Input Provided - ", input_movie)

# Get the index of the movie
inp = movies[movies['title'] == input_movie].index.tolist()[0]
print()
print("Index of the movie - ",inp)

# Create a column 'similarity' with the each similarity ratings of every other movie with the 'input-movie'
mv['similarity'] = ratings_matrix_3.iloc[inp]
print()
print("Most Similar Movies")
print(mv.sort_values(["similarity"], ascending = False )[1:10])

Input Provided -  GoldenEye (1995)

Index of the movie -  1

Most Similar Movies
     movie_id                              title  similarity
53         54                    Outbreak (1995)   -1.974989
549       550  Die Hard: With a Vengeance (1995)   -1.983540
48         49                        I.Q. (1994)   -1.989231
32         33                   Desperado (1995)   -1.993987
232       233                 Under Siege (1992)   -2.170507
568       569                        Wolf (1994)   -2.620389
183       184            Army of Darkness (1993)   -2.777930
225       226                  Die Hard 2 (1990)   -2.992242
230       231              Batman Returns (1992)   -3.062584


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


** Pearson Correlation **

In [394]:
movie_similarity_4 = 1 - pairwise_distances(ratings_matrix.values, metric="correlation")
np.fill_diagonal(movie_similarity_4, 0) 
ratings_matrix_4 = pd.DataFrame(movie_similarity_4)

In [395]:
# Input Movie
input_movie = movies.title[1]
print("Input Provided - ", input_movie)

# Get the index of the movie
inp = movies[movies['title'] == input_movie].index.tolist()[0]
print()
print("Index of the movie - ",inp)

# Create a column 'similarity' with the each similarity ratings of every other movie with the 'input-movie'
mv['similarity'] = ratings_matrix_4.iloc[inp]
print()
print("Most Similar Movies")
print(mv.sort_values(["similarity"], ascending = False )[1:10])

Input Provided -  GoldenEye (1995)

Index of the movie -  1

Most Similar Movies
      movie_id                                 title  similarity
1626      1627                      Wife, The (1995)    0.920049
1466      1467  Saint of Fort Washington, The (1993)    0.919636
1442      1443                      8 Seconds (1994)    0.919588
101        102                Aristocats, The (1970)    0.919405
1302      1303                   Getaway, The (1994)    0.919312
1409      1410                         Harlem (1993)    0.919297
1284      1285               Princess Caraboo (1994)    0.919293
950        951    Indian in the Cupboard, The (1995)    0.919201
100        101                    Heavy Metal (1981)    0.919156


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


** Note:** The movies returned by the recommendation system with `cosine`, `euclidean` and `manhattan distance` are quite similar to each other. However, they are not so much when the recommendation system uses `pearson correlation`.

### Suggestions for Content-Based filtering from other data scietists I met during the meet-up:

1. Use weighted average on each movie:
    - How about multiplying `rating count` and `average rating`.

2. Since, dataset is with 100k records; work on Top 25%, Middle 50% and Bottom 25% of the records on the weighted scale.

3. Use metadata td-idf matrix (cosine similarity) rather than just the movies.

4. Also you can scrap the description of the movies from IMDB and use that as part of the 'Corpus'.

5. Try 'Movie Cast', 'Director' and 'Production'

6. For collaborative filtering - try 'movie-movie' similarity and 'user-user' similarity (Computationally Expensive)

7. Try to build a Hybrid Recommender