In [19]:
import numpy as np
import pandas as pd
# Configure how pandas renders results in the notebook.
pd.options.display.notebook_repr_html = True  # render DataFrames as HTML tables rather than plain text
pd.options.display.max_rows = None            # no cap on the number of rows displayed
pd.options.display.max_columns = None         # no cap on the number of columns displayed

# bring in matplotlib for graphics
import matplotlib.pyplot as plt
%matplotlib inline

In [20]:
# 1. Load the ratings-by-user information that was collected into a pandas DataFrame.
# The first CSV column is used as the row index.
ratings = pd.read_csv(filepath_or_buffer='MOVIES.csv', index_col=0)
ratings.head(n=5)  # take a peek at the data

Unnamed: 0,SIMON,Adia,Eddie,Jabari,TOM,Taline,Matt,Salman,Anthony,Sahara,Kaamil,KHAIRUL
Extraction,4.0,,3.0,,,,4.0,,4.0,,,
Bad Boys for Life,4.0,,5.0,,2.0,4.0,4.0,4.0,,5.0,5.0,5.0
Sonic the Hedgehog,,,2.0,,,4.0,3.0,,,,4.0,5.0
Bloodshot,2.0,1.0,3.0,,,1.0,1.0,,,2.0,,
Trolls World Tour,,4.0,1.0,4.0,4.0,,,,,,2.0,


In [21]:
# 2.a. Show the average rating given by each USER.
# Users are the columns of `ratings`, so average down each column
# (missing ratings are skipped by default).
ratings.mean(axis=0)

SIMON      3.000000
Adia       2.750000
Eddie      3.000000
Jabari     3.000000
TOM        2.600000
Taline     3.375000
Matt       2.700000
Salman     3.333333
Anthony    3.500000
Sahara     2.750000
Kaamil     3.800000
KHAIRUL    4.250000
dtype: float64

In [22]:
# 2.b. Show the average rating received by each MOVIE.
# Movies are the rows of `ratings`, so average across each row.
# DataFrame.mean already returns a Series, so no extra wrapping is needed.
rmean = ratings.mean(axis='columns')
rmean

Extraction            3.750000
Bad Boys for Life     4.222222
Sonic the Hedgehog    3.600000
Bloodshot             1.666667
Trolls World Tour     3.000000
The Platform          2.000000
Code 8                3.400000
The Gentlemen         3.500000
Frozen 2              2.777778
Underwater            2.500000
The Invisible Man     3.500000
dtype: float64

In [23]:
# 3.a. Create a new pandas DataFrame with NORMALIZED (min-max scaled) ratings for each user.
#      The per-user and per-movie averages of these values are shown in the following cells.
# Formula, as described in this post:
#      http://bi-analytics.org/topic/9-standardization-vs-normalization/
#      Xnew = (X - Xmin) / (Xmax - Xmin)
# Xmin and Xmax are taken per user (per column of `ratings`). The result is
# transposed so that users become the rows and movies become the columns.
ratings_norm = ratings.sub(ratings.min()).div(ratings.max() - ratings.min()).T
ratings_norm    # display the DataFrame

Unnamed: 0,Extraction,Bad Boys for Life,Sonic the Hedgehog,Bloodshot,Trolls World Tour,The Platform,Code 8,The Gentlemen,Frozen 2,Underwater,The Invisible Man
SIMON,1.0,1.0,,0.0,,,,,0.5,0.0,0.5
Adia,,,,0.0,1.0,,,,0.666667,,0.666667
Eddie,0.5,1.0,0.25,0.5,0.0,,,,0.5,0.5,0.75
Jabari,,,,,1.0,0.0,,,,,0.5
TOM,,0.333333,,,1.0,0.333333,1.0,,0.0,,
Taline,,1.0,1.0,0.0,,0.666667,1.0,1.0,,0.666667,1.0
Matt,1.0,1.0,0.666667,0.0,,0.333333,0.333333,0.666667,0.666667,0.333333,0.666667
Salman,,1.0,,,,,,,0.0,,0.0
Anthony,1.0,,,,,,0.0,,1.0,,0.0
Sahara,,1.0,,0.25,,0.0,,,0.5,,


In [24]:

# 3.b. Show the average NORMALIZED rating for each USER.
# ratings_norm has one row per user, so average across each row.
ratings_norm.mean(axis='columns')

SIMON      0.500000
Adia       0.583333
Eddie      0.500000
Jabari     0.500000
TOM        0.533333
Taline     0.791667
Matt       0.566667
Salman     0.333333
Anthony    0.500000
Sahara     0.437500
Kaamil     0.600000
KHAIRUL    0.750000
dtype: float64

In [25]:
# 3.c. Show the average NORMALIZED rating for each MOVIE.
# ratings_norm has one column per movie, so average down each column.
# DataFrame.mean already returns a Series, so no extra wrapping is needed.
rnorm = ratings_norm.mean(axis=0)
rnorm

Extraction            0.875000
Bad Boys for Life     0.925926
Sonic the Hedgehog    0.716667
Bloodshot             0.125000
Trolls World Tour     0.600000
The Platform          0.266667
Code 8                0.600000
The Gentlemen         0.833333
Frozen 2              0.425926
Underwater            0.375000
The Invisible Man     0.575000
dtype: float64

In [26]:
# Combine the per-movie plain average and normalized average into a single
# DataFrame, using the index of the plain averages as the row labels.
ratings_averages = pd.DataFrame(
    data={'mean': rmean, 'norm': rnorm},
    index=rmean.index,
)
ratings_averages

Unnamed: 0,mean,norm
Extraction,3.75,0.875
Bad Boys for Life,4.222222,0.925926
Sonic the Hedgehog,3.6,0.716667
Bloodshot,1.666667,0.125
Trolls World Tour,3.0,0.6
The Platform,2.0,0.266667
Code 8,3.4,0.6
The Gentlemen,3.5,0.833333
Frozen 2,2.777778,0.425926
Underwater,2.5,0.375


In [27]:

# Rank the movies by their plain AVERAGE rating, highest first.
ratings_averages.sort_values(by='mean', ascending=False)

Unnamed: 0,mean,norm
Bad Boys for Life,4.222222,0.925926
Extraction,3.75,0.875
Sonic the Hedgehog,3.6,0.716667
The Gentlemen,3.5,0.833333
The Invisible Man,3.5,0.575
Code 8,3.4,0.6
Trolls World Tour,3.0,0.6
Frozen 2,2.777778,0.425926
Underwater,2.5,0.375
The Platform,2.0,0.266667


In [28]:
# Rank the movies by their NORMALIZED average rating, highest first.
ratings_averages.sort_values(by='norm', ascending=False)

Unnamed: 0,mean,norm
Bad Boys for Life,4.222222,0.925926
Extraction,3.75,0.875
The Gentlemen,3.5,0.833333
Sonic the Hedgehog,3.6,0.716667
Trolls World Tour,3.0,0.6
Code 8,3.4,0.6
The Invisible Man,3.5,0.575
Frozen 2,2.777778,0.425926
Underwater,2.5,0.375
The Platform,2.0,0.266667


In [29]:

# 5.a. [Extra credit] Create another new pandas DataFrame with STANDARDIZED ratings for each user.
#      The per-user and per-movie averages of these values are shown in the following cells.
# Formula, as described in this post:
#      http://bi-analytics.org/topic/9-standardization-vs-normalization/
#      Xnew = (X - μ) / σ
# μ and σ are taken per user (per column of `ratings`); DataFrame.std() uses the
# sample standard deviation (ddof=1) by default. The result is transposed so
# that users become the rows and movies become the columns.
ratings_stand = ratings.sub(ratings.mean()).div(ratings.std()).T
ratings_stand

Unnamed: 0,Extraction,Bad Boys for Life,Sonic the Hedgehog,Bloodshot,Trolls World Tour,The Platform,Code 8,The Gentlemen,Frozen 2,Underwater,The Invisible Man
SIMON,1.118034,1.118034,,-1.118034,,,,,0.0,-1.118034,0.0
Adia,,,,-1.390759,0.993399,,,,0.19868,,0.19868
Eddie,0.0,1.67332,-0.83666,0.0,-1.67332,,,,0.0,0.0,0.83666
Jabari,,,,,1.0,-1.0,,,,,0.0
TOM,,-0.447214,,,1.043498,-0.447214,1.043498,,-1.19257,,
Taline,,0.589256,0.589256,-2.239171,,-0.353553,0.589256,0.589256,,-0.353553,0.589256
Matt,1.37032,1.37032,0.316228,-1.791957,,-0.737865,-0.737865,0.316228,0.316228,-0.737865,0.316228
Salman,,1.154701,,,,,,,-0.57735,,-0.57735
Anthony,0.866025,,,,,,-0.866025,,0.866025,,-0.866025
Sahara,,1.317465,,-0.439155,,-1.024695,,,0.146385,,


In [30]:
# 5.b. Show the average STANDARDIZED rating for each USER.
# ratings_stand has one row per user, so average across each row; the results
# are zero up to floating-point round-off.
ratings_stand.mean(axis='columns')

SIMON      0.000000e+00
Adia       0.000000e+00
Eddie      0.000000e+00
Jabari     0.000000e+00
TOM       -8.881784e-17
Taline     0.000000e+00
Matt      -1.998401e-16
Salman    -2.960595e-16
Anthony    0.000000e+00
Sahara     6.938894e-18
Kaamil     1.665335e-16
KHAIRUL    0.000000e+00
dtype: float64

In [31]:
# 5.c. Show the average STANDARDIZED rating for each MOVIE.
# ratings_stand has one column per movie, so average down each column.
# DataFrame.mean already returns a Series, so no extra wrapping is needed.
rstand = ratings_stand.mean(axis=0)
rstand

Extraction            0.838595
Bad Boys for Life     0.930147
Sonic the Hedgehog    0.150280
Bloodshot            -1.163179
Trolls World Tour    -0.055918
The Platform         -0.712665
Code 8                0.042288
The Gentlemen         0.452742
Frozen 2             -0.193622
Underwater           -0.552363
The Invisible Man     0.118002
dtype: float64

In [32]:
# Add the average standardized rating for each movie to the DataFrame that
# already contains the plain average and normalized average movie ratings.
# assign() builds a new frame (aligned on the movie index) that is bound back
# to the same name.
ratings_averages = ratings_averages.assign(stand=rstand)
ratings_averages

Unnamed: 0,mean,norm,stand
Extraction,3.75,0.875,0.838595
Bad Boys for Life,4.222222,0.925926,0.930147
Sonic the Hedgehog,3.6,0.716667,0.15028
Bloodshot,1.666667,0.125,-1.163179
Trolls World Tour,3.0,0.6,-0.055918
The Platform,2.0,0.266667,-0.712665
Code 8,3.4,0.6,0.042288
The Gentlemen,3.5,0.833333,0.452742
Frozen 2,2.777778,0.425926,-0.193622
Underwater,2.5,0.375,-0.552363


In [33]:

# Rank the movies by their plain AVERAGE rating, highest first.
ratings_averages.sort_values(by='mean', ascending=False)

Unnamed: 0,mean,norm,stand
Bad Boys for Life,4.222222,0.925926,0.930147
Extraction,3.75,0.875,0.838595
Sonic the Hedgehog,3.6,0.716667,0.15028
The Gentlemen,3.5,0.833333,0.452742
The Invisible Man,3.5,0.575,0.118002
Code 8,3.4,0.6,0.042288
Trolls World Tour,3.0,0.6,-0.055918
Frozen 2,2.777778,0.425926,-0.193622
Underwater,2.5,0.375,-0.552363
The Platform,2.0,0.266667,-0.712665


In [34]:
# Rank the movies by their NORMALIZED average rating, highest first.
ratings_averages.sort_values(by='norm', ascending=False)

Unnamed: 0,mean,norm,stand
Bad Boys for Life,4.222222,0.925926,0.930147
Extraction,3.75,0.875,0.838595
The Gentlemen,3.5,0.833333,0.452742
Sonic the Hedgehog,3.6,0.716667,0.15028
Trolls World Tour,3.0,0.6,-0.055918
Code 8,3.4,0.6,0.042288
The Invisible Man,3.5,0.575,0.118002
Frozen 2,2.777778,0.425926,-0.193622
Underwater,2.5,0.375,-0.552363
The Platform,2.0,0.266667,-0.712665


In [35]:
# Rank the movies by their STANDARDIZED average rating, highest first.
ratings_averages.sort_values(by='stand', ascending=False)

Unnamed: 0,mean,norm,stand
Bad Boys for Life,4.222222,0.925926,0.930147
Extraction,3.75,0.875,0.838595
The Gentlemen,3.5,0.833333,0.452742
Sonic the Hedgehog,3.6,0.716667,0.15028
The Invisible Man,3.5,0.575,0.118002
Code 8,3.4,0.6,0.042288
Trolls World Tour,3.0,0.6,-0.055918
Frozen 2,2.777778,0.425926,-0.193622
Underwater,2.5,0.375,-0.552363
The Platform,2.0,0.266667,-0.712665
