In [1]:
import pandas as pd
import numpy as np

In [2]:
# Ratings table: one row per user, one column per movie.
# np.nan marks a movie that the user did not rate.
data = dict(
    [
        ("User", ["Sam", "Luke", "Priscilla", "Frank", "Celeste"]),
        ("10 things I hate about you", [np.nan, 3, np.nan, 4, 5]),
        ("Sinners", [4, 5, np.nan, 3, 2]),
        ("Wicked", [3, 4, 5, 4, 4]),
        ("Thunderbolts", [3, np.nan, 4, 3, 2]),
        ("F1", [4, 4, 3, 5, 5]),
        ("Fantastic Four", [4, 3, 4, 3, 5]),
    ]
)

# Column order follows the insertion order of the keys above.
df = pd.DataFrame(data=data)
df

Unnamed: 0,User,10 things I hate about you,Sinners,Wicked,Thunderbolts,F1,Fantastic Four
0,Sam,,4.0,3,3.0,4,4
1,Luke,3.0,5.0,4,,4,3
2,Priscilla,,,5,4.0,3,4
3,Frank,4.0,3.0,4,3.0,5,3
4,Celeste,5.0,2.0,4,2.0,5,5


Average Rating by person:

In [3]:
# Mean rating given by each user: axis=1 averages across the movie columns of
# every row, and unrated (NaN) movies are left out of that user's average.
avg_user = df.set_index("User").mean(axis=1)
# sep="\n" places the Series on its own line. Embedding "\n" in the label and
# relying on print's default " " separator indents the first output line by one space.
print("Average Rating per User:", avg_user, sep="\n")

Average Rating per User:
 User
Sam          3.600000
Luke         3.800000
Priscilla    4.000000
Frank        3.666667
Celeste      3.833333
dtype: float64


Average rating by movie:

In [5]:
# Mean rating received by each movie: axis=0 averages down the user rows of
# every column, and users who did not rate the movie (NaN) are left out.
avg_movie = df.set_index("User").mean(axis=0)
# sep="\n" places the Series on its own line. Embedding "\n" in the label and
# relying on print's default " " separator indents the first output line by one space.
print("Average Rating per Movie:", avg_movie, sep="\n")

Average Rating per Movie:
 10 things I hate about you    4.0
Sinners                       3.5
Wicked                        4.0
Thunderbolts                  3.0
F1                            4.2
Fantastic Four                3.8
dtype: float64


Normalized Ratings:

In [6]:
def normalize(series):
    """Min-max scale a Series of ratings to the range [0, 1].

    Each value becomes ``(value - min) / (max - min)``, so the lowest rating
    in the Series maps to 0.0 and the highest to 1.0. Missing ratings (NaN)
    are ignored when finding the min and max and stay NaN in the result.

    Parameters
    ----------
    series : pandas.Series
        Numeric ratings, typically one user's row.

    Returns
    -------
    pandas.Series
        Scaled ratings with the same index as ``series``. If every observed
        rating is identical (max == min), the observed entries are returned
        as 0.0 rather than the NaN that a 0/0 division would produce.
    """
    lowest = series.min()
    spread = series.max() - lowest
    if spread == 0:
        # Constant row: there is no range to scale by. Dividing would turn
        # every rating into NaN and silently drop the user from later means.
        return (series - lowest).astype(float)
    return (series - lowest) / spread

# Rescale every user's ratings with `normalize`, one row (user) at a time.
normalized_df = (
    df
    .set_index("User")
    .apply(normalize, axis="columns")
)
normalized_df

Unnamed: 0_level_0,10 things I hate about you,Sinners,Wicked,Thunderbolts,F1,Fantastic Four
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Sam,,1.0,0.0,0.0,1.0,1.0
Luke,0.0,1.0,0.5,,0.5,0.0
Priscilla,,,1.0,0.5,0.0,0.5
Frank,0.5,0.0,0.5,0.0,1.0,0.0
Celeste,1.0,0.0,0.666667,0.0,1.0,1.0


In [8]:
# Per-user mean of the normalized ratings (axis=1 averages across the movie columns).
# sep="\n" places the Series on its own line. Embedding "\n" in the label and
# relying on print's default " " separator indents the first output line by one space.
print("Average normalized rating per user:", normalized_df.mean(axis=1), sep="\n")

Average normalized rating per user:
 User
Sam          0.600000
Luke         0.400000
Priscilla    0.500000
Frank        0.333333
Celeste      0.611111
dtype: float64


In [9]:
# Per-movie mean of the normalized ratings (axis=0 averages down the user rows).
# The leading "\n" in the label keeps the blank line before the heading.
# sep="\n" places the Series on its own line. Embedding "\n" at the end of the label and
# relying on print's default " " separator indents the first output line by one space.
print("\nAverage normalized rating per movie:", normalized_df.mean(axis=0), sep="\n")


Average normalized rating per movie:
 10 things I hate about you    0.500000
Sinners                       0.500000
Wicked                        0.533333
Thunderbolts                  0.125000
F1                            0.700000
Fantastic Four                0.500000
dtype: float64


Using normalized ratings instead of the actual ratings has both advantages and disadvantages. The main advantage is that normalization puts every user on the same 0–1 scale, which reduces the bias introduced by users who rate consistently high or low and makes it easier to identify patterns between users. The main disadvantage is that normalized ratings lose detail: for example, a normalized rating of 0.50 loses the sense of how much someone actually liked the movie. Overall, normalization is useful for comparison but does not clearly show whether someone truly liked or disliked a movie.

EXTRA CREDIT:

In [10]:
def standardize(series):
    """Convert a Series of ratings to z-scores (mean 0, unit standard deviation).

    Each value becomes ``(value - mean) / std``, where ``std`` is the pandas
    default (sample) standard deviation. Missing ratings (NaN) are ignored
    when computing the mean and std and stay NaN in the result.

    Parameters
    ----------
    series : pandas.Series
        Numeric ratings, typically one user's row.

    Returns
    -------
    pandas.Series
        Standardized ratings with the same index as ``series``. If the
        standard deviation is exactly zero (every observed rating is
        identical), the observed entries are returned as 0.0 rather than
        the NaN that a 0/0 division would produce.
    """
    centered = series - series.mean()
    scale = series.std()
    if scale == 0:
        # Constant row: every rating already equals the mean, so its z-score
        # is taken as 0. Dividing by the zero std would yield NaN instead.
        return centered.astype(float)
    return centered / scale

# Standardize every user's ratings with `standardize`, one row (user) at a time.
standardized_df = (
    df
    .set_index("User")
    .apply(standardize, axis="columns")
)
standardized_df

Unnamed: 0_level_0,10 things I hate about you,Sinners,Wicked,Thunderbolts,F1,Fantastic Four
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Sam,,0.730297,-1.095445,-1.095445,0.730297,0.730297
Luke,-0.956183,1.434274,0.239046,,0.239046,-0.956183
Priscilla,,,1.224745,0.0,-1.224745,0.0
Frank,0.408248,-0.816497,0.408248,-0.816497,1.632993,-0.816497
Celeste,0.792594,-1.245505,0.113228,-1.245505,0.792594,0.792594


In [11]:
# Per-user mean of the standardized ratings (axis=1 averages across the movie columns).
# sep="\n" places the Series on its own line. Embedding "\n" in the label and
# relying on print's default " " separator indents the first output line by one space.
print("Average standardized rating per user:", standardized_df.mean(axis=1), sep="\n")

Average standardized rating per user:
 User
Sam         -1.332268e-16
Luke         1.776357e-16
Priscilla    0.000000e+00
Frank        2.035409e-16
Celeste     -1.110223e-16
dtype: float64


In [12]:
# Per-movie mean of the standardized ratings (axis=0 averages down the user rows).
# The leading "\n" in the label keeps the blank line before the heading.
# sep="\n" places the Series on its own line. Embedding "\n" at the end of the label and
# relying on print's default " " separator indents the first output line by one space.
print("\nAverage standardized rating per movie:", standardized_df.mean(axis=0), sep="\n")


Average standardized rating per movie:
 10 things I hate about you    0.081553
Sinners                       0.025642
Wicked                        0.177964
Thunderbolts                 -0.789362
F1                            0.434037
Fantastic Four               -0.049958
dtype: float64
