# MovieLens data

In [1]:
import numpy as np
import pandas as pd

In [17]:
unames = ["user_id", "gender", "age", "occupation", "zip"]
users = pd.read_table("datasets/movielens/users.dat", sep="::",
    header=None, names=unames, engine="python")

rnames = ["user_id", "movie_id", "rating", "timestamp"]
ratings = pd.read_table("datasets/movielens/ratings.dat", sep="::",
    header=None, names=rnames, engine="python")

mnames = ["movie_id", "title", "genres"]
movies = pd.read_table("datasets/movielens/movies.dat", sep="::",
    header=None, names=mnames, engine="python")

In [18]:
users

Unnamed: 0,user_id,gender,age,occupation,zip
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,02460
4,5,M,25,20,55455
...,...,...,...,...,...
6035,6036,F,25,15,32603
6036,6037,F,45,1,76006
6037,6038,F,56,1,14706
6038,6039,F,45,0,01060


In [19]:
users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6040 entries, 0 to 6039
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   user_id     6040 non-null   int64 
 1   gender      6040 non-null   object
 2   age         6040 non-null   int64 
 3   occupation  6040 non-null   int64 
 4   zip         6040 non-null   object
dtypes: int64(3), object(2)
memory usage: 236.1+ KB


In [20]:
ratings

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291
...,...,...,...,...
1000204,6040,1091,1,956716541
1000205,6040,1094,5,956704887
1000206,6040,562,5,956704746
1000207,6040,1096,4,956715648


In [21]:
movies

Unnamed: 0,movie_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama


# average rating for every movie

In [23]:
ratings.groupby("movie_id")["rating"].mean()

movie_id
1       4.146846
2       3.201141
3       3.016736
4       2.729412
5       3.006757
          ...   
3948    3.635731
3949    4.115132
3950    3.666667
3951    3.900000
3952    3.780928
Name: rating, Length: 3706, dtype: float64

In [25]:

ratings.groupby("movie_id").agg({"user_id": "count", "rating": "mean"})

Unnamed: 0_level_0,user_id,rating
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2077,4.146846
2,701,3.201141
3,478,3.016736
4,170,2.729412
5,296,3.006757
...,...,...
3948,862,3.635731
3949,304,4.115132
3950,54,3.666667
3951,40,3.900000


# average movie ratings by age and gender

In [50]:
age_bins = pd.cut(users["age"], bins=[0, 16, 21, 35, 60])
users["age_bin"] = age_bins
users

Unnamed: 0,user_id,gender,age,occupation,zip,age_bin
0,1,F,1,10,48067,"(0, 16]"
1,2,M,56,16,70072,"(35, 60]"
2,3,M,25,15,55117,"(21, 35]"
3,4,M,45,7,02460,"(35, 60]"
4,5,M,25,20,55455,"(21, 35]"
...,...,...,...,...,...,...
6035,6036,F,25,15,32603,"(21, 35]"
6036,6037,F,45,1,76006,"(35, 60]"
6037,6038,F,56,1,14706,"(35, 60]"
6038,6039,F,45,0,01060,"(35, 60]"


In [132]:
users_ratings = pd.merge(ratings, users, on="user_id")
users_ratings = pd.merge(users_ratings, movies)
users_ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,gender,age,occupation,zip,age_bin,title,genres
0,1,1193,5,978300760,F,1,10,48067,"(0, 16]",One Flew Over the Cuckoo's Nest (1975),Drama
1,2,1193,5,978298413,M,56,16,70072,"(35, 60]",One Flew Over the Cuckoo's Nest (1975),Drama
2,12,1193,4,978220179,M,25,12,32793,"(21, 35]",One Flew Over the Cuckoo's Nest (1975),Drama
3,15,1193,4,978199279,M,25,7,22903,"(21, 35]",One Flew Over the Cuckoo's Nest (1975),Drama
4,17,1193,5,978158471,M,50,1,95350,"(35, 60]",One Flew Over the Cuckoo's Nest (1975),Drama


In [133]:
users_ratings.groupby("title").agg({"user_id": "count", "rating": "mean"})

Unnamed: 0_level_0,user_id,rating
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"$1,000,000 Duck (1971)",37,3.027027
'Night Mother (1986),70,3.371429
'Til There Was You (1997),52,2.692308
"'burbs, The (1989)",303,2.910891
...And Justice for All (1979),199,3.713568
...,...,...
"Zed & Two Noughts, A (1985)",29,3.413793
Zero Effect (1998),301,3.750831
Zero Kelvin (Kjærlighetens kjøtere) (1995),2,3.500000
Zeus and Roxanne (1997),23,2.521739


In [134]:
ratings_grouped = users_ratings.groupby(["title", "age_bin", "gender"]).agg({"user_id": "count", "rating": "mean"})

In [137]:
ratings_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,user_id,rating
title,age_bin,gender,Unnamed: 3_level_1,Unnamed: 4_level_1
"$1,000,000 Duck (1971)","(0, 16]",F,0,
"$1,000,000 Duck (1971)","(0, 16]",M,0,
"$1,000,000 Duck (1971)","(16, 21]",F,1,5.000000
"$1,000,000 Duck (1971)","(16, 21]",M,5,2.600000
"$1,000,000 Duck (1971)","(21, 35]",F,12,3.250000
...,...,...,...,...
eXistenZ (1999),"(16, 21]",M,69,3.405797
eXistenZ (1999),"(21, 35]",F,46,3.086957
eXistenZ (1999),"(21, 35]",M,211,3.312796
eXistenZ (1999),"(35, 60]",F,10,3.600000


In [138]:
ratings_grouped[ratings_grouped["user_id"] >= 250]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,user_id,rating
title,age_bin,gender,Unnamed: 3_level_1,Unnamed: 4_level_1
12 Angry Men (1957),"(21, 35]",M,262,4.389313
"13th Warrior, The (1999)","(21, 35]",M,351,3.219373
2001: A Space Odyssey (1968),"(21, 35]",M,811,4.092478
2001: A Space Odyssey (1968),"(35, 60]",M,334,4.275449
2010 (1984),"(21, 35]",M,284,3.461268
...,...,...,...,...
X-Men (2000),"(16, 21]",M,326,3.966258
X-Men (2000),"(21, 35]",M,729,3.858711
You've Got Mail (1998),"(21, 35]",M,289,3.179931
Young Frankenstein (1974),"(21, 35]",M,583,4.236707


# use pivot table

In [139]:
ratings_table = users_ratings.pivot_table(values=["rating", "user_id"], index=["title"],
    columns=["gender", "age_bin"],
    aggfunc={"rating": "mean", "user_id": "count"}, margins=True, fill_value=None)

In [140]:
ratings_table

Unnamed: 0_level_0,rating,rating,rating,rating,rating,rating,rating,rating,rating,user_id,user_id,user_id,user_id,user_id,user_id,user_id,user_id,user_id
gender,F,F,F,F,M,M,M,M,All,F,F,F,F,M,M,M,M,All
age_bin,"(0, 16]","(16, 21]","(21, 35]","(35, 60]","(0, 16]","(16, 21]","(21, 35]","(35, 60]",Unnamed: 9_level_2,"(0, 16]","(16, 21]","(21, 35]","(35, 60]","(0, 16]","(16, 21]","(21, 35]","(35, 60]",Unnamed: 18_level_2
title,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
"$1,000,000 Duck (1971)",,5.000000,3.250000,3.333333,,2.600000,3.000000,1.500000,3.027027,0,1,12,3,0,5,14,2,37
'Night Mother (1986),3.000000,4.500000,3.360000,3.250000,1.000000,5.000000,3.000000,4.200000,3.371429,1,2,25,8,1,1,22,10,70
'Til There Was You (1997),2.000000,2.666667,2.708333,2.666667,5.000000,2.000000,2.800000,2.000000,2.692308,1,6,24,6,1,2,10,2,52
"'burbs, The (1989)",,3.035714,2.698113,2.636364,4.500000,3.338710,2.709402,3.066667,2.910891,0,28,53,11,2,62,117,30,303
...And Justice for All (1979),,,3.761905,3.928571,3.000000,3.428571,3.672897,3.775510,3.713568,0,0,21,14,1,7,107,49,199
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zero Effect (1998),4.500000,4.000000,3.775000,4.000000,4.000000,3.865385,3.666667,3.705882,3.750831,2,8,40,9,6,52,150,34,301
Zero Kelvin (Kjærlighetens kjøtere) (1995),,,,,,,3.500000,,3.500000,0,0,0,0,0,0,2,0,2
Zeus and Roxanne (1997),1.000000,2.500000,3.166667,,1.600000,2.500000,3.166667,1.000000,2.521739,1,2,6,0,5,2,6,1,23
eXistenZ (1999),4.000000,2.714286,3.086957,3.600000,3.076923,3.405797,3.312796,3.065217,3.256098,1,14,46,10,13,69,211,46,410


In [141]:
ratings_table[ratings_table.loc[:, ("user_id", "All",)] >= 250]

  ratings_table[ratings_table.loc[:, ("user_id", "All",)] >= 250]


Unnamed: 0_level_0,rating,rating,rating,rating,rating,rating,rating,rating,rating,user_id,user_id,user_id,user_id,user_id,user_id,user_id,user_id,user_id
gender,F,F,F,F,M,M,M,M,All,F,F,F,F,M,M,M,M,All
age_bin,"(0, 16]","(16, 21]","(21, 35]","(35, 60]","(0, 16]","(16, 21]","(21, 35]","(35, 60]",Unnamed: 9_level_2,"(0, 16]","(16, 21]","(21, 35]","(35, 60]","(0, 16]","(16, 21]","(21, 35]","(35, 60]",Unnamed: 18_level_2
title,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
"'burbs, The (1989)",,3.035714,2.698113,2.636364,4.500000,3.338710,2.709402,3.066667,2.910891,0,28,53,11,2,62,117,30,303
10 Things I Hate About You (1999),4.035714,3.487179,3.666667,3.625000,3.444444,3.382857,3.229075,3.384615,3.422857,28,78,102,24,27,175,227,39,700
101 Dalmatians (1961),3.812500,3.395833,3.868132,4.156250,3.263158,3.229730,3.619718,3.486111,3.596460,16,48,91,32,19,74,213,72,565
101 Dalmatians (1996),3.470588,2.636364,3.186813,3.950000,2.705882,2.375000,3.008772,3.232558,3.046703,17,22,91,20,17,40,114,43,364
12 Angry Men (1957),3.600000,3.666667,4.393939,4.292683,5.000000,4.161765,4.389313,4.260870,4.295455,10,24,66,41,7,68,262,138,616
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Young Guns II (1990),5.000000,3.066667,2.800000,2.800000,3.000000,3.130952,2.891089,2.428571,2.907859,1,15,25,5,2,84,202,35,369
Young Sherlock Holmes (1985),,3.727273,3.523810,3.333333,3.000000,3.604651,3.362694,3.236111,3.390501,0,11,42,15,3,43,193,72,379
Zero Effect (1998),4.500000,4.000000,3.775000,4.000000,4.000000,3.865385,3.666667,3.705882,3.750831,2,8,40,9,6,52,150,34,301
eXistenZ (1999),4.000000,2.714286,3.086957,3.600000,3.076923,3.405797,3.312796,3.065217,3.256098,1,14,46,10,13,69,211,46,410


In [146]:
ratings_by_id = users_ratings.groupby("title").size()

In [147]:
active_movie_ids = ratings_by_id[ratings_by_id >= 250]
active_movie_ids

title
'burbs, The (1989)                   303
10 Things I Hate About You (1999)    700
101 Dalmatians (1961)                565
101 Dalmatians (1996)                364
12 Angry Men (1957)                  616
                                    ... 
Young Guns (1988)                    562
Young Guns II (1990)                 369
Young Sherlock Holmes (1985)         379
Zero Effect (1998)                   301
eXistenZ (1999)                      410
Length: 1216, dtype: int64

In [151]:
active_movie_ids.index

Index([''burbs, The (1989)', '10 Things I Hate About You (1999)',
       '101 Dalmatians (1961)', '101 Dalmatians (1996)', '12 Angry Men (1957)',
       '13th Warrior, The (1999)', '2 Days in the Valley (1996)',
       '20,000 Leagues Under the Sea (1954)', '2001: A Space Odyssey (1968)',
       '2010 (1984)',
       ...
       'X-Men (2000)', 'Year of Living Dangerously (1982)',
       'Yellow Submarine (1968)', 'You've Got Mail (1998)',
       'Young Frankenstein (1974)', 'Young Guns (1988)',
       'Young Guns II (1990)', 'Young Sherlock Holmes (1985)',
       'Zero Effect (1998)', 'eXistenZ (1999)'],
      dtype='object', name='title', length=1216)

In [153]:
user_ratings = users_ratings[users_ratings["title"].isin(active_movie_ids.index)]

In [154]:
user_ratings

Unnamed: 0,user_id,movie_id,rating,timestamp,gender,age,occupation,zip,age_bin,title,genres
0,1,1193,5,978300760,F,1,10,48067,"(0, 16]",One Flew Over the Cuckoo's Nest (1975),Drama
1,2,1193,5,978298413,M,56,16,70072,"(35, 60]",One Flew Over the Cuckoo's Nest (1975),Drama
2,12,1193,4,978220179,M,25,12,32793,"(21, 35]",One Flew Over the Cuckoo's Nest (1975),Drama
3,15,1193,4,978199279,M,25,7,22903,"(21, 35]",One Flew Over the Cuckoo's Nest (1975),Drama
4,17,1193,5,978158471,M,50,1,95350,"(35, 60]",One Flew Over the Cuckoo's Nest (1975),Drama
...,...,...,...,...,...,...,...,...,...,...,...
983130,5816,2889,5,957914832,M,25,0,70119,"(21, 35]","Mystery, Alaska (1999)",Comedy
983131,5831,2889,4,960873449,M,25,1,92120,"(21, 35]","Mystery, Alaska (1999)",Comedy
983132,5836,2889,3,957845106,M,25,0,91604,"(21, 35]","Mystery, Alaska (1999)",Comedy
983133,5872,2889,3,977282690,M,25,17,61265,"(21, 35]","Mystery, Alaska (1999)",Comedy


In [166]:
ratings_table = user_ratings.pivot_table(values=["rating", "user_id"], index=["title"],
    columns=["gender", "age_bin"],
    aggfunc={"rating": "mean", "user_id": "count"}, margins=False, fill_value=0)

# top film among female viewers

In [167]:
ratings_table.head()

Unnamed: 0_level_0,rating,rating,rating,rating,rating,rating,rating,rating,user_id,user_id,user_id,user_id,user_id,user_id,user_id,user_id
gender,F,F,F,F,M,M,M,M,F,F,F,F,M,M,M,M
age_bin,"(0, 16]","(16, 21]","(21, 35]","(35, 60]","(0, 16]","(16, 21]","(21, 35]","(35, 60]","(0, 16]","(16, 21]","(21, 35]","(35, 60]","(0, 16]","(16, 21]","(21, 35]","(35, 60]"
title,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
"'burbs, The (1989)",0.0,3.035714,2.698113,2.636364,4.5,3.33871,2.709402,3.066667,0,28,53,11,2,62,117,30
10 Things I Hate About You (1999),4.035714,3.487179,3.666667,3.625,3.444444,3.382857,3.229075,3.384615,28,78,102,24,27,175,227,39
101 Dalmatians (1961),3.8125,3.395833,3.868132,4.15625,3.263158,3.22973,3.619718,3.486111,16,48,91,32,19,74,213,72
101 Dalmatians (1996),3.470588,2.636364,3.186813,3.95,2.705882,2.375,3.008772,3.232558,17,22,91,20,17,40,114,43
12 Angry Men (1957),3.6,3.666667,4.393939,4.292683,5.0,4.161765,4.389313,4.26087,10,24,66,41,7,68,262,138


In [168]:
ratings_table = ratings_table.xs("rating", level=0, axis=1)

In [169]:
ratings_table

gender,F,F,F,F,M,M,M,M
age_bin,"(0, 16]","(16, 21]","(21, 35]","(35, 60]","(0, 16]","(16, 21]","(21, 35]","(35, 60]"
title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
"'burbs, The (1989)",0.000000,3.035714,2.698113,2.636364,4.500000,3.338710,2.709402,3.066667
10 Things I Hate About You (1999),4.035714,3.487179,3.666667,3.625000,3.444444,3.382857,3.229075,3.384615
101 Dalmatians (1961),3.812500,3.395833,3.868132,4.156250,3.263158,3.229730,3.619718,3.486111
101 Dalmatians (1996),3.470588,2.636364,3.186813,3.950000,2.705882,2.375000,3.008772,3.232558
12 Angry Men (1957),3.600000,3.666667,4.393939,4.292683,5.000000,4.161765,4.389313,4.260870
...,...,...,...,...,...,...,...,...
Young Guns (1988),0.000000,3.300000,3.380000,3.500000,3.000000,3.747475,3.391975,3.086207
Young Guns II (1990),5.000000,3.066667,2.800000,2.800000,3.000000,3.130952,2.891089,2.428571
Young Sherlock Holmes (1985),0.000000,3.727273,3.523810,3.333333,3.000000,3.604651,3.362694,3.236111
Zero Effect (1998),4.500000,4.000000,3.775000,4.000000,4.000000,3.865385,3.666667,3.705882


In [170]:
grouped_by_gender = ratings_table.groupby(level=0, axis=1).mean()
grouped_by_gender

gender,F,M
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"'burbs, The (1989)",2.092548,3.403695
10 Things I Hate About You (1999),3.703640,3.360248
101 Dalmatians (1961),3.808179,3.399679
101 Dalmatians (1996),3.310941,2.830553
12 Angry Men (1957),3.988322,4.452987
...,...,...
Young Guns (1988),2.545000,3.306414
Young Guns II (1990),3.416667,2.862653
Young Sherlock Holmes (1985),2.646104,3.300864
Zero Effect (1998),4.068750,3.809483


In [171]:
sorted_by_female = grouped_by_gender.sort_values("F", ascending=False)
sorted_by_female.head()

gender,F,M
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Rear Window (1954),4.590556,4.361192
It Happened One Night (1934),4.585753,4.129805
"Third Man, The (1949)",4.569638,4.514152
Seven Samurai (The Magnificent Seven) (Shichinin no samurai) (1954),4.565318,4.552485
Schindler's List (1993),4.555058,4.478605


In [172]:
ratings_table_gen = user_ratings.pivot_table(values=["rating", "user_id"], index=["title"],
    columns=["gender"],
    aggfunc={"rating": "mean", "user_id": "count"}, margins=False, fill_value=None)

In [173]:
ratings_table_gen = ratings_table_gen.xs("rating", level=0, axis=1)
ratings_table_gen

gender,F,M
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"'burbs, The (1989)",2.793478,2.962085
10 Things I Hate About You (1999),3.646552,3.311966
101 Dalmatians (1961),3.791444,3.500000
101 Dalmatians (1996),3.240000,2.911215
12 Angry Men (1957),4.184397,4.328421
...,...,...
Young Guns (1988),3.371795,3.425620
Young Guns II (1990),2.934783,2.904025
Young Sherlock Holmes (1985),3.514706,3.363344
Zero Effect (1998),3.864407,3.723140


In [174]:
ratings_table_gen.sort_values("F", ascending=False)

gender,F,M
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"Close Shave, A (1995)",4.644444,4.473795
"Wrong Trousers, The (1993)",4.588235,4.478261
Sunset Blvd. (a.k.a. Sunset Boulevard) (1950),4.572650,4.464589
Wallace & Gromit: The Best of Aardman Animation (1996),4.563107,4.385075
Schindler's List (1993),4.562602,4.491415
...,...,...
"Avengers, The (1998)",1.915254,2.017467
Speed 2: Cruise Control (1997),1.906667,1.863014
Rocky V (1990),1.878788,2.132780
Barb Wire (1996),1.585366,2.100386


In [176]:
ratings_table_gen["diff"] = ratings_table_gen["F"] - ratings_table_gen["M"]
ratings_table_gen = ratings_table_gen.sort_values("diff", ascending=False)

In [177]:
ratings_table_gen

gender,F,M,diff
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Dirty Dancing (1987),3.790378,2.959596,0.830782
Jumpin' Jack Flash (1986),3.254717,2.578358,0.676359
Grease (1978),3.975265,3.367041,0.608224
Little Women (1994),3.870588,3.321739,0.548849
Steel Magnolias (1989),3.901734,3.365957,0.535777
...,...,...,...
"Cable Guy, The (1996)",2.250000,2.863787,-0.613787
"Longest Day, The (1962)",3.411765,4.031447,-0.619682
Dumb & Dumber (1994),2.697987,3.336595,-0.638608
"Kentucky Fried Movie, The (1977)",2.878788,3.555147,-0.676359


In [179]:
ratings_table_gen.tail()[::-1]

gender,F,M,diff
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Good, The Bad and The Ugly, The (1966)",3.494949,4.2213,-0.726351
"Kentucky Fried Movie, The (1977)",2.878788,3.555147,-0.676359
Dumb & Dumber (1994),2.697987,3.336595,-0.638608
"Longest Day, The (1962)",3.411765,4.031447,-0.619682
"Cable Guy, The (1996)",2.25,2.863787,-0.613787


In [181]:
users_ratings

Unnamed: 0,user_id,movie_id,rating,timestamp,gender,age,occupation,zip,age_bin,title,genres
0,1,1193,5,978300760,F,1,10,48067,"(0, 16]",One Flew Over the Cuckoo's Nest (1975),Drama
1,2,1193,5,978298413,M,56,16,70072,"(35, 60]",One Flew Over the Cuckoo's Nest (1975),Drama
2,12,1193,4,978220179,M,25,12,32793,"(21, 35]",One Flew Over the Cuckoo's Nest (1975),Drama
3,15,1193,4,978199279,M,25,7,22903,"(21, 35]",One Flew Over the Cuckoo's Nest (1975),Drama
4,17,1193,5,978158471,M,50,1,95350,"(35, 60]",One Flew Over the Cuckoo's Nest (1975),Drama
...,...,...,...,...,...,...,...,...,...,...,...
1000204,5949,2198,5,958846401,M,18,17,47901,"(16, 21]",Modulations (1998),Documentary
1000205,5675,2703,3,976029116,M,35,14,30030,"(21, 35]",Broken Vessels (1998),Drama
1000206,5780,2845,1,958153068,M,18,17,92886,"(16, 21]",White Boys (1999),Drama
1000207,5851,3607,5,957756608,F,18,20,55410,"(16, 21]",One Little Indian (1973),Comedy|Drama|Western


In [184]:
ratings_grouped = users_ratings.groupby("title").agg({"user_id":"count", "rating": "std"})
ratings_grouped

Unnamed: 0_level_0,user_id,rating
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"$1,000,000 Duck (1971)",37,1.092563
'Night Mother (1986),70,1.118636
'Til There Was You (1997),52,1.020159
"'burbs, The (1989)",303,1.107760
...And Justice for All (1979),199,0.878110
...,...,...
"Zed & Two Noughts, A (1985)",29,1.052794
Zero Effect (1998),301,1.042932
Zero Kelvin (Kjærlighetens kjøtere) (1995),2,0.707107
Zeus and Roxanne (1997),23,1.122884


In [186]:
sorted_ratings = ratings_grouped[ratings_grouped["user_id"] >= 250].sort_values('rating', ascending=False)
sorted_ratings.head(10)

Unnamed: 0_level_0,user_id,rating
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Dumb & Dumber (1994),660,1.321333
"Blair Witch Project, The (1999)",1237,1.316368
Natural Born Killers (1994),700,1.307198
Tank Girl (1995),358,1.277695
"Rocky Horror Picture Show, The (1975)",1233,1.260177
Eyes Wide Shut (1999),945,1.259624
Evita (1996),282,1.253631
Billy Madison (1995),355,1.24997
Fear and Loathing in Las Vegas (1998),298,1.246408
Bicentennial Man (1999),383,1.245533


In [189]:
movies["genres"] = movies["genres"].str.split("|")
movies.head()

Unnamed: 0,movie_id,title,genres
0,1,Toy Story (1995),"[Animation, Children's, Comedy]"
1,2,Jumanji (1995),"[Adventure, Children's, Fantasy]"
2,3,Grumpier Old Men (1995),"[Comedy, Romance]"
3,4,Waiting to Exhale (1995),"[Comedy, Drama]"
4,5,Father of the Bride Part II (1995),[Comedy]


In [193]:
movies_exploded = movies.explode("genres") # explodes along rows
movies_exploded.head()

Unnamed: 0,movie_id,title,genres
0,1,Toy Story (1995),Animation
0,1,Toy Story (1995),Children's
0,1,Toy Story (1995),Comedy
1,2,Jumanji (1995),Adventure
1,2,Jumanji (1995),Children's


In [197]:
ratings_w_genre = pd.merge(pd.merge(users, ratings), movies_exploded)
ratings_w_genre.head()

Unnamed: 0,user_id,gender,age,occupation,zip,age_bin,movie_id,rating,timestamp,title,genres
0,1,F,1,10,48067,"(0, 16]",1193,5,978300760,One Flew Over the Cuckoo's Nest (1975),Drama
1,2,M,56,16,70072,"(35, 60]",1193,5,978298413,One Flew Over the Cuckoo's Nest (1975),Drama
2,12,M,25,12,32793,"(21, 35]",1193,4,978220179,One Flew Over the Cuckoo's Nest (1975),Drama
3,15,M,25,7,22903,"(21, 35]",1193,4,978199279,One Flew Over the Cuckoo's Nest (1975),Drama
4,17,M,50,1,95350,"(35, 60]",1193,5,978158471,One Flew Over the Cuckoo's Nest (1975),Drama


In [205]:
ratings_w_genre.groupby(["genres", "age_bin"]) \
    .agg({"user_id":"count", "rating": "mean"}) \
    .unstack("age_bin")

Unnamed: 0_level_0,user_id,user_id,user_id,user_id,rating,rating,rating,rating
age_bin,"(0, 16]","(16, 21]","(21, 35]","(35, 60]","(0, 16]","(16, 21]","(21, 35]","(35, 60]"
genres,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Action,6578,50186,156181,44512,3.506385,3.447097,3.480763,3.575216
Adventure,3998,26324,79315,24316,3.449975,3.408525,3.467427,3.588214
Animation,2449,10269,24571,6004,3.476113,3.624014,3.714216,3.753997
Children's,4337,16924,39747,11178,3.241642,3.294257,3.459129,3.553587
Comedy,11162,69980,212454,62984,3.497491,3.460417,3.513721,3.623254
Crime,1701,15373,47925,14542,3.71017,3.668054,3.696922,3.790194
Documentary,130,1081,5197,1502,3.730769,3.865865,3.949009,3.944075
Drama,7483,58104,210285,78657,3.794735,3.72193,3.745522,3.852067
Fantasy,1360,7875,21296,5770,3.317647,3.353778,3.462293,3.550607
Film-Noir,330,2280,10714,4937,4.145455,3.997368,4.061135,4.136925
