In [1]:
import pandas as pd

from app.utils.universal_path import universal_path

# Load the raw user ratings. `universal_path` is a project helper; presumably it
# resolves the relative path independently of the working directory — confirm.
data_path = universal_path('../../../data/user_ratings.csv')
full_df = pd.read_csv(data_path)

# Work on a reproducible random subset of the ratings. The requested size is
# capped at the number of available rows, because DataFrame.sample raises
# ValueError when asked for more rows than exist (it samples without
# replacement by default). With enough data the result is the same as before.
SAMPLE_SIZE = 10_000_000
small_df = full_df.sample(min(SAMPLE_SIZE, len(full_df)), random_state=42)
small_df

Unnamed: 0,BGGId,Rating,Username
16597337,17709,2.0,kerbythepurplecow
15421575,155693,2.0,Asgren
16051321,40531,7.0,dzudz
16392352,22864,7.0,Gamezombiac
5484430,50,8.0,jaya
...,...,...,...
13193370,1540,7.0,BrianSchoff
7040948,264055,6.0,StupidRatCreature
2319147,121921,7.0,ekloff
3075854,77423,5.5,Leiye


# Most popular items
- compute average rating for each item

In [2]:
# Mean rating per game. Aggregating with a list keeps the two-level
# ('Rating', 'mean') column label that later cells index by.
average_ratings = small_df.groupby('BGGId')[['Rating']].agg(['mean'])
average_ratings.head(10)

Unnamed: 0_level_0,Rating
Unnamed: 0_level_1,mean
BGGId,Unnamed: 1_level_2
1,7.659893
2,6.689061
3,7.472895
4,6.674976
5,7.441088
6,6.4075
7,6.549755
8,6.113426
9,6.459529
10,6.683932


- recommend items with the highest averages

In [3]:
# Mean rating and number of ratings per game, ordered from the best mean down.
game_rating_stats = small_df.groupby('BGGId')[['Rating']].agg(['mean', 'count'])
average_ratings_sorted_by_best = game_rating_stats.sort_values(
    by=('Rating', 'mean'),
    ascending=False,
)
average_ratings_sorted_by_best.head(10)

Unnamed: 0_level_0,Rating,Rating
Unnamed: 0_level_1,mean,count
BGGId,Unnamed: 1_level_2,Unnamed: 2_level_2
284121,9.762632,57
207203,9.647059,34
342942,9.644515,378
345976,9.6,25
295785,9.591495,162
275777,9.565217,23
249277,9.5641,100
349161,9.5,4
260037,9.383529,17
323046,9.380952,21


## Problems:
### Number of ratings, uncertainty
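- average 5 from 3 ratings
- average 4.9 from 100 ratings
- the higher average is not necessarily the better item: the fewer the ratings, the less certain the estimate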

In [4]:
# Threshold on the number of ratings; the comparison below is strict, so a game
# needs more than MIN_RATINGS ratings to stay in the ranking.
MIN_RATINGS = 3
has_enough_ratings = average_ratings_sorted_by_best[('Rating', 'count')] > MIN_RATINGS
average_ratings_sorted_by_best[has_enough_ratings].head(10)

Unnamed: 0_level_0,Rating,Rating
Unnamed: 0_level_1,mean,count
BGGId,Unnamed: 1_level_2,Unnamed: 2_level_2
284121,9.762632,57
207203,9.647059,34
342942,9.644515,378
345976,9.6,25
295785,9.591495,162
275777,9.565217,23
249277,9.5641,100
349161,9.5,4
260037,9.383529,17
323046,9.380952,21


### Bias, normalization
- some users give systematically higher (or lower) ratings than others

In [5]:
# Attach to every rating row the mean rating of the game it refers to.
# The column is added to small_df in place; later cells read it from there.
mean_rating_by_game = average_ratings[('Rating', 'mean')]
small_df["Mean game rating"] = small_df['BGGId'].map(mean_rating_by_game)
small_df

Unnamed: 0,BGGId,Rating,Username,Mean game rating
16597337,17709,2.0,kerbythepurplecow,5.511491
15421575,155693,2.0,Asgren,5.276764
16051321,40531,7.0,dzudz,6.415153
16392352,22864,7.0,Gamezombiac,6.101974
5484430,50,8.0,jaya,7.240703
...,...,...,...,...
13193370,1540,7.0,BrianSchoff,7.097141
7040948,264055,6.0,StupidRatCreature,7.272576
2319147,121921,7.0,ekloff,7.911429
3075854,77423,5.5,Leiye,7.720694


In [6]:
# Per user: the mean of their own ratings and the mean of the
# "Mean game rating" values of the games they rated.
bias_by_username = small_df.groupby('Username')[['Rating', 'Mean game rating']].agg(['mean'])

# Bias = how far the user's average rating lies above (positive) or below
# (negative) the average of the games they rated.
user_mean_rating = bias_by_username[('Rating', 'mean')]
rated_games_mean = bias_by_username[('Mean game rating', 'mean')]
bias_by_username['Bias'] = user_mean_rating - rated_games_mean
bias_by_username

Unnamed: 0_level_0,Rating,Mean game rating,Bias
Unnamed: 0_level_1,mean,mean,Unnamed: 3_level_1
Username,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Fu_Koios,9.000000,7.302158,1.697842
beastvol,7.428571,7.282515,0.146057
mycroft,7.900000,6.916582,0.983418
woh,8.666667,7.175830,1.490836
(mostly) harmless,9.000000,6.291769,2.708231
...,...,...,...
zzzuzu,7.000000,7.103824,-0.103824
zzzvone,6.545455,7.055997,-0.510542
zzzxxxyyy,7.546667,7.782547,-0.235880
zzzzzane,7.432099,7.309132,0.122967


In [7]:
# Look up each rating's user bias (bias_by_username is indexed by Username),
# then subtract it from the raw rating.
user_bias = bias_by_username['Bias']
ratings_only = small_df[['Username', 'BGGId', 'Rating']]
unbiased_ratings = ratings_only.merge(user_bias, left_on='Username', right_index=True)
unbiased_ratings['UnbiasedRating'] = unbiased_ratings['Rating'] - unbiased_ratings['Bias']
unbiased_ratings

Unnamed: 0,Username,BGGId,Rating,Bias,UnbiasedRating
16597337,kerbythepurplecow,17709,2.0,-0.878420,2.878420
3122887,kerbythepurplecow,41114,7.0,-0.878420,7.878420
4073178,kerbythepurplecow,38453,4.0,-0.878420,4.878420
4347575,kerbythepurplecow,463,8.0,-0.878420,8.878420
3048157,kerbythepurplecow,100901,5.0,-0.878420,5.878420
...,...,...,...,...,...
12448314,jthomash2,233398,10.0,1.963944,8.036056
15070301,muscularsnails65,193949,10.0,2.123468,7.876532
8086190,ARTURUS22,10547,9.0,1.865938,7.134062
6667068,Collage,70323,9.0,1.774661,7.225339


In [8]:
# Mean and count of the bias-corrected ratings per game, best mean first.
unbiased_stats_by_game = unbiased_ratings.groupby('BGGId')[['UnbiasedRating']].agg(['mean', 'count'])
average_unbiased_ratings_sorted_from_best = unbiased_stats_by_game.sort_values(
    by=('UnbiasedRating', 'mean'),
    ascending=False,
)
average_unbiased_ratings_sorted_from_best

Unnamed: 0_level_0,UnbiasedRating,UnbiasedRating
Unnamed: 0_level_1,mean,count
BGGId,Unnamed: 1_level_2,Unnamed: 2_level_2
342942,9.584915,378
284121,9.558533,57
207203,9.516112,34
345976,9.490754,25
341169,9.331137,265
...,...,...
99918,1.483853,20
155582,1.403662,18
144110,1.154102,35
240624,0.970840,38


In [9]:
# Keep only games with more than MIN_RATINGS ratings (strict comparison),
# then show the ten best by mean unbiased rating.
unbiased_has_enough_ratings = (
    average_unbiased_ratings_sorted_from_best[('UnbiasedRating', 'count')] > MIN_RATINGS
)
average_unbiased_ratings_sorted_from_best[unbiased_has_enough_ratings].head(10)

Unnamed: 0_level_0,UnbiasedRating,UnbiasedRating
Unnamed: 0_level_1,mean,count
BGGId,Unnamed: 1_level_2,Unnamed: 2_level_2
342942,9.584915,378
284121,9.558533,57
207203,9.516112,34
345976,9.490754,25
341169,9.331137,265
295785,9.259623,162
349161,9.232332,4
260037,9.227323,17
63170,9.196278,301
279727,9.180363,26


## Exploitation vs Exploration
- "pure exploitation" – always recommend "top items" – what we did above
- what if some other item is actually better, rating is poorer just due to noise?
- "exploration" – presenting items to get more data
- Multi-armed Bandit
  - standard model for "exploitation vs exploration"
  - many algorithms (e.g., "upper confidence bounds")
- core idea:
  - do not use just "averages"
  - quantify uncertainty (e.g., standard deviation)
  - systematic approach: Bayesian statistics
  - pragmatic approach: let the uncertainty of an item with n ratings decay as U(n) ∼ 1/n, and pick items by roulette wheel selection
