In [1]:
import pandas as pd

from app.utils.universal_path import universal_path

# Load the complete ratings table from the CSV file located via universal_path.
data_path = universal_path('../../../data/user_ratings.csv')
full_df = pd.read_csv(data_path)

# Work on a fixed-seed random subset of 10 million rows so re-runs pick the same rows.
small_df = full_df.sample(n=10_000_000, random_state=42)
small_df

Unnamed: 0,BGGId,Rating,Username
16597337,17709,2.0,kerbythepurplecow
15421575,155693,2.0,Asgren
16051321,40531,7.0,dzudz
16392352,22864,7.0,Gamezombiac
5484430,50,8.0,jaya
...,...,...,...
13193370,1540,7.0,BrianSchoff
7040948,264055,6.0,StupidRatCreature
2319147,121921,7.0,ekloff
3075854,77423,5.5,Leiye


# Most popular items
- compute average rating for each item

In [2]:
# Mean rating of every item: group the sampled ratings by BGGId and average them.
average_ratings = small_df.groupby('BGGId')[['Rating']].agg(['mean'])
average_ratings.head(10)

Unnamed: 0_level_0,Rating
Unnamed: 0_level_1,mean
BGGId,Unnamed: 1_level_2
1,7.659893
2,6.689061
3,7.472895
4,6.674976
5,7.441088
6,6.4075
7,6.549755
8,6.113426
9,6.459529
10,6.683932


- recommend items with the highest averages

In [3]:
# Per-item mean and number of ratings, ordered so the highest mean comes first.
average_ratings_sorted_by_best = (
    small_df
    .groupby('BGGId')[['Rating']]
    .agg(['mean', 'count'])
    .sort_values(by=('Rating', 'mean'), ascending=False)
)
average_ratings_sorted_by_best.head(10)

Unnamed: 0_level_0,Rating,Rating
Unnamed: 0_level_1,mean,count
BGGId,Unnamed: 1_level_2,Unnamed: 2_level_2
284121,9.762632,57
207203,9.647059,34
342942,9.644515,378
345976,9.6,25
295785,9.591495,162
275777,9.565217,23
249277,9.5641,100
349161,9.5,4
260037,9.383529,17
323046,9.380952,21


## Problems:
### Number of ratings, uncertainty
- average 5 from 3 ratings
- average 4.9 from 100 ratings

In [4]:
# Minimum number of ratings (inclusive) an item needs before its average is listed.
MIN_RATINGS = 3
# Use >= so an item with exactly MIN_RATINGS ratings is kept, as the constant's name
# promises; a strict > would silently require MIN_RATINGS + 1 ratings.
average_ratings_sorted_by_best[average_ratings_sorted_by_best[('Rating', 'count')] >= MIN_RATINGS].head(10)

Unnamed: 0_level_0,Rating,Rating
Unnamed: 0_level_1,mean,count
BGGId,Unnamed: 1_level_2,Unnamed: 2_level_2
284121,9.762632,57
207203,9.647059,34
342942,9.644515,378
345976,9.6,25
295785,9.591495,162
275777,9.565217,23
249277,9.5641,100
349161,9.5,4
260037,9.383529,17
323046,9.380952,21


### Bias, normalization
- some users give systematically higher ratings
  - => compute each user's bias, i.e. the difference between that user's average rating and the global average of all ratings, then subtract this bias from each of the user's ratings

In [5]:
# Mean of all ratings in the sample, stored in every row as the common reference level.
small_df["Global average"] = small_df['Rating'].mean()

# Mean rating given by each user, mapped back onto that user's rows by username.
average_per_user = small_df.groupby('Username')[['Rating']].agg(['mean'])
small_df["User average"] = small_df['Username'].map(average_per_user[('Rating', 'mean')])

# Bias = how far the user's mean sits from the global mean; removing it from each
# rating yields the normalized ("unbiased") rating.
small_df["User bias"] = small_df['User average'].sub(small_df['Global average'])
small_df["Unbiased rating"] = small_df['Rating'].sub(small_df['User bias'])
small_df

Unnamed: 0,BGGId,Rating,Username,Global average,User average,User bias,Unbiased rating
16597337,17709,2.0,kerbythepurplecow,7.130591,5.990566,-1.140025,3.140025
15421575,155693,2.0,Asgren,7.130591,7.276596,0.146005,1.853995
16051321,40531,7.0,dzudz,7.130591,6.856250,-0.274341,7.274341
16392352,22864,7.0,Gamezombiac,7.130591,6.779621,-0.350970,7.350970
5484430,50,8.0,jaya,7.130591,6.720625,-0.409966,8.409966
...,...,...,...,...,...,...,...
13193370,1540,7.0,BrianSchoff,7.130591,7.636364,0.505773,6.494227
7040948,264055,6.0,StupidRatCreature,7.130591,6.439080,-0.691511,6.691511
2319147,121921,7.0,ekloff,7.130591,5.695279,-1.435312,8.435312
3075854,77423,5.5,Leiye,7.130591,6.142857,-0.987734,6.487734


In [6]:
# Per-item mean and count of the bias-corrected ratings, best mean first.
average_unbiased_ratings_sorted_from_best = small_df[['BGGId', 'Unbiased rating']].groupby('BGGId').agg(
    ['mean', 'count']).sort_values(by=('Unbiased rating', 'mean'), ascending=False)
# Keep only items with at least MIN_RATINGS ratings; >= (not >) so the filter really
# means "at least" and items with exactly MIN_RATINGS ratings are not dropped.
average_unbiased_ratings_sorted_from_best[
    average_unbiased_ratings_sorted_from_best[('Unbiased rating', 'count')] >= MIN_RATINGS].head(10)

Unnamed: 0_level_0,Unbiased rating,Unbiased rating
Unnamed: 0_level_1,mean,count
BGGId,Unnamed: 1_level_2,Unnamed: 2_level_2
17821,9.303492,15
342942,9.069642,378
345976,9.01235,25
63170,8.999922,301
277538,8.936712,68
341169,8.891127,265
219717,8.888886,33
140135,8.812365,131
324856,8.785799,1181
7935,8.738006,97


## Exploitation vs Exploration
- "pure exploitation" – always recommend "top items" – what we did above
- what if some other item is actually better and its rating is lower only because of noise?
- "exploration" – presenting items to get more data
- Multi-armed Bandit
  - standard model for "exploitation vs exploration"
  - many algorithms (e.g., "upper confidence bounds")
- core idea:
  - do not use just "averages"
  - quantify uncertainty (e.g., standard deviation)
  - systematic approach: Bayesian statistics
  - pragmatic approach: U(n) ∼ 1/n, roulette wheel selection
