# Fairness Validation by Online Evaluation Telemetry

## STEP ONE - Set up environment and connection

In this fairness analysis, we will use our online evaluation data in RDS database, which is keep updating

In [1]:
import argparse
import json

import pandas as pd
import sqlalchemy

In [2]:
def connect_db(dbConf):
    '''
    Setup DB connection
    '''
    # Build connection URL
    dburl = 'postgresql://{}:{}@{}:{}/{}'.format(
        dbConf['username'],
        dbConf['password'],
        dbConf['host'],
        dbConf['port'],
        dbConf['db_name']
    )

    return sqlalchemy.create_engine(dburl, client_encoding='utf8')

In [3]:
config = {
    "host": "seai-recommendation-system-postgresql.czf7x8tyyg2u.us-east-1.rds.amazonaws.com",
    "username": "postgres",
    "password": "teamjurassicpark",
    "port": "5432",
    "db_name":"recommendation_system"
}

In [4]:
# connect to db
engine = connect_db(config)

# select rows with specified model id
with engine.connect() as con:
    df = pd.read_sql_query('SELECT * FROM recommendations;', con=con)

engine.dispose()

In [5]:
df.head()

Unnamed: 0,user_id,movie_id,rating,model_id,recommended_at
0,147880,the+intouchables+2011,0,model_2020-12-04_02:29:13,2020-12-09 06:45:15.149622
1,147880,touch+of+evil+1958,0,model_2020-12-04_02:29:13,2020-12-09 06:45:15.149622
2,147880,seven+chances+1925,0,model_2020-12-04_02:29:13,2020-12-09 06:45:15.149622
3,147880,life+is+beautiful+1997,0,model_2020-12-04_02:29:13,2020-12-09 06:45:15.149622
4,147880,raiders+of+the+lost+ark+1981,0,model_2020-12-04_02:29:13,2020-12-09 06:45:15.149622


In [6]:
df.shape

(9689300, 5)

In [7]:
df = df.sort_values(by='recommended_at')

In [8]:
df_test = df.copy()

In [9]:
df_test = df_test.set_index('recommended_at')

In [10]:
df_test.shape

(9689300, 4)

In [11]:
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn style defaults and set the default figure size
sns.set(rc={'figure.figsize':(11, 4)})

In [12]:
df_temp=df_test.groupby('movie_id')['user_id'].nunique()

### Which movies are recommend most to users

In [13]:
(df['movie_id'].value_counts().sort_values(ascending=False)[0:10])

the+intouchables+2011                                    197966
the+godfather+part+ii+1974                               197523
life+is+beautiful+1997                                   197517
star+wars+1977                                           196891
the+lord+of+the+rings+the+return+of+the+king+2003        196889
the+empire+strikes+back+1980                             196643
spirited+away+2001                                       196581
the+lord+of+the+rings+the+fellowship+of+the+ring+2001    196476
schindlers+list+1993                                     195677
inception+2010                                           194393
Name: movie_id, dtype: int64

### Which moives do users like most?
Remember our telemetry rating is in range \[-1,1\]. Any movie above 0 rating means user like it.

In [14]:
df_temp=df_test.groupby('movie_id')['rating'].mean().sort_values(ascending=False)

In [15]:
df_temp.head()

movie_id
the+shawshank+redemption+1994    0.308428
the+godfather+1972               0.169718
the+dark+knight+2008             0.128734
fight+club+1999                  0.105826
pulp+fiction+1994                0.083466
Name: rating, dtype: float64

## STEP TWO - Load the original kafka data for subpopulation validation

users.csv is a file we dump directly from kafka stream, it contains Age / Gender information. We will merge it with the recommendation table to test the fairness performance

In [16]:
users = pd.read_csv('users.csv')

In [17]:
users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 572998 entries, 0 to 572997
Data columns (total 4 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   user_id     572998 non-null  int64 
 1   age         572998 non-null  int64 
 2   occupation  572998 non-null  object
 3   gender      572998 non-null  object
dtypes: int64(2), object(2)
memory usage: 17.5+ MB


### Merge two table, only keep the intersection of users

In [18]:
df_temp = pd.merge(df,users,on='user_id',how='left').dropna()

## STEP THREE -  Get the subpopulation data, see what we recommended and what they like
Interestingly enough, the movies recommended most to every subpopulation aren't exact the same. 

In [19]:
male_users = df_temp[df_temp.gender == "M"]
female_users = df_temp[df_temp.gender == "F"]
df_temp.age
aged_users = df_temp[df_temp.age > df_temp["age"].mean()]
young_user = df_temp[df_temp.age < df_temp["age"].mean()]

## Top 10 movies recommended most to male user group

In [53]:
male_rec = male_users['movie_id'].value_counts().sort_values(ascending=False)[0:10]
print(male_rec)

the+great+beauty+2013       53583
the+best+of+youth+2003      53462
the+civil+war+1990          53410
modern+times+1936           53406
simon+of+the+desert+1965    53367
7+plus+seven+1970           53362
the+intouchables+2011       53350
john+adams+2008             53340
the+cameraman+1928          53323
rashomon+1950               53319
Name: movie_id, dtype: int64


## Top 10 movies recommended most to female user group

In [55]:
female_rec = female_users['movie_id'].value_counts().sort_values(ascending=False)[0:10]
print(female_rec)

the+cameraman+1928                           11092
the+400+blows+1959                           11055
double+indemnity+1944                        11029
the+third+man+1949                           11029
head-on+2004                                 11019
senna+2010                                   10975
seven+chances+1925                           10970
bill+hicks+revelations+1993                  10969
the+revolution+will+not+be+televised+2003    10967
the+secret+in+their+eyes+2009                10966
Name: movie_id, dtype: int64


## Intersection of movies recommended  most to female and male user group
Oops, seems there is no overlap

In [58]:
male_rec_10 = male_rec[:10].to_frame()
female_rec_10 = female_rec[:10].to_frame()
female_rec_10.merge(male_rec_10, on='movie_id')

Unnamed: 0,movie_id


## Top 10 movies recommended most to age-above-average user group

In [59]:
aged_rec = aged_users['movie_id'].value_counts().sort_values(ascending=False)[0:10]
print(aged_rec)

the+third+man+1949                26542
the+cameraman+1928                26510
the+man+who+planted+trees+1987    26475
children+of+paradise+1945         26451
the+intouchables+2011             26431
ordet+1955                        26424
rashomon+1950                     26412
the+lives+of+others+2006          26409
the+civil+war+1990                26405
to+have+and+have+not+1944         26390
Name: movie_id, dtype: int64


## Top 10 movies recommended most to age-below-average user group

In [61]:
young_rec = young_user['movie_id'].value_counts().sort_values(ascending=False)[0:10]
print(young_rec)

the+great+beauty+2013       38187
john+adams+2008             38027
simon+of+the+desert+1965    38013
the+maltese+falcon+1941     37999
the+best+of+youth+2003      37960
head-on+2004                37956
short+term+12+2013          37951
double+indemnity+1944       37937
the+act+of+killing+2012     37937
the+civil+war+1990          37910
Name: movie_id, dtype: int64


## Intersection of movies recommended to different age group

In [63]:
aged_rec_10 = aged_rec[:10].to_frame()
young_rec_10 = young_rec[:10].to_frame()
aged_rec_10.merge(young_rec_10, on='movie_id')

Unnamed: 0,movie_id


## Top 10 movie liked by male user group

In [43]:
male_temp=male_users.groupby('movie_id')['rating'].mean().sort_values(ascending=False)
male_temp.head(10)

movie_id
the+seventh+seal+1957                                    0.071429
searching+for+sugar+man+2012                             0.066667
the+shawshank+redemption+1994                            0.055557
monty+python+and+the+holy+grail+1975                     0.046117
the+godfather+1972                                       0.044787
the+usual+suspects+1995                                  0.042039
star+wars+1977                                           0.041374
the+lord+of+the+rings+the+fellowship+of+the+ring+2001    0.041186
raiders+of+the+lost+ark+1981                             0.040765
interstellar+2014                                        0.040000
Name: rating, dtype: float64

## Top 10 movie liked by female user group

In [37]:
female_temp=female_users.groupby('movie_id')['rating'].mean().sort_values(ascending=False)
female_temp.head(20)

movie_id
the+shawshank+redemption+1994                            0.052233
the+usual+suspects+1995                                  0.040591
schindlers+list+1993                                     0.040065
a+man+escaped+1956                                       0.039252
raiders+of+the+lost+ark+1981                             0.039136
the+lives+of+others+2006                                 0.038849
life+is+beautiful+1997                                   0.038447
my+neighbor+totoro+1988                                  0.038266
spirited+away+2001                                       0.038190
ordet+1955                                               0.037936
12+angry+men+1957                                        0.037703
the+sting+1973                                           0.037328
monty+python+and+the+holy+grail+1975                     0.037037
the+godfather+1972                                       0.037004
the+lord+of+the+rings+the+fellowship+of+the+ring+2001    0.036941
s

## Intersection of top popular 10 movies for female and male user group

In [45]:
female_top_20 = female_temp[:10].to_frame()
male_top_20 = male_temp[:10].to_frame()
female_top_20.merge(male_top_20, on='movie_id')

Unnamed: 0_level_0,rating_x,rating_y
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1
the+shawshank+redemption+1994,0.052233,0.055557
the+usual+suspects+1995,0.040591,0.042039
raiders+of+the+lost+ark+1981,0.039136,0.040765


## Top 10 movies rated by user group above average age

In [48]:
aged_temp=aged_users.groupby('movie_id')['rating'].mean().sort_values(ascending=False)
aged_temp.head(10)

movie_id
the+seventh+seal+1957                                    0.111111
monty+python+and+the+holy+grail+1975                     0.071066
the+shawshank+redemption+1994                            0.056793
the+godfather+1972                                       0.046058
the+usual+suspects+1995                                  0.041484
raiders+of+the+lost+ark+1981                             0.041305
the+lord+of+the+rings+the+fellowship+of+the+ring+2001    0.040226
schindlers+list+1993                                     0.039494
star+wars+1977                                           0.039061
inception+2010                                           0.038935
Name: rating, dtype: float64

## Top 10 movies rated by user group below average age

In [49]:
young_temp=young_user.groupby('movie_id')['rating'].mean().sort_values(ascending=False)
young_temp.head(10)

movie_id
searching+for+sugar+man+2012                             0.090909
interstellar+2014                                        0.062500
the+shawshank+redemption+1994                            0.053752
the+usual+suspects+1995                                  0.042003
the+godfather+1972                                       0.041681
star+wars+1977                                           0.040715
the+lord+of+the+rings+the+fellowship+of+the+ring+2001    0.040614
raiders+of+the+lost+ark+1981                             0.039922
the+lord+of+the+rings+the+return+of+the+king+2003        0.039632
the+matrix+1999                                          0.039101
Name: rating, dtype: float64

In [51]:
elder_top_10 = aged_temp[:10].to_frame()
young_top_10 = young_temp[:10].to_frame()
young_top_10.merge(elder_top_10, on='movie_id')

Unnamed: 0_level_0,rating_x,rating_y
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1
the+shawshank+redemption+1994,0.053752,0.056793
the+usual+suspects+1995,0.042003,0.041484
the+godfather+1972,0.041681,0.046058
star+wars+1977,0.040715,0.039061
the+lord+of+the+rings+the+fellowship+of+the+ring+2001,0.040614,0.040226
raiders+of+the+lost+ark+1981,0.039922,0.041305


## STEP THREE - Test fairness in false discovery rate and precision

In [24]:
def cal_false_discovery(true_pos,false_pos):
    return(false_pos/(false_pos+true_pos))

def cal_precision(true_pos,false_pos):
    return(true_pos/(false_pos+true_pos))

In [25]:
male_tp = male_users[male_users['rating'] == 1].shape[0]
male_fp = male_users[male_users['rating'] == -1].shape[0]
# Make calculations 
false_discovery_rate_male=cal_false_discovery(male_tp,male_fp)
precision_male=cal_precision(male_tp,male_fp)

In [26]:
female_tp = female_users[female_users['rating'] == 1].shape[0]
female_fp = female_users[female_users['rating'] == -1].shape[0]
# Make calculations 
false_discovery_rate_female=cal_false_discovery(female_tp,female_fp)
precision_female=cal_precision(female_tp,female_fp)

In [27]:
aged_tp = aged_users[aged_users['rating'] == 1].shape[0]
aged_fp = aged_users[aged_users['rating'] == -1].shape[0]
# Make calculations 
aged_discovery_rate=cal_false_discovery(aged_tp,aged_fp)
precision_aged=cal_precision(aged_tp,aged_fp)

In [28]:
young_tp = young_user[young_user['rating'] == 1].shape[0]
young_fp = young_user[young_user['rating'] == -1].shape[0]
# Make calculations 
young_discovery_rate=cal_false_discovery(young_tp,young_fp)
precision_young=cal_precision(young_tp,young_fp)

In [29]:
print(f'Male = False Disvovery Rate: {false_discovery_rate_male} Precision {precision_male}')
print(f'Female = False Disvovery Rate: {false_discovery_rate_female} Precision {precision_female}')
print(f'Aged = False Disvovery Rate: {aged_discovery_rate} Precision {precision_aged}')
print(f'Young = False Disvovery Rate: {young_discovery_rate} Precision {precision_young}')

Male = False Disvovery Rate: 0.006380574309640404 Precision 0.9936194256903595
Female = False Disvovery Rate: 0.005620050523686526 Precision 0.9943799494763135
Aged = False Disvovery Rate: 0.006382605646151149 Precision 0.9936173943538489
Young = False Disvovery Rate: 0.0061599626966402435 Precision 0.9938400373033598
