This notebook generates the teaser figure and computes the statistics reported in the teaser table.

In [76]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Where the per-user score files live, and which dataset/model pair to load.
# The file name follows the pattern <folder_path>/<data>_<model>.csv.
folder_path = "per_user_score"
data = "lfm-1b"
model = "THUDM_neutral"

# Read the per-user scores and display the resulting frame as the cell output.
score_file = f"{folder_path}/{data}_{model}.csv"
per_user_score = pd.read_csv(score_file)
per_user_score

In [85]:
# Cubehelix palette limited to mid-range lightness; the plotting cell uses its first colour.
cmap = sns.cubehelix_palette(light=0.75, dark=0.25)

In [None]:
# Teaser figure: three stacked horizontal boxplots of per-user NDCG for female
# users in "America & Antarctica", one panel per age-based subset.
base_filter = "gender=='F' & map_continent=='America & Antarctica'"
panels = [
    # (y tick label, age condition, extra boxplot kwargs)
    # NOTE(review): only the first panel overrides fliersize; kept as-is to
    # preserve the figure -- confirm whether the other panels should match.
    ("Group 1", "map_age=='older-adult'", {"fliersize": 3}),
    ("Group 2", "map_age=='young'", {}),
    ("All users", "map_age!='adult'", {}),
]

# sharex=True puts all panels on one NDCG scale. Only the bottom panel shows
# ticks, so independently autoscaled panels would be read against the wrong axis.
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(4,1.5), sharex=True)

last = len(panels) - 1
for i, (label, age_filter, extra_kws) in enumerate(panels):
    subset = per_user_score.query(f"{base_filter} & {age_filter}")
    # orient="h" is seaborn's own orientation parameter; it replaces the
    # Matplotlib-level `vert=False`, which seaborn sets internally.
    sns.boxplot(subset, x="NDCG", orient="h", color=cmap[0], ax=ax[i], **extra_kws)

    ax[i].set_yticklabels([label], rotation=0)

    # Keep only the outer frame: top spine on the first panel, bottom spine on the last.
    ax[i].spines['top'].set_visible(i == 0)
    ax[i].spines['bottom'].set_visible(i == last)

    if i != last:
        # Shared x-axes share one tick locator, so ticks are hidden per-axes
        # here rather than removed with set_ticks([]), which would also
        # strip them from the bottom panel.
        ax[i].tick_params(axis="x", which="both", bottom=False, labelbottom=False)
        ax[i].set_xlabel(None)

# Write next to the input data, reusing the configured folder instead of a hard-coded copy.
fig.savefig(f"{folder_path}/teaser_boxplot_singlecolour.pdf", dpi=600, bbox_inches="tight")
plt.show()

# Stats

In [108]:
# NDCG scores for the three user subsets plotted in the teaser figure.
_DEMOGRAPHIC_FILTER = "gender=='F' & map_continent=='America & Antarctica'"


def _ndcg_for(age_condition):
    """Return the NDCG column of rows matching the shared filter and `age_condition`."""
    return per_user_score.query(f"{_DEMOGRAPHIC_FILTER} & {age_condition}")["NDCG"]


grp1 = _ndcg_for("map_age=='older-adult'")
grp2 = _ndcg_for("map_age=='young'")
ind = _ndcg_for("map_age!='adult'")

In [109]:
# Mean NDCG of each subset, printed on one line in the order grp1, grp2, ind.
print(*(scores.mean() for scores in (grp1, grp2, ind)))

0.5461165748645672 0.4712010564026577 0.47210464269989566


In [93]:
# Number of non-null NDCG values per subset, in the order grp1, grp2, ind.
tuple(scores.count() for scores in (grp1, grp2, ind))

(11, 901, 912)

In [96]:
from evaluate import Fairness

In [97]:
# Fairness instance (imported from `evaluate`) whose gini() method is used by the cells below.
fair = Fairness()

In [None]:
# Gini computed over the two group-level mean NDCG values (grp1 and grp2).
group_means = pd.Series([grp1.mean(), grp2.mean()])
fair.gini(group_means)

0.036820121936051955

In [None]:
# Gini computed over the per-user NDCG values in `ind`.
individual_gini = fair.gini(ind)
print(individual_gini)