# Fairness

In [1]:

from typing import List, Dict, Any
from tqdm import tqdm
import ast
import os
import numpy as np
import scipy
import pandas as pd
from sklearn.metrics import (
    confusion_matrix,
    multilabel_confusion_matrix,
    classification_report,
)
from mars_gym.utils.utils import mean_confidence_interval

import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)

In [2]:
df = pd.read_csv('src/recsys_fair_metrics/tests/factories/test_set_predictions.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,userid,timestamp,musicbrainz-artist-id,artist,musicbrainz-track-id,track-name,user_gender,user_age,user_country,...,artist_tags,artist_type,artist_uri,popularity,first_artist_genre,clicked,sorted_actions,prob_actions,action_scores,trained
0,3517949,user_000366,2009-06-08T08:01:07Z,0ef3f425-9bd2-4216-9dd2-219d2fe90f1f,Lenny Kravitz,cfce6aaf-b239-4818-9188-d2710def0422,In My Life Today,m,24.0,Finland,...,"psychedelic soul, american, classic pop and ro...",Person,https://musicbrainz.org/artist/0ef3f425-9bd2-4...,7467,rock,1,"['45a663b5-b1cb-4a91-bff6-2bef7bbfdd76', '4ac4...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...","[0.4648118317127228, 0.4329394996166229, 0.293...",1.0
1,5076530,user_000761,2009-06-08T08:01:58Z,0e422e91-a42a-4b4d-8413-9baff67350f2,Billy Ocean,5b861cac-b107-4616-bbcb-53c7324e3f36,"Get Outta My Dreams, Get Into My Car",,,Netherlands,...,"adult contemporary, caribbean queen, british, ...",Person,https://musicbrainz.org/artist/0e422e91-a42a-4...,427,contemporary r&b,1,"['3e55d51d-687f-4a9d-af96-2fabccf802e5', 'cc61...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.4018394649028778, 0.3567405343055725, 0.341...",1.0
2,3271639,user_000761,2009-06-08T08:06:43Z,b3ae82c2-e60b-4551-a76d-6620f1b456aa,Melissa Etheridge,79809a6e-cc37-4a6e-bca2-091364692298,Like The Way I Do,,,Netherlands,...,"heartland rock, roots rock, singer/songwriter",Person,https://musicbrainz.org/artist/b3ae82c2-e60b-4...,621,rock,1,"['d43d12a1-2dc9-4257-a2fd-0a3bb1081b86', 'c296...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.9713646769523621, 0.875041127204895, 0.4424...",1.0
3,3517948,user_000366,2009-06-08T08:07:36Z,0ef3f425-9bd2-4216-9dd2-219d2fe90f1f,Lenny Kravitz,7d9c0076-9b62-4e9d-93e4-4bd4a6bf2ee5,The Resurrection,m,24.0,Finland,...,"psychedelic soul, american, classic pop and ro...",Person,https://musicbrainz.org/artist/0ef3f425-9bd2-4...,7467,rock,1,"['40f5d9e4-2de7-4f2d-ad41-e31a9a9fea27', '0ef3...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.40954360365867615, 0.2930067479610443, 0.27...",1.0
4,2760718,user_000348,2009-06-08T08:20:58Z,66ad8456-1d33-4d7d-b8c3-a8f1a3b1e74f,Dj Mehdi,ac7613ff-c3e4-4996-a7c0-55fd56d81325,Stick It,m,24.0,Finland,...,rock and indie,Person,https://musicbrainz.org/artist/66ad8456-1d33-4...,922,hip hop,1,"['25b75a66-ce83-4db3-b136-395a3c3784c4', '6be2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.764191210269928, 0.4643076956272125, 0.4116...",1.0


In [3]:
from typing import List, Union, Dict, Tuple
from multiprocessing.pool import Pool

def parallel_literal_eval(
    series: Union[pd.Series, np.ndarray], pool: Pool = None, use_tqdm: bool = True
) -> list:
    if pool:
        return _parallel_literal_eval(series, pool, use_tqdm)
    else:
        with Pool(os.cpu_count()) as p:
            return _parallel_literal_eval(series, p, use_tqdm)

def _parallel_literal_eval(
    series: Union[pd.Series, np.ndarray], pool: Pool, use_tqdm: bool = True
) -> list:
    
    if use_tqdm:
        return list(tqdm(pool.map(literal_eval_if_str, series), total=len(series)))
    else:
        return pool.map(literal_eval_if_str, series)

def literal_eval_if_str(element):
    if isinstance(element, str):
        return ast.literal_eval(element)
    return element

## Disparate Mistreatment

In [4]:
user_column = 'userid'
item_column = 'musicbrainz-artist-id'
reclist_column = 'sorted_actions'
reclist_score_column = 'action_scores'
first_rec_column = 'first_rec'
first_recscore_column = 'first_recscore'
fairness_columns = ["artist_gender"]#["artist_area", "artist_gender", , "first_artist_genre"]

df = df[[user_column, item_column, reclist_column, reclist_score_column] + fairness_columns]
df["sorted_actions"] = parallel_literal_eval(df["sorted_actions"])
df["action_scores"]   = parallel_literal_eval(df["action_scores"])

df[first_rec_column] = df[reclist_column].apply(lambda l: l[0])
df[first_recscore_column] = df[reclist_score_column].apply(lambda l: l[0])

df.head()

100%|██████████| 10000/10000 [00:00<00:00, 4363158.22it/s]
100%|██████████| 10000/10000 [00:00<00:00, 4122571.26it/s]


Unnamed: 0,userid,musicbrainz-artist-id,sorted_actions,action_scores,artist_gender,first_rec,first_recscore
0,user_000366,0ef3f425-9bd2-4216-9dd2-219d2fe90f1f,"[45a663b5-b1cb-4a91-bff6-2bef7bbfdd76, 4ac4e32...","[0.4648118317127228, 0.4329394996166229, 0.293...",Male,45a663b5-b1cb-4a91-bff6-2bef7bbfdd76,0.464812
1,user_000761,0e422e91-a42a-4b4d-8413-9baff67350f2,"[3e55d51d-687f-4a9d-af96-2fabccf802e5, cc6115a...","[0.4018394649028778, 0.3567405343055725, 0.341...",Male,3e55d51d-687f-4a9d-af96-2fabccf802e5,0.401839
2,user_000761,b3ae82c2-e60b-4551-a76d-6620f1b456aa,"[d43d12a1-2dc9-4257-a2fd-0a3bb1081b86, c296e10...","[0.9713646769523621, 0.875041127204895, 0.4424...",Female,d43d12a1-2dc9-4257-a2fd-0a3bb1081b86,0.971365
3,user_000366,0ef3f425-9bd2-4216-9dd2-219d2fe90f1f,"[40f5d9e4-2de7-4f2d-ad41-e31a9a9fea27, 0ef3f42...","[0.40954360365867615, 0.2930067479610443, 0.27...",Male,40f5d9e4-2de7-4f2d-ad41-e31a9a9fea27,0.409544
4,user_000348,66ad8456-1d33-4d7d-b8c3-a8f1a3b1e74f,"[25b75a66-ce83-4db3-b136-395a3c3784c4, 6be2828...","[0.764191210269928, 0.4643076956272125, 0.4116...",Male,25b75a66-ce83-4db3-b136-395a3c3784c4,0.764191


In [5]:
def mean_confidence_interval(data, confidence=0.95):
    data = np.array(data)
    data = data[~np.isnan(data)]
    a = 1.0 * data
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1)
    return m, h

In [6]:
def calculate_fairness_metrics(
    df: pd.DataFrame, sub_keys: List[str], ground_truth_key: str, prediction_key: str
) -> pd.DataFrame:
    rows: List[Dict[str, Any]] = []

    for sub_key in sub_keys:
        subs = df[sub_key].unique()

        for sub in subs:
            sub_df = df[df[sub_key] == sub]
            y_true, y_pred = (
                sub_df[ground_truth_key].astype(str),
                sub_df[prediction_key].astype(str),
            )
            #from IPython import embed
            #embed()
            cnf_matrix = confusion_matrix(y_true, y_pred)

            num_positives = np.sum(np.diag(cnf_matrix))
            num_negatives = np.sum(cnf_matrix) - num_positives

            fp = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)
            fn = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
            tp = np.diag(cnf_matrix)
            tn = cnf_matrix.sum() - (fp + fn + tp)

            fp = fp.astype(float)
            fn = fn.astype(float)
            tp = tp.astype(float)
            tn = tn.astype(float)

            # Sensitivity, hit rate, recall, or true positive rate
            tpr = tp / (tp + fn)
            # Specificity or true negative rate
            tnr = tn / (tn + fp)
            # Precision or positive predictive value
            ppv = tp / (tp + fp)
            # Negative predictive value
            npv = tn / (tn + fn)
            # Fall out or false positive rate
            fpr = fp / (fp + tn)
            # False negative rate
            fnr = fn / (tp + fn)
            # False discovery rate
            fdr = fp / (tp + fp)
            # positive rate
            pr = (tp + fp) / (tp + fp + fn + tn)
            # positive rate
            nr = (tn + fn) / (tp + fp + fn + tn)

            # Overall accuracy
            acc = (tp + tn) / (tp + fp + fn + tn)

            # Balanced Accuracy (BA)
            bacc = (tpr + tnr) / 2

            # print(classification_report(y_true,y_pred))
            fpr, fpr_c = mean_confidence_interval(fpr)
            fnr, fnr_c = mean_confidence_interval(fnr)
            tpr, tpr_c = mean_confidence_interval(tpr)
            tnr, tnr_c = mean_confidence_interval(tnr)
            pr, pr_c = mean_confidence_interval(pr)
            nr, nr_c = mean_confidence_interval(nr)
            acc, acc_c = mean_confidence_interval(acc)
            bacc, bacc_c = mean_confidence_interval(bacc)

            rows.append(
                {
                    "sub_key": sub_key,
                    "sub": sub,
                    "total_class": len(tp),
                    "false_positive_rate": fpr,
                    "false_positive_rate_C": fpr_c,
                    "false_negative_rate": fnr,
                    "false_negative_rate_C": fnr_c,
                    "true_positive_rate": tpr,
                    "true_positive_rate_C": tpr_c,
                    "true_negative_rate": tnr,
                    "true_negative_rate_C": tnr_c,
                    "positive_rate": pr,
                    "positive_rate_C": pr_c,
                    "negative_rate": nr,
                    "negative_rate_C": nr_c,
                    "accuracy": acc,
                    "accuracy_C": acc_c,
                    "balance_accuracy": bacc,
                    "balance_accuracy_C": bacc_c,
                    "total_positives": num_positives,
                    "total_negatives": num_negatives,
                    "total_individuals": num_positives + num_negatives,
                }
            )

    return pd.DataFrame(data=rows).sort_values(["sub_key", "sub"])

In [7]:
fairness_metrics = calculate_fairness_metrics(
            df.fillna("-"),
            fairness_columns,
            item_column,
            first_rec_column,
        )
fairness_metrics        


invalid value encountered in true_divide


invalid value encountered in true_divide


invalid value encountered in true_divide


invalid value encountered in true_divide



Unnamed: 0,sub_key,sub,total_class,false_positive_rate,false_positive_rate_C,false_negative_rate,false_negative_rate_C,true_positive_rate,true_positive_rate_C,true_negative_rate,...,positive_rate_C,negative_rate,negative_rate_C,accuracy,accuracy_C,balance_accuracy,balance_accuracy_C,total_positives,total_negatives,total_individuals
2,artist_gender,-,818,0.000724,9e-05,0.818933,0.032645,0.181067,0.032645,0.999276,...,0.000264,0.998778,0.000264,0.998556,0.000195,0.590314,0.016293,1402,2024,3426
1,artist_gender,Female,577,0.000845,0.000113,0.792568,0.038856,0.207432,0.038856,0.999155,...,0.0005,0.998267,0.0005,0.99832,0.000187,0.603473,0.019393,1033,972,2005
0,artist_gender,Male,1023,0.000558,6.3e-05,0.827606,0.026972,0.172394,0.026972,0.999442,...,0.000174,0.999022,0.000174,0.998887,0.000135,0.586037,0.013471,1968,2601,4569


In [8]:
fairness_metrics = fairness_metrics[['sub_key', 'sub', 'true_positive_rate', 'true_positive_rate_C', 'total_individuals']]
fairness_metrics["feature"] = fairness_metrics["sub_key"] + "@" + fairness_metrics["sub"].astype(str)
fairness_metrics = fairness_metrics.set_index("feature")
fairness_metrics



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,sub_key,sub,true_positive_rate,true_positive_rate_C,total_individuals
feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
artist_gender@-,artist_gender,-,0.181067,0.032645,3426
artist_gender@Female,artist_gender,Female,0.207432,0.038856,2005
artist_gender@Male,artist_gender,Male,0.172394,0.026972,4569


In [9]:
np.std(fairness_metrics['true_positive_rate'])

0.014899402941231373

In [10]:
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objects as go
TEMPLATE = "plotly_white" 
def plot_fairness_mistreatment(df, metric, title=""):
    data = []

    data.append(
        go.Bar(
            y=df.index,
            x=df[metric],
            orientation="h",
            error_x=dict(type="data", array=df[metric + "_C"])
            if metric + "_C" in df.columns
            else {},
            marker={"color": list(range(len(df.index))), "colorscale": "Tealgrn"},
        )
    )  # Plotly3

    fig = go.Figure(data=data)
    # Change the bar mode
    fig.update_layout(
        template=TEMPLATE,
        legend_orientation="h",
        xaxis_title=metric,
        xaxis_range=(0, np.max([1, df[metric].max()])),
        legend=dict(y=-0.2),
        title=title,
    )


    fig.update_layout(
        shapes=[
            dict(
                type="line",
                line=dict(width=1, dash="dot",),
                yref="paper",
                y0=0,
                y1=1,
                xref="x",
                x0=df[metric].mean(),
                x1=df[metric].mean(),
            )
        ]     
    )

    fig.add_annotation(
        x=0.9,
        y=0,
        xref="x",
        yref="y",
        text="RMSE: 0.36",
        showarrow=False,
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="#ffffff"
            ),
        align="center",
        bordercolor="#c7c7c7",
        borderwidth=1,
        borderpad=4,
        bgcolor="#ff7f0e",
        opacity=0.8
        )

    return fig


In [11]:
plot_fairness_mistreatment(fairness_metrics, "true_positive_rate",
            title="Disparate Mistreatment: "
            + fairness_columns[0]
            + " | "
            + "recall",)

In [12]:
fairness_metrics

Unnamed: 0_level_0,sub_key,sub,true_positive_rate,true_positive_rate_C,total_individuals
feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
artist_gender@-,artist_gender,-,0.181067,0.032645,3426
artist_gender@Female,artist_gender,Female,0.207432,0.038856,2005
artist_gender@Male,artist_gender,Male,0.172394,0.026972,4569


In [13]:
def rmse_fairness(values):
    center = values.mean()
    return np.sqrt(((values - center) ** 2).mean())

rmse_fairness(fairness_metrics['true_positive_rate'].values)

0.014899402941231373

In [14]:
rmse_fairness(fairness_metrics['true_positive_rate'].values)

0.014899402941231373

## Disparate Treatment

In [15]:
df = pd.read_csv('src/recsys_fair_metrics/tests/factories/test_set_predictions.csv')

user_column = 'userid'
item_column = 'musicbrainz-artist-id'
reclist_column = 'sorted_actions'
reclist_score_column = 'action_scores'
first_rec_column = 'first_rec'
first_recscore_column = 'first_recscore'
fairness_columns = "user_gender"#["artist_area", "artist_gender", , "first_artist_genre"]

df = df[[user_column, item_column, reclist_column, reclist_score_column] + [fairness_columns]]
df["sorted_actions"] = parallel_literal_eval(df["sorted_actions"])
df["action_scores"]   = parallel_literal_eval(df["action_scores"])

df[first_rec_column] = df[reclist_column].apply(lambda l: l[0])
df[first_recscore_column] = df[reclist_score_column].apply(lambda l: l[0])

df.head()

100%|██████████| 10000/10000 [00:00<00:00, 4612166.26it/s]
100%|██████████| 10000/10000 [00:00<00:00, 4805022.34it/s]


Unnamed: 0,userid,musicbrainz-artist-id,sorted_actions,action_scores,user_gender,first_rec,first_recscore
0,user_000366,0ef3f425-9bd2-4216-9dd2-219d2fe90f1f,"[45a663b5-b1cb-4a91-bff6-2bef7bbfdd76, 4ac4e32...","[0.4648118317127228, 0.4329394996166229, 0.293...",m,45a663b5-b1cb-4a91-bff6-2bef7bbfdd76,0.464812
1,user_000761,0e422e91-a42a-4b4d-8413-9baff67350f2,"[3e55d51d-687f-4a9d-af96-2fabccf802e5, cc6115a...","[0.4018394649028778, 0.3567405343055725, 0.341...",,3e55d51d-687f-4a9d-af96-2fabccf802e5,0.401839
2,user_000761,b3ae82c2-e60b-4551-a76d-6620f1b456aa,"[d43d12a1-2dc9-4257-a2fd-0a3bb1081b86, c296e10...","[0.9713646769523621, 0.875041127204895, 0.4424...",,d43d12a1-2dc9-4257-a2fd-0a3bb1081b86,0.971365
3,user_000366,0ef3f425-9bd2-4216-9dd2-219d2fe90f1f,"[40f5d9e4-2de7-4f2d-ad41-e31a9a9fea27, 0ef3f42...","[0.40954360365867615, 0.2930067479610443, 0.27...",m,40f5d9e4-2de7-4f2d-ad41-e31a9a9fea27,0.409544
4,user_000348,66ad8456-1d33-4d7d-b8c3-a8f1a3b1e74f,"[25b75a66-ce83-4db3-b136-395a3c3784c4, 6be2828...","[0.764191210269928, 0.4643076956272125, 0.4116...",m,25b75a66-ce83-4db3-b136-395a3c3784c4,0.764191


In [16]:
df = df[[fairness_columns, reclist_column, reclist_score_column]]
df = df.set_index([fairness_columns]).apply(pd.Series.explode).reset_index()
df.head()

Unnamed: 0,user_gender,sorted_actions,action_scores
0,m,45a663b5-b1cb-4a91-bff6-2bef7bbfdd76,0.464812
1,m,4ac4e32b-bd18-402e-adad-ae00e72f8d85,0.432939
2,m,0ef3f425-9bd2-4216-9dd2-219d2fe90f1f,0.293007
3,m,fbb375f9-48bb-4635-824e-4120273b3ba7,0.26873
4,m,09885b8e-f235-4b80-a02a-055539493173,0.219262


In [17]:
def confidence(x):
    return mean_confidence_interval(x)[1]

df_mean = (
        df.fillna("-").groupby([reclist_column, fairness_columns]).agg(
            count=(reclist_score_column, "count"),
            mean_rhat_score=(reclist_score_column, "mean"),
            confidence=(reclist_score_column, confidence))
    ).reset_index()#.rename(columns={item_column: 'count', first_recscore_column: 'mean_rhat_score'})
df_mean

Unnamed: 0,sorted_actions,user_gender,count,mean_rhat_score,confidence
0,0004537a-4b12-43eb-a023-04009e738d2e,-,19,0.171590,0.054135
1,0004537a-4b12-43eb-a023-04009e738d2e,f,8,0.266904,0.253417
2,0004537a-4b12-43eb-a023-04009e738d2e,m,162,0.195494,0.032914
3,000ba849-700e-452e-8858-0db591587e4a,-,14,0.086477,0.008655
4,000ba849-700e-452e-8858-0db591587e4a,f,11,0.189231,0.207413
...,...,...,...,...,...
17953,ffdb2b8f-ecbf-427a-abc7-c47f4ebc8ae3,f,15,0.088896,0.040629
17954,ffdb2b8f-ecbf-427a-abc7-c47f4ebc8ae3,m,145,0.091088,0.028732
17955,ffdf4cb7-5d2e-403e-a5ba-b19c18d0feb4,-,17,0.025102,0.005436
17956,ffdf4cb7-5d2e-403e-a5ba-b19c18d0feb4,f,20,0.125614,0.114891


In [18]:
def filter_treatment_df(df, rec_column, fairness_column, topk = 5, min_size = 30):
    # Filter significance
    df = df[df['count'] >= min_size]

    # more them one group
    df_count = df.groupby([rec_column]).count()['count'].reset_index()
    df_count = df_count[df_count['count'] > 1]  #== len(np.unique(df[fairness_column]))
    df = df[df[rec_column].isin(df_count[rec_column])]

    df.groupby([rec_column]).count()

    return df

df_metric = filter_treatment_df(df_mean, reclist_column, fairness_columns).sort_values('mean_rhat_score')
df_metric.head()

Unnamed: 0,sorted_actions,user_gender,count,mean_rhat_score,confidence
8324,752d58e6-d33f-4dcd-b40c-37acc14242f2,m,139,0.017772,0.006944
10058,8df95482-901f-469f-a268-73b34e05dad9,m,145,0.036846,0.012849
12215,af558762-abed-4f46-8324-34fc51f57b2b,m,118,0.040147,0.006834
10056,8df95482-901f-469f-a268-73b34e05dad9,-,52,0.067645,0.005186
9773,8974da95-e631-45aa-8fd7-aa0c2795f997,m,158,0.075888,0.014698


In [19]:
top_k = 10
df_metric_var = df_metric.groupby(reclist_column)\
                    .agg(var_mean_rhat_score = ('mean_rhat_score', 'var')).reset_index().iloc[:top_k]
df_metric_var.head()

df_metric = df_metric.merge(df_metric_var).sort_values('var_mean_rhat_score', ascending=False)
df_metric.head()

Unnamed: 0,sorted_actions,user_gender,count,mean_rhat_score,confidence,var_mean_rhat_score
4,12be5b16-915f-44bc-978a-8ddfab235b79,m,136,0.102936,0.014604,0.317508
5,12be5b16-915f-44bc-978a-8ddfab235b79,f,84,0.899815,0.05861,0.317508
10,553d8166-27b0-49fe-b8e4-89a984e2c375,m,130,0.219233,0.024759,0.091399
11,553d8166-27b0-49fe-b8e4-89a984e2c375,f,42,0.646781,0.099063,0.091399
13,7746d775-9550-4360-b8d5-c37bd448ce01,f,36,0.589469,0.068529,0.067131


In [20]:

def plot_fairness_treatment(df, metric, first_rec_column, title=""):
    score = "rhat_scores"
    
    y_sorted = list(df[[first_rec_column, 'var_mean_rhat_score']].drop_duplicates()\
                .sort_values('var_mean_rhat_score', ascending=False)[first_rec_column])
    y_sorted.reverse()
    data = []
    i = 0

    for group, rows in df.groupby(metric):
        #rows = rows.sort_values('var_mean_rhat_score', ascending=False)
        data.append(
            go.Bar(
                name=metric + "." + str(group),
                y=[str(a) for a in rows[first_rec_column]],
                x=rows["mean_rhat_score"],
                orientation="h",
                error_x=dict(type="data", array=rows["confidence"]),
            )
        )  # px.colors.sequential.Purp [i for i in range(len(rows))]

        i += 1
    fig = go.Figure(data=data)

    # Change the bar mode
    fig.update_layout(
        template=TEMPLATE,
        legend_orientation="h",
        xaxis_title=score,
        legend=dict(y=-0.2),
        title=title,
    )
    fig.update_layout(yaxis={'categoryorder':'array', 'categoryarray':y_sorted})

    fig.update_layout(
        shapes=[
            dict(
                type="line",
                line=dict(width=1, dash="dot",),
                yref="paper",
                y0=0,
                y1=1,
                xref="x",
                x0=df["mean_rhat_score"].mean(),
                x1=df["mean_rhat_score"].mean(),
            )
        ]
    )

    print(y_sorted)
    return fig

plot_fairness_treatment(
    df_metric,
    fairness_columns,
    reclist_column,
    title="Disparate Treatment"
)

['54bd120a-6bac-4d7c-8b10-ba167bc7e26c', '72ae6caf-4f9f-407e-8c70-8cd195df955c', '45a663b5-b1cb-4a91-bff6-2bef7bbfdd76', '72c536dc-7137-4477-a521-567eeb840fa8', '752d58e6-d33f-4dcd-b40c-37acc14242f2', '5b24fbab-c58f-4c37-a59d-ab232e2d98c4', '1fa14a96-c25c-4bb7-b94d-ff453519eab3', '7746d775-9550-4360-b8d5-c37bd448ce01', '553d8166-27b0-49fe-b8e4-89a984e2c375', '12be5b16-915f-44bc-978a-8ddfab235b79']


### Metric

In [21]:
print("sorted_actions", len(np.unique(df_mean.sorted_actions)))

sorted_actions 5986


In [45]:
df_metric = filter_treatment_df(df = df_mean, 
                                rec_column = reclist_column, 
                                fairness_column = fairness_columns,
                                min_size = 15).sort_values('mean_rhat_score')
df_metric.head()

Unnamed: 0,sorted_actions,user_gender,count,mean_rhat_score,confidence
4346,3c521c8f-0940-451c-85e3-f4310795dbcd,m,161,0.001129,0.000195
4344,3c521c8f-0940-451c-85e3-f4310795dbcd,-,15,0.001341,0.000379
6833,5fd0fb7f-542c-4c4b-bbd9-d1fa74a9a083,m,165,0.00215,0.00044
935,0baceb7b-6237-48c4-9550-3a02e9a71f44,m,137,0.002531,0.000442
3047,2aca9179-373d-432c-84aa-d6a687f4f756,m,146,0.002531,0.000442


In [46]:
df_metric.shape

(8466, 5)

In [47]:
df_pivot = df_metric.pivot(index='sorted_actions', columns='user_gender', values=['mean_rhat_score']).dropna()
df_pivot

Unnamed: 0_level_0,mean_rhat_score,mean_rhat_score,mean_rhat_score
user_gender,-,f,m
sorted_actions,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
004e5eed-e267-46ea-b504-54526f1f377d,0.310695,0.328909,0.209137
006044a1-f04e-40ae-bd9f-c084837a771b,0.017413,0.039320,0.016147
0095329b-f139-4ef4-949b-1a6d3b578cde,0.069225,0.076966,0.037361
00aab979-da36-4efd-9086-e409cda07f9c,0.032715,0.076068,0.019330
00b51372-dc0d-44a8-b8ea-52f63918f9d2,0.009751,0.048526,0.015763
...,...,...,...
fef1e787-e631-41a3-b8f2-a834291d8bd1,0.058292,0.191968,0.046739
ff4308de-5b17-4869-ad77-ea41d8bf3b18,0.074964,0.231163,0.079629
ff50e0a8-2772-4599-8abd-18ee6bc80197,0.073891,0.162912,0.043833
ffdb2b8f-ecbf-427a-abc7-c47f4ebc8ae3,0.069546,0.088896,0.091088


In [48]:
d1 = list(df_pivot['mean_rhat_score']['-'])
d2 = list(df_pivot['mean_rhat_score']['f'])
d3 = list(df_pivot['mean_rhat_score']['m'])

df_pivot = df_pivot['mean_rhat_score']#.columns
df_pivot

user_gender,-,f,m
sorted_actions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
004e5eed-e267-46ea-b504-54526f1f377d,0.310695,0.328909,0.209137
006044a1-f04e-40ae-bd9f-c084837a771b,0.017413,0.039320,0.016147
0095329b-f139-4ef4-949b-1a6d3b578cde,0.069225,0.076966,0.037361
00aab979-da36-4efd-9086-e409cda07f9c,0.032715,0.076068,0.019330
00b51372-dc0d-44a8-b8ea-52f63918f9d2,0.009751,0.048526,0.015763
...,...,...,...
fef1e787-e631-41a3-b8f2-a834291d8bd1,0.058292,0.191968,0.046739
ff4308de-5b17-4869-ad77-ea41d8bf3b18,0.074964,0.231163,0.079629
ff50e0a8-2772-4599-8abd-18ee6bc80197,0.073891,0.162912,0.043833
ffdb2b8f-ecbf-427a-abc7-c47f4ebc8ae3,0.069546,0.088896,0.091088


In [49]:
from scipy.stats import norm

def ecdf(data):
    """ Compute ECDF """
    x = np.sort(data)
    n = x.size
    y = np.arange(1, n+1) / n
    return(x,y)

def plot_ecdf(df):
    '''
    In an ECDF, x-axis correspond to the range of values for variables 
    and on the y-axis we plot the proportion of data points that are less 
    than are equal to corresponding x-axis value.
    
    https://cmdlinetips.com/2019/05/empirical-cumulative-distribution-function-ecdf-in-python/
    '''    
    data = []
    column = ""
    title  = ""
    for group in df.columns:
        x, y = ecdf(list(df[group]))
        data.append(
            go.Scatter(
                name=column + "." + str(group),
                y=y,
                x=x,
            )
        )  


    fig = go.Figure(data=data)

    # Change the bar mode
    fig.update_layout(
        template=TEMPLATE,
        legend_orientation="h",
        xaxis_title="rhat_score",
        yaxis_title="ECDF",
        legend=dict(y=-0.2),
        title=title,
    )

    fig.add_annotation(
        x=0.8,
        y=0.1,
        xref="x",
        yref="y",
        text="Max K-S: {}".format(0.5),
        showarrow=False,
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="#ffffff"
            ),
        align="center",
        bordercolor="#c7c7c7",
        borderwidth=1,
        borderpad=4,
        bgcolor="#ff7f0e",
        opacity=0.8
    )

    return fig
        
plot_ecdf(df_pivot)

In [50]:
from scipy.stats import ks_2samp


ks_metrics = []
for a in df_pivot.columns:
    for b in df_pivot.columns:
        ks_metrics.append(ks_2samp(df_pivot[a], df_pivot[b]).statistic)

np.reshape(ks_metrics, (3, 3))
#print(ks_2samp(d1, d2))
#print(ks_2samp(d1, d3))

array([[0.        , 0.4299569 , 0.10883621],
       [0.4299569 , 0.        , 0.50538793],
       [0.10883621, 0.50538793, 0.        ]])

In [149]:
print(ks_2samp(d2, d3))

Ks_2sampResult(statistic=0.4944763588157313, pvalue=0.0)


In [150]:
print(ks_2samp(d2, d2))

Ks_2sampResult(statistic=0.0, pvalue=1.0)


In [174]:


x, y = ecdf(d1)
trace1 = go.Scatter(x=x, y=y)

x, y = ecdf(d2)
trace2 = go.Scatter(x=x, y=y)


x, y = ecdf(d3)
trace3 = go.Scatter(x=x, y=y)


layout = go.Layout(
    title="Cumulative Distribution Function"
)
fig = go.Figure(data=go.Data([trace1, trace2, trace3]), layout=layout)
fig


plotly.graph_objs.Data is deprecated.
Please replace it with a list or tuple of instances of the following types
  - plotly.graph_objs.Scatter
  - plotly.graph_objs.Bar
  - plotly.graph_objs.Area
  - plotly.graph_objs.Histogram
  - etc.




### Grafico Dist

In [36]:
df_dist = df[[reclist_column,  fairness_columns, reclist_score_column]].fillna("-")
df_dist

Unnamed: 0,sorted_actions,user_gender,action_scores
0,45a663b5-b1cb-4a91-bff6-2bef7bbfdd76,m,0.464812
1,4ac4e32b-bd18-402e-adad-ae00e72f8d85,m,0.432939
2,0ef3f425-9bd2-4216-9dd2-219d2fe90f1f,m,0.293007
3,fbb375f9-48bb-4635-824e-4120273b3ba7,m,0.268730
4,09885b8e-f235-4b80-a02a-055539493173,m,0.219262
...,...,...,...
999822,5f572b31-8fd9-45ae-9beb-358c98d839df,f,0.083626
999823,8fc89a2c-d01a-41ae-a6fc-d7e7300d8ee5,f,0.070819
999824,7ea95bc8-7c97-4b8c-baf5-f3720bc94b40,f,0.058343
999825,0eb89e0b-adee-4175-b1f0-55a3531937be,f,0.045726


In [46]:
import plotly.express as px
df = px.data.tips()
fig = px.strip(df_dist.sample(1000), x='user_gender', y='action_scores')
fig.show()

In [55]:
df_dist.groupby(fairness_columns).count()

Unnamed: 0_level_0,sorted_actions,action_scores
user_gender,Unnamed: 1_level_1,Unnamed: 2_level_1
-,78884,78884
f,85079,85079
m,835864,835864


In [62]:
df_dist_sample.groupby(fairness_columns).count()

Unnamed: 0_level_0,sorted_actions,action_scores
user_gender,Unnamed: 1_level_1,Unnamed: 2_level_1
-,1000,1000
f,1000,1000
m,1000,1000


In [58]:
df_dist_sample = []
size_sample    = 1000

for c in sorted(list(df_dist[fairness_columns].drop_duplicates())):
    df_dist_sample.append(df_dist[df_dist[fairness_columns] == c].sample(size_sample))
    
df_dist_sample = pd.concat(df_dist_sample)    
df_dist_sample

Unnamed: 0,sorted_actions,user_gender,action_scores
311201,13da4ed2-7344-49aa-aa73-004826896c5c,-,0.031896
566516,a2eb319d-a45e-43c3-bb14-d0ffb577222c,-,0.105921
8109,e3dff124-6537-4937-ae0d-97916bac7288,-,0.154791
809483,eb8b0a8a-dc7d-480c-b83a-70a069150d05,-,0.079178
230870,22fb55ab-41ca-4fed-bffd-9e85c772486b,-,0.028655
...,...,...,...
381140,b79a3909-7611-4b89-ab5a-5609c4aaacae,m,0.195081
741248,c248209c-1985-4fc3-b202-41123f3d84d6,m,0.003340
117680,c78a77fa-507c-4c07-947a-0355029453bd,m,0.348179
633070,df71a2e3-e706-41a2-9a60-b447ec9a36aa,m,0.009487


In [61]:

def plot_dist_fairness_treatment(df, metric, first_recscore_column, title=""):
    score = "rhat_scores"

    data = []
    group_labels = []
    i = 0

    group_labels.append("All")
    data.append(list(df[first_recscore_column]))

    for group, rows in df.groupby(metric):
        data.append(list(rows[first_recscore_column]))
        group_labels.append(metric + "." + str(group))
    #     data.append(
    #         go.Histogram(
    #             name=metric + "." + str(group),
    #             x=list(rows[first_recscore_column]),
    #             marginal="box"
    #             #cumulative_enabled=True
    #         )
    #     )  # px.colors.sequential.Purp [i for i in range(len(rows))]

    #     i += 1
    # fig = go.Figure(data=data)
    # fig.update_layout(barmode='stack')

    fig = ff.create_distplot(data, group_labels, show_hist=False, bin_size=.05)

    # Change the bar mode
    fig.update_layout(
        template=TEMPLATE,
        xaxis_title=score,
        title=title,
    )

    return fig

plot_dist_fairness_treatment(
    df_dist_sample,
    fairness_columns,
    reclist_score_column,
    title="Disparate Treatment"
)

In [37]:
#np.random.randn(500)

In [63]:

def plot_dist2_fairness_treatment(df, metric, first_recscore_column, title=""):
    score = "rhat_scores"

    data = []
    group_labels = []
    i = 0

    for group, rows in df.groupby(metric):
        data.append(
            go.Histogram(
                name=metric + "." + str(group),
                x=list(rows[first_recscore_column]),
                cumulative_enabled=True
            )
        )  # px.colors.sequential.Purp [i for i in range(len(rows))]

        i += 1
    fig = go.Figure(data=data)
    #fig.update_layout(barmode='stack')

    # Change the bar mode
    fig.update_layout(
        template=TEMPLATE,
        xaxis_title=score,
        title=title,
    )

    return fig

plot_dist2_fairness_treatment(
    df_dist_sample,
    fairness_columns,
    reclist_score_column,
    title="Disparate Treatment"
)