# Preparation

## Imports

In [49]:
# Data handling
import pandas as pd
import numpy as np
# Plotting
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
# Statistical analyses
import itertools
from scipy.stats import norm, truncnorm, gaussian_kde
from scipy.stats import ks_2samp, pearsonr
from statsmodels.formula.api import ols
import statsmodels.api as sm
from statistics import NormalDist
# IO
import sys

## Paths

In [50]:
# Base path for functions: the directory that is put on sys.path so the
# project package can be imported, and the root of all paths defined below.
# NOTE(review): this is a machine-specific absolute path — adjust it (or make
# it configurable) when running the notebook elsewhere.
base_path = "C:/repos/"
# Register the path only once, so re-running this cell does not keep
# appending duplicate entries to sys.path.
if base_path not in sys.path:
    sys.path.append(base_path)

# Path to find run results
results_path = base_path + "twrds_unbiased_anns/runs/results/"

# Path to store figures
figure_path = base_path + "twrds_unbiased_anns/figures/results/classification/"

## Functions

In [51]:
# Data
from twrds_unbiased_anns.src.analysis.data import load_run_data
# Plotting

# Analysis

## Parameters

In [52]:
# Run specific
# Name of the run whose results are loaded below
run_name = "classification_all"
# Date of the run — presumably DD-MM-YYYY; verify against load_run_data
run_date = "13-05-2021"

## Data Loading

In [53]:
# Load the stored results of the selected run into a dataframe
# (load_run_data is a project helper; how it locates the file is not shown here)
df = load_run_data(run_name, run_date, results_path)
# Preview the first five rows
df.head(5)

Unnamed: 0.1,Unnamed: 0,run,date,model,loss,category,m_diff,stddev,minority_share,repeat,threshold,noise,shape_color,shape_type,size,actual,prediction,group,prediction_diff,rmse
0,0,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,0,3,5,1,60,0,white,square,116,1,1.0,over,0.0,0.0
1,1,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,0,3,5,1,60,0,white,square,55,0,2.7e-05,over,2.7e-05,2.7e-05
2,2,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,0,3,5,1,60,0,white,square,46,0,6.2e-05,over,6.2e-05,6.2e-05
3,3,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,0,3,5,1,60,0,white,square,53,0,3.3e-05,over,3.3e-05,3.3e-05
4,4,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,0,3,5,1,60,0,white,square,99,1,1.0,over,0.0,0.0


In [54]:
# Add column with binary prediction (a prediction of at least 0.5 counts as positive)
df["prediction_bin"] = df["prediction"] >= 0.5

# Conditions for true positive, false positive, true negative, false negative
conditions = [
    (df['actual'] == 1) & (df['prediction_bin'] == 1),
    (df['actual'] == 0) & (df['prediction_bin'] == 1),
    (df['actual'] == 0) & (df['prediction_bin'] == 0),
    (df['actual'] == 1) & (df['prediction_bin'] == 0)
]

# Labels to assign for each condition (same order as `conditions`)
values = ['tp', 'fp', 'tn', 'fn']

# Create a new column and use np.select to assign the labels.
# The default is given explicitly as a string: np.select's implicit default
# is the integer 0, which has no common dtype with the string labels and
# makes recent NumPy versions raise a TypeError. Rows matching none of the
# conditions (i.e. `actual` is neither 0 nor 1) are labelled 'unknown'.
df['result'] = np.select(conditions, values, default='unknown')


# Add other data to dataframe
df["mean_red"] = 100 - df["m_diff"]

# Split dataframe by groups for later analyses
df_over = df[df.group == "over"]
df_under = df[df.group == "under"]

df

Unnamed: 0.1,Unnamed: 0,run,date,model,loss,category,m_diff,stddev,minority_share,repeat,...,shape_type,size,actual,prediction,group,prediction_diff,rmse,prediction_bin,result,mean_red
0,0,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,0,3,5,1,...,square,116,1,1.000000,over,0.000000,0.000000,True,tp,100
1,1,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,0,3,5,1,...,square,55,0,0.000027,over,0.000027,0.000027,False,tn,100
2,2,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,0,3,5,1,...,square,46,0,0.000062,over,0.000062,0.000062,False,tn,100
3,3,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,0,3,5,1,...,square,53,0,0.000033,over,0.000033,0.000033,False,tn,100
4,4,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,0,3,5,1,...,square,99,1,1.000000,over,0.000000,0.000000,True,tp,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449995,449995,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,50,30,50,5,...,circle,41,0,0.000075,under,0.000075,0.000075,False,tn,50
449996,449996,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,50,30,50,5,...,circle,98,1,0.995452,under,-0.004548,0.004548,True,tp,50
449997,449997,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,50,30,50,5,...,circle,79,0,0.000346,under,0.000346,0.000346,False,tn,50
449998,449998,classification_all,2021-05-13 08:49:03.584,SmallCNN,bce,color,50,30,50,5,...,circle,87,0,0.016659,under,0.016659,0.016659,False,tn,50


In [56]:
def get_results(data):
    """Compute accuracy, true positive rate and true negative rate.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``result`` column whose entries are compared against the
        labels ``"tp"``, ``"fp"``, ``"tn"`` and ``"fn"``.

    Returns
    -------
    tuple
        ``(total, accuracy, tpr, tnr)`` where ``total`` is the number of rows,
        ``accuracy = (tp + tn) / total``, ``tpr = tp / (tp + fn)`` and
        ``tnr = tn / (tn + fp)``. A rate whose denominator is zero (empty
        input, no positives, or no negatives) is returned as ``nan`` instead
        of raising ``ZeroDivisionError``.
    """
    def _ratio(numerator, denominator):
        # An undefined rate is reported as NaN rather than crashing the caller
        return numerator / denominator if denominator else float("nan")

    total = len(data)
    labels = data["result"]
    # One boolean sum per label; int() keeps the counts as plain Python ints
    tp, fp, tn, fn = (
        int((labels == label).sum()) for label in ("tp", "fp", "tn", "fn")
    )
    positives = tp + fn
    negatives = tn + fp
    accuracy = _ratio(tp + tn, total)
    tpr = _ratio(tp, positives)
    tnr = _ratio(tn, negatives)
    return total, accuracy, tpr, tnr

results = []

# Group data by every combination of the experiment parameters
predictors = ["stddev", "minority_share", "mean_red", "threshold"]
grouped = df.groupby(predictors)

# Go through groups. Iterating the GroupBy yields each key together with its
# rows, which avoids a separate get_group lookup per key.
for (stddev, minority_share, mean_red, threshold), data in grouped:

    # Overall data
    total, accuracy, tpr, tnr = get_results(data)

    # White shapes (rows whose group is "over"); the row count is not needed
    _, accuracy_w, tpr_w, tnr_w = get_results(data[data.group == "over"])

    # Red shapes (rows whose group is "under"); the row count is not needed
    _, accuracy_r, tpr_r, tnr_r = get_results(data[data.group == "under"])

    # Store data; the *_diff entries are white minus red
    results.append(
        {
            "stddev": stddev,
            "minority_share": minority_share,
            "m_diff": 100 - mean_red,
            "mean_red": mean_red,
            "threshold": threshold,
            "total": total,
            "accuracy": accuracy,
            "tpr": tpr,
            "tnr": tnr,
            "accuracy_w": accuracy_w,
            "tpr_w": tpr_w,
            "tnr_w": tnr_w,
            "accuracy_r": accuracy_r,
            "tpr_r": tpr_r,
            "tnr_r": tnr_r,
            "accuracy_diff": accuracy_w - accuracy_r,
            "tpr_diff": tpr_w - tpr_r,
            "tnr_diff": tnr_w - tnr_r
        }
    )

# Make dataframe of results (one row per parameter combination)
df_results = pd.DataFrame(results)
df_results

Unnamed: 0,stddev,minority_share,m_diff,mean_red,threshold,total,accuracy,tpr,tnr,accuracy_w,tpr_w,tnr_w,accuracy_r,tpr_r,tnr_r,accuracy_diff,tpr_diff,tnr_diff
0,3,5,50,50,60,2500,0.6900,0.554598,1.000000,0.7936,0.703448,1.000000,0.5864,0.405747,1.000000,0.2072,0.297701,0.000000
1,3,5,50,50,75,2500,0.9288,0.856452,1.000000,1.0000,1.000000,1.000000,0.8576,0.712903,1.000000,0.1424,0.287097,0.000000
2,3,5,50,50,90,2500,0.8128,0.986486,0.739773,0.8000,1.000000,0.715909,0.8256,0.972973,0.763636,-0.0256,0.027027,-0.047727
3,3,5,30,70,60,2500,0.7488,0.857471,0.500000,0.8016,0.714943,1.000000,0.6960,1.000000,0.000000,0.1056,-0.285057,1.000000
4,3,5,30,70,75,2500,0.7912,0.579032,1.000000,0.9920,0.983871,1.000000,0.5904,0.174194,1.000000,0.4016,0.809677,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,30,50,30,70,75,2500,0.9956,0.991935,0.999206,0.9968,0.993548,1.000000,0.9944,0.990323,0.998413,0.0024,0.003226,0.001587
176,30,50,30,70,90,2500,0.8836,1.000000,0.834659,0.7888,1.000000,0.700000,0.9784,1.000000,0.969318,-0.1896,0.000000,-0.269318
177,30,50,0,100,60,2500,0.8744,0.848276,0.934211,0.7888,0.696552,1.000000,0.9600,1.000000,0.868421,-0.1712,-0.303448,0.131579
178,30,50,0,100,75,2500,0.9940,0.991935,0.996032,0.9920,0.983871,1.000000,0.9960,1.000000,0.992063,-0.0040,-0.016129,0.007937


In [57]:
header = """
% Results of 3rd experiment
\\renewcommand{\\arraystretch}{1}
\\begin{table}[h!]
\\centering
\\renewcommand*\TPTnoteLabel[1]{\\parbox[b]{3em}{\\hfill#1\\,}}
\\begin{threeparttable}
\\begin{tabular*}{\\textwidth}{SSSS @{\\extracolsep{\\fill}} SSSSSS @{\\extracolsep{\\fill}}}
\\toprule
&&&& \\multicolumn{3}{c}{White Shapes} & \\multicolumn{3}{c}{Red Shapes}  \\\\
\\cmidrule(l{2pt}r{2pt}){5-7}
\\cmidrule(l{2pt}r{2pt}){8-10}
{$\\sigma$} & {$\\mathrm{\\%}_{\\mathrm{red}}$} & {$\\mu$} & {$\\theta$} & {Accuracy} & {TPR} & {TNR} & {Accuracy} & {TPR} & {TNR} \\\\ \\midrule
"""

footer_1 = """
\\bottomrule
\\end{tabular*}
"""

footer_2 = """
\\end{threeparttable}
\\end{table}
"""
def print_data(data):
    """Print one LaTeX table row per row of ``data``.

    Rows are printed sorted by ``minority_share``, ``mean_red`` and
    ``threshold``. The four parameter columns are printed without decimals;
    the six rate columns (white shapes first, then red shapes) are rounded to
    four decimals, multiplied by 100 and printed with two decimals.
    """
    parameter_columns = ["stddev", "minority_share", "mean_red", "threshold"]
    rate_columns = ["accuracy_w", "tpr_w", "tnr_w", "accuracy_r", "tpr_r", "tnr_r"]

    ordered = data.sort_values(by = ["minority_share", "mean_red", "threshold"])
    for _, record in ordered.iterrows():
        parameters = [record[column] for column in parameter_columns]
        # Round on the fraction first, then convert to percent
        percentages = [record[column].round(4) * 100 for column in rate_columns]
        row_text = "{:.0f} & {:.0f} & {:.0f} & {:.0f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} & {:.2f} \\\\".format(*parameters, *percentages)
        print(row_text)
        
def print_to_latex(data):
    """Print a complete LaTeX table for every standard deviation in ``data``.

    For each value returned by ``data.stddev.unique()`` this prints the table
    header, the data rows for that standard deviation, and a footer holding a
    caption and a label that both embed the standard deviation.
    """
    for sigma in data.stddev.unique():
        sigma_text = str(sigma)
        # Table preamble and column headings
        print(header)
        # Rows belonging to this standard deviation only
        print_data(data[data.stddev == sigma])
        # Caption and label sit between the two footer fragments
        caption = "".join(["\\caption{Results of experiment 3 for standard deviation $\\sigma =", sigma_text, "$.}"])
        label = "".join(["\n\\label{table:results_exp_3_std_", sigma_text, "}"])
        print("".join([footer_1, caption, label, footer_2]))

In [58]:
# Print one LaTeX table per standard deviation found in df_results
print_to_latex(df_results)


% Results of 3rd experiment
\renewcommand{\arraystretch}{1}
\begin{table}[h!]
\centering
\renewcommand*\TPTnoteLabel[1]{\parbox[b]{3em}{\hfill#1\,}}
\begin{threeparttable}
\begin{tabular*}{\textwidth}{SSSS @{\extracolsep{\fill}} SSSSSS @{\extracolsep{\fill}}}
\toprule
&&&& \multicolumn{3}{c}{White Shapes} & \multicolumn{3}{c}{Red Shapes}  \\
\cmidrule(l{2pt}r{2pt}){5-7}
\cmidrule(l{2pt}r{2pt}){8-10}
{$\sigma$} & {$\mathrm{\%}_{\mathrm{red}}$} & {$\mu$} & {$\theta$} & {Accuracy} & {TPR} & {TNR} & {Accuracy} & {TPR} & {TNR} \\ \midrule

3 & 5 & 50 & 60 & 79.36 & 70.34 & 100.00 & 58.64 & 40.57 & 100.00 \\
3 & 5 & 50 & 75 & 100.00 & 100.00 & 100.00 & 85.76 & 71.29 & 100.00 \\
3 & 5 & 50 & 90 & 80.00 & 100.00 & 71.59 & 82.56 & 97.30 & 76.36 \\
3 & 5 & 70 & 60 & 80.16 & 71.49 & 100.00 & 69.60 & 100.00 & 0.00 \\
3 & 5 & 70 & 75 & 99.20 & 98.39 & 100.00 & 59.04 & 17.42 & 100.00 \\
3 & 5 & 70 & 90 & 79.92 & 100.00 & 71.48 & 70.48 & 0.27 & 100.00 \\
3 & 5 & 100 & 60 & 79.20 & 70.11 & 100.00 & 7

### Find largest bias with equal groups

In [66]:
# Show the five parameter combinations with the largest accuracy gap
# (accuracy_diff, i.e. accuracy_w - accuracy_r) between the two groups.
# NOTE(review): the section heading mentions "equal groups", but no filter on
# minority_share is applied here — confirm whether one was intended.
df_results.sort_values(by = "accuracy_diff", ascending = False).head(5)

Unnamed: 0,stddev,minority_share,m_diff,mean_red,threshold,total,accuracy,tpr,tnr,accuracy_w,tpr_w,tnr_w,accuracy_r,tpr_r,tnr_r,accuracy_diff,tpr_diff,tnr_diff
82,10,10,50,50,75,2500,0.752,0.5,1.0,1.0,1.0,1.0,0.504,0.0,1.0,0.496,1.0,0.0
28,3,50,50,50,75,2500,0.7668,0.539516,0.990476,0.9824,0.983871,0.980952,0.5512,0.095161,1.0,0.4312,0.88871,-0.019048
4,3,5,30,70,75,2500,0.7912,0.579032,1.0,0.992,0.983871,1.0,0.5904,0.174194,1.0,0.4016,0.809677,0.0
55,5,30,50,50,75,2500,0.798,0.592742,1.0,0.9984,0.996774,1.0,0.5976,0.18871,1.0,0.4008,0.808065,0.0
37,5,5,50,50,75,2500,0.7992,0.595968,0.999206,0.9992,1.0,0.998413,0.5992,0.191935,1.0,0.4,0.808065,-0.001587


## OLS Analysis

In [67]:
predictors = ["stddev", "mean_red", "minority_share", "threshold"]

# Right-hand side of the regression formula: every predictor on its own,
# followed by the full interaction term to test for interactions
ols_predictors = " + ".join(predictors + ["stddev * mean_red * minority_share * threshold"])

# Define targets
targets = ["accuracy_diff", "tpr_diff", "tnr_diff"]

# Create empty dataframe for OLS results
ols_df = pd.DataFrame()

# Fit one model per target and store its coefficients, t-values and p-values
# as three columns prefixed with the target name
for target in targets:
    model = ols('{} ~ {}'.format(target, ols_predictors), df_results).fit()
    fitted_statistics = (
        ("{}_coef", model.params),
        ("{}_t", model.tvalues),
        ("{}_p", model.pvalues),
    )
    for column_template, statistic in fitted_statistics:
        ols_df[column_template.format(target)] = statistic

ols_df

Unnamed: 0,accuracy_diff_coef,accuracy_diff_t,accuracy_diff_p,tpr_diff_coef,tpr_diff_t,tpr_diff_p,tnr_diff_coef,tnr_diff_t,tnr_diff_p
Intercept,0.4471534,0.733223,0.464469,-1.077645,-0.935846,0.350728,1.459044,1.676829,0.095481
stddev,-0.05328482,-1.479696,0.140873,-0.1049745,-1.54384,0.124555,-0.02218847,-0.431854,0.666415
mean_red,-0.003559313,-0.444488,0.657276,0.004300159,0.284398,0.776464,-0.007222179,-0.632124,0.528186
minority_share,-0.008445853,-0.411124,0.681519,-0.04519377,-1.165083,0.245677,-0.01557616,-0.531411,0.595853
threshold,-0.002224124,-0.27715,0.782014,0.02353182,1.55296,0.12236,-0.01881463,-1.643206,0.102256
stddev:mean_red,0.0005266837,1.113866,0.266967,0.001146202,1.283788,0.201027,0.0002506329,0.371503,0.710743
stddev:minority_share,0.0006346817,0.523208,0.601536,0.004137362,1.806307,0.072704,-0.0003609239,-0.208533,0.835071
mean_red:minority_share,0.0001013861,0.375856,0.70751,0.0004343934,0.852856,0.394983,0.0003838014,0.997219,0.320127
stddev:mean_red:minority_share,-7.088144e-06,-0.445005,0.656903,-4.425521e-05,-1.471453,0.143085,-1.462902e-06,-0.064371,0.948753
stddev:threshold,0.000571733,1.20653,0.229351,0.001240379,1.386273,0.167545,0.0002307578,0.341305,0.733311
