<a href="https://colab.research.google.com/github/scaairesearch/da_cv_fer/blob/main/Bias_Evaluation_Inference_FER_v2_12a.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. get dataset / dataloader
3. get output
2. load model
4. get task accuracy
4. get demographic parity
5. get equality odds



## Cloning Repo, Installing dependencies

In [1]:
# !pip install --upgrade Pillow

In [2]:
# !git clone https://github.com/scaairesearch/da_cv_fer.git
# import os
# os.getcwd()
# os.chdir('da_cv_fer')

In [3]:

# !pip install -q -r requirements.txt # -q is for quiet install
# # for some reason the below does not get installed with requirements
# !pip install -q opendatasets
# !pip install -q facenet_pytorch

In [4]:
# # Installs from files
# from da_cv_fer.data_config import DataConfig
# from da_cv_fer.ds_sfew_crop import DatasetSFEWCROP

# from da_cv_fer.ds_expw_crop_image_race import EXPWIMAGECROPRACE
# from da_cv_fer.utils import *
# from da_cv_fer.run_config import RunConfig




In [5]:
# install general libraries
import os
import zipfile

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

from tqdm import tqdm # for beautiful model training updates

import matplotlib.pyplot as plt # for plots

import time
from pathlib import Path

from copy import deepcopy
import numpy as np

from sklearn.metrics import accuracy_score, precision_score, confusion_matrix, classification_report
import pandas as pd


## Initializing Configs

In [6]:

# #initializing dataconfig, it tells us about dataset configuration and directory structure
# dataconfig = DataConfig()

## Loading csv files (evaluation results)

In [7]:
# checking the mounted drive and mounting if not done
if not os.path.exists('/content/gdrive'):
  from google.colab import drive
  drive.mount('/content/gdrive')
else:
    print("Google Drive is already mounted.")

Mounted at /content/gdrive


In [8]:
# non_dann_result.to_csv('/content/gdrive/MyDrive/CV_FER/results/Result dataframes/non_dann_result_08_04.csv')
# dann_result.to_csv('/content/gdrive/MyDrive/CV_FER/results/Result dataframes/dann_result_08_04.csv')
# ewc_dann_result.to_csv('/content/gdrive/MyDrive/CV_FER/results/Result dataframes/ewc_dann_result_08_04.csv')

In [9]:
# base_dir = ""
non_dann_result=pd.read_csv('/content/gdrive/MyDrive/CV_FER/results/Result dataframes/non_dann_result_23_05_wo_se_a.csv').sort_values(by=['image'])
dann_result=pd.read_csv('/content/gdrive/MyDrive/CV_FER/results/Result dataframes/dann_result_23_05_wo_se_a.csv').sort_values(by=['image'])
ewc_dann_result=pd.read_csv('/content/gdrive/MyDrive/CV_FER/results/Result dataframes/ewc_dann_result_23_05_wo_se_a.csv').sort_values(by=['image'])

In [10]:
non_dann_result

Unnamed: 0.1,Unnamed: 0,image,gt_emotion,gt_race,p_emotion
167,167,afraid_African_214.jpg,Fear,African,Fear
2502,2502,afraid_american_238.jpg,Surprise,African,Fear
2069,2069,afraid_asian_103.jpg,Happy,Asian,Angry
1006,1006,afraid_asian_336.jpg,Surprise,Asian,Neutral
2424,2424,afraid_asian_367.jpg,Surprise,Asian,Neutral
...,...,...,...,...,...
4325,4325,unhappy_expression_537.jpg,Happy,White,Sad
2049,2049,unhappy_expression_643.jpg,Disgust,White,Disgust
8202,8202,unhappy_expression_730.jpg,Neutral,Asian,Disgust
8344,8344,unhappy_expression_749.jpg,Disgust,White,Happy


In [11]:
dann_result

Unnamed: 0.1,Unnamed: 0,image,gt_emotion,gt_race,p_emotion
4064,4064,afraid_African_214.jpg,Fear,African,Neutral
174,174,afraid_american_238.jpg,Surprise,African,Neutral
4002,4002,afraid_asian_103.jpg,Happy,Asian,Angry
845,845,afraid_asian_336.jpg,Surprise,Asian,Neutral
4416,4416,afraid_asian_367.jpg,Surprise,Asian,Neutral
...,...,...,...,...,...
8214,8214,unhappy_expression_537.jpg,Happy,White,Sad
1497,1497,unhappy_expression_643.jpg,Disgust,White,Happy
4619,4619,unhappy_expression_730.jpg,Neutral,Asian,Disgust
3359,3359,unhappy_expression_749.jpg,Disgust,White,Happy


In [12]:
ewc_dann_result

Unnamed: 0.1,Unnamed: 0,image,gt_emotion,gt_race,p_emotion
7694,7694,afraid_African_214.jpg,Fear,African,Neutral
293,293,afraid_american_238.jpg,Surprise,African,Fear
2763,2763,afraid_asian_103.jpg,Happy,Asian,Angry
7041,7041,afraid_asian_336.jpg,Surprise,Asian,Neutral
7260,7260,afraid_asian_367.jpg,Surprise,Asian,Neutral
...,...,...,...,...,...
5436,5436,unhappy_expression_537.jpg,Happy,White,Sad
5743,5743,unhappy_expression_643.jpg,Disgust,White,Happy
8046,8046,unhappy_expression_730.jpg,Neutral,Asian,Surprise
4669,4669,unhappy_expression_749.jpg,Disgust,White,Happy


### Demographic Parity

In [13]:
output = {'non_dann':non_dann_result,
           'dann':dann_result,
           'ewc_dann':ewc_dann_result}

In [14]:
# races and emotions
races = ['Asian', 'African', 'White', 'Indian', 'Latino', 'Middle Eastern']
emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

In [15]:
# Function to calculate Demographic Parity
def calculate_demographic_parity(df, races, emotions):
    results = {} # {emotion:{race:value}}

    for emotion in emotions:
        emotion_results = {} # {race:values}

        # Calculate the proportion of the emotion predictions within each demographic group
        for race in races:
            total_group_count = df[df['gt_race'] == race].shape[0]
            predicted_emotion_count = df[(df['gt_race'] == race) & (df['p_emotion'] == emotion)].shape[0]
            proportion = predicted_emotion_count / total_group_count if total_group_count > 0 else 0
            emotion_results[race] = round(proportion,3)

        results[emotion] = emotion_results

    df = pd.DataFrame(results, index=races, columns=emotions)
    count_four_fifths_row = pd.DataFrame(0, index=['count_four_fifths'], columns=emotions)
    df = pd.concat([df,count_four_fifths_row])
    for emotion in emotions:
      df[emotion] = (df[emotion]/df[emotion].max()).round(2)
      df.at['count_four_fifths',emotion] = int((df[emotion]>=0.8).sum())
    return(df.round(3))

    # return results

In [16]:
for model, results in output.items():
  print(model)
  print(calculate_demographic_parity(results,races,emotions))

non_dann
                   Angry  Disgust  Fear  Happy   Sad  Surprise  Neutral
Asian               0.64     0.74  0.69   0.63  0.56      1.00     0.91
African             1.00     1.00  1.00   0.55  0.47      0.65     1.00
White               0.75     0.88  0.98   0.65  0.49      0.82     0.76
Indian              0.92     0.71  0.96   0.58  0.70      0.73     0.67
Latino              0.59     0.53  0.63   1.00  0.30      0.73     0.68
Middle Eastern      0.82     0.87  0.51   0.45  1.00      0.76     0.51
count_four_fifths   3.00     3.00  3.00   1.00  1.00      2.00     2.00
dann
                   Angry  Disgust  Fear  Happy   Sad  Surprise  Neutral
Asian               0.61     0.67  0.57   0.57  0.60      1.00     0.97
African             0.99     1.00  1.00   0.50  0.44      0.82     1.00
White               0.87     0.77  0.75   0.63  0.46      0.85     0.90
Indian              0.89     0.87  0.87   0.49  0.70      0.89     0.75
Latino              0.54     0.30  0.52   1.00  0.

In [17]:
# # Calculate and print the demographic parity results

# def dem_parity_ratio (df, emotions):
#   count_fourth_fifths_row = pd.DataFrame(0, index=['count_fourth_fifths'], columns=emotions)
#   # df = df.append(count_fourth_fifths_row)
#   df = pd.concat([df,count_fourth_fifths_row])
#   for emotion in emotions:
#     # new_column = f'{emotion}_ratio'
#     df[emotion] = df[emotion]/df[emotion].max()
#     # num_rows = float((df[emotion]>0.8).sum())
#     df.at['count_fourth_fifths',emotion] = int((df[emotion]>0.8).sum())
#   df = df.round(2)
#   return(df)


# demographic_parity_results = [demographic_parity_results_non_dann,
#                               demographic_parity_results_dann,
#                               demographic_parity_results_ewc]
# for results in demographic_parity_results:
#   print( dem_parity_ratio(pd.DataFrame(results), emotions))



# demographic_parity_results_non_dann = calculate_demographic_parity(non_dann_result, races, emotions)
# demographic_parity_results_dann = calculate_demographic_parity(dann_result, races, emotions)
# demographic_parity_results_ewc = calculate_demographic_parity(ewc_dann_result, races, emotions)

# df_demographic_parity_results = pd.DataFrame(data = demographic_parity_results_ewc,
#                                              columns = emotions ,
#                                              index = races)
# df_demographic_parity_results




In [18]:
# # Print the results
# for emotion, proportions in demographic_parity_results_non_dann.items():
#     print(f"Emotion: {emotion}")
#     for group, proportion in proportions.items():
#         print(f"  {group}: {proportion:.4f}")
#     print()

In [19]:
df = non_dann_result

In [20]:
df

Unnamed: 0.1,Unnamed: 0,image,gt_emotion,gt_race,p_emotion
167,167,afraid_African_214.jpg,Fear,African,Fear
2502,2502,afraid_american_238.jpg,Surprise,African,Fear
2069,2069,afraid_asian_103.jpg,Happy,Asian,Angry
1006,1006,afraid_asian_336.jpg,Surprise,Asian,Neutral
2424,2424,afraid_asian_367.jpg,Surprise,Asian,Neutral
...,...,...,...,...,...
4325,4325,unhappy_expression_537.jpg,Happy,White,Sad
2049,2049,unhappy_expression_643.jpg,Disgust,White,Disgust
8202,8202,unhappy_expression_730.jpg,Neutral,Asian,Disgust
8344,8344,unhappy_expression_749.jpg,Disgust,White,Happy


In [21]:
pivot_race_gt_emotion = pd.pivot_table(df, index='gt_race', columns='gt_emotion', aggfunc='size', fill_value=0)
pivot_race_gt_emotion

gt_emotion,Angry,Disgust,Fear,Happy,Neutral,Sad,Surprise
gt_race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
African,27,42,7,243,367,93,35
Asian,37,52,9,346,407,158,63
Indian,21,22,3,157,285,64,28
Latino,3,0,0,115,13,38,25
Middle Eastern,7,8,1,51,80,38,17
White,249,258,74,1879,2026,625,485


In [22]:
pivot_race_p_emotion = pd.pivot_table(df, index='gt_race', columns='p_emotion', aggfunc='size', fill_value=0)
pivot_race_p_emotion

p_emotion,Angry,Disgust,Fear,Happy,Neutral,Sad,Surprise
gt_race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
African,128,134,40,228,80,114,90
Asian,108,131,36,343,95,177,182
Indian,84,68,27,171,38,120,72
Latino,18,17,6,99,13,17,24
Middle Eastern,26,29,5,46,10,60,26
White,660,812,266,1842,415,823,778


In [23]:
df_race_gt_emotion = pivot_race_gt_emotion.reset_index()
df_race_gt_emotion

gt_emotion,gt_race,Angry,Disgust,Fear,Happy,Neutral,Sad,Surprise
0,African,27,42,7,243,367,93,35
1,Asian,37,52,9,346,407,158,63
2,Indian,21,22,3,157,285,64,28
3,Latino,3,0,0,115,13,38,25
4,Middle Eastern,7,8,1,51,80,38,17
5,White,249,258,74,1879,2026,625,485


In [24]:
df_race_gt_emotion.columns

Index(['gt_race', 'Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad',
       'Surprise'],
      dtype='object', name='gt_emotion')

In [25]:
df_race_gt_emotion.columns[1:]

Index(['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise'], dtype='object', name='gt_emotion')

### Race Emotion Distribution

In [26]:
def get_race_emotion_distribution (df:pd.DataFrame) -> dict:
  race_emotion_distribution = {} # {race:{emotion:count}}

  for index, row in df.iterrows():
    emotion_distribution ={} # {emotion:count}
    # for emotion in df.columns[1:]:
    for emotion in ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise'] :
      try:
        emotion_distribution[emotion] = row[emotion]
      except:
        emotion_distribution[emotion] = 0

    race_emotion_distribution[row[df.columns[0]]] = emotion_distribution

  return race_emotion_distribution


In [27]:
ground_truth_race_emotion_distribution = get_race_emotion_distribution(df_race_gt_emotion)
ground_truth_race_emotion_distribution

{'African': {'Angry': 27,
  'Disgust': 42,
  'Fear': 7,
  'Happy': 243,
  'Neutral': 367,
  'Sad': 93,
  'Surprise': 35},
 'Asian': {'Angry': 37,
  'Disgust': 52,
  'Fear': 9,
  'Happy': 346,
  'Neutral': 407,
  'Sad': 158,
  'Surprise': 63},
 'Indian': {'Angry': 21,
  'Disgust': 22,
  'Fear': 3,
  'Happy': 157,
  'Neutral': 285,
  'Sad': 64,
  'Surprise': 28},
 'Latino': {'Angry': 3,
  'Disgust': 0,
  'Fear': 0,
  'Happy': 115,
  'Neutral': 13,
  'Sad': 38,
  'Surprise': 25},
 'Middle Eastern': {'Angry': 7,
  'Disgust': 8,
  'Fear': 1,
  'Happy': 51,
  'Neutral': 80,
  'Sad': 38,
  'Surprise': 17},
 'White': {'Angry': 249,
  'Disgust': 258,
  'Fear': 74,
  'Happy': 1879,
  'Neutral': 2026,
  'Sad': 625,
  'Surprise': 485}}

In [28]:
ground_truth_race_emotion_distribution.keys()

dict_keys(['African', 'Asian', 'Indian', 'Latino', 'Middle Eastern', 'White'])

In [29]:
predicted_race_emotion_distribution = get_race_emotion_distribution(pivot_race_p_emotion.reset_index() )
predicted_race_emotion_distribution

{'African': {'Angry': 128,
  'Disgust': 134,
  'Fear': 40,
  'Happy': 228,
  'Neutral': 80,
  'Sad': 114,
  'Surprise': 90},
 'Asian': {'Angry': 108,
  'Disgust': 131,
  'Fear': 36,
  'Happy': 343,
  'Neutral': 95,
  'Sad': 177,
  'Surprise': 182},
 'Indian': {'Angry': 84,
  'Disgust': 68,
  'Fear': 27,
  'Happy': 171,
  'Neutral': 38,
  'Sad': 120,
  'Surprise': 72},
 'Latino': {'Angry': 18,
  'Disgust': 17,
  'Fear': 6,
  'Happy': 99,
  'Neutral': 13,
  'Sad': 17,
  'Surprise': 24},
 'Middle Eastern': {'Angry': 26,
  'Disgust': 29,
  'Fear': 5,
  'Happy': 46,
  'Neutral': 10,
  'Sad': 60,
  'Surprise': 26},
 'White': {'Angry': 660,
  'Disgust': 812,
  'Fear': 266,
  'Happy': 1842,
  'Neutral': 415,
  'Sad': 823,
  'Surprise': 778}}

In [30]:
np.array(list(predicted_race_emotion_distribution['Asian'].values()))

array([108, 131,  36, 343,  95, 177, 182])

In [31]:
# Function to perform Chi-Square test for each demographic group
import numpy as np
from scipy.stats import chi2
def chi_square_test(expected_counts, predicted_counts,
                    demographic_groups = ['African', 'Asian', 'Indian', 'Latino', 'Middle Eastern', 'White'],
                    emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise'], alpha=0.05):
    results = {}

    for group in demographic_groups:
        expected = np.array(list(expected_counts[group].values()))
        predicted = np.array(list(predicted_counts[group].values()))

        # Calculate the Chi-Square statistic
        chi_square_statistic = np.sum((predicted - expected) ** 2 / expected)

        # Degrees of freedom
        degrees_of_freedom = len(emotions) - 1

        # Critical value for the significance level
        critical_value = chi2.ppf(1 - alpha, degrees_of_freedom)

        # P-Value
        p_value = chi2.sf(chi_square_statistic, degrees_of_freedom)

        # Store the results
        results[group] = {
            'Chi-Square Statistic': chi_square_statistic,
            'Critical Value': critical_value,
            'P-Value': p_value,
            'Significant': chi_square_statistic > critical_value
        }

    return results


In [32]:
# chi_square_test(expected_counts, predicted_counts, demographic_groups, emotions, alpha=0.05)
results = chi_square_test(ground_truth_race_emotion_distribution, predicted_race_emotion_distribution )

  chi_square_statistic = np.sum((predicted - expected) ** 2 / expected)


In [33]:
# Print the results
for group, result in results.items():
    print(f"Demographic Group: {group}")
    print(f"  Chi-Square Statistic: {result['Chi-Square Statistic']:.4f}")
    print(f"  Critical Value (0.05 significance level): {result['Critical Value']:.4f}")
    print(f"  P-Value: {result['P-Value']:.4f}")
    if result['Significant']:
        print("  Result: Significant difference between predicted and expected distributions.")
    else:
        print("  Result: No significant difference between predicted and expected distributions.")
    print()

Demographic Group: African
  Chi-Square Statistic: 1051.4452
  Critical Value (0.05 significance level): 12.5916
  P-Value: 0.0000
  Result: Significant difference between predicted and expected distributions.

Demographic Group: Asian
  Chi-Square Statistic: 803.5255
  Critical Value (0.05 significance level): 12.5916
  P-Value: 0.0000
  Result: Significant difference between predicted and expected distributions.

Demographic Group: Indian
  Chi-Square Statistic: 810.6397
  Critical Value (0.05 significance level): 12.5916
  P-Value: 0.0000
  Result: Significant difference between predicted and expected distributions.

Demographic Group: Latino
  Chi-Square Statistic: inf
  Critical Value (0.05 significance level): 12.5916
  P-Value: 0.0000
  Result: Significant difference between predicted and expected distributions.

Demographic Group: Middle Eastern
  Chi-Square Statistic: 201.9382
  Critical Value (0.05 significance level): 12.5916
  P-Value: 0.0000
  Result: Significant differenc

### Disparate Impact

In [34]:
#@title Function to calculate accuracy for each race
def calculate_accuracy_per_race(df):
    accuracies = {}
    for race, group in df.groupby('gt_race'):
      # print (race)
      correct_predictions = (group['gt_emotion'] == group['p_emotion']).sum()

      total_predictions = group.shape[0]
      accuracy = correct_predictions / total_predictions
      # print(f' correct_predictions:{correct_predictions}, total_predictions: {total_predictions}, accuracy : {accuracy}')
      accuracies[race] = accuracy
    return accuracies
  # print(group['Angry'])


In [35]:
accuracies_per_race_non_dann  = calculate_accuracy_per_race(non_dann_result)
accuracies_per_race_non_dann

{'African': 0.29606879606879605,
 'Asian': 0.28451492537313433,
 'Indian': 0.2603448275862069,
 'Latino': 0.4793814432989691,
 'Middle Eastern': 0.26732673267326734,
 'White': 0.3036097212294496}

In [36]:
max_accuracy_non_dann = max(accuracies_per_race_non_dann.values())
disparate_impact_ratios_non_dann = {race: (accuracy) / max_accuracy_non_dann for race, accuracy in accuracies_per_race_non_dann.items()}
disparate_impact_ratios_non_dann

{'African': 0.6176058756703917,
 'Asian': 0.5935042529289039,
 'Indian': 0.5430849091583241,
 'Latino': 1.0,
 'Middle Eastern': 0.5576493133184286,
 'White': 0.6333364077259487}

In [37]:
accuracies_per_race_dann  = calculate_accuracy_per_race(dann_result)
print(accuracies_per_race_dann)
max_accuracy_dann = max(accuracies_per_race_dann.values())
print(max_accuracy_dann)
disparate_impact_ratios_dann = {race: (accuracy) / max_accuracy_dann for race, accuracy in accuracies_per_race_dann.items()}
print(disparate_impact_ratios_dann)

{'African': 0.3058968058968059, 'Asian': 0.3050373134328358, 'Indian': 0.27413793103448275, 'Latino': 0.4381443298969072, 'Middle Eastern': 0.24752475247524752, 'White': 0.3232666190135811}
0.4381443298969072
{'African': 0.6981644746350629, 'Asian': 0.6962028094820017, 'Indian': 0.6256795131845841, 'Latino': 1.0, 'Middle Eastern': 0.564938846825859, 'White': 0.7378085186898204}


In [38]:
accuracies_per_race_ewc_dann  = calculate_accuracy_per_race(ewc_dann_result)
print(accuracies_per_race_ewc_dann)
max_accuracy_ewc_dann = max(accuracies_per_race_ewc_dann.values())
print(max_accuracy_ewc_dann)
disparate_impact_ratios_ewc_dann = {race: (accuracy) / max_accuracy_ewc_dann for race, accuracy in accuracies_per_race_ewc_dann.items()}
print(disparate_impact_ratios_ewc_dann)

{'African': 0.300982800982801, 'Asian': 0.30223880597014924, 'Indian': 0.2672413793103448, 'Latino': 0.4381443298969072, 'Middle Eastern': 0.24257425742574257, 'White': 0.31683345246604716}
0.4381443298969072
{'African': 0.6869489810666282, 'Asian': 0.6898156277436347, 'Indian': 0.6099391480730223, 'Latino': 1.0, 'Middle Eastern': 0.5536400698893419, 'White': 0.7231257620989783}


In [39]:
dicts = [disparate_impact_ratios_non_dann, disparate_impact_ratios_dann, disparate_impact_ratios_ewc_dann]
labels = ['non_dann', 'dann', 'dann_ewc']
df_disparate_impact = round(pd.DataFrame(dicts, index=labels),3)
df_disparate_impact

Unnamed: 0,African,Asian,Indian,Latino,Middle Eastern,White
non_dann,0.618,0.594,0.543,1.0,0.558,0.633
dann,0.698,0.696,0.626,1.0,0.565,0.738
dann_ewc,0.687,0.69,0.61,1.0,0.554,0.723


### Equality of Opportunity and Equalized Odds

In [40]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

def calculate_tpr_fpr_per_race(df, emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']):

    # float_formatter = ":.2f".format
    # np.set_printoptions() #(formatter={'float_kind':})
    race_groups = df.groupby('gt_race')
    tpr_fpr = {}
    df_tpr_fpr = pd.DataFrame()
    for race, group in race_groups:
        cm = confusion_matrix(group['gt_emotion'], group['p_emotion'], labels=emotions)
        # disp = ConfusionMatrixDisplay(confusion_matrix=cm,
        #                               display_labels= emotions)
        # disp.plot()
        # plt.show()
        # print(race)
        # print(cm)
        tp = cm.diagonal()
        # print(f'tp : {tp}')
        fn = cm.sum(axis=1) - tp
        # print(f'fn : {fn}')
        fp = cm.sum(axis=0) - tp
        tn = cm.sum() - (tp + fn + fp)

        # print(f'tp+fn : {fn + tp}')
        tpr = tp / (tp + fn)
        tpr = np.nan_to_num(tpr)

        fpr = fp / (fp + tn)
        fpr = np.nan_to_num(fpr)

        tpr_diff_fpr = tpr - fpr
        tpr_diff_fpr = np.abs(tpr_diff_fpr)




        # print(f'Positive Rate = {tpr} for race: {race}')
        # print(f'tpr : {tpr}')

        # print(f'fpr : {fpr}')
        # fpr = fp / (fp + tn)
        tpr_fpr[race] = {'TPR': np.round(tpr,2)
                         ,'FPR': np.round(fpr,2)
                        #  ,"TPR-FPR": np.round(tpr_diff_fpr,2)
                         }
        df_trp_fpr_race = pd.DataFrame(tpr_fpr[race], index=emotions)
        df_trp_fpr_race.columns = [f'{race} {key}' for key in df_trp_fpr_race.columns]
        # print(df_trp_fpr_race)
        df_tpr_fpr = pd.concat([df_tpr_fpr, df_trp_fpr_race], axis=1)
    return df_tpr_fpr, tpr_fpr

    # df_tpr_fpr = pd.DataFrame(tpr_fpr['African'], index=emotions)
    # TPR_cols = [col for col in df_tpr_fpr.columns if 'TPR' in col]
    # FPR_cols = [col for col in df_tpr_fpr.columns if 'FPR' in col]
    # df_tpr = df_tpr_fpr[TPR_cols]
    # df_tpr_diff = round(df_tpr.apply(lambda x: x.max()-x, axis=1),2)
    # print(df_tpr_diff)
    # df_tpr_ratio = round(df_tpr.apply(lambda x: x/x.max() if x.max() >0 else 0, axis=1),2)
    # df_tpr_ratio = df_tpr_ratio.T

    # print(df_tpr_ratio)




    # df_fpr = df_tpr_fpr[FPR_cols]
    # df_tpr_fpr = df_tpr_fpr[TPR_cols + FPR_cols]
    # # print(df_tpr_fpr)
    # return df_tpr_fpr, tpr_fpr
    # return tpr_fpr

emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
# tpr_fpr_per_race = calculate_tpr_fpr_per_race(non_dann_result)
# tpr_fpr_per_race


In [41]:
df, dict_tpr_fpr = calculate_tpr_fpr_per_race(non_dann_result)
df

  tpr = tp / (tp + fn)


Unnamed: 0,African TPR,African FPR,Asian TPR,Asian FPR,Indian TPR,Indian FPR,Latino TPR,Latino FPR,Middle Eastern TPR,Middle Eastern FPR,White TPR,White FPR
Angry,0.63,0.14,0.46,0.09,0.52,0.13,0.67,0.08,0.43,0.12,0.49,0.1
Disgust,0.26,0.16,0.08,0.12,0.23,0.11,0.0,0.09,0.25,0.14,0.23,0.14
Fear,0.14,0.05,0.33,0.03,0.0,0.05,0.0,0.03,0.0,0.02,0.14,0.05
Happy,0.63,0.13,0.64,0.17,0.66,0.16,0.7,0.23,0.61,0.1,0.67,0.15
Sad,0.08,0.15,0.09,0.18,0.17,0.21,0.18,0.06,0.32,0.29,0.13,0.15
Surprise,0.06,0.11,0.1,0.17,0.04,0.13,0.12,0.12,0.12,0.13,0.07,0.15
Neutral,0.14,0.07,0.09,0.09,0.07,0.06,0.0,0.07,0.05,0.05,0.06,0.08


In [100]:
def get_four_fifths_count(df_fpr, emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']):
  df_fpr_ratio  = round(df_fpr.apply(lambda x: x/x.max() if x.max() >0 else 0, axis=1),2)
  df_fpr_ratio = df_fpr_ratio.T
  for emotion in emotions:
    df_fpr_ratio.at['count_four_fifths',emotion] = int((df_fpr_ratio[emotion]>=0.8).sum())

  return df_fpr_ratio

def get_ten_percent(df, emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']):
  df  = round(df.apply(lambda x: x.max() - x, axis=1),2)
  df = df.T
  for emotion in emotions:
    df.at['count_ten_percent',emotion] = int((df[emotion]<=0.1).sum())

  return df


def get_eoo_eo(df, emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']):
  TPR_cols = [col for col in df.columns if 'TPR' in col]
  FPR_cols = [col for col in df.columns if 'FPR' in col]

  df_tpr = df[TPR_cols]
  # df_tpr_ratio = round(df_tpr.apply(lambda x: x/x.max() if x.max() >0 else 0, axis=1),2)
  # df_tpr_ratio = df_tpr_ratio.T
  # for emotion in emotions:
  #   df_tpr_ratio.at['count_four_fifths',emotion] = int((df_tpr_ratio[emotion]>=0.8).sum())
  df_tpr_ratio = get_four_fifths_count(df_tpr)
  # print(df_tpr_ratio[df_tpr_ratio.index == 'count_four_fifths'])
  # print(df_tpr_ratio)

  # df_tpr_diff = get_ten_percent(df_tpr)
  # print(df_tpr_diff[df_tpr_diff.index == 'count_ten_percent'])
  # print(df_tpr_diff)

  df_tpr_ratio = df_tpr_ratio.add_suffix('_TPR')
  df_tpr_ratio.index = df_tpr_ratio.index.str.rstrip(' TPR')





  df_fpr =df[FPR_cols]
  # df_fpr_ratio  = round(df_fpr.apply(lambda x: x/x.max() if x.max() >0 else 0, axis=1),2)
  # df_fpr_ratio = df_fpr_ratio.T
  # for emotion in emotions:
  #   df_fpr_ratio.at['count_four_fifths',emotion] = int((df_fpr_ratio[emotion]>=0.8).sum())
  # print(df_fpr_ratio)
  df_fpr_ratio = get_four_fifths_count(df_fpr)
  df_fpr_ratio = df_fpr_ratio.add_suffix('_FPR')
  df_fpr_ratio.index = df_fpr_ratio.index.str.rstrip(' FPR')

  # print(df_fpr_ratio[df_fpr_ratio.index == 'count_four_fifths'])
  # print(df_fpr_ratio)

  df = pd.concat([df_tpr_ratio,df_fpr_ratio], axis = 1)
  # print(df)



  # df_fpr_diff = get_ten_percent(df_fpr)
  # print(df_fpr_diff[df_fpr_diff.index == 'count_ten_percent'])
  # # print(df_fpr_diff)


  return df


  # df_fpr = df[FPR_cols]
  # df_tpr_fpr = df[TPR_cols + FPR_cols]
  # # print(df_tpr_fpr)
  # df

In [94]:
# for result in results:
#   print(result)

# output = {'non_dann':non_dann_result,
#            'dann':dann_result,
#            'ewc_dann':ewc_dann_result}
dict_eoo = {}
for name, result in output.items():
  print("\n",name,"********")
  df, dict_tpr_fpr = calculate_tpr_fpr_per_race(result)
  df = get_eoo_eo(df)
  df
  # print(df)
  # tpr, fpr = get_eoo_eo(df)
  # dict_eoo[name] = {'TPR':tpr, 'FPR':fpr}

# for name, eoo in dict_eoo.items():
#   print(eoo['TPR'])
#   # get_eoo_eo(result)



 non_dann ********
                   Angry_TPR  Disgust_TPR  Fear_TPR  Happy_TPR  Sad_TPR  \
African                 0.94         1.00      0.42       0.90     0.25   
Asian                   0.69         0.31      1.00       0.91     0.28   
Indian                  0.78         0.88      0.00       0.94     0.53   
Latino                  1.00         0.00      0.00       1.00     0.56   
Middle Eastern          0.64         0.96      0.00       0.87     1.00   
White                   0.73         0.88      0.42       0.96     0.41   
count_four_fifths       2.00         4.00      1.00       6.00     1.00   

                   Surprise_TPR  Neutral_TPR  Angry_FPR  Disgust_FPR  \
African                    0.50         1.00       1.00         1.00   
Asian                      0.83         0.64       0.64         0.75   
Indian                     0.33         0.50       0.93         0.69   
Latino                     1.00         0.00       0.57         0.56   
Middle Eastern     

  tpr = tp / (tp + fn)
  tpr = tp / (tp + fn)
  tpr = tp / (tp + fn)


In [101]:
get_eoo_eo(calculate_tpr_fpr_per_race(non_dann_result)[0])

  tpr = tp / (tp + fn)


Unnamed: 0,Angry_TPR,Disgust_TPR,Fear_TPR,Happy_TPR,Sad_TPR,Surprise_TPR,Neutral_TPR,Angry_FPR,Disgust_FPR,Fear_FPR,Happy_FPR,Sad_FPR,Surprise_FPR,Neutral_FPR
African,0.94,1.0,0.42,0.9,0.25,0.5,1.0,1.0,1.0,1.0,0.57,0.52,0.65,0.78
Asian,0.69,0.31,1.0,0.91,0.28,0.83,0.64,0.64,0.75,0.6,0.74,0.62,1.0,1.0
Indian,0.78,0.88,0.0,0.94,0.53,0.33,0.5,0.93,0.69,1.0,0.7,0.72,0.76,0.67
Latino,1.0,0.0,0.0,1.0,0.56,1.0,0.0,0.57,0.56,0.6,1.0,0.21,0.71,0.78
Middle Eastern,0.64,0.96,0.0,0.87,1.0,1.0,0.36,0.86,0.88,0.4,0.43,1.0,0.76,0.56
White,0.73,0.88,0.42,0.96,0.41,0.58,0.43,0.71,0.88,1.0,0.65,0.52,0.88,0.89
count_four_fifths,2.0,4.0,1.0,6.0,1.0,3.0,1.0,3.0,3.0,3.0,1.0,1.0,2.0,2.0


In [102]:
get_eoo_eo(calculate_tpr_fpr_per_race(dann_result)[0])

  tpr = tp / (tp + fn)


Unnamed: 0,Angry_TPR,Disgust_TPR,Fear_TPR,Happy_TPR,Sad_TPR,Surprise_TPR,Neutral_TPR,Angry_FPR,Disgust_FPR,Fear_FPR,Happy_FPR,Sad_FPR,Surprise_FPR,Neutral_FPR
African,0.84,0.86,0.0,0.87,0.43,0.17,0.77,1.0,1.0,1.0,0.44,0.44,0.81,0.77
Asian,0.64,0.0,0.49,0.92,0.38,0.56,0.74,0.57,0.71,0.57,0.56,0.64,1.0,0.92
Indian,0.57,1.0,1.0,0.89,0.43,1.0,0.55,0.93,0.86,0.86,0.5,0.72,0.89,0.62
Latino,1.0,0.0,0.0,1.0,0.62,0.67,1.0,0.5,0.29,0.57,1.0,0.24,0.78,1.0
Middle Eastern,0.64,0.0,0.0,0.79,1.0,1.0,0.45,1.0,0.86,0.29,0.44,1.0,0.81,0.85
White,0.79,0.57,0.28,0.98,0.43,0.5,0.61,0.79,0.71,0.71,0.61,0.44,0.89,0.92
count_four_fifths,2.0,2.0,1.0,5.0,1.0,2.0,1.0,3.0,3.0,2.0,1.0,1.0,5.0,4.0


In [103]:
get_eoo_eo(calculate_tpr_fpr_per_race(ewc_dann_result)[0])

  tpr = tp / (tp + fn)


Unnamed: 0,Angry_TPR,Disgust_TPR,Fear_TPR,Happy_TPR,Sad_TPR,Surprise_TPR,Neutral_TPR,Angry_FPR,Disgust_FPR,Fear_FPR,Happy_FPR,Sad_FPR,Surprise_FPR,Neutral_FPR
African,1.0,1.0,0.0,0.94,0.31,0.0,0.95,1.0,1.0,0.91,0.61,0.52,0.68,0.91
Asian,0.84,0.8,1.0,0.89,0.31,0.92,1.0,0.72,0.8,0.64,0.67,0.63,1.0,0.91
Indian,0.81,0.0,1.0,0.9,0.73,0.0,0.7,0.89,0.6,1.0,0.67,0.78,0.63,0.73
Latino,0.96,0.0,0.0,1.0,0.5,1.0,0.75,0.67,0.8,0.55,1.0,0.22,0.89,0.73
Middle Eastern,0.61,0.0,0.0,0.81,1.0,0.5,0.55,1.0,0.6,0.64,0.39,1.0,0.84,1.0
White,0.87,0.7,0.58,0.98,0.58,0.5,0.75,0.89,1.0,0.73,0.61,0.52,0.79,1.0
count_four_fifths,5.0,2.0,2.0,6.0,1.0,2.0,2.0,4.0,4.0,2.0,1.0,1.0,3.0,4.0


In [44]:
dict_tpr_fpr

{'African': {'TPR': array([0.7 , 0.1 , 0.  , 0.59, 0.08, 0.  , 0.19]),
  'FPR': array([0.18, 0.05, 0.1 , 0.11, 0.14, 0.13, 0.1 ])},
 'Asian': {'TPR': array([0.59, 0.08, 0.33, 0.56, 0.08, 0.11, 0.2 ]),
  'FPR': array([0.13, 0.04, 0.07, 0.12, 0.17, 0.19, 0.1 ])},
 'Indian': {'TPR': array([0.57, 0.  , 0.33, 0.57, 0.19, 0.  , 0.14]),
  'FPR': array([0.16, 0.03, 0.11, 0.12, 0.21, 0.12, 0.08])},
 'Latino': {'TPR': array([0.67, 0.  , 0.  , 0.63, 0.13, 0.12, 0.15]),
  'FPR': array([0.12, 0.04, 0.06, 0.18, 0.06, 0.17, 0.08])},
 'Middle Eastern': {'TPR': array([0.43, 0.  , 0.  , 0.51, 0.26, 0.06, 0.11]),
  'FPR': array([0.18, 0.03, 0.07, 0.07, 0.27, 0.16, 0.11])},
 'White': {'TPR': array([0.61, 0.07, 0.19, 0.62, 0.15, 0.06, 0.15]),
  'FPR': array([0.16, 0.05, 0.08, 0.11, 0.14, 0.15, 0.11])}}

In [45]:
# tpr_fpr_per_race_non_dann = calculate_tpr_fpr_per_race(non_dann_result)
# tpr_fpr_per_race_dann = calculate_tpr_fpr_per_race(dann_result)
# tpr_fpr_per_race_ewc_dann = calculate_tpr_fpr_per_race(ewc_dann_result)

In [46]:
for race, metrics in tpr_fpr_per_race.items():
    # print(f"Race: {race}, TPR: {metrics['TPR']}, FPR: {metrics['FPR']}")
    print(f"Race: {race}, Difference: {metrics['TPR'] - metrics['FPR']}")


NameError: name 'tpr_fpr_per_race' is not defined

In [None]:
tpr_fpr_per_race_non_dann = calculate_tpr_fpr_per_race(non_dann_result)
tpr_fpr_per_race_dann = calculate_tpr_fpr_per_race(dann_result)
tpr_fpr_per_race_ewc_dann = calculate_tpr_fpr_per_race(ewc_dann_result)

