In [126]:
import pandas as pd
from aif360.datasets import StandardDataset
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing
from sklearn.model_selection import train_test_split

In [127]:
# Load the LifeSnaps DataFrame from a local pickle file (path is relative to the working directory).
# NOTE(review): unpickling can execute arbitrary code, so only load this file if it comes from a trusted source.
df = pd.read_pickle('data/lifesnaps/lifesnapsv2.pkl')
# Bare expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,id,date,nightly_temperature,nremhr,rmssd,spo2,full_sleep_breathing_rate,stress_score,sleep_points_percentage,exertion_points_percentage,...,"activityType_['Workout', 'Bike']","activityType_['Workout', 'Hike', 'Walk']","activityType_['Workout', 'Treadmill', 'Walk']","activityType_['Workout', 'Walk', 'Bike']","activityType_['Workout', 'Walk']",activityType_['Workout'],"activityType_['Yoga/Pilates', 'Sport']","activityType_['Yoga/Pilates', 'Walk']","activityType_['Yoga/Pilates', 'Workout']",activityType_['Yoga/Pilates']
0,621e332267b776a24092a584,2021-05-24,34.036874,57.4370,37.914,95.60,17.2,0.0,0.000000,0.000,...,0,0,0,0,0,0,0,0,0,0
1,621e30e467b776a240e817c7,2021-05-24,33.274919,52.5365,38.161,94.50,17.2,0.0,0.000000,0.000,...,0,0,0,0,0,0,0,0,0,0
2,621e367e67b776a24087d75d,2021-05-24,33.019585,62.9565,32.762,96.00,12.0,72.0,0.733333,0.700,...,0,0,0,0,0,0,0,0,0,0
3,621e360b67b776a24039709f,2021-05-24,34.175203,68.3210,31.413,96.00,15.4,69.0,0.633333,0.650,...,0,0,0,0,0,0,0,0,0,0
4,621e2f6167b776a240e082a9,2021-05-24,34.892637,62.5580,24.507,97.20,13.0,68.0,0.666667,0.625,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4904,621e300767b776a2404dc717,2022-01-17,33.186399,62.3900,34.057,95.85,15.0,72.0,0.733333,0.700,...,0,0,0,0,0,0,0,0,0,0
4905,621e309b67b776a240b532b0,2022-01-17,31.815133,62.3900,34.057,96.00,15.0,72.0,0.733333,0.700,...,0,0,0,0,0,0,0,0,0,0
4906,621e342e67b776a2404ce460,2022-01-17,33.864390,51.0640,80.576,96.00,17.2,66.0,0.533333,0.650,...,0,0,0,0,0,0,0,0,0,0
4907,621e309267b776a240ae1cdb,2022-01-17,34.121912,62.3900,34.057,96.00,15.0,72.0,0.733333,0.700,...,0,0,0,0,0,0,0,0,0,0


In [128]:
# Wrap the raw frame in an AIF360 StandardDataset so fairness metrics can be computed on it.
#   * label: 'label_stress_sema', where class 0 is treated as the favorable outcome
#   * protected attribute: 'gender', where value 1 is the privileged class
#   * 'id' and 'date' are removed from the feature set
# NOTE(review): StandardDataset presumably drops rows containing NA values by default —
# confirm the resulting row count against len(df).
dataset = StandardDataset(
    df,
    label_name='label_stress_sema',
    favorable_classes=[0],
    protected_attribute_names=['gender'],
    privileged_classes=[[1]],
    features_to_drop=['id', 'date'],
)



In [129]:
# Groups are described as lists of {attribute: value} dicts on the protected attribute.
# gender == 0 is the unprivileged group and gender == 1 the privileged one, which
# matches privileged_classes=[[1]] used when `dataset` was built.
unprivileged_groups = [{'gender': 0}]
privileged_groups = [{'gender': 1}]

# Dataset-level fairness metric object comparing the two groups defined above.
metric = BinaryLabelDatasetMetric(
    dataset,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

In [130]:
# Disparate Impact (DI)
# DI = P(favorable outcome | unprivileged group) / P(favorable outcome | privileged group)
# DI = 1 means both groups receive the favorable outcome at the same rate (no disparate impact),
# so for fairness the value should be close to 1.
# DI < 1 indicates potential discrimination against the unprivileged group;
# DI > 1 indicates potential discrimination against the privileged group.

disparate_impact = metric.disparate_impact()
print("Disparate Impact (DI):", disparate_impact)

Disparate Impact (DI): 0.9141932002158661


In [131]:
# Statistical Parity Difference (SPD)
# SPD = P(favorable outcome | unprivileged group) - P(favorable outcome | privileged group)
# A value of 0 means the unprivileged group (female) and the privileged group (male) receive
# the favorable outcome at equal rates, which is considered a fair situation.
# A negative value indicates that the unprivileged group is at a disadvantage;
# a positive value indicates that the privileged group is at a disadvantage.
# NOTE(review): the mapping gender 0 = female / 1 = male comes from the original comment —
# confirm it against the dataset's encoding.

spd = metric.statistical_parity_difference()
print("Statistical Parity Difference (SPD):", spd)

Statistical Parity Difference (SPD): -0.06630525437864887
