# Fairness Metric Calculations

Steps:
1. Read the dataset
2. Define the passing criterion (a grade of 10 or higher counts as a pass)
3. Calculate the pass rate for each group within each protected class (sex, age)
4. Calculate the fairness metrics (statistical parity difference, disparate impact)

In [25]:
import pandas as pd

# Load the dataset from the Excel workbook into a DataFrame.
# The cells below read its 'sex', 'age', 'G1', 'G2' and 'G3' columns.
df = pd.read_excel("student-por.xlsx")

## Statistical Parity Difference
### Calculating Sex vs. Grades (First Period, Second Period, Final Grade)

In [26]:
# Statistical parity difference between female and male students,
# computed separately for each grade column.
grades = ['G1', 'G2', 'G3']
sex_grades_results = []

for period in grades:
    # A grade of 10 or more counts as a pass. Storing the flag as 0/1 means
    # the mean of the column within a group is that group's pass rate.
    df['pass'] = (df[period] >= 10).astype(int)

    rate_by_sex = df.groupby('sex', observed=True)['pass'].mean()
    female_rate, male_rate = rate_by_sex['F'], rate_by_sex['M']

    # Difference = unprivileged (female) rate - privileged (male) rate.
    # A value of 0 means both groups pass at the same rate.
    row = {
        'Grade': period,
        'Pass Rate (Female)': round(female_rate, 3),
        'Pass Rate (Male)': round(male_rate, 3),
        'Statistical Parity Diff': round(female_rate - male_rate, 3),
    }
    sex_grades_results.append(row)

print(pd.DataFrame(sex_grades_results))


  Grade  Pass Rate (Female)  Pass Rate (Male)  Statistical Parity Diff
0    G1               0.789             0.714                    0.074
1    G2               0.812             0.726                    0.086
2    G3               0.869             0.812                    0.057


### Calculating Age vs. Grades (First Period, Second Period, Final Grade)

In [27]:
# Statistical parity difference between older and younger students,
# computed separately for each grade column listed in `grades`.
age_grade_results = []

# Two right-closed age bands: (14, 17] and (17, oldest age in the data].
# The '18-22' label is fixed text; the upper bin edge itself comes from the data.
df['age_group'] = pd.cut(df['age'], bins=[14, 17, df['age'].max()], labels=['15-17', '18-22'])

for grade in grades:
    # A grade of 10 or more counts as a pass. Storing the flag as 0/1 means
    # the mean of the column within a group is that group's pass rate.
    df['pass'] = (df[grade] >= 10).astype(int)

    pass_rates = df.groupby('age_group', observed=True)['pass'].mean()

    older = pass_rates['18-22']
    younger = pass_rates['15-17']

    ## Privileged group is younger, unprivileged group is older
    stat_dispar = older - younger

    ## Perfect fairness threshold for statistical parity difference is 0
    age_grade_results.append({
        # Label every row with its grade column (as the sex table does) so the
        # printed rows do not have to be matched to G1/G2/G3 by position.
        'Grade': grade,
        'Pass Rate (18+)': round(older, 3),
        'Pass Rate (15-17)': round(younger, 3),
        'Statistical Parity Diff': round(stat_dispar, 3)
    })

print(pd.DataFrame(age_grade_results))

   Pass Rate (18+)  Pass Rate (15-17)  Statistical Parity Diff
0            0.652              0.799                   -0.147
1            0.696              0.808                   -0.112
2            0.790              0.868                   -0.077


## Disparate Impact
### Calculating Sex vs. Grades (First Period, Second Period, Final Grade)

In [28]:
# Disparate impact (ratio of pass rates) between female and male students,
# computed separately for each grade column.
grades = ['G1', 'G2', 'G3']
sex_grades_results = []

for grade_col in grades:
    # Flag passes (grade of 10 or more) as 1 and fails as 0.
    df['pass'] = (df[grade_col] >= 10).astype(int)

    # The mean of the 0/1 flag within each sex is that group's pass rate.
    rates = df.groupby('sex', observed=True)['pass'].mean()
    female_rate = rates['F']
    male_rate = rates['M']

    # Ratio = unprivileged (female) rate / privileged (male) rate.
    # A ratio of 1.0 means both groups pass at the same rate.
    impact_ratio = female_rate / male_rate

    summary_row = {
        'Grade': grade_col,
        'Pass Rate (Female)': round(female_rate, 3),
        'Pass Rate (Male)': round(male_rate, 3),
        'Disparate Impact': round(impact_ratio, 3),
    }
    sex_grades_results.append(summary_row)

print(pd.DataFrame(sex_grades_results))

  Grade  Pass Rate (Female)  Pass Rate (Male)  Disparate Impact
0    G1               0.789             0.714             1.104
1    G2               0.812             0.726             1.119
2    G3               0.869             0.812             1.071


### Calculating Age vs. Grades (First Period, Second Period, Final Grade)

In [29]:
# Disparate impact (ratio of pass rates) between older and younger students,
# computed separately for each grade column listed in `grades`.
age_grade_results = []

# Two right-closed age bands: (14, 17] and (17, oldest age in the data].
# The '18-22' label is fixed text; the upper bin edge itself comes from the data.
df['age_group'] = pd.cut(df['age'], bins=[14, 17, df['age'].max()], labels=['15-17', '18-22'])

for grade in grades:
    # A grade of 10 or more counts as a pass. Storing the flag as 0/1 means
    # the mean of the column within a group is that group's pass rate.
    df['pass'] = (df[grade] >= 10).astype(int)

    pass_rates = df.groupby('age_group', observed=True)['pass'].mean()

    older = pass_rates['18-22']
    younger = pass_rates['15-17']

    ## Privileged group is younger, unprivileged group is older
    disparate_imp = older / younger

    ## Perfect fairness threshold for disparate impact is 1.0
    age_grade_results.append({
        # Label every row with its grade column (as the sex table does) so the
        # printed rows do not have to be matched to G1/G2/G3 by position.
        'Grade': grade,
        'Pass Rate (18+)': round(older, 3),
        'Pass Rate (15-17)': round(younger, 3),
        'Disparate Impact': round(disparate_imp, 3)
    })

print(pd.DataFrame(age_grade_results))

   Pass Rate (18+)  Pass Rate (15-17)  Disparate Impact
0            0.652              0.799             0.816
1            0.696              0.808             0.862
2            0.790              0.868             0.911
