# Reweighing
The algorithm selected for transforming the original dataset is Reweighing. The passing threshold for any grade is now a score of 5 or higher (`>= 5`).

## Statistical Parity Difference Reweighed
### Calculating Sex vs. Grades (First Period, Second Period, Final Grade)

In [2]:
import pandas as pd

## Load the dataset from an Excel workbook; the path is relative, so the file must be
## in the current working directory. The cells below read its 'sex', 'age', 'G1', 'G2'
## and 'G3' columns.
df = pd.read_excel("student-por.xlsx")

In [None]:
## Grade columns to evaluate; one result row is produced per column
grades = ['G1', 'G2', 'G3']
sex_grades_results = []

for grade in grades:
    ## Pass flag for this grade column: 1 when the score is at least 5, else 0
    df['pass'] = df[grade].ge(5).astype(int)

    ## The mean of a 0/1 flag is the share of students who passed, per sex
    rate_by_sex = df.groupby('sex', observed=True)['pass'].mean()
    female_rate, male_rate = rate_by_sex['F'], rate_by_sex['M']

    ## Statistical parity difference = unprivileged (female) rate - privileged (male) rate;
    ## a value of 0 means both groups pass at the same rate
    row = {
        'Grade': grade,
        'Pass Rate (Female)': round(female_rate, 3),
        'Pass Rate (Male)': round(male_rate, 3),
        'Statistical Parity Diff': round(female_rate - male_rate, 3),
    }
    sex_grades_results.append(row)

print(pd.DataFrame(sex_grades_results))

  Grade  Pass Rate (Female)  Pass Rate (Male)  Statistical Parity Diff
0    G1               0.997             0.992                    0.005
1    G2               0.990             0.989                    0.001
2    G3               0.982             0.966                    0.016


### Calculating Age vs. Grades (First Period, Second Period, Final Grade)

In [4]:
## Bucket ages into two bands; pd.cut bins are right-inclusive by default, so these
## edges give (14, 17] -> '15-17' and (17, max age] -> '18-22'
age_edges = [14, 17, df['age'].max()]
df['age_group'] = pd.cut(df['age'], bins=age_edges, labels=['15-17', '18-22'])

age_grade_results = []

for grade in grades:
    ## Pass flag for this grade column: 1 when the score is at least 5, else 0
    df['pass'] = df[grade].ge(5).astype(int)

    ## The mean of a 0/1 flag is the share of students who passed, per age band
    rate_by_age = df.groupby('age_group', observed=True)['pass'].mean()
    older_rate, younger_rate = rate_by_age['18-22'], rate_by_age['15-17']

    ## Statistical parity difference = unprivileged (older) rate - privileged (younger) rate;
    ## a value of 0 means both bands pass at the same rate
    ## NOTE(review): rows carry no 'Grade' label here, unlike the sex tables;
    ## they follow the order of `grades`
    row = {
        'Pass Rate (18+)': round(older_rate, 3),
        'Pass Rate (15-17)': round(younger_rate, 3),
        'Statistical Parity Diff': round(older_rate - younger_rate, 3),
    }
    age_grade_results.append(row)

print(pd.DataFrame(age_grade_results))

   Pass Rate (18+)  Pass Rate (15-17)  Statistical Parity Diff
0            0.983              1.000                   -0.017
1            0.972              0.996                   -0.023
2            0.939              0.989                   -0.050


## Disparate Impact Reweighed
### Calculating Sex vs. Grades (First Period, Second Period, Final Grade)

In [6]:
## Grade columns to evaluate; one result row is produced per column
grades = ['G1', 'G2', 'G3']
sex_grades_results = []

for grade in grades:
    ## Pass flag for this grade column: 1 when the score is at least 5, else 0
    passed = df[grade].ge(5)
    df['pass'] = passed.astype(int)

    ## The mean of a 0/1 flag is the share of students who passed, per sex
    rate_by_sex = df.groupby('sex', observed=True)['pass'].mean()
    female_rate = rate_by_sex['F']
    male_rate = rate_by_sex['M']

    ## Disparate impact = unprivileged (female) rate / privileged (male) rate;
    ## a value of 1.0 means both groups pass at the same rate
    ratio = female_rate / male_rate

    sex_grades_results.append(
        {
            'Grade': grade,
            'Pass Rate (Female)': round(female_rate, 3),
            'Pass Rate (Male)': round(male_rate, 3),
            'Disparate Impact': round(ratio, 3),
        }
    )

print(pd.DataFrame(sex_grades_results))

  Grade  Pass Rate (Female)  Pass Rate (Male)  Disparate Impact
0    G1               0.997             0.992             1.005
1    G2               0.990             0.989             1.001
2    G3               0.982             0.966             1.016


### Calculating Age vs. Grades (First Period, Second Period, Final Grade)

In [7]:
## Bucket ages into two bands; pd.cut bins are right-inclusive by default, so these
## edges give (14, 17] -> '15-17' and (17, max age] -> '18-22'
age_edges = [14, 17, df['age'].max()]
df['age_group'] = pd.cut(df['age'], bins=age_edges, labels=['15-17', '18-22'])

age_grade_results = []

for grade in grades:
    ## Pass flag for this grade column: 1 when the score is at least 5, else 0
    passed = df[grade].ge(5)
    df['pass'] = passed.astype(int)

    ## The mean of a 0/1 flag is the share of students who passed, per age band
    rate_by_age = df.groupby('age_group', observed=True)['pass'].mean()
    older_rate = rate_by_age['18-22']
    younger_rate = rate_by_age['15-17']

    ## Disparate impact = unprivileged (older) rate / privileged (younger) rate;
    ## a value of 1.0 means both bands pass at the same rate
    ratio = older_rate / younger_rate

    ## NOTE(review): rows carry no 'Grade' label here, unlike the sex tables;
    ## they follow the order of `grades`
    age_grade_results.append(
        {
            'Pass Rate (18+)': round(older_rate, 3),
            'Pass Rate (15-17)': round(younger_rate, 3),
            'Disparate Impact': round(ratio, 3),
        }
    )

print(pd.DataFrame(age_grade_results))

   Pass Rate (18+)  Pass Rate (15-17)  Disparate Impact
0            0.983              1.000             0.983
1            0.972              0.996             0.977
2            0.939              0.989             0.949


# Splitting the Dataset
To assure that the 