In [33]:
# Pre-processing Bias Mitigation
## Selected Algorithm: Reweighing

In [35]:
import pandas as pd
from aif360.datasets import StandardDataset
from aif360.algorithms.preprocessing import Reweighing

# Load the dataset
df = pd.read_excel("student-por.xlsx")

# Convert 'sex' column to binary values (0 for Female, 1 for Male).
# Series.map turns every value missing from the mapping into NaN, and
# StandardDataset later drops rows that contain NaN, so an unexpected label
# (e.g. "f" or " M") would silently remove rows. Fail loudly instead.
sex_raw = df["sex"]
df["sex"] = sex_raw.map({"F": 0, "M": 1})
unmapped_sex = sex_raw[df["sex"].isna() & sex_raw.notna()].unique()
if len(unmapped_sex) > 0:
    raise ValueError(f"Unexpected values in 'sex' column: {list(unmapped_sex)}")

# Ensure all categorical columns are properly encoded as integer codes.
# Both object and pandas string dtypes are treated as text so the encoding
# does not depend on how the Excel reader typed the column.
text_columns = [
    name
    for name in df.columns
    if pd.api.types.is_object_dtype(df[name]) or pd.api.types.is_string_dtype(df[name])
]
for name in text_columns:
    df[name] = df[name].astype("category").cat.codes

# Convert the dataset into an AIF360-compatible format.
# NOTE(review): only the single highest grade counts as favorable, so very few
# rows are likely to be favorable; if one sex group has none, the reweighing
# factors may be degenerate. Confirm this is intended (a pass threshold on G3
# may be the more usual choice).
dataset = StandardDataset(
    df,
    label_name="G3",  # Outcome variable
    favorable_classes=[df["G3"].max()],  # Favorable outcome (highest grade)
    protected_attribute_names=["sex"],  # Protected attribute
    privileged_classes=[[1]],  # Male is privileged group
)

# Apply the Reweighing algorithm
reweighing = Reweighing(unprivileged_groups=[{"sex": 0}], privileged_groups=[{"sex": 1}])
reweighed_dataset = reweighing.fit_transform(dataset)

# Add weights to the original dataset.
# StandardDataset may have dropped rows with missing values, so the weights
# are matched back to df by instance name (the stringified df index) rather
# than by position. Rows that were excluded get a NaN weight.
weights_by_row = pd.Series(
    reweighed_dataset.instance_weights,
    index=reweighed_dataset.instance_names,
)
df["weights"] = weights_by_row.reindex(df.index.astype(str)).to_numpy()

rows_without_weight = int(df["weights"].isna().sum())
if rows_without_weight:
    print(f"{rows_without_weight} row(s) were excluded by StandardDataset and have no weight.")

# Save the transformed dataset
df.to_excel("reweighed_dataset.xlsx", index=False)