<a href="https://colab.research.google.com/github/mrunalpatil07/aies/blob/main/AIES_Experiment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Name:-Mrunal Patil                                                                  
Class:- B.Tech B DIV                                                                       
PRN NO. 22SC114501069                                                                  
Title:- Impact of Data Quality on AI Fairness.

In [1]:
# Install fairlearn if not already installed
# pip install fairlearn
!pip install fairlearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from fairlearn.metrics import (
    MetricFrame,
    true_positive_rate,
    false_positive_rate,
    false_negative_rate,
    selection_rate
)

# Load your dataset
df = pd.read_csv('/content/Student_Performance_on_an_Entrance_Examination.csv')

# 🎯 Binary target: 1 if Performance is 'Excellent', else 0
df['target'] = (df['Performance'] == 'Excellent').astype(int)

# ⚖️ Sensitive feature
sensitive_feature = 'Gender'

# Drop rows with missing values in critical columns
df = df.dropna(subset=['target', sensitive_feature])

# 🔁 Drop columns not used
drop_cols = ['Performance']  # Original performance column now replaced with binary 'target'
df = df.drop(columns=drop_cols)

# 🧠 Define categorical features to encode (excluding target and sensitive)
categorical_cols = df.select_dtypes(include='object').columns.tolist()
categorical_cols = [col for col in categorical_cols if col != sensitive_feature]

# One-hot encode
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# ✨ Define X, y, and sensitive attribute
X = df.drop(columns=['target', sensitive_feature])
y = df['target']
sensitive = df[sensitive_feature]

# 🧪 Split the data
X_train, X_test, y_train, y_test, sens_train, sens_test = train_test_split(
    X, y, sensitive, test_size=0.3, stratify=sensitive
)

# 🔁 Train the model
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# 📊 Fairness evaluation
metric_frame = MetricFrame(
    metrics={
        'TPR': true_positive_rate,
        'FPR': false_positive_rate,
        'FNR': false_negative_rate,
        'Selection Rate': selection_rate
    },
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=sens_test
)

print("📈 Fairness Metrics by Gender:\n")
print(metric_frame.by_group)


Collecting fairlearn
  Downloading fairlearn-0.12.0-py3-none-any.whl.metadata (7.0 kB)
Downloading fairlearn-0.12.0-py3-none-any.whl (240 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/240.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m235.5/240.0 kB[0m [31m7.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fairlearn
Successfully installed fairlearn-0.12.0
📈 Fairness Metrics by Gender:

          TPR       FPR    FNR  Selection Rate
Gender                                        
female  0.125  0.035294  0.875        0.043011
male    0.000  0.022989  1.000        0.018692
