In [None]:
!pip install fairlearn



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from fairlearn.metrics import MetricFrame, selection_rate
from fairlearn.reductions import ExponentiatedGradient, DemographicParity

In [None]:
# Load the training data; the relative path is resolved against the working directory.
df = pd.read_csv(filepath_or_buffer="train.csv")

In [None]:
# Feature matrix and target for the survival model.
# .copy() makes X an independent frame, so later edits to X neither touch df
# nor trigger pandas' SettingWithCopyWarning.
X = df[["Pclass", "Sex", "Age", "Fare"]].copy()
y = df["Survived"]

In [None]:
# Impute missing ages with the median age.
# Build a new frame with the filled column instead of calling
# fillna(inplace=True) on X["Age"]: the chained in-place form operates on a
# temporary object and, per the pandas FutureWarning, never updates X under
# pandas 3.0. assign() keeps the existing column order.
X = X.assign(Age=X["Age"].fillna(X["Age"].median()))

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X["Age"].fillna(X["Age"].median(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["Age"].fillna(X["Age"].median(), inplace=True)


In [None]:
# One-hot encode the non-numeric columns, dropping the first level of each
# so no indicator column is redundant.
X = pd.get_dummies(data=X, drop_first=True)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a logistic-regression classifier on the training split, allowing the
# solver up to 1000 iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train, y=y_train)

In [None]:
# Predicted labels for the held-out rows.
y_pred = model.predict(X=X_test)

In [None]:
# Report accuracy of the predictions against the held-out labels.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy: 0.8044692737430168


In [None]:
# For fairness, let's check demographic parity by 'Sex'
# X_test.index holds row labels carried over from df, so select by label with
# .loc. Using .iloc would treat those labels as positions, which only lines up
# while df has a default 0..n-1 index.
df_test = df.loc[X_test.index]  # Get corresponding rows
sensitive_feature = df_test["Sex"]  # Keep original 'Sex' column

# Selection rate (share of positive predictions), computed separately per group.
mf = MetricFrame(metrics=selection_rate, y_true=y_test, y_pred=y_pred, sensitive_features=sensitive_feature)

print("Selection Rate by Gender:")
print(mf.by_group)


Selection Rate by Gender:
Sex
female    0.956522
male      0.027273
Name: selection_rate, dtype: float64
