In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the census CSV. Malformed rows are skipped instead of aborting the read.
df = pd.read_csv("adult.csv", on_bad_lines='skip')

# Narrow to the modelling columns BEFORE dropping missing values, so that a '?'
# in a column the model never uses does not throw away an otherwise usable row.
# .copy() yields an independent frame that is safe to assign into later.
df = df[['age','workclass','education','gender','race','hours-per-week','income']].copy()

# '?' is treated as the missing-value marker: convert it to NA, then drop any
# row that is missing one of the selected columns. Original index labels are kept.
df = df.replace('?', pd.NA).dropna()

# Integer-encode every string-valued column in place. A single encoder is
# re-fitted per column, so after the loop `le` holds the mapping of the last
# column only ('income').
# NOTE(review): the codes are expected to be gender female=0 / male=1 and
# income <=50K=0 / >50K=1 — this relies on LabelEncoder's ordering of the
# label strings; confirm against the actual spellings in the CSV.
le = LabelEncoder()
categorical_columns = ('workclass', 'education', 'gender', 'race', 'income')
for column_name in categorical_columns:
    df[column_name] = le.fit_transform(df[column_name])

# Separate the predictors from the encoded income target.
y = df['income']
X = df.drop(columns='income')

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Build an evaluation frame: a copy of the held-out features with the true
# label ('actual') and the model output ('pred') attached as extra columns.
test_df = X_test.assign(actual=y_test, pred=y_pred)

# Positive-prediction rate per gender group. Because 'pred' is 0/1, its mean
# is the share of rows predicted as the positive class.
# NOTE(review): codes 1 and 0 are taken to mean male and female respectively,
# as the printed labels below state — confirm against the encoder mapping.
is_male = test_df['gender'] == 1
is_female = test_df['gender'] == 0
male_pred_rate = test_df.loc[is_male, 'pred'].mean()
female_pred_rate = test_df.loc[is_female, 'pred'].mean()

print("Demographic Parity Rates:")
print("Male predicted positive rate   :", male_pred_rate)
print("Female predicted positive rate :", female_pred_rate)

# Demographic parity difference: absolute gap between the two group rates.
dp_difference = abs(male_pred_rate - female_pred_rate)
print("\nDemographic Parity Difference :", dp_difference)

# All three charts are bar plots keyed by the encoded gender value and share
# the same x-axis label; only the plotted series, title and y-axis label vary.
gender_axis_label = "Gender (0=Female, 1=Male)"
pred_by_gender = test_df.groupby('gender')['pred']

# (series to plot, figure title, y-axis label) — drawn in this order.
chart_specs = [
    (
        df['gender'].value_counts(),
        "Gender Distribution in Dataset",
        "Count",
    ),
    (
        pred_by_gender.mean(),
        "Prediction Rate (>50K) by Gender",
        "Positive Prediction Rate",
    ),
    (
        pred_by_gender.sum(),
        "Total Positive Predictions by Gender",
        "Number of Positive Predictions",
    ),
]

for chart_series, chart_title, chart_ylabel in chart_specs:
    plt.figure(figsize=(6, 4))
    chart_series.plot(kind='bar')
    plt.title(chart_title)
    plt.xlabel(gender_axis_label)
    plt.ylabel(chart_ylabel)
    plt.show()
