In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
# NOTE(review): confirm that the installed fairlearn release actually exports
# `EqualOpportunity`; the documented true-positive-rate constraint is named
# `TruePositiveRateParity`.
from fairlearn.reductions import (
    BoundedGroupLoss,
    DemographicParity,
    EqualOpportunity,
    ExponentiatedGradient,
    ZeroOneLoss,
)
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, auc, classification_report, confusion_matrix, roc_curve
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split

In [None]:
# Load dataset (replace with actual dataset)
# Reads the employee table from a CSV file in the current working directory.
data = pd.read_csv(filepath_or_buffer='employee_performance.csv')

In [None]:
# Preprocessing
# One-hot encode the categorical columns (dropping the first level of each),
# separate the 'Performance' target from the predictors, and hold out 20% of
# the rows as a test set using a fixed seed.
# NOTE(review): this assumes 'Performance' (and the 'Gender' column used later
# as the sensitive feature) are numeric, otherwise get_dummies would rename
# them -- confirm against the real dataset.
data = pd.get_dummies(data, drop_first=True)
y = data.loc[:, 'Performance']
X = data.drop('Performance', axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Hyperparameter tuning using GridSearchCV
# Exhaustively tries every learning_rate / n_estimators combination below with
# 10-fold cross-validation on the training split.
param_grid = dict(
    learning_rate=[0.01, 0.1, 0.2],
    n_estimators=[50, 100, 200],
)
grid = GridSearchCV(
    estimator=LGBMClassifier(),
    param_grid=param_grid,
    cv=10,
)
grid.fit(X_train, y_train)

In [None]:
# Best model
# Keep the estimator selected by the grid search above under its own name; it
# is reused later as the base learner for the fairness reductions and for
# probability scoring.
base_model = grid.best_estimator_

In [None]:
# Apply fairness constraints
# Wrap the tuned base model in one ExponentiatedGradient reduction per
# constraint; the resulting dict is keyed by the constraint's class name.

# BoundedGroupLoss cannot be built without a loss function, and needs a bound
# to optimise against. Zero-one loss matches the classification setting; the
# bound caps the per-group error rate.
# NOTE(review): 0.2 is a placeholder ceiling -- tune it for the real dataset.
GROUP_LOSS_UPPER_BOUND = 0.2

# NOTE(review): confirm `EqualOpportunity` exists in the installed fairlearn
# release (the documented equivalent is `TruePositiveRateParity`).
constraints = [
    DemographicParity(),
    EqualOpportunity(),
    BoundedGroupLoss(ZeroOneLoss(), upper_bound=GROUP_LOSS_UPPER_BOUND),
]
fair_models = {
    constraint.__class__.__name__: ExponentiatedGradient(base_model, constraints=constraint)
    for constraint in constraints
}

# train_test_split preserves the DataFrame index, so selecting by X_train.index
# yields exactly the training rows, in the same order. Passing the whole
# column would hand the reduction more sensitive values than training samples.
sensitive_train = data.loc[X_train.index, 'Gender']

for name, model in fair_models.items():
    model.fit(X_train, y_train, sensitive_features=sensitive_train)

In [None]:
# Predictions and evaluation
# Renders one dashboard section per fairness-constrained model (accuracy,
# classification report, confusion matrix), followed by a single ROC curve for
# the unconstrained baseline model.

# ExponentiatedGradient produces randomized predictions; seeding them keeps the
# reported metrics stable across reruns.
PREDICTION_SEED = 42

st.title("Fair Employee Performance Prediction Dashboard")
st.write("### Model Performance with Fairness Constraints")
for name, model in fair_models.items():
    y_pred = model.predict(X_test, random_state=PREDICTION_SEED)
    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    st.write(f"#### {name}")
    st.write(f"Accuracy: {acc * 100:.2f}%")
    st.text(report)

    # Confusion Matrix
    fig, ax = plt.subplots()
    sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'{name} Confusion Matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    st.pyplot(fig)
    plt.close(fig)  # release the figure so repeated runs do not accumulate open figures

# ROC-AUC Curve
# The scores come from base_model (the unconstrained estimator), so the curve
# is identical for every fairness constraint. It is drawn once and labelled as
# the baseline rather than repeated under each fair model's name.
st.write("#### Baseline (unconstrained) model")
y_scores = base_model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_scores)
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label=f'AUC: {auc(fpr, tpr):.2f}')
ax.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Baseline Model ROC-AUC Curve')
ax.legend()
st.pyplot(fig)
plt.close(fig)

In [None]:
# Function to predict performance with fairness
def predict_performance(features, gender, constraint_name):
    """Classify one employee with the fairness-constrained model of choice.

    Args:
        features: Flat sequence of feature values for a single employee; it is
            reshaped into one row before prediction.
            NOTE(review): presumably ordered like the training columns --
            confirm with the caller.
        gender: Sensitive attribute of the employee. Kept for backward
            compatibility only: the fitted reduction's ``predict`` does not
            take sensitive features, so this value is not forwarded.
        constraint_name: Key into ``fair_models`` selecting the model.

    Returns:
        "High Performance" if the predicted label equals 1, otherwise
        "Low Performance".

    Raises:
        KeyError: If ``constraint_name`` is not a key of ``fair_models``.
    """
    if constraint_name not in fair_models:
        raise KeyError(
            f"Unknown constraint {constraint_name!r}; available: {sorted(fair_models)}"
        )

    input_data = np.asarray(features).reshape(1, -1)
    # Passing `sensitive_features` here made the call fail: prediction relies
    # on the feature row alone.
    prediction = fair_models[constraint_name].predict(input_data)
    return "High Performance" if prediction[0] == 1 else "Low Performance"