# import library

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import r2_score, confusion_matrix
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

# load data 

In [3]:
df = pd.read_csv("ev_charging_dataset_Final(in).csv")




# LOAD MODEL

In [4]:
cost_model = joblib.load("saved_models/cost_model.pkl")

long_model = joblib.load("saved_models/long_model.pkl")

In [10]:
map_encoding = {}

for col in df.select_dtypes(include="object").columns:
    unique_vals = df[col].unique()
    map_encoding[col] = {v: i for i, v in enumerate(unique_vals)}
    df[col] = df[col].map(map_encoding[col])

print("✔ Map Encoding Completed")

✔ Map Encoding Completed


# FEATURE SELCTION

In [11]:
y_cost = df["Charging Cost (USD)"]
y_long = df["Long Session"]

In [12]:
COST_FEATURES = [
    "Energy Consumed (kWh)",
    "State of Charge (End %)",
    "Charger Type",
    "Energy per 100 km (kWh/100 km)",
    "Charging Station Location",
    "Vehicle Model",
    "Charging Rate (kW)"
]

LONG_FEATURES = [
    "Charging Time Difference (minutes)",
    "Charging Rate (kW)",
    "Battery Capacity (kWh)",
    "Vehicle Age (years)",
    "Energy Consumed (kWh)",
    "Temperature (°C)",
    "Charging Station Location"
]

X_cost = df[COST_FEATURES]
X_long = df[LONG_FEATURES]

In [None]:
cost_groups = X_cost.apply(lambda col: col >= col.median())

long_groups = pd.DataFrame()

for col in LONG_FEATURES:
    try:
        long_groups[col] = pd.qcut(X_long[col], q=3, labels=["Low","Med","High"], duplicates="drop")
        if long_groups[col].nunique() < 3:
            raise Exception()
    except:
        try:
            long_groups[col] = pd.cut(X_long[col], bins=3, labels=["Low","Med","High"])
            if long_groups[col].nunique() < 3:
                raise Exception()
        except:
            long_groups[col] = np.where(X_long[col] >= X_long[col].median(), "High", "Low")

In [14]:
pred_cost_before = pd.Series(cost_model.predict(X_cost), index=X_cost.index)
pred_long_before = pd.Series(long_model.predict(X_long), index=X_long.index)

print("✔ Predictions (before mitigation) computed")


✔ Predictions (before mitigation) computed


In [None]:
print("\nCOST MODEL FAIRNESS — BEFORE \n")

for col in COST_FEATURES:
    g = cost_groups[col]
    mpd = pred_cost_before.groupby(g).mean()
    dir_ratio = mpd.max() / (mpd.min() + 1e-9)

    print(f"\n--- {col} ---")
    print("Mean Prediction Difference:\n", mpd)
    print(f"DIR: {dir_ratio:.3f}")





--- Energy Consumed (kWh) ---
Mean Prediction Difference:
 Energy Consumed (kWh)
False     6.171657
True     15.549985
dtype: float64
DIR: 2.520

--- State of Charge (End %) ---
Mean Prediction Difference:
 State of Charge (End %)
False     7.157580
True     13.888119
dtype: float64
DIR: 1.940

--- Charger Type ---
Mean Prediction Difference:
 Charger Type
False    16.149821
True      8.051664
dtype: float64
DIR: 2.006

--- Energy per 100 km (kWh/100 km) ---
Mean Prediction Difference:
 Energy per 100 km (kWh/100 km)
False     7.561172
True     14.160470
dtype: float64
DIR: 1.873

--- Charging Station Location ---
Mean Prediction Difference:
 Charging Station Location
False    10.998968
True     10.749737
dtype: float64
DIR: 1.023

--- Vehicle Model ---
Mean Prediction Difference:
 Vehicle Model
False    10.697068
True     10.976076
dtype: float64
DIR: 1.026

--- Charging Rate (kW) ---
Mean Prediction Difference:
 Charging Rate (kW)
False    10.621587
True     11.100055
dtype: float

In [22]:
# cost after mitigation
from sklearn.utils.class_weight import compute_sample_weight

cost_model_fixed = RandomForestRegressor(n_estimators=200, random_state=42)
cost_model_fixed.fit(X_cost, y_cost, sample_weight=compute_sample_weight("balanced", y_cost))
pred_cost_after = pd.Series(cost_model_fixed.predict(X_cost), index=X_cost.index)
print("✔ Predictions (after mitigation) computed")
print("\nCOST MODEL FAIRNESS — AFTER \n")
for col in COST_FEATURES:
    g = cost_groups[col]
    mpd = pred_cost_after.groupby(g).mean()
    dir_ratio = mpd.max() / (mpd.min() + 1e-9)

    print(f"\n--- {col} ---")
    print("Mean Prediction Difference:\n", mpd)
    print(f"DIR: {dir_ratio:.3f}")
    

✔ Predictions (after mitigation) computed

COST MODEL FAIRNESS — AFTER 


--- Energy Consumed (kWh) ---
Mean Prediction Difference:
 Energy Consumed (kWh)
False     6.171729
True     15.552414
dtype: float64
DIR: 2.520

--- State of Charge (End %) ---
Mean Prediction Difference:
 State of Charge (End %)
False     7.157652
True     13.890334
dtype: float64
DIR: 1.941

--- Charger Type ---
Mean Prediction Difference:
 Charger Type
False    16.152696
True      8.052053
dtype: float64
DIR: 2.006

--- Energy per 100 km (kWh/100 km) ---
Mean Prediction Difference:
 Energy per 100 km (kWh/100 km)
False     7.561262
True     14.162882
dtype: float64
DIR: 1.873

--- Charging Station Location ---
Mean Prediction Difference:
 Charging Station Location
False    10.999792
True     10.751331
dtype: float64
DIR: 1.023

--- Vehicle Model ---
Mean Prediction Difference:
 Vehicle Model
False    10.698231
True     10.977390
dtype: float64
DIR: 1.026

--- Charging Rate (kW) ---
Mean Prediction Difference:

In [None]:
print("\nLONG MODEL FAIRNESS — BEFORE \n")

for col in LONG_FEATURES:
    g = long_groups[col]
    dp = pred_long_before.groupby(g).mean()

    print(f"\n=== Feature: {col} ===")
    print("Demographic Parity:\n", dp)

    tpr = {}
    fpr = {}

    for grp in g.unique():
        mask = (g == grp)

        tp = ((pred_long_before == 1) & (y_long == 1) & mask).sum()
        fn = ((pred_long_before == 0) & (y_long == 1) & mask).sum()
        fp = ((pred_long_before == 1) & (y_long == 0) & mask).sum()
        tn = ((pred_long_before == 0) & (y_long == 0) & mask).sum()

        tpr[grp] = tp / (tp + fn + 1e-9)
        fpr[grp] = fp / (fp + tn + 1e-9)

    print("TPR:", tpr)
    print("FPR:", fpr)





=== Feature: Charging Time Difference (minutes) ===
Demographic Parity:
 Charging Time Difference (minutes)
Low     0.887752
Med     0.525740
High    0.463209
dtype: float64
TPR: {'Low': np.float64(0.1340206185563556), 'Med': np.float64(0.03755215577187931), 'High': np.float64(0.028395061728377535)}
FPR: {'Low': np.float64(0.9945235487400531), 'Med': np.float64(0.9461077844305712), 'High': np.float64(0.9440273037536219)}

=== Feature: Charging Rate (kW) ===
Demographic Parity:
 Charging Rate (kW)
Low     0.443836
Med     0.643593
High    0.792082
dtype: float64
TPR: {'Med': np.float64(0.049671977506982504), 'Low': np.float64(0.020360674810924746), 'High': np.float64(0.09696969696955005)}
FPR: {'Med': np.float64(0.9543894065713809), 'Low': np.float64(0.9682997118148643), 'High': np.float64(0.979566816509612)}

=== Feature: Battery Capacity (kWh) ===
Demographic Parity:
 Battery Capacity (kWh)
Low     0.789508
Med     0.516420
High    0.573544
dtype: float64
TPR: {'High': np.float64(0

  dp = pred_long_before.groupby(g).mean()


In [17]:
from sklearn.utils.class_weight import compute_sample_weight
weights = compute_sample_weight("balanced", y_long)

long_model_fixed = RandomForestClassifier(n_estimators=200, random_state=42)
long_model_fixed.fit(X_long, y_long, sample_weight=weights)

pred_long_after = pd.Series(long_model_fixed.predict(X_long), index=X_long.index)

print("✔ Mitigated LONG model trained")


✔ Mitigated LONG model trained


In [None]:
print("\nLONG MODEL FAIRNESS — AFTER \n")

for col in LONG_FEATURES:
    g = long_groups[col]

    dp_before = pred_long_before.groupby(g).mean()
    dp_after = pred_long_after.groupby(g).mean()

    print(f"\n=== Feature: {col} ===")
    print("DP Before:", dp_before.to_dict())
    print("DP After :", dp_after.to_dict())

    tpr_before = {}
    tpr_after = {}
    fpr_before = {}
    fpr_after = {}

    for grp in g.unique():
        mask = (g == grp)

        tp_b = ((pred_long_before == 1) & (y_long == 1) & mask).sum()
        fn_b = ((pred_long_before == 0) & (y_long == 1) & mask).sum()
        fp_b = ((pred_long_before == 1) & (y_long == 0) & mask).sum()
        tn_b = ((pred_long_before == 0) & (y_long == 0) & mask).sum()

        tp_a = ((pred_long_after == 1) & (y_long == 1) & mask).sum()
        fn_a = ((pred_long_after == 0) & (y_long == 1) & mask).sum()
        fp_a = ((pred_long_after == 1) & (y_long == 0) & mask).sum()
        tn_a = ((pred_long_after == 0) & (y_long == 0) & mask).sum()

        tpr_before[grp] = tp_b / (tp_b + fn_b + 1e-9)
        tpr_after[grp]  = tp_a / (tp_a + fn_a + 1e-9)

        fpr_before[grp] = fp_b / (fp_b + tn_b + 1e-9)
        fpr_after[grp]  = fp_a / (fp_a + tn_a + 1e-9)

    print("TPR Before:", tpr_before)
    print("TPR After :", tpr_after)
    print("FPR Before:", fpr_before)
    print("FPR After :", fpr_after)





=== Feature: Charging Time Difference (minutes) ===
DP Before: {'Low': 0.8877518388231532, 'Med': 0.5257400257400258, 'High': 0.46320907617504053}
DP After : {'Low': 0.12408058842340902, 'Med': 0.4626769626769627, 'High': 0.5251215559157212}
TPR Before: {'Low': np.float64(0.1340206185563556), 'Med': np.float64(0.03755215577187931), 'High': np.float64(0.028395061728377535)}
TPR After : {'Low': np.float64(0.9999999999974227), 'Med': np.float64(0.9999999999993046), 'High': np.float64(0.9999999999993827)}
FPR Before: {'Low': np.float64(0.9945235487400531), 'Med': np.float64(0.9461077844305712), 'High': np.float64(0.9440273037536219)}
FPR After : {'Low': np.float64(0.0), 'Med': np.float64(0.0), 'High': np.float64(0.0)}

=== Feature: Charging Rate (kW) ===
DP Before: {'Low': 0.44383649822980364, 'Med': 0.6435930457179653, 'High': 0.7920823945928549}
DP After : {'Low': 0.5532668168651432, 'Med': 0.3435286542176433, 'High': 0.21242355970389443}
TPR Before: {'Med': np.float64(0.0496719775069

  dp_before = pred_long_before.groupby(g).mean()
  dp_after = pred_long_after.groupby(g).mean()
