In [None]:
import os
import csv

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

from pathlib import Path
from sklearn.model_selection import train_test_split

from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric

# Pick the compute device once: use the GPU when PyTorch can see one,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


## Energy consumption & CO2 emission

In [None]:
# Build one comparison table of run time, energy use and CO2-equivalent
# emissions from three sources and save it next to the raw CodeCarbon output.
#
# Column prefixes used in the combined table:
#   cc_    for CodeCarbon results
#   ct_    for CarbonTracker results
#   mlco2_ for MLCO2 Impact results

# Load the CodeCarbon output CSV file and shorten its column names.
# A chained rename (instead of inplace=True) keeps the cell idempotent on re-run.
output_dir = Path("./Results/Sustainability")
df = pd.read_csv(output_dir / "cc_emissions.csv").rename(columns={
    "duration": "t",
    "emissions": "cc_co2",
    "energy_consumed": "cc_energy",
})

# One CodeCarbon row is expected per dataset dimension, in this order.
dimensions = [9, 12, 15]
if len(df) != len(dimensions):
    # Fail early with a readable message instead of pandas' generic length error.
    raise ValueError(
        f"cc_emissions.csv has {len(df)} rows, but {len(dimensions)} dataset "
        f"dimensions are configured: {dimensions}"
    )

full_data = df[["t", "cc_co2", "cc_energy"]].copy()
full_data.insert(0, "N", dimensions)  # N becomes the first column

# CarbonTracker data for energy and emissions
# Original results are in Results/Sustainability/ct_emissions_n{n}.log file
GRAMS_PER_KILOGRAM = 1000
ct_emissions_g = [2.005273, 2.644529, 3.303579]  # gCO2eq, as logged
ct_data = {
    "energy": [0.004169, 0.005497, 0.006868],  # kWh
    # Converted to kgCO2eq so it is comparable with the other emission columns.
    "emissions": [grams / GRAMS_PER_KILOGRAM for grams in ct_emissions_g],
}

# MLCO2Impact calculations https://mlco2.github.io/impact/#co2eq
T4_power_watt = 70  # Google Cloud T4-GPU power in Watts
emission_factor_east_asia = 0.56  # kgCO2eq/kWh

SECONDS_PER_HOUR = 3600
WATTS_PER_KILOWATT = 1000

# energy [kWh] = run time [h] * power [kW]; emissions = factor * energy.
# Vectorised over the whole duration column instead of looping row by row.
mlco2_energy_kwh = (df["t"] / SECONDS_PER_HOUR) * T4_power_watt / WATTS_PER_KILOWATT
mlco2_energy = mlco2_energy_kwh.tolist()
mlco2_emissions = (emission_factor_east_asia * mlco2_energy_kwh).tolist()

full_data = full_data.assign(
    ct_co2=ct_data["emissions"],
    ct_energy=ct_data["energy"],
    mlco2_co2=mlco2_emissions,
    mlco2_energy=mlco2_energy,
)

# Print final cleaned data with 5 decimals. option_context restores the previous
# display setting even if printing fails, and does not clobber a format that was
# configured earlier in the session.
with pd.option_context("display.float_format", "{:.5f}".format):
    print(full_data)

output_file = output_dir / "emissions_full.csv"
full_data.to_csv(output_file, index=False)


## Fairness

In [None]:
# Define Model
class NeuralNet(nn.Module):
    """Feed-forward network with one hidden layer and a single sigmoid output.

    Args:
        input_dim: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
    """

    def __init__(self, input_dim, hidden_size):
        super().__init__()
        # input -> hidden
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_size)
        # hidden -> one output unit
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output for a batch ``x``.

        The trailing output dimension has size 1 because ``fc2`` maps the
        hidden activations to a single unit.
        """
        hidden = self.relu(self.fc1(x))
        logit = self.fc2(hidden)
        return self.sigmoid(logit)

In [None]:
# For every dataset size: rebuild the test split, restore the saved model, run
# it on the test features and store features, ground truth and predictions in
# one DataFrame per dataset under test_results["n<size>"].
dataset_size = [9, 12, 15]
test_results = {}
binary = True  # when True, features are thresholded at 0.5 into {0, 1}

for n in dataset_size:
    # Load the dataset
    X = np.load(f"Datasets/kryptonite-{n}-X.npy")
    y = np.load(f"Datasets/kryptonite-{n}-y.npy")

    if binary:
        X = np.where(X > 0.5, 1, 0)

    # 20% test split with a fixed seed.
    # NOTE(review): presumably the same split that was used during training,
    # so the test rows are unseen by the model -- confirm against the training code.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Separate name for the tensor so X keeps referring to the numpy feature matrix.
    X_test_tensor = torch.tensor(X_test.astype(np.float32)).to(device)

    # Only "hidden_size" is read in this cell; the other entries are not used here.
    para = {"hidden_size":[4*n], "lr":[0.01], "alpha":[0.0001], "batch_size":[128], "num_epochs":[100]}
    model = NeuralNet(n, para["hidden_size"][0]).to(device)

    # Load the model.
    # map_location remaps the stored tensors onto the selected device, so a
    # checkpoint written on a GPU machine also loads on a CPU-only machine.
    # torch.load unpickles the file: only load checkpoints from a trusted source.
    state_dict = torch.load(f"model/{n}model_best.pth", map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    # No gradients are needed for inference; skipping autograd saves memory.
    with torch.no_grad():
        probabilities = model(X_test_tensor).cpu().numpy()

    # Round the sigmoid outputs to hard 0/1 labels. reshape(-1) always yields a
    # 1-D vector, including for a test set that contains a single sample.
    output = probabilities.round().reshape(-1).astype(np.int16)

    columns = [f"Feature_{i}" for i in range(n)]
    df = pd.DataFrame(X_test, columns=columns)
    df["Ground_Truth"] = y_test
    df["Prediction"] = output

    test_results[f"n{n}"] = df

In [None]:
# For every dataset size, treat each input feature in turn as the protected
# attribute (value 1 = privileged group, value 0 = unprivileged group), compute
# fairness metrics of the predictions against the ground truth, and write one
# CSV of metrics per dataset size.
for n in dataset_size:
    df = test_results[f"n{n}"]

    ground_truth = "Ground_Truth"
    prediction = "Prediction"
    features = [f"Feature_{i}" for i in range(n)]

    # BinaryLabelDataset treats every non-label column as a feature. The model's
    # own output must not be one of them, otherwise it leaks into the
    # nearest-neighbour search behind consistency(). Keep only inputs + label.
    labelled_df = df.drop(columns=[prediction])
    predicted_labels = df[prediction].to_numpy().reshape(-1, 1)

    results = []
    # consistency() uses only the feature matrix and the labels, which are the
    # same for every choice of protected attribute, so it is computed once per
    # dataset instead of once per feature.
    consistency = None

    # Loop through each feature as the protected attribute
    for protected_attribute in features:

        binary_dataset = BinaryLabelDataset(
            df=labelled_df,
            label_names=[ground_truth],
            protected_attribute_names=[protected_attribute],
        )

        # Same instances, but with the model's predictions as labels.
        classified_dataset = binary_dataset.copy()
        classified_dataset.labels = predicted_labels

        classification_metric = ClassificationMetric(
            binary_dataset,
            classified_dataset,
            privileged_groups=[{protected_attribute: 1}],
            unprivileged_groups=[{protected_attribute: 0}],
        )

        # Calculate Metrics
        spd = classification_metric.statistical_parity_difference()
        eod = classification_metric.equal_opportunity_difference()
        di = classification_metric.disparate_impact()
        aod = classification_metric.average_odds_difference()
        if consistency is None:
            # NOTE(review): this appears to be evaluated on the first dataset
            # handed to ClassificationMetric (ground-truth labels), not on the
            # predictions -- confirm that this is the intended quantity.
            consistency = classification_metric.consistency()[0]

        results.append({
            "Feature": protected_attribute,
            "SPD": spd,
            "EOD": eod,
            "DI": di,
            "AOD": aod,
            "Consistency": consistency,
        })

    results_df = pd.DataFrame(results)

    save_dir = Path("./Results/Sustainability")
    # Create the target directory if it is missing so to_csv cannot fail on it.
    save_dir.mkdir(parents=True, exist_ok=True)
    output_file = save_dir / f"fairness_metrics_n{n}.csv"
    results_df.to_csv(output_file, index=False)

    print(f"Fairness metrics for n={n} dataset saved to {output_file}")