In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_parquet("possible_datasets/inequality_education_fev.parquet")
X = df.drop(columns=["target", "id", "timestamp"])
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=df["Human Development Groups"])

In [3]:
X = torch.tensor( [np.hstack(X_train.values[i]) for i in range(len(X_train))], dtype=torch.float32)
y = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
groups = torch.tensor(X_train["Human Development Groups"], dtype=torch.int64) #fairness groups


# Simple model
class GlobalTimeSeriesModel(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1)
        )

    def forward(self, x):
        return self.net(x)

# Fairness penalty
def fairness_penalty(y_true, y_pred, groups):
    """Differenz der mittleren Fehler zwischen Gruppen."""
    errors = torch.abs(y_true - y_pred).detach()
    groups_err = []
    groups_err.append(errors[groups == 0].mean()) 
    groups_err.append(errors[groups == 1].mean())
    groups_err.append(errors[groups == 2].mean())
    groups_err.append(errors[groups == 3].mean())
    return np.std(np.array(groups_err))


def train(model, with_fairness=True):
  optimizer = optim.Adam(model.parameters(), lr=0.01)
  criterion = nn.MSELoss()
  lambda_fair = 0.5  # Gewichtung der Fairness-Strafe

  for epoch in range(10000):
      optimizer.zero_grad()
      y_pred = model(X)
      mse_loss = criterion(y_pred, y)
      fair_loss = fairness_penalty(y, y_pred, groups)
      if with_fairness:
        loss = mse_loss + lambda_fair * fair_loss
      else:
        loss = mse_loss
      loss.backward()
      optimizer.step()

      if epoch % 1000 == 0:
          print(f"Epoch {epoch}: Gesamtverlust={loss.item():.4f} "
                f"(MSE={mse_loss.item():.4f}, Fairness={fair_loss.item():.4f})")

model1 = GlobalTimeSeriesModel(input_size=len(X[0]), hidden_size=9)
model2 = GlobalTimeSeriesModel(input_size=len(X[0]), hidden_size=9)

train(model1, with_fairness=True)
print("\n\n")
train(model2, with_fairness=False)

  X = torch.tensor( [np.hstack(X_train.values[i]) for i in range(len(X_train))], dtype=torch.float32)


Epoch 0: Gesamtverlust=757.3701 (MSE=751.9115, Fairness=10.9171)
Epoch 1000: Gesamtverlust=1.2333 (MSE=1.0914, Fairness=0.2839)
Epoch 2000: Gesamtverlust=0.2652 (MSE=0.1945, Fairness=0.1413)
Epoch 3000: Gesamtverlust=0.0571 (MSE=0.0270, Fairness=0.0602)
Epoch 4000: Gesamtverlust=0.0328 (MSE=0.0128, Fairness=0.0399)
Epoch 5000: Gesamtverlust=0.0157 (MSE=0.0045, Fairness=0.0224)
Epoch 6000: Gesamtverlust=0.0082 (MSE=0.0031, Fairness=0.0102)
Epoch 7000: Gesamtverlust=0.0039 (MSE=0.0006, Fairness=0.0066)
Epoch 8000: Gesamtverlust=0.0022 (MSE=0.0003, Fairness=0.0038)
Epoch 9000: Gesamtverlust=0.0015 (MSE=0.0002, Fairness=0.0026)



Epoch 0: Gesamtverlust=738.6561 (MSE=738.6561, Fairness=10.7734)
Epoch 1000: Gesamtverlust=1.0125 (MSE=1.0125, Fairness=0.2789)
Epoch 2000: Gesamtverlust=0.1663 (MSE=0.1663, Fairness=0.1230)
Epoch 3000: Gesamtverlust=0.0145 (MSE=0.0145, Fairness=0.0159)
Epoch 4000: Gesamtverlust=0.0050 (MSE=0.0050, Fairness=0.0105)
Epoch 5000: Gesamtverlust=0.0019 (MSE=0.0019, Fa

In [4]:
X_test_tensor = torch.tensor([np.hstack(X_test.values[i]) for i in range(len(X_test))], dtype=torch.float32)

y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

def test(model):
    preds = model(X_test_tensor)  
    test_mse = nn.MSELoss()(preds, y_test_tensor).item()

    groups_test = torch.tensor(X_test["Human Development Groups"].values, dtype=torch.int64)
    test_fairness = fairness_penalty(y_test_tensor, preds, groups_test).item()


    print(f"Test MSE: {test_mse:.4f}")
    print(f"Test fairness (std of group mean abs errors): {test_fairness:.4f}")

test(model1)
print("\n\n")
test(model2)

Test MSE: 0.1171
Test fairness (std of group mean abs errors): 0.1355



Test MSE: 0.0814
Test fairness (std of group mean abs errors): 0.0913


# Walmart Dataset

In [41]:
from datasets import load_dataset

# Lade den Datensatz
ds = load_dataset("autogluon/fev_datasets", "m5_1D", split="train")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Generating train split: 100%|██████████| 30490/30490 [00:09<00:00, 3253.91 examples/s]
