### Environment Setup

This experiment is implemented using PyTorch. The following cell verifies the PyTorch version and whether GPU acceleration is available.


In [1]:
import torch
print(torch.__version__)
print("CUDA available:", torch.cuda.is_available())

2.10.0+cpu
CUDA available: False


## Model 2: Regularised Deep Neural Network (DNN)

To address the overfitting observed in the baseline model, we implement a regularised DNN architecture following the Cam-Ready paper design.

### Architecture

- Input layer: number of neurons = number of input features  
- Hidden Layer 1: 128 neurons  
- Hidden Layer 2: 64 neurons  
- Hidden Layer 3: 32 neurons  
- Output layer: 1 neuron (predicting continuous power consumption)

### Regularisation Techniques

- ReLU activation
- Batch Normalisation before activation
- Dropout (p = 0.3) after each hidden layer
- L2 weight decay (Î» = 0.01)
- Adam optimizer
- MSE loss function
- Batch size = 64



In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# 1) load data
df = pd.read_csv("clean_ul_stage1.csv")

feature_cols = ["airtime", "selected_mcs", "txgain"]

target_col = "pm_power"

df = df.dropna(subset=feature_cols + [target_col]).copy()
for c in feature_cols + [target_col]:
    df[c] = pd.to_numeric(df[c], errors="coerce")
df = df.dropna(subset=feature_cols + [target_col]).copy()
df = df[df[target_col] > 0].copy() 

X = df[feature_cols].values
y = df[target_col].values


# 2) split: train/test then train/val
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val,  y_train, y_val  = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

# 3) scale (fit ONLY on train)
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s   = scaler.transform(X_val)
X_test_s  = scaler.transform(X_test)

print("Shapes:", X_train_s.shape, X_val_s.shape, X_test_s.shape)


Shapes: (4153, 3) (462, 3) (1154, 3)


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

class RegularizedDNN(nn.Module):
    def __init__(self, input_dim):
        super(RegularizedDNN, self).__init__()

        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.net(x)
    
X_train_tensor = torch.FloatTensor(X_train_s)
X_test_tensor  = torch.FloatTensor(X_test_s)

y_train_tensor = torch.FloatTensor(y_train).view(-1,1)
y_test_tensor  = torch.FloatTensor(y_test).view(-1,1)

input_dim = X_train_s.shape[1]

model2 = RegularizedDNN(input_dim)

criterion = nn.MSELoss()

optimizer = optim.Adam(
    model2.parameters(),
    lr=0.001,
    weight_decay=0.01   # L2 regularisation
)




In [5]:
epochs = 200
batch_size = 64

# ---- add val tensors (no logic change, just for printing val MSE) ----
X_val_tensor = torch.FloatTensor(X_val_s)
y_val_tensor = torch.FloatTensor(y_val).view(-1, 1)

best_val_mse = float("inf")

print("\nUL dataset (model2) training:")

for epoch in range(epochs):
    model2.train()
    permutation = torch.randperm(X_train_tensor.size(0))

    for i in range(0, X_train_tensor.size(0), batch_size):
        indices = permutation[i:i+batch_size]
        batch_x = X_train_tensor[indices]
        batch_y = y_train_tensor[indices]

        optimizer.zero_grad()
        outputs = model2(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    # ---- epoch-end evaluation: train MSE & val MSE (like model1) ----
    model2.eval()
    with torch.no_grad():
        train_pred = model2(X_train_tensor)
        val_pred   = model2(X_val_tensor)

        train_mse = criterion(train_pred, y_train_tensor).item()
        val_mse   = criterion(val_pred, y_val_tensor).item()

    if val_mse < best_val_mse:
        best_val_mse = val_mse

    # print at the same rhythm as your model1 screenshot (every 10 epochs + epoch 1)
    if (epoch == 0) or ((epoch + 1) % 10 == 0):
        print(f"Epoch {epoch+1:03d} | train MSE {train_mse:.6f} | val MSE {val_mse:.6f}")

print(f"Best val MSE: {best_val_mse}")




UL dataset (model2) training:
Epoch 001 | train MSE 105.024681 | val MSE 104.599503
Epoch 010 | train MSE 0.633623 | val MSE 0.625232
Epoch 020 | train MSE 0.393767 | val MSE 0.352687
Epoch 030 | train MSE 0.234490 | val MSE 0.200634
Epoch 040 | train MSE 0.248148 | val MSE 0.214999
Epoch 050 | train MSE 0.227434 | val MSE 0.190935
Epoch 060 | train MSE 0.326798 | val MSE 0.300932
Epoch 070 | train MSE 0.249883 | val MSE 0.205872
Epoch 080 | train MSE 0.190273 | val MSE 0.157135
Epoch 090 | train MSE 0.234376 | val MSE 0.210683
Epoch 100 | train MSE 0.197828 | val MSE 0.159149
Epoch 110 | train MSE 0.176596 | val MSE 0.160130
Epoch 120 | train MSE 0.183139 | val MSE 0.169760
Epoch 130 | train MSE 0.147827 | val MSE 0.125393
Epoch 140 | train MSE 0.168670 | val MSE 0.148724
Epoch 150 | train MSE 0.146991 | val MSE 0.125951
Epoch 160 | train MSE 0.125251 | val MSE 0.111948
Epoch 170 | train MSE 0.117266 | val MSE 0.099210
Epoch 180 | train MSE 0.111616 | val MSE 0.094849
Epoch 190 | tra

In [6]:
model2.eval()

with torch.no_grad():
    y_pred = model2(X_test_tensor)

y_pred = y_pred.numpy().flatten()
y_true = y_test_tensor.numpy().flatten()

import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

def mean_relative_error(y_true, y_pred, eps=1e-9):
    return np.mean(np.abs(y_true - y_pred) / (np.abs(y_true) + eps)) * 100

mse  = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
mae  = mean_absolute_error(y_true, y_pred)
mre  = mean_relative_error(y_true, y_pred)

print("\n=== Model 2: Regularised DNN ===")
print("X:", feature_cols, " y:", target_col)
print(f"MSE  : {mse:.6f}")
print(f"RMSE : {rmse:.6f}")
print(f"MAE  : {mae:.6f}")
print(f"MRE% : {mre:.4f}")



=== Model 2: Regularised DNN ===
X: ['airtime', 'selected_mcs', 'txgain']  y: pm_power
MSE  : 0.122160
RMSE : 0.349514
MAE  : 0.253338
MRE% : 2.1707
