In [None]:
import neurokit2 as nk
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import LambdaLR
import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import Callback
import torch.optim as optim
import math
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
import warnings
from pathlib import Path
warnings.filterwarnings("ignore")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
seed = 50
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), then switches cuDNN into
    deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    import random  # local import: `random` is not part of the notebook's import cell

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # no-op when CUDA is unavailable

    # Ensure deterministic behavior (slower but reproducible)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [None]:
folder = Path("Biraffe2")
suffix = "_GEQ.csv"   # example target ending
MIN_FILE_SIZE_BYTES = 2 * 1024  # files of 2 KB or less are treated as empty

# Path.iterdir() yields entries in arbitrary order, so the list is sorted to
# make the order of loaded recordings (and therefore the seeded train/test
# split built from them) reproducible across runs and machines.
files = sorted(
    f for f in folder.iterdir()
    if f.is_file()
    and f.name.endswith(suffix)
    and f.stat().st_size > MIN_FILE_SIZE_BYTES
)

In [None]:
# Read every selected CSV into its own DataFrame and report its row count.
data = []

for csv_path in files:
    frame = pd.read_csv(csv_path)
    print(f"Loaded {csv_path}, rows = {len(frame)}")
    data.append(frame)

In [None]:
# Windowing configuration.
seq_len = 1000        # number of samples per extracted window
sampling_rate = 1000  # presumably Hz; only referenced by neurokit calls

# NOTE(review): feature_cols is not read by processDataframe, which accesses
# the "ECG" and "EDA" columns directly.
feature_cols = [
    "EDA",
]

# Questionnaire columns used as targets. Other columns that were tried:
# "Competence13", "Immersion13", "Flow13", "Tension13", "Challenge13",
# "Negative_affect13", "Positive_affect13", "Competence18", "Flow18",
# "Immersion18".
label_cols = [
    "Negative_affect18", "Positive_affect18",
]

# Positions of the two targets inside a label vector (same order as label_cols).
negativeIndex = 0
positiveIndex = 1

# Accumulators filled by processDataframe: one flattened window / one label
# vector per entry.
X_all = []
y_all = []


def processDataframe(df, X, y, skip_windows=2):
    """Cut every game of one recording into fixed-length windows.

    For each distinct ``Game_nr`` the ECG and EDA columns are split into
    consecutive, non-overlapping windows of ``seq_len`` samples (module-level
    setting). Each window becomes one sample: the ECG slice followed by the
    EDA slice in a single 1-D array of length ``2 * seq_len``. The label of a
    sample is the first row of ``label_cols`` for that game.

    Games whose label contains NaN are skipped with a message. A trailing
    partial window is discarded.

    Args:
        df: DataFrame with "Game_nr", "ECG", "EDA" and the ``label_cols`` columns.
        X: List that receives one 1-D numpy array per window (mutated in place).
        y: List that receives one label array per window (mutated in place).
        skip_windows: Number of leading windows of each game to drop.
            Defaults to 2, the value that was previously hard-coded.
    """
    for game in df["Game_nr"].unique():
        game_df = df[df["Game_nr"] == game]
        if game_df.empty:
            # Happens for a NaN game id, which never compares equal to itself.
            continue

        label_value = game_df[label_cols].iloc[0]
        if label_value.isna().any():
            print(f"Skipping Game {game} - label is NaN")
            continue
        label = label_value.to_numpy()

        data_ecg = game_df["ECG"].to_numpy()
        data_eda = game_df["EDA"].to_numpy()
        num_sequences = len(data_ecg) // seq_len

        for i in range(skip_windows, num_sequences):
            start = i * seq_len
            end = start + seq_len
            # ECG samples first, then EDA samples, flattened into one vector.
            X.append(np.concatenate((data_ecg[start:end], data_eda[start:end])))
            y.append(label)

            
# Turn every loaded recording into windows; results accumulate in X_all / y_all.
for recording in data:
    processDataframe(recording, X_all, y_all)

print(np.array(X_all).shape)

In [None]:
# Collect the distinct values one questionnaire column takes across recordings.
component = "Positive_affect18"  # alternative: "Negative_affect18"

# The leading empty array keeps the result well-defined when `data` is empty.
dataPoints = np.concatenate(
    [np.empty(0)] + [pData[component].unique() for pData in data]
)
pd.array(dataPoints).unique()

In [None]:
# The loader needs numpy arrays, so convert any samples that may be tensors.
# The list is updated in place so existing references to X_all stay valid.
X_all[:] = [np.array(sample) for sample in X_all]

In [None]:
# Report how many windows / labels were built. This used to read `X` and `y`,
# which are only assigned much further down in the notebook.
print("Inputs: ", len(X_all))
print("Labels: ", len(y_all))

# Process the raw EDA signal of the first 1000 samples of the first recording.
eda_preview = data[0]["EDA"][0:1000]
signals, info = nk.eda_process(eda_preview, sampling_rate=1000)
print(signals["EDA_Phasic"])
print(eda_preview.unique())

In [None]:
# 80 % train, then the remaining 20 % is halved into validation and test.
# NOTE(review): the split is done per window, so windows cut from the same
# game (which share one label) can land in different subsets. Confirm that
# this is acceptable for the evaluation.
X_ecg_train, X_ecg_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=seed, shuffle=True
)

X_ecg_valid, X_ecg_test, y_valid, y_test = train_test_split(
    X_ecg_test, y_test, test_size=0.5, random_state=seed, shuffle=True
)


def _as_float_tensor(samples):
    """Stack a list of equally sized arrays into one float32 tensor.

    Going through a single ndarray avoids the slow element-by-element
    conversion torch performs on a list of ndarrays.
    """
    return torch.tensor(np.asarray(samples, dtype=np.float32), dtype=torch.float32)


X_ecg_train = _as_float_tensor(X_ecg_train)
y_train = _as_float_tensor(y_train)
X_ecg_test = _as_float_tensor(X_ecg_test)
y_test = _as_float_tensor(y_test)
X_ecg_valid = _as_float_tensor(X_ecg_valid)
y_valid = _as_float_tensor(y_valid)

# Global (not per-feature) statistics of the training inputs and labels.
x_mean = X_ecg_train.mean().item()
x_std = X_ecg_train.std().item()

y_mean = y_train.mean().item()
y_std = y_train.std().item()
print("y mean:", y_mean)
print("y std:", y_std)
print("min/max:", y_train.min().item(), y_train.max().item())

# Standard deviation of the standardized labels (this print used to be
# labelled "y mean:").
print("normalized y std:", ((y_train - y_mean) / y_std).std())

def minMaxMean(data, index = -1):
    """Return the minimum, maximum and mean over a collection of rows.

    Args:
        data: Sized iterable of rows. Each row must support ``len()`` and
            indexing, and - when whole-row statistics are requested -
            ``.max()``, ``.min()`` and ``.mean()`` (numpy arrays or tensors).
        index: Position inside each row to look at. When it is a valid
            non-negative index, only ``row[index]`` is considered. Any other
            value (including the default ``-1``) means "use the whole row".

    Returns:
        ``(minimum, maximum, mean)``. In whole-row mode the mean is the
        average of the per-row means. For empty ``data`` the result is
        ``(inf, -inf, 0)``.
    """
    # Infinite sentinels are representable in every float dtype.
    maxNr = float("-inf")
    minNr = float("inf")
    mean = 0
    count = len(data)
    for data_x in data:
        # The previous test used `or`, which is always true and made the
        # default index silently pick the last element of every row.
        if 0 <= index < len(data_x):
            tempMax = data_x[index]
            tempMin = data_x[index]
            mean += data_x[index] / count
        else:
            tempMax = data_x.max()
            tempMin = data_x.min()
            mean += data_x.mean() / count
        if tempMax > maxNr:
            maxNr = tempMax
        if tempMin < minNr:
            minNr = tempMin
    return minNr, maxNr, mean

# Summary statistics of the training inputs (default index) and of each of
# the two label columns (index 0 = negative affect, index 1 = positive affect,
# matching negativeIndex / positiveIndex).
in_min, in_max, in_mean = minMaxMean(X_ecg_train)
n_label_min, n_label_max, n_label_mean = minMaxMean(y_train, index=0)
p_label_min, p_label_max, p_label_mean = minMaxMean(y_train, index=1)


print("In Min: ", in_min)
print("In Max: ", in_max)
print("In Mean: ", in_mean)

print("")
print("N Label Min: ", n_label_min)
print("N Label Max: ", n_label_max)
print("N Label Mean: ", n_label_mean)

print("")
print("P Label Min: ", p_label_min)
print("P Label Max: ", p_label_max)
print("P Label Mean: ", p_label_mean)

In [None]:
def getOutputLabel(output):
    """Map a score to an integer key: the score times ten, rounded.

    Rounding follows Python's built-in ``round`` (ties go to the even integer).
    """
    scaled = output * 10
    return round(scaled)
# Histograms of the training labels: integer key (see getOutputLabel) -> count.
n_y_dist = {}  # negative affect
p_y_dist = {}  # positive affect

for y in y_train:
    # Each label row holds both targets; count them independently.
    label = getOutputLabel(y[negativeIndex].item())
    n_y_dist[label] = n_y_dist.get(label, 0) + 1
    label = getOutputLabel(y[positiveIndex].item())
    p_y_dist[label] = p_y_dist.get(label, 0) + 1

# Keys in ascending order, e.g. for inspecting the distributions.
n_keys_sorted = sorted(n_y_dist.keys())
p_keys_sorted = sorted(p_y_dist.keys())

In [None]:
nrOfBins = 3

def splitToGroups(dataDict, nrOfGroups):
    """Assign every key of ``dataDict`` to one of ``nrOfGroups`` ordered bins.

    Keys are visited in ascending order and handed out in runs of
    ``round(len(dataDict) / nrOfGroups)`` keys (at least one); once the last
    bin is reached, all remaining keys go into it. Only the keys matter; the
    values (counts) are ignored, so bins hold a similar number of *distinct
    keys*, not a similar number of samples.

    Args:
        dataDict: Mapping whose keys are sortable label keys.
        nrOfGroups: Number of bins, must be positive.

    Returns:
        Dict mapping each key to its bin index in ``0 .. nrOfGroups - 1``.
    """
    ordered_keys = sorted(dataDict)
    # Without the lower bound a dict with few keys yields a run length of 0
    # and the modulo below divides by zero.
    split_size = max(1, round(len(ordered_keys) / nrOfGroups))

    dictionary = {}
    label = 0
    for position, key in enumerate(ordered_keys, start=1):
        dictionary[key] = label
        if position % split_size == 0 and label < nrOfGroups - 1:
            label += 1
    return dictionary

n_binned_labels = splitToGroups(n_y_dist, nrOfBins)
p_binned_labels = splitToGroups(p_y_dist, nrOfBins)

print(n_binned_labels)
print(p_binned_labels)

In [None]:
# Number of training samples that fall into each bin, per target.
n_splits_size = [0] * nrOfBins
p_splits_size = [0] * nrOfBins


def _accumulate_bin_sizes(distribution, key_to_bin, sizes):
    """Add the count of every histogram key to the bin that key belongs to."""
    for key, count in distribution.items():
        bin_index = key_to_bin.get(key)
        if bin_index is None:
            # Key was not assigned to a bin; nothing to attribute it to.
            continue
        sizes[bin_index] += count


# Every key is visited. The previous loop stepped through even numbers only
# and therefore left out all samples whose key is odd.
_accumulate_bin_sizes(n_y_dist, n_binned_labels, n_splits_size)
_accumulate_bin_sizes(p_y_dist, p_binned_labels, p_splits_size)
print("Negative Distribution", n_splits_size)
print("Positive Distribution", p_splits_size)

In [None]:
# Compare the number of validation labels with the number of validation samples.
# NOTE(review): valid_binnedLabels is not assigned in any cell of this
# notebook as seen here, so this fails on a fresh kernel. The cell that
# converts y_valid into bin indices appears to be missing.
print(len(valid_binnedLabels))
print(len(X_ecg_valid))
print(len(y_valid))

In [None]:
batch_size = 16


def _make_loader(features, class_labels, shuffle):
    """Wrap features and integer class labels in a DataLoader.

    Args:
        features: Float tensor with one row per sample.
        class_labels: Per-sample class indices; converted to a long tensor.
        shuffle: Whether batches are drawn in random order.
    """
    dataset = TensorDataset(features, torch.tensor(class_labels, dtype=torch.long))
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=4)


# NOTE(review): binnedLabels, valid_binnedLabels and test_binnedLabels are not
# assigned in any cell of this notebook as seen here.
train_loader = _make_loader(X_ecg_train, binnedLabels, shuffle=True)
# Evaluation data is not shuffled so repeated evaluations see the same batches.
valid_loader = _make_loader(X_ecg_valid, valid_binnedLabels, shuffle=False)
test_loader = _make_loader(X_ecg_test, test_binnedLabels, shuffle=False)

In [None]:
# Min-max scale the training labels and derive binary labels by comparing the
# first label column against its mean.
# NOTE(review): label_min and label_max are not assigned in any cell of this
# notebook as seen here (only n_label_min / p_label_min and their max
# counterparts are), so this cell fails on a fresh kernel.
yTemp = ((y_train - label_min) / (label_max - label_min))
# Column-wise sum of the scaled labels (two label columns).
summation = [0, 0]
for labelTemp in yTemp:
    for i in range(len(labelTemp)):
        summation[i] += labelTemp[i]
summation

# Data driven pinning

# for mean in summation:
#     print(mean / len(yTemp))
# Per-column mean of the scaled labels; also read by MeanPenaltyLoss.forward.
labelsMean = torch.tensor(np.array(summation) / len(yTemp)).to(device)
labelsMean

# signals, info = nk.eda_process(X[0], sampling_rate=1000)
# 0 when the first label column is below its mean, 1 otherwise.
pinnedLabels = []
for n in yTemp:
    if n[0].item() < labelsMean[0].item():
        pinnedLabels.append(0)
    else:
        pinnedLabels.append(1)

# X_ecg_test / y_test are already tensors when the split cell has run; these
# conversions then only copy them.
X_ecg_test = torch.tensor(X_ecg_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)
# Same thresholding for the held-out labels, using the training mean.
# NOTE(review): despite the "validation" names this is computed from y_test.
validationPinnedLabels = []
validationTemp = ((y_test - label_min) / (label_max - label_min))
for n in validationTemp:
    if n[0].item() < labelsMean[0].item():
        validationPinnedLabels.append(0)
    else:
        validationPinnedLabels.append(1)

In [None]:
# size = 8
# print(X_ecg_train[0:size:].shape)
# print(y_train[0:size:].shape)
# train_loader = DataLoader(
#     # TensorDataset(X_ecg_train, ((y_train - y_mean) / y_std)),
#     TensorDataset(X_ecg_train[0:size:], y_train[0:size:]),
#     batch_size=1,
#     shuffle=True,
#     num_workers=4
# )

# train_loader = DataLoader(
#     TensorDataset(X_ecg_train, ((y_train - y_mean) / y_std)),
#     # TensorDataset(X_ecg_train[0], y_train[0]),
#     batch_size=batch_size,
#     shuffle=True,
#     num_workers=4
# )

# Rebinds train_loader to a variant whose targets are min-max scaled float
# labels instead of integer class indices.
# NOTE(review): this replaces the classification loader built earlier, while
# TransformerLightning uses nn.CrossEntropyLoss - confirm which loader is meant
# to reach trainer.fit. label_min / label_max are also not assigned in any
# cell of this notebook as seen here.
train_loader = DataLoader(
    # TensorDataset(((X_ecg_train - x_mean) / x_std), ((y_train - y_mean) / y_std)),
    # TensorDataset( ((X_ecg_train - in_min) / (in_max - in_min)), ((y_train - label_min) / (label_max - label_min)) ),
    TensorDataset( X_ecg_train, ((y_train - label_min) / (label_max - label_min)) ),
    # TensorDataset(X_ecg_train[0], y_train[0]),
    batch_size=batch_size,
    shuffle=True,
    num_workers=4
)

In [None]:
# train_loader = DataLoader(TensorDataset(torch.stack(X_ecg_train).cuda(), torch.stack(y_train).cuda()), batch_size=64, shuffle=True, num_workers=4)
class MeanPenaltyLoss(nn.Module):
    """Regression loss that additionally penalizes predictions near a mean.

    The total loss is ``base + penalty_weight * penalty ** 2`` with
    ``penalty = 0.3 + mean(exp(-alpha * |y_pred - mean|))``, so predictions
    close to ``mean`` are penalized more than predictions far from it.

    Args:
        base_loss: "L1" (``nn.L1Loss``) or "L2" (``nn.MSELoss``).
        penalty_weight: Weight of the squared penalty term.
        alpha: Decay rate of the penalty with distance from the mean.
        mean: Reference value (number or tensor broadcastable against the
            predictions). When ``None`` (default) the module-level
            ``labelsMean`` is looked up on every forward call, which is the
            previous behaviour. A tensor passed here is stored as a plain
            attribute and is not moved by ``.to()``.

    Raises:
        ValueError: If ``base_loss`` is neither "L1" nor "L2".
    """

    def __init__(self, base_loss="L2", penalty_weight=0.1, alpha=1.0, mean=None):
        super().__init__()
        if base_loss == "L1":
            self.base_loss = nn.L1Loss()
        elif base_loss == "L2":
            self.base_loss = nn.MSELoss()
        else:
            raise ValueError("base_loss must be 'L1' or 'L2'")
        self.penalty_weight = penalty_weight
        self.alpha = alpha
        self.mean = mean

    def forward(self, y_pred, y_true):
        """Return the scalar loss for predictions ``y_pred`` and targets ``y_true``."""
        base = self.base_loss(y_pred, y_true)

        # Fall back to the notebook-level mean only when none was supplied.
        mean = labelsMean if self.mean is None else self.mean

        # Penalize predictions close to the mean.
        distance_from_mean = torch.abs(y_pred - mean)
        penalty = 0.3 + torch.exp(-self.alpha * distance_from_mean).mean()

        return base + self.penalty_weight * penalty**2

In [None]:
# Positional encoding (sinusoidal version)
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position information to a sequence of embeddings.

    Even feature columns hold sines and odd columns hold cosines of the
    position scaled by geometrically spaced frequencies. The table is stored
    as a buffer named ``pe`` with shape ``(1, max_len, d_model)``.

    Args:
        d_model: Embedding size of the inputs (even or odd).
        max_len: Longest sequence length the table covers.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one cosine column fewer than sine columns,
        # so the frequency vector is trimmed to the number of odd columns.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)  # shape (1, max_len, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encoding of its first ``x.size(1)`` positions.

        ``x`` is expected as (batch_size, seq_len, d_model).
        """
        return x + self.pe[:, :x.size(1)]

In [None]:
# Transformer model for signal input
class SignalTransformer(nn.Module):
    """Linear input projection followed by a stack of Transformer encoder layers.

    The module returns the encoder output unchanged: there is no pooling, no
    positional encoding and no output head.

    Args:
        input_size: Size of the last dimension of the input.
        d_model: Width of the projection and of the encoder.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Hidden size of each layer's feed-forward network.
        num_classes: Accepted for interface compatibility; not used.
    """

    def __init__(self, input_size=250, d_model=128, nhead=8, num_layers=4, dim_feedforward=256, num_classes=1):
        super().__init__()
        self.input_proj = nn.Linear(in_features=input_size, out_features=d_model)

        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        encoder_block = nn.TransformerEncoderLayer(
            d_model,
            nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_block, num_layers)

    def forward(self, x):
        """Project ``x`` to ``d_model`` features and run it through the encoder."""
        projected = self.input_proj(x)
        return self.transformer(projected)

In [None]:
class TransformerLightning(L.LightningModule):
    """Two-head classifier built on a pair of SignalTransformer encoders.

    Pipeline of ``self.layers``:
      0. SignalTransformer(input_size -> d_model), ReLU,
         SignalTransformer(d_model -> input_size), ReLU; the original input
         is added to the result (skip connection)
      1. LayerNorm(input_size)
      2. Linear(input_size, 128), Dropout(0.2), ReLU
      3. Linear(128, 32), Dropout(0.2), ReLU
    followed by two independent ``Linear(32, num_classes)`` heads, one for
    the negative-affect and one for the positive-affect target. The training
    loss is the sum of the two heads' cross-entropy losses.

    Args:
        input_size: Number of features of one input sample.
        d_model: Width of the inner encoder.
        num_heads: Attention heads. The second encoder runs at width
            ``input_size``, so ``input_size`` must be divisible by this too.
        d_ff: Feed-forward width inside the encoder layers.
        num_classes: Number of classes predicted by each head.
        num_layers: Stored as a hyperparameter; not used to build the model.
        lr: Peak learning rate reached at the end of warmup.

    NOTE(review): the notebook's TensorDatasets appear to yield batches of
    shape (batch, input_size). nn.TransformerEncoder treats a 2-D input as one
    unbatched sequence, which would make attention mix the samples of a batch.
    Confirm whether that is intended.
    """

    def __init__(self, input_size = 250, d_model=128, num_heads=8, d_ff=512, num_classes = 1, num_layers=2, lr=1e-4):
        super().__init__()
        self.save_hyperparameters()

        # The module layout (and hence the state_dict keys) is kept unchanged.
        self.layers = nn.ModuleList([
            nn.Sequential(
                SignalTransformer(input_size=input_size, d_model=d_model, nhead=num_heads, dim_feedforward=d_ff, num_classes=num_classes),
                nn.ReLU(),
                SignalTransformer(input_size=d_model, d_model=input_size, nhead=num_heads, dim_feedforward=d_ff, num_classes=num_classes),
                nn.ReLU(),
            ),
            nn.LayerNorm(input_size),
            nn.Sequential(
                nn.Linear(input_size, 128),
                nn.Dropout(0.2),
                nn.ReLU(),
            ),
            nn.Sequential(
                nn.Linear(128, 32),
                nn.Dropout(0.2),
                nn.ReLU(),
            ),
        ])
        self.loss_fn = nn.CrossEntropyLoss()

        self.negative_classifier = nn.Linear(32, num_classes)
        self.positive_classifier = nn.Linear(32, num_classes)

        # Learning-rate schedule: linear warmup, then cosine decay.
        self.warmup_steps=200
        self.total_steps=2000

    def forward(self, x):
        """Return ``(negative_logits, positive_logits)`` for input ``x``."""
        residual = x
        for position, layer in enumerate(self.layers):
            x = layer(x)
            if position == 0:
                # Skip connection around the encoder pair only.
                x = residual + x
        return self.negative_classifier(x), self.positive_classifier(x)

    def training_step(self, batch, batch_idx):
        """Compute and log the summed cross-entropy loss of both heads.

        ``batch`` is ``(x, y)`` where column ``negativeIndex`` /
        ``positiveIndex`` of ``y`` holds the class index for the respective head.
        """
        x, y = batch

        negative_labels = y[:, negativeIndex]
        positive_labels = y[:, positiveIndex]

        negative_y_hat, positive_y_hat = self(x)

        negative_loss = self.loss_fn(negative_y_hat, negative_labels)
        positive_loss = self.loss_fn(positive_y_hat, positive_labels)
        loss = negative_loss + positive_loss
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def _schedule_horizon(self):
        """Number of optimizer steps over which the cosine decay is spread.

        Uses the trainer's estimate of the total number of optimizer steps
        when it is available, finite and larger than ``self.total_steps``;
        otherwise ``self.total_steps``.
        """
        horizon = self.total_steps
        try:
            estimated = self.trainer.estimated_stepping_batches
        except Exception:
            # Deliberate best effort: without a usable trainer estimate the
            # configured horizon is kept.
            return horizon
        if isinstance(estimated, (int, float)) and math.isfinite(estimated):
            horizon = max(horizon, int(estimated))
        return horizon

    def configure_optimizers(self):
        """AdamW with per-step linear warmup followed by cosine decay to zero."""
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams.lr, weight_decay=1e-2)
        warmup_steps = self.warmup_steps
        total_steps = self._schedule_horizon()

        def lr_lambda(current_step):
            if current_step < warmup_steps:
                return float(current_step) / max(1.0, float(warmup_steps))
            progress = float(current_step - warmup_steps) / max(1.0, float(total_steps - warmup_steps))
            # Without this bound the cosine keeps cycling and the learning
            # rate climbs back up once training runs past the horizon.
            progress = min(1.0, progress)
            return 0.5 * (1.0 + math.cos(math.pi * progress))

        scheduler = {
            'scheduler': LambdaLR(optimizer, lr_lambda),
            'interval': 'step',
            'frequency': 1
        }
        return [optimizer], [scheduler]

In [None]:
# Example usage
# NOTE(review): this rebinds the module-level seq_len that processDataframe
# reads; re-running the window-building cell after this one changes the
# window length. The value is not used elsewhere in this cell.
seq_len = 2000
# Both names hold the length of one flattened input sample.
signal_dim = len(X_all[0])
input_dim = len(X_all[0])
d_model=128
# NOTE(review): overrides the seed assigned at the top of the notebook, which
# the train/test split uses as random_state.
seed = 512
num_classes = 3 # len(y[0])

# Seed before constructing the model so the weight initialization is repeatable.
set_seed(seed)
model = TransformerLightning(input_size=signal_dim, d_model=d_model, num_heads=4, d_ff=128, num_classes=num_classes, num_layers=4, lr=2e-3)
# model = SignalTransformer(input_size=signal_dim, num_classes=5)
# inputs = torch.randn(batch_size, seq_len, signal_dim)
# outputs = model.forward(torch.stack([torch.tensor(X[0], dtype=torch.float32), torch.tensor(X[1], dtype=torch.float32)] ).cpu() )
# print(len(y[0]))
# print("Output: ", outputs, ". End")  # torch.Size([16, 5])

In [None]:
# Global statistics of the training labels.
y_mean = y_train.mean().item()
y_std = y_train.std().item()
print("y mean:", y_mean)
print("y std:", y_std)
print("min/max:", y_train.min().item(), y_train.max().item())

y_standardized = (y_train - y_mean) / y_std
# Standard deviation of the standardized labels (this print used to be
# labelled "y mean:").
print("normalized y std:", y_standardized.std())

# Mean absolute error of always predicting 0 for standardized labels.
baseline = torch.mean(torch.abs(y_standardized))
print("Baseline MAE (predict 0):", baseline.item())

In [None]:
# Sum of absolute gradient entries over all parameters that currently hold a
# gradient (0.0 when none do).
total_grad = sum(
    (param.grad.abs().sum().item() for param in model.parameters() if param.grad is not None),
    0.0,
)
print("Total grad magnitude:", total_grad)

In [None]:
# Inspect the label range of the first training batch only.
for x, lab in train_loader:
    # .float() is needed because mean() is not defined for integer label tensors.
    print("min y:", lab.min(), "max y:", lab.max(), "mean y:", lab.float().mean())
    break

In [None]:
smax = nn.Softmax()
def evalPrecision(data_loader, debug = False, PUDevice = None, eval_model = None):
    """Fraction of correctly classified samples for both heads.

    Despite the name, the returned values are accuracies (correct / total).

    Args:
        data_loader: Iterable of ``(inputs, labels)`` batches. Column
            ``negativeIndex`` / ``positiveIndex`` of ``labels`` holds the
            class index for the respective head.
        debug: When true, prints running and final counts.
        PUDevice: Device the batches are moved to. ``None`` (default) uses
            the module-level ``device`` at call time.
        eval_model: Network to evaluate; it must return
            ``(negative_logits, positive_logits)``. ``None`` (default) uses
            the module-level ``model``.

    Returns:
        numpy array of length 2 indexed by ``negativeIndex`` /
        ``positiveIndex``; both entries are NaN when the loader is empty.
    """
    net = model if eval_model is None else eval_model
    target_device = device if PUDevice is None else PUDevice

    total = 0
    correctCount = [0, 0]
    # Remember the mode so that evaluating in the middle of training does not
    # leave the network in eval mode afterwards.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for inputs, labels in data_loader:
                inputs = inputs.to(target_device)
                labels = labels.to(target_device)

                negative_labels = labels[:, negativeIndex]
                positive_labels = labels[:, positiveIndex]

                negative_outputs, positive_outputs = net(inputs)
                total += len(labels)

                # Predicted class = index of the largest logit.
                negative_logitLabels = torch.argmax(negative_outputs, dim=1)
                positive_logitLabels = torch.argmax(positive_outputs, dim=1)

                correctCount[negativeIndex] += (negative_logitLabels == negative_labels).sum().item()
                correctCount[positiveIndex] += (positive_logitLabels == positive_labels).sum().item()
                if debug:
                    print("N", correctCount[negativeIndex], " P", correctCount[positiveIndex], " out of", total)
    finally:
        net.train(was_training)

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return np.full(2, np.nan)

    if debug:
        print("N", correctCount[negativeIndex], " P", correctCount[positiveIndex], "out of ", total)
        print("N Precision", correctCount[negativeIndex] / total, " P Precision", correctCount[positiveIndex] / total)
    return np.array(correctCount) / total

In [None]:
model_progress = {
    "training": [],
    "validation": []
}

model_progress

In [None]:
# Evaluating the precision on the training data
trainingPrecision = evalPrecision(train_loader, True, "cpu")
model_progress["training"].append(trainingPrecision)

In [None]:
# Evaluating the precision on the validation data
validationPrecision = evalPrecision(valid_loader, True, "cpu")
model_progress["validation"].append(validationPrecision)

In [None]:
# Call it only after the training is finished
evalPrecision(test_loader, True, "cpu")

In [None]:
# Debug view of the first validation batch. The model returns one logits
# tensor per head, so each head is unpacked and reported separately.
smax = nn.Softmax()
model.eval()
total = 0
correctCount = [0, 0]  # indexed by negativeIndex / positiveIndex
with torch.no_grad():
    for inputs, labels in valid_loader:
        labels = labels.cpu()
        negative_outputs, positive_outputs = model(inputs.cpu())
        total += len(labels)
        for head_name, head_index, outputs in (
            ("Negative", negativeIndex, negative_outputs),
            ("Positive", positiveIndex, positive_outputs),
        ):
            logitLabels = torch.argmax(outputs, dim=1)
            head_labels = labels[:, head_index]
            correctCount[head_index] += (logitLabels == head_labels).sum().item()
            print(head_name, correctCount[head_index], "out of ", total)
            print("Label: \n", head_labels)
            print("logitLabels: \n", logitLabels, ". End")
            print("Output: \n", outputs, ". End")
        break
if total:
    print([count / total for count in correctCount])

In [None]:
# Gather true and predicted classes of the whole validation set for both heads.
smax = nn.Softmax()
model.eval()
total = 0
correctCount = 0
confusionMatrix = [[0, 0], [0, 0]] # [lows [true lows, lows but predicted high], highs[true highs, highs but predicted lows]]
cm_data = [[[], []], [[], []]] # negative [Actual, Predicted], positive [Actual, Predicted]
with torch.no_grad():
    for inputs, labels in valid_loader:
        inputs = inputs.to("cpu")
        labels = labels.to("cpu")
        total += len(labels)

        negative_outputs, positive_outputs = model.forward( inputs )

        # Slot 0 of each head collects the actual classes, slot 1 the predictions.
        for head_index, head_outputs in (
            (negativeIndex, negative_outputs),
            (positiveIndex, positive_outputs),
        ):
            cm_data[head_index][0].extend(labels[:, head_index].tolist())
            cm_data[head_index][1].extend(torch.argmax(head_outputs, dim=1).tolist())

In [None]:
CLASS_IDS = [0, 1, 2]  # bin indices shown on both axes

negative_actual = cm_data[negativeIndex][0]
negative_predicted = cm_data[negativeIndex][1]

positive_actual = cm_data[positiveIndex][0]
positive_predicted = cm_data[positiveIndex][1]


def _show_confusion_matrix(actual, predicted, title):
    """Print and plot the confusion matrix of one head.

    Passing ``labels`` fixes the matrix to one row/column per class in
    CLASS_IDS even when a class never occurs in ``actual`` or ``predicted``,
    so it always matches the display labels.
    """
    confusion_matrix = metrics.confusion_matrix(actual, predicted, labels=CLASS_IDS)
    print(confusion_matrix)

    matrix = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = CLASS_IDS)
    matrix.plot(cmap="Blues")
    plt.title(title)
    plt.show()


_show_confusion_matrix(negative_actual, negative_predicted, "Confusion Matrix Negative Affect 18")
_show_confusion_matrix(positive_actual, positive_predicted, "Confusion Matrix Positive Affect 18")

In [None]:
class Every20EpochsCallback(Callback):
    """Records train/validation accuracy every ``interval`` epochs.

    Results are appended to ``model_progress["training"]`` and
    ``model_progress["validation"]`` and printed. Epoch 0 is not evaluated;
    the first evaluation happens at epoch ``interval``.

    Args:
        interval: Number of epochs between two evaluations.
    """

    def __init__(self, interval=20):
        super().__init__()
        self.interval = interval

    # Called at end of each epoch
    def on_train_epoch_end(self, trainer, pl_module):
        epoch = trainer.current_epoch
        if epoch < self.interval or epoch % self.interval != 0:
            return

        # evalPrecision switches the network to eval mode; put it back so the
        # following training epochs keep dropout active.
        was_training = pl_module.training
        try:
            # Evaluate on the device the module currently lives on.
            train_precision = evalPrecision(train_loader, False, pl_module.device)
            valid_precision = evalPrecision(valid_loader, False, pl_module.device)
        finally:
            pl_module.train(was_training)

        model_progress["training"].append(train_precision)
        model_progress["validation"].append(valid_precision)
        print("Precision ", "Train", train_precision, " Valid", valid_precision)

callback = Every20EpochsCallback(interval=20)

In [None]:
# ce = nn.CrossEntropyLoss()
# sm = torch.tensor([[0.5134, 0.4866], [0.5131, 0.4869]], dtype=torch.float)
# ce(sm, torch.tensor([0, 1], dtype=torch.long))
model_progress

In [None]:
# Entries at positions divisible by 26 are left out: some elements were
# added in a wrong way at that spacing.
kept_positions = [n for n in range(len(model_progress["training"])) if n % 26 != 0]

validation = {
    "negative": [model_progress["validation"][n][negativeIndex] for n in kept_positions],
    "positive": [model_progress["validation"][n][positiveIndex] for n in kept_positions],
}

training = {
    "negative": [model_progress["training"][n][negativeIndex] for n in kept_positions],
    "positive": [model_progress["training"][n][positiveIndex] for n in kept_positions],
}

# One recorded point per 20 epochs, counted from 0.
epochs = [position * 20 for position in range(len(kept_positions))]

# One figure per head: validation and training curves against the epoch.
for affect_key, affect_title in (("negative", "Negative Affect18"), ("positive", "Positive Affect18")):
    plt.plot(epochs, validation[affect_key], marker='x', label='Val Precision')
    plt.plot(epochs, training[affect_key], marker='o', label='Train Precision')

    plt.xlabel("Epoch")
    plt.ylabel("Precision")
    plt.title(f"Training vs Validation Precision - {affect_title}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

In [None]:
model.train()
trainer = L.Trainer(
    max_epochs=501,
    # "auto" picks an available accelerator and falls back to the CPU, in
    # line with the CPU fallback used for `device` at the top of the notebook.
    accelerator="auto",
    devices=1,
    callbacks=[callback]
)

trainer.fit(model, train_loader)

In [None]:
# Sanity Check
from ucimlrepo import fetch_ucirepo

# fetch dataset
air_quality = fetch_ucirepo(id=360)

# data (as pandas dataframes)
all_data = air_quality.data.features
# y = air_quality.data.targets

# # metadata
# print(air_quality.metadata)

# # variable information
# print(air_quality.variables)

In [None]:
# Select relevant columns
features = ['PT08.S1(CO)', 'NMHC(GT)', 'C6H6(GT)', 'PT08.S2(NMHC)',
            'NOx(GT)', 'PT08.S3(NOx)', 'NO2(GT)', 'PT08.S4(NO2)',
            'PT08.S5(O3)', 'T', 'RH', 'AH']
target = ['CO(GT)']

X = all_data[features].values
y = all_data[target].values


In [None]:
X

In [None]:
scaler_x = StandardScaler()
scaler_y = StandardScaler()
X = scaler_x.fit_transform(X)
y = scaler_y.fit_transform(y)
input_dim = len(X[0])

In [None]:
X[0]

In [None]:
# Create sequences (24-hour window)
def create_sequences(X, y, seq_len=24):
    """Build sliding windows and next-step targets from a time series.

    Window ``i`` covers rows ``i .. i + seq_len - 1`` of ``X``; its target is
    ``y[i + seq_len]``, i.e. the value right after the window.

    Args:
        X: Array-like of per-step feature rows.
        y: Array-like of per-step targets, aligned with ``X``.
        seq_len: Number of consecutive steps per window.

    Returns:
        ``(windows, targets)`` as float32 tensors. Both are empty when the
        series has no more than ``seq_len`` rows.
    """
    window_starts = range(len(X) - seq_len)
    xs = [X[start:start + seq_len] for start in window_starts]
    ys = [y[start + seq_len] for start in window_starts]  # predict next hour
    # Converting through one ndarray avoids the slow per-element path torch
    # takes for a list of ndarrays.
    return (
        torch.tensor(np.array(xs), dtype=torch.float32),
        torch.tensor(np.array(ys), dtype=torch.float32),
    )

x_seq, y_seq = create_sequences(X, y, input_dim)

# DataLoader for Lightning
train_loader = DataLoader(TensorDataset(x_seq, y_seq), batch_size=32, shuffle=True, num_workers=4)

In [None]:
model = TransformerLightning(input_size=input_dim, d_model=64, num_heads=8, d_ff=64, num_classes=1, num_layers=1, lr=1e-3)
signalModel = SignalTransformer(input_size=input_dim, d_model=64, nhead=8, num_classes=1)

In [None]:
input = torch.tensor(X[0], dtype=torch.float32).reshape(1, 1, 12)
print(X)
print(y[0])

In [None]:
signalModel.eval()
with torch.no_grad():
  print(signalModel(input))

In [None]:
model.eval()
with torch.no_grad():
  print(model.forward(input))

In [None]:
model.train()
trainer = L.Trainer(
    max_epochs=15,
    # "auto" picks an available accelerator and falls back to the CPU.
    accelerator="auto",
    devices=1
)

trainer.fit(model, train_loader)

In [None]:
# Continue training from a saved checkpoint until max_epochs is reached.
trainer = Trainer(
    max_epochs=60,              # new total epochs
    accelerator="gpu",
)

# NOTE(review): the checkpoint path is absolute and tied to one specific run
# (version_3, epoch 29); it has to be adapted on any other machine or run.
trainer.fit(model, train_loader, ckpt_path="/content/lightning_logs/version_3/checkpoints/epoch=29-step=4410.ckpt")

In [None]:
%load_ext tensorboard
%tensorboard --logdir lightning_logs/