In [28]:
import csv
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F

In [29]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [258]:
# Accumulators for the CSV load: the raw rows, and the shortest row length seen so far.
data, min_len = [], float('inf')

In [259]:
# Load the semicolon-separated recordings into `data` and track the shortest row.
# The accumulators are reset here so that re-running this cell does not append
# the same rows to `data` a second time.
data = []
min_len = float('inf')
with open('../data/2023-10-04-30-pu.csv', 'r', newline='', encoding='utf-8') as csvfile:
    spikereader = csv.reader(csvfile, delimiter=';')
    for row in spikereader:
        if not row:
            # csv.reader yields [] for blank lines; counting one would drive
            # min_len to 0 and truncate every row to nothing downstream.
            continue
        data.append(row)
        min_len = min(min_len, len(row))

In [260]:
# Cut every row down to the shortest row length so the table is rectangular,
# and parse each remaining field as a float.
data = [list(map(float, row[:min_len])) for row in data]


In [261]:
# Build a tensor from the parsed rows, then move it to the selected device.
data_tensor = torch.tensor(data)
data_tensor = data_tensor.to(device)

In [262]:
# Show the tensor and, on the following line, its shape.
print(data_tensor, data_tensor.shape, sep="\n")

tensor([[  0., 898., 904.,  ..., 915., 911., 911.],
        [  0., 852., 877.,  ..., 868., 883., 881.],
        [  0., 868., 874.,  ..., 896., 899., 883.],
        ...,
        [  9., 923., 891.,  ..., 891., 869., 888.],
        [  9., 922., 913.,  ..., 902., 915., 913.],
        [  9., 903., 909.,  ..., 889., 893., 895.]], device='cuda:0')
torch.Size([300, 8601])


In [263]:
class CustomTensorDataset(Dataset):
    """Dataset over a tensor whose rows are ``[label, feature_0, feature_1, ...]``.

    Indexing returns ``(features, label)`` where ``features`` is the row without
    its first element and ``label`` is that first element as an int64 scalar.
    """

    def __init__(self, data_tensor):
        """Keep a reference to ``data_tensor``; the tensor is not copied."""
        self.data_tensor = data_tensor

    def __len__(self):
        """Return the number of rows in the backing tensor."""
        return len(self.data_tensor)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx``.

        The label is derived with ``.long()`` directly on the stored tensor, so it
        stays on the same device as the features. This avoids depending on a
        notebook-global ``device`` and avoids a device -> Python int -> device
        round-trip per sample. ``.long()`` truncates toward zero like ``int()``.
        """
        sample = self.data_tensor[idx]
        label = sample[0].long()
        item = sample[1:]
        return item, label

In [264]:
# Wrap the whole table (label column plus features) in the Dataset class.
custom_dataset = CustomTensorDataset(data_tensor=data_tensor)

In [265]:
train_size = 0.4  # Proportion of the dataset for training
# A fixed random_state makes the shuffled split identical on every run, so the
# metrics reported further down are comparable across kernel restarts.
# NOTE(review): the split is not stratified by label — consider passing
# `stratify=` with the label column if class balance per split matters.
train_dataset, test_dataset = train_test_split(
    custom_dataset,
    train_size=train_size,
    shuffle=True,
    random_state=42,
)

In [266]:
# Two zero-initialised counters.
# NOTE(review): not referenced by any other cell visible here — confirm it is still needed.
number_of_values = [0, 0]

In [267]:
# print(train_dataset[5])

In [268]:
# Mini-batches of two samples; only the training loader reshuffles between epochs.
batch_size = 2
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [269]:
class SimpleMulticlassNN(nn.Module):
    """Fully connected classifier: input_size -> 128 -> 512 -> num_classes.

    A ReLU follows each of the first two linear layers; the output of the last
    linear layer is returned as-is (raw logits, no activation).
    """

    def __init__(self, input_size, num_classes):
        """Create the three linear layers and their ReLU modules."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 512)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(512, num_classes)
        # Registered on the module but not applied in forward().
        self.relu3 = nn.ReLU()

    def forward(self, x):
        """Map a batch of feature vectors to one logit per class."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.relu2(self.fc2(hidden))
        return self.fc3(hidden)

In [270]:
# Pull the first batch from the training loader to sanity-check shapes and values.
# Note: this rebinds `data` (until now the parsed CSV rows) to a batch of features.
data, label = next(iter(train_dataloader))
i = 0  # index of the batch that was fetched
print(data.shape)
print(data, label)

torch.Size([2, 8600])
tensor([[928., 922., 943.,  ..., 887., 890., 885.],
        [866., 847., 848.,  ..., 888., 884., 882.]], device='cuda:0') tensor([9, 8], device='cuda:0')


In [271]:
class SimpleTransformer(nn.Module):
    """Embedding -> nn.Transformer -> mean over sequence -> linear classifier.

    NOTE(review): nn.Embedding looks up integer indices in ``[0, input_size)``.
    The float feature tensors produced by the cells above would not be accepted
    as-is — confirm what input this model is meant to receive.
    """

    def __init__(self, input_size, num_classes, hidden_size=64, num_layers=2, num_heads=4):
        """Build the embedding table, the encoder/decoder stack and the output layer."""
        super().__init__()

        self.embedding = nn.Embedding(input_size, hidden_size)
        # The same layer count is used for the encoder and the decoder.
        self.transformer = nn.Transformer(
            d_model=hidden_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits for ``x`` of shape (batch_size, sequence_length)."""
        # (batch, seq) -> (batch, seq, hidden) -> (seq, batch, hidden)
        seq_first = self.embedding(x).transpose(0, 1)

        # The embedded sequence is fed as both the source and the target.
        decoded = self.transformer(seq_first, seq_first)

        # Back to (batch, seq, hidden), then average over the sequence axis.
        pooled = decoded.transpose(0, 1).mean(dim=1)

        return self.fc(pooled)

In [272]:
num_classes = 10
# Alternative architectures (currently disabled):
# model = SimpleTransformer(min_len-1, num_classes)
# model = SimpleLSTM(min_len-1,64,2,10)

# Input width is min_len-1 because the dataset strips the first column as the label.
model = SimpleMulticlassNN(min_len-1, num_classes).to(device)

# Cross-entropy over raw logits for multiclass classification.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=1e-5)

# scheduler = StepLR(optimizer, step_size=40, gamma=1e-1)

In [273]:
import numpy as np
from sklearn import metrics

In [274]:
# Train a freshly initialised SimpleMulticlassNN five times. After every epoch the
# model is scored on test_dataloader; each run records its best balanced accuracy,
# AUC and weighted F1, and the means over the runs are printed at the end.
# NOTE(review): the "best" epoch is selected on test_dataloader itself, so the
# reported numbers are optimistic — a separate validation split would be needed
# for an unbiased estimate.
all_acu = []
all_auc = []
all_f1 = []
for i in range(5):
    model = SimpleMulticlassNN(min_len-1, num_classes)
    model.to(device)

    criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for multiclass classification
    optimizer = optim.AdamW(model.parameters(), lr=5e-6)

    # Multiply the learning rate by 0.1 every 50 epochs.
    scheduler = StepLR(optimizer, step_size=50, gamma=1e-1)
    best_acu = 0
    best_auc = 0
    best_f1 = 0
    epochs = 100
    for epoch in range(epochs):
        train_loss = 0.0
        model.train()  # Set the model to training mode
        for batch_data, batch_labels in train_dataloader:
            optimizer.zero_grad()  # Zero the gradients
            outputs = model(batch_data)  # Forward pass
            loss = criterion(outputs, batch_labels)  # Calculate loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights
            # Accumulate a plain float: summing the loss tensor itself would keep
            # every batch's autograd graph alive until the end of the epoch.
            train_loss += loss.item()

        print(train_loss)
        scheduler.step()  # Adjust learning rate

        model.eval()  # Set the model to evaluation mode
        val_loss = 0.0
        correct = 0
        total = 0
        # Collect per-batch arrays and concatenate once after the loop instead of
        # re-copying the growing arrays on every batch.
        label_chunks = []
        pred_chunks = []
        prob_chunks = []
        with torch.no_grad():
            for batch_data, batch_labels in test_dataloader:
                outputs = model(batch_data)
                val_loss += criterion(outputs, batch_labels).item()
                predicted = outputs.argmax(dim=1)
                label_chunks.append(batch_labels.cpu().numpy())
                pred_chunks.append(predicted.cpu().numpy())
                prob_chunks.append(torch.softmax(outputs, dim=1).cpu().numpy())
                total += batch_labels.size(0)
                correct += predicted.eq(batch_labels).sum().item()

        labels = np.concatenate(label_chunks, axis=0)
        preds = np.concatenate(pred_chunks, axis=0)
        probs = np.concatenate(prob_chunks, axis=0)

        avg_val_loss = val_loss / len(test_dataloader)
        accuracy = 100.0 * correct / total
        acu = metrics.balanced_accuracy_score(labels, preds) * 100
        f1 = metrics.f1_score(labels, preds, average='weighted')
        if num_classes > 2:
            auc = metrics.roc_auc_score(labels, probs, multi_class='ovo')
        else:
            # Binary AUC is a ranking metric: score with the positive-class
            # probability rather than the thresholded 0/1 predictions.
            auc = metrics.roc_auc_score(labels, probs[:, 1])
        best_acu = max(best_acu, acu)
        best_auc = max(best_auc, auc)
        best_f1 = max(best_f1, f1)

        # print(f"Epoch [{epoch+1}/{epochs}] - Validation Loss: {avg_val_loss:.4f} - Validation Accuracy: {accuracy:.2f}% - sklearn Accuracy: {acu:.2f}%")
        print(f"Epoch [{epoch+1}/{epochs}] - Balanced Accuracy: {acu:.2f}% - AUC: {auc:.4f} - F1: {f1:.4f}")
    all_acu.append(best_acu)
    all_auc.append(best_auc)
    all_f1.append(best_f1)
    print(f"Best Accuracy: {best_acu:.2f}% - Best AUC: {best_auc:.4f} - Best F1: {best_f1:.4f}")
print("\r\n")
print(f"Mean Accuracy: {np.mean(all_acu):.2f}% - Mean AUC: {np.mean(all_auc):.4f} - Mean F1: {np.mean(all_f1):.4f}")

1625.68994140625
Epoch [1/100] - Balanced Accuracy: 16.44% - AUC: 0.6185 - F1: 0.0670
612.088623046875
Epoch [2/100] - Balanced Accuracy: 11.76% - AUC: 0.5889 - F1: 0.0330
608.7559814453125
Epoch [3/100] - Balanced Accuracy: 11.80% - AUC: 0.5843 - F1: 0.0618
712.1212158203125
Epoch [4/100] - Balanced Accuracy: 19.04% - AUC: 0.6277 - F1: 0.1196
519.8916015625
Epoch [5/100] - Balanced Accuracy: 10.00% - AUC: 0.6645 - F1: 0.0166
474.407470703125
Epoch [6/100] - Balanced Accuracy: 13.53% - AUC: 0.6512 - F1: 0.0438
621.4609985351562
Epoch [7/100] - Balanced Accuracy: 22.84% - AUC: 0.6918 - F1: 0.1147
651.0465087890625
Epoch [8/100] - Balanced Accuracy: 27.22% - AUC: 0.6856 - F1: 0.1767
567.0028686523438
Epoch [9/100] - Balanced Accuracy: 10.00% - AUC: 0.5777 - F1: 0.0540
579.624267578125
Epoch [10/100] - Balanced Accuracy: 19.35% - AUC: 0.6643 - F1: 0.0908
658.809326171875
Epoch [11/100] - Balanced Accuracy: 19.04% - AUC: 0.7010 - F1: 0.1215
553.017578125
Epoch [12/100] - Balanced Accuracy: