In [655]:
import csv
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F
from sklearn.model_selection import StratifiedKFold

In [656]:
import torch
import torch.nn as nn
from torchvision import transforms
torch.manual_seed(205)

<torch._C.Generator at 0x24ece295310>

In [657]:
# if torch.cuda.is_available():
#     device = torch.device("cuda")
# else:
#     device = torch.device("cpu")

device = torch.device("cpu")

In [658]:
data = []
min_len = float('inf')

In [659]:
with open('../data/git-data/2023-10-06-F.csv', 'r',newline='', encoding='utf-8') as csvfile:
    spikereader = csv.reader(csvfile, delimiter=';')
    for row in spikereader:
        data.append(row)
        if len(row) < min_len:
            min_len = len(row)

In [660]:
data = [[float(item) for item in sublist[:min_len]] for sublist in data]
labels = [d[0] for d in data]

In [661]:
data_tensor = torch.tensor(data).to(device)

In [662]:
print(torch.std_mean(data_tensor))

(tensor(27.6221), tensor(1372.5297))


In [663]:
print(data_tensor)
print(data_tensor.shape)

tensor([[   0., 1382., 1380.,  ..., 1372., 1355., 1371.],
        [   0., 1387., 1394.,  ..., 1369., 1348., 1366.],
        [   0., 1354., 1376.,  ..., 1375., 1381., 1356.],
        ...,
        [   9., 1336., 1376.,  ..., 1380., 1369., 1381.],
        [   9., 1382., 1354.,  ..., 1370., 1361., 1388.],
        [   9., 1364., 1368.,  ..., 1358., 1335., 1287.]])
torch.Size([100, 8601])


In [664]:
class CustomTensorDataset(Dataset):
    def __init__(self, data_tensor, transform=None):
        self.data_tensor = data_tensor
        self.transform = transform

    def __len__(self):
        return len(self.data_tensor)

    def __getitem__(self, idx):
        sample = self.data_tensor[idx]
        label = torch.tensor(int(sample[0])).to(device)
        item = sample[1:]

        # Apply transformation only if it's specified and idx is in the training subset
        if self.transform is not None:
            for transform in self.transform:
                item = transform(item)

        return item, label

In [665]:
custom_dataset = CustomTensorDataset(data_tensor)

In [666]:
# Create a custom transformation to add Gaussian noise
class AddGaussianNoise(object):
    def __init__(self, mean=0.0, std=1.0):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        return tensor + torch.randn(tensor.size()) * self.std + self.mean

# Define the mean and standard deviation for Gaussian noise
mean_value = 0.0
std_value = 0.2

# Apply transformation only to the training subset
# gaussian_noise_transform = AddGaussianNoise(mean=22, std=350)
# gaussian_noise_transform2 = AddGaussianNoise(mean=220, std=402)

gaussian_noise_transform = AddGaussianNoise(mean=2, std=21)
gaussian_noise_transform2 = AddGaussianNoise(mean=220, std=201)

In [667]:
class TimeSeriesWarp(object):
    def __init__(self, magnitude=0.1):
        self.magnitude = magnitude

    def __call__(self, sequence):
        seq_len = sequence.size(0)
        
        # Determine the range of indices to be modified
        warp_range = int(seq_len * self.magnitude)
        start_idx = torch.randint(0, seq_len - warp_range, (1,)).item()
        end_idx = start_idx + warp_range

        # Generate random warping values
        # warp_values = torch.normal(0, 500.1, size=(end_idx - start_idx,))  # Adjust the parameters as needed
        warp_values = torch.normal(0, 2.1, size=(end_idx - start_idx,))  # Adjust the parameters as needed

        # Apply warping to the sequence within the selected range
        sequence[start_idx:end_idx] += warp_values

        return sequence
warp_transform = TimeSeriesWarp(magnitude=0.5)

In [668]:
class WindowWarp(object):
    def __init__(self, window_size=5, magnitude=0.1):
        self.window_size = window_size
        self.magnitude = magnitude

    def __call__(self, sequence):
        seq_len = sequence.size(0)
        
        # Determine the number of windows
        num_windows = max(1, seq_len // self.window_size)
        
        # Randomly select a window to warp
        selected_window = torch.randint(0, num_windows, (1,)).item()
        start_idx = selected_window * self.window_size
        end_idx = min(start_idx + self.window_size, seq_len)

        # Generate random warping values for the selected window
        warp_values = torch.normal(0, 0.1, size=(end_idx - start_idx,))  # Adjust the parameters as needed

        # Apply warping to the selected window in the sequence
        sequence[start_idx:end_idx] += self.magnitude * warp_values

        return sequence

# window_warp_transform = WindowWarp(window_size=100, magnitude=1000.1)
window_warp_transform = WindowWarp(window_size=100, magnitude=500.1)

In [669]:
# train_size = 0.6  # Proportion of the dataset for training
# train_dataset, test_dataset = train_test_split(custom_dataset, train_size=train_size, shuffle=True)

In [670]:
# batch_size = 2
# train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [671]:
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, dropout_prob=0.0):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout1 = nn.Dropout(dropout_prob)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.dropout2 = nn.Dropout(dropout_prob)
        self.downsample = None

        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(out_channels),
            )

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout1(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.dropout2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

class ResNet1D(nn.Module):
    def __init__(self, block, layers, num_classes=10, dropout_prob=0.0):
        super(ResNet1D, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv1d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0],dropout_prob=dropout_prob)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dropout_prob=dropout_prob)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dropout_prob=dropout_prob)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dropout_prob=dropout_prob)
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1, dropout_prob=0.0):
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, dropout_prob))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels, dropout_prob=dropout_prob))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Define the ResNet model
model = ResNet1D(BasicBlock, [2, 2, 2, 2], num_classes=10)

In [672]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.fc = nn.Linear(16 * 4300, num_classes)  # Adjust the output size for 10 classes

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc(x)
        return x

In [673]:
import numpy as np
from sklearn import metrics

In [674]:
num_classes = 10
# all_acu = []
# all_auc = []
# all_f1 = []
results = {}
k_folds = 6
num_epochs = 1
# torch.manual_seed(10000)
kfold = StratifiedKFold(n_splits=k_folds, shuffle=True)
for fold, (train_ids, test_ids) in enumerate(kfold.split(data_tensor, labels)):
    train_ids = torch.tensor(train_ids)
    test_ids = torch.tensor(test_ids)
    train_rows = torch.index_select(data_tensor, 0, train_ids)
    test_rows = torch.index_select(data_tensor, 0, test_ids)
    # train_dataset = CustomTensorDataset(data_tensor=train_rows, transform=[gaussian_noise_transform, window_warp_transform])
    # test_dataset = CustomTensorDataset(data_tensor=test_rows, transform=[gaussian_noise_transform2, window_warp_transform])
    train_dataset = CustomTensorDataset(data_tensor=train_rows, transform=[gaussian_noise_transform, window_warp_transform])
    test_dataset = CustomTensorDataset(data_tensor=test_rows)
    # train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    # test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
    # raise ValueError(train_subsampler)
    
    # Define data loaders for training and testing data in this fold
    train_dataloader = torch.utils.data.DataLoader(
                      train_dataset, 
                      batch_size=2, shuffle=True)
    test_dataloader = torch.utils.data.DataLoader(
                      test_dataset,
                      batch_size=2, shuffle=False)
# for i in range(5):
    model = SimpleCNN(num_classes=num_classes)
    model.to(device)

    criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for multiclass classification
    optimizer = optim.AdamW(model.parameters(), lr=5e-5)

    scheduler = StepLR(optimizer, step_size=20, gamma=1e-1)    
    best_acu = 0
    best_auc = 0
    best_f1 = 0
    epochs = 100
    for epoch in range(epochs):
        train_loss = 0
        model.train()  # Set the model to training mode
        for batch_data, batch_labels in train_dataloader:
            optimizer.zero_grad()  # Zero the gradients
            batch_data = torch.unsqueeze(batch_data,1)
            outputs = model(batch_data)  # Forward pass
            loss = criterion(outputs, batch_labels)  # Calculate loss
            train_loss += loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

        print(train_loss.item())
        scheduler.step()  # Adjust learning rate
        model.eval()  # Set the model to evaluation mode
        val_loss = 0.0
        correct = 0
        total = 0
        labels = 0
        preds = 0
        probs = 0
        with torch.no_grad():
            for batch_data, batch_labels in test_dataloader:
                batch_data = torch.unsqueeze(batch_data,1)
                outputs = model(batch_data)
                val_loss += criterion(outputs, batch_labels).item()
                _, predicted = outputs.max(1)
                np_labels = batch_labels.detach().cpu().numpy()
                np_predictions = predicted.detach().cpu().numpy()
                if total > 0:
                    labels = np.concatenate((labels,np_labels),axis=0)
                    preds = np.concatenate((preds,np_predictions),axis=0)
                    probs = np.concatenate((probs, torch.softmax(outputs, dim=1).detach().cpu().numpy()),axis=0)
                else:
                    labels = np_labels
                    preds = np_predictions
                    probs = torch.softmax(outputs, dim=1).detach().cpu().numpy()
                    # all_outputs = outputs.detach().cpu().numpy()
                total += batch_labels.size(0)
                correct += predicted.eq(batch_labels).sum().item()

        avg_val_loss = val_loss / len(test_dataloader)
        accuracy = 100.0 * correct / total
        acu = metrics.balanced_accuracy_score(labels, preds) * 100
        f1 = metrics.f1_score(labels, preds, average='weighted')
        if num_classes > 2:
            auc = metrics.roc_auc_score(labels, probs, multi_class='ovo')
        else:
            auc = metrics.roc_auc_score(labels, preds)
        if acu > best_acu:
            best_acu = acu
        if auc > best_auc:
            best_auc = auc
        if f1 > best_f1:
            best_f1 = f1

        # print(f"Epoch [{epoch+1}/{epochs}] - Validation Loss: {avg_val_loss:.4f} - Validation Accuracy: {accuracy:.2f}% - sklearn Accuracy: {acu:.2f}%")
        print(f"Epoch [{epoch+1}/{epochs}] - Balanced Accuracy: {acu:.2f}% - AUC: {auc:.4f} - F1: {f1:.4f}")
    results[fold] = (best_acu, best_auc, best_f1)
    # all_acu.append(best_acu)
    # all_auc.append(best_auc)
    # all_f1.append(best_f1)
    print(f"Best Accuracy: {best_acu:.2f}% - Best AUC: {best_auc:.4f} - Best F1: {best_f1:.4f}")
print("\r\n")
# print(f"Mean Accuracy: {np.mean(all_acu):.2f}% - Mean AUC: {np.mean(all_auc):.4f} - Mean F1: {np.mean(all_f1):.4f}")
for key, value in results.items():
    print(f'Fold {key}: {value} %')

53217.17578125
Epoch [1/100] - Balanced Accuracy: 10.00% - AUC: 0.5000 - F1: 0.0065
45497.078125
Epoch [2/100] - Balanced Accuracy: 10.00% - AUC: 0.5000 - F1: 0.0065
50073.44921875
Epoch [3/100] - Balanced Accuracy: 10.00% - AUC: 0.5000 - F1: 0.0248
46518.48828125
Epoch [4/100] - Balanced Accuracy: 10.00% - AUC: 0.5000 - F1: 0.0065
57102.515625
Epoch [5/100] - Balanced Accuracy: 10.00% - AUC: 0.5278 - F1: 0.0248
36122.765625
Epoch [6/100] - Balanced Accuracy: 10.00% - AUC: 0.5472 - F1: 0.0248
58731.99609375
Epoch [7/100] - Balanced Accuracy: 10.00% - AUC: 0.5000 - F1: 0.0248
49408.23046875
Epoch [8/100] - Balanced Accuracy: 10.00% - AUC: 0.5444 - F1: 0.0248
44914.94921875
Epoch [9/100] - Balanced Accuracy: 10.00% - AUC: 0.5000 - F1: 0.0065
58564.4375
Epoch [10/100] - Balanced Accuracy: 10.00% - AUC: 0.5000 - F1: 0.0065
60732.51953125
Epoch [11/100] - Balanced Accuracy: 10.00% - AUC: 0.5000 - F1: 0.0248
42812.44140625
Epoch [12/100] - Balanced Accuracy: 10.00% - AUC: 0.5278 - F1: 0.0065

In [676]:
sum_acc = 0
sum_auc = 0
sum_f1 = 0
for key, (acc, auc, f1) in results.items():
    sum_acc += acc
    sum_auc += auc
    sum_f1 += f1
sum_acc /= len(results)
sum_auc /= len(results)
sum_f1 /= len(results)

print(sum_acc, sum_auc, sum_f1)

65.83333333333333 0.9472222222222223 0.6115049302549304
