# CNN para classificação de espectrogramas

## Definitions

In [364]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, f1_score
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

torch.cuda.is_available()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.cuda.get_device_name(0)

'NVIDIA GeForce GTX 1650'

In [365]:
# Load data
spectrograms = np.load("datasets/spectrograms/spectrograms_janela1s_hop05s_Oz.npy")

labels = np.load("datasets/spectrograms/labels_numeric.npy")

In [366]:
print(spectrograms.shape)
print(labels.shape)

(6, 20, 1, 20, 3)
(6, 20)


In [367]:
labels

array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3],
       [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3],
       [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3],
       [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3],
       [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3],
       [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]])

In [368]:
def split_train_test_trials(X, y, test_trial):
    y_test = []
    y_train = []
    X_test = []
    X_train = []
    for trial in range(0, 6):
        if trial == test_trial:
            X_test.extend(X[trial])
            y_test.extend(y[trial])
        else:
            X_train.extend(X[trial])
            y_train.extend(y[trial])
    return np.array(X_train), np.array(X_test), np.array(y_train), np.array(y_test)


def prepare_data(X, y, test_trial=5, train_batch_size=16, test_batch_size=1):
    X_train, X_test, y_train, y_test = split_train_test_trials(X, y, test_trial)
    train_data = TensorDataset(
        torch.tensor(X_train, dtype=torch.float32).to(device),
        torch.tensor(y_train, dtype=torch.long).to(device),
    )
    test_data = TensorDataset(
        torch.tensor(X_test, dtype=torch.float32).to(device),
        torch.tensor(y_test, dtype=torch.long).to(device),
    )
    train_loader = DataLoader(train_data, batch_size=train_batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=test_batch_size, shuffle=True)
    return train_loader, test_loader

In [369]:
train_loader, test_loader = prepare_data(
    spectrograms, labels, test_trial=5, train_batch_size=20
)

In [370]:
# Define the CNN Model using nn.Sequential for each block
class CNNModel(nn.Module):
    def __init__(self, n_channels, n_classes):
        super(CNNModel, self).__init__()

        self.block1 = nn.Sequential(
            nn.Conv2d(n_channels, 15, kernel_size=3, padding=1),  # entrada
            # nn.BatchNorm2d(15),
            nn.Dropout(0.3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.block2 = nn.Sequential(
            nn.Conv2d(15, 30, kernel_size=3, padding=1),
            # nn.BatchNorm2d(30),
            nn.Dropout(0.3),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.block3 = nn.Sequential(
            nn.Conv2d(30, 15, kernel_size=(3, 2), padding=1),
            # nn.BatchNorm2d(15),
            nn.Dropout(0.3),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.block4 = nn.Sequential(
            nn.Conv2d(15, 30, kernel_size=(3, 2), padding=1),
            # nn.BatchNorm2d(30),
            nn.Dropout(0.3),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2),
        )

        # Fully Connected Layers
        self.fc = nn.Sequential(
            nn.Linear(900, 250),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(250, 125),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(125, n_classes),
        )

    def forward(self, x):
        # Apply convolutional blocks
        x = self.block1(x)
        # x = F.interpolate(x, scale_factor=2, mode="nearest-exact")  # Interpolation
        x = self.block2(x)
        # x = F.interpolate(x, scale_factor=2, mode="nearest-exact")  # Interpolation
        x = self.block3(x)
        # x = F.interpolate(x, scale_factor=2, mode="nearest-exact")  # Interpolation
        x = self.block4(x)
        # x = F.interpolate(x, scale_factor=2, mode="nearest-exact")  # Interpolation

        # Flatten for fully connected layers
        x = x.view(x.size(0), -1)

        # Apply fully connected layers
        x = self.fc(x)
        output = F.log_softmax(x, dim=1)
        return output


def train(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    num_epochs=100,
    device=0,
    save_path="best_model.pth",
):
    best_val_accuracy = 0.0

    model.to(device)
    for epoch in range(num_epochs):
        # Training Phase
        model.train()
        running_loss = 0.0
        train_correct = 0
        train_total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # Compute training accuracy
            _, preds = torch.max(outputs, 1)
            train_correct += (preds == labels).sum().item()
            train_total += labels.size(0)

        train_accuracy = train_correct / train_total
        avg_train_loss = running_loss / len(train_loader)

        # Validation Phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                # Compute validation accuracy
                _, preds = torch.max(outputs, 1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_accuracy = val_correct / val_total
        avg_val_loss = val_loss / len(val_loader)

        # Save the model if it has the best validation accuracy
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), save_path)
            print(f"Best model saved with accuracy: {best_val_accuracy:.4f}")

        # Print epoch summary
        print(
            f"Epoch {epoch + 1}/{num_epochs}: "
            f"Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, "
            f"Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}"
        )

    print(f"Training complete. Best validation accuracy: {best_val_accuracy:.4f}")

In [371]:
def evaluate(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Metrics Calculation
    accuracy = accuracy_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds, average="weighted")
    f1 = f1_score(all_labels, all_preds, average="weighted")
    cm = confusion_matrix(all_labels, all_preds)

    print(f"Test set Accuracy: {accuracy:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Confusion Matrix:\n{cm}")
    return accuracy, recall, f1

In [372]:
model = CNNModel(1, 4)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Train the model
train(model, train_loader, test_loader, criterion, optimizer, num_epochs=300)

Best model saved with accuracy: 0.2500
Epoch 1/300: Train Loss: 1.3887, Train Accuracy: 0.2600, Val Loss: 1.3867, Val Accuracy: 0.2500
Epoch 2/300: Train Loss: 1.3873, Train Accuracy: 0.1900, Val Loss: 1.3866, Val Accuracy: 0.2500
Epoch 3/300: Train Loss: 1.3870, Train Accuracy: 0.2700, Val Loss: 1.3865, Val Accuracy: 0.2500
Epoch 4/300: Train Loss: 1.3852, Train Accuracy: 0.2400, Val Loss: 1.3864, Val Accuracy: 0.2500
Epoch 5/300: Train Loss: 1.3861, Train Accuracy: 0.2800, Val Loss: 1.3863, Val Accuracy: 0.2500
Epoch 6/300: Train Loss: 1.3847, Train Accuracy: 0.2400, Val Loss: 1.3864, Val Accuracy: 0.2500
Epoch 7/300: Train Loss: 1.3883, Train Accuracy: 0.2800, Val Loss: 1.3865, Val Accuracy: 0.2500
Epoch 8/300: Train Loss: 1.3898, Train Accuracy: 0.1700, Val Loss: 1.3864, Val Accuracy: 0.2500
Epoch 9/300: Train Loss: 1.3866, Train Accuracy: 0.2400, Val Loss: 1.3865, Val Accuracy: 0.2500
Epoch 10/300: Train Loss: 1.3906, Train Accuracy: 0.1900, Val Loss: 1.3866, Val Accuracy: 0.2500


In [356]:
# Evaluate the model
best_model = CNNModel(n_channels=1, n_classes=4)
best_model.load_state_dict(torch.load("best_model.pth"))
best_model.to(device)
evaluate(best_model, test_loader)

Test set Accuracy: 0.7500
Recall: 0.7500
F1 Score: 0.7481
Confusion Matrix:
[[5 0 0 0]
 [0 4 1 0]
 [0 2 3 0]
 [1 1 0 3]]


  best_model.load_state_dict(torch.load("best_model.pth"))


(0.75, 0.75, 0.7481060606060606)

## Cross validation

### Pz, PO5, PO3, POz, PO4, PO6, O1, Oz, O2

In [357]:
all_acc = []
all_recall = []
all_f1 = []
for test_trial in range(0, 6):
    print(f"Test on trial {test_trial}")
    train_loader, test_loader = prepare_data(
        spectrograms, labels, test_trial=test_trial, train_batch_size=20
    )
    model = CNNModel(1, 4).to(device)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # Train the model
    train(
        model,
        train_loader,
        test_loader,
        criterion,
        optimizer,
        num_epochs=300,
        save_path="best_model.pth",
    )
    best_model = CNNModel(n_channels=9, n_classes=4)
    best_model.load_state_dict(torch.load("best_model.pth"))
    best_model.to(device)
    # Eval
    acc, rcll, f1s = evaluate(best_model, test_loader)
    all_acc.append(acc)
    all_recall.append(rcll)
    all_f1.append(f1s)


all_acc = np.array(all_acc)
all_recall = np.array(all_recall)
all_f1 = np.array(all_f1)

Test on trial 0
Best model saved with accuracy: 0.2500
Epoch 1/300: Train Loss: 1.3919, Train Accuracy: 0.2400, Val Loss: 1.3876, Val Accuracy: 0.2500
Epoch 2/300: Train Loss: 1.3883, Train Accuracy: 0.2500, Val Loss: 1.3867, Val Accuracy: 0.2500
Epoch 3/300: Train Loss: 1.3876, Train Accuracy: 0.2200, Val Loss: 1.3865, Val Accuracy: 0.2500
Epoch 4/300: Train Loss: 1.3874, Train Accuracy: 0.2400, Val Loss: 1.3865, Val Accuracy: 0.2500
Epoch 5/300: Train Loss: 1.3877, Train Accuracy: 0.1600, Val Loss: 1.3864, Val Accuracy: 0.2500
Epoch 6/300: Train Loss: 1.3891, Train Accuracy: 0.2000, Val Loss: 1.3862, Val Accuracy: 0.2500
Epoch 7/300: Train Loss: 1.3871, Train Accuracy: 0.2100, Val Loss: 1.3862, Val Accuracy: 0.2500
Epoch 8/300: Train Loss: 1.3859, Train Accuracy: 0.3500, Val Loss: 1.3861, Val Accuracy: 0.2500
Epoch 9/300: Train Loss: 1.3826, Train Accuracy: 0.3100, Val Loss: 1.3861, Val Accuracy: 0.2500
Epoch 10/300: Train Loss: 1.3868, Train Accuracy: 0.2600, Val Loss: 1.3860, Val A

  best_model.load_state_dict(torch.load("best_model.pth"))


RuntimeError: Error(s) in loading state_dict for CNNModel:
	size mismatch for block1.0.weight: copying a param with shape torch.Size([15, 1, 3, 3]) from checkpoint, the shape in current model is torch.Size([15, 9, 3, 3]).

In [345]:
print(all_acc.mean())
print(all_recall.mean())
print(all_f1.mean())

0.6583333333333333
0.6583333333333333
0.6495146057646056
