## This is starter code for single point prediction with CNNs

In [None]:
import os
import glob

# common math imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# common torch imports
import torch
import torch.nn.functional as F

# common sklearn imports 
from sklearn.preprocessing import LabelEncoder, StandardScaler

## Load and process data

In [None]:
main_path = 'data'
train_folders = [f'{main_path}/train',
                 # f'{main_path}/rain-sounds', f'{main_path}/colored-noise'
]
test_folder = f'{main_path}/val'

# ---------- TRAINING DATA ----------
X_list = []
Y_list = []

for folder in train_folders:
    # find files like X_1000.npy, X_2000.npy, etc.
    X_files = sorted(glob.glob(os.path.join(folder, "X_*.npy")))
    Y_files = sorted(glob.glob(os.path.join(folder, "Y_*.npy")))

    for xf, yf in zip(X_files, Y_files):
        X_list.append(np.load(xf))
        Y_list.append(np.load(yf))

# Stack into arrays
X_train = np.vstack(X_list)
Y_train = np.concatenate(Y_list)

# Cleanup
del X_list, Y_list


# ---------- VALIDATION / TEST DATA ----------
X_test_list = []
Y_test_list = []

X_files = sorted(glob.glob(os.path.join(test_folder, "X_*.npy")))
Y_files = sorted(glob.glob(os.path.join(test_folder, "Y_*.npy")))

for xf, yf in zip(X_files, Y_files):
    X_test_list.append(np.load(xf))
    Y_test_list.append(np.load(yf))

X_test = np.vstack(X_test_list)
Y_test = np.concatenate(Y_test_list)

del X_test_list, Y_test_list

# --- SCALE DATA ---
BA, FR, TI = X_train.shape
X_train_2d = X_train.reshape(BA, FR * TI)
scaler = StandardScaler()
X_train_scaled_2d = scaler.fit_transform(X_train_2d)
X_train_scaled = X_train_scaled_2d.reshape(BA, FR, TI)
X_test_2d = X_test.reshape(X_test.shape[0], -1)
X_test_scaled_2d = scaler.transform(X_test_2d)
X_test_scaled = X_test_scaled_2d.reshape(X_test.shape[0], FR, TI)
X_train = X_train_scaled[:, np.newaxis, :, :]
X_test = X_test_scaled[:, np.newaxis, :, :]
del X_train_scaled_2d
del X_test_scaled_2d
del X_train_scaled
del X_test_scaled
del X_train_2d
del X_test_2d

# --- TO TENSORS ---
X_train = torch.tensor(X_train, dtype=torch.float32)
Y_train = torch.tensor(Y_train, dtype=torch.int32).long()
num_classes = Y_train.unique().numel()
X_test = torch.tensor(X_test, dtype=torch.float32)
Y_test = torch.tensor(Y_test, dtype=torch.int32).long()

# --- SAVE TENSORS ---
torch.save(X_train, os.path.join(main_path, "X_train.pt"))
torch.save(Y_train, os.path.join(main_path, "Y_train.pt"))
torch.save(X_test,  os.path.join(main_path, "X_test.pt"))
torch.save(Y_test,  os.path.join(main_path, "Y_test.pt"))


Training data size: 52314
Testing data size: 13013


## Some simple CNN models

Note that you can use these for multilabel or single categorical label classification, which will depend on your choice of target and loss.

In [None]:
class SimpleCNN(nn.Module):
    """Simple Convolutional Neural Network for audio classification."""

    def __init__(self, num_classes, 
                 in_channels=1, 
                 out_channels1=16, 
                 out_channels2=32,
                 fc_hidden_units=64, 
                 kernel_size=3, 
                 stride=1, 
                 padding=1,
                 pooling_size=2, 
                 dropout=0.5,
                 multilabel=False,
                 ):
        """Initialize Simple Convolutional Neural Network for audio classification.

        Parameters
        ----------
        num_classes : int
            Number of output classes for classification.
        in_channels : int, optional
            Number of input channels (default is 1 for grayscale audio).
        out_channels1 : int, optional
            Number of output channels for the first convolutional layer.
        out_channels2 : int, optional
            Number of output channels for the second convolutional layer.
        fc_hidden_units : int, optional
            Number of hidden units in the fully connected layer.
        kernel_size : int or tuple, optional
            Size of the convolutional kernel.
        stride : int or tuple, optional
            Stride of the convolutional operation.
        padding : int or tuple, optional
            Padding added to the input tensor.
        pooling_size : int or tuple, optional
            Size of the pooling operation.
        dropout : float, optional
            Dropout rate for regularization (default is 0.5).

        Returns
        -------
        None
        """
        # Store parameters
        self.num_classes = num_classes
        self.dropout_rate = dropout

        # Batch normalization layers
        self.bn1 = nn.BatchNorm2d(out_channels1)
        self.bn2 = nn.BatchNorm2d(out_channels2)
        self.bn_fc = nn.BatchNorm1d(fc_hidden_units)
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels1, 
                               kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv2 = nn.Conv2d(out_channels1, out_channels2, 
                               kernel_size=kernel_size, stride=stride, padding=padding)
        self.pool = nn.MaxPool2d(pooling_size, pooling_size)
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.LazyLinear(fc_hidden_units)
        self.multilabel = multilabel
        self.fc2 = nn.Linear(fc_hidden_units, num_classes)

    def forward(self, x):
        """Forward pass of the SimpleCNN model.

        Parameters
        ----------
        x : torch.Tensor
            Input tensor of shape (batch_size, in_channels, height, width).

        Returns
        -------
        torch.Tensor
            Output tensor of shape (batch_size, num_classes).
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.bn_fc(x)
        x = F.relu(x)
        x = self.dropout(x)

        # for multi-class classification
        if not self.multilabel:
            x = self.fc2(x)
            probs = F.softmax(x, dim=1)
            return probs
        else:
            # for multi-label classification
            return x

## Make train-test split and one hot encoding

## Initialize the model

In [48]:
cnn_model = SimpleCNN(num_classes,
                      1,
                      8,
                      8,
                      16,
                      dropout=0.2
                     )
cnn_model.to(device)

SimpleCNN(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc1): LazyLinear(in_features=0, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=10, bias=True)
)

## Set up training parameters

In [49]:
# Example usage
num_epochs = 20  # Number of epochs to train

# Define the loss function and optimizer
# criterion = nn.BCEWithLogitsLoss()  # Suitable for multi-label classification
criterion = nn.CrossEntropyLoss()  # Suitable for multi-class classification

# Choose an optimizer for training
optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)  # You can adjust the learning rate

## Perform training

In [None]:
# Training loop

for epoch in range(num_epochs):

    # training
    cnn_model.train()

    running_loss = 0.0
    for inputs, labels in train_loader:

        # Zero the parameter gradients
        optimizer.zero_grad()
        
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        # .unsqeeze(1) is because there is only grayscale channel
        outputs = cnn_model(inputs.unsqueeze(1))

        # Compute the loss
        # For multi-class classification
        loss = criterion(outputs, labels.long())
        # For multi-label classification
        # loss = criterion(outputs, labels.float())

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Accumulate the loss
        running_loss += loss.item()

    training_loss = running_loss / len(train_loader)

    # evaluation
    cnn_model.eval()
    
    running_loss = 0.0
    with torch.no_grad():  # Disable gradient computation for evaluation
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
    
            # Forward pass
            # .unsqeeze(1) is because there is only grayscale channel
            outputs = cnn_model(inputs.unsqueeze(1))
    
            # Compute the loss
            # For multi-class classification
            loss = criterion(outputs, labels.long())
            # For multi-label classification
            # loss = criterion(outputs, labels.float())

            # Accumulate the loss
            running_loss += loss.item()

    validation_loss = running_loss / len(test_loader)

    # Print the average loss for this epoch
    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {training_loss:.4f}, Validation Loss: {validation_loss:.4f}")

Epoch 1/20, Train Loss: 2.1315, Validation Loss: 2.1104
Epoch 2/20, Train Loss: 2.0503, Validation Loss: 2.1057
Epoch 3/20, Train Loss: 2.0258, Validation Loss: 2.1056
Epoch 4/20, Train Loss: 2.0174, Validation Loss: 2.1069
Epoch 5/20, Train Loss: 2.0110, Validation Loss: 2.1069
Epoch 6/20, Train Loss: 2.0086, Validation Loss: 2.1145
Epoch 7/20, Train Loss: 2.0055, Validation Loss: 2.1116
Epoch 8/20, Train Loss: 2.0008, Validation Loss: 2.1113
Epoch 9/20, Train Loss: 1.9948, Validation Loss: 2.1074


## Evaluate the model

In [44]:
# Evaluate the model's accuracy on the validation data
cnn_model.eval()  # Set the model to evaluation mode
correct1 = 0
correct2 = 0
total1 = 0
total2 = 0

with torch.no_grad():  # Disable gradient computation for evaluation
    for inputs, labels in train_loader:

        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = cnn_model(inputs.unsqueeze(1))

        # # Get the predicted class (index of the maximum value in the output)
        # # For multi-label (one-hot) targets, use sigmoid and threshold at 0.5
        # probs = torch.sigmoid(outputs)
        # predicted = (probs > 0.5).int()
        # labels = labels.int()

        # probs = probs.cpu()
        # predicted = predicted.cpu()
        # labels = labels.cpu()

        # # Update total and correct counts
        # # For multi-label accuracy, count samples where all labels match
        # correct1 += (predicted == labels).all(dim=1).sum().item()
        # total1 += labels.size(0)
        # correct2 += ((predicted == labels) * labels).sum()
        # total2 += labels.size(0)

        # Get the predicted class (index of the maximum value in the output)
        # For multi-class (one-hot) targets
        predicted = outputs.argmax(dim=1)
        labels = labels.int()
        predicted = predicted.cpu()
        labels = labels.cpu()
        print(accuracy_score(predicted,labels))

# Calculate and print the accuracy
# accuracy1 = correct1 / total1 * 100
# accuracy2 = correct2 / total2 * 100
# print(f"Training Accuracy (all labels): {accuracy1:.2f}%")
# print(f"Training Accuracy (single label): {accuracy2:.2f}%")
print()

correct1 = 0
correct2 = 0
total1 = 0
total2 = 0

with torch.no_grad():  # Disable gradient computation for evaluation
    for inputs, labels in test_loader:

        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = cnn_model(inputs.unsqueeze(1))

        # # Get the predicted class (index of the maximum value in the output)
        # # For multi-label (one-hot) targets, use sigmoid and threshold at 0.5
        # probs = torch.sigmoid(outputs)
        # predicted = (probs > 0.5).int()
        # labels = labels.int()

        # probs = probs.cpu()
        # predicted = predicted.cpu()
        # labels = labels.cpu()

        # # Update total and correct counts
        # # For multi-label accuracy, count samples where all labels match
        # correct1 += (predicted == labels).all(dim=1).sum().item()
        # total1 += labels.size(0)
        # correct2 += ((predicted == labels) * labels).sum()
        # total2 += labels.size(0)

        # Get the predicted class (index of the maximum value in the output)
        # For multi-class (one-hot) targets
        predicted = outputs.argmax(dim=1)
        labels = labels.int()
        predicted = predicted.cpu()
        labels = labels.cpu()
        print(accuracy_score(predicted,labels))

# Calculate and print the accuracy
# accuracy1 = correct1 / total1 * 100
# accuracy2 = correct2 / total2 * 100
# print(f"Validation Accuracy (all labels): {accuracy1:.2f}%")
# print(f"Validation Accuracy (single label): {accuracy2:.2f}%")
print()

0.39453125
0.388671875
0.375
0.373046875
0.39453125
0.42578125
0.35546875
0.376953125
0.41015625
0.3984375
0.392578125
0.361328125
0.388671875
0.390625
0.40234375
0.396484375
0.4140625
0.384765625
0.41796875
0.375
0.390625
0.423828125
0.400390625
0.384765625
0.44140625
0.419921875
0.4140625
0.38671875
0.3515625
0.392578125
0.3984375
0.416015625
0.423828125
0.416015625
0.41015625
0.380859375
0.37109375
0.37109375
0.41015625
0.41796875
0.4140625
0.375
0.373046875
0.423828125
0.37890625
0.328125
0.3828125
0.4296875
0.41796875
0.33203125
0.423828125
0.365234375
0.3984375
0.39453125
0.376953125
0.416015625
0.3203125
0.400390625
0.38671875
0.39453125
0.390625
0.384765625
0.390625
0.41796875
0.37890625
0.423828125
0.365234375
0.40625
0.3515625
0.375
0.412109375
0.36328125
0.388671875
0.37109375
0.396484375
0.427734375
0.3984375
0.40234375
0.43359375
0.404296875
0.39453125
0.38671875
0.392578125
0.408203125
0.412109375
0.408203125
0.408203125
0.40625
0.400390625
0.384765625
0.3984375
0.3925781

In [42]:
probs.argmax(dim=1)

tensor([7, 1, 0, 7, 0, 7, 7, 2, 7, 2, 2, 2, 7, 0, 0, 1, 2, 4, 0, 7, 2, 0, 2, 8,
        0, 0, 7, 0, 2, 1, 2, 1, 7, 2, 1, 2, 2, 0, 0, 4, 0, 0, 1, 1, 2, 7, 2, 1,
        7, 2, 0, 8, 7, 2, 7, 2, 2, 1, 2, 2, 2, 2, 0, 2, 1, 2, 2, 2, 7, 0, 7, 2,
        7, 2, 2, 2, 1, 2, 0, 2, 0, 4, 8, 4, 2, 2, 0, 2, 0, 0, 0, 7, 0, 8, 2, 2,
        1, 2, 2, 0, 2, 7, 7, 4, 0, 4, 8, 0, 1, 0, 1, 8, 2, 0, 2, 8, 7, 7, 7, 4,
        2, 2, 2, 7, 7, 7, 2, 7, 0, 2, 0, 7, 2, 7, 0, 2, 2, 4, 0, 7, 2, 8, 7, 1,
        2, 7, 2, 7, 7, 7, 0, 0, 2, 4, 0, 4, 4, 0, 2, 2, 0, 1, 2, 0, 1, 2, 0, 0,
        0, 1, 2, 2, 4, 0, 2, 2, 7, 0, 2, 0, 8, 2, 2, 7, 8, 1, 1, 2, 0, 2, 2, 0,
        2, 2, 2, 4, 2, 2, 0, 0, 2, 0, 7, 7, 1, 4, 0, 2, 2, 0, 0, 2, 7, 4, 0, 4,
        2, 2, 2, 2, 2, 0, 2, 8, 2, 2, 7, 2, 0, 7, 7, 4, 2, 0, 1, 2, 0, 2, 7, 7,
        7, 7, 2, 7, 4, 1, 2, 2, 7, 2, 1, 8, 8, 7, 7, 4, 7, 0, 1, 0, 8, 1, 0, 2,
        2, 4, 0, 2, 1, 2, 2, 0, 4, 0, 0, 0, 2, 7, 2, 7, 2, 1, 0, 4, 2, 2, 2, 1,
        2, 0, 1, 7, 7, 4, 7, 1, 2, 7, 1,