# Convolution Neural Network with raw audio data


Assumes that preprocessing step is already done using the matlab functions (output is table which we will convert to pandas dataframe)
* `generate_data.m`
* `resampleRun.m`

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import librosa
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [2]:
# Data parameters
dataLength = 5 # in seconds
filename  = "raw_data.csv"

In [7]:
def load_raw_data(filename, dataLength):
    data_raw = pd.read_csv(filename)
    numSamples = data_raw.shape[0]
    sampleRate = data_raw.iloc[0].iloc[1]
    labels = []
    mfcc_spectrograms = np.empty((numSamples//10, dataLength*sampleRate), dtype=np.float32)  # Preallocate array
    for index, row in data_raw.iterrows():
        if index%10 == 0:
            if row.iloc[0] == 'ad':
                labels.append(0)
            else:
                labels.append(1)
            data = row[2:].values
            data_signals[index//10] = data
    labels = np.array(labels)
    return labels, data_signals

In [8]:
# load up mfcc data
labels, raw_data = load_raw_data(filename, dataLength)
print(raw_data.shape)

NameError: name 'data_signals' is not defined

In [5]:
# Training parameters
batchSize = 32
num_epochs = 10

In [6]:
# Convert data to tensor compliant dataset
raw_train, raw_test, labels_train, labels_test = train_test_split(raw_data, labels, test_size=0.2, random_state=100)
raw_train_tensor  = torch.tensor(raw_train).unsqueeze(1)
raw_test_tensor   = torch.tensor(raw_test).unsqueeze(1)
labels_train_tensor = torch.tensor(labels_train)
labels_test_tensor  = torch.tensor(labels_test)
train_dataset = TensorDataset(raw_train_tensor, labels_train_tensor)
test_dataset = TensorDataset(raw_test_tensor, labels_test_tensor)
train_loader = DataLoader(train_dataset, batch_size = batchSize, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = batchSize, shuffle = False)

In [7]:
# Define our CNN
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        
        # Pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        
        # Fully connected layers
        self.fc1 = nn.Linear(7936, 512)
        self.fc2 = nn.Linear(512, 2)

    def forward(self, x):
        x = x.view(-1, 1, 498, 12) #(batch size, channels, height, width)
        # Convolutional layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        
        # Flatten the output for fully connected layers
        size = x.size()[1:]  #all size except batchSize
        numFeatures = 1
        for s in size:
            numFeatures *= s
        x = x.view(-1, numFeatures)
        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [8]:
# The Model
model = CNN()

# Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [9]:
# Train
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

Epoch [1/10], Loss: 0.4242
Epoch [2/10], Loss: 0.2097
Epoch [3/10], Loss: 0.1540
Epoch [4/10], Loss: 0.1167
Epoch [5/10], Loss: 0.1102
Epoch [6/10], Loss: 0.1015
Epoch [7/10], Loss: 0.0927
Epoch [8/10], Loss: 0.0568
Epoch [9/10], Loss: 0.1050
Epoch [10/10], Loss: 0.0453


In [10]:
# Evaluate
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = correct / total
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.9588
