# Convolutional Neural Network with raw audio data

* `read-write-data.ipynb`

In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import librosa
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score

In [2]:
# Load the raw audio data and the matching class labels from their .npy files.
raw_labels = np.load("labels.npy")
raw_data = np.load("raw_data.npy")

# Every row of raw_data needs exactly one label; fail fast here rather than
# deep inside train_test_split if the two files ever get out of sync.
assert raw_data.shape[0] == raw_labels.shape[0], (
    f"label/data count mismatch: {raw_labels.shape[0]} labels "
    f"vs {raw_data.shape[0]} data rows"
)

print(raw_labels)
print(raw_labels.shape)
print(raw_data.shape)

[1 1 1 ... 0 0 0]
(1049,)
(1049, 220500)


In [3]:
# Split the arrays into train/test sets and wrap them in PyTorch DataLoaders.
batchSize = 100
SEED = 100  # shared by the train/test split and the training-set shuffle
raw_train, raw_test, labels_train, labels_test = train_test_split(
    raw_data, raw_labels, test_size=0.2, random_state=SEED
)

# Conv1d consumes (batch, channels, length), so a singleton channel axis is
# inserted at dim 1. Dtypes are pinned explicitly instead of being inherited
# from the .npy files: float32 matches the default dtype of the layer weights,
# and CrossEntropyLoss requires int64 class-index targets.
raw_train_tensor    = torch.tensor(raw_train, dtype=torch.float32).unsqueeze(1)
raw_test_tensor     = torch.tensor(raw_test, dtype=torch.float32).unsqueeze(1)
labels_train_tensor = torch.tensor(labels_train, dtype=torch.long)
labels_test_tensor  = torch.tensor(labels_test, dtype=torch.long)

train_dataset = TensorDataset(raw_train_tensor, labels_train_tensor)
test_dataset  = TensorDataset(raw_test_tensor, labels_test_tensor)

# A dedicated, seeded generator makes the shuffle order repeatable across
# kernel restarts without touching the global RNG state.
train_loader = DataLoader(
    train_dataset,
    batch_size=batchSize,
    shuffle=True,
    generator=torch.Generator().manual_seed(SEED),
)
test_loader = DataLoader(test_dataset, batch_size=batchSize, shuffle=False)

In [4]:
# Define our CNN
class CNN(nn.Module):
    """1-D convolutional classifier for single-channel signals.

    The network applies three Conv1d -> ReLU -> AvgPool1d stages and then two
    fully connected layers that emit one logit per class.

    Args:
        input_length: Number of samples in each input signal. It determines
            the input width of ``fc1``. The default of 220500 gives
            3527808 features, i.e. the size that used to be hard-coded.
        num_classes: Number of output logits. Defaults to 2.

    Raises:
        ValueError: If ``input_length`` is too short to survive the
            convolution/pooling stack.
    """

    def __init__(self, input_length=220500, num_classes=2):
        super().__init__()
        # Convolutional layers
        self.conv1 = nn.Conv1d(1,  32,  kernel_size=10, padding=1)
        self.conv2 = nn.Conv1d(32, 64,  kernel_size=5, padding=1)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, padding=1)

        # Pooling layer (shared by all three stages; it has no parameters)
        self.pool = nn.AvgPool1d(2, 2)

        # Fully connected layers. The width of fc1 is derived from the layers
        # above so that changing the clip length or a kernel size cannot
        # leave a stale hard-coded feature count behind.
        self.fc1 = nn.Linear(self._num_flat_features(input_length), 512)
        self.fc2 = nn.Linear(512, num_classes)

    def _num_flat_features(self, input_length):
        """Return the per-sample feature count after the conv/pool stack.

        Uses the output-length formulas documented for Conv1d and AvgPool1d
        (floor mode), reading the hyperparameters off the actual layers.
        """
        length = int(input_length)
        pool_kernel = self.pool.kernel_size[0]
        pool_stride = self.pool.stride[0]
        pool_pad = self.pool.padding[0]
        for conv in (self.conv1, self.conv2, self.conv3):
            receptive = conv.dilation[0] * (conv.kernel_size[0] - 1) + 1
            length = (length + 2 * conv.padding[0] - receptive) // conv.stride[0] + 1
            length = (length + 2 * pool_pad - pool_kernel) // pool_stride + 1
            if length < 1:
                raise ValueError(
                    f"input_length={input_length} is too short for this network"
                )
        return self.conv3.out_channels * length

    def forward(self, x):
        """Map a (batch, 1, input_length) float tensor to (batch, num_classes) logits."""
        # Convolutional layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # Flatten everything except the batch dimension for the dense layers
        x = torch.flatten(x, 1)

        # Fully connected layers (raw logits out; CrossEntropyLoss applies
        # log-softmax itself, so no activation on the last layer)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [5]:
# Seed PyTorch's global RNG before building the model so the randomly
# initialised weights are the same on every Restart & Run All.
torch.manual_seed(100)

# The Model
model = CNN()

# Loss Function and Optimizer
LEARNING_RATE = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [6]:
# Train
def run_training_epoch(network, loader, loss_fn, opt):
    """Run one optimisation pass over ``loader``; return the mean per-sample loss."""
    network.train()
    loss_sum = 0.0
    for batch_inputs, batch_labels in loader:
        opt.zero_grad()
        batch_loss = loss_fn(network(batch_inputs), batch_labels)
        batch_loss.backward()
        opt.step()
        # Scale each batch's loss by its size so a shorter final batch is
        # weighted correctly when dividing by the dataset size below.
        loss_sum += batch_loss.item() * batch_inputs.size(0)
    return loss_sum / len(loader.dataset)


num_epochs = 5
for epoch in range(num_epochs):
    epoch_loss = run_training_epoch(model, train_loader, criterion, optimizer)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

Epoch [1/5], Loss: 30.7335
Epoch [2/5], Loss: 0.3124
Epoch [3/5], Loss: 0.2495
Epoch [4/5], Loss: 0.1906
Epoch [5/5], Loss: 0.1435


In [12]:
# Evaluate: collect predictions over the whole test set, then score them once.
model.eval()
totalLabels    = []
totalPredicted = []
with torch.no_grad():
    for t_inputs, t_labels in test_loader:
        outputs = model(t_inputs)
        # The predicted class is the index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        # .tolist() yields plain Python ints, so the metric functions receive
        # ordinary integer lists rather than lists of 0-d tensors.
        totalLabels.extend(t_labels.tolist())
        totalPredicted.extend(predicted.tolist())
print(f"Test Accuracy: {accuracy_score(totalLabels, totalPredicted)}")
print(f"Test Precision: {precision_score(totalLabels, totalPredicted)}")
print(f"F1: {f1_score(totalLabels, totalPredicted)}")
print(f"Recall: {recall_score(totalLabels, totalPredicted)}")

Test Accuracy: 0.9095238095238095
Test Precision: 0.9223300970873787
F1: 0.9090909090909091
Recall: 0.8962264150943396


In [13]:
# Build the checkpoint name from the hyperparameters actually used in this run
# (batchSize and num_epochs are set in earlier cells) so the filename cannot
# drift out of sync with the weights it contains.
torch.save(model.state_dict(), f'cnn_raw_model_batch_{batchSize}_epoch_{num_epochs}.pth')