# Import dependencies

In [96]:
import os
import torch
import torchaudio
import numpy as np
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchaudio.transforms import MFCC

In [97]:
data_path = os.path.dirname(os.getcwd())+"/data"
os.chdir(data_path)
chord_labels = os.listdir(os.path.join(data_path, "Train"))

# Define a custom Dataset in PyTorch

In [128]:
from sklearn.preprocessing import LabelEncoder

class WAVDataset(Dataset):
    def __init__(self, data_dir, audio_sample_rate):
        self.data_dir = data_dir
        self.sampling_rate = audio_sample_rate
        #Tune n_mels, default is 128
        self.transform = MFCC(sample_rate=audio_sample_rate, n_mfcc=40, melkwargs={"n_mels": 64, "n_fft":400})
        
        #Store file paths and target labels
        self.files = []
        self.labels = []
        self.encoded_labels = []

        # Walk through the directories to get audio file paths and labels
        for label in os.listdir(data_dir):
            label_dir = os.path.join(data_dir, label)
            if os.path.isdir(label_dir):
                for audio_file in os.listdir(label_dir):
                    if audio_file.endswith('.wav'):
                        audio_file_path = os.path.join(label_dir, audio_file)
                        self.files.append(audio_file_path)
                        self.labels.append(label)

        #One-hot encode string labels as tensors
        le = LabelEncoder()
        self.encoded_labels = le.fit_transform(self.labels)
        self.encoded_labels = torch.from_numpy(self.encoded_labels)
        # self.encoded_labels = torch.nn.functional.one_hot(self.encoded_labels)
        

    
    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        audio_path = self.files[idx]
        label = self.encoded_labels[idx]
        #Extract MFCC data from audio
        waveform, audio_sample_rate = torchaudio.load(audio_path)
        

        #Some files are mono (1 channel), duplicate channel if so
        if waveform.shape[0] == 1:
            waveform = waveform.repeat(2,1)
  
        waveform_MFCC = self.transform(waveform)

        #Input shapes need to be the same, take the average coefficient over time intervals, should output a tensor of shape (2, n_mfcc)
        waveform_MFCC = torch.mean(waveform_MFCC, dim=2)
        
        return waveform_MFCC, label

In [129]:
# Define paths
train_dir = os.path.join(data_path, "Train")
test_dir = os.path.join(data_path, "Test")

# Create instances of custom dataset
train_dataset = WAVDataset(data_dir=train_dir, audio_sample_rate=16000)
test_dataset = WAVDataset(data_dir=test_dir, audio_sample_rate=16000)

# Create DataLoaders
data_batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=data_batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=data_batch_size, shuffle=False)

# for batch_data, batch_labels in train_loader:
#     print("Batch Data Shape:", batch_data.shape)  # Shape: (batch_size, 13)
#     print("Batch Labels Shape:", batch_labels.shape)  # Shape: (batch_size

In [132]:
#Set GPU or CPU
device = "cuda" if torch.cuda.is_available() else "cpu"


class ChordClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Flatten(),
            nn.Linear(40*2, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(16, 8),
            # nn.Softmax(dim=1)
        )
        
    
    # 3. Define a forward method containing the forward pass computation
    def forward(self, x):
        x = self.linear_relu_stack(x)
        return x

# 4. Create an instance of the model and send it to target device
model = ChordClassifier().to(device)
print(model)

ChordClassifier(
  (linear_relu_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=80, out_features=128, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.3, inplace=False)
    (7): Linear(in_features=64, out_features=32, bias=True)
    (8): ReLU()
    (9): Dropout(p=0.3, inplace=False)
    (10): Linear(in_features=32, out_features=16, bias=True)
    (11): ReLU()
    (12): Dropout(p=0.2, inplace=False)
    (13): Linear(in_features=16, out_features=8, bias=True)
  )
)


In [133]:
import torch.optim as optim

epochs = 100
learning_rate = 0.001
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

running_loss = 0.
last_loss = 0.


for epoch in range(epochs):
    total = 0
    correct = 0
    for i,data in enumerate(train_loader):
        batch_X, batch_Y = data
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = loss_fn(outputs, batch_Y)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs.data, 1)  # Get the predicted class
        total += batch_Y.size(0)  # Total number of samples
        correct += (predicted == batch_Y).sum().item()  # Count correct predictions

        accuracy = 100 * correct / total  # Calculate accuracy
        running_loss += loss.item()
        if i % 5 == 4:
            last_loss = running_loss / 1000 # loss per batch
            print('  batch {} loss: {} accuracy {}'.format(i + 1, last_loss, accuracy))
            running_loss = 0.

  batch 5 loss: 0.011708209753036498 accuracy 9.375
  batch 10 loss: 0.010815365076065063 accuracy 12.8125
  batch 15 loss: 0.010855023622512817 accuracy 12.708333333333334
  batch 20 loss: 0.010578906059265137 accuracy 12.34375
  batch 25 loss: 0.01038986086845398 accuracy 12.25
  batch 30 loss: 0.010525167465209961 accuracy 13.020833333333334
  batch 35 loss: 0.010701079607009887 accuracy 12.5
  batch 40 loss: 0.010523247957229614 accuracy 12.65625
  batch 5 loss: 0.018978142499923707 accuracy 15.625
  batch 10 loss: 0.01038432240486145 accuracy 15.0
  batch 15 loss: 0.010512538194656373 accuracy 13.541666666666666
  batch 20 loss: 0.010400304794311524 accuracy 13.4375
  batch 25 loss: 0.010244907379150391 accuracy 13.75
  batch 30 loss: 0.010374575853347778 accuracy 13.958333333333334
  batch 35 loss: 0.010480214834213257 accuracy 13.928571428571429
  batch 40 loss: 0.010245428800582886 accuracy 14.0625
  batch 5 loss: 0.018835041999816896 accuracy 12.5
  batch 10 loss: 0.0104615874