In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import fft
import wave
import sys
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import wavfile
import scipy.signal as signal

import torch

In [None]:
path = 'C:/Users/User/Desktop/AppStat/MachineLearning/AppliedML2024/final_project/data/nsynth-valid/audio/'

#sys.path.append("C:/Users/Admin/OneDrive/Skrivebord/Machine Learning/AppliedML2024-main/AppliedML2024/Assignments/Group Project/Shared code/AudioNNRep/functions")
sys.path.append('C:/Users/User/Desktop/AppStat/MachineLearning/AppliedML2024/final_project')

import functions as f

In [None]:
filenames = f.read_files_in_dir(path)
organ = [filename for filename in filenames if "organ" in filename] 
bass = [filename for filename in filenames if "bass" in filename]
guitar = [filename for filename in filenames if "guitar" in filename]
vocal = [filename for filename in filenames if "vocal" in filename] 
flutes = [filename for filename in filenames if "flute" in filename]
keyboards = [filename for filename in filenames if "keyboard" in filename] 

In [None]:
instruments = [organ, bass, guitar, vocal, flutes, keyboards]

In [None]:
import random

In [None]:
#using f.audio_to_waveform(path + instrument) and f.waveform_to_spectogram(waveform_test) generate mixed_spectogram of three of the instruments: pianos, bass, guitar, drum, flutes, keyboards

def generate_mixed_spectrograms(n_mixed_spectrograms, number_of_instruments = 3):
    instruments = [organ, bass, guitar, vocal, flutes, keyboards]
    picked_inst_arr = np.zeros((n_mixed_spectrograms, len(instruments)))
    for i in range(n_mixed_spectrograms):
        # for each row, turn 3 random zeros to 1
        picked_inst = random.sample(range(len(instruments)), number_of_instruments)
        picked_inst_arr[i, picked_inst] = 1
    
    mixed_spectograms = []
    indvidual_spectograms = []
    for i in range(n_mixed_spectrograms):
        selected_files = []
        # Select files from the picked instruments
        for j in range(len(instruments)):
            if picked_inst_arr[i, j] == 1:
                selected_files.append(random.choice(instruments[j]))

        # Generate the mixed spectrogram, and save the individual spectrograms
        spectogram_i = []
        for j in range(len(selected_files)):
            waveform_test, sr = f.audio_to_waveform(path + selected_files[j])
            spectogram = f.waveform_to_spectrogram(waveform_test)
            spectogram_i.append(spectogram)
            if j == 0:
                combined_waveform = waveform_test
            else:
                combined_waveform = combined_waveform + waveform_test

        indvidual_spectograms.append(spectogram_i)

        mixed_spectogram = f.waveform_to_spectrogram(combined_waveform)
        mixed_spectograms.append(mixed_spectogram)

    return np.array(mixed_spectograms), indvidual_spectograms, picked_inst_arr

In [None]:
mixed_spectograms, indvidual_spectograms, picked_inst_arr = generate_mixed_spectrograms(3200, 3)

In [None]:
#np.shape(mixed_spectogra

In [None]:
from torch.utils.data import DataLoader, Dataset

import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
X_train, X_test, y_train, y_test = train_test_split(mixed_spectograms, picked_inst_arr, test_size=0.2, random_state=42)

# normalize the data
X_train = X_train / np.max(X_train)
X_test  = X_test  / np.max(X_test)


In [None]:
class AudioDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [None]:
train_dataset = AudioDataset(X_train, y_train)
test_dataset = AudioDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
#import device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
for X, y in test_loader:
    X, y = X.to(device, dtype=torch.float32), y.to(device, dtype=torch.float32)
    X    = torch.unsqueeze(X, 0)
    print("final shape for the model:", np.shape(X))

In [None]:
def criterion_function(output, target):
    return torch.sum(torch.abs(target - output))

In [None]:
for X, y in train_loader:
    X, y = X.to(device, dtype=torch.float32), y.to(device, dtype=torch.float32)
    print("X shape:", np.shape(X), "y shape:", np.shape(y))

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
class CNN(nn.Module):
    def __init__(self, num_classes=6, in_channels=32, out_channels=64):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=(2, 126), padding=(1, 64))
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, 32, kernel_size=(3, 3), padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.dropout = nn.Dropout(0.2)
        self.maxpool = nn.MaxPool2d(kernel_size=(2, 2), stride=2, padding=0)

        # Assuming input size is (batch_size, 32, H, W)
    
        #flatten
        self.fc1 = nn.Linear(8192, 512)
        self.fc2 = nn.Linear(512, in_channels)
        self.fc3 = nn.Linear(in_channels, num_classes)

    def _get_feature_size(self, shape):
        
        x = torch.zeros(shape)
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.maxpool(x)
        return int(np.prod(x.size()))

    def forward(self, x):
        x = self.dropout(x)
        x = F.relu(self.conv1(x))
        #print("Shape after conv1:", x.shape)
        x = self.maxpool(x)
        #print("Shape after maxpool1:", x.shape)
        x = F.relu(self.conv2(x))
        #print("Shape after conv2:", x.shape)
        x = self.maxpool(x)
        #print("Shape after maxpool2:", x.shape)
        
        x = x.view(x.size(0), -1)
        #print("Shape after flattening again:", x.shape)
        x = F.relu(self.fc1(x))
        #print("Shape after fc1:", x.shape)
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
    
        return x

# Initialize model, criterion, optimizer, and other components
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
#criterion = criterion_function()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 100
patience = 0
best_loss = np.inf
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for X, y in train_loader:
        X, y = X.to(device, dtype=torch.float32), y.to(device, dtype=torch.float32)
        
        optimizer.zero_grad()
        y_pred = model(X)
        #print(y_pred[0])
        #print(y[0])
        loss = criterion_function(y_pred, y)/len(y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * X.size(0)  # Accumulate loss
        #print(f'Batch loss: {loss.item():.4f}')
    #Implement the validation loop
    val_loss = 0.0
    model.eval()
    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device, dtype=torch.float32), y.to(device, dtype=torch.float32)
            #print("X shape:", np.shape(X), "y shape:", np.shape(y))
            y_pred = model(X)
            loss = criterion_function(y_pred, y)/len(y)
            val_loss += loss.item() * X.size(0)  # Accumulate loss
    if val_loss < best_loss:
        best_loss = val_loss
        best_model = model.state_dict()
        patience = 0
    else:
        patience += 1
    if patience > 10:
        print("Early stopping")
        break
    
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch + 1}, Loss: {epoch_loss:.4f}')
    print(f'valiation loss: {val_loss:.4f}')

Epoch 1, Loss: 3.0059
valiation loss: 1919.2619
Epoch 2, Loss: 2.9937
valiation loss: 1916.3536
Epoch 3, Loss: 2.9975
valiation loss: 1911.9218
Epoch 4, Loss: 2.9975
valiation loss: 1902.5467
Epoch 5, Loss: 2.9904
valiation loss: 1907.4213
Epoch 6, Loss: 2.9933
valiation loss: 1895.3389
Epoch 7, Loss: 2.9697
valiation loss: 1886.9595
Epoch 8, Loss: 2.9573
valiation loss: 1877.3579
Epoch 9, Loss: 2.9491
valiation loss: 1868.9294
Epoch 10, Loss: 2.9021
valiation loss: 1882.2527
Epoch 11, Loss: 2.8690
valiation loss: 1871.2840
Epoch 12, Loss: 2.8455
valiation loss: 1809.9082
Epoch 13, Loss: 2.8012
valiation loss: 1819.9813
Epoch 14, Loss: 2.7899
valiation loss: 1787.7303
Epoch 15, Loss: 2.7657
valiation loss: 1794.8571
Epoch 16, Loss: 2.7160
valiation loss: 1746.5792
Epoch 17, Loss: 2.7043
valiation loss: 1735.6530
Epoch 18, Loss: 2.6722
valiation loss: 1724.9329
Epoch 19, Loss: 2.6347
valiation loss: 1755.3963
Epoch 20, Loss: 2.6186
valiation loss: 1702.9211
Epoch 21, Loss: 2.6176
valiat

In [30]:
torch.save(model.state_dict(), 'your_model_weights_sonic_classification.pth')