In [13]:
# Importing Libraries

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Audio
import torch as torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import torchaudio.transforms as audio
from scipy.signal import resample
from PIL import Image

In [14]:
class Feature_Dataset(Dataset):
    """Map-style dataset that pairs feature arrays with their labels.

    Both inputs must be NumPy arrays; they are wrapped with
    ``torch.from_numpy`` and indexed along their first axis.
    """

    def __init__(self, data, labels):
        """Wrap the NumPy ``data`` and ``labels`` arrays as tensors."""
        self.data, self.labels = torch.from_numpy(data), torch.from_numpy(labels)

    def __len__(self):
        """Return the number of samples, taken from the label tensor."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return a dict holding the sample index, its features and its label."""
        return {
            'index': idx,
            'data': self.data[idx],
            'label': self.labels[idx],
        }
    
# Every recording is resampled to this many samples before feature extraction.
_NUM_SAMPLES = 100000
# (win_length, hop_length) pairs of the three mel spectrograms that become the
# three input channels of each sample.
_MEL_WINDOWS = ((4096, 1024), (2048, 512), (1024, 256))
# (height, width) every spectrogram is resized to.
_SPECTROGRAM_SIZE = (128, 256)
# Number of samples per inference batch.
_BATCH_SIZE = 24


def _normalize_signal(signal, dtype):
    """Resample one recording to a fixed length, then centre and scale it."""
    sig = resample(signal, _NUM_SAMPLES).astype(dtype, copy=False)
    # The scale is the peak magnitude of the resampled signal *before* the
    # mean is removed; the small constant avoids dividing by zero on silence.
    scale = np.max(np.abs(sig) + 1e-9)
    return ((sig - np.average(sig)) / scale).astype(dtype, copy=False)


def _log_mel_spectrogram(signal, mel_transform, resize):
    """Return the log of a mel spectrogram resized to ``_SPECTROGRAM_SIZE``."""
    spec = mel_transform(signal).detach().numpy()
    # torchvision's Resize is applied to a PIL image, hence the round trip.
    spec = np.array(resize(Image.fromarray(spec)))
    return np.log(spec + 1e-9)


def _sphere(spectrogram, lam=10**-7):
    """Apply the notebook's "sphering" step to one 2-D spectrogram.

    The column means are removed, the regularised row covariance is
    eigendecomposed, and the demeaned data is projected onto the eigenvectors
    and back.

    NOTE(review): the eigenvalues are never used, so no variance rescaling
    takes place; a textbook ZCA transform would scale by eigenvalue**-0.5.
    The computation is kept as-is because the saved models were presumably
    trained on exactly these features -- confirm before changing it.
    """
    demeaned = spectrogram - np.mean(spectrogram, axis=0)
    n_rows, n_cols = demeaned.shape
    cov = 1 / float(n_cols) * np.dot(demeaned, demeaned.T) + lam * np.eye(n_rows)
    _, eigvecs = np.linalg.eigh(cov)
    return np.dot(eigvecs, np.dot(eigvecs.T, demeaned))


def test(path_to_test_data, path_to_test_labels):
    """Evaluate the four-model DenseNet ensemble on a saved data set.

    Parameters
    ----------
    path_to_test_data : str
        Path to a ``.npy`` file holding one recording per row.
    path_to_test_labels : str
        Path to a ``.npy`` file holding one label per recording. One is
        subtracted from every label before it is compared with the predicted
        class index (presumably the stored labels are 1-based -- confirm).

    Returns
    -------
    all_preds : list of torch.Tensor
        Predicted class indices, one tensor per batch, in data-set order.
    all_labels : list of torch.Tensor
        Shifted true labels, one tensor per batch, in data-set order.
    test_accuracy : float
        Correct predictions divided by the total number of samples.

    Raises
    ------
    ValueError
        If the data set is empty or the numbers of recordings and labels differ.
    """
    data_test = np.load(path_to_test_data)
    labels_test = np.load(path_to_test_labels)

    r = len(data_test)
    if r == 0:
        raise ValueError("test data is empty")
    if len(labels_test) != r:
        raise ValueError(
            "number of recordings (%d) and labels (%d) differ" % (r, len(labels_test))
        )

    # Normalise into fresh floating-point arrays instead of writing back into
    # the loaded array: an integer array would silently truncate the
    # normalised values. A floating input keeps its own precision.
    if np.issubdtype(data_test.dtype, np.floating):
        work_dtype = data_test.dtype
    else:
        work_dtype = np.float64
    signals = [torch.Tensor(_normalize_signal(data_test[i], work_dtype)) for i in range(r)]

    # Extracting Features
    # The transforms do not depend on the sample, so build them only once.
    # NOTE(review): sample_rate is 44000 (not 44100); left untouched because
    # it determines the features the saved models receive.
    mel_transforms = [
        audio.MelSpectrogram(
            sample_rate=44000, n_fft=4096,
            win_length=win_length, hop_length=hop_length,
            n_mels=128)
        for win_length, hop_length in _MEL_WINDOWS
    ]
    resize = transforms.Resize(_SPECTROGRAM_SIZE)

    features_log = np.stack([
        [_log_mel_spectrogram(sig, mel, resize) for mel in mel_transforms]
        for sig in signals
    ])

    # Sphering the spectrograms:
    ZCA_features_test = np.stack([
        [_sphere(features_log[i][j]) for j in range(len(mel_transforms))]
        for i in range(r)
    ])

    # Loading Ensemble
    # SECURITY: torch.load unpickles arbitrary Python objects; only load
    # checkpoint files that come from a trusted source.
    # NOTE(review): recent PyTorch releases default to weights_only=True,
    # which rejects fully pickled models -- confirm against the installed version.
    cpu = torch.device("cpu")
    net1 = torch.load("final_ensemble_densenet_1.pth", map_location=cpu)
    net2 = torch.load("final_ensemble_densenet_2.pt", map_location=cpu)
    net3 = torch.load("final_ensemble_densenet_3.pt", map_location=cpu)
    net4 = torch.load("final_ensemble_densenet_4.pt", map_location=cpu)
    for net in (net4, net3, net2, net1):
        net.eval()

    test_dataset = Feature_Dataset(ZCA_features_test, labels_test)
    # No shuffling during evaluation: predictions come back in data-set order
    # and the result is reproducible from run to run.
    test_loader = DataLoader(test_dataset, batch_size=_BATCH_SIZE, shuffle=False)

    # Making Ensemble predictions - this may take a minute
    all_labels = []
    all_preds = []
    total_correct = 0
    total_seen = 0
    with torch.no_grad():
        for batch in test_loader:
            # Cast once and feed the same tensor to every ensemble member.
            images = batch['data'].type(torch.FloatTensor)
            labels = batch['label'] - 1

            avg_outputs = (net4(images) + net3(images) + net2(images) + net1(images)) / 4
            _, predicted = torch.max(avg_outputs, 1)
            all_preds.append(predicted)
            all_labels.append(labels)
            print("predicted labels =", predicted)
            print("true labels      =", labels.type(torch.int))
            print("----------------------------------------------")
            total_seen += labels.size(0)
            total_correct += (predicted == labels.type(torch.LongTensor)).sum().item()

    # Weight every sample equally; averaging per-batch accuracies would
    # over-weight a smaller final batch.
    test_accuracy = total_correct / total_seen
    print("test accuracy =", test_accuracy)

    return all_preds, all_labels, test_accuracy

In [15]:
# Run the ensemble evaluation and keep the per-batch predictions, labels and
# the overall accuracy.
# NOTE(review): the files passed here are named "*_training.npy", so the
# accuracy printed below is presumably measured on the training data rather
# than on a held-out set -- confirm this is intended.
all_preds, all_labels, test_accuracy = test("data_training.npy", "labels_training.npy")

predicted labels = tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])
true labels      = tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       dtype=torch.int32)
----------------------------------------------
predicted labels = tensor([1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0])
true labels      = tensor([1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
       dtype=torch.int32)
----------------------------------------------
predicted labels = tensor([0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0])
true labels      = tensor([0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0],
       dtype=torch.int32)
----------------------------------------------
predicted labels = tensor([0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0])
true labels      = tensor([0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 