In [1]:
import pandas as pd
import librosa
import numpy as np

import librosa.display
import matplotlib.pyplot as plt

import torch
from torch.utils.data import TensorDataset, DataLoader, random_split
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import models, transforms

from toolbox import *
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

import seaborn as sns; sns.set()
import warnings
warnings.filterwarnings("ignore")


In [None]:
df=pd.read_csv("UrbanSound8K/metadata/UrbanSound8K.csv")
classes=list(df["class"].unique())

In [None]:
paths=dict()
for i in range(len(classes)):
    temp_df=df[df["class"]==classes[i]].reset_index()
    fold=temp_df["fold"].iloc[0]    # The fold of the first audio sample for the specific class
    sample_name=temp_df["slice_file_name"].iloc[0]
    path="UrbanSound8K/audio/fold{0}/{1}".format(fold, sample_name)
    paths[classes[i]]=path
paths

In [None]:
for i, label in enumerate(classes):
    sample=paths[label]
    plt.clf()
    plt.title(label)
    data, sample_rate=librosa.load(sample)
    librosa.display.waveplot(data, sr=sample_rate)
    # plt.savefig("outputs/{}.png".format(label))
    plt.show()

In [None]:
path=paths["gun_shot"]
audio, sr=librosa.load(path)
mfccs=librosa.feature.mfcc(audio, sr, n_mfcc=40)
mfccs.shape

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
img = librosa.display.specshow(mfccs, x_axis='time', ax=ax)
fig.colorbar(img, ax=ax)
ax.set(title='MFCC')

In [None]:
def extract_mfcc(path):
    audio, sr=librosa.load(path)
    mfccs=librosa.feature.mfcc(audio, sr, n_mfcc=40)
    return np.mean(mfccs.T, axis=0)

In [None]:
features=[]
labels=[]
folds=[]
for i in range(len(df)):
    fold=df["fold"].iloc[i]
    filename=df["slice_file_name"].iloc[i]
    path="UrbanSound8K/audio/fold{0}/{1}".format(fold, filename)
    mfccs=extract_mfcc(path)

    # dataset.append((mfccs,df["classID"].iloc[i]))
    features.append(mfccs)
    folds.append(fold)
    labels.append(df["classID"].iloc[i])

In [None]:
features_pt=torch.tensor(features)
featur_pt
labels_pt=torch.tensor(labels)
labels_pt
folds_pt=torch.tensor(folds)
folds_pt
# Saving the dataset to disk to prevent re-Loading
torch.save(features_pt, "UrbanSound8K/features_mfccs.pt")
torch.save(labels_pt, "UrbanSound8K/labels.pt")
torch.save(folds_pt, "UrbanSound8K/folds.pt")

#######################################################################

In [2]:
features=torch.load("UrbanSound8K/features_mfccs.pt")
labels=torch.load("UrbanSound8K/labels.pt")
folds=torch.load("UrbanSound8K/folds.pt")

In [3]:
def get_dataset(skip_fold):
    local_features=[]
    local_labels=[]
    for i in range(len(folds)):
        if folds[i]==skip_fold:
            continue
        local_features.append(features[i])
        local_labels.append(labels[i])
    local_features=torch.stack(local_features)
    local_labels=torch.stack(local_labels)
    return TensorDataset(local_features, local_labels), local_features, local_labels

In [4]:
dataset, local_features, local_labels = get_dataset(skip_fold=10)

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(local_features.numpy(), local_labels.numpy(), test_size=0.20, random_state=42)

In [6]:
val_size=int(0.2*len(dataset))
train_size=len(dataset)-val_size

train_ds, val_ds=random_split(dataset, [train_size, val_size])

len(train_ds), len(val_ds)

(6316, 1579)

batch_size=128
train_loader=DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader=DataLoader(val_ds, batch_size, num_workers=4, pin_memory=True)

# Looking at a batch of data
for x,y in train_loader:
    print(f"feautres: {x}\nlabels: {y}")
    print(f"dtypes: (x)->{x.dtype}, (y)->{y.dtype}")
    break

In [7]:
plt.rcParams["legend.loc"] = "best"
plt.rcParams['figure.facecolor'] = 'white'

In [8]:
#%matplotlib inline
names = ['Air Conditioner',
        'Car Horn',
        'Children Playing',
        'Dog bark',
        'Drilling',
        'Engine Idling',
        'Gun Shot',
        'Jackhammer',
        'Siren',
        'Street Music']

In [9]:
experiments = [] 

#2 class
for i in range(9):
    for j in range(i + 1, 10):
        experiments.append((i, j))

#3 class
experiments.extend([(0, 1, 2), (0, 1, 3), (0, 1, 4), (0, 1, 5)\
               , (0, 1, 6), (0, 1, 7), (0, 1, 8), (0, 1, 9)])
#4 class
#experiments.extend([(0, 1, 2, 3), (0, 1, 2, 4), (0, 1, 2, 5), (0, 1, 2, 6)\
#               , (0, 1, 2, 7), (0, 1, 2, 8), (0, 1, 2, 9)])

In [10]:
# filter python warnings
def run():
    torch.multiprocessing.freeze_support()
    print('loop')

if __name__ == '__main__':
    run()
                

loop


In [11]:
def initialize_model(model_name, num_classes, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        """ Resnet18
        """
        model_ft = models.resnet18(pretrained=use_pretrained)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size

In [14]:
for classes in experiments:
    fraction_of_train_samples_space = np.geomspace(.001, 1, num=8)
    trials = 1
    num_classes = len(classes)
    
    #resnet18
    resnet_acc = list()
    simplecnn = list() 
    cnn2layer = list()
    complexcnn = list() 
    
    for fraction_of_train_samples in fraction_of_train_samples_space:
 
        train_loader, test_loader = create_loaders_set(y_train, y_test, classes, train_ds, val_ds, int(fraction_of_train_samples * 10000))
        
        resnet18, input_size = initialize_model('resnet', num_classes, use_pretrained=True)
        best_accuracy = np.mean([run_dn_image(resnet18, train_loader, test_loader) for _ in range(trials)])
        resnet_acc.append(best_accuracy)
        print("resnet Train Fraction:", str(fraction_of_train_samples), "Accuracy:", str(best_accuracy))

        
        cnn32 = SimpleCNN32Filter(num_classes)
        mean_accuracy = np.mean([run_dn_image(cnn32, train_loader, test_loader) for _ in range(trials)])
        simplecnn.append(mean_accuracy)
        print("simple Train Fraction:", str(fraction_of_train_samples), "Accuracy: ", str(mean_accuracy))

        
        cnn32 = SimpleCNN32Filter2Layers(num_classes)
        mean_accuracy = np.mean([run_dn_image(cnn32, train_loader, test_loader) for _ in range(trials)])
        cnn2layer.append(mean_accuracy)
        print("Train Fraction:", str(fraction_of_train_samples), " Cnn 2 layer Accuracy: ", str(mean_accuracy))

        
        cnn32 = CNN5Layer(num_classes)
        mean_accuracy = np.mean([run_dn_image(cnn32, train_loader, test_loader) for _ in range(trials)])
        complexcnn.append(mean_accuracy)
        print("complex Train Fraction:", str(fraction_of_train_samples), " Accuracy: ", str(mean_accuracy))
    
    #naive RF
    rf_acc = list()
    for fraction_of_train_samples in fraction_of_train_samples_space:
        RF = RandomForestClassifier(n_estimators=100, n_jobs = -1)
        best_accuracy = np.mean([run_rf_image_set(RF, X_train, y_train, X_test, y_test, int(fraction_of_train_samples * 10000), classes) for _ in range(trials)])
        rf_acc.append(best_accuracy)
        print("Train Fraction:", str(fraction_of_train_samples))
        print("Accuracy:", str(best_accuracy))
               
    
    
    plt.rcParams['figure.figsize'] = 13, 10
    plt.rcParams['font.size'] = 25
    plt.rcParams['legend.fontsize'] = 16.5
    plt.rcParams['legend.handlelength'] = 2.5
    plt.rcParams['figure.titlesize'] = 20
    plt.rcParams['xtick.labelsize'] = 15
    plt.rcParams['ytick.labelsize'] = 15
    

    fig, ax = plt.subplots() # create a new figure with a default 111 subplot
    ax.plot(fraction_of_train_samples_space*10000, rf_acc, marker='X', markerfacecolor='red', markersize=8, color='green', linewidth=3, linestyle=":", label="RF")
    ax.plot(fraction_of_train_samples_space*10000, resnet_acc, marker='X', markerfacecolor='red', markersize=8, color='green', linewidth=3, linestyle="--", label="Resnet18")
    ax.plot(fraction_of_train_samples_space*10000, simplecnn, marker='X', markerfacecolor='red', markersize=8, color='green', linewidth=3, label="simpleCNN")
    ax.plot(fraction_of_train_samples_space*10000, cnn2layer, marker='X', markerfacecolor='red', markersize=8, color='orange', linewidth=3, linestyle=":", label="2layerCNN")
    ax.plot(fraction_of_train_samples_space*10000, complexcnn, marker='X', markerfacecolor='red', markersize=8, color='orange', linewidth=3, label="5layerCNN")


    ax.set_xlabel('Number of Train Samples', fontsize=18)
    ax.set_xscale('log')
    ax.set_xticks([i*10000 for i in list(fraction_of_train_samples_space)])
    ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
    
    ax.set_ylabel('Accuracy', fontsize=18)
    
    graph_title = str(classes[0]) + " (" + names[classes[0]] + ") "
    file_title = str(classes[0])
    for j in range(1, len(classes)):
        graph_title = graph_title + " vs " + str(classes[j]) + names[classes[j]]
        file_title = file_title + "-" + str(classes[j])
    ax.set_title(graph_title, fontsize=18)
    plt.legend()
    plt.savefig("cifar_results_fixed/" + file_title)
    table = pd.DataFrame(np.concatenate(([rf_acc], [resnet_acc], [simplecnn], [cnn2layer], [complexcnn]), axis=0))
    algos = ['RF', 'resnet', 'simpleCNN', '2layercnn', '5layercnn']
    table['algos'] = algos
    cols = table.columns.tolist()
    cols = [cols[-1]] + cols[:-1]
    cols = pd.Index(cols)
    table = table[cols]
    table.to_csv("cifar_results_fixed/" + file_title + ".csv", index=False)


AttributeError: 'Subset' object has no attribute 'tensors'