In [1]:
import os
import numpy as np
import pandas as pd
from glob import glob

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import torchvision.models



In [2]:
class Task5Model(nn.Module):
    """MobileNetV2-based classifier for single-channel 2-D inputs.

    The network maps the one-channel input to three channels with two 1x1
    convolutions, runs the result through the MobileNetV2 feature extractor,
    takes the maximum over the two trailing axes of the feature map, and
    applies a small fully connected head that emits one raw score per class.
    No sigmoid/softmax is applied here; callers do that themselves.

    The attribute names ``bw2col``, ``mv2`` and ``final`` determine the keys
    of the ``state_dict``. Do not rename them, and keep the full MobileNetV2
    module (not only ``.features``) in ``mv2``, otherwise existing
    checkpoints no longer load.

    Args:
        num_classes: Number of scores produced per input.
        pretrained: Forwarded to ``torchvision.models.mobilenet_v2``. The
            default (True) keeps the original behavior. Pass False when a
            checkpoint is loaded right after construction, so no weights are
            fetched only to be overwritten.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        # Learned 1 -> 3 channel mapping so the single-channel input matches
        # the three input channels the MobileNetV2 feature extractor expects.
        self.bw2col = nn.Sequential(
            nn.BatchNorm2d(1),
            nn.Conv2d(1, 10, 1, padding=0), nn.ReLU(),
            nn.Conv2d(10, 3, 1, padding=0), nn.ReLU())

        self.mv2 = torchvision.models.mobilenet_v2(pretrained=pretrained)

        # Classification head applied to the 1280 pooled feature channels.
        self.final = nn.Sequential(
            nn.Linear(1280, 512), nn.ReLU(), nn.BatchNorm1d(512),
            nn.Linear(512, num_classes))

    def forward(self, x):
        """Compute raw class scores.

        Args:
            x: Tensor of shape ``(batch, 1, H, W)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with un-normalized scores.
        """
        x = self.bw2col(x)
        x = self.mv2.features(x)
        # Global max pooling: reduce the last axis, then the (new) last axis,
        # leaving one value per feature channel.
        x = x.max(dim=-1)[0].max(dim=-1)[0]
        x = self.final(x)
        return x

In [3]:
# Base names of the evaluation clips. glob() returns matches in arbitrary,
# filesystem-dependent order, so sort them to make the row order of the
# feature array and of the written prediction files reproducible.
eval_files = sorted(
    os.path.basename(path) for path in glob('../../data/audio-eval/*.wav'))

In [4]:
# Load one feature array per evaluation clip from '<clip file name>.npy',
# transpose it and keep at most its first 635 rows (presumably time frames
# of a log-mel spectrogram -- TODO confirm), then stack all clips along a new
# leading axis. A singleton axis is inserted at position 1 afterwards, which
# the model consumes as its single input channel.
X = np.concatenate(
    [np.load('../../data/logmelspec-eval/{}.npy'.format(name)).T[np.newaxis, :635, :]
     for name in eval_files],
    axis=0)[:, np.newaxis, :, :]

In [5]:
# Standardize the features with statistics stored on disk: subtract the
# saved means, then divide by the saved standard deviations (both broadcast
# against X).
# NOTE: this cell overwrites X, so re-running it without re-running the
# loading cell normalizes the data twice.
channel_stds = np.load('../../data/channel_stds.npy')
channel_means = np.load('../../data/channel_means.npy')
X = X - channel_means
X = X / channel_stds

In [6]:
class AudioDataset(Dataset):
    """Dataset that yields each stored sample with a random circular shift.

    Every access draws a fresh offset from NumPy's global random state and
    rotates the sample along its axis 1, so repeated reads of the same index
    generally return differently shifted copies.
    """

    def __init__(self, X):
        # X must be a tensor whose first axis enumerates the samples.
        self.X = X

    def __len__(self):
        num_samples = self.X.shape[0]
        return num_samples

    def __getitem__(self, idx):
        clip = self.X[idx, ...]
        # Offset in [0, length of axis 1); the element at `offset` ends up
        # first and the leading `offset` elements wrap around to the end.
        offset = np.random.randint(clip.shape[1])
        return torch.roll(clip, shifts=-int(offset), dims=1)

In [7]:
# Wrap the normalized features as a tensor-backed dataset and iterate it in
# batches of 64. Shuffling stays off so prediction rows keep the same order
# as eval_files.
dataset = AudioDataset(X=torch.Tensor(X))
loader = DataLoader(dataset=dataset, batch_size=64, shuffle=False)

In [8]:
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the notebook still runs on machines without a GPU instead of failing later
# when the model is moved to the device.
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
print('Device: ', device)

Device:  cuda:0


In [9]:
from glob import glob

In [12]:
# Build the network with 31 outputs (one per entry of output_cols, defined
# in a later cell) and move its parameters to the selected device.
model = Task5Model(num_classes=31)
model = model.to(device)

In [13]:
# Top-level tags ('N_name') and sub-level tags ('N-M_name'). Both column
# orders below are assembled from these two lists so the names are written
# only once.
top_level_cols = [
    '1_engine',
    '2_machinery-impact',
    '3_non-machinery-impact',
    '4_powered-saw',
    '5_alert-signal',
    '6_music',
    '7_human-voice',
    '8_dog',
]
sub_level_cols = [
    '1-1_small-sounding-engine',
    '1-2_medium-sounding-engine',
    '1-3_large-sounding-engine',
    '2-1_rock-drill',
    '2-2_jackhammer',
    '2-3_hoe-ram',
    '2-4_pile-driver',
    '3-1_non-machinery-impact',
    '4-1_chainsaw',
    '4-2_small-medium-rotating-saw',
    '4-3_large-rotating-saw',
    '5-1_car-horn',
    '5-2_car-alarm',
    '5-3_siren',
    '5-4_reverse-beeper',
    '6-1_stationary-music',
    '6-2_mobile-music',
    '6-3_ice-cream-truck',
    '7-1_person-or-small-group-talking',
    '7-2_person-or-small-group-shouting',
    '7-3_large-crowd',
    '7-4_amplified-speech',
    '8-1_dog-barking-whining',
]

# Names assigned to the columns of the prediction matrix, in model output
# order: top-level tags first, then sub-level tags.
output_cols = top_level_cols + sub_level_cols

# Column order of the written CSV: file name, sub-level tags, top-level tags.
cols_in_order = ['audio_filename'] + sub_level_cols + top_level_cols

In [14]:
import os

In [16]:
# Number of passes over the evaluation set whose predictions are averaged.
N_TTA_PASSES = 10


def predict_with_tta(model, loader, device, n_passes=N_TTA_PASSES):
    """Return sigmoid probabilities averaged over several passes of ``loader``.

    The loader is iterated ``n_passes`` times. When its dataset randomizes
    each item on access (as AudioDataset does with its circular shift), every
    pass sees different inputs, and the mean acts as test-time augmentation.
    No random seed is set here, so results can vary between runs.

    Args:
        model: Module mapping a batch of inputs to raw scores of shape
            ``(batch, num_outputs)``. It is switched to eval mode.
        loader: Re-iterable yielding input batches in a fixed order.
        device: Device the batches are moved to before the forward pass.
        n_passes: Number of passes to average; must be at least 1.

    Returns:
        NumPy array of shape ``(num_samples, num_outputs)`` holding the mean
        probability of each output.

    Raises:
        ValueError: If ``n_passes`` is smaller than 1.
    """
    if n_passes < 1:
        raise ValueError('n_passes must be at least 1')

    # Eval mode is set once, not per batch.
    model.eval()
    pass_probs = []
    with torch.no_grad():
        for _ in range(n_passes):
            # torch.sigmoid is numerically stable for large |score|, unlike a
            # hand-written 1 / (1 + exp(-x)) which overflows in exp().
            batch_probs = [
                torch.sigmoid(model(inputs.to(device))).cpu().numpy()
                for inputs in loader]
            pass_probs.append(np.concatenate(batch_probs, axis=0))

    # Averaging a stacked copy leaves the per-pass arrays unmodified.
    return np.mean(pass_probs, axis=0)


for a_model_file in sorted(glob('*_model')):
    print(a_model_file)

    # map_location lets checkpoints saved on a GPU load on the device in use.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    model.load_state_dict(torch.load(a_model_file, map_location=device))

    preds = predict_with_tta(model, loader, device)

    # Label the columns in model output order, attach the clip names (rows
    # follow eval_files because the loader does not shuffle), then reorder
    # the columns for the CSV.
    output_df = pd.DataFrame(preds, columns=output_cols)
    output_df['audio_filename'] = eval_files
    output_df = output_df.loc[:, cols_in_order]

    a_file = os.path.basename(a_model_file)
    output_df.to_csv('{}.csv'.format(a_file), index=False)

no_pretrained_run1_model
no_pretrained_run2_model
no_pretrained_run3_model
with_pretrained_run1_model
with_pretrained_run2_model
with_pretrained_run3_model
