In [1]:
import os
import sys
import numpy as np
import argparse
import h5py
import math
import time
import logging
import matplotlib.pyplot as plt
import torchvision
from sklearn import metrics
import _pickle as cPickle
import shutil

import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import Linear, NLLLoss, LogSoftmax, Sequential
from torch.optim import Adam

from models import Cnn14_scatter
from data_generator import monoDataset

from kymatio import Scattering1D

In [2]:
DATA_ROOT = '/home/laura/monophonic/data'

train_npz_path=DATA_ROOT+'/train.npz'
test_npz_path=DATA_ROOT+'/test.npz'

train_dataset = monoDataset(npz_path=train_npz_path, audio_length=220500, classes_num=7)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=32,
    num_workers=8,
    shuffle=True
)

test_dataset = monoDataset(npz_path=test_npz_path, audio_length=220500, classes_num=7)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=32,
    num_workers=8,
    shuffle=True
)

In [3]:
T = 110250
J = 6
Q = 8
num_classes=7
log_eps = 1e-6

In [4]:
scattering = Scattering1D(J, T, Q)
scattering.cuda()

<kymatio.scattering1d.scattering1d.Scattering1D at 0x7fc53c434810>

In [5]:
# Number of epochs.
num_epochs = 50
# Learning rate for Adam.
lr = 1e-4

In [6]:
first = True

for e in range(num_epochs):
    for batch_data_dict in train_loader:

        batch_input = batch_data_dict['waveform'].cuda()
        batch_target = batch_data_dict['target'].cuda()
        
        y_tr = torch.argmax(batch_target,axis=1)
        y_tr = y_tr.long()

        Sx_tr = scattering.forward(batch_input)
        Sx_tr = Sx_tr[:,1:,:]
        Sx_tr = torch.log(torch.abs(Sx_tr) + log_eps)
        Sx_tr = torch.mean(Sx_tr, dim=-1)

        # This part is very wrong!!!
        mu_tr = Sx_tr.mean(dim=0)
        std_tr = Sx_tr.std(dim=0)
        Sx_tr = (Sx_tr - mu_tr) / std_tr

        if (first):
            num_input = Sx_tr.shape[-1]
            model = Sequential(Linear(num_input, num_classes), LogSoftmax(dim=1))
            optimizer = Adam(model.parameters())
            criterion = NLLLoss()
            model.cuda()
            criterion.cuda()
            
            first = False
            
        model.zero_grad()
        resp = model.forward(Sx_tr)
        loss = criterion(resp, y_tr)
        loss.backward()
        optimizer.step()
        
    # Calculate the response of the training data at the end of this epoch and
    # the average loss.
    resp = model.forward(Sx_tr)
    avg_loss = criterion(resp, y_tr)

    # Try predicting the classes of the signals in the training set and compute
    # the accuracy.
    y_hat = resp.argmax(dim=1)
    accuracy = (y_tr == y_hat).float().mean()

    print('Epoch {}, average loss = {:1.3f}, accuracy = {:1.3f}'.format(
        e, avg_loss, accuracy))

Epoch 0, average loss = 1.463, accuracy = 0.438
Epoch 1, average loss = 1.096, accuracy = 0.656
Epoch 2, average loss = 0.941, accuracy = 0.781
Epoch 3, average loss = 0.952, accuracy = 0.719
Epoch 4, average loss = 0.635, accuracy = 0.906
Epoch 5, average loss = 0.772, accuracy = 0.719
Epoch 6, average loss = 0.710, accuracy = 0.750
Epoch 7, average loss = 0.941, accuracy = 0.688
Epoch 8, average loss = 0.445, accuracy = 0.906
Epoch 9, average loss = 0.596, accuracy = 0.812
Epoch 10, average loss = 0.506, accuracy = 0.938
Epoch 11, average loss = 0.640, accuracy = 0.875
Epoch 12, average loss = 0.414, accuracy = 0.906
Epoch 13, average loss = 0.607, accuracy = 0.781
Epoch 14, average loss = 0.376, accuracy = 0.875
Epoch 15, average loss = 0.760, accuracy = 0.719
Epoch 16, average loss = 0.440, accuracy = 0.906
Epoch 17, average loss = 0.625, accuracy = 0.844
Epoch 18, average loss = 0.513, accuracy = 0.812
Epoch 19, average loss = 0.324, accuracy = 0.969
Epoch 20, average loss = 0.465

In [8]:
avg_acc = 0
avg_loss_total = 0
iteration = 0

for batch_data_dict in test_loader:

        batch_input = batch_data_dict['waveform'].cuda()
        batch_target = batch_data_dict['target'].cuda()
        
        y_ts = torch.argmax(batch_target,axis=1)
        y_ts = y_ts.long()

        Sx_ts = scattering.forward(batch_input)
        Sx_ts = Sx_ts[:,1:,:]
        Sx_ts = torch.log(torch.abs(Sx_ts) + log_eps)
        Sx_ts = torch.mean(Sx_ts, dim=-1)

        Sx_ts = (Sx_ts - mu_tr) / std_tr

        resp = model.forward(Sx_ts)
        avg_loss = criterion(resp, y_ts)

        # Try predicting the labels of the signals in the test data and compute the
        # accuracy.
        y_hat = resp.argmax(dim=1)
        accu = (y_ts == y_hat).float().mean()
        
        avg_loss_total += avg_loss
        avg_acc += accu
        
        iteration +=1

print('TEST, average loss = {:1.3f}, accuracy = {:1.3f}'.format(
        avg_loss_total/iteration, avg_acc/iteration))

TEST, average loss = 0.366, accuracy = 0.875
