In [1]:
import torch
import os
import argparse
import torch.nn.functional as F
import ipdb
import numpy as np
import logging

from torch import nn
from data.preprocessing import AudioProcessor
from data.audio_transforms import MelScale
from data.loaders import get_data_loader
from tqdm import tqdm, trange
from utils.utils import mkdir_in_path, GPU_is_available
from torch.utils.data import DataLoader

from os.path import dirname, realpath, join
from datetime import datetime
from sklearn.metrics import confusion_matrix, classification_report
from utils.utils import read_json
from datetime import datetime
from evaluation.inception_network import SpectrogramInception3
from pprint import pprint

In [2]:
# Run identifier and base folder; both are read by the output-path cell below.
name = "name"
path = "evaluation/inception_models"

# Experiment description. The "loader_config" section is unpacked into the
# dataset loader and the "transform_config" section into AudioProcessor.
config = dict(
    name="inception train on footsteps",
    comments="inception train on footsteps",
    output_path="../output_folder",
    loader_config=dict(
        dbname="footsteps",
        data_path="audio/footsteps/",
        criteria={},
        shuffle=True,
    ),
    transform_config=dict(
        transform="stft",
        fade_out=True,
        fft_size=1024,
        win_size=1024,
        n_frames=64,
        hop_size=256,
        log=False,
        ifreq=False,
        sample_rate=16000,
        # NOTE(review): presumably expressed in samples (equal to sample_rate
        # here) -- confirm against AudioProcessor.
        audio_length=16000,
    ),
)

# Training hyper-parameters.
batch_size = 1
n_epoch = 100

In [3]:
# Output locations for the trained model and its log file.
# NOTE(review): mkdir_in_path is a project helper; presumably it creates and
# returns the 'inception_models' folder under `path` -- confirm.
output_path = mkdir_in_path(path, 'inception_models')

# Format the date once so the .pt and .log names always carry the same stamp,
# even when this cell happens to run across midnight.
date_stamp = datetime.now().strftime('%Y-%m-%d')
output_file = join(output_path, f"{name}_{date_stamp}.pt")
output_log = join(output_path, f"{name}_{date_stamp}.log")

# basicConfig does nothing if the root logger already has handlers, so
# re-running this cell in the same kernel keeps logging to the first file.
logging.basicConfig(filename=output_log, level=logging.INFO)

# Pull the individual settings out of `config`, echoing each one as it is read.
loader_config = config['loader_config']
print("-- TRAIN INCEPTION MODEL: loader_config --")
print(loader_config)
print()

transform_config = config['transform_config']
print("-- TRAIN INCEPTION MODEL: transform_config --")
print(transform_config)
print()

transform = transform_config['transform']
print("-- TRAIN INCEPTION MODEL: transform --")
print(transform)
print()

dbname = loader_config['dbname']
print("-- TRAIN INCEPTION MODEL: dbname --")
print(dbname)
print()

# Look up the loader class registered for this dataset name.
loader_module = get_data_loader(dbname)
print("-- TRAIN INCEPTION MODEL: loader_module --")
print(loader_module)
print()

-- TRAIN INCEPTION MODEL: loader_config --
{'dbname': 'footsteps', 'data_path': 'audio/footsteps/', 'criteria': {}, 'shuffle': True}

-- TRAIN INCEPTION MODEL: transform_config --
{'transform': 'stft', 'fade_out': True, 'fft_size': 1024, 'win_size': 1024, 'n_frames': 64, 'hop_size': 256, 'log': False, 'ifreq': False, 'sample_rate': 16000, 'audio_length': 16000}

-- TRAIN INCEPTION MODEL: transform --
stft

-- TRAIN INCEPTION MODEL: dbname --
footsteps

-- TRAIN INCEPTION MODEL: loader_module --
<class 'data.loaders.footsteps_loader.FootstepsDataLoader'>



In [4]:
# Build the audio processor, passing every transform_config entry as a keyword argument.
processor = AudioProcessor(**transform_config)

-- DATA PROCESSOR: __init__

-- AUDIO PROCESSOR: init_tranform_pipeline
Configuring stft transform...
self.transform:
stft



In [5]:
# Instantiate the dataset loader. Its name combines the dataset and transform
# identifiers; the remaining loader settings are forwarded as keyword arguments.
loader = loader_module(
    name=f"{dbname}_{transform}",
    preprocessing=processor,
    **loader_config,
)

-- DATALOADER: __init__

-- FOOSTEPS DATALOADER: load_data

len(self.data):
721
len(self.metadata): 
721

Dataset audio/footsteps/processed/footsteps_stft/footsteps_stft.pt exists. Reloading...
-- DATALOADER: load_from_pt_file



In [6]:
# Mel-scale transform configured from the STFT settings. The config defined in
# this notebook has no 'n_mel' entry, so the 256 fallback applies.
mel = MelScale(
    rm_dc=True,
    n_mel=transform_config.get('n_mel', 256),
    fft_size=transform_config['fft_size'],
    sample_rate=transform_config['sample_rate'],
)

In [10]:
# Fetch the validation split from the loader.
val_data, val_labels = loader.get_validation_set()
# Keep only index 0 along dim 1; slicing (not indexing) preserves that dim.
# NOTE(review): presumably this selects a single spectrogram channel -- confirm.
val_data = val_data[:, :1]

-- DATA LOADER: get_validation_set


In [11]:
# Wrap the dataset loader in a torch DataLoader for shuffled mini-batches,
# fetched by two worker processes.
data_loader = DataLoader(
    dataset=loader,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

In [12]:
# The class count is the number of distinct entries in the loader's metadata.
num_classes = len(set(loader.metadata))
print("-- NUM CLASSES --", num_classes, "", sep="\n")

-- NUM CLASSES --
6



In [13]:
# Use the GPU when the project helper reports one, otherwise stay on the CPU.
if GPU_is_available():
    device = "cuda"
else:
    device = "cpu"

# Classifier network with one output per class, wrapped in DataParallel.
inception_model = nn.DataParallel(
    SpectrogramInception3(num_classes, aux_logits=False)
)
inception_model.to(device)

# Only parameters that require gradients are handed to Adam.
optim = torch.optim.Adam(
    (p for p in inception_model.parameters() if p.requires_grad),
    lr=0.001,
    betas=[0, 0.99],
)

criterion = nn.CrossEntropyLoss()

Cuda not available. Running on CPU


In [14]:
# Draw a single batch to sanity-check what the DataLoader yields.
data_iter = iter(data_loader)
# Use the builtin next(): DataLoader iterators implement __next__, and the
# Python 2 style .next() alias is absent in current PyTorch releases.
# NOTE: `input` shadows the builtin of the same name; the name is kept because
# a later cell reads it.
input, labels = next(data_iter)

In [15]:
# Inspect the dimensions of the batch drawn from data_loader.
input.shape

torch.Size([1, 2, 512, 64])

In [16]:
# Show the labels that came with the sampled batch.
labels

tensor([0])