In [1]:
from nnAudio import features
from scipy.io import wavfile
import torch
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader
import os
from sklearn.preprocessing import LabelEncoder
from glob import glob
from net_factory import get_network_fn
from torchsummary import summary

import torch.optim as optim
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR, MultiStepLR
import torch.nn.functional as F
from utils import accuracy, AverageMeter, save_checkpoint, visualize_graph, get_parameters_size
from tensorboardX import SummaryWriter
import time

In [4]:
# Root folder of the training audio; it is listed for class names and globbed for WAV files.
path = "./DeepShip_train/"

In [21]:
# Recursively collect every WAV file below `path`.
# - `endswith('.wav')` replaces the old substring test (`'.wav' in x`), which
#   also matched names that merely contain ".wav" (e.g. "a.wav.bak" or a
#   directory called "foo.wav_old").
# - Separators are normalised to '/' so the paths look the same on every OS.
# - The result is sorted because glob() returns entries in arbitrary order,
#   which would make any slice of this list differ between runs.
wav_paths = sorted(
    x.replace(os.sep, '/')
    for x in glob('{}/**'.format(path), recursive=True)
    if x.endswith('.wav')
)

In [7]:
# Class names are the visible sub-directories of `path`. Plain files (a README,
# a CSV index, ...) are skipped: os.listdir() returns them too, and fitting the
# encoder on them would create spurious classes and shift the label indices.
classes = sorted(
    f for f in os.listdir(path)
    if not f.startswith('.') and os.path.isdir(os.path.join(path, f))
)
le = LabelEncoder()
le.fit(classes)

# Each file is labelled with the name of its immediate parent directory, then
# encoded as an integer index by the fitted LabelEncoder.
# NOTE(review): this assumes the WAV files sit directly inside a class folder;
# deeper nesting would yield a name the encoder has not seen - confirm layout.
labels = [os.path.basename(os.path.dirname(x)) for x in wav_paths]
labels = le.transform(labels)

In [8]:
class MyDataset(torch.utils.data.Dataset):
    """
    Map-style dataset that pairs WAV file paths with their labels.

    Audio is read from disk lazily, one file per ``__getitem__`` call.
    """

    def __init__(self, auList, labelList):
        """
        :param auList: sequence of paths to WAV files
        :param labelList: sequence of labels, aligned index-for-index with ``auList``
        """
        self.auList, self.labelList = auList, labelList

    def __len__(self):
        # The dataset size is the number of audio paths.
        n_items = len(self.auList)
        return n_items

    def __getitem__(self, idx):
        """
        :param idx: index of the audio file to load
        :return: ``(audio, label)`` where ``audio`` is the sample array read
                 from the WAV file and ``label`` is the matching label entry
        """
        wav_file = self.auList[idx]
        # wavfile.read returns (sample_rate, samples); the rate is discarded.
        samples = wavfile.read(wav_file)[1]
        return samples, self.labelList[idx]

In [22]:
# Data preparation
# NOTE(review): only the first two files/labels are wrapped here and shuffling
# is off - this looks like a smoke-test subset; confirm before real training.
data_loader = DataLoader(
    MyDataset(wav_paths[:2], labels[:2]),
    batch_size=2,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
)

# Sanity check: show the shape of each audio batch and its label tensor.
for batch_idx, (data, target) in enumerate(data_loader):
    print(data.shape)
    print(target)

torch.Size([2, 96000])
tensor([0, 0])


In [11]:
# Build the network and move it to the compute device.
model = get_network_fn('audioGabor')
# Fall back to the CPU when CUDA is unavailable instead of crashing in .to();
# `device` is reused by later cells for the loss and for every batch.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
# Print a per-layer summary for a single-channel input of 96000 samples.
summary(model, input_size=(1, 96000))

STFT kernels created, time used = 0.1453 seconds
STFT filter created, time used = 0.0044 seconds
Mel filter created, time used = 0.0044 seconds
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
              STFT-1            [-1, 1025, 188]               0
    MelSpectrogram-2             [-1, 128, 188]               0
           GaborYu-3          [-1, 1, 128, 188]              36
            Conv2d-4         [-1, 20, 126, 186]             180
              ReLU-5         [-1, 20, 126, 186]               0
         MaxPool2d-6           [-1, 20, 63, 93]               0
            Conv2d-7           [-1, 40, 63, 93]          20,000
              ReLU-8           [-1, 40, 63, 93]               0
         MaxPool2d-9           [-1, 40, 31, 46]               0
           Conv2d-10           [-1, 80, 31, 46]          80,000
             ReLU-11           [-1, 80, 31, 46]               0
        MaxPool2d-12   

In [9]:
# SGD over the parameters that still require gradients (frozen ones are skipped).
optimizer = optim.SGD(
    (p for p in model.parameters() if p.requires_grad),
    lr=0.01,
    momentum=0.9,
    weight_decay=3e-05,
)
# Multiply the learning rate by 0.1 every 10 scheduler steps.
# NOTE(review): the original inline note read "code--10, 0.5", presumably an
# alternative (step_size, gamma) setting - confirm which one is intended.
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)
# Cross-entropy loss, placed on the same device as the model.
criterion = nn.CrossEntropyLoss().to(device)

In [13]:
# Notebook-level training state.
iteration = 0   # global batch counter; train() increments it and uses it as the log step
best_prec1 = 0  # NOTE(review): not read by any cell visible here - presumably best accuracy so far
writer = SummaryWriter(comment='_audioGabor_')  # TensorBoard event writer for this run
def train(epoch):
    """
    Run one training epoch over ``data_loader`` and then step the LR scheduler.

    Uses the notebook-level ``model``, ``data_loader``, ``device``,
    ``optimizer``, ``criterion``, ``scheduler`` and ``writer``, and increments
    the global ``iteration`` counter once per batch.

    :param epoch: epoch number; only used in the progress message.
    """
    global iteration
    model.train()
    st = time.time()
    # Running count of samples processed before the current batch. The old
    # `batch_idx * len(data)` under-reported progress whenever the logged
    # batch was a short final batch.
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(data_loader):
        iteration += 1
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)

        prec1, = accuracy(output, target)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            # Convert once to Python floats so the console and both
            # TensorBoard scalars receive the same kind of value.
            loss_value, prec1_value = loss.item(), prec1.item()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accuracy: {:.2f}'.format(
                epoch, samples_seen, len(data_loader.dataset),
                100. * batch_idx / len(data_loader), loss_value, prec1_value))
            writer.add_scalar('Loss/Train', loss_value, iteration)
            writer.add_scalar('Accuracy/Train', prec1_value, iteration)
        samples_seen += len(data)
    epoch_time = time.time() - st
    # Read the learning rate before stepping the scheduler so the value
    # printed is the one this epoch actually used.
    lr = optimizer.param_groups[0]['lr']
    print('Epoch time:{:0.2f}s'.format(epoch_time),  '\tlearning-rate:', lr)
    scheduler.step()

In [15]:
# Train for a fixed number of epochs, printing a separator line before each.
epochs = 10
for epoch in range(epochs):
    print('------------------------------------------------------------------------')
    # train() receives 1-based epoch numbers for its progress message.
    train(epoch + 1)

------------------------------------------------------------------------
Epoch time:0.48s 	learning-rate: 0.01
------------------------------------------------------------------------
Epoch time:0.45s 	learning-rate: 0.01
------------------------------------------------------------------------
Epoch time:0.44s 	learning-rate: 0.01
------------------------------------------------------------------------
Epoch time:0.44s 	learning-rate: 0.01
------------------------------------------------------------------------
Epoch time:0.46s 	learning-rate: 0.01
------------------------------------------------------------------------
Epoch time:0.51s 	learning-rate: 0.01
------------------------------------------------------------------------
Epoch time:0.53s 	learning-rate: 0.01
------------------------------------------------------------------------
Epoch time:0.45s 	learning-rate: 0.01
------------------------------------------------------------------------
Epoch time:0.46s 	learning-rate: 0.001


In [19]:
# Rebuild the network and restore the weights of the best checkpoint.
model = get_network_fn('audioGabor')
# map_location='cpu' lets a checkpoint saved on a GPU load on a machine without
# CUDA; no .to(device) is applied to the model in this cell, and
# load_state_dict copies the tensors into the model's own parameters.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
checkpoint = torch.load('model_best.pth.tar', map_location='cpu')
print(checkpoint.keys())
# Left as the last expression so the key-matching report is displayed.
model.load_state_dict(checkpoint['state_dict'])

STFT kernels created, time used = 0.1127 seconds
STFT filter created, time used = 0.0048 seconds
Mel filter created, time used = 0.0048 seconds
dict_keys(['epoch', 'state_dict', 'best_prec1', 'optimizer'])


<All keys matched successfully>

In [23]:
# Inference: print the top-1 predicted class index for every batch.
model.eval()
outputs = []  # NOTE(review): never filled in the visible cells - kept in case later cells use it

maxk = 1  # number of top predictions to keep per sample (was `max((1,))`)
# No gradients are needed for inference; no_grad() skips building the autograd
# graph, which saves memory and time.
with torch.no_grad():
    for batch, _labels in data_loader:
        # NOTE(review): the original comment said each forward call writes
        # attention maps to the "attention_maps" directory - confirm against the model.
        output = model(batch)
        # topk returns (values, indices); only the class indices are kept.
        pred = output.topk(maxk, 1, True, True)[1]
        pred = pred.t()
        print(pred)

tensor([[2, 2]])
