In [1]:
import os
import glob

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from preprocessing import convert_spectrograms, convert_tensor
from model_ser import CLDNN
from utils.optimization import WarmupLinearSchedule

In [2]:
# --- Model hyper-parameters (passed to CLDNN when the model is built) ---
conv_dim = '1d'
checkpoint = ''
hidden_size = 64
num_layers = 2
bidirectional = True
with_focus_attn = False

# --- Optimisation settings ---
batch_size = 128  # per-device value; scaled by the GPU count below when multi_gpu is set
learning_rate = 0.001
num_epochs = 100

# --- Learning-rate warmup ---
use_warmup = True
gradient_accumulation_steps = 1
warmup_proportion = 0.1  # fraction of the total scheduler steps used as warmup steps

multi_gpu = True

# NOTE: both repository paths are reassigned in a later cell to their 'concat' sub-directories.
normal_data_repo = './wav_data/normal'
violence_data_repo = './wav_data/violence'

output_dir = 'output'
save_checkpoint_steps = 50  # a checkpoint is written every this-many epochs by train()

cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda else "cpu")
if multi_gpu:
    # torch.cuda.device_count() is 0 on a CPU-only machine; multiplying by it would
    # produce batch_size == 0, which DataLoader rejects. Never scale below 1x.
    batch_size = batch_size * max(1, torch.cuda.device_count())

# exist_ok avoids the check-then-create race and makes re-running the cell harmless.
os.makedirs(output_dir, exist_ok=True)

In [3]:
# Point both data repositories at their 'concat' sub-directories.
# NOTE: this replaces the values assigned to the same names in the configuration cell above.
normal_data_repo = './wav_data/normal/concat'
violence_data_repo = './wav_data/violence/concat'

In [4]:
def _list_wav_files(root):
    """Return the sorted paths of every ``.wav`` file under ``root``, searched recursively.

    Args:
        root: directory to search; sub-directories at any depth are included.

    Returns:
        A sorted list of path strings (empty when nothing matches).
    """
    # '**' combined with recursive=True matches zero or more directory levels.
    # The pattern is '*.wav' (with the dot) so that only files carrying the .wav
    # extension match, not every name that merely ends in the letters "wav".
    return sorted(glob.glob(os.path.join(root, '**', '*.wav'), recursive=True))


normal_data = _list_wav_files(normal_data_repo)
violence_data = _list_wav_files(violence_data_repo)

In [5]:
# Run convert_spectrograms over each list of wav paths, normal class first.
X_normal, X_violence = [
    convert_spectrograms(wav_paths, conv_dim=conv_dim)
    for wav_paths in (normal_data, violence_data)
]

100%|██████████| 1/1 [00:00<00:00, 23.92it/s]
100%|██████████| 1/1 [00:00<00:00,  2.36it/s]


In [6]:
# Pass each feature set through convert_tensor, targeting the configured device.
X_normal, X_violence = [
    convert_tensor(features, device=device)
    for features in (X_normal, X_violence)
]

In [7]:
# One label per sample: 0 for the normal class, 1 for the violence class.
y_normal = np.zeros(shape=(len(X_normal),))
y_violence = np.ones(shape=(len(X_violence),))

In [8]:
# Turn the label arrays into float32 column tensors of shape (N, 1) on the target device.
y_normal = torch.tensor(y_normal, dtype=torch.float32, device=device).view(-1, 1)
y_violence = torch.tensor(y_violence, dtype=torch.float32, device=device).view(-1, 1)

In [9]:
# Stack the two classes along the first dimension; features and labels use the
# same order (normal first, then violence) so they stay index-aligned.
X = torch.cat([X_normal, X_violence])
y = torch.cat([y_normal, y_violence])

In [10]:
# Wrap features and labels so that indexing the dataset yields an (X[i], y[i]) pair.
train_ds = TensorDataset(X, y)

In [11]:
# Shuffled mini-batches loaded in the main process (num_workers=0);
# drop_last=True discards the final batch when it is smaller than batch_size.
train_dataloader = DataLoader(
    dataset=train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    drop_last=True,
)

In [12]:
# Build the network from the configured hyper-parameters in BOTH modes. Previously the
# single-device branch passed only conv_dim, silently ignoring checkpoint, hidden_size,
# num_layers, bidirectional and with_focus_attn.
model = CLDNN(conv_dim=conv_dim, checkpoint=checkpoint, hidden_size=hidden_size, num_layers=num_layers,
              bidirectional=bidirectional, with_focus_attn=with_focus_attn)

if multi_gpu:
    # DataParallel wraps the network; the underlying module stays reachable as
    # model.module, which is what train() saves when multi_gpu is set.
    model = torch.nn.DataParallel(model)

# Use the configured device rather than .cuda() so the cell also runs on a
# CPU-only machine (device is "cuda:0" whenever CUDA is available).
model = model.to(device)

In [13]:
# Binary cross-entropy computed on the model outputs directly.
# NOTE(review): BCELoss expects probabilities in [0, 1]; this assumes CLDNN already
# applies a sigmoid to its output — confirm against model_ser.
loss_func = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

if use_warmup:
    # Total number of scheduler steps for the whole run.
    # NOTE: gradient_accumulation_steps only affects this count; train() steps the
    # optimizer and scheduler once per batch, so keep it at 1 unless train() changes.
    t_total = len(train_dataloader) // gradient_accumulation_steps * num_epochs
    opt_scheduler = WarmupLinearSchedule(optimizer, warmup_steps=t_total * warmup_proportion, t_total=t_total)
else:
    # train() calls opt_scheduler.step() on every batch, so the name must exist even
    # without warmup. A constant multiplier of 1.0 leaves the learning rate unchanged.
    opt_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda step: 1.0)

In [14]:
def train(dataloader, epochs):
    """Train the notebook-level ``model`` on ``dataloader`` for ``epochs`` epochs.

    Reads these notebook globals: ``model``, ``optimizer``, ``loss_func``,
    ``use_warmup`` (and ``opt_scheduler`` when it is true), ``save_checkpoint_steps``,
    ``conv_dim`` and ``output_dir``.

    After each epoch the learning rate, mean batch loss and accuracy (outputs
    thresholded at 0.5) are printed. Every ``save_checkpoint_steps`` epochs the
    model's ``state_dict`` is saved under ``output_dir``.

    Note: the optimizer is stepped once per batch; ``gradient_accumulation_steps``
    is not applied here.

    Args:
        dataloader: iterable yielding ``(X_batch, y_batch)`` pairs.
        epochs: number of passes over ``dataloader``.

    Raises:
        ValueError: if ``dataloader`` yields no batches (for example when
            ``drop_last=True`` and the dataset is smaller than one batch).
    """
    # Make sure dropout / batch-norm layers are in training mode even if the model
    # was switched to eval mode by an earlier cell.
    model.train()

    # Save the underlying network's weights, not the DataParallel wrapper's. Checking
    # the actual type keeps this correct however the model was built.
    net_to_save = model.module if isinstance(model, nn.DataParallel) else model

    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        num_correct = 0
        num_samples = 0

        for X_batch, y_batch in dataloader:
            optimizer.zero_grad()

            outputs = model(X_batch)

            loss = loss_func(outputs, y_batch)
            loss.backward()

            optimizer.step()
            # The scheduler only exists when warmup is enabled.
            if use_warmup:
                opt_scheduler.step()

            running_loss += loss.item()
            num_batches += 1

            # Metrics need no gradient; .item() keeps the counter a plain Python int
            # instead of accumulating a tensor on the device.
            predictions = (outputs.detach() >= 0.5).float()
            num_correct += (predictions == y_batch).sum().item()
            num_samples += len(X_batch)

        if num_batches == 0:
            raise ValueError("dataloader produced no batches; check the dataset size, "
                             "batch_size and drop_last settings")

        train_loss = running_loss / num_batches
        accuracy = num_correct / num_samples

        # Report the learning rate of the last parameter group.
        lr = optimizer.param_groups[-1]['lr']
        print('epoch: {:3d},    lr={:6f},    loss={:5f},    accuracy={:5f}'
              .format(epoch+1, lr, train_loss, accuracy))

        if (epoch + 1) % save_checkpoint_steps == 0:
            model_checkpoint = "%s_%s_step_%d.pt" % ('CLDNN', conv_dim, epoch+1)
            output_model_file = os.path.join(output_dir, model_checkpoint)
            torch.save(net_to_save.state_dict(), output_model_file)
            print("Saving checkpoint %s" % output_model_file)

In [15]:
# Run the training loop: one progress line is printed per epoch and a checkpoint is
# written to output_dir every save_checkpoint_steps epochs.
train(train_dataloader, num_epochs)

  self.dropout, self.training, self.bidirectional, self.batch_first)


epoch:   1,    lr=0.000100,    loss=0.635439,    accuracy=0.942383
epoch:   2,    lr=0.000200,    loss=0.634247,    accuracy=0.941406
epoch:   3,    lr=0.000300,    loss=0.630397,    accuracy=0.941406
epoch:   4,    lr=0.000400,    loss=0.624086,    accuracy=0.940918
epoch:   5,    lr=0.000500,    loss=0.614684,    accuracy=0.942383
epoch:   6,    lr=0.000600,    loss=0.602402,    accuracy=0.942871
epoch:   7,    lr=0.000700,    loss=0.586427,    accuracy=0.943359
epoch:   8,    lr=0.000800,    loss=0.565916,    accuracy=0.941406
epoch:   9,    lr=0.000900,    loss=0.536928,    accuracy=0.942383
epoch:  10,    lr=0.001000,    loss=0.494502,    accuracy=0.943359
epoch:  11,    lr=0.000989,    loss=0.433899,    accuracy=0.940918
epoch:  12,    lr=0.000978,    loss=0.352026,    accuracy=0.942871
epoch:  13,    lr=0.000967,    loss=0.275212,    accuracy=0.943848
epoch:  14,    lr=0.000956,    loss=0.238669,    accuracy=0.939941
epoch:  15,    lr=0.000944,    loss=0.212908,    accuracy=0.94