In [1]:
import os
import glob
import numpy as np
import librosa
from scipy.io.wavfile import write
from preprocessing import preprocessing

In [2]:
sample_data_repo = os.path.join('.', 'wav_data')

samples_normal = glob.glob(os.path.join(sample_data_repo, 'normal', '**', '*wav'), recursive=True)
samples_normal = sorted(samples_normal)

samples_violence = glob.glob(os.path.join(sample_data_repo, 'violence', '**', '*wav'), recursive=True)
samples_violence = sorted(samples_violence)

In [3]:
len(samples_normal), len(samples_violence)

(10, 96)

In [4]:
samples_normal_list = []

for data_dir in samples_normal:
    y, sr = librosa.load(data_dir)
    samples_normal_list.append(y)

In [5]:
write('./wav_data/normal/concat/normal.wav', sr, np.concatenate(samples_normal_list))

In [6]:
samples_violence_list = []

for data_dir in samples_violence:
    y, sr = librosa.load(data_dir)
    samples_violence_list.append(y)

In [7]:
write('./wav_data/violence/concat/violence.wav', sr, np.concatenate(samples_violence_list))

In [8]:
samples_normal_concat = glob.glob(os.path.join(sample_data_repo, 'normal', 'concat', '*wav'), recursive=True)
samples_violence_concat = glob.glob(os.path.join(sample_data_repo, 'violence', 'concat', '*wav'), recursive=True)

In [9]:
concat_normal_tensors = []
normal_data_dirs = [samples_normal_concat]

for data_dir in normal_data_dirs:
    for i in range(len(data_dir)):
        concat_tensor = preprocessing(data_dir[i], method='mfcc', sr=22050)
        concat_normal_tensors.append(concat_tensor)

In [10]:
X_normal = np.concatenate(np.array(concat_normal_tensors), axis=0)
X_normal.shape

(129, 128, 100, 1)

In [11]:
y_normal = np.zeros(X_normal.shape[0])
y_normal.shape

(129,)

In [12]:
concat_violence_tensors = []
violence_data_dirs = [samples_violence_concat]

for data_dir in violence_data_dirs:
    for i in range(len(data_dir)):
        concat_tensor = preprocessing(data_dir[i], method='mfcc', sr=22050)
        concat_violence_tensors.append(concat_tensor)

In [13]:
X_violence = np.concatenate(np.array(concat_violence_tensors), axis=0)
X_violence.shape

(1813, 128, 100, 1)

In [14]:
y_violence = np.ones(X_violence.shape[0])
y_violence.shape

(1813,)

In [15]:
X = np.concatenate([X_normal, X_violence], axis=0)
y = np.concatenate([y_normal, y_violence], axis=0)
X.shape, y.shape

((1942, 128, 100, 1), (1942,))

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torch.utils.data import TensorDataset, DataLoader

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [17]:
batch_size = 64
lr = 0.001
num_epochs = 20

In [18]:
X = torch.tensor(X, device=device).float()
X = X.permute(0, 3, 1, 2)
y = torch.tensor(y, device=device).float()
y = y.view(-1, 1)

X.shape, y.shape

(torch.Size([1942, 1, 128, 100]), torch.Size([1942, 1]))

In [19]:
train_ds = TensorDataset(X, y)

In [20]:
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=True)

In [21]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, 5, padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Conv2d(16, 16, 5, padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(2, 2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, 5, padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2, 2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, (5, 4), padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.ReLU()
        )
        self.fc = nn.Sequential(
            nn.Linear(5632, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out = out.reshape(x.shape[0], -1)
        out = self.fc(out)
        return out

In [22]:
model = Classifier().to(device)

In [23]:
loss_func = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

In [24]:
def train(dataloader, epochs):
    for epoch in range(epochs):
        for x_batch, y_batch in dataloader:
            optimizer.zero_grad()
            
            outputs = model(x_batch)
            
            loss = loss_func(outputs, y_batch)
            loss.backward()
            
            optimizer.step()
            
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
        print('epoch: {:3d},    lr={:6f},    loss={:5f}'.format(epoch+1, lr, loss.item()))

In [25]:
train(train_dataloader, num_epochs)

epoch:   1,    lr=0.001000,    loss=0.142352
epoch:   2,    lr=0.001000,    loss=0.090885
epoch:   3,    lr=0.001000,    loss=0.014078
epoch:   4,    lr=0.001000,    loss=0.011287
epoch:   5,    lr=0.001000,    loss=0.012549
epoch:   6,    lr=0.001000,    loss=0.000733
epoch:   7,    lr=0.001000,    loss=0.000307
epoch:   8,    lr=0.001000,    loss=0.005690
epoch:   9,    lr=0.001000,    loss=0.000058
epoch:  10,    lr=0.001000,    loss=0.000769
epoch:  11,    lr=0.001000,    loss=0.022446
epoch:  12,    lr=0.001000,    loss=0.000124
epoch:  13,    lr=0.001000,    loss=0.009645
epoch:  14,    lr=0.001000,    loss=0.000664
epoch:  15,    lr=0.001000,    loss=0.000423
epoch:  16,    lr=0.001000,    loss=0.000019
epoch:  17,    lr=0.001000,    loss=0.000938
epoch:  18,    lr=0.001000,    loss=0.000087
epoch:  19,    lr=0.001000,    loss=0.000883
epoch:  20,    lr=0.001000,    loss=0.000573


In [26]:
torch.save(model.state_dict(), './output/m_speech.pt')