In [27]:
import os
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

from torch.utils.data import random_split
from torch.utils.data import Dataset
from sklearn.metrics import f1_score

import random
import torch

seed = 0
deterministic = True

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
if deterministic:
	torch.backends.cudnn.deterministic = True
	torch.backends.cudnn.benchmark = False

In [28]:
path = '.'
data_path = path + '/data'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [139]:
class ImageDataset(Dataset):
    def __init__(self, annotations_file):
        self.img_labels = pd.read_csv(annotations_file)

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = self.img_labels.iloc[idx, 0]
        image = np.load(data_path + "/train_npy/" + img_path)[:, :197]
        label = self.img_labels.iloc[idx, 1]
        return image, label
    

class TestImageDataset(Dataset):
    def __init__(self, annotations_file):
        self.img_path_list = os.listdir(annotations_file)

    def __len__(self):
        return len(self.img_path_list)

    def __getitem__(self, idx):
        img_path = self.img_path_list[idx]
        image = np.load(data_path + "/test_npy/" + img_path)[:, :197]
        
        return image

In [77]:
# https://towardsdatascience.com/audio-deep-learning-made-simple-sound-classification-step-by-step-cebc936bbe5

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init

# ----------------------------
# Audio Classification Model
# ----------------------------
class AudioClassifier (nn.Module):
    # ----------------------------
    # Build the model architecture
    # ----------------------------
    def __init__(self):
        super().__init__()
        conv_layers = []

        # First Convolution Block with Relu and Batch Norm. Use Kaiming Initialization
        self.conv1 = nn.Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
        self.relu1 = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(8)
        init.kaiming_normal_(self.conv1.weight, a=0.1)
        self.conv1.bias.data.zero_()
        conv_layers += [self.conv1, self.relu1, self.bn1]

        # Second Convolution Block
        self.conv2 = nn.Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu2 = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(16)
        init.kaiming_normal_(self.conv2.weight, a=0.1)
        self.conv2.bias.data.zero_()
        conv_layers += [self.conv2, self.relu2, self.bn2]

        # Second Convolution Block
        self.conv3 = nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu3 = nn.ReLU()
        self.bn3 = nn.BatchNorm2d(32)
        init.kaiming_normal_(self.conv3.weight, a=0.1)
        self.conv3.bias.data.zero_()
        conv_layers += [self.conv3, self.relu3, self.bn3]

        # Second Convolution Block
        self.conv4 = nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu4 = nn.ReLU()
        self.bn4 = nn.BatchNorm2d(64)
        init.kaiming_normal_(self.conv4.weight, a=0.1)
        self.conv4.bias.data.zero_()
        conv_layers += [self.conv4, self.relu4, self.bn4]

        # Linear Classifier
        self.ap = nn.AdaptiveAvgPool2d(output_size=1)

        self.lin = nn.Sequential(
          nn.Linear(64, 16),
          nn.ReLU(),
          nn.Linear(16, 1),
          nn.Sigmoid()
        )

        # Wrap the Convolutional Blocks
        self.conv = nn.Sequential(*conv_layers)

    # ----------------------------
    # Forward pass computations
    # ----------------------------
    def forward(self, x):
        # Run the convolutional blocks
        x = self.conv(x)

        # Adaptive pool and flatten for input to linear layer
        x = self.ap(x)
        x = x.view(x.shape[0], -1)

        # Linear layer
        x = self.lin(x)

        # Final output
        return x



In [142]:
def training(model, train_dl, num_epochs, lr, threshold):
  # Loss Function, Optimizer and Scheduler
  criterion = nn.BCELoss()
  optimizer = torch.optim.Adam(model.parameters(),lr=lr)
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr,
                                                steps_per_epoch=int(len(train_dl)),
                                                epochs=num_epochs,
                                                anneal_strategy='linear')

  # Repeat for each epoch
  for epoch in tqdm(range(num_epochs)):
    running_loss = 0.0
    correct_prediction = 0
    total_prediction = 0
    total_f1 = 0

    # Repeat for each batch in the training set
    for i, data in enumerate(train_dl):
        # Get the input features and target labels, and put them on the GPU
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs = inputs.reshape(-1, 1, 129, 197)

        # Normalize the inputs
        inputs_m, inputs_s = inputs.mean(), inputs.std()
        inputs = (inputs - inputs_m) / inputs_s

        # Zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels.float())
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Keep stats for Loss and Accuracy
        running_loss += loss.item()

        # Get the predicted class with the highest score
        prediction = outputs >= torch.FloatTensor([threshold]).to(device)
        # Count of predictions that matched the target label
        correct_prediction += (prediction == labels).sum().item()
        total_prediction += prediction.shape[0]
        total_f1 += f1_score(prediction.cpu(), labels.cpu())

        #if i % 10 == 0:    # print every 10 mini-batches
        #    print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 10))

    # Print stats at the end of the epoch
    num_batches = len(train_dl)
    avg_loss = running_loss / num_batches
    acc = correct_prediction/total_prediction
    f1 = total_f1/i
    print(f'Epoch: {epoch}, Loss: {avg_loss:.2f}, Accuracy: {acc:.2f}, F1: {f1:.3f}')

  print('Finished Training')


def inference (model, val_dl, threshold):
  correct_prediction = 0
  total_prediction = 0
  total_f1 = 0

  # Disable gradient updates
  with torch.no_grad():
    for i, data in enumerate(val_dl):
      # Get the input features and target labels, and put them on the GPU
      inputs, labels = data[0].to(device), data[1].to(device)
      inputs = inputs.reshape(-1, 1, 129, 197)

      # Normalize the inputs
      inputs_m, inputs_s = inputs.mean(), inputs.std()
      inputs = (inputs - inputs_m) / inputs_s

      # Get predictions
      outputs = model(inputs)

      # Get the predicted class with the highest score
      prediction = outputs >= torch.FloatTensor([threshold]).to(device)
      # Count of predictions that matched the target label
      correct_prediction += (prediction == labels).sum().item()
      total_prediction += prediction.shape[0]

      total_f1 += f1_score(prediction.cpu(), labels.cpu())

  acc = correct_prediction/total_prediction
  f1 = total_f1/i
  print(f'Accuracy: {acc:.2f}, F1: {f1:.3f}, Total items: {total_prediction}')

def predict(model, test_data, threshold):
  predictions = []
  # Disable gradient updates
  with torch.no_grad():
      for data in test_data:
          # Get the input features and target labels, and put them on the GPU
          inputs = data.to(device)
          inputs = inputs.reshape(-1, 1, 129, 197)

          # Normalize the inputs
          inputs_m, inputs_s = inputs.mean(), inputs.std()
          inputs = (inputs - inputs_m) / inputs_s

          # Get predictions
          outputs = model(inputs)

          # Get the predicted class with the highest score
          prediction = outputs >= torch.FloatTensor([threshold]).to(device)
          predictions.extend(list(prediction))
  return predictions


In [102]:
train_ds = ImageDataset(data_path + "/train.csv")

# Create the model and put it on the GPU if available
myModel = AudioClassifier()
myModel = myModel.to(device)
# Check that it is on Cuda
next(myModel.parameters()).device

# Random split of 80:20 between training and validation
num_items = len(train_ds)
num_train = round(num_items * 0.8)
num_val = num_items - num_train
train_ds, val_ds = random_split(train_ds, [num_train, num_val])

# Create training and validation data loaders
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=64, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=128, shuffle=False)

In [127]:
num_epochs=80   # Just for demo, adjust this higher.
lr = 1e-3
threshold = 0.3
training(myModel, train_dl, num_epochs, lr, threshold)

  0%|          | 0/80 [00:00<?, ?it/s]

Epoch: 0, Loss: 0.31, Accuracy: 33.36, F1: 0.783
Epoch: 1, Loss: 0.31, Accuracy: 33.27, F1: 0.787
Epoch: 2, Loss: 0.30, Accuracy: 33.43, F1: 0.792
Epoch: 3, Loss: 0.31, Accuracy: 33.32, F1: 0.790
Epoch: 4, Loss: 0.31, Accuracy: 33.43, F1: 0.792
Epoch: 5, Loss: 0.31, Accuracy: 33.46, F1: 0.790
Epoch: 6, Loss: 0.31, Accuracy: 33.27, F1: 0.789
Epoch: 7, Loss: 0.31, Accuracy: 33.34, F1: 0.792
Epoch: 8, Loss: 0.30, Accuracy: 33.43, F1: 0.799
Epoch: 9, Loss: 0.31, Accuracy: 33.29, F1: 0.792
Epoch: 10, Loss: 0.30, Accuracy: 33.21, F1: 0.796
Epoch: 11, Loss: 0.30, Accuracy: 33.37, F1: 0.795
Epoch: 12, Loss: 0.30, Accuracy: 33.45, F1: 0.795
Epoch: 13, Loss: 0.30, Accuracy: 33.53, F1: 0.798
Epoch: 14, Loss: 0.31, Accuracy: 33.46, F1: 0.795
Epoch: 15, Loss: 0.30, Accuracy: 33.20, F1: 0.797
Epoch: 16, Loss: 0.30, Accuracy: 33.47, F1: 0.800
Epoch: 17, Loss: 0.30, Accuracy: 33.26, F1: 0.794
Epoch: 18, Loss: 0.30, Accuracy: 33.48, F1: 0.799
Epoch: 19, Loss: 0.31, Accuracy: 33.32, F1: 0.791
Epoch: 20,

In [128]:
inference(myModel, val_dl, threshold)

Accuracy: 64.83, F1: 0.875, Total items: 1200


In [140]:
test_ds = TestImageDataset(data_path + "/test_npy")
test_dl = torch.utils.data.DataLoader(test_ds, batch_size=1024, shuffle=False)

In [143]:
predictions = predict(myModel, test_dl, threshold)
predictions = [int(p) for p in predictions]
submission = pd.read_csv(data_path + "/anwer_sample.csv")
submission['label'] = predictions

In [144]:
submission.label.unique()

array([1, 0], dtype=int64)

In [145]:
submission

Unnamed: 0,file,label
0,spc_00001.npy,1
1,spc_00010.npy,1
2,spc_00014.npy,0
3,spc_00015.npy,1
4,spc_00016.npy,0
...,...,...
2995,spc_08982.npy,1
2996,spc_08987.npy,1
2997,spc_08988.npy,0
2998,spc_08993.npy,0


In [146]:
submission.to_csv(data_path + '/submission/submission0.csv', index=0)

In [147]:
submission

Unnamed: 0,file,label
0,spc_00001.npy,1
1,spc_00010.npy,1
2,spc_00014.npy,0
3,spc_00015.npy,1
4,spc_00016.npy,0
...,...,...
2995,spc_08982.npy,1
2996,spc_08987.npy,1
2997,spc_08988.npy,0
2998,spc_08993.npy,0
