## Train

In [1]:
import numpy as np
import pandas as pd
import os
import tqdm
import random
import time
import math
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.optim import Adam, AdamW
from torchvision.models import resnet18, resnet34, resnet50
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold
from contextlib import contextmanager
from typing import Optional
import logging
from numpy.random import beta
from pathlib import Path

from conformer import ConformerConvModule
from conformer import ConformerBlock


In [2]:
# Run on the first CUDA device when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [3]:
def set_seed(seed: int = 42):
    """Seed every random number generator used here so runs are repeatable.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    all CUDA devices), and puts cuDNN into deterministic mode.

    Args:
        seed: Value applied to every generator. Defaults to 42.
    """
    random.seed(seed)
    np.random.seed(seed)
    # Hash randomization is fixed at interpreter start-up, so this only
    # affects processes launched after this call (e.g. worker subprocesses).
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; silently ignored without CUDA.
    torch.cuda.manual_seed_all(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    # benchmark=True lets cuDNN auto-tune and pick different kernels from run
    # to run, which defeats the deterministic flag above, so keep it disabled.
    torch.backends.cudnn.benchmark = False  # type: ignore

In [4]:
class config:
    """Run-wide settings read by the rest of the notebook."""

    SEED = 416
    INPUT = Path("../input/rfcx-species-audio-detection/train")
    OUTPUT = "/kaggle/working"

    # Cross-validation settings read by get_dataloder().
    # NOTE(review): placeholder defaults so the loader cell runs on a fresh
    # kernel; tune to the intended experiment setup.
    N_FOLDS = 5   # number of stratified folds
    FOLD = 0      # index of the fold held out for validation (0 <= FOLD < N_FOLDS)

    # DataLoader settings read by get_dataloder() (same placeholder caveat).
    TRAIN_BS = 32     # training batch size
    VALID_BS = 32     # validation batch size
    TRAIN_WORKS = 0   # worker processes for the training loader (0 = load in the main process)
    VALID_WORKS = 0   # worker processes for the validation loader

## Audio Transform

In [18]:
# Convert every audio clip into a 2-D image before training.
# To do so, run the audio_transformer step first.

## Preprocessing

## Data Loader

In [None]:
def get_dataloder():
    """Build the training and validation loaders for the fold chosen in ``config``.

    Images under ``config.INPUT`` are read with ``ImageFolder``, split with a
    seeded, shuffled ``StratifiedKFold`` over the class targets, and the split
    at index ``config.FOLD`` is wrapped in two ``DataLoader`` objects.

    Returns:
        tuple: ``(train_data_loader, valid_data_loader)``; only the training
        loader shuffles its samples.
    """
    # Both pipelines only convert to tensors for now; the random / center
    # crops to (128, 313) are intentionally left disabled.
    to_tensor_train = transforms.Compose([transforms.ToTensor()])
    to_tensor_valid = transforms.Compose([transforms.ToTensor()])

    # Two views over the same folder so each split keeps its own transform.
    full_train = datasets.ImageFolder(root=config.INPUT, transform=to_tensor_train)
    full_valid = datasets.ImageFolder(root=config.INPUT, transform=to_tensor_valid)

    # Stratify on the class targets and keep only the configured fold.
    splitter = StratifiedKFold(n_splits=config.N_FOLDS, shuffle=True, random_state=config.SEED)
    labels = full_train.targets
    folds = list(splitter.split(labels, labels))
    train_indices, valid_indices = folds[config.FOLD]

    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.Subset(full_train, train_indices),
        batch_size=config.TRAIN_BS,
        shuffle=True,
        num_workers=config.TRAIN_WORKS,
    )
    valid_loader = torch.utils.data.DataLoader(
        torch.utils.data.Subset(full_valid, valid_indices),
        batch_size=config.VALID_BS,
        shuffle=False,
        num_workers=config.VALID_WORKS,
    )
    return train_loader, valid_loader

# Build the loaders for the fold selected in config.
train_data_loader, valid_data_loader = get_dataloder()

# Pull a single batch as a smoke test; next() fails right here if the loader
# is empty instead of leaving `d` undefined for the lines below.
d = next(iter(train_data_loader))

# Show the first image of the batch. ToTensor yields channel-first (C, H, W)
# data while imshow expects channel-last (H, W, C), so move axis 0 to the end.
img = d[0][0]
plt.imshow(np.moveaxis(img.numpy(), 0, -1));

In [11]:
# Random stand-in input of shape (100, 1024, 1000), sampled on the CPU and
# then transferred to the selected device.
x = torch.randn((100, 1024, 1000))
x = x.to(device=device)

RuntimeError: CUDA error: an illegal memory access was encountered

In [12]:
# Conformer convolution module sized for the last axis of x (1000 features).
layer = ConformerConvModule(
    dim = 1000,
    causal = False,             # auto-regressive or not - 1d conv will be made causal with padding if so
    expansion_factor = 2,       # what multiple of the dimension to expand for the depthwise convolution
    kernel_size = 31,           # kernel size, 17 - 31 was said to be optimal
    dropout = 0.                # dropout at the very end
).to(x.device)  # keep the weights on the same device as x; a CPU module fed a CUDA tensor raises a device-mismatch error

# Residual connection around the conv module.
# NOTE: this rebinds x, so re-running the cell transforms the already-transformed tensor again.
x = layer(x) + x

In [13]:
# Inspect the tensor's dimensions after the residual conv step.
x.size()

torch.Size([100, 1024, 1000])

In [14]:
# Full Conformer block sized for the last axis of x (1000 features).
block = ConformerBlock(
    dim = 1000,
    dim_head = 64,
    heads = 8,
    ff_mult = 4,
    conv_expansion_factor = 2,
    conv_kernel_size = 31,
    attn_dropout = 0.,
    ff_dropout = 0.,
    conv_dropout = 0.
).to(x.device)  # keep the weights on the same device as x; a CPU module fed a CUDA tensor raises a device-mismatch error

# Forward pass on the tensor built in the earlier cells (last axis must equal dim = 1000).
# NOTE(review): the output is expected to keep x's shape — confirm.
block(x)

tensor([[[-8.2625e-02,  1.3729e+00,  6.1499e-01,  ...,  1.2382e+00,
          -3.1172e+00,  9.3844e-01],
         [ 4.2708e-01, -6.2781e-01, -2.7205e-01,  ..., -1.3194e-01,
           1.1173e+00, -3.1611e-01],
         [-9.8986e-01, -1.1533e+00,  2.1363e+00,  ..., -8.9860e-01,
          -7.7698e-01,  7.7793e-01],
         ...,
         [-2.7806e-01,  5.6589e-01, -1.7443e+00,  ..., -7.7034e-01,
           4.4023e-01,  5.9001e-01],
         [ 1.6416e+00, -1.2186e+00, -1.8733e+00,  ...,  8.7656e-01,
           2.5466e-01,  7.7383e-01],
         [-1.1374e+00, -4.8029e-02,  5.5781e-01,  ...,  8.9890e-01,
           1.5303e+00,  1.3465e+00]],

        [[ 5.8669e-01,  1.3152e+00, -7.7389e-01,  ...,  5.5820e-01,
           3.1729e-01, -1.0471e+00],
         [-4.0519e-01,  5.1771e-01, -1.0939e+00,  ..., -8.4925e-01,
          -5.4616e-01,  3.5948e+00],
         [ 3.9591e-01, -2.0474e+00,  6.0393e-01,  ...,  6.1673e-02,
          -5.9224e-01,  1.0050e+00],
         ...,
         [-2.9284e-01, -9

In [24]:
# NOTE(review): same call as the previous cell but executed later (In [24]); the recorded
# output shows a single batch entry, so x was presumably redefined in a cell that is no
# longer present — confirm, or drop this cell so the notebook survives Restart & Run All.
block(x)

tensor([[[-0.3272, -1.4688,  0.6977,  ..., -1.3553, -0.6360, -0.1129],
         [ 0.7752,  0.6278, -0.9865,  ...,  1.3034, -0.6864, -0.1707],
         [ 0.7214,  2.0796,  1.9614,  ..., -0.3620, -0.7741,  0.0137],
         ...,
         [ 0.1327,  0.9225,  0.5310,  ...,  0.1975, -0.6152,  0.8490],
         [ 0.3753,  0.7627,  1.0196,  ...,  1.2932,  0.3024,  0.0706],
         [ 0.4095,  0.5860,  0.3746,  ..., -0.6731, -0.2048,  0.6988]]],
       grad_fn=<NativeLayerNormBackward>)

## Modeling