In [1]:
FOLD = [0, 1]

## imports

In [2]:
import gc
import os
import sys
import pickle
import random
import time
import logging

import datetime as dt
from collections import Counter, defaultdict
from functools import partial
from pathlib import Path
from psutil import cpu_count
from hashlib import sha1

import librosa
import numpy as np
import pandas as pd
from PIL import Image
# from sklearn.model_selection import train_test_split, KFold
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
#from skmultilearn.model_selection import iterative_train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from fastprogress import master_bar, progress_bar
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms

from tqdm import tqdm_notebook
import matplotlib.pyplot as plt

from imgaug import augmenters as iaa

In [3]:
print(os.listdir("../input"))

['fat2019-new-1ch', 'freesound-audio-tagging-2019', 'freesound-sox-pitchup-equ-tremolo-tbup', 'freesound-all-curated-aug-1ch', 'freesound-convnet-noisy-pitch-pretrain-rrc']


## utils

In [4]:
def get_logger(name="Main", tag="exp", log_dir="log/"):
    log_path = Path(log_dir)
    path = log_path / tag
    path.mkdir(exist_ok=True, parents=True)

    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)

    fh = logging.FileHandler(
        path / (dt.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + ".log"))
    sh = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        "%(asctime)s %(name)s %(levelname)s %(message)s")

    fh.setFormatter(formatter)
    sh.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(sh)
    return logger

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

SEED = 520
seed_everything(SEED)

In [6]:
N_JOBS = cpu_count()
os.environ['MKL_NUM_THREADS'] = str(N_JOBS)
os.environ['OMP_NUM_THREADS'] = str(N_JOBS)
DataLoader = partial(DataLoader, num_workers=N_JOBS)

In [7]:
# from official code https://colab.research.google.com/drive/1AgPdhSp7ttY18O3fEoHOQKlt_3HJDLi8#scrollTo=cRCaCIb9oguU
def _one_sample_positive_class_precisions(scores, truth):
    """Calculate precisions for each true class for a single sample.

    Args:
      scores: np.array of (num_classes,) giving the individual classifier scores.
      truth: np.array of (num_classes,) bools indicating which classes are true.

    Returns:
      pos_class_indices: np.array of indices of the true classes for this sample.
      pos_class_precisions: np.array of precisions corresponding to each of those
        classes.
    """
    num_classes = scores.shape[0]
    pos_class_indices = np.flatnonzero(truth > 0)
    # Only calculate precisions if there are some true classes.
    if not len(pos_class_indices):
        return pos_class_indices, np.zeros(0)
    # Retrieval list of classes for this sample.
    retrieved_classes = np.argsort(scores)[::-1]
    # class_rankings[top_scoring_class_index] == 0 etc.
    class_rankings = np.zeros(num_classes, dtype=np.int)
    class_rankings[retrieved_classes] = range(num_classes)
    # Which of these is a true label?
    retrieved_class_true = np.zeros(num_classes, dtype=np.bool)
    retrieved_class_true[class_rankings[pos_class_indices]] = True
    # Num hits for every truncated retrieval list.
    retrieved_cumulative_hits = np.cumsum(retrieved_class_true)
    # Precision of retrieval list truncated at each hit, in order of pos_labels.
    precision_at_hits = (
            retrieved_cumulative_hits[class_rankings[pos_class_indices]] /
            (1 + class_rankings[pos_class_indices].astype(np.float)))
    return pos_class_indices, precision_at_hits


def calculate_per_class_lwlrap(truth, scores):
    """Calculate label-weighted label-ranking average precision.

    Arguments:
      truth: np.array of (num_samples, num_classes) giving boolean ground-truth
        of presence of that class in that sample.
      scores: np.array of (num_samples, num_classes) giving the classifier-under-
        test's real-valued score for each class for each sample.

    Returns:
      per_class_lwlrap: np.array of (num_classes,) giving the lwlrap for each
        class.
      weight_per_class: np.array of (num_classes,) giving the prior of each
        class within the truth labels.  Then the overall unbalanced lwlrap is
        simply np.sum(per_class_lwlrap * weight_per_class)
    """
    assert truth.shape == scores.shape
    num_samples, num_classes = scores.shape
    # Space to store a distinct precision value for each class on each sample.
    # Only the classes that are true for each sample will be filled in.
    precisions_for_samples_by_classes = np.zeros((num_samples, num_classes))
    for sample_num in range(num_samples):
        pos_class_indices, precision_at_hits = (
            _one_sample_positive_class_precisions(scores[sample_num, :],
                                                  truth[sample_num, :]))
        precisions_for_samples_by_classes[sample_num, pos_class_indices] = (
            precision_at_hits)
    labels_per_class = np.sum(truth > 0, axis=0)
    weight_per_class = labels_per_class / float(np.sum(labels_per_class))
    # Form average of each column, i.e. all the precisions assigned to labels in
    # a particular class.
    per_class_lwlrap = (np.sum(precisions_for_samples_by_classes, axis=0) /
                        np.maximum(1, labels_per_class))
    # overall_lwlrap = simple average of all the actual per-class, per-sample precisions
    #                = np.sum(precisions_for_samples_by_classes) / np.sum(precisions_for_samples_by_classes > 0)
    #           also = weighted mean of per-class lwlraps, weighted by class label prior across samples
    #                = np.sum(per_class_lwlrap * weight_per_class)
    return per_class_lwlrap, weight_per_class

## Dataset

In [8]:
dataset_dir = Path('../input/freesound-audio-tagging-2019')
preprocessed_dir = Path('../input/fat2019-new-1ch/')
augmented_dir = Path("../input/freesound-all-curated-aug-1ch/")
augmented_dir2 = Path("../input/freesound-sox-pitchup-equ-tremolo-tbup/")

In [10]:
csvs = {
    'train_curated': dataset_dir / 'train_curated.csv',
    'sample_submission': dataset_dir / 'sample_submission.csv'
}

dataset = {
    'train_curated': dataset_dir / 'train_curated'
}

mels = {
    'train_curated': preprocessed_dir / 'mels_train_curated.pkl',
    "fade": augmented_dir / "mel_fade.pkl",
    "pitch": augmented_dir / "mel_pitch.pkl",
    "reverb": augmented_dir / "mel_reverb.pkl",
    "tb": augmented_dir / "mel_tb.pkl",
    "pitchup": augmented_dir2 / "mel_pitchup.pkl",
    "eq": augmented_dir2 / "mel_equalize.pkl",
    "tbup": augmented_dir2 / "mel_tbup.pkl"
}

In [11]:
train_curated = pd.read_csv(csvs['train_curated'])
train_df = train_curated

test_df = pd.read_csv(csvs['sample_submission'])

labels = test_df.columns[1:].tolist()
num_classes = len(labels)
num_classes

80

In [12]:
y_train = np.zeros((len(train_df), num_classes)).astype(int)
for i, row in enumerate(train_df['labels'].str.split(',')):
    for label in row:
        idx = labels.index(label)
        y_train[i, idx] = 1

y_train.shape

(4970, 80)

In [13]:
with open(mels['train_curated'], 'rb') as curated:
    x_train = pickle.load(curated)
    
with open(mels["fade"], "rb") as fade:
    x_fade = pickle.load(fade)
    
with open(mels["pitch"], "rb") as pitch:
    x_pitch = pickle.load(pitch)
    
with open(mels["reverb"], "rb") as reverb:
    x_reverb = pickle.load(reverb)
    
with open(mels["tb"], "rb") as tb:
    x_tb = pickle.load(tb)
    
with open(mels["pitchup"], "rb") as pu:
    x_pitchup = pickle.load(pu)
    
with open(mels["eq"], "rb") as eq:
    x_eq = pickle.load(eq)
    
with open(mels["tbup"], "rb") as tbup:
    x_tbup = pickle.load(tbup)
    
len(x_train), len(x_fade), len(x_pitch), len(x_reverb), len(x_tb), len(x_pitchup), len(x_eq), len(x_tbup)

(4970, 4970, 4970, 4970, 4970, 4970, 4970, 4970)

## Data Transformation

In [14]:
#########################################################
class ImgAugTransform:
    def __init__(self):
        self.aug = iaa.Sequential([
            iaa.CoarseDropout(0.1,size_percent=0.02)
        ])
      
    def __call__(self, img):
        img = np.array(img)
        return self.aug.augment_image(img)
#########################################################

In [15]:
from torchvision.transforms import *
import math
from imgaug import augmenters as iaa

class RandomErasingOrCoaseDropout(object):

    def __init__(self, probability = 0.3, probability2 = 0.6, sl = 0.02, sh = 0.4, r1 = 0.3, mean=[0.4914, 0.4822, 0.4465]):
        self.probability = probability
        self.probability2 = probability2
        self.mean = mean
        self.sl = sl
        self.sh = sh
        self.r1 = r1
        
        self.aug = iaa.Sequential([
            iaa.CoarseDropout(0.1,size_percent=0.02)
        ])
       
    def __call__(self, img):
        
        th = random.uniform(0, 1)
        if th < self.probability:
            img = np.array(img)
            return torch.tensor(self.aug.augment_image(img))
        
        if th > self.probability2:
            return img

        for attempt in range(100):
            area = img.size()[1] * img.size()[2]
       
            target_area = random.uniform(self.sl, self.sh) * area
            aspect_ratio = random.uniform(self.r1, 1/self.r1)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            if w < img.size()[2] and h < img.size()[1]:
                x1 = random.randint(0, img.size()[1] - h)
                y1 = random.randint(0, img.size()[2] - w)
                if img.size()[0] == 3:
                    img[0, x1:x1+h, y1:y1+w] = self.mean[0]
                    img[1, x1:x1+h, y1:y1+w] = self.mean[1]
                    img[2, x1:x1+h, y1:y1+w] = self.mean[2]
                else:
                    img[0, x1:x1+h, y1:y1+w] = self.mean[0]
                return img
        return img

In [16]:
# blend image augmentation
# https://pillow.readthedocs.io/en/stable/reference/Image.html

def array2img(array):    
    image = Image.fromarray(array, mode='L')        
    time_dim, base_dim = image.size
    crop = random.randint(0, time_dim - base_dim)
    image = image.crop([crop, 0, crop + base_dim, base_dim])
    return image


In [17]:
transforms_dict = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(0.5),
        #########################################
        # ImgAugTransform(),
        #########################################
        transforms.ToTensor(),
        # ===============
        RandomErasingOrCoaseDropout(),
        # ===============
    ]),
    #########################################
    'valid': transforms.Compose([
        transforms.RandomHorizontalFlip(0.5),
        #ImgAugTransform(),
        transforms.ToTensor(),
        # RandomErasingOrCoaseDropout(),
    ]),
    #########################################
    'test': transforms.Compose([
        transforms.RandomHorizontalFlip(0.5),
        #########################################
        # ImgAugTransform(),
        #########################################
        transforms.ToTensor(),
    ]),
}

In [18]:
def _get_strength_list(x_train):
    ##################################################################
    strength_list_train = []
    for i in progress_bar(range(len(x_train))):
        time_dim, base_dim = x_train[i].shape[1], x_train[i].shape[0]
        x = x_train[i]
        x_sum = []
        x_max = 0
        x_min = np.inf
        for j in range(0,time_dim-base_dim + 1):
            #x_temp = x[:,j:j + base_dim].sum()
            x_temp = x[0:84,j:j + base_dim].sum()
            x_sum.append(x_temp)
            if x_temp > x_max:
                x_max = x_temp
            if x_temp < x_min:
                x_min = x_temp
        if (x_max == x_min):
            height = x_max
        else:
            height = x_max - x_min
        strength_list_train.append([x_sum, height, x_max, x_min, time_dim, base_dim])
    ##################################################################
    return strength_list_train

strength_list_train = _get_strength_list(x_train)

In [19]:
class FATTrainDataset(Dataset):
    #def __init__(self, mels, labels, transforms):
    ############################################################
    def __init__(self, mels, labels, transforms, strength_list):
    ############################################################
        super().__init__()
        self.mels = mels
        self.labels = labels
        self.transforms = transforms
        ################################
        self.strength_list = strength_list
        ################################

        
    def __len__(self):
        return len(self.mels)
    
    def __getitem__(self, idx):
        # crop 2sec
        #image = Image.fromarray(self.mels[idx], mode='RGB')　#後でimage化     
        #time_dim, base_dim = image.size
        ##################################################################
        #time_dim, base_dim = self.mels[idx].shape[1], self.mels[idx].shape[0]        
        x = self.mels[idx]
        x_sum = self.strength_list[idx][0]
        height = self.strength_list[idx][1]
        x_max = self.strength_list[idx][2]
        x_min = self.strength_list[idx][3]
        time_dim = self.strength_list[idx][4]
        base_dim = self.strength_list[idx][5]
        flag = True
        while flag:
            crop = random.randint(0, int(height * (time_dim- base_dim))) # 一様分布の乱数
            #オール0対策
            if height == 0:
                crop_idx = 0
                value = 0
            else:
                crop_idx = int(crop // height)
                value = int(crop % height)                
            if ((x_sum[crop_idx] - x_min) - value) >= 0:
                flag = False
        crop = crop_idx
        image = Image.fromarray(x, mode='L') 
        ##################################################################
        #crop = random.randint(0, time_dim - base_dim)
        image = image.crop([crop, 0, crop + base_dim, base_dim])
        image = self.transforms(image).div_(255)[0, :, :]
        
        label = self.labels[idx]
        label = torch.from_numpy(label).float()
        
        return image, label

In [20]:
class FATValidDataset(Dataset):
    #def __init__(self, mels, labels, transforms):
    ############################################################
    def __init__(self, fnames, mels, labels, transforms, strength_list, tta=5):
    ############################################################
        super().__init__()
        self.fnames = fnames
        self.mels = mels
        self.labels = labels
        self.transforms = transforms
        ################################
        self.strength_list = strength_list
        ################################
        self.tta = tta

        
    def __len__(self):
        return len(self.fnames) * self.tta
    
    def __getitem__(self, idx):       
        new_idx = idx % len(self.fnames)
        # crop 2sec
        #image = Image.fromarray(self.mels[idx], mode='RGB')　#後でimage化     
        #time_dim, base_dim = image.size
        ##################################################################
        #time_dim, base_dim = self.mels[idx].shape[1], self.mels[idx].shape[0]        
        x = self.mels[new_idx]
        x_sum = self.strength_list[new_idx][0]
        height = self.strength_list[new_idx][1]
        x_max = self.strength_list[new_idx][2]
        x_min = self.strength_list[new_idx][3]
        time_dim = self.strength_list[new_idx][4]
        base_dim = self.strength_list[new_idx][5]
        flag = True
        while flag:
            crop = random.randint(0, int(height * (time_dim- base_dim))) # 一様分布の乱数
            #オール0対策
            if height == 0:
                crop_idx = 0
                value = 0
            else:
                crop_idx = int(crop // height)
                value = int(crop % height)                
            if ((x_sum[crop_idx] - x_min) - value) >= 0:
                flag = False
        crop = crop_idx
        image = Image.fromarray(x, mode='L') 
        ##################################################################
        #crop = random.randint(0, time_dim - base_dim)
        image = image.crop([crop, 0, crop + base_dim, base_dim])
        image = self.transforms(image).div_(255)[0, :, :]
        
        label = self.labels[new_idx]
        label = torch.from_numpy(label).float()
        
        fname = self.fnames[new_idx]
        
        return image, label, fname

## model

In [21]:
def init_layer(layer, nonlinearity="leaky_relu"):
    nn.init.kaiming_uniform_(layer.weight, nonlinearity=nonlinearity)
    
    if hasattr(layer, "bias"):
        if layer.bias is not None:
            layer.bias.data.fill_(0.0)
            
            
def init_bn(bn):
    bn.bias.data.fill_(0.0)
    bn.running_mean.data.fill_(0.0)
    bn.weight.data.fill_(1.0)
    bn.running_var.data.fill_(1.0)
    
    
class SpatialAttention2d(nn.Module):
    def __init__(self, channel):
        super(SpatialAttention2d, self).__init__()
        self.squeeze = nn.Conv2d(channel, 1, kernel_size=1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        z = self.squeeze(x)
        z = self.sigmoid(z)
        return x * z


class GAB(nn.Module):
    def __init__(self, input_dim, reduction=4):
        super(GAB, self).__init__()
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(input_dim, input_dim // reduction, kernel_size=1, stride=1)
        self.conv2 = nn.Conv2d(input_dim // reduction, input_dim, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        z = self.global_avgpool(x)
        z = self.relu(self.conv1(z))
        z = self.sigmoid(self.conv2(z))
        return x * z

    
class SCse(nn.Module):
    def __init__(self, dim):
        super(SCse, self).__init__()
        self.satt = SpatialAttention2d(dim)
        self.catt = GAB(dim)

    def forward(self, x):
        return self.satt(x) + self.catt(x)
    
    
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, reduction=16):
        super(ConvBlock, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels,
                               kernel_size=(2, 2),
                               stride=(1, 1),
                               padding=(1, 1),
                               bias=False)
        self.conv2 = nn.Conv2d(in_channels=out_channels,
                               out_channels=out_channels,
                               kernel_size=(2, 2),
                               stride=(1, 1),
                               padding=(1, 1),
                               bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.scse = SCse(out_channels)
        # self.se = SELayer(out_channels)
        
        self.init_weights()
        
    def init_weights(self):
        init_layer(self.conv1)
        init_layer(self.conv2)
        init_bn(self.bn1)
        init_bn(self.bn2)
        
    def forward(self, inp, pool_size=(2, 2), pool_type="avg"):
        x = inp
        x = F.relu_(self.bn1(self.conv1(x)))
        x = F.relu_(self.scse(self.bn2(self.conv2(x))))
        # x = F.relu_(self.se(self.bn2(self.conv2(x))))
        if pool_type == "max":
            x = F.max_pool2d(x, kernel_size=pool_size)
        elif pool_type == "avg":
            x = F.avg_pool2d(x, kernel_size=pool_size)
        elif pool_type == "both":
            x1 = F.max_pool2d(x, kernel_size=pool_size)
            x2 = F.avg_pool2d(x, kernel_size=pool_size)
            x = x1 + x2
        else:
            import pdb
            pdb.set_trace()
        return x
    
    
class ConvNet(nn.Module):
    def __init__(self, n_classes=80):
        super(ConvNet, self).__init__()
        self.conv1 = ConvBlock(1, 32)
        self.conv2 = ConvBlock(32, 64)
        self.conv3 = ConvBlock(64, 128)
        self.conv4 = ConvBlock(128, 256)
        self.conv5 = ConvBlock(256, 512)
        
        self.bn1 = nn.BatchNorm1d((1 + 4 + 20) * 512)
        self.drop1 = nn.Dropout(0.4)
        self.fc1 = nn.Linear((1 + 4 + 20) * 512, 512)
        self.prelu = nn.PReLU()
        self.bn2 = nn.BatchNorm1d(512)
        self.drop2 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(512, n_classes)
        
    def init_weight(self):
        init_layer(self.fc1)
        init_layer(self.fc2)
        init_bn(self.bn1)
        init_bn(self.bn2)
    
    def forward(self, x):
        x = x.view(x.size(0), 1, x.size(1), x.size(2))
        x = self.conv1(x, pool_size=(1, 1), pool_type="both")
        x = self.conv2(x, pool_size=(4, 1), pool_type="both")
        x = self.conv3(x, pool_size=(1, 3), pool_type="both")
        x = self.conv4(x, pool_size=(4, 1), pool_type="both")
        x = self.conv5(x, pool_size=(1, 3), pool_type="both")
        
        x1_max = F.max_pool2d(x, (5, 8))
        x1_mean = F.avg_pool2d(x, (5, 8))
        x1 = (x1_max + x1_mean).reshape(x.size(0), -1)
        
        x2_max = F.max_pool2d(x, (2, 4))
        x2_mean = F.avg_pool2d(x, (2, 4))
        x2 = (x2_max + x2_mean).reshape(x.size(0), -1)
        
        x = torch.mean(x, dim=3)
        x, _ = torch.max(x, dim=2)
        
        x = torch.cat([x, x1, x2], dim=1)
        x = self.drop1(self.bn1(x))
        x = self.prelu(self.fc1(x))
        x = self.drop2(self.bn2(x))
        x = self.fc2(x)
         
        return x

In [22]:
class FineTune(nn.Module):
    def __init__(self, n_classes, weight_path):
        super(FineTune, self).__init__()

        self.convnet = ConvNet(n_classes)
        self.convnet.load_state_dict(torch.load(weight_path))
        
        self.bn1 = nn.BatchNorm1d((1 + 4 + 20) * 512)
        self.drop1 = nn.Dropout(0.4)
        self.fc1 = nn.Linear((1 + 4 + 20) * 512, 512)
        self.prelu = nn.PReLU()
        self.bn2 = nn.BatchNorm1d(512)
        self.drop2 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(512, n_classes)
        
    def init_weight(self):
        init_layer(self.fc1)
        init_layer(self.fc2)
        init_bn(self.bn1)
        init_bn(self.bn2)
    
    def forward(self, x):
        x = x.view(x.size(0), 1, x.size(1), x.size(2))
        x = self.convnet.conv1(x, pool_size=(1, 1), pool_type="both")
        x = self.convnet.conv2(x, pool_size=(4, 1), pool_type="both")
        x = self.convnet.conv3(x, pool_size=(1, 3), pool_type="both")
        x = self.convnet.conv4(x, pool_size=(4, 1), pool_type="both")
        x = self.convnet.conv5(x, pool_size=(1, 3), pool_type="both")
        
        x1_max = F.max_pool2d(x, (5, 8))
        x1_mean = F.avg_pool2d(x, (5, 8))
        x1 = (x1_max + x1_mean).reshape(x.size(0), -1)
        
        x2_max = F.max_pool2d(x, (2, 4))
        x2_mean = F.avg_pool2d(x, (2, 4))
        x2 = (x2_max + x2_mean).reshape(x.size(0), -1)
        
        x = torch.mean(x, dim=3)
        x, _ = torch.max(x, dim=2)
        
        x = torch.cat([x, x1, x2], dim=1)
        x = self.drop1(self.bn1(x))
        x = self.prelu(self.fc1(x))
        x = self.drop2(self.bn2(x))
        x = self.fc2(x)
         
        return x

## train

In [23]:
#def train_model(x_train, y_train, train_transforms):
#####################################################################
#def train_model(x_train, y_train, train_transforms, strength_list):
def train_model(x_train, y_train, 
                x_fade, x_pitch, x_reverb, x_tb,
                x_pitchup, x_eq, x_tbup,
                train_df, 
                train_transforms, valid_transforms, 
                strength_list):
#def train_model(x_train, y_train, x_fade, x_pitch, x_reverb, 
#                train_transforms, valid_transforms, strength_list, strength_list_blend_arr_x, blend_arr_x, blend_arr_y):
#####################################################################
    num_epochs = 40
    batch_size = 128
    test_batch_size = 128
    lr = 1e-3
    eta_min = 1e-5
    t_max = 10
    
    num_classes = y_train.shape[1]
    
    idx = np.arange(len(x_train))
    #idx_ex = np.arange(len(blend_arr_x))
    
    lam = np.random.beta(1.0, 1.0)
    
    kfold = KFold(n_splits=5, shuffle=True, random_state=SEED)
    splits = list(kfold.split(idx))
    use_fold = []
    for i in FOLD:
        use_fold.append(splits[i])
    # =====
    # kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
    # =====
    
    logger = get_logger("Main", tag="train", log_dir="log/")
    bests = []
    loss_list = []
    lwlrap_list = []
    for fold_num, (trn_idx, val_idx) in enumerate(use_fold):
    # for fold_num, (trn_idx, val_idx) in enumerate(kfold.split(idx, fold_order)):
        logger.info(f"Fold {fold_num + 1}")
        
        x_trn, x_val = list(), list()
        for j in idx:
            if j in trn_idx:
                x_trn.append(x_train[j])
            else:
                x_val.append(x_train[j])
                
        y_trn, y_val = y_train[trn_idx], y_train[val_idx]
        val_fnames = set(train_df.loc[val_idx, "fname"].values.tolist())
                
        # get mix data
        blend_arr_x = []
        blend_arr_y = []
        for i in range(1000):
            r1, r2 = random.randint(0, len(x_trn)-1), random.randint(0, len(x_trn)-1)
            #img0 = array2img(x_train[r1])
            #img1 = array2img(x_train[r2])
            img0 = array2img(x_trn[r1])
            img1 = array2img(x_trn[r2])
            img_blend = Image.blend(im1=img0, im2=img1, alpha=lam)

            #y_blend = y_train[r1]*(1-lam) + y_train[r2]*lam
            y_blend = y_trn[r1]*(1-lam) + y_trn[r2]*lam
            img_blend_arr = np.asarray(img_blend)
            blend_arr_x.append(img_blend_arr)
            blend_arr_y.append(y_blend)
        
        idx_ex = np.arange(len(blend_arr_x))
        strength_list_blend_arr_x = _get_strength_list(blend_arr_x)
                
        for data_list in [x_fade, x_pitch, x_reverb, x_tb, x_pitchup, x_eq, x_tbup]:
            for j in idx:
                if j in trn_idx:
                    x_trn.append(data_list[j])

        #y_trn, y_val = y_train[trn_idx], y_train[val_idx]
        y_trn = np.tile(y_trn, reps=(8, 1))
        
        # =====================================================
        y_trn = np.concatenate([y_trn, np.stack(blend_arr_y)])
        for k in idx_ex:
            x_trn.append(blend_arr_x[k])
        # =====================================================
        
        #####################################################################
        strength_list_trn = np.array(strength_list)[trn_idx]
        strength_list_trn = np.tile(strength_list_trn, reps=(8, 1))

        strength_list_val = np.array(strength_list)[val_idx]
        #####################################################################
        
        strength_list_trn = np.concatenate([strength_list_trn, strength_list_blend_arr_x])
        #train_dataset = FATTrainDataset(x_trn, y_trn, train_transforms)
        #valid_dataset = FATTrainDataset(x_val, y_val, train_transforms)
        
        #fname作成
        #####################################################################
        #train_df_trn = train_df[trn_idx]
        train_df_val = np.array(train_df['fname'])[val_idx]
        #####################################################################
        
        #####################################################################
        train_dataset = FATTrainDataset(x_trn, y_trn, train_transforms, strength_list_trn)
        #valid_dataset = FATTrainDataset(x_val, y_val, train_transforms, strength_list_val)
        #valid_dataset = FATValidDataset(train_df_val, x_val, y_val, train_transforms, strength_list_val, tta=20)
        valid_dataset = FATValidDataset(train_df_val, x_val, y_val, valid_transforms, strength_list_val, tta=20)
        #####################################################################

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=test_batch_size, shuffle=False)

        model = FineTune(n_classes=80, 
                              weight_path=Path("../input/freesound-convnet-noisy-pitch-pretrain-rrc/weight_best.pt")).cuda()
        # model = ConvNet(n_classes=80).cuda()
        
        criterion = nn.BCEWithLogitsLoss().cuda()
        optimizer = Adam(params=model.parameters(), lr=lr, amsgrad=False)
        scheduler = CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)

        best_epoch = -1
        best_lwlrap = 0.
        mb = master_bar(range(num_epochs))
        torch.cuda.empty_cache()

        for epoch in mb:
            start_time = time.time()
            model.train()
            avg_loss = 0.

            for x_batch, y_batch in progress_bar(train_loader, parent=mb):
                preds = model(x_batch.cuda())
                loss = criterion(preds, y_batch.cuda())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                avg_loss += loss.item() / len(train_loader)

            model.eval()
            valid_preds = np.zeros((len(x_val), num_classes))
            avg_val_loss = 0.
            
            ########################################################
            all_outputs, all_y, all_fnames = [], [], []
            ########################################################

            #for i, (x_batch, y_batch) in enumerate(valid_loader):
            #############################################################
            for i, (x_batch, y_batch, fnames) in enumerate(valid_loader):
            #############################################################
                preds = model(x_batch.cuda()).detach()
                loss = criterion(preds, y_batch.cuda())

                #preds = torch.sigmoid(preds)
                #valid_preds[i * test_batch_size: (i+1) * test_batch_size] = preds.cpu().numpy()

                #avg_val_loss += loss.item() / len(valid_loader)
            #################################################
                all_outputs.append(preds.cpu().numpy())
                all_y.append(y_batch.cuda())
                all_fnames.extend(fnames)

                preds = torch.sigmoid(preds)
                #valid_preds[i * test_batch_size: (i+1) * test_batch_size] = preds.cpu().numpy()

                avg_val_loss += loss.item() / len(valid_loader)

            valid_preds = pd.DataFrame(data=np.concatenate(all_outputs),
                                 index=all_fnames,
                                 columns=map(str, range(num_classes)))
            valid_preds = valid_preds.groupby(level=0).mean()
            valid_preds = valid_preds.values
            #################################################             

            score, weight = calculate_per_class_lwlrap(y_val, valid_preds)
            lwlrap = (score * weight).sum()

            scheduler.step()

            if (epoch + 1) % 1 == 0:
                elapsed = time.time() - start_time
                mb.write(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  val_lwlrap: {lwlrap:.6f}  time: {elapsed:.0f}s')
                logger.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  val_lwlrap: {lwlrap:.6f}  time: {elapsed:.0f}s')

            if lwlrap > best_lwlrap:
                best_epoch = epoch + 1
                best_lwlrap = lwlrap
                torch.save(model.state_dict(), f'weight_best{fold_num}.pt')
                
            #loss, lwlrap 記録
            loss_list.append([fold_num, avg_loss, avg_val_loss])
            lwlrap_list.append([fold_num, lwlrap])
                
        bests.append((best_epoch, best_lwlrap))
        del x_trn, x_val, y_trn, y_val
        gc.collect()
    logger.info(f"Best: {bests}")
            
    return bests, loss_list, lwlrap_list


In [24]:
gc.collect()

0

In [None]:
#result = train_model(x_train, y_train, transforms_dict["train"])
result, loss_list, lwlrap_list = train_model(x_train, y_train, 
                                             x_fade, x_pitch, x_reverb, x_tb, 
                                             x_pitchup, x_eq, x_tbup,
                                             train_df,
                                             transforms_dict["train"], 
                                             transforms_dict["valid"], 
                                             strength_list_train)

2019-06-07 12:16:55,681 Main INFO Fold 1


2019-06-07 12:20:11,418 Main INFO Epoch 1 - avg_train_loss: 0.1750  avg_val_loss: 0.0349  val_lwlrap: 0.782349  time: 190s


In [None]:
gc.collect()

## Plotting

In [None]:
num_fold = 5
########################################################
#可視化
loss_list = pd.DataFrame(loss_list)
lwlrap_list = pd.DataFrame(lwlrap_list)
for i in range(num_fold):

    loss = loss_list[loss_list.iloc[:,0] == i][1]
    val_loss = loss_list[loss_list.iloc[:,0] == i][2]
    full_lwlrap = lwlrap_list[lwlrap_list.iloc[:,0] == i][1]
    #curated_lwlrap = lwlrap_list[lwlrap_list.iloc[:,0] == i][2]

    epochs = range(1, len(loss) + 1)

    #lossとlwlrapをプロット
    plt.plot(epochs, loss, 'bo', label = "Training loss")
    plt.plot(epochs, val_loss, 'b', label = "Validation loss")
    plt.plot(epochs, full_lwlrap, 'bo',color='r', label = "Full lwlrap")
    #plt.plot(epochs, curated_lwlrap, 'b',color='r', label = "Curated lwlrap")
    plt.title('Fold {} Loss and lwlrap'.format(i+1))
    plt.legend()
    plt.show()

########################################################

## Results

In [None]:
result