In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import IPython
import IPython.display
import PIL
import time
import sklearn.metrics
import pickle

In [None]:
# Global experiment configuration shared by the cells below.
NUM_FOLD = 5  # number of K-fold splits over the curated training set
NUM_CLASS = 80  # width of the label / prediction vectors
SEED = 42  # random_state of the K-fold shuffle
NUM_EPOCH = 64*6  # passes over the curated loader per fold
NUM_CYCLE = 64  # epochs per cosine LR cycle; a weight checkpoint is saved at the end of each cycle
BATCH_SIZE = 64  # batch size of the curated and noisy training loaders
BATCH_SIZE_VALID = 32  # NOTE(review): not referenced in the visible cells (the validation loader uses batch_size=1)
DO_TRAIN = True  # NOTE(review): not referenced in the visible cells
DO_EVALUATE = True  # NOTE(review): not referenced in the visible cells
DEBUG = False  # NOTE(review): not referenced in the visible cells
EXPERIMENT = False  # NOTE(review): not referenced in the visible cells
EXPERIMENT_FOLD = 1  # NOTE(review): not referenced in the visible cells
FOLD_LIST = [4]  # 1-based fold numbers to train; every other fold is skipped

In [None]:
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.optim.lr_scheduler import _LRScheduler
import torchvision.models as models


In [None]:
!pip install pretrainedmodels
import pretrainedmodels
import pretrainedmodels.utils
class ResNet(nn.Module):
    """ResNet-34 style network for single-channel inputs with two classifier heads.

    The residual stages are taken from ``pretrainedmodels``' ``resnet34``
    (built with ``pretrained=None``); the first convolution is replaced by a
    1-input-channel one and global pooling is an adaptive *max* pool.

    Two independent MLP heads sit on the same pooled features:

    * ``forward``  -> ``last_linear``  (used for the curated data)
    * ``noisy``    -> ``last_linear2`` (used for the noisy data)

    Args:
        num_classes: number of output logits of each head. Both heads now
            honour this argument; previously they silently used the global
            ``NUM_CLASS`` regardless of what was passed in.
    """

    def __init__(self, num_classes=2):
        super(ResNet, self).__init__()

        self.num_classes = num_classes
        self.mode = 'train'

        self.base_model = pretrainedmodels.__dict__['resnet34'](num_classes=num_classes, pretrained=None)

        # Fresh stem taking a single input channel instead of the base model's own conv1.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = self.base_model.bn1
        self.relu = self.base_model.relu
        self.maxpool = self.base_model.maxpool
        self.layer1 = self.base_model.layer1
        self.layer2 = self.base_model.layer2
        self.layer3 = self.base_model.layer3
        self.layer4 = self.base_model.layer4
        # The attribute is called avgpool, but it is a global MAX pool.
        self.avgpool = nn.AdaptiveMaxPool2d((1, 1))

        # Head input width is read from the last stage of the base model.
        feature_dim = self.base_model.layer4[1].conv1.in_channels
        self.last_linear = self._make_head(feature_dim, num_classes)
        self.last_linear2 = self._make_head(feature_dim, num_classes)

    @staticmethod
    def _make_head(in_features, num_classes):
        """Build one 3-layer MLP classifier head (same layout for both heads)."""
        return nn.Sequential(
            nn.Linear(in_features, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(1024, num_classes),
        )

    def _stages_1_to_3(self, input):
        """Run the stem and residual stages 1-3; returns the layer3 feature map."""
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        return self.layer3(x)

    def _pooled_features(self, input):
        """Run the full backbone and return globally pooled features, one row per sample."""
        bs = input.size(0)
        x = self.layer4(self._stages_1_to_3(input))
        return self.avgpool(x).view(bs, -1)

    def feature(self, input):
        """Return the globally max-pooled *layer3* feature map (layer4 is skipped).

        The result keeps its trailing 1x1 spatial dimensions.
        """
        return self.avgpool(self._stages_1_to_3(input))

    def forward(self, input):
        """Return logits of the first head (``last_linear``)."""
        return self.last_linear(self._pooled_features(input))

    def noisy(self, input):
        """Return logits of the second head (``last_linear2``) on the shared backbone."""
        return self.last_linear2(self._pooled_features(input))


In [None]:
import cv2
import librosa
class MfccDataset(Dataset):
    """Dataset of pre-computed spectrograms stored as ``.npy`` files, with optional augmentation.

    Each item is loaded from ``"{load_dir}/{fname without its last 4 chars}.npy"``,
    optionally cropped/padded to a fixed width, augmented, converted with
    ``librosa.power_to_db`` and standardised to zero mean / unit std.

    Arguments:
        df: DataFrame with an ``fname`` column; every column from position 2
            onward is used as the label vector.
        load_dir: directory that holds the ``.npy`` arrays.
        slice: target width (second axis) after random crop / zero-pad;
            ``-1`` keeps the original width.
        mixup, cutout, cutout_h, cutout_w, gain, resize, flip, highpass:
            switches for the corresponding augmentation; each enabled
            augmentation is applied with probability 0.5 per sample.

    Changes versus the previous revision:
        * ``do_resize`` returned its input unchanged (the augmentation was a
          no-op) and sized the stretch from ``self.slice``; it now returns the
          resized array and uses the actual array width.
        * a first, shadowed ``do_highpass`` definition (which returned ``None``)
          was removed.
        * file names / labels are looked up positionally (``.iloc``) so a
          non-default DataFrame index cannot select the wrong row.
    """

    def __init__(self, df, load_dir, slice=-1, mixup=False,
                 cutout=False, cutout_h=False, cutout_w=False,
                 gain=False, resize=False,
                 flip=False,
                 highpass=False,
                 ):
        self.X_train = df['fname']
        self.y_train = df.iloc[:, 2:].values
        self.slice = slice
        self.mixup = mixup
        self.cutout = cutout
        self.cutout_h = cutout_h
        self.cutout_w = cutout_w
        self.gain = gain
        self.resize = resize
        self.highpass = highpass
        self.flip = flip
        self.load_dir = load_dir

    def _load(self, index):
        """Load the array of the ``index``-th row (positional)."""
        fname = self.X_train.iloc[index]
        # [:-4] drops the 4-character file extension of the original name.
        return np.load("{}/{}.npy".format(self.load_dir, fname[:-4]))

    def do_slice(self, img):
        """Randomly crop, or zero-pad at a random offset, to ``self.slice`` columns."""
        if self.slice != -1:
            img_new = np.zeros([img.shape[0], self.slice], np.float32)
            if img.shape[1] < self.slice:
                shift = np.random.randint(0, self.slice - img.shape[1])
                img_new[:, shift:shift + img.shape[1]] = img
            elif img.shape[1] == self.slice:
                img_new = img
            else:
                shift = np.random.randint(0, img.shape[1] - self.slice)
                img_new = img[:, shift:shift + self.slice]
        else:
            img_new = img
        return img_new

    def do_mixup(self, img, label):
        """Blend ``img``/``label`` with another random sample using a uniform random ratio."""
        idx = np.random.randint(0, len(self.X_train))
        img2 = self.do_slice(self._load(idx))

        label2 = self.y_train[idx].astype(np.float32)

        rate = np.random.random()
        img = img * rate + img2 * (1 - rate)
        label = label * rate + label2 * (1 - rate)
        return img, label

    def do_flip(self, img):
        """Reverse the second axis (returns a view)."""
        return img[:, ::-1]

    def do_cutout_h(self, img, max=32):
        """Zero a random band of rows (in place); half-width drawn from [8, max)."""
        coord = np.random.randint(0, img.shape[0])
        width = np.random.randint(8, max)
        cut = np.array([coord - width, coord + width])
        cut = np.clip(cut, 0, img.shape[0])
        img[cut[0]:cut[1]] = 0
        return img

    def do_cutout_w(self, img, max=32):
        """Zero a random band of columns (in place); half-width drawn from [8, max)."""
        coord = np.random.randint(0, img.shape[1])
        width = np.random.randint(8, max)
        cut = np.array([coord - width, coord + width])
        cut = np.clip(cut, 0, img.shape[1])
        img[:, cut[0]:cut[1]] = 0
        return img

    def do_highpass(self, img):
        """Zero every row from a random row index onward (in place)."""
        th = np.random.randint(0, img.shape[0])
        img[th:] = 0
        return img

    def cutout_bug(self, img):
        """Legacy cutout kept as-is for reproducibility (see the name).

        Row bounds are drawn from ``[0, self.slice)`` - i.e. from the width
        range - and applied to the row axis; the second pair of coordinates is
        drawn but never used. Requires ``self.slice > 0``.
        """
        coordx = np.sort(np.random.randint(0, self.slice, 2))
        # Unused, but still drawn so the random stream matches earlier runs.
        coordy = np.sort(np.random.randint(0, 128, 2))
        img[coordx[0]:coordx[1]] = 0
        return img

    def do_resize(self, img, max=0.1):
        """Stretch or squeeze along the second axis by a random factor in [1-max, 1+max).

        The output always has the input's shape: a stretched array is cropped
        on the right, a squeezed one is zero-padded on the right.
        """
        rate = 1 - max + np.random.random() * max * 2
        width = img.shape[1]
        new_width = int(width * rate)
        if new_width < 1:  # builtin max() is shadowed by the parameter name
            new_width = 1
        # cv2.resize takes the target size as (width, height).
        img_tmp = cv2.resize(img, (new_width, img.shape[0]))
        if rate > 1:
            img_new = img_tmp[:, :width]
        else:
            img_new = np.zeros_like(img)
            img_new[:, :img_tmp.shape[1]] = img_tmp
        return img_new

    def do_gain(self, img, max=0.1):
        """Scale all values by a random factor in [1-max, 1+max)."""
        rate = 1 - max + np.random.random() * max * 2
        return img * rate

    def __getitem__(self, index):
        """Return ``(img, label)``: ``img`` has a leading singleton channel axis, ``label`` is float32."""
        img = self.do_slice(self._load(index))
        label = self.y_train[index].astype(np.float32)

        # Each enabled augmentation fires independently with probability 0.5.
        if self.mixup and np.random.random() < 0.5:
            img, label = self.do_mixup(img, label)
        if self.gain and np.random.random() < 0.5:
            img = self.do_gain(img)
        if self.resize and np.random.random() < 0.5:
            img = self.do_resize(img)
        if self.cutout and np.random.random() < 0.5:
            img = self.cutout_bug(img)
        if self.cutout_h and np.random.random() < 0.5:
            img = self.do_cutout_h(img)
        if self.cutout_w and np.random.random() < 0.5:
            img = self.do_cutout_w(img)
        if self.flip and np.random.random() < 0.5:
            img = self.do_flip(img)
        if self.highpass and np.random.random() < 0.5:
            img = self.do_highpass(img)

        img = librosa.power_to_db(img)
        img = (img - img.mean()) / (img.std() + 1e-7)
        img = img.reshape([1, img.shape[0], img.shape[1]])

        return img, label

    def __len__(self):
        return len(self.X_train.index)

In [None]:
# Load the curated / noisy metadata and the sample submission (whose columns
# after the first define the class list and its order).
df_train = pd.read_csv("../input/freesound-audio-tagging-2019/train_curated.csv")
df_test = pd.read_csv("../input/freesound-audio-tagging-2019/sample_submission.csv")
df_noise = pd.read_csv("../input/freesound-audio-tagging-2019/train_noisy.csv")
labels = df_test.columns[1:].tolist()

# Append one boolean column per class to both training tables.
# The 'labels' field is split on ',' and matched as whole tokens: the previous
# substring test (`label in x`) would mark a class as present whenever its name
# happened to be contained in another class name.
for label in labels:
    print(label)
    df_train[label] = df_train['labels'].apply(lambda x: label in x.split(','))
    df_noise[label] = df_noise['labels'].apply(lambda x: label in x.split(','))
print(df_train.shape, df_noise.shape, df_test.shape)

In [None]:
from sklearn.model_selection import KFold, StratifiedKFold
# Build the K-fold (train_idx, valid_idx) splits over the rows of df_train; shuffled with a fixed seed.
folds = list(KFold(n_splits=NUM_FOLD, shuffle=True, random_state=SEED).split(np.arange(len(df_train))))

In [None]:
from math import cos, pi


def cycle(iterable):
    """Turn a re-iterable (e.g. a DataLoader) into an endless generator.

    Unlike ``itertools.cycle`` nothing is cached: ``iterable`` is iterated
    afresh on every pass, so a shuffling loader yields a new order each time.

    If a pass produces no items at all (empty iterable, or a one-shot iterator
    that is already exhausted) the generator stops instead of spinning forever
    without yielding.

    :param iterable: object to iterate over repeatedly
    :return: generator yielding the items of ``iterable`` pass after pass
    """
    while True:
        yielded_any = False
        for x in iterable:
            yielded_any = True
            yield x
        if not yielded_any:
            return
            
def _one_sample_positive_class_precisions(scores, truth):
    """Calculate precisions for each true class for a single sample.

    Args:
      scores: np.array of (num_classes,) giving the individual classifier scores.
      truth: np.array of (num_classes,) bools indicating which classes are true.

    Returns:
      pos_class_indices: np.array of indices of the true classes for this sample.
      pos_class_precisions: np.array of precisions corresponding to each of those
        classes.
    """
    num_classes = scores.shape[0]
    pos_class_indices = np.flatnonzero(truth > 0)
    # Only calculate precisions if there are some true classes.
    if not len(pos_class_indices):
        return pos_class_indices, np.zeros(0)
    # Retrieval list of classes for this sample.
    retrieved_classes = np.argsort(scores)[::-1]
    # class_rankings[top_scoring_class_index] == 0 etc.
    class_rankings = np.zeros(num_classes, dtype=np.int)
    class_rankings[retrieved_classes] = range(num_classes)
    # Which of these is a true label?
    retrieved_class_true = np.zeros(num_classes, dtype=np.bool)
    retrieved_class_true[class_rankings[pos_class_indices]] = True
    # Num hits for every truncated retrieval list.
    retrieved_cumulative_hits = np.cumsum(retrieved_class_true)
    # Precision of retrieval list truncated at each hit, in order of pos_labels.
    precision_at_hits = (
            retrieved_cumulative_hits[class_rankings[pos_class_indices]] /
            (1 + class_rankings[pos_class_indices].astype(np.float)))
    return pos_class_indices, precision_at_hits


# All-in-one calculation of per-class lwlrap.

def calculate_per_class_lwlrap(truth, scores):
    """Label-weighted label-ranking average precision, broken down per class.

    Arguments:
      truth: np.array of (num_samples, num_classes); entries > 0 mark a class
        as present in that sample.
      scores: np.array of (num_samples, num_classes) with the real-valued
        score of each class for each sample.

    Returns:
      per_class_lwlrap: np.array of (num_classes,), the lwlrap of each class.
      weight_per_class: np.array of (num_classes,), each class's share of all
        positive labels. The overall lwlrap is
        np.sum(per_class_lwlrap * weight_per_class).
    """
    assert truth.shape == scores.shape
    num_samples, num_classes = scores.shape

    # One precision slot per (sample, class); only positive classes get filled.
    precision_table = np.zeros((num_samples, num_classes))
    for row in range(num_samples):
        hit_classes, hit_precisions = _one_sample_positive_class_precisions(
            scores[row, :], truth[row, :])
        precision_table[row, hit_classes] = hit_precisions

    positives_per_class = np.sum(truth > 0, axis=0)
    total_positives = float(np.sum(positives_per_class))
    weight_per_class = positives_per_class / total_positives

    # Column mean over the positive labels of each class (denominator floored at 1).
    precision_sums = np.sum(precision_table, axis=0)
    per_class_lwlrap = precision_sums / np.maximum(1, positives_per_class)
    return per_class_lwlrap, weight_per_class


class CosineLR(_LRScheduler):
    """Cosine-annealed learning rate with restarts.

    The rate moves from each group's base lr down to ``step_size_min`` over
    ``t0`` scheduler steps; once the step counter exceeds ``t0`` it is reset
    to 0 and ``t0`` is multiplied by ``tmult``.

    Note: ``get_lr`` advances the internal counter, so every call counts as
    one step.
    """

    def __init__(self, optimizer, step_size_min=1e-5, t0=100, tmult=2, curr_epoch=-1, last_epoch=-1):
        # These must exist before the base constructor runs.
        self.epochs_since_restart = curr_epoch
        self.tmult = tmult
        self.t0 = t0
        self.step_size_min = step_size_min
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        self.epochs_since_restart += 1

        # Restart: lengthen the period and rewind the counter.
        if self.epochs_since_restart > self.t0:
            self.t0 *= self.tmult
            self.epochs_since_restart = 0

        floor = self.step_size_min
        swing = 1 + cos(self.epochs_since_restart * pi / self.t0)

        lrs = []
        for base_lr in self.base_lrs:
            lrs.append(floor + (0.5 * (base_lr - floor) * swing))
        return lrs

    
class AverageMeter(object):
    """Running weighted mean that also remembers the most recent value.

    Attributes:
        val: last value passed to ``update``.
        sum: weighted sum of all values so far.
        count: total weight so far.
        avg: ``sum / count``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero all statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        weighted = val * n
        self.val = val
        self.count += n
        self.sum += weighted
        self.avg = self.sum / self.count


In [None]:
def train(train_loaders, model, optimizer, scheduler, epoch):
    """Run one pass over the curated loader, training both heads jointly.

    Every curated batch is paired with one batch drawn from the noisy-data
    iterator. The curated batch goes through ``model(...)``, the noisy batch
    through ``model.noisy(...)``; the two BCE-with-logits losses are summed
    and a single optimizer step is taken. The scheduler is stepped once per
    batch (not once per epoch).

    Args:
        train_loaders: ``(train_loader, noise_itr)`` - an iterable of curated
            ``(input, target)`` batches and an iterator yielding noisy batches.
        model: network exposing ``__call__`` and ``noisy``.
        optimizer: optimizer over the model parameters.
        scheduler: LR scheduler, stepped after every optimizer step.
        epoch: current epoch index (unused; kept for interface compatibility).

    Returns:
        ``(bce, lwlrap, bce_noise, lwlrap_noise)`` - sample-weighted mean
        losses and lwlrap scores for the curated and the noisy stream.
    """
    train_loader, noise_itr = train_loaders
    bce_avr = AverageMeter()
    bce_noise_avr = AverageMeter()
    criterion_bce = nn.BCEWithLogitsLoss().cuda()

    # switch to train mode
    model.train()

    # Per-batch results are collected in lists and concatenated once at the end.
    # The zero-row seeds keep the concatenation valid for an empty loader.
    preds = [np.zeros([0, NUM_CLASS], np.float32)]
    y_true = [np.zeros([0, NUM_CLASS], np.float32)]
    preds_noise = [np.zeros([0, NUM_CLASS], np.float32)]
    y_true_noise = [np.zeros([0, NUM_CLASS], np.float32)]

    for inputs, target in train_loader:
        # `non_blocking` replaces the old `async` keyword argument, which is a
        # reserved word (SyntaxError) on Python >= 3.7.
        inputs = inputs.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        input_noise, target_noise = next(noise_itr)  # one batch of noisy data
        input_noise = input_noise.cuda(non_blocking=True)
        target_noise = target_noise.cuda(non_blocking=True)

        # compute output
        output = model(inputs)
        bce = criterion_bce(output, target)
        output_noise = model.noisy(input_noise)
        bce_noise = criterion_bce(output_noise, target_noise)
        loss = bce + bce_noise

        # compute gradient and do one optimizer / scheduler step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Weight each running mean by the size of its OWN batch; the noisy
        # batch need not match the (possibly smaller, final) curated batch.
        bce_avr.update(bce.item(), inputs.size(0))
        bce_noise_avr.update(bce_noise.item(), input_noise.size(0))

        preds.append(torch.sigmoid(output).detach().cpu().numpy())
        y_true.append(target.cpu().numpy())
        preds_noise.append(torch.sigmoid(output_noise).detach().cpu().numpy())
        y_true_noise.append(target_noise.cpu().numpy())

    preds = np.concatenate(preds)
    y_true = np.concatenate(y_true)
    preds_noise = np.concatenate(preds_noise)
    y_true_noise = np.concatenate(y_true_noise)

    # NOTE(review): the training metrics drop the last class column ([:, :-1])
    # while validate() scores all columns - looks like a leftover; kept as-is
    # so logged numbers stay comparable. Confirm and align if unintended.
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true[:, :-1], preds[:, :-1])
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true_noise[:, :-1], preds_noise[:, :-1])
    lwlrap_noise = np.sum(per_class_lwlrap * weight_per_class)
    return bce_avr.avg, lwlrap, bce_noise_avr.avg, lwlrap_noise


def validate(val_loader, model):
    """Evaluate ``model`` (first head) on ``val_loader`` without gradient tracking.

    Args:
        val_loader: iterable of ``(input, target)`` batches.
        model: network to evaluate; it is switched to eval mode.

    Returns:
        ``(bce, lwlrap)`` - sample-weighted mean BCE-with-logits loss and the
        overall lwlrap computed over all classes.
    """
    bce_avr = AverageMeter()
    criterion_bce = nn.BCEWithLogitsLoss().cuda()

    # switch to evaluation mode
    model.eval()

    # Zero-row seeds keep the final concatenation valid for an empty loader.
    preds = [np.zeros([0, NUM_CLASS], np.float32)]
    y_true = [np.zeros([0, NUM_CLASS], np.float32)]

    with torch.no_grad():
        for inputs, target in val_loader:
            # `non_blocking` replaces the old `async` keyword argument, which
            # is a reserved word (SyntaxError) on Python >= 3.7.
            inputs = inputs.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(inputs)
            bce = criterion_bce(output, target)

            # record loss and predictions
            bce_avr.update(bce.item(), inputs.size(0))
            preds.append(torch.sigmoid(output).cpu().numpy())
            y_true.append(target.cpu().numpy())

    preds = np.concatenate(preds)
    y_true = np.concatenate(y_true)

    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true, preds)
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)

    return bce_avr.avg, lwlrap

In [None]:
# baseline: train the two-head model on curated + noisy data for every fold in FOLD_LIST

log_columns = ['epoch', 'bce', 'lwlrap', 'bce_noise', 'lwlrap_noise', 'val_bce', 'val_lwlrap', 'time']  # column names of the training log
for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
    print("fold: {}".format(fold + 1))
    if fold+1 not in FOLD_LIST: continue
    starttime = time.time()
    # Rows are collected in a list and the frame is rebuilt from it each epoch;
    # this avoids repeatedly concatenating onto an initially empty DataFrame.
    log_rows = []
    train_log = pd.DataFrame(columns=log_columns)

    # build model
    model = ResNet(NUM_CLASS).cuda()

    # curated training data: fixed-width crops with augmentation
    df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
    dataset_train = MfccDataset(df_train_fold, "../input/mel128v3/train/",
                                slice=1024,
                                mixup=True,
                                cutout_h=True,
                                gain=True,
                                resize=True,
                               )
    train_loader = DataLoader(dataset_train,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=1,  # 1 for CUDA?
                              pin_memory=True  # CUDA only
                              )

    # validation data: no slicing and no augmentation. Items keep their
    # original width, hence batch_size=1 (presumably widths differ between
    # clips, so they cannot be stacked - verify against the data).
    df_valid = df_train.iloc[ids_valid_split].reset_index(drop=True)
    dataset_valid = MfccDataset(df_valid, "../input/mel128v3/train/")
    valid_loader = DataLoader(dataset_valid,
                              batch_size=1,
                              shuffle=False,
                              num_workers=1,  # 1 for CUDA
                              pin_memory=True  # CUDA only
                              )

    # noisy training data: same augmentation as the curated set
    dataset_noise = MfccDataset(df_noise, "../input/mel128v3n/noise/",
                                slice=1024,
                                mixup=True,
                                cutout_h=True,
                                gain=True,
                                resize=True,
                               )
    noise_loader = DataLoader(dataset_noise,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=1,  # 1 for CUDA?
                              pin_memory=True  # CUDA only
                              )
    noise_itr = cycle(noise_loader)  # turn the dataloader into an endless generator

    # set optimizer and LR schedule; the scheduler is stepped per batch, so one
    # cosine period spans NUM_CYCLE epochs of the curated loader
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
    scheduler = CosineLR(optimizer, step_size_min=1e-6, t0=len(train_loader) * NUM_CYCLE, tmult=1)
    cudnn.benchmark = True

    # training
    for epoch in range(NUM_EPOCH):
        # train for one epoch
        bce, lwlrap, bce_noise, lwlrap_noise = train(
            (train_loader, noise_itr),
            model, optimizer, scheduler, epoch
        )

        # evaluate on validation set
        val_bce, val_lwlrap = validate(valid_loader, model)

        endtime = time.time() - starttime
        print("Epoch: {}/{} ".format(epoch + 1, NUM_EPOCH)
              + "CE: {:.4f} ".format(bce)
              + "LwLRAP: {:.4f} ".format(lwlrap)
              + "noise CE: {:.4f} ".format(bce_noise)
              + "noise LwLRAP: {:.4f} ".format(lwlrap_noise)
              + "Valid CE: {:.4f} ".format(val_bce)
              + "Valid LwLRAP: {:.4f} ".format(val_lwlrap)
              + "sec: {:.1f}".format(endtime)
              )
        log_rows.append([epoch+1, bce, lwlrap, bce_noise, lwlrap_noise, val_bce, val_lwlrap, endtime])
        train_log = pd.DataFrame(log_rows, columns=log_columns)
        # rewrite the full log every epoch so an interrupted run keeps its history
        train_log.to_csv("train_log_fold{}.csv".format(fold+1), index=False)
        # checkpoint the weights at the end of every cosine cycle
        if (epoch+1)%NUM_CYCLE==0:
            torch.save(model.state_dict(), "weight_fold_{}_epoch_{}.pth".format(fold+1, epoch+1))
    # NUM_EPOCH equals the last `epoch + 1` and is also defined when the epoch loop did not run
    torch.save(optimizer.state_dict(), 'optimizer_fold_{}_epoch_{}.pth'.format(fold+1, NUM_EPOCH))