In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import IPython
import IPython.display
import PIL
import time
import sklearn.metrics
import pickle
import random
import cv2
import librosa

In [None]:
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.optim.lr_scheduler import _LRScheduler
import torchvision.models as models


In [None]:
# --- Experiment configuration: module-level constants read by the cells below ---
NUM_FOLD = 5  # number of KFold splits, used for both the curated and the noisy set
NUM_CLASS = 80  # width of the prediction / target arrays
SEED = 42  # random_state of the KFold splitters (global RNG seeding below is commented out)
NUM_EPOCH = 64*7  # epochs trained per fold
NUM_CYCLE = 64  # epochs per cosine-annealing cycle; weights are saved every NUM_CYCLE epochs
BATCH_SIZE = 32  # batch size of the curated / noisy / semi-supervised training loaders
BATCH_SIZE_VALID = 32  # not referenced in the visible code (the validation loader uses batch_size=1)
DO_TRAIN = True  # not referenced in the visible code
DO_EVALUATE = True  # not referenced in the visible code
DEBUG = False  # not referenced in the visible code
EXPERIMENT = False  # not referenced in the visible code
EXPERIMENT_FOLD = 1  # not referenced in the visible code
DO_FOLD = [5]  # 1-based fold numbers to train; DO_FOLD[0] also selects the noisy pseudo-label predictions
NOISY_LABEL_RATE = 0.  # blend weight of the original labels when building pseudo labels (0 -> predictions only)
C_SEMI = 20 # weight of the pseudo-label (semi-supervised) loss terms; original note: 75
TEMPERATURE = 2  # exponent applied to averaged predictions before renormalising (sharpening)
NORM = 'Rel'  # spectrogram normalisation: 'Rel' = per-clip mean/std, 'Abs'/'Abs2' = dataset-level mel_mean/mel_std
SLICE_RATE = 0.25  # default min_rate of MfccDataset.do_rate_slice
DO_PSEUDO_NOISY = False  # when True, train() adds a second pseudo-label loss on the noisy head
NOISY_LAST = "SIGMOID"  # output activation of the noisy head: "SIGMOID" or "SOFTMAX"
TRAIN_LAST = "SIGMOID"  # output activation of the curated head: "SIGMOID" or "SOFTMAX"
CONSISTENCY_LOSS = 'MSE'  # loss for the pseudo-label terms: 'MSE' or 'CE' (binary cross-entropy)
C_NOISY = 1  # weight of the noisy-head loss
NUM_MIX = 1  # number of mixup attempts per sample (each applied with probability 0.5)
LR = [1e-3, 1e-6]  # [initial Adam learning rate, minimum learning rate of the cosine schedule]

In [None]:
### Fix the random seeds (currently disabled, so runs are not reproducible)
# torch.manual_seed(SEED)
# random.seed(SEED)
# np.random.seed(SEED)
# torch.manual_seed(SEED)
# torch.cuda.manual_seed(SEED)

# cudnn speed up
cudnn.benchmark = True
# cudnn.benchmark = False  # turn the cuDNN speed-up off
# torch.backends.cudnn.deterministic = True  # ensure reproducibility

In [None]:
# Load the competition metadata: curated training set, noisy training set, and the
# sample submission (whose columns after the first one define the class list).
df_train = pd.read_csv("../input/freesound-audio-tagging-2019/train_curated.csv")
df_test = pd.read_csv("../input/freesound-audio-tagging-2019/sample_submission.csv")
df_noise = pd.read_csv("../input/freesound-audio-tagging-2019/train_noisy.csv")
labels = df_test.columns[1:].tolist()

# One boolean column per class.
# NOTE(review): `label in x` is a substring test on the raw `labels` string, so a class
# whose name is contained in another class name would also match — confirm the label
# set has no such pair.
for label in labels:
    df_train[label] = df_train['labels'].apply(lambda x: label in x)
    df_noise[label] = df_noise['labels'].apply(lambda x: label in x)

# Number of active classes per clip.
df_train['num_labels'] = df_train[labels].values.sum(axis=1)
df_noise['num_labels'] = df_noise[labels].values.sum(axis=1)

# df_train[labels] = df_train[labels].values / df_train['num_labels'].values[:,np.newaxis]
# if NOISY_LAST=='SOFTMAX':
#     df_noise[labels] = df_noise[labels].values / df_noise['num_labels'].values[:,np.newaxis]

# Location of each clip's pre-computed mel spectrogram (MfccDataset swaps the last
# four characters of the path for ".npy").
df_train['path'] = "../input/mel128v3/train/" + df_train['fname']
# Fixed: the test paths were previously built from df_train['fname'].
df_test['path'] = "../input/mel128v3/test/" + df_test['fname']
df_noise['path'] = "../input/mel128v3n/noise/" + df_noise['fname']

print(df_train.shape, df_noise.shape, df_test.shape)
df_train.head(10)

In [None]:
# Per-row sampling weights, read later by WeightedRandomSampler in the training cell.
# Every noisy row gets len(df_train)/len(df_noise), so the summed weight of the noisy
# set equals the summed weight of the curated set.
df_train['weight'] = 1
df_noise['weight'] = len(df_train)/len(df_noise)

In [None]:
# Dataset-level normalisation statistics: the dB-scaled mean and standard deviation of
# every curated clip are collected, then averaged into `mel_mean` / `mel_std`
# (read by MfccDataset when NORM is 'Abs' or 'Abs2').
num_train = len(df_train)
mean_list = np.zeros(num_train, np.float32)
std_list = np.zeros(num_train, np.float32)
for i, fname in enumerate(df_train['fname']):
    # Progress report every 1000 clips.
    if i % 1000 == 0:
        print("{}/{}".format(i+1, len(df_train)))
    # Drop the last four characters of the file name and load the matching .npy file.
    path = "../input/mel128v3/train/{}.npy".format(fname[:-4])
    mel = np.load(path)
    # 'Abs2' converts without a top_db clamp and with amin=1e-5; every other mode uses
    # librosa's default power_to_db settings.
    mel = librosa.power_to_db(mel, top_db=None, amin=1e-5) if NORM == 'Abs2' else librosa.power_to_db(mel)
    mean_list[i] = mel.mean()
    std_list[i] = mel.std()
mel_mean = mean_list.mean()
mel_std = std_list.mean()
print(mel_mean, mel_std)

In [None]:
# Pseudo labels for the noisy set, built from two saved prediction arrays that are
# joined along axis 1.
# NOTE(review): only the predictions for fold DO_FOLD[0] are used, even if DO_FOLD
# lists several folds — confirm this is intended.
tmp1 = np.load("../input/freesound-submission/preds_mel_noise_c1234.npy")
tmp2 = np.load("../input/freesound-submission/preds_mel_noise_c5678.npy")
tmp = np.concatenate((tmp1, tmp2), axis=1)
print(tmp.shape)
# Select the fold (1-based in DO_FOLD) and average over the next two axes.
tmp = tmp[DO_FOLD[0] - 1].mean(axis=(0, 1))

# Sharpen: raise to TEMPERATURE, then renormalise so each row sums to one.
tmp = tmp ** TEMPERATURE
row_totals = tmp.sum(axis=1, keepdims=True)
tmp = tmp / row_totals

# Blend the original labels with the sharpened predictions.
df_noise_pseudo = df_noise.copy()
original_part = df_noise[labels].values * NOISY_LABEL_RATE
predicted_part = tmp * (1-NOISY_LABEL_RATE)
df_noise_pseudo[labels] = original_part + predicted_part
df_noise_pseudo.head()




In [None]:
# Pseudo labels for the curated set: saved predictions averaged over their first three
# axes, sharpened with TEMPERATURE and renormalised so each row sums to one.
tmp = np.load("../input/freesound-submission/resnet34multi512_noisypred/preds_mel_valid_noisy.npy")
tmp = tmp.mean(axis=(0, 1, 2))
tmp = tmp ** TEMPERATURE
row_totals = tmp.sum(axis=1, keepdims=True)
tmp = tmp / row_totals

# Blend the original curated labels with the sharpened predictions.
df_train_pseudo = df_train.copy()
original_part = df_train_pseudo[labels].values * NOISY_LABEL_RATE
predicted_part = tmp * (1-NOISY_LABEL_RATE)
df_train_pseudo[labels] = original_part + predicted_part
df_train_pseudo.head()

In [None]:
def sharpen(pred, T):
    """Raise `pred` to the power `T` and rescale the result so it sums to one.

    Args:
        pred: array of scores.
        T: exponent; 1 only renormalises, larger values emphasise the largest scores.

    Returns:
        A new array; `pred` itself is not modified.
    """
    powered = pred ** T
    return powered / powered.sum()

import IPython.display as ipd  # To play sound in the notebook

# Sanity check: pick one random noisy clip, list its five highest averaged predictions
# (raw, renormalised, and sharpened with T=2), print its original label string, and
# render an audio player for it.
idx = np.random.randint(0, len(df_noise_pseudo))
# idx = 0
fname = '../input/freesound-audio-tagging-2019/train_noisy//{}'.format(df_noise_pseudo['fname'][idx])

# Reload the saved predictions and average over their first three axes.
tmp1 = np.load("../input/freesound-submission/preds_mel_noise_c1234.npy")
tmp2 = np.load("../input/freesound-submission/preds_mel_noise_c5678.npy")
tmp = np.concatenate([tmp1, tmp2], axis=1)
print(tmp.shape)
df_tmp = df_noise.copy()
df_tmp[labels] = tmp.mean(axis=(0,1,2))

# Scores of the chosen clip and the class order from highest to lowest score.
pred = df_tmp[labels].values[idx]
label_idx = pred
label_idx_sort = np.argsort(label_idx)[::-1]
pred_shapen1 = sharpen(pred, 1)
pred_shapen2 = sharpen(pred, 2)

for i in range(5):
    top = label_idx_sort[i]
    # Pad the class name to 40 characters so the columns line up.
    label = labels[top].ljust(40)
    print("{}, {} {:.3f} | {:.3f} | {:.3f}".format(
        i, label, label_idx[top], pred_shapen1[top], pred_shapen2[top]))
print("true label: {}".format(df_noise_pseudo['labels'][idx]))
ipd.Audio(fname)

In [None]:
!pip install pretrainedmodels
import pretrainedmodels
import pretrainedmodels.utils
class ResNet(nn.Module):
    """ResNet-34 body with a single-channel stem and two classification heads.

    The residual stages, batch norm, ReLU and max-pool are taken from a
    `pretrainedmodels` resnet34 built with ``pretrained=None``. The first
    convolution is replaced by a fresh one that accepts 1-channel input.
    Two MLP heads of identical layout share the pooled features:

    * ``forward(x)`` -> logits of ``last_linear``
    * ``noisy(x)``   -> logits of ``last_linear2``

    Both return raw logits; activations are applied by the caller.

    Args:
        num_classes: number of outputs of each head.
    """

    def __init__(self, num_classes=2):
        super(ResNet, self).__init__()

        self.num_classes = num_classes
        self.mode = 'train'  # not read anywhere in this class; kept for compatibility

        self.base_model = pretrainedmodels.__dict__['resnet34'](num_classes=num_classes, pretrained=None)

        # New stem for 1-channel spectrogram input.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        # The remaining stages are shared with (not copied from) base_model.
        self.bn1 = self.base_model.bn1
        self.relu = self.base_model.relu
        self.maxpool = self.base_model.maxpool
        self.layer1 = self.base_model.layer1
        self.layer2 = self.base_model.layer2
        self.layer3 = self.base_model.layer3
        self.layer4 = self.base_model.layer4
        # NOTE: despite the attribute name this is a global *max* pool.
        self.avgpool = nn.AdaptiveMaxPool2d((1, 1))

        # Fixed: a throwaway nn.Linear used to be assigned to last_linear and then
        # immediately overwritten; the heads also ignored `num_classes` and read the
        # global NUM_CLASS instead.
        in_features = self.base_model.layer4[1].conv1.in_channels
        self.last_linear = self._make_head(in_features, num_classes)
        self.last_linear2 = self._make_head(in_features, num_classes)

    @staticmethod
    def _make_head(in_features, num_classes):
        """Build one MLP head: in_features -> 1024 -> 1024 -> num_classes."""
        return nn.Sequential(
            nn.Linear(in_features, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(1024, num_classes),
        )

    def _stem_to_layer3(self, input):
        """Run the stem and residual stages 1-3."""
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        return self.layer3(x)

    def _pooled_features(self, input):
        """Run the full backbone and flatten the globally pooled stage-4 output."""
        bs, ch, h, w = input.size()  # also enforces a 4-D input, as before
        x = self.layer4(self._stem_to_layer3(input))
        return self.avgpool(x).view(bs, -1)

    def feature(self, input):
        """Return the globally pooled output of stage 3 (stage 4 is not applied)."""
        return self.avgpool(self._stem_to_layer3(input))

    def forward(self, input):
        """Return the logits of the first head (`last_linear`)."""
        return self.last_linear(self._pooled_features(input))

    def noisy(self, input):
        """Return the logits of the second head (`last_linear2`)."""
        return self.last_linear2(self._pooled_features(input))


In [None]:
class MfccDataset(Dataset):
    """Dataset of pre-computed mel spectrograms stored as ``.npy`` files.

    Each item is ``(img, label)``: the spectrogram after slicing, optional random
    augmentations, dB conversion and normalisation, reshaped to
    ``(1, img.shape[0], img.shape[1])``, and the float32 label row.

    Arguments:
        df: DataFrame with a ``path`` column and one column per entry of the global
            ``labels`` list. The last four characters of ``path`` are replaced by
            ``.npy`` when loading. Rows are accessed by label, so the index should
            be 0..len-1 (callers use ``reset_index(drop=True)``).
        load_dir: stored but not used for loading (paths come from ``df['path']``).
        slice: target length along axis 1 for the 'random' and 'rate' slice modes.
        mixup, cutout, cutout_h, cutout_w, gain, resize, flip, warp, highpass, white:
            switches for the matching augmentation; each enabled augmentation is
            applied with probability 0.5 per item.
        slice_mode: 'original' (no slicing), 'random' or 'rate'.
    """

    def __init__(self, df, load_dir, slice=-1, mixup=False,
                 cutout=False, cutout_h=False, cutout_w=False,
                 gain=False, resize=False,
                 flip=False,
                 warp=False,
                 highpass=False,
                 white=False,
                 slice_mode='original',
                 ):
        self.X_train = df['path']
        self.y_train = df[labels].values
        self.slice = slice
        self.mixup = mixup
        self.cutout = cutout
        self.cutout_h = cutout_h
        self.cutout_w = cutout_w
        self.gain = gain
        self.resize = resize
        self.highpass = highpass
        self.flip = flip
        self.warp = warp
        self.load_dir = load_dir
        self.white = white
        # Mel spectrogram of 120 s of low-amplitude uniform noise, used by do_white.
        white_noise = (np.random.rand(44100*120)-0.5).astype(np.float32)/100
        # The audio is passed as `y=` because newer librosa releases reject it as a
        # positional argument.
        self.white_noise = librosa.feature.melspectrogram(
                y=white_noise,
                sr=44100,
                n_mels=128,
                hop_length=347*1, # 1sec -> 128
                n_fft=128*20,
                fmin=20,
                fmax=44100//2,
            ).astype(np.float32)
        self.slice_mode = slice_mode

    def do_rate_slice(self, img, min_rate=SLICE_RATE):
        """Return a `self.slice`-wide window that keeps only a random fraction of content.

        With probability 0.5 the whole content is kept (rate = 1); otherwise the kept
        fraction is uniform in [min_rate, 1). Everything outside the kept span is zero.
        """
        img_new = np.zeros([img.shape[0], self.slice], np.float32)
        rate = np.random.random() * (1-min_rate) + min_rate
        if np.random.random()<0.5: rate = 1

        if img.shape[1]<=self.slice:
            # Short clip: cut a random span out of it and paste it at a random offset.
            len_slice = int(img.shape[1]*rate)
            if img.shape[1]-len_slice==0:
                shift_slice = 0
            else:
                shift_slice = np.random.randint(0, img.shape[1]-len_slice)
            img = img[:, shift_slice:shift_slice+len_slice]
            if self.slice - len_slice==0:
                shift = 0
            else:
                shift = np.random.randint(0, self.slice - len_slice)
            img_new[:, shift:shift + len_slice] =img
        else:
            # Long clip: take a random window, then blank everything outside a
            # random sub-span of it.
            shift = np.random.randint(0, img.shape[1]-self.slice)
            img_new = img[:, shift:shift+self.slice].copy()
            len_slice = int(self.slice*rate)
            if self.slice-len_slice==0:
                shift_slice = 0
            else:
                shift_slice = np.random.randint(0, self.slice-len_slice)
            # Fixed: the zeroing used to index axis 0 (rows) although the offsets
            # are positions along axis 1, like in the short-clip branch.
            img_new[:, :shift_slice] = 0
            img_new[:, shift_slice+len_slice:] = 0
        return img_new

    def do_random_slice(self, img):
        """Return a `self.slice`-wide window: zero-pad short clips at a random offset,
        crop long clips at a random offset, pass exact-length clips through."""
        img_new = np.zeros([img.shape[0], self.slice], np.float32)
        if img.shape[1]<self.slice:
            shift = np.random.randint(0, self.slice - img.shape[1])
            img_new[:, shift:shift + img.shape[1]] =img
        elif img.shape[1]==self.slice:
            img_new = img
        else:
            shift = np.random.randint(0, img.shape[1]-self.slice)
            img_new = img[:, shift:shift+self.slice]
        return img_new

    def do_original_slice(self, img):
        """Return the clip unchanged."""
        return img

    def do_slice(self, img):
        """Dispatch to the slicing routine selected by `self.slice_mode`.

        Raises:
            ValueError: if `self.slice_mode` is not 'random', 'rate' or 'original'
                (this used to return None and fail later).
        """
        if self.slice_mode=='random':
            return self.do_random_slice(img)
        elif self.slice_mode=='rate':
            return self.do_rate_slice(img)
        elif self.slice_mode=='original':
            return self.do_original_slice(img)
        raise ValueError("unknown slice_mode: {!r}".format(self.slice_mode))

    def do_mixup(self, img, label, alpha=1.):
        """Blend `img`/`label` with a random other item, ratio drawn from Beta(alpha, alpha).

        Both spectrograms must have the same shape after slicing.
        """
        idx = np.random.randint(0,len(self.X_train))
        img2 = np.load("{}.npy".format(self.X_train[idx][:-4]))
        img2 = self.do_slice(img2)

        label2 = self.y_train[idx].astype(np.float32)

        rate = np.random.beta(alpha,alpha)
        img = img*rate + img2*(1-rate)
        label = label*rate + label2*(1-rate)
        return img, label

    def do_white(self, img):
        """Add a randomly scaled `self.slice`-wide window of the white-noise spectrogram
        to `img` in place."""
        shift = np.random.randint(0, self.white_noise.shape[1] - self.slice)
        white_noise_slice = self.white_noise[:, shift:shift + self.slice] * np.random.rand()
        img += white_noise_slice
        return img

    def do_flip(self, img):
        """Reverse `img` along axis 1."""
        return img[:,::-1]

    def do_cutout_h(self, img, max = 32):
        """Zero a band of rows centred on a random row; half-width drawn from [8, max)."""
        coord = np.random.randint(0, img.shape[0])
        width = np.random.randint(8, max)
        cut = np.array([coord-width, coord+width])
        cut = np.clip(cut, 0, img.shape[0])
        img[cut[0]:cut[1]] = 0
        return img

    def do_cutout_w(self, img, max = 32):
        """Zero a band of columns centred on a random column; half-width drawn from [8, max)."""
        coord = np.random.randint(0, img.shape[1])
        width = np.random.randint(8, max)
        cut = np.array([coord-width, coord+width])
        cut = np.clip(cut, 0, img.shape[1])
        img[:,cut[0]:cut[1]] = 0
        return img

    def do_highpass(self, img):
        """Zero every row from a random row index to the end.

        This method used to be defined twice; the first definition returned None and
        was shadowed by this one, so it has been removed.
        """
        th = np.random.randint(0, img.shape[0])
        img[th:] = 0
        return img

    def cutout_bug(self, img):
        """Zero a random range of rows, with bounds drawn from [0, self.slice).

        Kept exactly as written: the name suggests the row/column mix-up is known.
        """
        coordx = np.sort(np.random.randint(0, self.slice,2))
        # Drawn but never used; kept so the sequence of random draws is unchanged.
        coordy = np.sort(np.random.randint(0, 128, 2))
        img[coordx[0]:coordx[1]] = 0
        return img

    def do_resize(self, img, max=0.1):
        """Stretch `img` along axis 1 by a factor in [1-max, 1+max), then crop or
        zero-pad back to the original width."""
        rate = 1- max + np.random.random() * max * 2
        img_tmp = cv2.resize(img, (int(self.slice*rate), img.shape[0], ))
        if rate>1:
            img_new = img_tmp[:,:img.shape[1]]
        else:
            img_new = np.zeros_like(img)
            img_new[:,:img_tmp.shape[1]] = img_tmp
        return img_new

    def do_gain(self, img, max=0.1):
        """Scale `img` by a factor in [1-max, 1+max)."""
        rate = 1- max + np.random.random() * max * 2
        return img * rate

    def do_warp(self, img, max=64):
        """Insert a zero gap at a random column and shift the rest right, in place.

        Columns pushed past the end are dropped.
        """
        left = np.random.randint(0, img.shape[1])
        right = np.min([img.shape[1], left+np.random.randint(8, max)])
        # Fixed: copy before zeroing. The previous view shared memory with `img`, so
        # the columns in [left, right) were wiped before they could be moved.
        tmp = img[:,left:img.shape[1]-(right-left)].copy()
        img[:,left:right] = 0
        img[:, right:] = tmp
        return img

    def __getitem__(self, index):
        """Load, slice, augment and normalise one item; see the class docstring."""
        img = np.load("{}.npy".format(self.X_train[index][:-4]))
        img = self.do_slice(img)
        label = self.y_train[index].astype(np.float32)

        # Augmentations that work on the power spectrogram.
        for _ in range(NUM_MIX):
            if self.mixup and np.random.random()<0.5:
                img, label = self.do_mixup(img, label)
        if self.gain and np.random.random()<0.5:
            img = self.do_gain(img)
        if self.resize and np.random.random()<0.5:
            img = self.do_resize(img)
        if self.white and np.random.random()<0.5:
            img = self.do_white(img)

        # Masking / geometric augmentations.
        if self.cutout and np.random.random()<0.5:
            img = self.cutout_bug(img)
        if self.cutout_h and np.random.random()<0.5:
            img = self.do_cutout_h(img)
        if self.cutout_w and np.random.random()<0.5:
            img = self.do_cutout_w(img)
        if self.warp and np.random.random()<0.5:
            img = self.do_warp(img)
        if self.flip and np.random.random()<0.5:
            img = self.do_flip(img)
        if self.highpass and np.random.random()<0.5:
            img = self.do_highpass(img)

        # dB conversion and normalisation; any other NORM value leaves img untouched.
        if NORM=='Abs' or NORM=='Abs2':
            img = librosa.power_to_db(img, top_db=None, amin=1e-5)
            img = (img - mel_mean) / (mel_std+1e-7)
        elif NORM=='Rel':
            img = librosa.power_to_db(img)
            img = (img - img.mean()) / (img.std()+1e-7)
        img = img.reshape([1, img.shape[0], img.shape[1]])

        return img, label

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.X_train.index)

In [None]:
from math import cos, pi


def cycle(iterable):
    """
    Turn a re-iterable object (e.g. a DataLoader) into an endless generator.

    Once `iterable` is exhausted it is iterated again from the start.
    :param iterable: object that can be iterated repeatedly
    :return: generator yielding the items of `iterable` over and over
    """
    while True:
        yield from iterable
            
def _one_sample_positive_class_precisions(scores, truth):
    """Calculate precisions for each true class for a single sample.

    Args:
      scores: np.array of (num_classes,) giving the individual classifier scores.
      truth: np.array of (num_classes,) bools indicating which classes are true.

    Returns:
      pos_class_indices: np.array of indices of the true classes for this sample.
      pos_class_precisions: np.array of precisions corresponding to each of those
        classes.
    """
    num_classes = scores.shape[0]
    pos_class_indices = np.flatnonzero(truth > 0)
    # Only calculate precisions if there are some true classes.
    if not len(pos_class_indices):
        return pos_class_indices, np.zeros(0)
    # Retrieval list of classes for this sample.
    retrieved_classes = np.argsort(scores)[::-1]
    # class_rankings[top_scoring_class_index] == 0 etc.
    class_rankings = np.zeros(num_classes, dtype=np.int)
    class_rankings[retrieved_classes] = range(num_classes)
    # Which of these is a true label?
    retrieved_class_true = np.zeros(num_classes, dtype=np.bool)
    retrieved_class_true[class_rankings[pos_class_indices]] = True
    # Num hits for every truncated retrieval list.
    retrieved_cumulative_hits = np.cumsum(retrieved_class_true)
    # Precision of retrieval list truncated at each hit, in order of pos_labels.
    precision_at_hits = (
            retrieved_cumulative_hits[class_rankings[pos_class_indices]] /
            (1 + class_rankings[pos_class_indices].astype(np.float)))
    return pos_class_indices, precision_at_hits


# All-in-one calculation of per-class lwlrap.

def calculate_per_class_lwlrap(truth, scores):
    """Compute the label-weighted label-ranking average precision for every class.

    Arguments:
      truth: np.array of (num_samples, num_classes); an entry > 0 marks the class
        as present in that sample.
      scores: np.array of (num_samples, num_classes) with the real-valued score the
        classifier gave each class for each sample.

    Returns:
      per_class_lwlrap: np.array of (num_classes,) with the lwlrap of each class.
      weight_per_class: np.array of (num_classes,) with each class's share of all
        true labels. The overall lwlrap is
        np.sum(per_class_lwlrap * weight_per_class).
    """
    assert truth.shape == scores.shape
    num_samples, num_classes = scores.shape
    # One precision slot per (sample, class); only the true classes get filled.
    precisions = np.zeros((num_samples, num_classes))
    for row, (sample_scores, sample_truth) in enumerate(zip(scores, truth)):
        hit_classes, hit_precisions = _one_sample_positive_class_precisions(
            sample_scores, sample_truth)
        precisions[row, hit_classes] = hit_precisions
    # How many samples carry each class, and each class's share of all labels.
    labels_per_class = (truth > 0).sum(axis=0)
    weight_per_class = labels_per_class / float(labels_per_class.sum())
    # Column average over the labelled samples; the maximum() guards classes that
    # never occur against a division by zero.
    per_class_lwlrap = precisions.sum(axis=0) / np.maximum(1, labels_per_class)
    return per_class_lwlrap, weight_per_class


class CosineLR(_LRScheduler):
    """Cosine-annealed learning rate with restarts.

    Each call to `get_lr` advances an internal step counter. The rate follows half
    a cosine from the optimizer's base rate (counter 0) down to `step_size_min`
    (counter == t0). Once the counter exceeds `t0`, the cycle length is multiplied
    by `tmult` and the counter restarts at 0.

    Args:
        optimizer: wrapped optimizer.
        step_size_min: lowest learning rate reached at the end of a cycle.
        t0: length of the first cycle, in scheduler steps.
        tmult: factor applied to the cycle length at every restart.
        curr_epoch: initial value of the step counter (it is incremented before use).
        last_epoch: forwarded to the base scheduler.
    """

    def __init__(self, optimizer, step_size_min=1e-5, t0=100, tmult=2, curr_epoch=-1, last_epoch=-1):
        # State must exist before the base constructor runs.
        self.step_size_min = step_size_min
        self.t0 = t0
        self.tmult = tmult
        self.epochs_since_restart = curr_epoch
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Advance the step counter and return the learning rate of every param group."""
        self.epochs_since_restart += 1

        # Restart: stretch the cycle and rewind the counter.
        if self.epochs_since_restart > self.t0:
            self.t0 *= self.tmult
            self.epochs_since_restart = 0

        floor = self.step_size_min
        cos_term = 1 + cos(self.epochs_since_restart * pi / self.t0)
        lrs = []
        for base_lr in self.base_lrs:
            lrs.append(floor + (0.5 * (base_lr - floor) * cos_term))
        return lrs

    
class AverageMeter(object):
    """Tracks the latest value and the weighted running average of a quantity."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, average, weighted sum and count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` with weight `n` and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


In [None]:
def train(train_loaders, model, optimizer, scheduler, epoch):
    """Train `model` for one pass over the curated loader.

    Each step draws one batch from every stream and minimises

        ce + C_NOISY * ce_noise + C_SEMI * mse_semi + C_SEMI * mse_semi2

    where `ce` is the curated-head loss on curated data, `ce_noise` the noisy-head
    loss on noisy data, `mse_semi` the curated-head consistency loss on the
    pseudo-labelled stream and `mse_semi2` (only if DO_PSEUDO_NOISY) the noisy-head
    consistency loss on the second pseudo-labelled stream. The scheduler is stepped
    after every batch.

    Args:
        train_loaders: tuple ``(train_loader, noise_itr, semi_itr, semi_itr2)``; the
            first is iterated once, the others are endless iterators read with `next`.
        model: network exposing ``model(x)`` and ``model.noisy(x)``.
        optimizer: optimizer over the model parameters.
        scheduler: learning-rate scheduler stepped once per batch.
        epoch: current epoch index (not used inside this function).

    Returns:
        ``(ce, lwlrap, ce_noise, lwlrap_noise, mse_semi, mse_semi2)``: running
        averages of the losses and the lwlrap of both heads over the epoch;
        ``mse_semi2`` is 0 when DO_PSEUDO_NOISY is off.

    Raises:
        ValueError: if NOISY_LAST, TRAIN_LAST or CONSISTENCY_LOSS has an unknown value.
    """
    train_loader, noise_itr, semi_itr, semi_itr2 = train_loaders
    ce_avr = AverageMeter()
    ce_noise_avr = AverageMeter()
    mse_semi_avr = AverageMeter()
    mse_semi2_avr = AverageMeter() if DO_PSEUDO_NOISY else None

    criterion_bce = nn.BCELoss().cuda()
    criterion_mse = nn.MSELoss().cuda()
    sigmoid = torch.nn.Sigmoid().cuda()
    # dim=1 is what the implicit-dim Softmax() resolved to for 2-D logits.
    softmax = torch.nn.Softmax(dim=1).cuda()

    activations = {'SIGMOID': sigmoid, 'SOFTMAX': softmax}
    consistency_losses = {'MSE': criterion_mse, 'CE': criterion_bce}
    if NOISY_LAST not in activations:
        raise ValueError("unknown NOISY_LAST: {!r}".format(NOISY_LAST))
    if TRAIN_LAST not in activations:
        raise ValueError("unknown TRAIN_LAST: {!r}".format(TRAIN_LAST))
    if CONSISTENCY_LOSS not in consistency_losses:
        raise ValueError("unknown CONSISTENCY_LOSS: {!r}".format(CONSISTENCY_LOSS))
    act_noisy = activations[NOISY_LAST]
    act_train = activations[TRAIN_LAST]
    criterion_consistency = consistency_losses[CONSISTENCY_LOSS]

    # switch to train mode
    model.train()

    # Per-batch arrays are collected in lists and joined once at the end; each list
    # starts with an empty array so the final concatenate is always valid.
    empty = np.zeros([0, NUM_CLASS], np.float32)
    pred_batches, true_batches = [empty], [empty]
    pred_noise_batches, true_noise_batches = [empty], [empty]

    for input, target in train_loader:
        # Fixed: `.cuda(async=True)` is a SyntaxError on Python >= 3.7 because
        # `async` is a reserved word; `non_blocking` is the replacement argument.
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        input_noise, target_noise = next(noise_itr)  # batch of noisy-set data
        input_noise = input_noise.cuda(non_blocking=True)
        target_noise = target_noise.cuda(non_blocking=True)

        input_semi, target_semi = next(semi_itr)
        input_semi = input_semi.cuda(non_blocking=True)
        target_semi = target_semi.cuda(non_blocking=True)

        # get model outputs
        output = model(input)
        ce = criterion_bce(act_train(output), target)

        output_noise = model.noisy(input_noise)
        ce_noise = criterion_bce(act_noisy(output_noise), target_noise)

        output_semi = model(input_semi)
        mse_semi = criterion_consistency(act_train(output_semi), target_semi)

        if DO_PSEUDO_NOISY:
            input_semi2, target_semi2 = next(semi_itr2)
            input_semi2 = input_semi2.cuda(non_blocking=True)
            target_semi2 = target_semi2.cuda(non_blocking=True)
            output_semi2 = model.noisy(input_semi2)
            # This term always uses softmax, independent of NOISY_LAST (unchanged).
            mse_semi2 = criterion_consistency(softmax(output_semi2), target_semi2)
        else:
            mse_semi2 = 0

        # calc losses
        loss = ce + ce_noise * C_NOISY + C_SEMI * mse_semi + C_SEMI * mse_semi2

        pred = act_train(output).detach().cpu().numpy()
        pred_noise = act_noisy(output_noise).detach().cpu().numpy()

        # All meters are weighted by the curated batch size, as before.
        batch_size = input.size(0)
        ce_avr.update(ce.item(), batch_size)
        ce_noise_avr.update(ce_noise.item(), batch_size)
        mse_semi_avr.update(mse_semi.item(), batch_size)
        if DO_PSEUDO_NOISY:
            mse_semi2_avr.update(mse_semi2.item(), batch_size)

        # compute gradient and do the optimizer step
        optimizer.zero_grad()  # reset the gradients
        loss.backward()
        optimizer.step()
        scheduler.step()

        pred_batches.append(pred)
        true_batches.append(target.detach().cpu().numpy())
        pred_noise_batches.append(pred_noise)
        true_noise_batches.append(target_noise.detach().cpu().numpy())

    preds = np.concatenate(pred_batches)
    y_true = np.concatenate(true_batches)
    preds_noise = np.concatenate(pred_noise_batches)
    y_true_noise = np.concatenate(true_noise_batches)

    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true, preds)
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true_noise, preds_noise)
    lwlrap_noise = np.sum(per_class_lwlrap * weight_per_class)

    mse_semi2_value = mse_semi2_avr.avg if DO_PSEUDO_NOISY else 0
    return ce_avr.avg, lwlrap, ce_noise_avr.avg, lwlrap_noise, mse_semi_avr.avg, mse_semi2_value


def validate(val_loader, model):
    """Evaluate the curated head of `model` on `val_loader` without gradients.

    Args:
        val_loader: loader yielding ``(input, target)`` batches.
        model: network whose ``model(x)`` returns the curated-head logits.

    Returns:
        ``(ce, lwlrap)``: the batch-size-weighted average binary cross-entropy
        (computed on the TRAIN_LAST activation) and the overall lwlrap.

    Raises:
        ValueError: if TRAIN_LAST has an unknown value.
    """
    ce_avr = AverageMeter()
    criterion_bce = nn.BCELoss().cuda()
    sigmoid = torch.nn.Sigmoid().cuda()
    # dim=1 is what the implicit-dim Softmax() resolved to for 2-D logits.
    softmax = torch.nn.Softmax(dim=1).cuda()
    if TRAIN_LAST=='SIGMOID':
        act_train = sigmoid
    elif TRAIN_LAST=='SOFTMAX':
        act_train = softmax
    else:
        raise ValueError("unknown TRAIN_LAST: {!r}".format(TRAIN_LAST))

    # switch to evaluation mode
    model.eval()

    # Start with an empty array so the final concatenate is always valid.
    empty = np.zeros([0, NUM_CLASS], np.float32)
    pred_batches, true_batches = [empty], [empty]
    with torch.no_grad():
        for input, target in val_loader:
            # Fixed: `.cuda(async=True)` is a SyntaxError on Python >= 3.7 because
            # `async` is a reserved word; `non_blocking` is the replacement argument.
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(input)
            ce = criterion_bce(act_train(output), target)
            # Scores for the ranking metric always come from softmax, as before.
            pred = softmax(output).cpu().numpy()

            # record loss and predictions
            ce_avr.update(ce.item(), input.size(0))
            pred_batches.append(pred)
            true_batches.append(target.cpu().numpy())

    preds = np.concatenate(pred_batches)
    y_true = np.concatenate(true_batches)

    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(y_true, preds)
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)

    return ce_avr.avg, lwlrap

In [None]:
from sklearn.model_selection import KFold, StratifiedKFold
# Build the cross-validation splits: one list of (train_indices, valid_indices) pairs
# for the curated set and one for the noisy set. Both use NUM_FOLD shuffled folds with
# the same random_state, so re-running the cell yields the same splits.
folds = list(KFold(n_splits=NUM_FOLD, shuffle=True, random_state=SEED).split(np.arange(len(df_train))))
folds_noisy = list(KFold(n_splits=NUM_FOLD, shuffle=True, random_state=SEED).split(np.arange(len(df_noise))))

In [None]:
# baseline
#
# For every fold listed in DO_FOLD: load the pre-trained weights, build the four
# training streams (curated, noisy, and two pseudo-labelled mixes), train for
# NUM_EPOCH epochs, validate after each epoch, and write a per-epoch CSV log.
# Weights are saved every NUM_CYCLE epochs and the optimizer state once at the end.

# Column names of the training log.
log_columns = ['epoch', 'bce', 'lwlrap', 'bce_noise', 'lwlrap_noise', 'semi_mse', 'semi_mse2', 'val_bce', 'val_lwlrap', 'time']

# Slicing / augmentation settings shared by every training-time dataset.
TRAIN_AUGMENT = dict(slice=512, mixup=True, cutout_h=True, gain=True, slice_mode='random')


def _make_loader(df, load_dir, batch_size=BATCH_SIZE, shuffle=True, sampler=None, **dataset_kwargs):
    """Wrap `df` in an MfccDataset and a single-worker, pinned-memory DataLoader."""
    dataset = MfccDataset(df, load_dir, **dataset_kwargs)
    return DataLoader(dataset,
                      batch_size=batch_size,
                      shuffle=shuffle,
                      num_workers=1,
                      pin_memory=True,  # CUDA only
                      sampler=sampler,
                      )


for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
    print("fold: {}".format(fold + 1))
    starttime = time.time()
    if fold+1 not in DO_FOLD: continue
    train_log = pd.DataFrame(columns=log_columns)
    log_records = []

    # build model and start from the previously trained weights of this fold
    model = ResNet(NUM_CLASS).cuda()
    model.load_state_dict(torch.load("../input/resnet34hardaug512/models/weight_fold_{}_epoch_512.pth".format(fold+1)))

    ids_noise_split = folds_noisy[fold][0]  # training part of the noisy-set split

    # curated training data
    df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
    train_loader = _make_loader(df_train_fold, "../input/mel128v3/train/", **TRAIN_AUGMENT)

    # curated validation data: unsliced clips, hence one clip per batch
    df_valid = df_train.iloc[ids_valid_split].reset_index(drop=True)
    valid_loader = _make_loader(df_valid, "../input/mel128v3/train/",
                                batch_size=1, shuffle=False, slice_mode='original')

    # noisy training data with its original labels
    df_noise_fold = df_noise.iloc[ids_noise_split].reset_index(drop=True)
    noise_loader = _make_loader(df_noise_fold, "../input/mel128v3n/noise/", **TRAIN_AUGMENT)
    noise_itr = cycle(noise_loader)  # turn the dataloader into an endless generator

    # curated labels + pseudo-labelled noisy data, drawn according to the 'weight' column
    df_semi = pd.concat([df_train.iloc[ids_train_split], df_noise_pseudo.iloc[ids_noise_split]]).reset_index(drop=True)
    semi_sampler = torch.utils.data.sampler.WeightedRandomSampler(df_semi['weight'].values, len(df_semi))
    semi_loader = _make_loader(df_semi, "../input/mel128v3/train/",
                               shuffle=False, sampler=semi_sampler, resize=True, **TRAIN_AUGMENT)
    semi_itr = cycle(semi_loader)  # turn the dataloader into an endless generator

    # pseudo-labelled curated data + noisy labels (only consumed when DO_PSEUDO_NOISY)
    # Fixed: the sampler length and the dataset used to be built from df_semi, so
    # df_semi2 (and with it df_train_pseudo) never reached the model.
    df_semi2 = pd.concat([df_train_pseudo.iloc[ids_train_split], df_noise.iloc[ids_noise_split]]).reset_index(drop=True)
    semi_sampler2 = torch.utils.data.sampler.WeightedRandomSampler(df_semi2['weight'].values, len(df_semi2))
    semi_loader2 = _make_loader(df_semi2, "../input/mel128v3/train/",
                                shuffle=False, sampler=semi_sampler2, resize=True, **TRAIN_AUGMENT)
    semi_itr2 = cycle(semi_loader2)  # turn the dataloader into an endless generator

    # set optimizer and scheduler; the scheduler is stepped per batch, so one cosine
    # cycle spans NUM_CYCLE epochs
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR[0])
    scheduler = CosineLR(optimizer, step_size_min=LR[1], t0=len(train_loader) * NUM_CYCLE, tmult=1)

    # training
    for epoch in range(NUM_EPOCH):
        # train for one epoch
        bce, lwlrap, bce_noise, lwlrap_noise, mse_semi, mse_semi2 = train(
            (train_loader, noise_itr, semi_itr, semi_itr2),
            model, optimizer, scheduler, epoch
        )

        # evaluate on validation set
        val_bce, val_lwlrap = validate(valid_loader, model)

        endtime = time.time() - starttime
        print("Epoch: {}/{} ".format(epoch + 1, NUM_EPOCH)
              + "CE: {:.4f} ".format(bce)
              + "LwLRAP: {:.4f} ".format(lwlrap)
              + "noise CE: {:.4f} ".format(bce_noise)
              + "noise LwLRAP: {:.4f} ".format(lwlrap_noise)
              + "semi MSE: {:.4f} ".format(mse_semi)
              + "semi2 MSE: {:.4f} ".format(mse_semi2)
              + "Valid CE: {:.4f} ".format(val_bce)
              + "Valid LwLRAP: {:.4f} ".format(val_lwlrap)
              + "sec: {:.1f}".format(endtime)
              )
        # Rebuild the log from the collected rows instead of growing a DataFrame
        # with pd.concat each epoch, then rewrite the CSV so progress survives an
        # interrupted run.
        log_records.append([epoch+1, bce, lwlrap, bce_noise, lwlrap_noise, mse_semi, mse_semi2, val_bce, val_lwlrap, endtime])
        train_log = pd.DataFrame(log_records, columns=log_columns)
        train_log.to_csv("train_log_fold{}.csv".format(fold+1), index=False)
        if (epoch+1)%NUM_CYCLE==0:
            torch.save(model.state_dict(), "weight_fold_{}_epoch_{}.pth".format(fold+1, epoch+1))
    # NUM_EPOCH equals the last `epoch+1`, and stays defined even when the loop body never ran.
    torch.save(optimizer.state_dict(), 'optimizer_fold_{}_epoch_{}.pth'.format(fold+1, NUM_EPOCH))

In [None]:
# Summarise the log of the most recently trained fold: final-epoch validation metrics,
# the best validation lwlrap with the first epoch that reached it, and the lowest
# validation loss.
val_bce_history = train_log['val_bce'].values
val_lwlrap_history = train_log['val_lwlrap'].values
val_bce_last = val_bce_history[-1]
val_lwlrap_last = val_lwlrap_history[-1]
best_lwlrap = val_lwlrap_history.max()
best_epoch = train_log['epoch'].values[val_lwlrap_history == best_lwlrap][0]
best_bce = val_bce_history.min()
print("{:.8f}, {:.8f}, {}, {:.8f}, {:.8f}".format(val_lwlrap_last, val_bce_last ,best_epoch, best_lwlrap, best_bce))