In [1]:
import os
import numpy as np
import pandas as pd
import torch

#from torch_fun.dataloader import build_dataloader
#from torch_fun.model import build_model
#from torch_fun.utils import count_parameters, seed_everything, AdamW, CosineAnnealingWithRestartsLR
from torch import cuda
#from torch.utils.tensorboard import SummaryWriter
from pathlib import Path
from time import time
from tqdm import tqdm
from torch import nn
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

import torchvision.models as models

import warnings
warnings.filterwarnings('ignore')

In [2]:
import torch
import pandas as pd
from pathlib import Path
from torch.utils.data import DataLoader, Dataset


def get_spectrogram_feature(data):
    # data shape -> batch * dim

    stft = torch.stft(torch.FloatTensor(data),
                        8,
                        center=False,
                        normalized=False,
                        onesided=True)

    stft = (stft[:,:,0].pow(2) + stft[:,:,1].pow(2)).pow(0.5)
    amag = stft.numpy()
    feat = torch.FloatTensor(amag)
    feat = torch.unsqueeze(feat,dim=0)
#     feat = torch.FloatTensor(feat).transpose(1, -1)

    return feat

class Semi_dataset(Dataset):
    def __init__(self, data_frame):
        
        self.data_list = list()
        for data in data_frame[[str(x) for x in range(226)]].values:
            self.data_list.append(get_spectrogram_feature(data))
        data_frame.drop(columns=[str(x) for x in range(226)], inplace=True)
        self.df = data_frame
        try:
            self.label = data_frame[['layer_' + str(x) for x in range(1, 5)]].values
        except:
            print('This dataframe does not have target value')
            self.label = None

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
#         data = get_spectrogram_feature(self.df.iloc[index][[str(x) for x in range(226)]].values)
        data = self.data_list[index]

        if self.label is None:
            return data
        else:
            target = torch.tensor(self.label[index, :])
            return data, target


def build_dataloader(data_frame, batch_size, shuffle):
    dataset = Semi_dataset(data_frame)
    dataloader = DataLoader(
                            dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=0
                            )
    return dataloader

In [3]:
from torchvision import models
from efficientnet_pytorch import EfficientNet
from torch import nn
import torchvision.models as models

class Model(nn.Module):
    def __init__(self, device, model_name='efficient', weight_path=None):
        super(Model, self).__init__()

        self.device = device
        self.weight_path = weight_path
        self.criterion = nn.L1Loss()
        self.first_layer = nn.Sequential(
            nn.Conv2d(1, 3, (3, 1))
        ) 
                
        if model_name == 'efficient':
            self.backbone = EfficientNet.from_pretrained('efficientnet-b0', num_classes=1)
            self.backbone.requires_grad = True
            in_features = self.backbone._fc.in_features
            self.backbone._fc = nn.Sequential(
                nn.Linear(in_features=in_features, out_features=256, bias=True),
                nn.BatchNorm1d(num_features=256),
                nn.ReLU(),
                nn.Linear(in_features=256, out_features=4, bias=True),
            )
    def loss(self, pred, label):
        loss = self.criterion(pred, label)
        return loss

    def forward(self, input_img, target=None):
        input_img = input_img.to(self.device)
        
        if target is not None:
            target = target.to(self.device)
        
        x = self.first_layer(input_img)
        print(1)
        pred = self.backbone(x)
        print(2)
        pred = x.float()
        loss = self.loss(pred, target)
        if self.training:
            return pred, loss
        else:
            return pred, loss
class Baseline(nn.Module):
    def __init__(self, hidden_size, out_size):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=hidden_size, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(hidden_size),
            nn.Conv2d(hidden_size, hidden_size, 3, 2, 1),
            nn.ReLU(True),
            nn.BatchNorm2d(hidden_size),
            nn.Conv2d(hidden_size, hidden_size, 3, 2, 1),
            nn.ReLU(True),
            nn.BatchNorm2d(hidden_size),
            nn.Conv2d(hidden_size, hidden_size, 3, 2, 1),
            nn.ReLU(True),
            nn.BatchNorm2d(hidden_size),
            nn.Conv2d(hidden_size, hidden_size, 3, 2, 1),
            nn.ReLU(True),
            nn.BatchNorm2d(hidden_size),
            nn.Conv2d(hidden_size, out_size, 4, 1),
        )

    def forward(self, image):
        return self.net(image).squeeze(-1).squeeze(-1)


class Resnet18(nn.Module):
    def __init__(self, num_classes, dropout=False):
        super().__init__()
        self.first_layer = nn.Sequential(
            nn.Conv2d(1, 3, (3, 1))
        )
        model = models.resnet18(pretrained=True)
        model = list(model.children())[:-1]
        if dropout:
            model.append(nn.Dropout(0.2))
        model.append(nn.Conv2d(512, num_classes, 1))
        self.net = nn.Sequential(*model)

    def forward(self, x):
        return self.net(self.first_layer(x)).squeeze(-1).squeeze(-1)


class Resnet50(nn.Module):
    def __init__(self, num_classes, dropout=False):
        super().__init__()
        self.first_layer = nn.Sequential(
            nn.Conv2d(1, 3, (3, 1))
        ) 
        model = models.resnet50(pretrained=True)
        model = list(model.children())[:-1]
        if dropout:
            model.append(nn.Dropout(0.2))
        model.append(nn.Conv2d(2048, num_classes, 1))
        self.net = nn.Sequential(*model)

    def forward(self, x):
        return self.net(self.first_layer(x)).squeeze(-1).squeeze(-1)

class Resnet152(nn.Module):
    def __init__(self, num_classes, dropout=False):
        super().__init__()
        self.first_layer = nn.Sequential(
            nn.Conv2d(1, 3, (3, 1))
        ) 
        model = models.resnet152(pretrained=True)
        model = list(model.children())[:-1]
        if dropout:
            model.append(nn.Dropout(0.2))
        model.append(nn.Conv2d(2048, num_classes, 1))
        self.net = nn.Sequential(*model)

    def forward(self, x):
        return self.net(self.first_layer(x)).squeeze(-1).squeeze(-1)


class Resnext50(nn.Module):
    def __init__(self, num_classes, dropout=False):
        super().__init__()
        model = models.resnext50_32x4d(pretrained=True)
        model = list(model.children())[:-1]
        if dropout:
            model.append(nn.Dropout(0.2))
        model.append(nn.Conv2d(2048, num_classes, 1))
        self.net = nn.Sequential(*model)

    def forward(self, x):
        return self.net(x).squeeze(-1).squeeze(-1)


class Resnext101(nn.Module):
    def __init__(self, num_classes, dropout=False):
        super().__init__()
        model = models.resnext101_32x8d(pretrained=True)
        model = list(model.children())[:-1]
        if dropout:
            model.append(nn.Dropout(0.1))
        model.append(nn.Conv2d(2048, num_classes, 1))
        self.net = nn.Sequential(*model)

    def forward(self, x):
        return self.net(x).squeeze(-1).squeeze(-1)
        
        
def build_model(device, model_name='efficient', weight_path=None):
    if model_name == 'efficient':
        model = Model(device, model_name, weight_path)
    elif model_name == 'resnet50':
        model = Resnet50(4, False)
    elif model_name == 'resnet18':
        model = Resnet18(4, False)
    elif model_name == 'resnet152':
        model = Resnet152(4, False)
    model.to(device)
    return model

In [4]:
import os
import math
import random
import numpy as np
import torch
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler


def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


def count_parameters(model):
    '''
    Count of trainable weights in a model
    '''
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

class AdamW(Optimizer):
    """Implements AdamW algorithm.

    It has been proposed in `Fixing Weight Decay Regularization in Adam`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)

    .. Fixing Weight Decay Regularization in Adam:
    https://arxiv.org/abs/1711.05101
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps,
                        weight_decay=weight_decay)
        super(AdamW, self).__init__(params, defaults)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('AdamW does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # according to the paper, this penalty should come after the bias correction
                # if group['weight_decay'] != 0:
                #     grad = grad.add(group['weight_decay'], p.data)

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)

                denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                p.data.addcdiv_(-step_size, exp_avg, denom)

                if group['weight_decay'] != 0:
                    p.data.add_(-group['weight_decay'], p.data)

        return loss



class CosineAnnealingWithRestartsLR(_LRScheduler):
    '''
    SGDR\: Stochastic Gradient Descent with Warm Restarts: https://arxiv.org/abs/1608.03983
    code: https://github.com/gurucharanmk/PyTorch_CosineAnnealingWithRestartsLR/blob/master/CosineAnnealingWithRestartsLR.py
    added restart_decay value to decrease lr for every restarts
    '''
    def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1, T_mult=1, restart_decay=0.95):
        self.T_max = T_max
        self.T_mult = T_mult
        self.next_restart = T_max
        self.eta_min = eta_min
        self.restarts = 0
        self.last_restart = 0
        self.T_num = 0
        self.restart_decay = restart_decay
        super(CosineAnnealingWithRestartsLR,self).__init__(optimizer, last_epoch)

    def get_lr(self):
        self.Tcur = self.last_epoch - self.last_restart
        if self.Tcur >= self.next_restart:
            self.next_restart *= self.T_mult
            self.last_restart = self.last_epoch
            self.T_num += 1
        learning_rate = [(self.eta_min + ((base_lr)*self.restart_decay**self.T_num - self.eta_min) * (1 + math.cos(math.pi * self.Tcur / self.next_restart)) / 2) for base_lr in self.base_lrs]
        return learning_rate

In [5]:
DEBUG = False

DATASET_PATH = '../wafer'
train_df = pd.read_csv(os.path.join(DATASET_PATH, 'train.csv'))
test_df = pd.read_csv(os.path.join(DATASET_PATH, 'test.csv'))
submission = pd.read_csv(os.path.join(DATASET_PATH, 'sample_submission.csv'))

if DEBUG:
    train_df = train_df[:1000]

In [6]:
%%time
######## Scale
scaler = StandardScaler()
train_df.iloc[:,4:] = scaler.fit_transform(train_df.iloc[:,4:])
test_df.iloc[:,1:] = scaler.fit_transform(test_df.iloc[:,1:])

CPU times: user 1min 31s, sys: 5.34 s, total: 1min 37s
Wall time: 15.7 s


In [7]:
# hyper parameter
lr = 2.5e-4 / 4
start_epoch = 0
num_epochs = 30000
best_loss = 99999999
loss_list = []

batch_size = 1024

test_loader = build_dataloader(test_df, batch_size, False)

This dataframe does not have target value


In [8]:
if cuda.is_available:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [9]:
#valid_len = int(len(train_df)*0.1)

#X_train = train_df[valid_len:]
#X_valid = train_df[:valid_len]

In [10]:
#train_loader = build_dataloader(X_train, batch_size, True)
#valid_loader = build_dataloader(X_valid, batch_size, False)

In [11]:
# build model
#model = build_model(device, model_name='resnet')

#optimizer = AdamW(model.parameters(), lr, weight_decay=0.000025)
#criterion = nn.L1Loss()

In [12]:
# output path
#output_dir = Path('./', 'output')
#output_dir.mkdir(exist_ok=True, parents=True)
#model_path = output_dir / 'model.pt'

In [13]:
# load train model
# load_model_path = Path('../input/daconsemimodel/model (1).pt')

# model.load_state_dict(torch.load(load_model_path))

In [14]:
def validation(model, criterion, valid_loader, device):
    
    model.eval()
    valid_preds = np.zeros((len(valid_loader.dataset), 4))
    valid_targets = np.zeros((len(valid_loader.dataset), 4))
    val_loss = 0.
    
    with torch.no_grad():
        for i, (data, target) in enumerate(valid_loader):
            
            valid_targets[i * batch_size: (i+1) * batch_size] = target.float().numpy().copy()

            data = data.to(device)
            target = target.float().to(device)
                
            output = model(data)
            loss = criterion(output, target)
            
            valid_preds[i * batch_size: (i+1) * batch_size] = output.detach().cpu().numpy()
            
            val_loss += loss.item() / len(valid_loader)
        
    val_score = mean_absolute_error(valid_preds, valid_targets)
    
    return val_loss, val_score   

In [15]:
#model = models.vgg16(pretrained=False)
#first_layer = nn.Conv2d(1, 3, (3, 1))
#features = list(model.features)
#features.insert(0, nn.Conv2d(1, 3, (3, 1)))
#model.features = nn.Sequential(*features)
#num_features = model.classifier[6].in_features # last layer's in_feature
#features = list(model.classifier.children())[:-1] # remove last layer
#features.extend([nn.Linear(num_features, 4)]) # Add new layer with 4 outputs
#model.classifier = nn.Sequential(*features)
#model

In [16]:
seed = 42
#n_splits = 5
seed_everything(seed)

#x_tr, x_val = train_df.iloc[:, :], train_df.iloc[val_index, :]

# build model

model = build_model(device, model_name='resnet152')

optimizer = AdamW(model.parameters(), lr, weight_decay=0.000025)
criterion = nn.L1Loss()

train_loader = build_dataloader(train_df, batch_size, True)
#valid_loader = build_dataloader(x_val, batch_size, False)

#best_loss = 99999999
start_time = time()

best_epoch = 0
best_train_loss = 1000
#best_valid_score = 1000
model_path = '../wafer/resnet152_weights/resnet152_nfft8_34e_train_only.pt'
best_epoch_list = []
best_train_score_list = []

In [17]:
for epoch in range(start_epoch, num_epochs):

    model.train()
    model.load_state_dict(torch.load(model_path))
    optimizer.zero_grad()
    train_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):

        if device:
            data = data.to(device)
            target = target.float().to(device)
        else:
            target = target.float()

        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        train_loss += loss.item() / len(train_loader)
        
    if train_loss < best_train_loss:
        best_train_loss = train_loss
        best_epoch = epoch
        torch.save(model.state_dict(), model_path)
        print('----------------------------------------------------------------------->> loss improved to {:.5f}'.format(best_train_loss))

    #val_loss, val_score = validation(model, criterion, valid_loader, device)

    #if val_loss < best_loss:
    #    best_loss = val_loss
    #    torch.save(model.state_dict(), "F{}_nfft8_resnet18_model.pt".format(fold_num))
    #    print(">> score improved..! ")

    elapsed = time() - start_time
    
    lr = [_['lr'] for _ in optimizer.param_groups]
    
    print('Epoch {} / {}  train Loss: {:.4f}  lr: {:.5f}  elapsed: {:.0f}m {:.0f}s' \
          .format(epoch,  num_epochs - 1, train_loss, lr[0], elapsed // 60, elapsed % 60))
    #print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    best_epoch_list.append(best_epoch)
    best_train_score_list.append(best_train_loss)
print("==================== Resnet18 - Best train_loss - {:.5f} =================".format(best_train_loss))

----------------------------------------------------------------------->> loss improved to 1.41867
Epoch 0 / 29999  train Loss: 1.4187  lr: 0.00006  elapsed: 9m 9s
----------------------------------------------------------------------->> loss improved to 1.41684
Epoch 1 / 29999  train Loss: 1.4168  lr: 0.00006  elapsed: 18m 18s
Epoch 2 / 29999  train Loss: 1.4339  lr: 0.00006  elapsed: 27m 26s
----------------------------------------------------------------------->> loss improved to 1.35815
Epoch 3 / 29999  train Loss: 1.3582  lr: 0.00006  elapsed: 36m 34s
Epoch 4 / 29999  train Loss: 1.3754  lr: 0.00006  elapsed: 45m 42s
----------------------------------------------------------------------->> loss improved to 1.29128
Epoch 5 / 29999  train Loss: 1.2913  lr: 0.00006  elapsed: 54m 51s
Epoch 6 / 29999  train Loss: 1.3365  lr: 0.00006  elapsed: 63m 58s
Epoch 7 / 29999  train Loss: 1.3278  lr: 0.00006  elapsed: 73m 5s
Epoch 8 / 29999  train Loss: 1.4734  lr: 0.00006  elapsed: 82m 12s
----

KeyboardInterrupt: 

In [18]:
epoch_df = pd.DataFrame()
epoch_df['epoch'] = best_epoch_list
epoch_df['train_loss'] = best_train_score_list

In [19]:
epoch_df.sort_values('train_loss').head()

Unnamed: 0,epoch,train_loss
36,34,1.363878
35,34,1.363878
34,34,1.363878
33,32,1.382772
32,32,1.382772


In [21]:
score_to = round(min(best_train_score_list),6)
score_to

1.363878

In [24]:
%%time
batch_size = 1024
#test_loader = build_dataloader(test_df.iloc[:, 1:].values, Y=None, batch_size=batch_size, shuffle=False)

model = build_model(device, model_name='resnet152')
model.to(device)

model.eval()
model.load_state_dict(torch.load(model_path))

test_preds = np.zeros((len(test_loader.dataset), 4))

with torch.no_grad():
    for batch_idx, data in enumerate(test_loader):
        if device:
            data = data.to(device)
        outputs = model(data)
        test_preds[batch_idx * batch_size:(batch_idx+1) * batch_size] = outputs.detach().cpu().numpy()

CPU times: user 4.13 s, sys: 244 ms, total: 4.38 s
Wall time: 2.48 s


In [27]:
submission = pd.DataFrame({'id': test_df['id'],
                           'layer_1':test_preds.transpose()[0],
                           'layer_2':test_preds.transpose()[1],
                           'layer_3':test_preds.transpose()[2],
                           'layer_4':test_preds.transpose()[3]})
submission.to_csv('../wafer/resnet152_submission/resnet152_nfft8_train_only_{}.csv'.format(score_to), index=False)

submission.head()

Unnamed: 0,id,layer_1,layer_2,layer_3,layer_4
0,0,253.157059,230.475861,131.71344,86.181999
1,1,158.094223,127.168747,235.380234,98.242493
2,2,148.41214,177.574493,273.701019,155.74115
3,3,91.902367,228.991211,188.543716,82.892853
4,4,272.687286,293.107208,245.112518,270.573212


In [None]:
# inference #########
model.load_state_dict(torch.load(model_path))
model.eval()

predictions = np.zeros((len(test_loader.dataset),4))
with torch.no_grad():
    for i, data in enumerate(test_loader):
        data = data.to(device)
        output = model(data)
        predictions[i*batch_size: (i+1)*batch_size] = output.detach().cpu().numpy()
print('prediction value check: ', output[0])
np.savetxt('../wafer/resnet18_submission/resnet18_nfft8_train_only.csv'.format(fold_num), predictions, delimiter=',')

In [None]:
for fold in range(n_splits):
    fold_num = str(fold+1)
    # inference #########
    model.load_state_dict(torch.load("F{}_nfft8_resnet18_model.pt".format(fold_num)))
    model.eval()

    predictions = np.zeros((len(test_loader.dataset),4))
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            data = data.to(device)
            output = model(data)
            predictions[i*batch_size: (i+1)*batch_size] = output.detach().cpu().numpy()
    print('prediction value check: ', output[0])
    print(predictions.shape)
    np.savetxt('../wafer/resnet18_submission/F{}_nfft8_resnet18.csv'.format(fold_num), predictions, delimiter=',')

In [None]:
predictions