# Stacking 

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import os

# Root folder that holds every dataset used by this notebook.
# The original machine-specific location stays the default so existing runs are
# unaffected; set the ICEBERG_DATASETS_ROOT environment variable to run the
# notebook on another machine without editing this cell.
root_dir = Path(os.environ.get('ICEBERG_DATASETS_ROOT',
                               'C:/Users/sinjy/jupyter_notebook/datasets'))
# Competition data and previously downloaded submissions live under here.
data_dir = root_dir / 'kaggle_datasets' / 'Iceberg'
# Destination folder for prediction CSVs written by this notebook.
predict_dir = root_dir / 'kaggle_predict'

# Displayed as the cell output: the entries available under data_dir.
os.listdir(data_dir)

['model',
 'statoil-iceberg-classifier-challenge',
 'statoil-iceberg-submissions',
 'submission38-lb01448']

In [2]:
# Show which submission CSV files are available in the 'statoil-iceberg-submissions' folder.
os.listdir(data_dir / 'statoil-iceberg-submissions')

['sub_200_ens_densenet.csv',
 'sub_blend009.csv',
 'sub_fcn.csv',
 'sub_keras_beginner.csv',
 'sub_TF_keras.csv']

In [3]:
# Show which submission CSV files are available in the 'submission38-lb01448' folder.
os.listdir(data_dir / 'submission38-lb01448')

['submission38.csv', 'submission43.csv']

## pytorch CNN DenseNet Ensemble
## ==> sub_200_ens_densenet.csv

In [4]:
from __future__ import print_function
from __future__ import division

import numpy as np
import pandas as pd

import torch
import sys
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from sklearn import metrics
from sklearn.metrics import roc_auc_score, log_loss, roc_curve, auc
from sklearn.model_selection import StratifiedKFold, ShuffleSplit, cross_val_score, train_test_split
from sklearn.utils import shuffle

import logging
import psutil
import os
import scipy.signal
import random
from datetime import datetime
from scipy import signal
import scipy

In [5]:
# Detect CUDA once and pick the matching legacy tensor constructors.
use_cuda = torch.cuda.is_available()
if use_cuda:
    FloatTensor, LongTensor = torch.cuda.FloatTensor, torch.cuda.LongTensor
else:
    FloatTensor, LongTensor = torch.FloatTensor, torch.LongTensor
# Default tensor alias: float tensors on whichever device was selected above.
Tensor = FloatTensor

In [6]:
# Configure the root logger to emit INFO and above.
# NOTE: logging.basicConfig() returns None, so `handler` is None after this line.
handler = logging.basicConfig(level=logging.INFO)
# Logger used by the rest of the notebook for progress messages.
lgr = logging.getLogger(__name__)

In [7]:
def cpuStats():
    """Print a short snapshot of the runtime environment.

    Prints, in order: the Python version string, the current CPU utilisation
    reported by psutil, the system virtual-memory summary, and the memory used
    by this process expressed in GiB (bytes / 2**30).
    """
    print(sys.version)
    print(psutil.cpu_percent())
    print(psutil.virtual_memory())

    # First field of memory_info() for the current process
    # (psutil documents it as the resident set size, in bytes).
    this_process = psutil.Process(os.getpid())
    used_bytes = this_process.memory_info()[0]
    memoryUse = used_bytes / 2. ** 30
    print('memory GB:', memoryUse)

cpuStats()

3.7.11 (default, Jul 27 2021, 09:42:29) [MSC v.1916 64 bit (AMD64)]
21.7
svmem(total=16980230144, available=11203145728, percent=34.0, used=5777084416, free=11203145728)
memory GB: 0.19483566284179688


In [8]:
# Record in the log whether a CUDA device was detected.
lgr.info("USE CUDA=" + str(use_cuda))

INFO:__main__:USE CUDA=True


In [9]:
# Fixed seed (17 * 19 = 323) applied to the NumPy and PyTorch RNGs.
# The stdlib `random` module is not seeded in this cell.
seed = 17 * 19
np.random.seed(seed)
torch.manual_seed(seed)
if use_cuda:
    # Seed the CUDA generator as well when a GPU is in use.
    torch.cuda.manual_seed(seed)

### load data

In [10]:
# NOTE(review): TARGET_VAR is not referenced by any visible cell (labels are read
# from the 'is_iceberg' column) — confirm whether it is still needed.
TARGET_VAR = 'target'
# Folder from which train.json is loaded.
BASE_FOLDER = data_dir / 'statoil-iceberg-classifier-challenge'

In [11]:
# Load the training set into a DataFrame.
data = pd.read_json(BASE_FOLDER / 'train.json')

# Quick sanity check: (rows, columns).
print(data.shape)

(1604, 5)


### shuffle

In [12]:
# Seed the stdlib RNG with the notebook-wide `seed` instead of datetime.now():
#   * random.seed() rejects datetime objects with TypeError on Python >= 3.11;
#   * a wall-clock seed would make any use of `random` non-reproducible, which
#     contradicts the fixed seeds set for NumPy and PyTorch earlier.
random.seed(seed)
# Both shuffles below draw from NumPy's global RNG (seeded earlier via
# np.random.seed), so the resulting row order is repeatable on a fresh run.
data = shuffle(data)
# Second permutation of the already shuffled rows, kept from the original cell.
data = data.reindex(np.random.permutation(data.index))

In [13]:
# Each band value is turned into a 75x75 NumPy array.
for _band_col in ('band_1', 'band_2'):
    data[_band_col] = data[_band_col].apply(lambda x: np.array(x).reshape(75, 75))

# Non-numeric incidence angles are coerced to NaN.
data['inc_angle'] = pd.to_numeric(data['inc_angle'], errors='coerce')

In [14]:
# Collect every sample's band into one (N, 75, 75) array per band.
band_1 = np.stack(data['band_1'].tolist()).reshape(-1, 75, 75)
band_2 = np.stack(data['band_2'].tolist()).reshape(-1, 75, 75)
# Put the two bands on axis 1 -> (N, 2, 75, 75), i.e. two-channel images.
full_img = np.stack((band_1, band_2), axis=1)

In [15]:
def XnumpyToTensor(x_data_np):
    """Convert array-like feature data into a float32 torch tensor.

    The input is first copied into a float32 NumPy array; its shape and type
    are printed, the chosen device is logged, and the tensor is moved to the
    GPU when the module-level ``use_cuda`` flag is set. The tensor's shape is
    printed before it is returned.
    """
    features = np.array(x_data_np, dtype=np.float32)
    print(features.shape)
    print(type(features))

    lgr.info("Using the GPU" if use_cuda else "Using the CPU")
    X_tensor = torch.from_numpy(features)
    if use_cuda:
        X_tensor = X_tensor.cuda()

    print((X_tensor.shape))
    return X_tensor

In [16]:
def YnumpyToTensor(y_data_np):
    """Convert a NumPy label array into an (N, 1) float tensor.

    The array is reshaped to a single column, converted to a FloatTensor and
    moved to the GPU when the module-level ``use_cuda`` flag is set. Shapes and
    types are printed along the way for inspection.
    """
    n_rows = y_data_np.shape[0]
    y_data_np = y_data_np.reshape((n_rows, 1))
    print(y_data_np.shape)
    print(type(y_data_np))

    lgr.info("Using the GPU" if use_cuda else "Using the CPU")
    Y_tensor = torch.from_numpy(y_data_np).type(torch.FloatTensor)
    if use_cuda:
        Y_tensor = Y_tensor.cuda()

    print(type(Y_tensor))
    print(y_data_np.shape)
    print(type(y_data_np))
    return Y_tensor

### dataset

In [17]:
class FullTrainingDataset(torch.utils.data.Dataset):
    """A contiguous window ``[offset, offset + length)`` over a parent dataset.

    Args:
        full_ds: Parent dataset; must support ``len()`` and integer indexing.
        offset: Index in ``full_ds`` of the first item of the window.
        length: Number of items in the window.

    Raises:
        AssertionError: If the parent holds fewer than ``offset + length`` items.
    """
    def __init__(self, full_ds, offset, length):
        self.full_ds = full_ds
        self.offset = offset
        self.length = length
        assert len(full_ds) >= offset + length, "Parent Dataset not long enough"
        super(FullTrainingDataset, self).__init__()

    def __len__(self):
        """Return the number of items in the window."""
        return self.length

    def __getitem__(self, i):
        """Return item ``i`` of the window.

        Negative indices count from the end of the window. Indices outside the
        window raise ``IndexError`` rather than silently reading neighbouring
        items of the parent (e.g. validation rows leaking out of the training
        window); this also makes plain iteration stop after ``length`` items.
        """
        if i < 0:
            i += self.length
        if not 0 <= i < self.length:
            raise IndexError("index out of range for dataset window of length {}".format(self.length))
        return self.full_ds[i + self.offset]

In [18]:
# Share of the dataset held out for validation.
validationRatio = 0.11
def trainTestSplit(dataset, val_share=validationRatio):
    """Split ``dataset`` into a leading training part and a trailing validation part.

    The boundary index is ``int(len(dataset) * (1 - val_share))`` and is
    printed. Returns ``(train_subset, validation_subset)`` as two
    FullTrainingDataset windows over ``dataset``.
    """
    total = len(dataset)
    val_offset = int(total * (1 - val_share))
    print("offset:", str(val_offset))
    train_part = FullTrainingDataset(dataset, 0, val_offset)
    val_part = FullTrainingDataset(dataset, val_offset, total - val_offset)
    return train_part, val_part

In [19]:
# Mini-batch size used by the data loaders.
batch_size = 64

# NOTE(review): DataLoader is already imported in the imports cell; only
# TensorDataset is new here. Consider moving this import to the imports cell.
from torch.utils.data import TensorDataset, DataLoader
# Convert the stacked images and the 'is_iceberg' labels to tensors
# (both helpers move the result to the GPU when use_cuda is True).
train_imgs = XnumpyToTensor(full_img)
train_targets = YnumpyToTensor(data['is_iceberg'].values)
# Pair each image tensor with its label.
dset_train = TensorDataset(train_imgs, train_targets)

INFO:__main__:Using the GPU


(1604, 2, 75, 75)
<class 'numpy.ndarray'>


INFO:__main__:Using the GPU


torch.Size([1604, 2, 75, 75])
(1604, 1)
<class 'numpy.ndarray'>
<class 'torch.Tensor'>
(1604, 1)
<class 'numpy.ndarray'>


In [20]:
# Split off the trailing validation share, then wrap both parts in loaders.
train_ds, val_ds = trainTestSplit(dset_train)
# NOTE(review): shuffle=False on the training loader means every epoch sees
# identical batches (rows were shuffled only once, at load time) — confirm this
# is intended.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
print(val_loader)
print(train_loader)

offset: 1427
<torch.utils.data.dataloader.DataLoader object at 0x000001B33AE9E508>
<torch.utils.data.dataloader.DataLoader object at 0x000001B33AE9EF08>


### model

In [21]:
# Number of epochs for the training loop.
num_epochs = 100
# NOTE(review): n_channels and total_classes are not referenced by the visible
# model code (DenseNet hard-codes 2 input channels and is built with nClasses=1).
n_channels = 2
total_classes = 1

In [22]:
class Bottleneck(nn.Module):
    """Dense layer with a 1x1 bottleneck: BN-ReLU-Conv(1x1) then BN-ReLU-Conv(3x3).

    The 1x1 convolution produces ``4 * growthRate`` channels and the 3x3
    convolution produces ``growthRate`` channels, which are concatenated to the
    input along the channel axis. Output therefore has
    ``nChannels + growthRate`` channels and the same spatial size.
    """
    def __init__(self, nChannels, growthRate):
        super().__init__()
        bottleneck_width = 4 * growthRate
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(nChannels, bottleneck_width,
                               kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(bottleneck_width)
        self.conv2 = nn.Conv2d(bottleneck_width, growthRate,
                               kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        """Return ``x`` concatenated with the newly computed feature maps."""
        hidden = F.relu(self.bn1(x))
        hidden = self.conv1(hidden)
        hidden = F.relu(self.bn2(hidden))
        new_features = self.conv2(hidden)
        return torch.cat((x, new_features), 1)

In [23]:
class SingleLayer(nn.Module):
    """Plain dense layer: BN-ReLU-Conv(3x3) whose output is appended to its input.

    Adds ``growthRate`` channels to the ``nChannels`` input channels; the
    spatial size is unchanged (3x3 convolution with padding 1).
    """
    def __init__(self, nChannels, growthRate):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(nChannels, growthRate,
                               kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        """Return ``x`` concatenated with the newly computed feature maps."""
        activated = F.relu(self.bn1(x))
        new_features = self.conv1(activated)
        return torch.cat((x, new_features), 1)

In [24]:
class Transition(nn.Module):
    """Transition between dense blocks: BN-ReLU-Conv(1x1) followed by 2x2 average pooling.

    Maps ``nChannels`` input channels to ``nOutChannels`` and halves the
    spatial resolution.
    """
    def __init__(self, nChannels, nOutChannels):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(nChannels)
        self.conv1 = nn.Conv2d(nChannels, nOutChannels,
                               kernel_size=1, bias=False)

    def forward(self, x):
        """Return the channel-reduced, spatially down-sampled feature maps."""
        activated = F.relu(self.bn1(x))
        reduced = self.conv1(activated)
        return F.avg_pool2d(reduced, 2)

In [25]:
import math
class DenseNet(nn.Module):
    """Three-block DenseNet that outputs sigmoid probabilities.

    Layout: Conv(3x3) -> dense1 -> trans1 -> dense2 -> trans2 -> dense3 ->
    BN-ReLU -> 8x8 average pool -> flatten -> Linear -> sigmoid.

    Args:
        growthRate: Channels added by every dense layer.
        depth: Nominal depth; each dense block gets ``(depth - 4) // 3`` layers,
            halved when ``bottleneck`` is true.
        reduction: Channel compression factor applied by each transition.
        nClasses: Number of output units of the final linear layer.
        bottleneck: Use ``Bottleneck`` layers instead of ``SingleLayer``.
        nInChannels: Channels of the input image (default 2, the value that
            was previously hard-coded).
        inputSize: Side length of the (square) input image. When given, the
            input width of the final linear layer is derived from it. When
            ``None`` (default) the previous hard-coded width of 128 is kept,
            which fits the configuration built in this notebook
            (75x75 input, 32 final channels, 2x2 pooled map).

    Raises:
        ValueError: If ``inputSize`` is too small to survive the pooling stages.
    """
    def __init__(self, growthRate, depth, reduction, nClasses, bottleneck,
                 nInChannels=2, inputSize=None):
        super(DenseNet, self).__init__()

        nDenseBlocks = (depth-4) // 3
        if bottleneck:
            # A bottleneck layer holds two convolutions, so use half as many.
            nDenseBlocks //= 2

        nChannels = 2 * growthRate
        self.conv1 = nn.Conv2d(nInChannels, nChannels, kernel_size=3, padding=1,
                               bias=False)
        self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks,
                                       bottleneck)
        nChannels += nDenseBlocks * growthRate
        nOutChannels = int(math.floor(nChannels * reduction))
        self.trans1 = Transition(nChannels, nOutChannels)

        nChannels = nOutChannels
        self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks,
                                       bottleneck)
        nChannels += nDenseBlocks * growthRate
        nOutChannels = int(math.floor(nChannels * reduction))
        self.trans2 = Transition(nChannels, nOutChannels)

        nChannels = nOutChannels
        self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks,
                                       bottleneck)
        nChannels += nDenseBlocks * growthRate

        self.bn1 = nn.BatchNorm2d(nChannels)

        if inputSize is None:
            # Legacy width, kept so existing calls build the exact same network.
            fcInFeatures = 128
        else:
            # Two 2x2 poolings (transitions) and the final 8x8 pooling in
            # forward() each floor-divide the spatial size.
            pooledSize = inputSize // 2 // 2 // 8
            if pooledSize < 1:
                raise ValueError(
                    "inputSize={} is too small for the pooling stages".format(inputSize))
            fcInFeatures = nChannels * pooledSize * pooledSize
        self.fc = nn.Linear(fcInFeatures, nClasses)

        # Weight initialisation: He-style normal for convolutions, identity
        # scale / zero shift for batch norm, zero bias for the linear layer.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck):
        """Build one dense block of ``nDenseBlocks`` layers as an nn.Sequential."""
        layers = []
        for i in range(int(nDenseBlocks)):
            if bottleneck:
                layers.append(Bottleneck(nChannels, growthRate))
            else:
                layers.append(SingleLayer(nChannels, growthRate))
            # Every layer concatenates growthRate new channels to its input.
            nChannels += growthRate
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return sigmoid outputs of shape ``(batch, nClasses)`` for image batch ``x``."""
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.dense3(out)
        out = F.avg_pool2d(F.relu(self.bn1(out)), 8)
        out = out.view(out.size(0), -1)
        # torch.sigmoid replaces the deprecated F.sigmoid (same values, no warning).
        out = torch.sigmoid(self.fc(out))
        return out

In [26]:
# Small DenseNet-BC style network with a single output unit.
model = DenseNet(
    growthRate=8,
    depth=20,
    reduction=0.5,
    nClasses=1,
    bottleneck=True,
)

In [27]:
# Total number of scalar parameters in the model.
print('+ Number of params: {}'.format(sum(p.nelement() for p in model.parameters())))

+ Number of params: 19921


In [28]:
# Print the module tree to inspect layer sizes.
print(model)

DenseNet(
  (conv1): Conv2d(2, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (dense1): Sequential(
    (0): Bottleneck(
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(32, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): Bottleneck(
      (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(24, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(32, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
  )
  (trans1): Transition(
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=T

### Training

In [29]:
# Binary cross-entropy on probabilities; DenseNet.forward() already applies a sigmoid.
loss_func = torch.nn.BCELoss()

# Learning rate passed to Adam below.
LR = 0.0005
# NOTE(review): MOMENTUM is not passed to the Adam optimizer below — it appears unused.
MOMENTUM = 0.95
# NOTE(review): the optimizer is created before model.cuda() is called; PyTorch
# guidance is to move the model first — confirm this ordering is intended.
optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=5e-5)
if use_cuda:
    lgr.info("Using the GPU")
    # Move the model and the loss module to the GPU.
    model.cuda()
    loss_func.cuda()
# Log the optimizer and loss configuration.
lgr.info(optimizer)
lgr.info(loss_func)

INFO:__main__:Using the GPU
INFO:__main__:Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0005
    weight_decay: 5e-05
)
INFO:__main__:BCELoss()


### This model is just an example
### It takes a few days of training on a GPU

In [30]:
# import warnings
# warnings.filterwarnings('ignore')
# criterion = loss_func
# all_losses = []
# val_losses = []

# if __name__ == '__main__':
#     for epoch in range(num_epochs):
#         model.train()
#         print('Epoch {}/{}'.format(epoch + 1, num_epochs))
#         print('*' * 5 + ':')
#         running_loss = 0.0
#         running_acc = 0.0
#         for i, data in enumerate(train_loader, 1):
#             img, label = data
#             if use_cuda:
#                 img, label = img.cuda(non_blocking=True), label.cuda(non_blocking=True)
                
#             out = model(img)
#             loss = criterion(out, label)
#             running_loss += loss.item() * label.size(0)

#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()

#             if i % 10 == 0:
#                 all_losses.append(running_loss / (batch_size * i))
#                 print('[{}/{}] Loss: {:.6f}'.format(epoch+1, num_epochs, running_loss / (batch_size * i), 
#                                                    running_acc / (batch_size * i)))
#         print('Finish {} epoch, Loss: {:.6f}'.format(epoch+1, running_loss / (len(train_ds))))
        
#         model.eval()
#         eval_loss = 0
#         eval_acc = 0
#         with torch.no_grad():
#             for data in val_loader:
#                 img, label = data

#                 if use_cuda:
#                     img, label = img.cuda(non_blocking=True), label.cuda(non_blocking=True)
#                 out = model(img)
#                 loss = criterion(out, label)
#                 eval_loss += loss.item() * label.size(0)
#         print('VALIDATION Loss: {:.6f}'.format(eval_loss / (len(val_ds))))
#         val_losses.append(eval_loss / len(val_ds))
#         print()
    
#     torch.save(model.state_dict(), data_dir / 'model' / '200_ens_densenet.pth')
    
#     df_test_set = pd.read_json(BASE_FOLDER / 'test.json')
    
#     df_test_set['band_1'] = df_test_set['band_1'].apply(lambda x: np.array(x).reshape(75, 75))
#     df_test_set['band_2'] = df_test_set['band_2'].apply(lambda x: np.array(x).reshape(75, 75))
#     df_test_set['inc_angle'] = pd.to_numeric(df_test_set['inc_angle'], errors='coerce')
    
#     columns = ['id', 'is_iceberg']
#     df_pred = pd.DataFrame(data=np.zeros((0, len(columns))), columns=columns)
    
#     with torch.no_grad():
#         for index, row in df_test_set.iterrows():
#             rwo_no_id = row.drop('id')
#             band_1_test = (rwo_no_id['band_1']).reshape(-1, 75, 75)
#             band_2_test = (rwo_no_id['band_2']).reshape(-1, 75, 75)
#             full_img_test = np.stack([band_1_test, band_2_test], axis=1)

#             x_data_np = np.array(full_img_test, dtype=np.float32)
#             if use_cuda:
#                 X_tensor_test = torch.from_numpy(x_data_np).cuda()
#             else:
#                 X_tensor_test = torch.from_numpy(x_data_np)
            
#             predicted_val = (model(X_tensor_test).item())
#             df_pred = df_pred.append({'id': row['id'], 'is_iceberg': predicted_val}, ignore_index=True)
    
#     def savePred(df_pred):
#         csv_path = predict_dir / 'sub_200_ens_densenet.csv'
#         df_pred.to_csv(csv_path, columns=('id', 'is_iceberg'), index=None)
        
#     savePred(df_pred)

### test score: 0.15381