In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
from tqdm import tqdm
from sklearn.model_selection import train_test_split, StratifiedKFold
from google.colab import drive
import torch
import torchvision
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.autograd import Variable
import tensorflow as tf


# Mount Google Drive under ./gdrive (remounting if it is already mounted).
# NOTE(review): later cells read from /content/gdrive, so this relies on the
# working directory being /content -- confirm when running outside Colab defaults.
drive.mount('./gdrive', force_remount=True)

# Keep the minus sign on matplotlib axes from being drawn as a unicode glyph.
plt.rcParams['axes.unicode_minus'] = False

# Hide all warnings for the rest of the session.
warnings.simplefilter('ignore')

Mounted at ./gdrive


In [None]:
# Read the three competition files from Drive: training rows, test rows and the submission template.
train, test, sample_submission = [
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/gdrive/MyDrive/Colab Notebooks/투빅스/tobigs15-mnist-competition/train_df.csv",
        "/content/gdrive/MyDrive/Colab Notebooks/투빅스/tobigs15-mnist-competition/test_df.csv",
        "/content/gdrive/MyDrive/Colab Notebooks/투빅스/tobigs15-mnist-competition/sample_submission.csv",
    )
]

#### 1. pytorch : resnet을 구현하여 data augmentation + kfold ensemble

In [None]:
# Use the GPU when PyTorch can see one, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Class labels as int64 tensors (read before `train` is replaced below).
label = torch.LongTensor(train['label'].values)


def _frame_to_images(frame):
  """Divide every column after the first by 255 and return a float tensor shaped (-1, 1, 28, 28)."""
  scaled = np.array(frame.iloc[:, 1:] / 255)
  return torch.FloatTensor(scaled.reshape((-1, 1, 28, 28)))


# NOTE(review): column 0 is skipped positionally in both frames (presumably the
# label column in train and an index column in test) -- confirm against the CSVs.
# These assignments replace the DataFrames, so this cell cannot be re-run on its own.
train = _frame_to_images(train)
test = _frame_to_images(test)

In [None]:
# Training dataset: subclasses Dataset so an optional augmentation can be applied per sample.

class CustomedDataset(Dataset):
  """Pairs images with labels and optionally transforms each image when it is accessed."""

  def __init__(self, img, label, transforms = None):
    # Inputs are stored as given; `transforms` is a callable applied to one image, or None.
    self.img, self.label, self.transforms = img, label, transforms

  def __len__(self):
    """Number of samples, taken from the image container."""
    return len(self.img)

  def __getitem__(self, idx):
    """Return (image, target) at `idx`; the image goes through `self.transforms` when it is set."""
    image, target = self.img[idx], self.label[idx]
    return (self.transforms(image) if self.transforms else image), target

In [None]:
# Test dataset: there are no labels, so each item is just the image.

class TestDataset(Dataset):
  """Thin Dataset wrapper that serves images only."""

  def __init__(self, img):
    # Keep a reference to the image container; nothing is copied.
    self.img = img

  def __len__(self):
    """Number of images."""
    return len(self.img)

  def __getitem__(self, idx):
    """Return the image stored at position `idx`."""
    return self.img[idx]

In [None]:
# Mini-batch size used by the PyTorch data loaders.
BATCH_SIZE = 8

# 5-fold stratified split with a fixed seed. Training one model per fold and
# ensembling them means no sample is permanently held out for validation.
kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 317)

# Augmentation pipeline: a random resized crop back to 28x28 (scale range 0.8-1),
# followed by a random affine transform with degrees = 30 (rotation).
_augment_steps = [
    transforms.RandomResizedCrop(size = (28, 28), scale = (0.8, 1)),
    transforms.RandomAffine(degrees = 30),
]
transformation = transforms.Compose(_augment_steps)

In [None]:
# Basic CNN building block of the ResNet below:
# convolution -> batch normalization -> activation.

class Conv_block(nn.Module):
  """Bias-free Conv2d followed by BatchNorm2d and LeakyReLU."""

  def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
    super().__init__()
    # The convolution has no bias; the batch norm that follows has its own shift term.
    conv_options = dict(kernel_size = kernel_size, stride = stride, padding = padding, bias = False)
    self.conv = nn.Conv2d(in_channels, out_channels, **conv_options)
    self.bn = nn.BatchNorm2d(out_channels)
    self.relu = nn.LeakyReLU()

  def forward(self, x):
    """Apply conv, then batch norm, then the activation."""
    features = self.conv(x)
    normalized = self.bn(features)
    return self.relu(normalized)

In [None]:
# Residual block used where the ResNet shrinks the image.
# Instead of pooling, a stride-2 convolution reduces the spatial size.
# 1x1 convolutions keep the parameter count low while allowing a deeper stack.

class Connection_conv(nn.Module):
  """Downsampling bottleneck block: doubles the channels and uses stride 2 on both paths."""

  def __init__(self, in_channels):
    super().__init__()
    narrow = int(in_channels/2)   # bottleneck width
    wide = in_channels * 2        # output width of both paths

    # Main path: 1x1 reduce -> 3x3 stride-2 -> 1x1 expand (+ batch norm).
    self.block1 = Conv_block(in_channels = in_channels, out_channels = narrow, kernel_size = 1, stride = 1, padding = 0)
    self.block2 = Conv_block(in_channels = narrow, out_channels = narrow, kernel_size = 3, stride = 2, padding = 1)
    self.conv3 = nn.Conv2d(in_channels = narrow, out_channels = wide, kernel_size = 1, stride = 1, padding = 0, bias = False)
    self.bn1 = nn.BatchNorm2d(wide)
    self.relu = nn.LeakyReLU()

    # Shortcut path: a 3x3 stride-2 projection so its output matches the main path's channels and stride.
    self.con_conv = nn.Conv2d(in_channels = in_channels, out_channels = wide, kernel_size = 3, stride = 2, padding = 1, bias = False)
    self.bn2 = nn.BatchNorm2d(wide)

  def forward(self, x):
    """Add the projected shortcut to the bottleneck output, then activate."""
    main = self.bn1(self.conv3(self.block2(self.block1(x))))
    shortcut = self.bn2(self.con_conv(x))
    return self.relu(shortcut + main)

In [None]:
# Residual block used where the image size is kept unchanged.
# 1x1 convolutions keep the parameter count low while allowing a deeper stack.

class Connection_identity(nn.Module):
  """Bottleneck block with an identity shortcut; channels and stride are unchanged."""

  def __init__(self, in_channels):
    super().__init__()
    narrow = int(in_channels / 4)   # bottleneck width

    # 1x1 reduce -> 3x3 (stride 1) -> 1x1 expand back to in_channels (+ batch norm).
    self.block1 = Conv_block(in_channels = in_channels, out_channels = narrow, kernel_size = 1, stride = 1, padding = 0)
    self.block2 = Conv_block(in_channels = narrow, out_channels = narrow, kernel_size = 3, stride = 1, padding = 1)
    self.conv3 = nn.Conv2d(in_channels = narrow, out_channels = in_channels, kernel_size = 1, stride = 1, padding = 0, bias = False)
    self.bn = nn.BatchNorm2d(in_channels)
    self.relu = nn.LeakyReLU()

  def forward(self, x):
    """Add the unmodified input to the bottleneck output, then activate."""
    residual = self.bn(self.conv3(self.block2(self.block1(x))))
    return self.relu(x + residual)

In [None]:
# Fully connected block applied after global average pooling in the ResNet:
# Linear -> batch normalization -> activation.

class Linear_module(nn.Module):
  """Bias-free Linear layer followed by BatchNorm1d and LeakyReLU."""

  def __init__(self, input_shape, output_shape):
    super().__init__()
    self.fc = nn.Linear(input_shape, output_shape, bias = False)
    self.bn = nn.BatchNorm1d(output_shape)
    self.relu = nn.LeakyReLU()

  def forward(self, x):
    """Project, normalize, then activate."""
    projected = self.fc(x)
    normalized = self.bn(projected)
    return self.relu(normalized)

In [None]:
# ResNet assembled from the blocks defined above.
# The network is kept shallow because of Colab's compute limits.

class Resnet(nn.Module):
  """Small bottleneck ResNet for 1-channel images that outputs 10 logits."""

  def __init__(self):
    super().__init__()
    # Stem: 7x7 convolution with padding 3 and stride 1, from 1 to 16 channels.
    self.relu = nn.LeakyReLU()
    self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 16, kernel_size = (7, 7), stride = 1, padding = 3, bias = False)
    self.bn1 = nn.BatchNorm2d(16)

    # Stage 1: stride-2 block (16 -> 32 channels), then two identity blocks.
    self.block1 = Connection_conv(16)
    self.block2 = Connection_identity(32)
    self.block3 = Connection_identity(32)

    # Stage 2: stride-2 block (32 -> 64 channels), then three identity blocks.
    self.block4 = Connection_conv(32)
    self.block5 = Connection_identity(64)
    self.block6 = Connection_identity(64)
    self.block7 = Connection_identity(64)

    # Head: global average pooling, 64 -> 32 hidden layer, 32 -> 10 logits.
    self.gap = nn.AdaptiveAvgPool2d(1)
    self.fc1 = Linear_module(64, 32)
    self.fc2 = nn.Linear(32, 10)

  def forward(self, x):
    """Return raw class scores; no softmax is applied here."""
    x = self.relu(self.bn1(self.conv1(x)))

    residual_blocks = (self.block1, self.block2, self.block3,
                       self.block4, self.block5, self.block6, self.block7)
    for block in residual_blocks:
      x = block(x)

    # Pool each of the 64 channels to a single value and flatten to (batch, 64).
    pooled = self.gap(x).view(-1, 64)
    return self.fc2(self.fc1(pooled))

In [None]:
# Runs one epoch over a data loader.
# The same function serves training and validation;
# parameters are only updated when phase is 'training'.

def fit(epoch, model, data_loader, phase = 'training', optimizer = None, criterion = None):
  """Run one pass over `data_loader` and report the mean loss and accuracy.

  Args:
    epoch: zero-based epoch index, used only in the printed summary.
    model: network to run; switched to train mode for 'training' and to eval
      mode for 'validation'.
    data_loader: iterable yielding (data, target) batches.
    phase: 'training' updates the parameters; any other value only evaluates.
    optimizer: optimizer to step while training. Defaults to the notebook-level
      `optimizer` variable when omitted.
    criterion: loss function. Defaults to the notebook-level `criterion`
      variable when omitted.

  Returns:
    (loss, accuracy) as Python floats, both averaged per sample. Both are 0.0
    when the loader yields nothing.
  """
  is_training = phase == 'training'
  if is_training:
    model.train()
  elif phase == 'validation':
    model.eval()

  # Keep the original calling convention: fall back to the module-level objects.
  if criterion is None:
    criterion = globals()['criterion']
  if is_training and optimizer is None:
    optimizer = globals()['optimizer']

  # Send batches to wherever the model's parameters live.
  run_device = next((p.device for p in model.parameters()), None)
  if run_device is None:
    run_device = globals().get('device', torch.device('cpu'))

  loss_sum = 0.0
  n_correct = 0
  n_samples = 0

  # Gradients are only tracked while training; evaluation builds no autograd graph.
  with torch.set_grad_enabled(is_training):
    for data, target in data_loader:
      data, target = data.to(run_device), target.to(run_device)

      if is_training:
        optimizer.zero_grad()

      output = model(data)
      loss = criterion(output, target)

      if is_training:
        loss.backward()
        optimizer.step()

      # Accumulate plain numbers: adding the loss tensor itself would keep every
      # batch's graph alive for the whole epoch.
      batch_n = target.size(0)
      loss_sum += loss.item() * batch_n
      n_correct += (output.argmax(dim = 1) == target).sum().item()
      n_samples += batch_n

  # Dividing by the real sample count stays correct for partial batches and
  # does not depend on the global BATCH_SIZE.
  running_loss = loss_sum / n_samples if n_samples else 0.0
  running_acc = n_correct / n_samples if n_samples else 0.0

  print('[Epoch: {:>4}] \t{}_loss = {:>.9} \t{}_acc = {:>.9}'.format(epoch + 1, phase, running_loss, phase, running_acc))

  return running_loss, running_acc

In [None]:
# Saves a training checkpoint.
# The optimizer state is stored with the weights so that training can be resumed
# with the same optimizer state instead of a fresh one.

def save_checkpoint(epoch, model, optimizer, filename):
  """Write the epoch number, model weights and optimizer state to `filename` via torch.save.

  The saved dict uses the keys 'Epoch', 'State_dict' and 'optimizer'.
  """
  torch.save(
      dict(Epoch = epoch, State_dict = model.state_dict(), optimizer = optimizer.state_dict()),
      filename,
  )

In [None]:
# Train one ResNet per stratified fold, validating on that fold's held-out part.
for i, (train_idx, val_idx) in enumerate(kfold.split(train, label)):

  X_train, y_train = train[train_idx], label[train_idx]
  X_val, y_val = train[val_idx], label[val_idx]

  # Only the training split is augmented.
  train_data = CustomedDataset(X_train, y_train, transformation)
  val_data = CustomedDataset(X_val, y_val)

  # NOTE(review): the validation loader also shuffles and drops the last partial
  # batch, so a few validation samples are skipped each epoch -- confirm this is intended.
  loader_options = dict(batch_size = BATCH_SIZE, shuffle = True, drop_last = True)
  train_loader = DataLoader(train_data, **loader_options)
  val_loader = DataLoader(val_data, **loader_options)

  # Fresh model, loss, optimizer and scheduler for every fold.
  model = Resnet().to(device)

  criterion = nn.CrossEntropyLoss().to(device)
  optimizer = torch.optim.Adam(model.parameters(), lr = 0.001, weight_decay = 3e-6)
  # Overshooting was frequent during training, so the learning rate is halved
  # whenever the validation accuracy has not improved for 7 epochs.
  scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'max', factor = 0.5, patience = 7)

  # Starts with 0 so the first epoch has a baseline to beat.
  val_accuracy = [0]
  PATH = '/content/gdrive/MyDrive/Colab Notebooks/투빅스/cnn_fold%s.pt'%i

  for epoch in range(50):

    epoch_loss, epoch_accuracy = fit(epoch, model, train_loader, phase = 'training')
    torch.cuda.empty_cache()
    val_epoch_loss, val_epoch_accuracy = fit(epoch, model, val_loader, phase = 'validation')
    torch.cuda.empty_cache()
    scheduler.step(val_epoch_accuracy)

    # Best accuracy seen before this epoch, captured before the new value is recorded.
    previous_best = max(val_accuracy)
    val_accuracy.append(val_epoch_accuracy)

    if val_epoch_accuracy > previous_best:
      # Keep only the checkpoint with the highest validation accuracy so far.
      save_checkpoint(epoch + 1, model, optimizer, PATH)

  print("THE BEST ACCURACY OF THIS FOLD IS %s"%max(val_accuracy))

In [None]:
# Load the five saved checkpoints (one per fold, k = 5) and restore a ResNet from each.

_checkpoint_paths = (
    '/content/gdrive/MyDrive/Colab Notebooks/투빅스/cnn_fold0.pt',
    '/content/gdrive/MyDrive/Colab Notebooks/투빅스/cnn_fold1.pt',
    '/content/gdrive/MyDrive/Colab Notebooks/투빅스/cnn_fold2.pt',
    '/content/gdrive/MyDrive/Colab Notebooks/투빅스/cnn_fold3.pt',
    '/content/gdrive/MyDrive/Colab Notebooks/투빅스/cnn_fold4.pt',
)

# map_location moves the stored tensors onto the current device.
state0, state1, state2, state3, state4 = [
    torch.load(ckpt_path,  map_location=torch.device(device)) for ckpt_path in _checkpoint_paths
]

model0, model1, model2, model3, model4 = [Resnet().to(device) for _ in _checkpoint_paths]

# Copy the saved weights (stored under the 'State_dict' key) into each network.
for _net, _state in zip((model0, model1, model2, model3, model4),
                        (state0, state1, state2, state3, state4)):
  _net.load_state_dict(_state['State_dict'])

In [None]:
# Build the test loader from the TestDataset defined earlier.
# No shuffling and no dropped batch, so predictions stay in the test set's row order.

test_data = TestDataset(test)
_test_loader_options = dict(batch_size = BATCH_SIZE, shuffle = False, drop_last = False)
test_loader = DataLoader(test_data, **_test_loader_options)

In [None]:
# Collects the model's raw outputs for every batch of a test loader.
def pred_test(model, test_loader):
  """Run `model` in eval mode over `test_loader` and return all outputs stacked row-wise.

  Args:
    model: network to evaluate; it is left in eval mode.
    test_loader: iterable yielding batches of inputs only (no targets).

  Returns:
    A float64 numpy array with one row per input sample, in loader order.
    The row count follows the data rather than a fixed size, and the result does
    not depend on the global BATCH_SIZE. An empty loader gives an array of
    shape (0, 10).
  """
  model.eval()

  # Send batches to wherever the model's parameters live.
  run_device = next((p.device for p in model.parameters()), None)
  if run_device is None:
    run_device = globals().get('device', torch.device('cpu'))

  batch_outputs = []
  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    for data in test_loader:
      batch_outputs.append(model(data.to(run_device)).cpu().numpy())

  if not batch_outputs:
    return np.zeros((0, 10))
  # float64 matches the dtype of the buffer this function used to fill.
  return np.concatenate(batch_outputs, axis = 0).astype(np.float64)

In [None]:
# The cross-entropy loss used for training applies softmax internally,
# so the model's outputs are raw scores that still need a softmax.

def softmax(x):
  """Numerically stable softmax along the last axis.

  Args:
    x: array-like of scores, e.g. shape (n_samples, n_classes). It is not modified.

  Returns:
    An array of the same shape whose entries along the last axis sum to 1.
    Floating inputs keep their dtype; other dtypes are computed in float64, so
    integer scores are no longer truncated to zeros.
  """
  scores = np.asarray(x)
  if not np.issubdtype(scores.dtype, np.floating):
    scores = scores.astype(np.float64)

  # Subtracting the row maximum keeps exp() from overflowing without changing the result.
  shifted = scores - scores.max(axis = -1, keepdims = True)
  exp_scores = np.exp(shifted)
  return exp_scores / exp_scores.sum(axis = -1, keepdims = True)

In [None]:
# Class probabilities on the test set from each of the five fold models.
output0, output1, output2, output3, output4 = [
    softmax(pred_test(fold_model, test_loader))
    for fold_model in (model0, model1, model2, model3, model4)
]

In [None]:
# Ensemble the five fold models with an equal-weight arithmetic mean.
torch_ens_output = 0.2 * (output0 + output1 + output2 + output3 + output4)

#### 2. Tensorflow : 사전훈련된 densenet모델을 이용하여 kfold ensemble

In [None]:
# Read the CSVs again: `train` and `test` were replaced by tensors in the PyTorch section.
train, test, sample_submission = [
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/gdrive/MyDrive/Colab Notebooks/투빅스/tobigs15-mnist-competition/train_df.csv",
        "/content/gdrive/MyDrive/Colab Notebooks/투빅스/tobigs15-mnist-competition/test_df.csv",
        "/content/gdrive/MyDrive/Colab Notebooks/투빅스/tobigs15-mnist-competition/sample_submission.csv",
    )
]

In [None]:
# Labels as a float32 vector (read before `train` is replaced below).
label = train['label'].values.astype('float32')


def _to_unit_images(frame, dropped_column):
  """Drop one column, scale the remaining values by 1/255 and reshape to (-1, 28, 28, 1) float32."""
  pixels = frame.drop(dropped_column, axis = 1).values.astype('float32')
  pixels /= 255
  return pixels.reshape((-1, 28, 28, 1))


# The label column is removed from train and the "Unnamed: 0" column from test.
train = _to_unit_images(train, 'label')
test = _to_unit_images(test, "Unnamed: 0")

In [None]:
# The pretrained DenseNet in TensorFlow needs images of at least (32, 32),
# so each image gets 2 pixels of zero padding on every side.

_border = ((0, 0), (2, 2), (2, 2), (0, 0))   # pad only the height and width axes
train = np.pad(train, _border, mode = 'constant')
test = np.pad(test, _border, mode = 'constant')

In [None]:
# The pretrained DenseNet in TensorFlow expects 3-channel images,
# so the single channel is copied three times with numpy.stack.

def _gray_to_rgb(images):
  """Remove the trailing singleton channel axis and stack three copies on a new last axis."""
  flat = np.squeeze(images, axis = -1)
  return np.stack([flat, flat, flat], axis = -1)


train = _gray_to_rgb(train)
test = _gray_to_rgb(test)

In [None]:
# DenseNet121 without its classification layers, plus a new head with a
# 10-way output for this task.

def my_model():
  """Build a Sequential model: ImageNet-initialised DenseNet121 base plus a dense softmax head.

  Returns:
    An uncompiled tf.keras Sequential model taking (32, 32, 3) inputs and
    producing 10 softmax outputs.
  """
  backbone = tf.keras.applications.DenseNet121(input_shape = (32, 32, 3), weights = 'imagenet', include_top = False)
  head = [
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(512, activation = 'relu'),
      tf.keras.layers.Dropout(0.25),
      tf.keras.layers.Dense(10, activation = 'softmax'),
  ]
  return tf.keras.models.Sequential([backbone] + head)

In [None]:
# 5-fold stratified split for the DenseNet models, with its own fixed seed.
# NOTE(review): this rebinds `kfold` and `BATCH_SIZE`, which the PyTorch cells
# above also read; re-running those cells after this one would use these values.
kfold = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 923)

# Mini-batch size for the TensorFlow section.
BATCH_SIZE = 128

In [None]:
# ImageDataGenerator used for data augmentation:
# rotation up to 30 degrees, horizontal/vertical shifts up to 20%, and zoom up to 20%.
# The ranges are chosen so that an augmented image still reads as the same digit to a human.

_augmentation_ranges = dict(
    rotation_range = 30,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    zoom_range = 0.2,
)
datagen = tf.keras.preprocessing.image.ImageDataGenerator(**_augmentation_ranges)

In [None]:
# Train one DenseNet per stratified fold and keep the best weights of each fold.
for i, (train_idx, val_idx) in enumerate(kfold.split(train, label)):
  X_train, X_val, y_train, y_val = train[train_idx], train[val_idx], label[train_idx], label[val_idx]

  model_path = "/content/gdrive/MyDrive/Colab Notebooks/투빅스/densenet%s.h5"%i

  model = my_model()

  model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
                loss = 'sparse_categorical_crossentropy',
                metrics = ['acc'])

  # All three callbacks watch validation accuracy, so higher is better for each:
  # stop after 15 epochs without improvement, halve the learning rate after 8,
  # and save the model only when validation accuracy reaches a new best.
  fold_callbacks = [
      tf.keras.callbacks.EarlyStopping(monitor = 'val_acc', patience = 15, mode = 'max'),
      tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_acc', patience = 8, factor = 0.5, mode = 'max'),
      tf.keras.callbacks.ModelCheckpoint(filepath = model_path, monitor = 'val_acc', save_best_only = True, mode = 'max'),
  ]

  # Model.fit accepts the augmenting iterator directly; fit_generator is deprecated.
  # The generator uses BATCH_SIZE so it agrees with steps_per_epoch.
  model.fit(datagen.flow(X_train, y_train, batch_size = BATCH_SIZE),
            epochs = 60,
            validation_data = (X_val, y_val),
            steps_per_epoch = X_train.shape[0] // BATCH_SIZE,
            callbacks = fold_callbacks)

In [None]:
# Rebuild the architecture five times and load each fold's trained weights with load_weights.
_weight_paths = (
    '/content/gdrive/MyDrive/Colab Notebooks/투빅스/densenet0.h5',
    '/content/gdrive/MyDrive/Colab Notebooks/투빅스/densenet1.h5',
    '/content/gdrive/MyDrive/Colab Notebooks/투빅스/densenet2.h5',
    '/content/gdrive/MyDrive/Colab Notebooks/투빅스/densenet3.h5',
    '/content/gdrive/MyDrive/Colab Notebooks/투빅스/densenet4.h5',
)

# NOTE: these names replace the PyTorch models that were bound to model0..model4 earlier.
model0, model1, model2, model3, model4 = [my_model() for _ in _weight_paths]

for _net, _weights in zip((model0, model1, model2, model3, model4), _weight_paths):
  _net.load_weights(_weights)

In [None]:
# Softmax outputs on the test set from each of the five DenseNet fold models.
output0, output1, output2, output3, output4 = [
    fold_model.predict(test) for fold_model in (model0, model1, model2, model3, model4)
]

In [None]:
# Ensemble the five DenseNet predictions with an equal-weight arithmetic mean.
tf_ens_output = 0.2 * (output0 + output1 + output2 + output3 + output4)

#### 3. densenet + resnet ensemble

In [None]:
# Blend the DenseNet and ResNet ensembles 9:1 to obtain the final class scores.

final_output = tf_ens_output * 0.9 + torch_ens_output * 0.1

In [None]:
# Predicted category for each test image: the index of the highest blended score.
final_pred = final_output.argmax(axis = 1)

In [None]:
# Put the predictions into the submission template's Category column and save it without the index.
sample_submission = sample_submission.assign(Category = final_pred)
sample_submission.to_csv("/content/gdrive/MyDrive/Colab Notebooks/투빅스/ens.csv", index = False)