# PyTorch CNN for Beginners

## Try 3 Models
* (CNN + BatchNorm + ReLU)*N + Pooling + (CNN + BatchNorm + ReLU)*M + Pooling + Linear fc
* Add residual links to the model above
* Transfer Model: Pretrained Resnet

In [None]:
import random
import torch
import torch.nn as nn
import torch.utils.data
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style(style="whitegrid")

from sklearn.model_selection import StratifiedKFold, train_test_split
import time
import copy

# Make runs reproducible and pick the GPU when one is available.
# Seed every RNG the notebook draws from (weight init, DataLoader shuffling).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # silently ignored when CUDA is unavailable
# cuDNN is switched off entirely and, should it be re-enabled, restricted to
# deterministic algorithms.
torch.backends.cudnn.enabled = False
torch.backends.cudnn.deterministic = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# To force CPU execution instead, use: device = torch.device('cpu')
device

In [None]:
# Load the competition tables: labelled training rows, unlabelled test rows
# and the sample submission file that is filled in at the end of the notebook.
train = pd.read_csv('../input/digit-recognizer/train.csv')
test = pd.read_csv('../input/digit-recognizer/test.csv')
sub = pd.read_csv('../input/digit-recognizer/sample_submission.csv')

In [None]:
# Preview the first rows of the training table.
train.head()

In [None]:
# Per-column summary statistics of the training table.
train.describe()

In [None]:
# Report (rows, columns) of both tables.
print('train shape is {}'.format(train.shape))
print('test shape is {}'.format(test.shape))

In [None]:
# Detach the target column from the pixel table: `pop` removes 'label' from
# `train` in place and hands the column back, which becomes a long tensor.
label = torch.from_numpy(train.pop('label').values).type(torch.long)

In [None]:
# Draw the first four training rows as 28x28 images in a single row of axes.
fig, axes = plt.subplots(1, 4, figsize=(16, 4))
for ax, pixels in zip(axes, train.values[:4]):
    ax.imshow(pixels.reshape(28, 28))
plt.show()

In [None]:
def data_deal(train, test):
    """Turn the two pixel tables into float image tensors.

    Each table is divided by 255, reshaped to ``(-1, 1, 28, 28)`` and
    converted to a ``torch.float`` tensor.

    Args:
        train: DataFrame of training pixels, 784 values per row.
        test: DataFrame of test pixels, 784 values per row.

    Returns:
        Tuple ``(train_tensor, test_tensor)``.
    """
    def as_image_tensor(frame):
        pixels = (frame / 255).values.reshape(-1, 1, 28, 28)
        return torch.from_numpy(pixels).type(torch.float)

    return as_image_tensor(train), as_image_tensor(test)

In [None]:
# Replace the DataFrames with their tensor versions; from here on `train` and
# `test` are tensors, not tables.
train, test = data_deal(train, test)

In [None]:
class DataSet(torch.utils.data.Dataset):
    """Indexable dataset over samples with optional labels.

    With labels, indexing yields ``(sample, label)``; when ``label`` is
    ``None`` it yields the bare sample.
    """

    def __init__(self, data, label):
        self.data, self.label = data, label

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        sample = self.data[index]
        if self.label is None:
            return sample
        return (sample, self.label[index])

In [None]:
def Data(train, val, train_label, val_label, test, batch_size=32):
    """Build the train/val/test data loaders and report the split sizes.

    Args:
        train: Training samples.
        val: Validation samples.
        train_label: Labels aligned with ``train``.
        val_label: Labels aligned with ``val``.
        test: Unlabelled test samples.
        batch_size: Mini-batch size used by all three loaders (default 32,
            the value that was previously hard-coded).

    Returns:
        Tuple ``(data_loader, data_size)``: ``data_loader`` maps
        ``'train'``/``'val'``/``'test'`` to a ``DataLoader``; ``data_size``
        maps ``'train'`` and ``'val'`` to their number of samples.
    """
    datasets = {
        'train': DataSet(train, train_label),
        'val': DataSet(val, val_label),
        'test': DataSet(test, None),
    }
    # The test loader is not shuffled so that its predictions stay in the
    # original row order; train and val are shuffled.
    data_loader = {
        split: torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=(split != 'test'),
            num_workers=0, drop_last=False)
        for split, dataset in datasets.items()
    }
    data_size = {split: len(datasets[split]) for split in ('train', 'val')}
    return data_loader, data_size

## Base Model CBR + pooling + linear fc 

In [None]:
class CnnBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU building block.

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output channels.
        k: Convolution kernel size.
        s: Convolution stride.

    The convolution always uses ``padding=1``.
    """

    def __init__(self, in_channel, out_channel, k, s):
        super().__init__()
        self.cnn = nn.Conv2d(in_channel, out_channel, kernel_size=k, stride=s, padding=1)
        self.batchnorm = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply convolution, batch normalisation and ReLU in sequence."""
        return self.relu(self.batchnorm(self.cnn(x)))

In [None]:
class CNN(nn.Module):
    """Plain convolutional classifier with 10 outputs.

    Layout: three CnnBlocks (1->16->32->64 channels), 2x2 max pooling, one
    more CnnBlock (64->64), 2x2 max pooling, then a linear layer over the
    flattened features.
    """

    def __init__(self):
        super().__init__()
        widths = [1, 16, 32, 64]
        self.cnnlayer = nn.ModuleList(
            [CnnBlock(c_in, c_out, 3, 1) for c_in, c_out in zip(widths, widths[1:])])
        self.pooling = nn.MaxPool2d(2, stride=2, padding=0)
        self.cnnlayer1 = nn.ModuleList([CnnBlock(64, 64, 3, 1)])
        self.pooling1 = nn.MaxPool2d(2, stride=2, padding=0)
        # 3136 = 64 channels * 7 * 7, the flattened size for 28x28 inputs
        # after the two 2x2 poolings.
        self.Linear = nn.Linear(64 * 7 * 7, 10)

    def forward(self, x):
        """Return the 10 class scores for each sample in the batch ``x``."""
        stages = ((self.cnnlayer, self.pooling), (self.cnnlayer1, self.pooling1))
        for blocks, pool in stages:
            for block in blocks:
                x = block(x)
            x = pool(x)
        flat = x.view(x.size(0), -1)
        return self.Linear(flat)

## Add Residual link

In [None]:
class ResCnnBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU branch added back onto its input.

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output channels.
        k: Convolution kernel size.
        s: Convolution stride.

    The branch output is summed with the unmodified input, so the two must
    have compatible shapes (this file only builds it as 64->64, k=3, s=1).
    """

    def __init__(self, in_channel, out_channel, k, s):
        super().__init__()
        self.cnn = nn.Conv2d(in_channel, out_channel, kernel_size=k, stride=s, padding=1)
        self.batchnorm = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``x`` plus the activated convolution branch."""
        residual = self.relu(self.batchnorm(self.cnn(x)))
        return x + residual

In [None]:
class ResCNN(nn.Module):
    """Convolutional classifier with residual blocks and 10 outputs.

    Layout: one CnnBlock (1->64 channels) followed by two ResCnnBlocks,
    2x2 max pooling, one more ResCnnBlock, 2x2 max pooling, then a linear
    layer over the flattened features.
    """

    def __init__(self):
        super().__init__()
        self.cnnlayer = nn.ModuleList(
            [CnnBlock(1, 64, 3, 1)] + [ResCnnBlock(64, 64, 3, 1) for _ in range(2)])
        self.pooling = nn.MaxPool2d(2, stride=2, padding=0)
        self.cnnlayer1 = nn.ModuleList([ResCnnBlock(64, 64, 3, 1)])
        self.pooling1 = nn.MaxPool2d(2, stride=2, padding=0)
        # 3136 = 64 channels * 7 * 7, the flattened size for 28x28 inputs
        # after the two 2x2 poolings.
        self.Linear = nn.Linear(64 * 7 * 7, 10)

    def forward(self, x):
        """Return the 10 class scores for each sample in the batch ``x``."""
        stages = ((self.cnnlayer, self.pooling), (self.cnnlayer1, self.pooling1))
        for blocks, pool in stages:
            for block in blocks:
                x = block(x)
            x = pool(x)
        flat = x.view(x.size(0), -1)
        return self.Linear(flat)

In [None]:
def _run_phase(state, loader, n_samples, model, criterion, optimizer):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Weights are updated only when ``state == 'train'``; any other state runs
    the model in eval mode with gradients disabled.
    """
    is_train = state == 'train'
    model.train(is_train)
    total_loss = 0.0
    correct = 0
    for data, label in loader:
        data = data.to(device)
        label = label.to(device)
        with torch.set_grad_enabled(is_train):
            out = model(data)
            loss = criterion(out, label)
            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        # Weight the batch loss by the batch size so that dividing by the
        # split size below gives a per-sample mean even with a short last batch.
        total_loss += loss.item() * data.size(0)
        # Count with Python ints: an integer tensor would integer-divide on
        # older torch versions.
        correct += (label == out.argmax(dim=1)).sum().item()
    return total_loss / n_samples, correct / n_samples


def _plot_history(history, epochs):
    """Plot train/val loss (left) and accuracy (right) against the epoch."""
    plt.figure(figsize=(12, 6))
    for position, metric in enumerate(('loss', 'acc'), start=1):
        plt.subplot(1, 2, position)
        plt.plot(range(epochs), history['train'][metric], c='crimson', label='train')
        plt.plot(range(epochs), history['val'][metric], c='blueviolet', label='val')
        plt.title(metric)
        plt.legend()
    plt.show()


def _predict_test(loader, model):
    """Return the model outputs for every batch of ``loader`` as one array."""
    model.eval()
    outputs = []
    with torch.no_grad():
        for data in loader:
            outputs.append(model(data.to(device)).cpu())
    if not outputs:
        return torch.empty(0).numpy()
    return torch.cat(outputs, dim=0).numpy()


def Train(dataloader, datasize, model, criterion, optimizer, scheduler, epochs):
    """Train ``model``, restore its best validation weights and score the test set.

    Args:
        dataloader: Mapping with ``'train'``, ``'val'`` and ``'test'`` loaders.
            Train/val batches are ``(data, label)`` pairs; test batches are
            data only.
        datasize: Mapping with the number of samples in ``'train'`` and ``'val'``.
        model: Network to train; it must already live on ``device``.
        criterion: Loss called as ``criterion(output, label)``.
        optimizer: Optimizer over the model parameters.
        scheduler: Scheduler whose ``step`` accepts a metric; it receives the
            epoch's mean validation loss.
        epochs: Number of epochs to run.

    Returns:
        Tuple ``(model, test_pre)``: the model loaded with the weights of the
        epoch that had the lowest validation loss, and a NumPy array holding
        the raw model outputs for the test loader, in loader order.
    """
    history = {state: {'loss': [], 'acc': []} for state in ('train', 'val')}
    # Track the best epoch across the WHOLE run; start from the initial
    # weights so there is always something to restore.
    best_loss = float('inf')
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(epochs):
        print('EPOCH {}'.format(epoch))
        for state in ('train', 'val'):
            # Metrics are accumulated per phase, so validation numbers are
            # not mixed with the training ones.
            epoch_loss, epoch_acc = _run_phase(
                state, dataloader[state], datasize[state], model, criterion, optimizer)
            history[state]['loss'].append(epoch_loss)
            history[state]['acc'].append(epoch_acc)
            if state == 'val':
                # Drive the scheduler with the epoch mean, not the loss of
                # whichever batch happened to come last.
                scheduler.step(epoch_loss)
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
            print('{} LOSS IS {} ACC IS {}'.format(state, epoch_loss, epoch_acc))
        print()

    _plot_history(history, epochs)

    model.load_state_dict(best_model_wts)
    test_pre = _predict_test(dataloader['test'], model)
    return model, test_pre

## Hold Out

In [None]:
def HoldoutTrain(train, test, Model, algorithm, epochs=50, lr=0.01):
    """Train one model on a stratified 80/20 hold-out split and score the test set.

    Labels are read from the module-level ``label`` tensor, which must be
    aligned with ``train``.

    Args:
        train: Tensor of training samples.
        test: Tensor of test samples.
        Model: An already constructed model instance (not a class); it is
            moved to ``device`` and trained in place.
        algorithm: Name printed in the log header.
        epochs: Number of training epochs (default 50).
        lr: Adam learning rate (default 0.01).

    Returns:
        The test-set outputs produced by ``Train`` for the best-validation weights.
    """
    model = Model.to(device)

    # Split sample indices rather than the data itself so that the same
    # indices can slice both `train` and `label`.
    trn_idx, val_idx = train_test_split(
        list(range(len(train))), stratify=label, test_size=0.2, random_state=42)
    print("algorithm is {}".format(algorithm))
    print('=====================')
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=15, verbose=True, factor=0.5)
    train_, val_ = train[trn_idx], train[val_idx]
    train_label, val_label = label[trn_idx], label[val_idx]
    dataloaders, dataset_sizes = Data(train_, val_, train_label, val_label, test)
    _, test_pre = Train(dataloaders, dataset_sizes, model, criterion, optimizer, exp_lr_scheduler, epochs)

    return test_pre

## 5 Fold

In [None]:
def KfoldTrain(train, test, n_splits=5, model_fn=None, epochs=30, lr=0.005):
    """Train one model per stratified fold and average their test outputs.

    Labels are read from the module-level ``label`` tensor, which must be
    aligned with ``train``.

    Args:
        train: Tensor of training samples.
        test: Tensor of test samples.
        n_splits: Number of cross-validation folds (default 5).
        model_fn: Zero-argument callable returning a fresh model for each
            fold; defaults to ``CNN``.
        epochs: Training epochs per fold (default 30).
        lr: Adam learning rate (default 0.005).

    Returns:
        Array of shape ``(len(test), 10)`` with the test outputs averaged
        over the folds.
    """
    if model_fn is None:
        model_fn = CNN

    # Stratified k-fold cross-validation.
    folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    test_pre_numpy = np.zeros((len(test), 10))
    for fold_, (trn_idx, val_idx) in enumerate(folds.split(range(len(train)), label)):
        print("Fold {}".format(fold_))
        print('=====================')
        # Build each fold's model only when it is needed, so that a single
        # model is held on the device at a time.
        model = model_fn().to(device)
        criterion = nn.CrossEntropyLoss().to(device)
        optimizer = optim.Adam(model.parameters(), lr=lr)
        exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=15, verbose=True, factor=0.5)
        train_, val_ = train[trn_idx], train[val_idx]
        train_label, val_label = label[trn_idx], label[val_idx]
        dataloaders, dataset_sizes = Data(train_, val_, train_label, val_label, test)
        _, test_pre = Train(dataloaders, dataset_sizes, model, criterion, optimizer, exp_lr_scheduler, epochs)
        test_pre_numpy += test_pre

    return test_pre_numpy / n_splits

In [None]:
# Hold-out training of the base CNN; keeps its test-set outputs.
test_pre_numpy = HoldoutTrain(train, test, CNN(), 'CNN + RELU + BatchNorm  Base')

In [None]:
# Hold-out training of the residual CNN; overwrites the previous test outputs.
test_pre_numpy = HoldoutTrain(train, test, ResCNN(), 'ResNet')

## Transfer Learning

In [None]:
# Start from torchvision's pretrained ResNet-18 and adapt both ends of it.
from torchvision import models
model = models.resnet18(pretrained=True)
num_fc = model.fc.in_features
# New first convolution taking 1 input channel; it is a freshly constructed
# layer, so it does not carry over the pretrained first-layer weights.
model.conv1= nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
# New classification head with 10 outputs on top of the existing features.
model.fc = nn.Linear(num_fc, 10)

In [None]:
# Hold-out training of the adapted ResNet-18; these outputs are the ones that
# end up in the submission below.
test_pre_numpy = HoldoutTrain(train, test, model, 'ResNet Transfer')

## Save Results

In [None]:
# Predicted digit = column index of the highest score in each row.
# argmax does this in one pass instead of fully sorting every row; the two can
# only disagree when a row has exactly tied maxima.
test_pre_label = np.argmax(test_pre_numpy, axis=1)

In [None]:
# Put the predicted digits into the sample submission's 'Label' column and
# write it out without the DataFrame index.
sub['Label'] = test_pre_label
sub.to_csv('submission.csv', index=False)