In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm import tqdm

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the training CSV: one 'label' column plus one column per pixel.
df_train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
train_y = df_train['label']
train_x = df_train.drop('label', axis = 1)
x = np.array(train_x, dtype = 'float')
# Each row must hold exactly 28 * 28 pixel values for the reshape below to be valid.
assert x.shape[1] == 28 * 28, 'expected 784 pixel columns per image'
# Reshape every row in a single vectorised call instead of a per-row Python loop.
# The result is kept as a list of (28, 28) arrays, the same container type as before.
x_reshape = list(x.reshape(-1, 28, 28))

# **Load Dataset**

Each row of the provided DataFrame stores one image as a flat, one-dimensional vector of pixels, so every row needs to be reshaped into a 28 × 28 array.

In [None]:
#EDA
#Image Visualization
print('Image Visualization')
import matplotlib.pyplot as plt
# np.random.randint excludes its upper bound, so len(x_reshape) (not len - 1)
# is needed for the last image to be eligible for sampling.
rnum = np.random.randint(0, len(x_reshape), 10)
plt.figure(figsize = (10,10))
# Show the ten sampled digits side by side without axis ticks.
for n, i in enumerate(rnum):
    plt.subplot(1,10,n+1)
    plt.tick_params(left = False, bottom = False, labelleft = False, labelbottom = False)
    plt.imshow(x_reshape[i])
plt.show()
#label balance
print('label size')
# Number of training rows per digit label.
label_balance = df_train.groupby('label').size().to_frame('count')
plt.title('label balance')
plt.bar(label_balance.index, label_balance['count'])
plt.show()
print('****************************************************************************\n')
#Null value detection
# Count nulls per pixel column once and display only the columns that have any.
null_counts = train_x.isnull().sum()
null_counts.loc[null_counts != 0]

In [None]:
#Custom Dataset
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as T

# Default preprocessing pipeline: convert to a tensor, then resize to 224 x 224.
transforms = T.Compose([T.ToTensor(), T.Resize((224, 224))])

class CustomDataset(Dataset):
    """Dataset yielding ``{'image': ..., 'label': ...}`` samples.

    ``x_data`` is indexed positionally with ``[]`` and ``y_data`` with ``.iloc``.
    Pixel values are divided by 255.0 before the optional ``transforms``
    callable is applied to the image.
    """
    def __init__(self, x_data, y_data, transforms = transforms):
        self.transforms = transforms
        self.x_data, self.y_data = x_data, y_data

    def __len__(self):
        # The number of samples is the number of images.
        return len(self.x_data)

    def __getitem__(self, idx):
        scaled = self.x_data[idx] / 255.0
        target = self.y_data.iloc[idx]
        if self.transforms:
            scaled = self.transforms(scaled)
        return {'image' : scaled, 'label' : target}

#  **MobileNetV1 Structure**
 ![figure1](https://static-01.hindawi.com/articles/cin/volume-2020/8817849/figures/8817849.fig.002.svgz)


> **On network..**
* **Define Depthwise Convolution Network (blue background on the picture)**
* **Define Basic convolution**

In [None]:


#MobileNet Define
import torch.nn as nn
import torch.nn.functional as F

class MobileNet(nn.Module):
    """MobileNetV1-style classifier for single-channel images.

    The network is a strided 3x3 stem convolution followed by alternating
    depthwise-separable blocks (``convdw``) and 1x1 convolutions (``convd``),
    a global average pool and a linear classification head.

    Args:
        num_classes: size of the output logit vector.

    ``forward`` takes a ``(N, 1, H, W)`` tensor and returns ``(N, num_classes)``
    raw logits (no softmax is applied).
    """
    def __init__(self, num_classes = 10):
        super(MobileNet, self).__init__()

        def convdw(in_channels, out_channels, kernel_size, stride):
            """Depthwise-separable block: depthwise conv, then pointwise 1x1 conv."""
            layers = []
            #Depthwise Convolution
            # One filter per input channel (groups = in_channels), so the channel
            # count is unchanged here. 'Same'-style padding keeps the spatial size,
            # and only this layer applies the stride.
            layers += [nn.Conv2d(in_channels, in_channels, kernel_size = kernel_size,
                                 padding = kernel_size // 2, stride = stride, groups = in_channels)]
            layers += [nn.BatchNorm2d(in_channels)]
            layers += [nn.ReLU()]

            #Pointwise Convolution
            # A 1x1 conv mixes channels only: no padding (padding would grow the
            # feature map) and stride 1 (the stride was already applied above).
            layers += [nn.Conv2d(in_channels, out_channels, kernel_size = 1, padding = 0, stride = 1)]
            layers += [nn.BatchNorm2d(out_channels)]
            layers += [nn.ReLU()]
            return nn.Sequential(*layers)

        def convd(in_channels, out_channels, kernel_size, stride):
            """Standard conv -> batch norm -> ReLU block with 'same'-style padding."""
            layers = []
            layers += [nn.Conv2d(in_channels, out_channels, kernel_size,
                                 stride = stride, padding = kernel_size // 2)]
            layers += [nn.BatchNorm2d(out_channels)]
            layers += [nn.ReLU()]
            return nn.Sequential(*layers)

        self.conv1 = convd(1,32,3, stride = 2)
        self.convdw1 = convdw(32, 32, 3, 1)
        self.conv2 = convd(32, 64, 1, stride = 1)
        self.convdw2 = convdw(64, 64, 3, 2)
        self.conv3 = convd(64, 128, 1, stride = 1)
        self.convdw3 = convdw(128, 128, 3, 1)
        self.conv4 = convd(128, 128, 1, stride = 1)
        self.convdw4 = convdw(128, 128, 3, 2)
        self.conv5 = convd(128, 256, 1, stride = 1)
        self.convdw5 = convdw(256, 256, 3, 1)
        self.conv6 = convd(256, 256, 1, stride = 1)
        self.convdw6 = convdw(256, 256, 3, 2)
        self.conv7 = convd(256, 512, 1, stride = 1)
        #--------------------------------------------
        # Five *distinct* 512-channel stages. Calling one pair of modules five
        # times would share a single set of weights across all five stages.
        self.middle = nn.Sequential(*[
            nn.Sequential(convdw(512, 512, 3, 1), convd(512, 512, 1, stride = 1))
            for _ in range(5)
        ])
        #--------------------------------------------
        self.convdw7 = convdw(512, 512, 3, 2)
        self.conv8 = convd(512, 1024, 1, stride = 1)
        self.convdw8 = convdw(1024, 1024, 3, 2)
        self.conv9 = convd(1024, 1024, 1, stride = 1)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(N, num_classes)`` for image batch ``x``."""
        x = self.conv1(x)
        x = self.convdw1(x)
        x = self.conv2(x)
        x = self.convdw2(x)
        x = self.conv3(x)
        x = self.convdw3(x)
        x = self.conv4(x)
        x = self.convdw4(x)
        x = self.conv5(x)
        x = self.convdw5(x)
        x = self.conv6(x)
        x = self.convdw6(x)
        x = self.conv7(x)

        x = self.middle(x)

        x = self.convdw7(x)
        x = self.conv8(x)
        x = self.convdw8(x)
        x = self.conv9(x)
        x = self.pool(x)
        # Flatten (N, 1024, 1, 1) -> (N, 1024) for the linear head.
        x = x.view(x.size(0), -1)
        output = self.fc(x)

        return output

#  **Train Configure**


> **For training..**
* **Setting simple early_stopping (number of patience = 7)**
* **Loss function : Crossentropyloss (softmax)**
* **Data split : the training DataFrame is randomly split into train / validation subsets. Because the split itself is already random, the dataloaders use 'shuffle = False'.**
* **Initialize network weight : xavier method uniform distribution (makes model's performance worse -> why?)**

In [None]:
#Training configure
import random

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Fix the PyTorch and Python RNG seeds so repeated runs start from the same state.
torch.manual_seed(777)
random.seed(777)
if device == 'cuda':
    # Seed every visible CUDA device as well.
    torch.cuda.manual_seed_all(777)

#Initialized weight -> not used
def init_weights(m):
    """Xavier-uniform initialise a Linear / Conv2d layer in place.

    Intended to be passed to ``nn.Module.apply``. The weight is drawn from a
    Xavier uniform distribution and the bias, when the layer has one, is set
    to 0.01. Modules of any other type are left untouched.

    Args:
        m: the module to initialise.
    """
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_uniform_(m.weight)
        # Layers created with bias=False expose bias as None; skip those.
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.01)

# Build the network on the selected device so the notebook also runs without a GPU.
model = MobileNet().to(device)

lr = 1e-3
setting_patience = 7   # epochs without validation improvement before stopping
n_epoch = 50
batch_size = 100
#optimizer & criterion function
# .to(device) instead of .cuda(): the latter raises on CPU-only machines.
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(params = model.parameters(), lr = lr)
#Define dataset
#data split
from sklearn.model_selection import train_test_split
# 67 % / 33 % train / validation split with a fixed random_state for repeatability.
X_train, X_test, y_train, y_test = train_test_split(x_reshape, train_y, test_size = 0.33, random_state = 26)
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)
# NOTE(review): shuffle = False keeps the batch order identical in every epoch;
# consider shuffle = True for the training loader.
train_data_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = False)
test_data_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)
# Batches per epoch, used to average the per-batch losses.
train_total_batch = len(train_data_loader)
test_total_batch = len(test_data_loader)
# Per-epoch history, filled by the training loop and plotted afterwards.
trn_acc_list = []
trn_loss_list = []
test_acc_list = []
test_loss_list = []

In [None]:
#Training & Evaluation
# Each epoch makes one optimisation pass over the training loader and one
# gradient-free pass over the held-out loader, records the epoch metrics,
# checkpoints the model when the held-out accuracy improves, and stops early
# after `setting_patience` consecutive epochs without improvement.
torch.cuda.empty_cache()
model.to(device)
best_accuracy = 0.0
total_patience = 0
for epoch in range(n_epoch):
    model.train()
    trn_loss_sum = 0.0
    trn_total = 0
    trn_correct = 0
    test_loss_sum = 0.0
    test_total = 0
    test_correct = 0
    with tqdm(train_data_loader, unit = 'batch') as train_bar:
        for sample in train_bar:
            image = sample['image'].float().to(device)
            label = sample['label'].to(device)
            # Clear gradients for every batch; otherwise they accumulate
            # across all batches of the epoch.
            optimizer.zero_grad()
            trn_probs = model(image)
            loss = criterion(trn_probs, label)
            loss.backward()
            optimizer.step()
            # .item() stores a plain float so no autograd graph is kept alive.
            trn_loss_sum += loss.item()
            trn_predict = trn_probs.argmax(dim = 1)
            trn_total += label.size(0)
            trn_correct += (trn_predict == label).sum().item()
            trn_accuracy = 100 * trn_correct / trn_total
            train_bar.set_postfix(epoch = epoch+1, loss = loss.item(), accuracy = trn_accuracy)
    model.eval()
    with torch.no_grad():
        with tqdm(test_data_loader, unit = 'batch') as test_bar:
            for sample in test_bar:
                image = sample['image'].float().to(device)
                label = sample['label'].to(device)
                test_probs = model(image)
                loss = criterion(test_probs, label)
                test_loss_sum += loss.item()
                test_predict = test_probs.argmax(dim = 1)
                test_total += label.size(0)
                test_correct += (test_predict == label).sum().item()
                test_accuracy = 100 * test_correct / test_total
                test_bar.set_postfix(epoch = epoch+1, loss = loss.item(), accuracy = test_accuracy)
    # Mean of the per-batch losses for this epoch.
    trn_avg_loss = trn_loss_sum / train_total_batch
    test_avg_loss = test_loss_sum / test_total_batch
    # History holds plain Python floats so it can be plotted directly.
    trn_acc_list.append(trn_accuracy)
    trn_loss_list.append(trn_avg_loss)
    test_acc_list.append(test_accuracy)
    test_loss_list.append(test_avg_loss)

    if best_accuracy < test_accuracy:
        total_patience = 0
        best_accuracy = test_accuracy
        print('Model Improving')
        print('Epoch : {}, Loss : {:.4f}, Accuracy : {} model save.....'.format(epoch+1, test_avg_loss, test_accuracy))
        torch.save(model.state_dict(), './checkpoint.pt')
    else:
        total_patience += 1
        print('early stop counter : {}/{}'.format(total_patience, setting_patience))
        # Stop as soon as patience is exhausted rather than training one more epoch.
        if total_patience >= setting_patience:
            break

In [None]:
import matplotlib.pyplot as plt

# The history lists may contain Python floats or 0-dim torch tensors (possibly
# on the GPU or attached to the autograd graph). float() turns either into a
# plain number that matplotlib can plot.
trn_loss_values = [float(v) for v in trn_loss_list]
test_loss_values = [float(v) for v in test_loss_list]
trn_acc_values = [float(v) for v in trn_acc_list]
test_acc_values = [float(v) for v in test_acc_list]
# Dedicated name for the x-axis so the global `x` data array is not overwritten.
epochs = range(len(trn_loss_values))

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize = (20,5))
loss_ax.set_title('Loss')
loss_ax.plot(epochs, trn_loss_values, label = 'Train Loss')
loss_ax.plot(epochs, test_loss_values, label = 'Valid Loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend()
acc_ax.set_title('Accuracy')
acc_ax.plot(epochs, trn_acc_values, label = 'Train Accuracy')
acc_ax.plot(epochs, test_acc_values, label = 'Valid Accuracy')
acc_ax.set_xlabel('epoch')
acc_ax.legend()
plt.show()

In [None]:
# Preprocessing pipeline for inference: convert to a tensor, then resize to 224 x 224.
transforms = T.Compose([T.ToTensor(), T.Resize((224, 224))])

class CustomDataset_test(Dataset):
    """Label-free dataset that yields one preprocessed image per index.

    Each item is ``x_data[idx]`` divided by 255.0, passed through the optional
    ``transforms`` callable when one is set.
    """
    def __init__(self, x_data, transforms = transforms):
        self.transforms = transforms
        self.x_data = x_data

    def __len__(self):
        # One sample per stored image.
        return len(self.x_data)

    def __getitem__(self, idx):
        scaled = self.x_data[idx] / 255.0
        return self.transforms(scaled) if self.transforms else scaled

In [None]:
import torch
# Predict labels for the Kaggle test set with the checkpoint saved during training.
torch.cuda.empty_cache()
# map_location lets a checkpoint saved from the GPU load on a CPU-only machine.
model.load_state_dict(torch.load('./checkpoint.pt', map_location = device))
model.to(device)
model.eval()
test_df = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
# Separate names so the training arrays and validation loader are not overwritten.
submit_pixels = np.array(test_df, dtype = 'float')
submit_images = list(submit_pixels.reshape(-1, 28, 28))
submit_dataset = CustomDataset_test(submit_images)
# Order must be preserved (shuffle = False) so predictions line up with the rows.
submit_loader = DataLoader(submit_dataset, batch_size = batch_size, shuffle = False)
y_hat_list = []
# No gradients are needed for inference; this avoids building autograd graphs.
with torch.no_grad():
    for image in tqdm(submit_loader):
        image = image.float().to(device)
        y_hat = model(image)
        # argmax over the class dimension gives one predicted label per image.
        y_hat_list.extend(y_hat.argmax(dim = 1).tolist())

In [None]:
# Read the sample submission, set its 'Label' column to the predictions,
# and write the result to submission.csv without the index column.
submission = pd.read_csv('/kaggle/input/digit-recognizer/sample_submission.csv').assign(Label = y_hat_list)
submission.to_csv('submission.csv', index = False)