In [1]:
import torch
import random
import torchvision
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import argparse,os,time
import os
import time
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Number of GPUs assumed for data-parallel training. This is a hard-coded
# setting, not a value detected from the machine at runtime.
num_gpus=4

In [2]:
# Read the augmented training and validation tables. In both files the first
# CSV column is used as the DataFrame index rather than as a feature.
train_csv, val_csv = 'train_augmented.csv', 'val_augmented.csv'
data = pd.read_csv(train_csv, index_col=0)
val_data = pd.read_csv(val_csv, index_col=0)

In [3]:
# Pull the "digit" label column out of each frame as a NumPy array.
y_data, y_data_val = (frame["digit"].values for frame in (data, val_data))

In [4]:
# Feature matrices: a label-based column slice from column "0" through
# column "783" (both ends included), converted to NumPy arrays.
pixel_columns = slice("0", "783")
x_data = data.loc[:, pixel_columns].values
x_data_val = val_data.loc[:, pixel_columns].values

In [5]:
# Bind the feature arrays to the train/test names used by the later cells.
# These are plain aliases; no data is copied here.
x_data_train, x_data_test = x_data, x_data_val

In [6]:
# Scale both splits by the SAME constant: the maximum of the training features.
# Dividing the validation split by its own maximum would apply a different
# transform to it whenever the two maxima differ, so the model would be
# evaluated on inputs scaled differently from those it was trained on.
pixel_max = x_data_train.max()
x_data_train = x_data_train / pixel_max
x_data_test = x_data_test / pixel_max

In [7]:
class CustomDataset(Dataset):
    """In-memory dataset pairing feature rows with integer class labels.

    Parameters
    ----------
    x_dat : array-like
        Features; the first axis indexes samples. Stored as a float32 tensor.
    y_dat : array-like
        One label per sample. Stored as an int64 tensor.

    Raises
    ------
    ValueError
        If ``x_dat`` and ``y_dat`` do not contain the same number of samples.
    """

    def __init__(self,x_dat,y_dat):
        x = np.asarray(x_dat, dtype=np.float32)
        # Use an explicit 64-bit integer type: the bare 'int' dtype is
        # platform-dependent, and a 32-bit result would not yield the int64
        # (long) targets that classification losses expect.
        y = np.asarray(y_dat).astype(np.int64)
        if x.shape[0] != y.shape[0]:
            # Fail at construction time rather than with an index error (or
            # silently ignored rows) somewhere in the middle of an epoch.
            raise ValueError(
                "x_dat and y_dat must have the same number of samples, "
                "got {} and {}".format(x.shape[0], y.shape[0])
            )
        self.len = x.shape[0]
        # torch.tensor copies, so the dataset does not alias the caller's arrays.
        self.x_data = torch.tensor(x)
        self.y_data = torch.tensor(y)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len

In [8]:
batch_size=256
# Never ask for more worker processes than the host has CPUs; on machines with
# at least 60 cores this is still 60.
loader_workers = min(60, os.cpu_count() or 1)

# Training batches: reshuffled every epoch, full batches only.
train_dataset = CustomDataset(x_data_train,y_data)
train_loader = DataLoader(dataset=train_dataset,pin_memory=True,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=loader_workers,drop_last=True)

# Validation batches. Shuffling is disabled on purpose: with drop_last=True the
# trailing partial batch is discarded, and shuffling would discard a different
# set of samples every epoch, making per-epoch validation losses (which drive
# early stopping and checkpoint selection) not directly comparable.
# drop_last is left unchanged, so only full batches are yielded and up to
# batch_size-1 trailing validation samples are not evaluated.
test_dataset = CustomDataset(x_data_test,y_data_val)
test_loader = DataLoader(dataset=test_dataset,pin_memory=True,
                          batch_size=batch_size,
                          shuffle=False,
                          num_workers=loader_workers,drop_last=True)

In [9]:
class inception(nn.Module):
    """Four parallel convolutional branches concatenated along the channel axis.

    Branch output widths are 16 (1x1), 24 (1x1 -> 5x5), 24 (1x1 -> 3x3 -> 3x3)
    and 24 (3x3 average pool -> 1x1), i.e. 88 output channels in total. Every
    convolution and the pooling step use stride 1 with padding chosen so the
    spatial size of the input is preserved.
    """

    def __init__(self, in_channels):
        super().__init__()

        # Branch A: a single pointwise convolution.
        self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1)

        # Branch B: pointwise reduction, then a 5x5 convolution.
        self.branch5x5_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2)

        # Branch C: pointwise reduction, then two stacked 3x3 convolutions.
        self.branch3x3db1_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3db1_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3db1_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)

        # Branch D: pointwise convolution applied after average pooling.
        self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them on dim 1."""
        out_1x1 = self.branch1x1(x)
        out_5x5 = self.branch5x5_2(self.branch5x5_1(x))
        out_3x3 = self.branch3x3db1_3(
            self.branch3x3db1_2(self.branch3x3db1_1(x))
        )
        pooled = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        out_pool = self.branch_pool(pooled)

        return torch.cat((out_1x1, out_5x5, out_3x3, out_pool), 1)

In [10]:
class NET(nn.Module):
    """Small CNN for 28x28 single-channel images with 10 output classes.

    Layout: conv(5) -> maxpool -> ReLU -> inception -> conv(5) -> maxpool ->
    ReLU -> inception -> linear. For a 28x28 input the spatial size goes
    28 -> 24 -> 12 -> 8 -> 4, and the final inception block emits 88 channels,
    which is why the linear layer takes 88*4*4 features.
    """

    def __init__(self):
        super(NET, self).__init__()
        self.conv1 = nn.Conv2d(1,10,kernel_size=5)
        # 88 input channels = output width of the first inception block.
        self.conv2 = nn.Conv2d(88, 20, kernel_size=5)

        self.incept1=inception(in_channels=10)
        self.incept2=inception(in_channels=20)

        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(88*4*4,10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, 10)``.

        ``x`` may be any tensor holding ``N * 784`` values with ``N`` on the
        first axis (e.g. ``(N, 784)`` or ``(N, 1, 28, 28)``).
        """
        # Take the batch size from the tensor itself instead of from global
        # settings, so the model works for any batch size: a partial final
        # batch, single-sample inference, or a different number of
        # DataParallel replicas.
        in_size = x.size(0)
        x = x.view(in_size,1,28,28)
        x = F.relu(self.mp(self.conv1(x)))
        x = self.incept1(x)
        x = F.relu(self.mp(self.conv2(x)))
        x = self.incept2(x)
        x = x.view(in_size,-1)
        x = self.fc(x)

        # Normalise over the class axis explicitly; omitting `dim` is
        # deprecated and emits a warning.
        return F.log_softmax(x, dim=1)

In [11]:
# Build the network and move it to the GPU only when CUDA is present.
# The training loop in this notebook guards its own .cuda() calls the same way,
# so this keeps model and batches on the same device in either case.
net = NET()
if torch.cuda.is_available():
    net = net.cuda()
model=nn.DataParallel(net)

# Loss over integer class targets, and Adam with its default learning rate
# plus a weight-decay penalty of 0.001.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),weight_decay=0.001)

In [None]:
# ---------------------------------------------------------------------------
# Training / validation loop with early stopping and best-model checkpointing.
# ---------------------------------------------------------------------------
trn_loss_list = []          # mean training loss per epoch (Python floats)
val_loss_list = []          # mean validation loss per epoch (Python floats)
total_epoch=100
model_char="minloss"
model_name=""               # path of the latest checkpoint ("" = none saved yet)
patience=5                  # window of consecutive epochs inspected for early stop
start_early_stop_check=0    # latched to 1 the first time validation loss rises
saving_start_epoch=10       # checkpoints are only written after this epoch

use_cuda = torch.cuda.is_available()

for epoch in range(total_epoch):
    # ---- training pass ----------------------------------------------------
    model.train()
    trn_loss = 0.0
    # Unpack each batch directly: binding it to a variable called `data` would
    # clobber the DataFrame of that name loaded earlier in the notebook.
    for inputs, labels in train_loader:
        if use_cuda:
            inputs=inputs.cuda()
            labels=labels.cuda()
        optimizer.zero_grad()
        output= model(inputs)
        loss=criterion(output, labels)
        loss.backward()
        optimizer.step()

        # .item() stores a plain float and drops the reference to the graph.
        trn_loss += loss.item()
        del loss, output

    # ---- validation pass --------------------------------------------------
    model.eval()
    val_loss = 0.0
    cor_match = 0
    val_seen = 0
    with torch.no_grad():
        for val_x, val_label in test_loader:
            if use_cuda:
                val_x = val_x.cuda()
                val_label =val_label.cuda()
            val_output = model(val_x)
            # Accumulate floats, not tensors, so the loss history holds plain
            # numbers that can be compared, formatted and plotted directly.
            val_loss += criterion(val_output, val_label).item()
            predicted = val_output.argmax(dim=1)
            cor_match += (predicted == val_label).sum().item()
            # Count the samples actually evaluated instead of assuming every
            # batch holds exactly `batch_size` of them.
            val_seen += val_label.size(0)

    # max(..., 1) avoids a ZeroDivisionError when a loader yields no batches.
    trn_loss_mean = trn_loss / max(len(train_loader), 1)
    val_loss_mean = val_loss / max(len(test_loader), 1)
    trn_loss_list.append(trn_loss_mean)
    val_loss_list.append(val_loss_mean)
    val_acc = cor_match / max(val_seen, 1)

    now = time.localtime()
    print (time.strftime("%Y/%m/%d %H:%M:%S", now))

    print("epoch: {}/{} | trn loss: {:.4f} | val loss: {:.4f} | val accuracy: {:.4f}% \n".format(
                epoch+1, total_epoch, trn_loss_mean, val_loss_mean, val_acc*100
            ))

    # ---- early-stopping bookkeeping ----------------------------------------
    # The first two epochs only seed the checkpoint baseline; from the third
    # epoch on, the first rise in validation loss arms the early-stop check.
    if epoch+1>2:
        if val_loss_list[-1]>val_loss_list[-2]:
            start_early_stop_check=1
    else:
        val_loss_min=val_loss_list[-1]

    if start_early_stop_check:
        early_stop_temp=val_loss_list[-patience:]
        # Require a full window of `patience` epochs; otherwise a short run of
        # rising losses in the first few epochs would stop training early.
        window_full = len(early_stop_temp) >= patience
        if window_full and all(prev < nxt for prev, nxt in zip(early_stop_temp, early_stop_temp[1:])):
            print("Early stop!")
            break

    # ---- checkpointing ------------------------------------------------------
    if epoch+1>saving_start_epoch and val_loss_list[-1]<val_loss_min:
        previous_name = model_name
        val_loss_min=val_loss_list[-1]
        model_name="incept_model_"+model_char+"_{:.3f}".format(val_loss_min)
        # NOTE(review): this pickles the whole DataParallel-wrapped module, so
        # loading it needs the same class definitions importable. Saving
        # model.module.state_dict() would be more portable, but is not done
        # here because it would change the on-disk format.
        torch.save(model, model_name)
        # Delete the superseded checkpoint only after the new one is on disk,
        # and never when both round to the same file name.
        if previous_name != model_name and os.path.isfile(previous_name):
            os.remove(previous_name)
        print("Model replaced and saved as ",model_name)



2020/09/30 15:23:30
epoch: 1/100 | trn loss: 1.7828 | val loss: 1.4608 | val accuracy: 48.7988% 

2020/09/30 15:23:48
epoch: 2/100 | trn loss: 1.3790 | val loss: 1.3197 | val accuracy: 54.5898% 

2020/09/30 15:24:05
epoch: 3/100 | trn loss: 1.2254 | val loss: 1.1732 | val accuracy: 59.3457% 

2020/09/30 15:24:23
epoch: 4/100 | trn loss: 1.1273 | val loss: 1.1157 | val accuracy: 61.5967% 

2020/09/30 15:24:40
epoch: 5/100 | trn loss: 1.0686 | val loss: 1.0734 | val accuracy: 62.7734% 

2020/09/30 15:24:57
epoch: 6/100 | trn loss: 1.0326 | val loss: 1.0374 | val accuracy: 64.0723% 

2020/09/30 15:25:15
epoch: 7/100 | trn loss: 1.0007 | val loss: 1.0477 | val accuracy: 63.5889% 

2020/09/30 15:25:32
epoch: 8/100 | trn loss: 0.9803 | val loss: 1.0086 | val accuracy: 65.1270% 

2020/09/30 15:25:50
epoch: 9/100 | trn loss: 0.9615 | val loss: 0.9947 | val accuracy: 65.7910% 

2020/09/30 15:26:07
epoch: 10/100 | trn loss: 0.9441 | val loss: 1.0007 | val accuracy: 65.4541% 

2020/09/30 15:26:25

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


2020/09/30 15:26:42
epoch: 12/100 | trn loss: 0.9152 | val loss: 0.9624 | val accuracy: 66.8359% 

Model replaced and saved as  incept_model_minloss_0.962
2020/09/30 15:27:00
epoch: 13/100 | trn loss: 0.8977 | val loss: 0.9542 | val accuracy: 67.0752% 

Model replaced and saved as  incept_model_minloss_0.954
2020/09/30 15:27:17
epoch: 14/100 | trn loss: 0.8842 | val loss: 0.9506 | val accuracy: 67.3096% 

Model replaced and saved as  incept_model_minloss_0.951
2020/09/30 15:27:35
epoch: 15/100 | trn loss: 0.8734 | val loss: 0.9229 | val accuracy: 68.1787% 

Model replaced and saved as  incept_model_minloss_0.923
2020/09/30 15:27:52
epoch: 16/100 | trn loss: 0.8619 | val loss: 0.9583 | val accuracy: 66.8750% 

2020/09/30 15:28:10
epoch: 17/100 | trn loss: 0.8541 | val loss: 0.9359 | val accuracy: 67.8955% 

2020/09/30 15:28:27
epoch: 18/100 | trn loss: 0.8387 | val loss: 0.9106 | val accuracy: 68.1787% 

Model replaced and saved as  incept_model_minloss_0.911
2020/09/30 15:28:45
epoch: 