In [1]:
import torch
import random
import torchvision
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import argparse,os,time
import os
import time
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy.io import wavfile
from glob import glob
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import seaborn as sns
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="3"

In [2]:
data = pd.read_csv('train_augmented_split.csv', index_col=0)
val_data=pd.read_csv('val_augmented_split.csv', index_col=0)

In [3]:
y_data=data["digit"].values
y_data_val=val_data["digit"].values

In [4]:
y_data.shape

(81900,)

In [5]:
x_data=data.loc[:,"0":"783"].values
x_data_val=val_data.loc[:,"0":"783"].values

In [6]:
x_data_train=x_data
x_data_train.shape

(81900, 784)

In [7]:
x_data_test=x_data_val
x_data_test.shape

(20500, 784)

In [8]:
x_data_train=x_data_train/x_data_train.max()
x_data_test=x_data_test/x_data_test.max()
x_data_train.shape

(81900, 784)

In [9]:
class CustomDataset(Dataset):
    def __init__(self,x_dat,y_dat):
        x = x_dat
        y = y_dat
        self.len = x.shape[0]
        y=y.astype('int')
        x=x.astype('float32')
        self.x_data = torch.tensor(x)
        self.y_data = torch.tensor(y)

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

In [10]:
batch_size=256
train_dataset = CustomDataset(x_data_train,y_data)
train_loader = DataLoader(dataset=train_dataset,pin_memory=True,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=60,drop_last=True)
test_dataset = CustomDataset(x_data_test,y_data_val)
test_loader = DataLoader(dataset=test_dataset,pin_memory=True,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=60,drop_last=True)

In [11]:
# batch_size=1
# model=Model()
# model(train_dataset.x_data[0]).shape

In [12]:
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)

In [13]:
class Bottleneck(nn.Module):

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


In [14]:
class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=10, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(1, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, x):
        # See note [TorchScript super()]
        x = x.view(batch_size,1,28,28)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(batch_size,-1)
        x = self.fc(x)

        return x

    def forward(self, x):
        return self._forward_impl(x)

In [15]:
model = ResNet(block=Bottleneck, layers=[3, 8, 36, 3])
model=model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),weight_decay=0.001)

In [16]:
trn_loss_list = []
val_loss_list = []
total_epoch=200
model_char="res152"
model_name=""
patience=5
start_early_stop_check=0
saving_start_epoch=10

for epoch in range(total_epoch):
    trn_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        if torch.cuda.is_available():
            inputs=inputs.cuda()
            labels=labels.cuda()
        # grad init
        optimizer.zero_grad()
        # forward propagation
        output= model(inputs)
        # calculate loss
        loss=criterion(output, labels)
        # back propagation 
        loss.backward()
        # weight update
        optimizer.step()
        
        # trn_loss summary
        trn_loss += loss.item()
        # del (memory issue)
        del loss
        del output
    with torch.no_grad():
        val_loss = 0.0
        mis_match = 0
        for j, val in enumerate(test_loader):
            val_x, val_label = val
            if torch.cuda.is_available():
                val_x = val_x.cuda()
                val_label =val_label.cuda()
            val_output = model(val_x)
            v_loss = criterion(val_output, val_label)
            val_loss += v_loss
            _, predicted=torch.max(val_output,1)
            mis_match+=np.count_nonzero(predicted.cpu().detach()==val_label.cpu().detach())
    del val_output
    del v_loss
    del predicted
    
    
    
    trn_loss_list.append(trn_loss/len(train_loader))
    val_loss_list.append(val_loss/len(test_loader))
    val_acc=mis_match/(len(test_loader)*batch_size)
    now = time.localtime()
    print ("%04d/%02d/%02d %02d:%02d:%02d" % (now.tm_year, now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec))

    print("epoch: {}/{} | trn loss: {:.4f} | val loss: {:.4f} | val accuracy: {:.4f}% \n".format(
                epoch+1, total_epoch, trn_loss / len(train_loader), val_loss / len(test_loader), val_acc*100
            ))
    
    
    if epoch+1>2:
        if val_loss_list[-1]>val_loss_list[-2]:
            start_early_stop_check=1
    else:
        val_loss_min=val_loss_list[-1]
        
    if start_early_stop_check:
        early_stop_temp=val_loss_list[-patience:]
        if all(early_stop_temp[i]<early_stop_temp[i+1] for i in range (len(early_stop_temp)-1)):
            print("Early stop!")
            break
            
    if epoch+1>saving_start_epoch:
        if val_loss_list[-1]<val_loss_min:
            if os.path.isfile(model_name):
                os.remove(model_name)
            val_loss_min=val_loss_list[-1]
            model_name="model_ver-2.00_"+model_char+"_{:.3f}".format(val_loss_min)
            torch.save(model, model_name)
            print("Model replaced and saved as ",model_name)

2020/09/14 02:55:22
epoch: 1/200 | trn loss: 2.1094 | val loss: 1.8808 | val accuracy: 33.1250% 

2020/09/14 02:56:33
epoch: 2/200 | trn loss: 1.6412 | val loss: 1.4695 | val accuracy: 49.3848% 

2020/09/14 02:57:47
epoch: 3/200 | trn loss: 1.2140 | val loss: 1.1762 | val accuracy: 59.5508% 

2020/09/14 02:58:57
epoch: 4/200 | trn loss: 0.8934 | val loss: 1.0035 | val accuracy: 66.5527% 

2020/09/14 03:00:08
epoch: 5/200 | trn loss: 0.6368 | val loss: 0.9044 | val accuracy: 70.5029% 

2020/09/14 03:01:20
epoch: 6/200 | trn loss: 0.4650 | val loss: 0.8197 | val accuracy: 75.1758% 

2020/09/14 03:02:34
epoch: 7/200 | trn loss: 0.3598 | val loss: 0.7622 | val accuracy: 76.9580% 

2020/09/14 03:03:46
epoch: 8/200 | trn loss: 0.3003 | val loss: 0.7550 | val accuracy: 77.8174% 

2020/09/14 03:04:58
epoch: 9/200 | trn loss: 0.2673 | val loss: 0.6756 | val accuracy: 80.8398% 

2020/09/14 03:06:11
epoch: 10/200 | trn loss: 0.2412 | val loss: 0.6597 | val accuracy: 81.7139% 

2020/09/14 03:07:24

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Model replaced and saved as  model_ver-2.00_res152_0.665
2020/09/14 03:08:38
epoch: 12/200 | trn loss: 0.2170 | val loss: 0.6795 | val accuracy: 81.3135% 

2020/09/14 03:09:52
epoch: 13/200 | trn loss: 0.2035 | val loss: 0.7066 | val accuracy: 80.7275% 

2020/09/14 03:11:04
epoch: 14/200 | trn loss: 0.1935 | val loss: 0.6542 | val accuracy: 82.7783% 

Model replaced and saved as  model_ver-2.00_res152_0.654
2020/09/14 03:12:18
epoch: 15/200 | trn loss: 0.1880 | val loss: 0.6273 | val accuracy: 82.8027% 

Model replaced and saved as  model_ver-2.00_res152_0.627
2020/09/14 03:13:34
epoch: 16/200 | trn loss: 0.1745 | val loss: 0.6277 | val accuracy: 83.3252% 

2020/09/14 03:14:47
epoch: 17/200 | trn loss: 0.1676 | val loss: 0.5981 | val accuracy: 84.3213% 

Model replaced and saved as  model_ver-2.00_res152_0.598
2020/09/14 03:16:02
epoch: 18/200 | trn loss: 0.1659 | val loss: 0.5997 | val accuracy: 83.8867% 

2020/09/14 03:17:15
epoch: 19/200 | trn loss: 0.1639 | val loss: 0.5974 | val a

2020/09/14 04:42:05
epoch: 89/200 | trn loss: 0.0651 | val loss: 0.4830 | val accuracy: 88.9990% 

2020/09/14 04:43:17
epoch: 90/200 | trn loss: 0.0643 | val loss: 0.5516 | val accuracy: 87.5781% 

2020/09/14 04:44:30
epoch: 91/200 | trn loss: 0.0678 | val loss: 0.5158 | val accuracy: 88.3887% 

2020/09/14 04:45:42
epoch: 92/200 | trn loss: 0.0620 | val loss: 0.5401 | val accuracy: 88.1445% 

2020/09/14 04:46:56
epoch: 93/200 | trn loss: 0.0658 | val loss: 0.5239 | val accuracy: 88.2178% 

2020/09/14 04:48:10
epoch: 94/200 | trn loss: 0.0684 | val loss: 0.5313 | val accuracy: 88.2520% 

2020/09/14 04:49:22
epoch: 95/200 | trn loss: 0.0635 | val loss: 0.4862 | val accuracy: 89.0625% 

2020/09/14 04:50:36
epoch: 96/200 | trn loss: 0.0585 | val loss: 0.5693 | val accuracy: 87.5977% 

2020/09/14 04:51:49
epoch: 97/200 | trn loss: 0.0706 | val loss: 0.4910 | val accuracy: 88.9990% 

2020/09/14 04:53:01
epoch: 98/200 | trn loss: 0.0648 | val loss: 0.5070 | val accuracy: 88.2227% 

2020/09/14

2020/09/14 06:22:05
epoch: 172/200 | trn loss: 0.0578 | val loss: 0.5377 | val accuracy: 88.8281% 

2020/09/14 06:23:17
epoch: 173/200 | trn loss: 0.0598 | val loss: 0.4778 | val accuracy: 89.4482% 

2020/09/14 06:24:29
epoch: 174/200 | trn loss: 0.0567 | val loss: 0.5174 | val accuracy: 88.7012% 

2020/09/14 06:25:41
epoch: 175/200 | trn loss: 0.0550 | val loss: 0.5231 | val accuracy: 88.4668% 

2020/09/14 06:26:53
epoch: 176/200 | trn loss: 0.0625 | val loss: 0.4813 | val accuracy: 88.9648% 

2020/09/14 06:28:05
epoch: 177/200 | trn loss: 0.0542 | val loss: 0.5349 | val accuracy: 88.3691% 

2020/09/14 06:29:16
epoch: 178/200 | trn loss: 0.0544 | val loss: 0.5213 | val accuracy: 88.3691% 

2020/09/14 06:30:28
epoch: 179/200 | trn loss: 0.0568 | val loss: 0.5392 | val accuracy: 87.9199% 

2020/09/14 06:31:40
epoch: 180/200 | trn loss: 0.0592 | val loss: 0.5158 | val accuracy: 88.5254% 

2020/09/14 06:32:52
epoch: 181/200 | trn loss: 0.0550 | val loss: 0.5395 | val accuracy: 88.2812% 



In [17]:
fin_name="model_ver-2.00"
torch.save(model, fin_name)
print("Fin model saved", fin_name)

Fin model saved model_ver-2.00
