In [96]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image
from tqdm.notebook import tqdm

import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, TensorDataset, ConcatDataset

import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

In [None]:
# Pick the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [42]:
# Read the train and test tables from the local data directory.
train_csv = './data/train.csv'
test_csv = './data/test.csv'
df_train = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

In [36]:
# Report the training table's dimensions, then separate the target column
# from the remaining feature columns.
print(df_train.shape)
df_train.head()  # not the last expression of the cell, so nothing is rendered
y = df_train['label']
X = df_train.drop(columns='label')

(42000, 785)


#### Data preparation

In [85]:
train_path = './data/new_data/train/'
test_path = './data/new_data/test/'

# Decode every 784-value pixel row into a 28x28 uint8 image in one vectorised
# step instead of a per-row `.loc` lookup.
train_images = X.to_numpy(dtype=np.uint8).reshape(-1, 28, 28)
train_labels = y.to_numpy()

# One sub-directory per class is written below; create them up front so that
# `Image.save` does not fail when the directory tree does not exist yet.
for label in np.unique(train_labels):
    os.makedirs(train_path + f'class_{label}/', exist_ok=True)

# NOTE(review): JPEG is a lossy format, so the saved pixels are not exactly the
# CSV values — consider PNG if exact pixel values matter.
for i, (arr, label) in enumerate(zip(train_images, train_labels)):
    file_path = train_path + f'class_{label}/' + f'image_{i}.jpg'
    Image.fromarray(arr).save(file_path)

# The test table has no label column; all of its images go into a single
# sub-directory named 'test'.
test_dir = test_path + 'test/'
os.makedirs(test_dir, exist_ok=True)

# NOTE(review): file names are not zero-padded, so a lexicographic listing puts
# image_10 before image_2 — verify that downstream code maps predictions back
# to rows by file name rather than by position.
test_images = df_test.to_numpy(dtype=np.uint8).reshape(-1, 28, 28)
for i, arr in enumerate(test_images):
    Image.fromarray(arr).save(test_dir + f'image_{i}.jpg')

#### Preparing the train and test datasets

In [86]:
# ImageFolder's default loader opens every file as an RGB image, which would
# produce 3-channel tensors. The images are single-channel digits, so collapse
# them back to one channel before converting to a tensor.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
])

# Augmented pipeline: random vertical flip plus a random rotation of up to
# 25 degrees in either direction.
# NOTE(review): vertically flipped digits may no longer match their label —
# confirm this augmentation is intended.
vertical_flip_probability = 0.5
aug_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.RandomVerticalFlip(p=vertical_flip_probability),
    transforms.RandomRotation(degrees=(-25, 25)),
    transforms.ToTensor(),
])

# The training set is the plain images followed by an augmented view of the
# same files, so it is twice the size of the folder on disk.
train_image_folder = ImageFolder(root=train_path, transform=transform)
train_image_folder_aug = ImageFolder(root=train_path, transform=aug_transform)
train_image_folder = ConcatDataset([train_image_folder, train_image_folder_aug])

# The test set needs the same tensor conversion: without a transform the
# dataset yields PIL images, which a DataLoader cannot collate into batches.
test_image_folder = ImageFolder(root=test_path, transform=transform)

In [90]:
# Training batches are shuffled; the test loader keeps dataset order and
# yields one sample per batch (the DataLoader default, spelled out here).
batch_size = 32
train_loader = DataLoader(
    dataset=train_image_folder,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
test_loader = DataLoader(
    dataset=test_image_folder,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)

#### Train Model

In [97]:
class LeNet(nn.Module):
    """LeNet-5 style CNN mapping single-channel 28x28 images to 10 class logits.

    Layout: conv(5x5, pad 2) -> ReLU -> avg-pool -> conv(5x5) -> ReLU ->
    avg-pool -> three fully connected layers.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        # padding=2 keeps the 28x28 spatial size through the first convolution.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Spatial size: 28 -> pool -> 14 -> conv2 -> 10 -> pool -> 5,
        # so the flattened feature vector has 16 * 5 * 5 entries.
        self.fc1 = nn.Linear(in_features=16*5*5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for input of shape (batch, 1, 28, 28)."""
        x = F.avg_pool2d(F.relu(self.conv1(x)), kernel_size=2, stride=2)
        x = F.avg_pool2d(F.relu(self.conv2(x)), kernel_size=2, stride=2)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the output is unnormalised logits.
        return self.fc3(x)

model = LeNet()
# NOTE(review): the model is left on the CPU; re-enable the line below only if
# the training loop also moves every batch to `device`.
# model.to(device)
opt = optim.Adam(params=model.parameters(), lr=0.001)
# Halve the learning rate at the listed milestones (7 and 10).
# The scheduler class is reached through `optim.lr_scheduler` rather than the
# bare `lr_scheduler` name: this assignment rebinds that name to the scheduler
# instance, so reading it here would fail when the cell is run a second time.
lr_scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=[7, 10], gamma=0.5)