# 1. Import

In [None]:
import torch
from torch import optim, nn
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
import time
import copy
import os
import matplotlib.pyplot as plt

import warnings

warnings.filterwarnings('ignore')

# 2. Data preparation

## Load data

In [None]:
# Read the training CSV; sample(frac=1) returns every row in random order.
data = pd.read_csv('../input/digit-recognizer/train.csv').sample(frac=1)  # load

# Number of rows reserved for training (the split itself happens further down).
train_size = 37800
print("train_size: {}".format(train_size))

# Detach the target column from the pixel columns (pop removes it in place).
label = data.pop('label')

# Build both tensors from the underlying NumPy arrays rather than from the
# pandas objects: after the shuffle the Series index is no longer 0..n-1, and
# converting the raw array keeps the conversion independent of that index.
# The pixels are made float32 up front so the division below is an ordinary
# floating-point division.
data = torch.tensor(data.values, dtype=torch.float32)
label = torch.tensor(label.values)

# Normalization
data = data / 255.0

print(label.size())

## Split train and valid set. Create dataloaders

In [None]:
# Pair every image row with its label.
all_ds = TensorDataset(data, label)

# Split: `train_size` samples go to training, everything else to validation.
# The validation size is derived from the dataset length (instead of a
# hard-coded 42000) so the two lengths always add up to the data actually
# loaded, which random_split requires.
valid_size = len(all_ds) - train_size
train_ds, valid_ds = torch.utils.data.random_split(all_ds, [train_size, valid_size])

# Create dataloaders
batch_size = 1000
# Training batches are reshuffled every epoch.
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
# Validation uses batches twice as large and is not shuffled.
valid_dl = DataLoader(valid_ds, batch_size=batch_size * 2)

# Show one (image, label) sample as a sanity check.
print(train_ds[0])

## Accelerate computation with a GPU

In [None]:
# Use the first CUDA device when one is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device('cpu')
print('Device: {}'.format(device))

# 3. CNN

## Define your model

In [None]:
class Net(nn.Module):
    """Convolutional classifier for single-channel 28x28 images with 10 classes.

    The feature extractor consists of three stages. Each stage is two 3x3
    convolutions (padding 1, each followed by ReLU) and a 2x2 max-pool with
    stride 2. The channel widths are 64, 128 and 256, and the pooled feature
    map is flattened to 256 * 3 * 3 values before the two-layer classifier
    head (with dropout in between).
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) for each convolutional stage.
        stage_channels = [(1, 64), (64, 128), (128, 256)]

        # Layers are appended in the order they will run, so the indices
        # inside the Sequential (and therefore the state_dict keys) follow
        # that order.
        conv_layers = []
        for in_ch, out_ch in stage_channels:
            conv_layers += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
        self.conv = nn.Sequential(*conv_layers)

        # Classifier head: flattened features -> 256 hidden units -> 10 logits.
        head_layers = [
            nn.Linear(256 * 3 * 3, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 10),
        ]
        self.linear = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the raw class scores (one row of 10 values per image).

        The input is reshaped to (-1, 1, 28, 28) and cast to float, so flat
        rows of 784 values are accepted.
        """
        images = x.view(-1, 1, 28, 28).float()
        features = self.conv(images)
        # Keep the batch dimension and collapse everything else.
        flattened = features.view(features.size(0), -1)
        return self.linear(flattened)


# Instantiate the network with freshly initialised weights and move its
# parameters to the selected device. nn.Module.to() modifies the module in
# place, so `net` itself lives on `device` afterwards.
net = Net()
net.to(device)


# Load your model
# if os.path.exists(net_path):
#     net.load_state_dict(torch.load(net_path))

## Set optimizer and loss function

In [None]:
# Classification loss applied to the raw scores produced by the network.
loss_fn = nn.CrossEntropyLoss()

# Initial learning rate handed to the optimizer.
lr = 0.0001

# RMSprop over all network parameters.
optimizer = optim.RMSprop(
    net.parameters(),
    lr=lr,
    alpha=0.9,
)

# Step schedule: the learning rate is multiplied by `gamma` every `step_size`
# scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=5,
    gamma=0.8,
)

## Start training

In [None]:
def accuracy(out, y):
    """Return ``(fraction_correct, batch_size)`` for one batch.

    ``out`` holds one row of class scores per sample; the predicted class is
    the index of the largest score in each row. ``y`` holds the true class
    indices. The fraction is returned as a Python float together with
    ``len(y)`` so callers can compute a size-weighted average over batches.
    """
    predicted = out.argmax(dim=1)
    hits = predicted.eq(y).float()
    return hits.mean().item(), len(y)


def loss_batch(net, x, y):
    """Return ``(loss_value, batch_size)`` for one batch.

    Runs ``x`` through ``net``, scores the result against ``y`` with the
    module-level ``loss_fn`` and returns the loss as a Python float together
    with ``len(x)``.
    """
    predictions = net(x)
    batch_loss = loss_fn(predictions, y)
    return batch_loss.item(), len(x)


def _validate(loader):
    """Return the size-weighted mean ``(loss, accuracy)`` of ``net`` on *loader*.

    The network is put in eval mode and every batch is forwarded exactly once;
    the same output feeds both the loss and the accuracy.
    """
    net.eval()
    batch_stats = []
    with torch.no_grad():
        for x, y in loader:
            y = y.to(device)
            out = net(x.to(device))
            batch_acc, batch_len = accuracy(out, y)
            batch_stats.append((loss_fn(out, y).item(), batch_acc, batch_len))
    losses, accs, nums = zip(*batch_stats)
    total = np.sum(nums)
    val_loss = np.sum(np.multiply(losses, nums)) / total
    val_acc = np.sum(np.multiply(accs, nums)) / total
    return val_loss, val_acc


def train_model():
    """Train the module-level ``net`` for ``epochs`` epochs and plot the curves.

    Uses the module-level ``net``, ``train_dl``, ``valid_dl``, ``optimizer``,
    ``scheduler``, ``loss_fn``, ``device`` and ``epochs``. After every epoch
    the validation loss and accuracy are printed, and the weights with the best
    validation accuracy so far are remembered. Those best weights are loaded
    back into ``net`` after every 10th epoch and once more at the end.
    Finally the per-epoch validation loss and accuracy are plotted.

    Returns:
        None. ``net`` is left in eval mode holding the best weights seen.
    """
    print('Train model')
    best_model_wts = copy.deepcopy(net.state_dict())
    best_acc = 0.0
    loss_show = []
    acc_show = []

    for epoch in range(epochs):
        epoch_since = time.time()
        print("Epoch {}:".format(epoch), end=' ')

        # One optimisation pass over the training set.
        net.train()
        for b_x, b_y in train_dl:
            out = net(b_x.to(device))
            loss = loss_fn(out, b_y.to(device))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # One evaluation pass over the validation set.
        val_loss, val_acc = _validate(valid_dl)
        epoch_time_elapsed = time.time() - epoch_since
        print("time:{:.0f}s".format(epoch_time_elapsed), "loss:{:.10f}".format(val_loss), "accuracy:{:.10f}".format(val_acc))

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(net.state_dict())

        # StepLR counts epochs by itself. Its optional positional argument is
        # an epoch index, not a metric, so passing the validation loss there
        # would set the schedule position to the loss value instead of
        # advancing it.
        scheduler.step()
        loss_show.append(val_loss)
        acc_show.append(val_acc)

        # Every 10th epoch, continue from the best weights seen so far.
        if epoch % 10 == 9:
            net.load_state_dict(best_model_wts)
#             torch.save(net.state_dict(), net_path)
    net.load_state_dict(best_model_wts)
#     Save your model if you want
#     torch.save(net.state_dict(), net_path)
    plt.figure()
    plt.plot(range(epochs), loss_show)
    plt.figure()
    plt.plot(range(epochs), acc_show)
    plt.show()
    


# Number of training epochs; train_model() reads this module-level value.
# Original author's note: set it to 30 to get 99.4% accuracy.
epochs = 10
train_model()

## Make predictions

In [None]:
def save(batch_size=1000):
    """Predict a label for every row of the test CSV and write the submission.

    Reads '../input/digit-recognizer/test.csv', runs the module-level ``net``
    over it in batches and writes a two-column CSV (``ImageId`` counting from
    1, ``Label``) to the module-level ``csv_path``.

    Args:
        batch_size: Number of test rows forwarded through the network at once.
            Any positive value works; the last batch may be smaller.

    Returns:
        list[int]: The predicted class index for each test row, in file order.
    """
    test = pd.read_csv('../input/digit-recognizer/test.csv')
    # Same preprocessing as the training data: float pixels divided by 255.
    test = torch.tensor(test.values, dtype=torch.float32) / 255.0
    test_dl = DataLoader(TensorDataset(test), batch_size=batch_size)
    print("Save preds")
    preds = []

    # Dropout must be disabled for inference. Remember the current mode so it
    # can be restored afterwards.
    was_training = net.training
    net.eval()
    with torch.no_grad():
        # A TensorDataset built from one tensor yields one-element batches.
        for (x,) in test_dl:
            out = net(x.to(device))
            # Take the argmax over the whole batch at once; this does not
            # depend on how many rows the batch happens to contain.
            preds.extend(torch.argmax(out, dim=1).cpu().tolist())
    net.train(was_training)

    # Number the rows from 1 based on how many predictions were produced.
    dataframe = pd.DataFrame({"ImageId": range(1, len(preds) + 1), "Label": preds})
    dataframe.to_csv(csv_path, index=False)
    print('Saved')
    return preds



# Output file for the predictions; save() reads this module-level name.
# Uncomment the call below to generate the file.
csv_path = 'post.csv'
# save()

## View training results

In [None]:
# Score the network on the validation split once more.
# eval() disables dropout, and no_grad() avoids recording autograd history
# for forward passes that are only used for scoring.
net.eval()
with torch.no_grad():
    acc, nums_acc = zip(
        *[accuracy(net(x.to(device)), y.to(device)) for x, y in valid_dl]
    )
# Weight each batch accuracy by its batch size before averaging.
valid_acc = np.sum(np.multiply(acc, nums_acc)) / np.sum(nums_acc)
print("valid :{:.10f}".format(valid_acc))

Thanks