In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset, random_split
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [131]:
train = pd.read_csv('/content/drive/MyDrive/Kaggle_digits/train.csv')
test = pd.read_csv('/content/drive/MyDrive/Kaggle_digits/test.csv')

In [4]:
#@title Check for nan

train[train.isnull().any(axis=1)]

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783


In [41]:
y_train = train['label']
train1 = train.drop(['label'], axis=1).values
X_train = train1.reshape(len(train1), 28, 28)

In [42]:
X_train_norm = (X_train-X_train.mean()) / X_train.std()

In [43]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [61]:
tensor_X_train = torch.tensor(
    data=X_train_norm, dtype=torch.float, device=device
)
tensor_X_train = tensor_X_train.unsqueeze(dim=1)

tensor_y_train = torch.tensor(
    data=y_train, dtype=torch.long, device=device
)

In [62]:
full_ds = TensorDataset(tensor_X_train, tensor_y_train)
train_ds, val_ds = random_split(full_ds, lengths=[0.9, 0.1])

print(len(train_ds))
print(len(val_ds))

37800
4200


In [63]:
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64, shuffle=False)

In [72]:
train_ds[0][0].dtype

torch.float32

In [92]:
class SimpleCNN(nn.Module):
    def __init__(self):
      super(SimpleCNN, self).__init__()

      # Layer 1
      self.layer1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(3,3), stride=2, padding=1)
      self.bn1 = nn.BatchNorm2d(32)
      self.relu1 = nn.ReLU()

      # Layer 2
      self.layer2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3,3), stride=2, padding=1)
      self.bn2 = nn.BatchNorm2d(64)
      self.relu2 = nn.ReLU()

      # Flatten
      self.flatten = nn.Flatten()

      # Layer 3
      self.layer3 = nn.Linear(3136, 1024)
      self.bn3 = nn.BatchNorm1d(1024)
      self.relu3 = nn.ReLU()

      #Layer 4
      self.layer4 = nn.Linear(1024,10)
      self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
      x = self.layer1(x)
      x = self.bn1(x)
      x = self.relu1(x)

      x = self.layer2(x)
      x = self.bn2(x)
      x = self.relu2(x)

      x = self.flatten(x)

      x = self.layer3(x)
      x = self.bn3(x)
      x = self.relu3(x)

      x = self.layer4(x)
      x = self.softmax(x)

      return x

model = SimpleCNN().to(device)

In [93]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [80]:
# Training loop
def train_loop(model, train_loader, val_loader, criterion, optimizer):
    n_epochs = 20
    for epoch in range(n_epochs):
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            predicted = model(inputs)
            loss = criterion(predicted, labels)
            loss.backward()
            optimizer.step()
        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                val_loss = criterion(outputs, labels).item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels.squeeze()).sum().item()

        print(f'Epoch {epoch+1}/{n_epochs}, Loss: {val_loss/len(val_loader):.4f}, Accuracy: {correct/total:.4f}')

In [50]:
# Evaluation
def test_loop(model, test_loader, criterion):
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            test_loss = criterion(outputs, labels).item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels.squeeze()).sum().item()

    print(f'Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {correct/total:.4f}')


In [94]:
train_loop(model, train_loader, val_loader, criterion, optimizer)

Epoch 1/20, Loss: 0.0229, Accuracy: 0.9736
Epoch 2/20, Loss: 0.0224, Accuracy: 0.9793
Epoch 3/20, Loss: 0.0224, Accuracy: 0.9826
Epoch 4/20, Loss: 0.0222, Accuracy: 0.9848
Epoch 5/20, Loss: 0.0222, Accuracy: 0.9845
Epoch 6/20, Loss: 0.0222, Accuracy: 0.9848
Epoch 7/20, Loss: 0.0223, Accuracy: 0.9850
Epoch 8/20, Loss: 0.0222, Accuracy: 0.9857
Epoch 9/20, Loss: 0.0222, Accuracy: 0.9876
Epoch 10/20, Loss: 0.0221, Accuracy: 0.9857
Epoch 11/20, Loss: 0.0221, Accuracy: 0.9867
Epoch 12/20, Loss: 0.0221, Accuracy: 0.9881
Epoch 13/20, Loss: 0.0221, Accuracy: 0.9883
Epoch 14/20, Loss: 0.0221, Accuracy: 0.9888
Epoch 15/20, Loss: 0.0222, Accuracy: 0.9879
Epoch 16/20, Loss: 0.0221, Accuracy: 0.9876
Epoch 17/20, Loss: 0.0222, Accuracy: 0.9876
Epoch 18/20, Loss: 0.0221, Accuracy: 0.9864
Epoch 19/20, Loss: 0.0221, Accuracy: 0.9867
Epoch 20/20, Loss: 0.0221, Accuracy: 0.9869


In [132]:
id = test.index.values
test1 = test.values
X_test1 = test1.reshape(len(test), 28, 28)
id = id + 1

In [133]:
X_test1.shape

(28000, 28, 28)

In [134]:
X_test = (X_test1-X_test1.mean()) / X_test1.std()
X_test.shape

(28000, 28, 28)

In [135]:
tensor_X_test = torch.tensor(
    data=X_test, dtype=torch.float, device=device
)
X_test = tensor_X_test.unsqueeze(dim=1)
X_test.shape

torch.Size([28000, 1, 28, 28])

In [136]:
pred = model(X_test)

In [150]:
pred = pred.max(1)[1].cpu().numpy()

In [154]:
submission = pd.DataFrame({'ImageId': id, 'Label': pred})
submission.to_csv('submission.csv', index=False)