In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split

In [2]:
class CNN(nn.Module):
    """Small convolutional classifier for 28x28 images.

    Layer order (see ``forward``): conv1 -> ReLU -> conv2 -> ReLU -> pool
    -> conv3 -> ReLU -> pool -> flatten -> linear.

    Every convolution uses a 3x3 kernel with stride 1 and padding 1, so only
    the two 2x2 max-pool steps shrink the spatial size (28 -> 14 -> 7). The
    final linear layer is sized for that 7x7 map, so inputs must be 28x28.

    Args:
        in_channel: Number of channels in the input images.
        num_classes: Number of scores produced per sample.
    """

    def __init__(self, in_channel =1, num_classes = 26):
        super().__init__()
        # Use the constructor argument; it was previously ignored in favour of
        # a hard-coded 1, so CNN(in_channel=3) silently built a 1-channel net.
        self.conv1 = nn.Conv2d(in_channels = in_channel, out_channels = 8, kernel_size = (3,3), stride = (1,1), padding = (1,1))
        # A single pooling module is reused after conv2 and after conv3.
        self.pool = nn.MaxPool2d(kernel_size= (2,2), stride = (2,2))
        self.conv2 = nn.Conv2d(in_channels = 8, out_channels = 16, kernel_size = (3,3), stride = (1,1), padding = (1,1))
        self.conv3 = nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = (3,3), stride = (1,1), padding = (1,1))
        # 32 channels * 7 * 7 spatial positions after two 2x2 pools of a 28x28 input.
        self.output = nn.Linear(32*7*7, num_classes)

    def forward(self, x):
        """Compute class scores for a batch.

        Args:
            x: Tensor of shape ``(batch, in_channel, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding the raw linear
            outputs (no softmax is applied here).
        """
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.reshape(x.shape[0], -1)
        x = self.output(x)

        return x

In [3]:
# Smoke test: push a random batch of 64 single-channel 28x28 inputs through a
# freshly built network and print the shape of what comes out.
model = CNN()
x = torch.randn((64, 1, 28, 28))
print(model(x).size())

torch.Size([64, 26])


In [4]:
# Hyperparameters shared by the cells below.
in_channel, num_classes = 1, 26    # model input channels / number of output scores
batch_size, num_epochs = 64, 10    # DataLoader batch size / passes over the training set
learning_rate = 1e-3               # step size handed to the optimizer

In [5]:
# Read the CSV and cast every column to float32, then preview the first rows.
data = (
    pd.read_csv("A_Z Handwritten Data.csv")
    .astype("float32")
)
data.head(25)

Unnamed: 0,values,0,1,2,3,4,5,6,7,8,...,774,775,776,777,778,779,780,781,782,783
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Labels are the "values" column; features are every remaining column.
y = data["values"].copy()
X = data.drop(columns = "values")

In [7]:
# Features: one row per image, reshaped to (N, 1, 28, 28) for the conv layers.
X_tensor = torch.from_numpy(X.values)
X_tensor = X_tensor.reshape(X.shape[0], 1, 28, 28)
# Labels: cast to int64, the dtype nn.CrossEntropyLoss expects for class indices.
y_tensor = torch.from_numpy(y.values)
y_tensor = y_tensor.long()
# Hold out 20% of the samples for testing; fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_tensor,y_tensor, test_size = 0.2, random_state = 42)
print(X_tensor.shape, y_tensor.shape)

train_data = TensorDataset(X_train, y_train)
# Reshuffle the training set every epoch so the optimizer does not see the
# exact same batches in the exact same order on each pass.
train_loader = DataLoader(dataset = train_data, batch_size = batch_size, shuffle = True)
test_data = TensorDataset(X_test, y_test)
# Evaluation order does not affect accuracy, so the test loader stays unshuffled.
test_loader = DataLoader(dataset = test_data, batch_size = batch_size, shuffle = False)

torch.Size([372451, 1, 28, 28]) torch.Size([372451])


In [8]:
# Build the network from the configured hyperparameters rather than relying on
# CNN's defaults, so editing in_channel / num_classes actually takes effect.
model = CNN(in_channel = in_channel, num_classes = num_classes)
# Cross-entropy over the raw class scores returned by the model.
loss_fun = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [12]:
# Train the model
# Ensure training mode in case an earlier evaluation left the model in eval mode.
model.train()
for epoch in range(num_epochs):
    # The batch is named `inputs`, not `data`, so the loop does not overwrite
    # the `data` DataFrame that earlier cells read from when re-run.
    for batch, (inputs, targets) in enumerate(train_loader):

        # Forward pass
        scores = model(inputs)
        loss = loss_fun(scores, targets)

        # Backward pass (clear stale gradients first)
        optimizer.zero_grad()
        loss.backward()

        # Parameter update
        optimizer.step()

In [13]:
def check_accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` gets right.

    The model is put in eval mode and gradients are disabled while scoring;
    the model's previous train/eval mode is restored before returning.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        model: ``nn.Module`` mapping an input batch to scores whose dimension 1
            indexes the classes.

    Returns:
        Accuracy as a float in ``[0, 1]``.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                scores = model(x)
                predictions = scores.argmax(1)
                # Tensor-level reduction; the builtin sum() would iterate the
                # tensor element by element in Python.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Do not leave the caller's model silently stuck in eval mode.
        model.train(was_training)

    return float(num_correct)/float(num_samples)
    

In [14]:
print("Train Accuracy:", check_accuracy(train_loader, model))
print("Test Accuracy:", check_accuracy(test_loader, model))

Train Accuracy: 0.9905524231440462
Test Accuracy: 0.9847498355506035
