In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
from torch.utils.data import DataLoader,ConcatDataset,random_split
from torchvision import datasets,transforms

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Convert the Images to Grayscale making it black and white and Normalize the pixel values

In [2]:
transform=transforms.Compose([transforms.Grayscale(),transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])

Load the EMNIST dataset containing the list of letters

In [3]:
train_dataset=datasets.EMNIST(root='./data',split='letters',train=True,download=True,transform=transform)
test_dataset=datasets.EMNIST(root='/data',split='letters',train=False,download=True,transform=transform)


Downloading https://biometrics.nist.gov/cs_links/EMNIST/gzip.zip to ./data/EMNIST/raw/gzip.zip


100%|██████████| 562M/562M [00:17<00:00, 32.2MB/s]


Extracting ./data/EMNIST/raw/gzip.zip to ./data/EMNIST/raw
Downloading https://biometrics.nist.gov/cs_links/EMNIST/gzip.zip to /data/EMNIST/raw/gzip.zip


100%|██████████| 562M/562M [00:09<00:00, 59.1MB/s]


Extracting /data/EMNIST/raw/gzip.zip to /data/EMNIST/raw


In [4]:
full_data=ConcatDataset([train_dataset,test_dataset])
total_len=len(full_data)

Extracting Subset

In [5]:
portion_size=int(0.3*total_len)
unused_size=total_len-portion_size

subset_30_percent,_=random_split(full_data,[portion_size,unused_size],generator=torch.Generator().manual_seed(42))

print(f"subset size(30% of total) is :{len(subset_30_percent)}")

subset size(30% of total) is :43680


In [6]:
train_size=int(0.9*len(subset_30_percent))
unused_size=len(subset_30_percent)-train_size
new_train_subset,new_test_subset=random_split(subset_30_percent,[train_size,unused_size],generator=torch.Generator().manual_seed(42))

print(len(new_train_subset))
print(len(new_test_subset))


39312
4368


In [7]:
# Create DataLoaders from these new subsets --> Basically loading stuff up for our use
train_loader = DataLoader(new_train_subset, batch_size=64, shuffle=True)
test_loader = DataLoader(new_test_subset, batch_size=64, shuffle=False)

In [8]:
import torch.nn as nn
import torch.nn.functional as F

In [9]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN,self).__init__()
        self.conv1=nn.Conv2d(1,16,kernel_size=3,stride=1,padding=1) #(10,28,28)
        self.conv2=nn.Conv2d(16,32,kernel_size=3,stride=1,padding=1)
        self.conv3=nn.Conv2d(32,64,kernel_size=3,stride=1,padding=1)
        self.fc1=nn.Linear(64*3*3,128) # the 64*3*3 layer is mapped to 128 nodes in the full connected layer
        self.fc2=nn.Linear(128,64)
        self.fc3=nn.Linear(64,27)
    def forward(self,x):
        x=F.relu(F.max_pool2d(self.conv1(x),2))
        x=F.relu(F.max_pool2d(self.conv2(x),2))
        x=F.relu(F.max_pool2d(self.conv3(x),2))
        #flatten
        x=x.view(x.size(0),-1)
        
        x=F.relu(self.fc1(x))
        x=F.relu(self.fc2(x))
        x=self.fc3(x)
        return x

In [10]:
model=SimpleCNN()
total_params=sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"total parameters is:{total_params}")


total parameters is:107163


In [11]:
import torch.optim as optim

criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(),lr=0.001)


In [12]:
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



SimpleCNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=576, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=27, bias=True)
)

In [13]:
num_epochs=18
for epoch in range(num_epochs):
    model.train()
    running_losses=0.0
    for images,labels in train_loader:
        images,labels=images.to(device),labels.to(device)
        optimizer.zero_grad()
        outputs=model(images)
        loss=criterion(outputs,labels)
        loss.backward()
        optimizer.step()
        running_losses+=loss.item()
        
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_losses/len(train_loader):.4f}")

Epoch [1/18], Loss: 1.0039
Epoch [2/18], Loss: 0.3739
Epoch [3/18], Loss: 0.2935
Epoch [4/18], Loss: 0.2500
Epoch [5/18], Loss: 0.2230
Epoch [6/18], Loss: 0.2023
Epoch [7/18], Loss: 0.1858
Epoch [8/18], Loss: 0.1683
Epoch [9/18], Loss: 0.1581
Epoch [10/18], Loss: 0.1466
Epoch [11/18], Loss: 0.1362
Epoch [12/18], Loss: 0.1306
Epoch [13/18], Loss: 0.1197
Epoch [14/18], Loss: 0.1141
Epoch [15/18], Loss: 0.1087
Epoch [16/18], Loss: 0.1007
Epoch [17/18], Loss: 0.0970
Epoch [18/18], Loss: 0.0906


In [14]:
model.eval()
correct=0
total=0

for images,labels in test_loader:
    images,labels=images.to(device),labels.to(device)
    outputs=model(images)
    _,predicted=torch.max(outputs,1)
    total+=labels.size(0)
    correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 91.83%


In [15]:
torch.save(model.state_dict(), "pytorch_ocr.pth")