In [1]:
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

In [2]:
# Prefer the first CUDA GPU and fall back to the CPU when CUDA is not available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# torch.cuda.get_device_name raises on a machine without CUDA, so only query the GPU
# name when the selected device really is a GPU; otherwise report the CPU fallback.
if device.type == "cuda":
    print(torch.cuda.get_device_name(device))
else:
    print("CUDA not available; using CPU")

NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [3]:
# Preprocessing pipeline applied to every image loaded in this notebook.
trans = transforms.Compose([
    transforms.Resize((80, 150)),  # (height, width)
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Normalize(0.5, 0.5),  # mean 0.5, std 0.5
])

In [4]:
# Training and test images are read from sibling directories with the same preprocessing.
train_data = torchvision.datasets.ImageFolder('./data/', transform=trans)
test_data = torchvision.datasets.ImageFolder('./test/', transform=trans)

# Both loaders use identical batching settings.
loader_options = dict(batch_size=32, shuffle=True, num_workers=2)
train_loader = torch.utils.data.DataLoader(train_data, **loader_options)
test_loader = torch.utils.data.DataLoader(test_data, **loader_options)

In [5]:
# The class list is ordered lexicographically by folder name ('10' comes before '2'),
# so a class index is NOT the numeric folder name (index 2 is folder '10').
folder_names = train_data.classes
print(folder_names)

['0', '1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '5', '6', '7', '8', '9']


In [6]:
# Walk the training loader once and keep every batch on the target device.
all_data = []
all_labels = []
for batch_inputs, batch_targets in train_loader:
    all_data.append(batch_inputs.to(device))
    all_labels.append(batch_targets.to(device))

In [7]:
# Combine all batches into a single tensor for data and labels.
# This cell rebinds `all_data` / `all_labels` from lists of batches to tensors. Guard the
# concatenation so that re-running the cell does not hand an already-concatenated tensor
# to torch.cat (which expects a sequence of tensors) and fail.
if isinstance(all_data, (list, tuple)):
    all_data = torch.cat(all_data)
if isinstance(all_labels, (list, tuple)):
    all_labels = torch.cat(all_labels)

# Replace the disk-backed training loader with one that serves the preloaded tensors.
preloaded_dataset = torch.utils.data.TensorDataset(all_data, all_labels)
train_loader = torch.utils.data.DataLoader(preloaded_dataset, batch_size=32, shuffle=True)

In [8]:
# Sanity check: shape of the first preprocessed training sample.
first_image, _first_label = train_data[0]
print(first_image.size())

torch.Size([1, 80, 150])


In [9]:
class NeuralNet(nn.Module):
    """Small CNN classifier: two conv + max-pool stages followed by two dense layers.

    With the default arguments the network expects single-channel 80x150
    (height, width) inputs and returns 40 raw class scores per sample, exactly
    like the previous hard-coded version. Layer names and creation order are
    unchanged, so existing state dicts still load.

    Args:
        num_classes: Number of scores produced by the final dense layer.
        input_size: (height, width) of the input images. It is only used to
            size the first dense layer.

    Raises:
        ValueError: If `input_size` is too small to survive both conv/pool stages.
    """

    def __init__(self, num_classes: int = 40, input_size=(80, 150)) -> None:
        super().__init__()

        self.conv1 = nn.Conv2d(1, 36, 5)  # 5x5 kernel, no padding: H and W shrink by 4
        self.pool = nn.MaxPool2d(2)  # halves H and W, rounding down
        self.conv2 = nn.Conv2d(36, 64, 5)

        # Each stage is conv(5) then pool(2), i.e. size -> (size - 4) // 2.
        # For 80x150: 76x146 -> 38x73 -> 34x69 -> 17x34.
        height, width = input_size
        for _ in range(2):
            height = (height - 4) // 2
            width = (width - 4) // 2
        if height < 1 or width < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small for two conv(5) + pool(2) stages"
            )

        self.dense1 = nn.Linear(64 * height * width, 64)
        self.dense2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch shaped (batch, 1, height, width)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten everything else
        x = F.relu(self.dense1(x))
        x = self.dense2(x)

        return x

In [10]:
# Model on the selected device, cross-entropy objective, SGD with momentum.
net = NeuralNet().to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), momentum=0.9, lr=0.001)

In [11]:
# Train for a fixed number of epochs and report the mean batch loss after each one.
# The evaluation cells put the model in eval mode, so switch back explicitly in case
# this cell is run again after them.
net.train()

for epoch in range(40):
    print(f"epoch {epoch}")

    running_loss = 0.0

    for inputs, labels in train_loader:
        # Returns the same tensors when the batches were already preloaded onto `device`,
        # but keeps the loop working when `train_loader` yields CPU tensors.
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_func(outputs, labels)

        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Loss: {running_loss / len(train_loader):.4f}")

epoch 0
Loss: 3.6917
epoch 1
Loss: 3.6782
epoch 2
Loss: 3.6713
epoch 3
Loss: 3.6555
epoch 4
Loss: 3.6421
epoch 5
Loss: 3.6249
epoch 6
Loss: 3.5997
epoch 7
Loss: 3.5652
epoch 8
Loss: 3.5077
epoch 9
Loss: 3.4166
epoch 10
Loss: 3.2661
epoch 11
Loss: 2.9934
epoch 12
Loss: 2.5394
epoch 13
Loss: 1.9417
epoch 14
Loss: 1.3388
epoch 15
Loss: 0.8955
epoch 16
Loss: 0.6315
epoch 17
Loss: 0.4409
epoch 18
Loss: 0.3495
epoch 19
Loss: 0.3206
epoch 20
Loss: 0.2440
epoch 21
Loss: 0.2243
epoch 22
Loss: 0.1537
epoch 23
Loss: 0.1247
epoch 24
Loss: 0.1014
epoch 25
Loss: 0.1102
epoch 26
Loss: 0.1059
epoch 27
Loss: 0.0782
epoch 28
Loss: 0.0908
epoch 29
Loss: 0.0788
epoch 30
Loss: 0.0560
epoch 31
Loss: 0.0633
epoch 32
Loss: 0.0491
epoch 33
Loss: 0.0463
epoch 34
Loss: 0.0437
epoch 35
Loss: 0.0419
epoch 36
Loss: 0.0512
epoch 37
Loss: 0.0654
epoch 38
Loss: 0.0438
epoch 39
Loss: 0.0403


In [12]:
# Top-1 accuracy (in percent) of the trained network on the test loader.
correct = 0
total = 0

net.eval()

with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        targets = targets.to(device)

        scores = net(images)
        predicted = torch.max(scores, 1).indices  # highest-scoring class index per sample
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

accuracy = 100 * correct / total

print(accuracy)

86.57407407407408


In [13]:
# Save only the learned parameters (the state dict), not the whole module object.
trained_weights = net.state_dict()
torch.save(trained_weights, 'deliverables1.pth')

In [14]:
import string

# Label vocabulary: four named symbols first, then a-z, then 0-9.
alphas = list(string.ascii_lowercase)
nums = list(string.digits)

tokens = alphas + nums

# Named symbols take ids 0-3; single-character tokens continue from 4.
encoding = {'frac': 0, 'int': 1, 'sqrt': 2, 'sum': 3}
encoding.update({token: token_id for token_id, token in enumerate(tokens, start=4)})

# Insertion order equals id order, so position in `classes` is the encoded id.
classes = list(encoding)

print(encoding)
print(classes)

{'frac': 0, 'int': 1, 'sqrt': 2, 'sum': 3, 'a': 4, 'b': 5, 'c': 6, 'd': 7, 'e': 8, 'f': 9, 'g': 10, 'h': 11, 'i': 12, 'j': 13, 'k': 14, 'l': 15, 'm': 16, 'n': 17, 'o': 18, 'p': 19, 'q': 20, 'r': 21, 's': 22, 't': 23, 'u': 24, 'v': 25, 'w': 26, 'x': 27, 'y': 28, 'z': 29, '0': 30, '1': 31, '2': 32, '3': 33, '4': 34, '5': 35, '6': 36, '7': 37, '8': 38, '9': 39}
['frac', 'int', 'sqrt', 'sum', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


In [24]:
import os

def load_img(path):
    """Load one image file and preprocess it into a single-sample batch.

    Args:
        path: Path of the image file to open.

    Returns:
        The output of the notebook's `trans` pipeline for that image, with a
        leading batch dimension of size 1 added.
    """
    # Image.open leaves the underlying file open; the context manager releases
    # the handle as soon as the transform has consumed the pixel data.
    with Image.open(path) as img:
        tensor = trans(img)
    return tensor.unsqueeze(0)  # turn into batch

# One sample image per class folder (0..39, in numeric order) plus one extra image.
imgs = []

for i in range(40):
    path = f'./test/{i}/'
    # os.listdir returns entries in arbitrary order; sort so that the same file is
    # selected on every run and on every machine.
    filename = sorted(os.listdir(path))[0]
    imgs.append(load_img(f'{path}{filename}'))

imgs.append(load_img('./sqrt.png'))

In [25]:
# Print the predicted token for every sample image.
net.eval()

with torch.no_grad():
    for sample in imgs:
        scores = net(sample.to(device))
        class_index = torch.max(scores, 1).indices.item()

        # class index -> folder name (a numeric string) -> token name
        folder_name = train_data.classes[class_index]

        print(classes[int(folder_name)])

frac
int
sqrt
sum
a
s
e
d
e
i
g
h
i
j
k
l
m
n
s
p
g
r
s
t
u
v
w
x
y
z
8
1
2
3
4
5
s
7
8
2
sqrt
