In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor

geometrical_features = torch.tensor([
    # Aspect Ratio, Stroke Count, Horizontal Symmetry, Enclosed Areas
    [1.0, 1, 1, 1],  # '0': Circular, 1 stroke, symmetrical, 1 enclosed area
    [2.0, 1, 0, 0],  # '1': Tall and narrow, 1 stroke, not symmetrical, no enclosed areas
    [1.0, 1, 0, 0],  # '2': Curvy, 1 stroke, not symmetrical, no enclosed areas
    [1.0, 1, 0, 0],  # '3': Round in the middle, 1 stroke, not symmetrical, no enclosed areas
    [1.0, 2, 0, 0],  # '4': Open, usually 2 strokes, not symmetrical, no enclosed areas
    [1.0, 1, 0, 0],  # '5': Curvy with a horizontal line, 1 stroke, not symmetrical, no enclosed areas
    [1.0, 1, 0, 1],  # '6': Circular with an inward tail, 1 stroke, not symmetrical, 1 enclosed area
    [1.0, 1, 0, 0],  # '7': Angular, 1 stroke, not symmetrical, no enclosed areas
    [1.0, 2, 1, 2],  # '8': Two circles, 2 strokes, symmetrical, 2 enclosed areas
    [1.0, 1, 0, 1]   # '9': Circular with an outward tail, 1 stroke, not symmetrical, 1 enclosed area
])

class CNNWithGeometricalInfusion(nn.Module):
    def __init__(self, geo_feature_size):
        super(CNNWithGeometricalInfusion, self).__init__()
        # CNN layers
        self.conv1 = nn.Conv2d(1, 16, 3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 7 * 7, 128)

        # Layer for integrating geometrical features
        self.fc_geo = nn.Linear(geo_feature_size, 128)
        self.fc_combined = nn.Linear(128 + 128, 10)

    def forward(self, x, geo_features):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 32 * 7 * 7)
        x = torch.relu(self.fc1(x))

        geo_features = torch.relu(self.fc_geo(geo_features))
        combined = torch.cat((x, geo_features), dim=1)
        combined = self.fc_combined(combined)
        return combined



In [2]:
transform = transforms.Compose([transforms.ToTensor()])
mnist_train = MNIST(root='./data', train=True, download=True, transform=transform)
mnist_test = MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(mnist_train, batch_size=32, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=32, shuffle=False)


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CNNWithGeometricalInfusion(geo_feature_size=4).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10  # Number of epochs

for epoch in range(num_epochs):
    model.train()
    for (images, labels) in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Extract geometrical features for each label
        geometrical_features = geometrical_features.to(device)
        geo_features = geometrical_features[labels]
        
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images, geo_features)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')


Epoch 1/10, Loss: 0.0025763968005776405
Epoch 2/10, Loss: 0.0021123921032994986
Epoch 3/10, Loss: 0.002298399806022644
Epoch 4/10, Loss: 0.23923571407794952
Epoch 5/10, Loss: 0.00018289666331838816
Epoch 6/10, Loss: 0.00012004919699393213
Epoch 7/10, Loss: 1.4848546925350092e-05
Epoch 8/10, Loss: 0.00011062492558266968
Epoch 9/10, Loss: 9.238675033884647e-07
Epoch 10/10, Loss: 8.95604825927876e-05


In [8]:
model.eval()
total = 0
correct = 0

with torch.no_grad():
    for (images, labels) in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        geo_features = geometrical_features[labels].to(device)

        outputs = model(images, geo_features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on the test set: {accuracy:.2f}%')


Accuracy on the test set: 99.76%
