In [7]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

# Path to your images
data_path = "Data/"  # path to the folder containing subfolders for each class

# Number of classes
num_classes = 7  # Adjust this to the number of your classes (letters + words)

# Transformations
transform = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load dataset
full_dataset = torchvision.datasets.ImageFolder(root=data_path, transform=transform)

# Create a list of targets (labels) for stratification
targets = [label for _, label in full_dataset.samples]

# Stratified split into train and test sets
train_idx, test_idx = train_test_split(
    range(len(full_dataset)),
    test_size=0.2,
    random_state=42,
    stratify=targets
)

# Create train and test subsets
train_dataset = Subset(full_dataset, train_idx)
test_dataset = Subset(full_dataset, test_idx)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(128 * 37 * 37, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 37 * 37) # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = SimpleCNN(num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the CNN
def train_model(num_epochs):
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 100 == 99:    # print every 100 mini-batches
                print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
                running_loss = 0.0

# Testing the CNN
def test_model():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the test images: {100 * correct // total} %')


# Example training and testing
train_model(num_epochs=10)
test_model()


Accuracy of the network on the test images: 99 %


In [8]:
from sklearn.metrics import classification_report

# Testing the CNN with additional metrics
def test_model_with_metrics():
    y_pred = []
    y_true = []

    # no gradient needed
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = model(images)

            _, predicted = torch.max(outputs.data, 1)
            y_pred.extend(predicted.cpu().numpy())
            y_true.extend(labels.cpu().numpy())

    # Calculate metrics
    print(classification_report(y_true, y_pred, target_names=full_dataset.classes))

# Example testing
test_model_with_metrics()

              precision    recall  f1-score   support

           A       0.96      1.00      0.98        23
           B       1.00      0.96      0.98        24
           C       1.00      1.00      1.00        28
       Hello       1.00      1.00      1.00        25
           I       1.00      1.00      1.00        30
           M       0.96      0.96      0.96        28
           O       1.00      1.00      1.00        57

    accuracy                           0.99       215
   macro avg       0.99      0.99      0.99       215
weighted avg       0.99      0.99      0.99       215



In [1]:
# torch.save(model.state_dict(), 'simple_cnn_model.pth')

### This code serves as a proof of concept. The model trained here was not actually used in out live Camera Feed. The model used in the Live camera Feed was trained using google's Teachable Machine Program. 

Teachable Machine Instructions

1) https://teachablemachine.withgoogle.com/train
2) choose image Project
3) choose standard
4) for each folder of images, drag them into a box and give them the appropriate label.
5) Train the model
6) click export
7) click the middle button (Tensorflow NOT JS or LITE)
8) click keras and download
9) unzip the file and place the model and label file in the model folder of this project
10) replace the index array in the Live camera file with the index array used in the label text file provided with the model