In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64*5*5, 64)
        self.fc2 = nn.Linear(64, 11)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 64*5*5)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [3]:
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [4]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(15),
    transforms.RandomAffine(0, translate=(0.1, 0.1), shear=0.1, scale=(0.9, 1.1)),
    transforms.ColorJitter(brightness=0.3),
    transforms.ToTensor()
])

class AugmentedDataset(Dataset):
    def __init__(self, X, y, augmentation_factor=50, transform=None):
        """
        Args:
            X (numpy array): Training images, shape of (N, 28, 28)
            y (numpy array): The corresponding label of each image, shape of (N,)
            augmentation_factor: Number of augmented versions to create per image
        """
        self.X = X
        self.y = y
        self.augmentation_factor = augmentation_factor
        self.transform = transform
        
        # Pre-generates and stores all augmented samples
        self.samples = self._augment_samples()

    def _augment_samples(self):
        """
        Creates a new dataset consisting of both orignal images and augmented images
        Returns:
            list of tuples (image, label)
        """
        augmented_images = []
        augmented_labels = []

        # Add original dataset
        for img, label in zip(self.X, self.y):
            img = img.reshape(28, 28)
            img_tensor = torch.tensor(img, dtype=torch.float32)
            augmented_images.append(img_tensor)
            augmented_labels.append(label)

        # Add augmented dataset
        for img, label in zip(self.X, self.y):
            img = img.reshape(28, 28)

            for i in range(self.augmentation_factor):
                if self.transform:
                    # Apply random transformation
                    aug_img = self.transform(img)

                    augmented_images.append(aug_img)
                    augmented_labels.append(label)

        # Zip back into a list of image-label tuples
        return list(zip(augmented_images, augmented_labels))
    
    def __len__(self):
        # Returns total number of samples (including augmented)
        return len(self.samples)
    
    def __getitem__(self, index):
        # Gets a sample based on index given and converts img-label to pytorch tensors
        img, label = self.samples[index]

        # Convert img to float tensor and add channel dimension if necessary
        img_tensor = torch.tensor(img, dtype=torch.float32)
        if img_tensor.ndim == 2: # if still only [H, W], add channel
            img_tensor = img_tensor.unsqueeze(0)

        # Convert label to integer tensor
        label_tensor = torch.tensor(label, dtype=torch.long)

        return img_tensor, label_tensor


In [5]:
import os
import numpy as np
from PIL import Image

def load_images(folder_path):
    images=[]
    labels=[]

    for filename in os.listdir(folder_path):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            # Load image
            img_path = os.path.join(folder_path, filename)
            img = Image.open(img_path).convert('L') # Convert img to grayscale
            img = img.resize((28, 28))
            img_array = np.array(img) / 255.0 # Normalize
            img_array = img_array.reshape(28, 28, 1) # Add channel dimension

            # Extract label from filename (Assumes filename starts with a number)
            if filename.startswith('slash'):
                label = 10 # Assings '/' to be encoded as 10
            else:
                label = int(filename[0])
            
            images.append(img_array)
            labels.append(label)
    return np.array(images), np.array(labels)


In [6]:
from sklearn.model_selection import train_test_split

# Load data from image folder
folder_path = "C:/Users/Sam/Documents/Comp Sci/Terraria Bot/Terraria-Bot/dataset/Health Numbers/Resized_digits"
X, y = load_images(folder_path)

# Split into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Further split train into train and validation set
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42, stratify=y_train if len(set(y_train)) > 1 else None)

# Assumes X_train and y_train are numpy arrays
train_dataset = AugmentedDataset(X_train, y_train, augmentation_factor=10, transform=transform)
val_dataset = AugmentedDataset(X_val, y_val, augmentation_factor=0, transform=transform)

print("Train size:", len(train_dataset))
print("Val size:", len(val_dataset))

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Model setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNNModel().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

Train size: 693
Val size: 16


In [7]:
# Training loop
for epoch in range(25):
    train_loss, train_total, train_correct = 0, 0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        i, prediced = torch.max(outputs, 1)
        train_total += labels.size(0)
        train_correct += (prediced==labels).sum().item()

    train_acc = 100 * train_correct / train_total

    # Model Validation
    model.eval()
    val_correct, val_total, val_loss = 0, 0, 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            i, prediced = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (prediced==labels).sum().item()
    
    val_acc = 100 * val_correct / val_total

    print(f"Epoch {epoch+1}, " 
          f"Train Loss: {train_loss/len(train_loader):.4f}, "
          f"Train Acc: {train_acc:.2f}%, "
          f"Val Loss: {val_loss/len(val_loader):.4f}, "
          f"Val Acc: {val_acc:.2f}%")

  img_tensor = torch.tensor(img, dtype=torch.float32)


Epoch 1, Train Loss: 2.3880, Train Acc: 13.28%, Val Loss: 2.3687, Val Acc: 12.50%
Epoch 2, Train Loss: 2.3329, Train Acc: 14.14%, Val Loss: 2.2541, Val Acc: 25.00%
Epoch 3, Train Loss: 2.1660, Train Acc: 21.50%, Val Loss: 1.9149, Val Acc: 31.25%
Epoch 4, Train Loss: 1.7928, Train Acc: 39.11%, Val Loss: 1.3989, Val Acc: 50.00%
Epoch 5, Train Loss: 1.3792, Train Acc: 53.25%, Val Loss: 1.1644, Val Acc: 68.75%
Epoch 6, Train Loss: 1.2204, Train Acc: 58.30%, Val Loss: 1.0248, Val Acc: 68.75%
Epoch 7, Train Loss: 1.0089, Train Acc: 66.38%, Val Loss: 0.9941, Val Acc: 68.75%
Epoch 8, Train Loss: 0.9782, Train Acc: 65.51%, Val Loss: 0.8279, Val Acc: 87.50%
Epoch 9, Train Loss: 0.8374, Train Acc: 73.02%, Val Loss: 0.7302, Val Acc: 87.50%
Epoch 10, Train Loss: 0.7748, Train Acc: 74.75%, Val Loss: 0.7230, Val Acc: 87.50%
Epoch 11, Train Loss: 0.7503, Train Acc: 76.05%, Val Loss: 0.6356, Val Acc: 87.50%
Epoch 12, Train Loss: 0.7355, Train Acc: 75.32%, Val Loss: 0.8466, Val Acc: 81.25%
Epoch 13, Tra

In [9]:
torch.save(model.state_dict(), "text_classifier_weights.pth")