In [1]:
import os
from accelerate import accelerator
import random
from PIL import Image
from data.dataset import *
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Choose the compute device in order of preference: NVIDIA GPU ("cuda"),
# then Apple Silicon GPU ("mps"), and finally the CPU as the fallback.
# The MPS probe is only evaluated when CUDA is unavailable.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available() else "cpu"
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# All PNG files located exactly two directory levels below the dataset root
# (presumably <split>/<class>/<file>.png -- confirm against the on-disk layout).
image_path_list = list(Path("data/fashion_mnist_images").glob("*/*/*.png"))
train_dir = "data/fashion_mnist_images/train"
test_dir = "data/fashion_mnist_images/test"

# Display names for the ten class indices.
# NOTE(review): ImageFolder derives class indices from the sorted folder names;
# confirm that ordering lines up with this mapping before using it for labels.
fashion_mnist_labels = {
    0: "T-shirt/top",
    1: "Trouser",
    2: "Pullover",
    3: "Dress",
    4: "Coat",
    5: "Sandal",
    6: "Shirt",
    7: "Sneaker",
    8: "Bag",
    9: "Ankle boot",
}

# --- Hyperparameters and loader settings ---
EPOCHS = 10  # Full passes over the training set
# os.cpu_count() returns None when the CPU count cannot be determined, and
# DataLoader requires a non-negative integer. Fall back to 0, which loads
# batches in the main process.
NUM_WORKERS = os.cpu_count() or 0
BATCH_SIZE = 128  # Samples per batch for both loaders
LEARNING_RATE = 0.01  # Step size handed to the optimizer
LAYER_SIZE = 128  # Width of each hidden layer in the network

In [3]:
# Preprocessing applied to every image, in this order.
preprocessing_steps = [
    transforms.Grayscale(num_output_channels=1),  # collapse to a single channel
    transforms.Resize(size=(28, 28)),  # resize to 28x28 (matching custom nn data inputs)
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),  # normalize between -1 and 1 (matching custom nn data inputs)
]
data_transform = transforms.Compose(preprocessing_steps)

# Both splits are read from class-per-folder directories and share the same
# preprocessing; labels are left untransformed.
train_data = datasets.ImageFolder(train_dir, transform=data_transform)
test_data = datasets.ImageFolder(test_dir, transform=data_transform)

# Derive the network's input width from one real sample: the total number of
# elements in the transformed image tensor (i.e. its flattened length).
sample_image, _ = train_data[0]
input_size = sample_image.numel()

# One output unit per class folder discovered in the training directory.
output_size = len(train_data.classes)

print(f"Creating DataLoader's with batch size {BATCH_SIZE} and {NUM_WORKERS} workers.")

# Settings shared by both loaders; only the shuffling behaviour differs.
loader_options = {"batch_size": BATCH_SIZE, "num_workers": NUM_WORKERS}

# Training batches are reshuffled on every pass; test batches keep dataset order.
train_dataloader = DataLoader(train_data, shuffle=True, **loader_options)
test_dataloader = DataLoader(test_data, shuffle=False, **loader_options)

Creating DataLoader's with batch size 128 and 10 workers.


In [4]:
# Define the Fully Connected Neural Network (MLP)
class FullyConnectedNN(nn.Module):
    """Fully connected network (MLP) with two hidden layers.

    The input passes through two linear layers of equal width, each followed
    by a ReLU, and then through a final linear layer that emits raw scores.

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Width of both hidden layers.
        output_size: Number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the raw (un-normalised) output scores for ``x``."""
        # Hidden layers: linear projection followed by ReLU.
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        # The output layer has no activation; softmax is left to the loss.
        return self.fc3(x)


# Initialize model, loss function, and optimizer
model = FullyConnectedNN(input_size, LAYER_SIZE, output_size).to(device)
criterion = nn.CrossEntropyLoss()  # Cross-entropy for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Number of batches between progress lines within an epoch.
LOG_EVERY_N_STEPS = 100

# Training loop
print("Training started...")
model.train()  # Make training mode explicit before optimizing
steps_per_epoch = len(train_dataloader)
for epoch in range(EPOCHS):
    for batch_idx, (images, labels) in enumerate(train_dataloader):
        images, labels = images.to(device), labels.to(device)
        images = images.view(-1, input_size)  # Flatten images to 1D

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Gate logging on the batch index (not the epoch index) so that one
        # line is printed every LOG_EVERY_N_STEPS batches in every epoch.
        if (batch_idx + 1) % LOG_EVERY_N_STEPS == 0:
            print(
                f"Epoch [{epoch+1}/{EPOCHS}], Step [{batch_idx+1}/{steps_per_epoch}], Loss: {loss.item():.4f}"
            )

print("Training complete!")


# Evaluate on test data
def evaluate():
    """Compute and print the model's accuracy over the test dataloader.

    Reads the module-level ``model``, ``test_dataloader``, ``device`` and
    ``input_size``. Switches ``model`` to evaluation mode and runs every test
    batch with gradient tracking disabled. The predicted class for a sample
    is the index of its highest output score. Prints the percentage of
    correct predictions with two decimals; returns nothing.
    """
    model.eval()
    num_correct = 0
    num_seen = 0
    with torch.no_grad():
        for batch_images, batch_labels in test_dataloader:
            batch_labels = batch_labels.to(device)
            # Move to the target device, then flatten each image to a vector.
            flat_images = batch_images.to(device).view(-1, input_size)
            scores = model(flat_images)
            predicted = torch.max(scores, dim=1).indices
            num_seen += batch_labels.shape[0]
            num_correct += predicted.eq(batch_labels).sum().item()

    accuracy = 100 * num_correct / num_seen
    print(f"Test Accuracy: {accuracy:.2f}%")


# Print the test-set accuracy of the model trained earlier in this cell.
evaluate()

Training started...
Epoch [1/10], Step [100/469], Loss: 0.6022
Epoch [1/10], Step [200/469], Loss: 0.5053
Epoch [1/10], Step [300/469], Loss: 0.5723
Epoch [1/10], Step [400/469], Loss: 0.4756
Epoch [2/10], Step [100/469], Loss: 0.3849
Epoch [2/10], Step [200/469], Loss: 0.3999
Epoch [2/10], Step [300/469], Loss: 0.4094
Epoch [2/10], Step [400/469], Loss: 0.3953
Epoch [3/10], Step [100/469], Loss: 0.2864
Epoch [3/10], Step [200/469], Loss: 0.4024
Epoch [3/10], Step [300/469], Loss: 0.3411
Epoch [3/10], Step [400/469], Loss: 0.4055
Epoch [4/10], Step [100/469], Loss: 0.3745
Epoch [4/10], Step [200/469], Loss: 0.3461
Epoch [4/10], Step [300/469], Loss: 0.2947
Epoch [4/10], Step [400/469], Loss: 0.3732
Epoch [5/10], Step [100/469], Loss: 0.3271
Epoch [5/10], Step [200/469], Loss: 0.2615
Epoch [5/10], Step [300/469], Loss: 0.2598
Epoch [5/10], Step [400/469], Loss: 0.4016


KeyboardInterrupt: 