In [1]:
import os
import torch
from torch import nn, optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# import matplotlib.pyplot as plt

# CUDA_LAUNCH_BLOCKING makes kernel launches synchronous so that CUDA errors
# surface at the offending call (a debugging aid; it slows execution down).
# It has to be in the environment before CUDA is first touched, so set it
# ahead of the torch.cuda query below.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device}")

Using cuda


In [8]:
# Preprocessing applied identically to the training and test images.
transform = transforms.Compose([
    transforms.Resize((64, 64)),           # Fixed 64x64 input; the CNN's first Linear layer is sized for it
    transforms.ToTensor(),                 # Convert to a float tensor
    # One mean/std per RGB channel (written out instead of relying on
    # broadcasting of a single value): (x - 0.5) / 0.5 maps [0, 1] to [-1, 1].
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

# ImageFolder derives the class labels from the sub-directory names.
train_data = datasets.ImageFolder(root='dataset_split/train', transform=transform)
test_data = datasets.ImageFolder(root='dataset_split/test', transform=transform)

# Shuffle only the training batches; evaluation does not depend on sample
# order, so the test loader is kept in a fixed, reproducible order.
train_loader = DataLoader(train_data, batch_size=8, shuffle=True)
test_loader = DataLoader(test_data, batch_size=8, shuffle=False)

In [9]:
# Sanity-check the label encoding before a model is built on top of it.
label_map = train_data.class_to_idx
n_classes = len(train_data.classes)
print(label_map)   # must match your folders
print(n_classes)   # must match output layer

# Walk every training batch once: show its labels and make sure none of them
# falls outside the range the output layer can represent.
for _, labels in train_loader:
    print(labels)
    assert labels.max() < n_classes


{'burger': 0, 'car': 1, 'hotdog': 2}
3
tensor([0, 0, 2, 0, 2, 1, 1, 1])
tensor([1, 1, 0, 0, 2, 1, 0, 2])
tensor([2, 1, 0, 2, 2, 0, 0, 1])
tensor([0, 0, 1, 1, 1, 1, 2, 1])
tensor([2, 1, 2, 2, 0, 2, 1, 1])
tensor([2, 0, 0, 2, 2, 0, 1, 1])
tensor([1, 1, 0, 2, 0, 2, 1, 1])
tensor([1, 2, 2, 2, 0, 1, 1, 2])
tensor([0, 2, 2, 1, 1, 2, 1, 0])
tensor([2, 0, 0, 0, 0, 0, 2, 1])
tensor([2, 2, 0, 0, 1, 2, 1, 1])
tensor([0, 1, 2, 0, 0, 1, 2, 2])
tensor([2, 1, 2, 0, 1, 0, 2, 0])
tensor([1, 1, 1, 1, 2, 0, 0, 2])
tensor([0, 2, 1, 0, 0, 2, 0, 0])
tensor([2])


In [10]:
class SimpleCNN(nn.Module):
    """Small two-stage CNN classifier for 3-channel 64x64 images.

    Each Conv -> ReLU -> MaxPool(2) stage halves the spatial size
    (64 -> 32 -> 16), so the flattened feature vector has 32 * 16 * 16
    entries. The first linear layer is sized for exactly that, i.e. the
    network assumes 64x64 inputs.

    Args:
        num_classes: Number of output logits. When omitted, it falls back to
            ``len(train_data.classes)`` from the notebook's global dataset,
            which is what the class always used before this parameter existed.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: size the head from the training set.
            num_classes = len(train_data.classes)

        # Feature extractor. Layer order/indices are kept stable so that
        # state_dict keys (conv.0, conv.3, ...) do not change.
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        # Classification head producing one raw logit per class.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 16 * 16, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        return self.fc(self.conv(x))

# Build the network, then move its parameters onto the selected device.
model = SimpleCNN()
model = model.to(device)

In [11]:
# Cross-entropy over the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()
# Adam over every model parameter with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
epochs = 5


def _train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Returns:
        (loss_sum, n_correct, n_seen): the sum of the per-batch losses, the
        number of correctly classified samples and the number of samples seen.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        n_correct += (torch.argmax(out, dim=1) == y).sum().item()
        n_seen += y.size(0)

    return loss_sum, n_correct, n_seen


def _evaluate(model, loader, device):
    """Count correct predictions over ``loader`` without tracking gradients.

    Returns:
        (n_correct, n_seen)
    """
    model.eval()
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            n_correct += (torch.argmax(model(x), dim=1) == y).sum().item()
            n_seen += y.size(0)

    return n_correct, n_seen


def _accuracy_percent(n_correct, n_seen):
    """Accuracy in percent; NaN for an empty loader instead of ZeroDivisionError."""
    return 100 * n_correct / n_seen if n_seen else float("nan")


for epoch in range(epochs):
    # "Loss" is the sum of the batch losses over the epoch (not the mean).
    total_loss, correct, total = _train_one_epoch(
        model, train_loader, criterion, optimizer, device
    )
    acc = _accuracy_percent(correct, total)
    print(f"Epoch {epoch+1}: Loss={total_loss:.4f}, Acc={acc:.2f}%")

    # The held-out test split is used as the validation set after every epoch.
    val_correct, val_total = _evaluate(model, test_loader, device)
    val_acc = _accuracy_percent(val_correct, val_total)
    print(f"Validation Acc: {val_acc:.2f}%")

Epoch 1: Loss=4.3175, Acc=95.04%
Validation Acc: 90.32%
Epoch 2: Loss=2.6351, Acc=96.69%
Validation Acc: 90.32%
Epoch 3: Loss=1.9988, Acc=96.69%
Validation Acc: 87.10%
Epoch 4: Loss=2.2608, Acc=95.04%
Validation Acc: 90.32%
Epoch 5: Loss=1.3655, Acc=96.69%
Validation Acc: 90.32%


In [None]:
# Save the trained weights (state_dict) as a .pth file; the inference cell below loads ml_models/simplecnn.pth
# torch.save(model.state_dict(), "ml_models/simplecnn.pth")

In [None]:
# export to ONNX
# dummy_input = torch.randn(1, 3, 64, 64).to(device)
# torch.onnx.export(model, dummy_input, "ml_models/simplecnn.onnx", 
#                   input_names=["input"], output_names=["output"],
#                   dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
#                   opset_version=11)

In [3]:
# test a model for inference

class SimpleCNNForTest(nn.Module):
    """Stand-alone CNN definition used for inference.

    The layer layout is the same as ``SimpleCNN`` (two Conv -> ReLU -> MaxPool
    stages followed by a two-layer linear head), presumably so that weights
    saved from that model load here without the training dataset in scope.

    Args:
        num_classes: Number of output logits (defaults to 3).
    """

    def __init__(self, num_classes=3):  # adjust class count
        super().__init__()
        # Layers are created in a fixed order and keep their Sequential
        # indices, so the state_dict keys stay conv.0, conv.3, fc.1, fc.3.
        feature_layers = [
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        head_layers = [
            nn.Flatten(),
            nn.Linear(in_features=32 * 16 * 16, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=num_classes),
        ]
        self.conv = nn.Sequential(*feature_layers)
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        features = self.conv(x)
        logits = self.fc(features)
        return logits

# Read the saved weights first, mapping every tensor onto the active device.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
weights = torch.load("ml_models/simplecnn.pth", map_location=device)

# Rebuild the architecture and restore the weights into it.
model = SimpleCNNForTest(num_classes=3)  # Match class count
model.load_state_dict(weights)
model = model.to(device)

# Switch to inference mode; as the last expression this also displays the model.
model.eval()

SimpleCNNForTest(
  (conv): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=8192, out_features=64, bias=True)
    (2): ReLU()
    (3): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [5]:
# inference
from PIL import Image
from torchvision import transforms

# Same preprocessing as the training cell: 64x64 resize, tensor conversion,
# and per-channel normalisation from [0, 1] to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

# Open the file in a context manager so the handle is closed right away;
# convert() returns an independent in-memory RGB copy that outlives it.
with Image.open("inference/burg-2.png") as image_file:
    img = image_file.convert("RGB")
img_tensor = transform(img).unsqueeze(0)  # [1, 3, 64, 64]
img_tensor = img_tensor.to(device)

# Forward pass and post-processing both run without gradient tracking.
with torch.no_grad():
    output = model(img_tensor)
    pred = torch.argmax(output, dim=1)
    probs = torch.softmax(output, dim=1)

idx_to_class = {0: 'burger', 1: 'car', 2: 'hotdog'}  # define based on your dataset
pred_idx = pred.item()  # single image in the batch, so one index
predicted_label = idx_to_class[pred_idx]
confidence = probs[0][pred_idx].item()
print(f"Predicted label: {predicted_label} ({confidence:.2f})")


Predicted label: car (1.00)
