In [1]:
batch_size = 32
num_epochs = 5

train_directory = 'data/casting_data/train'
validation_directory = 'data/casting_data/val'

In [2]:
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, models, transforms

In [3]:
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

inference_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [4]:
train_dataset = datasets.ImageFolder(root=train_directory, transform=train_transforms)
full_val_dataset = datasets.ImageFolder(root=validation_directory, transform=inference_transforms)

val_size = int(0.5 * len(full_val_dataset))
test_size = len(full_val_dataset) - val_size
val_dataset, test_dataset = random_split(full_val_dataset, [val_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [5]:
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

device

device(type='mps')

In [6]:
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Modify the final fully connected layer to match the number of classes in your new dataset
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)  # num_classes should be set to the number of your new categories

model = model.to(device)

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [8]:
def train_for_epoch(epoch):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

In [9]:
def evaluate_model(data_loader):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Evaluation loss: {running_loss / len(data_loader)}, Accuracy: {100 * correct / total}%")

In [10]:
for epoch in range(num_epochs):
    train_for_epoch(epoch)
    evaluate_model(val_loader)

print("Training complete")

Epoch 1/5, Loss: 0.2750089086162356
Evaluation loss: 0.22979466741283736, Accuracy: 89.35574229691876%
Epoch 2/5, Loss: 0.1518262427013654
Evaluation loss: 0.023492252577852923, Accuracy: 99.43977591036415%
Epoch 3/5, Loss: 0.1333295546221332
Evaluation loss: 0.026046083415470395, Accuracy: 99.15966386554622%
Epoch 4/5, Loss: 0.12433242993071102
Evaluation loss: 0.07660181703416431, Accuracy: 98.59943977591037%
Epoch 5/5, Loss: 0.10292471072170883
Evaluation loss: 0.011415122717153281, Accuracy: 99.71988795518207%
Training complete


In [13]:
evaluate_model(test_loader)

Evaluation loss: 0.008001111214980483, Accuracy: 100.0%


In [14]:
idx_to_class = {v: k for k, v in train_dataset.class_to_idx.items()}

def predict(img_path):
    image = Image.open(img_path)

    transformed_image = inference_transforms(image)
    transformed_image = transformed_image.unsqueeze(0)  # Add batch dimension
    transformed_image = transformed_image.to(device)

    model.eval()

    with torch.no_grad():
        outputs = model(transformed_image)
        _, predicted = torch.max(outputs, 1)
        predicted_class = idx_to_class[predicted.item()]

    return predicted_class

In [16]:
predicted_class = predict('data/casting_data/val/ok/cast_ok_0_239.jpeg')
print(f'Predicted class: {predicted_class}')

Predicted class: ok
