In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD  = [0.229, 0.224, 0.225]

IMAGE_SIZE = 224
BATCH_SIZE = 16

NUM_WORKERS = 2
NUM_CLASSES = 2
NUM_EPOCHS = 30
LEARNING_RATE = 1e-3

In [3]:
# Transforms for the training set: augmentation + normalization
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(IMAGE_SIZE, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)
])

val_transforms = transforms.Compose([
    transforms.Resize(int(IMAGE_SIZE * 1.14)),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)
])

In [4]:
train_dataset_path = "../../../dataset/train"
val_dataset_path = "../../../dataset/val"

train_dataset = datasets.ImageFolder(root=train_dataset_path, transform=train_transforms)
val_dataset   = datasets.ImageFolder(root=val_dataset_path,   transform=val_transforms)

train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    pin_memory=True,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)


val_loader = DataLoader(
    val_dataset,
    shuffle=False,
    pin_memory=True,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)


In [5]:
if __name__ == "__main__":
    images, labels = next(iter(train_loader))
    print(f"Batch shape: {images.shape}")
    print(f"Labels shape: {labels.shape}")

Batch shape: torch.Size([16, 3, 224, 224])
Labels shape: torch.Size([16])


In [6]:
def train(model, loader, optimizer, criterion, device):
    model.train()
    training_loss, correct, total = 0.0, 0, 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        training_loss += loss.item() * images.size(0)
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += images.size(0)

    avg_loss = training_loss/total
    avg_accuracy = correct/total
    return avg_loss, avg_accuracy

In [7]:
def validate(model, loader, criterion, device):
    model.eval()
    validate_loss, correct, total = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            validate_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += images.size(0)

    avg_loss = validate_loss/total
    avg_accuracy = correct/total
    return avg_loss, avg_accuracy

In [8]:
model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.classifier.parameters(), lr=LEARNING_RATE)

In [10]:
best_val_loss = float('inf')

for epoch in range(NUM_EPOCHS):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    validate_loss, val_acc     = validate(model, val_loader, criterion, device)

    print(f"Epoch {epoch+1}/{NUM_EPOCHS}: "
          f"Train loss {train_loss:.4f}, acc {train_acc:.4f} | "
          f"Val   loss {validate_loss:.4f}, acc {val_acc:.4f}")

    # checkpoint best model
    if validate_loss < best_val_loss:
        best_val_loss = validate_loss
        torch.save(model.state_dict(), "best_mobilenetv2_teadiseases.pth")

Epoch 1/30: Train loss 1.5350, acc 0.5398 | Val   loss 0.6608, acc 0.7636Epoch 2/30: Train loss 0.6672, acc 0.7681 | Val   loss 0.5216, acc 0.8111Epoch 3/30: Train loss 0.5454, acc 0.8145 | Val   loss 0.4983, acc 0.8263Epoch 4/30: Train loss 0.4650, acc 0.8361 | Val   loss 0.4244, acc 0.8455Epoch 5/30: Train loss 0.4567, acc 0.8400 | Val   loss 0.3440, acc 0.8727Epoch 6/30: Train loss 0.4331, acc 0.8487 | Val   loss 0.3090, acc 0.8939Epoch 7/30: Train loss 0.3900, acc 0.8625 | Val   loss 0.3905, acc 0.8626Epoch 8/30: Train loss 0.3955, acc 0.8597 | Val   loss 0.4944, acc 0.8253Epoch 9/30: Train loss 0.3819, acc 0.8694 | Val   loss 0.3081, acc 0.8909Epoch 10/30: Train loss 0.3868, acc 0.8636 | Val   loss 0.2972, acc 0.8970Epoch 11/30: Train loss 0.3917, acc 0.8617 | Val   loss 0.3190, acc 0.9040Epoch 12/30: Train loss 0.3650, acc 0.8751 | Val   loss 0.3211, acc 0.8818Epoch 13/30: Train loss 0.3705, acc 0.8686 | Val   loss 0.3317, acc 0.8869Epoch 14/30: Train loss 0.3849, acc 0.8627 | Va