In [1]:
import sys
sys.path.append("src")

import torch

# Query CUDA availability once and reuse the answer for both the device
# selection and the status message.
has_cuda = torch.cuda.is_available()
device = torch.device("cuda") if has_cuda else torch.device("cpu")
print("Is CUDA available: ", has_cuda)

Is CUDA available:  False


### Prepare the dataset

In [2]:
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import Dataset, random_split, DataLoader

train_dataset_path = 'dataset/ogyeiv2/train'
dataset = ImageFolder(train_dataset_path)

# Seed the split so the same images land in train/validation on every run.
# Without a fixed generator the partition changes on each kernel restart, which
# makes validation metrics and saved checkpoints incomparable between runs.
SPLIT_SEED = 42
train_dataset, val_dataset = random_split(
    dataset,
    [0.8, 0.2],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# The held-out test images live in their own folder and are never split.
test_dataset_path = 'dataset/ogyeiv2/test'
test_dataset_origin = ImageFolder(test_dataset_path)

class TransformDataset(Dataset):
    """Dataset wrapper that applies a transform to each sample on access.

    The wrapped dataset must yield ``(sample, label)`` pairs when indexed;
    only the sample is passed through ``transforms``, the label is returned
    untouched.
    """

    def __init__(self, dataset, transforms):
        """Store the wrapped ``dataset`` and the callable ``transforms``."""
        super().__init__()
        self.transforms = transforms
        self.dataset = dataset

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(transforms(sample), label)`` for the item at ``idx``."""
        sample, label = self.dataset[idx]
        transformed = self.transforms(sample)
        return transformed, label

We're going to use a model pretrained on ImageNet, so it's best to normalize our dataset accordingly to match the model's training data.

The `transforms.ColorJitter` transform simulates lighting and exposure changes by randomly varying image brightness and contrast by ±25%. This helps the model stay robust to illumination differences and focus on shape and texture instead of light intensity.

In [3]:
# Shared preprocessing steps: every split is resized to 224x224 and normalized
# with the same per-channel mean/std constants.
resize_step = transforms.Resize((224, 224))
normalize_step = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: random augmentation sits between the resize and the
# tensor conversion.
train_transforms = transforms.Compose([
    resize_step,
    transforms.ColorJitter(brightness=0.25, contrast=0.25),
    transforms.RandomRotation(10),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    normalize_step,
])

# Evaluation pipeline: deterministic, no augmentation.
eval_transforms = transforms.Compose([
    resize_step,
    transforms.ToTensor(),
    normalize_step,
])

train_dataset = TransformDataset(train_dataset, train_transforms)
val_dataset = TransformDataset(val_dataset, eval_transforms)
test_dataset = TransformDataset(test_dataset_origin, eval_transforms)

# Settings common to all three loaders; only the training loader shuffles.
loader_options = dict(batch_size=32, num_workers=0, pin_memory=False)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [4]:
# Report the label set first, then the size of each split.
class_names = dataset.classes
split_sizes = {
    "Training": len(train_dataset),
    "Validation": len(val_dataset),
    "Test": len(test_dataset),
}

print(f"Number of classes: {len(class_names)}")
print(f"Class names: {class_names}")
for split_name, image_count in split_sizes.items():
    print(f"{split_name} images: {image_count}")

Number of classes: 84
Class names: ['acc_long_600_mg', 'advil_ultra_forte', 'akineton_2_mg', 'algoflex_forte_dolo_400_mg', 'algoflex_rapid_400_mg', 'algopyrin_500_mg', 'ambroxol_egis_30_mg', 'apranax_550_mg', 'aspirin_ultra_500_mg', 'atoris_20_mg', 'atorvastatin_teva_20_mg', 'betaloc_50_mg', 'bila_git', 'c_vitamin_teva_500_mg', 'calci_kid', 'cataflam_50_mg', 'cataflam_dolo_25_mg', 'cetirizin_10_mg', 'cold_fx', 'coldrex', 'concor_10_mg', 'concor_5_mg', 'condrosulf_800_mg', 'controloc_20_mg', 'covercard_plus_10_mg_2_5_mg_5_mg', 'coverex_4_mg', 'diclopram_75-mg_20-mg', 'dorithricin_mentol', 'dulsevia_60_mg', 'enterol_250_mg', 'favipiravir_meditop_200_mg', 'ibumax_400_mg', 'jutavit_c_vitamin', 'jutavit_cink', 'kalcium_magnezium_cink', 'kalium_r', 'koleszterin_kontroll', 'lactamed', 'lactiv_plus', 'laresin_10_mg', 'letrox_50_mikrogramm', 'lordestin_5_mg', 'merckformin_xr_1000_mg', 'meridian', 'metothyrin_10_mg', 'mezym_forte_10_000_egyseg', 'milgamma', 'milurit_300_mg', 'naprosyn_250_mg', '

### Model

We use a pretrained MobileNetV3-Large as the backbone for pill classification.

The convolutional feature extractor is frozen to retain learned visual patterns from ImageNet,
and the final classifier layer is replaced to match our 84 pill classes.

We will train the model on a small dataset, so we will only train the last layer of the classifier.

In [5]:
import torch.nn as nn
from torchinfo import summary
from torchvision.models import mobilenet_v3_large, MobileNet_V3_Large_Weights

weights = MobileNet_V3_Large_Weights.IMAGENET1K_V2
model = mobilenet_v3_large(weights=weights)

# Freeze every pretrained parameter first ...
for param in model.parameters():
    param.requires_grad = False

# ... then swap in a new output layer sized to our label set. A freshly built
# nn.Linear has requires_grad=True, so it is the only trainable part.
# The class count is read from the dataset instead of being hard-coded, so the
# head cannot drift out of sync with the folders on disk.
num_classes = len(dataset.classes)
num_features = model.classifier[3].in_features
model.classifier[3] = nn.Linear(num_features, num_classes, bias=True)

model = model.to(device)

# Run the summary on the training device. torchinfo moves the model to the
# device it is given, so passing 'cpu' here would silently pull a CUDA model
# back to the CPU and break the later cells that send batches to `device`.
summary(model, input_size=(1, 3, 224, 224), device=device)


Layer (type:depth-idx)                             Output Shape              Param #
MobileNetV3                                        [1, 84]                   --
├─Sequential: 1-1                                  [1, 960, 7, 7]            --
│    └─Conv2dNormActivation: 2-1                   [1, 16, 112, 112]         --
│    │    └─Conv2d: 3-1                            [1, 16, 112, 112]         (432)
│    │    └─BatchNorm2d: 3-2                       [1, 16, 112, 112]         (32)
│    │    └─Hardswish: 3-3                         [1, 16, 112, 112]         --
│    └─InvertedResidual: 2-2                       [1, 16, 112, 112]         --
│    │    └─Sequential: 3-4                        [1, 16, 112, 112]         (464)
│    └─InvertedResidual: 2-3                       [1, 24, 56, 56]           --
│    │    └─Sequential: 3-5                        [1, 24, 56, 56]           (3,440)
│    └─InvertedResidual: 2-4      

### Training

In [None]:
import os
import torch.optim as optim
from sklearn.metrics import classification_report, accuracy_score

# Directory that receives the model checkpoint.
os.makedirs("models", exist_ok=True)

criterion = nn.CrossEntropyLoss()

# Hand the optimizer only the parameters that are actually trainable. The
# frozen ones never receive gradients, so registering them adds nothing, and an
# empty list makes Adam fail fast if everything was frozen by mistake.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)

EPOCHS = 1
best_vloss = 1e5
# Lowest validation loss seen so far; despite the name it tracks the
# validation split, not the test split.
best_test_loss = float('inf')

def train_one_epoch(epoch_index):
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``device``. Every 10 batches the mean loss of that window is printed.

    Args:
        epoch_index: Zero-based epoch number, used only in the progress output.

    Returns:
        The mean loss of the last completed 10-batch window. If the epoch had
        fewer than 10 batches, the mean loss over the batches that did run
        (0.0 for an empty loader).
    """
    running_loss = 0.
    last_loss = 0.
    windows_reported = 0
    batches_in_window = 0

    for batch_index, (inputs, labels) in enumerate(train_loader):
        # The model lives on `device`; the batch must be moved there as well,
        # otherwise a CUDA model receives CPU tensors and the forward pass fails.
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batches_in_window += 1
        if batch_index % 10 == 9:
            last_loss = running_loss / 10. # average loss by 10 batches
            print(f'Epoch: {epoch_index + 1}, batch: {batch_index}, loss: {last_loss}')
            running_loss = 0.
            batches_in_window = 0
            windows_reported += 1

    # With fewer than 10 batches no window ever completes; report the mean of
    # what was seen rather than a misleading 0.0.
    if windows_reported == 0 and batches_in_window > 0:
        last_loss = running_loss / batches_in_window

    return last_loss

def evaluate_loss_acc(dataloader):
    """Evaluate the notebook-level ``model`` on every batch of ``dataloader``.

    Puts the model in eval mode (and leaves it there) and runs without
    gradient tracking.

    Args:
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.

    Returns:
        A 4-tuple ``(avg_loss, accuracy, y_true, y_pred)``:
        ``avg_loss`` is the per-sample mean of ``criterion``, ``accuracy`` is
        the fraction of correct argmax predictions, and ``y_true`` / ``y_pred``
        are flat lists of integer class indices in iteration order. An empty
        dataloader yields ``(0.0, 0.0, [], [])``.
    """
    model.eval()
    total_loss, total_correct, total_samples = 0.0, 0, 0
    y_true, y_pred = [], []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            batch_size = labels.size(0)

            # `criterion` returns the batch mean (nn.CrossEntropyLoss default),
            # so weight it by the batch size; a smaller final batch must not
            # count as much as a full one.
            total_loss += criterion(outputs, labels).item() * batch_size

            preds = outputs.argmax(dim=1)
            total_correct += (preds == labels).sum().item()
            total_samples += batch_size

            y_true.extend(labels.cpu().tolist())
            y_pred.extend(preds.cpu().tolist())

    # Guard the division so an empty dataloader does not raise.
    denominator = max(1, total_samples)
    avg_loss = total_loss / denominator
    acc = total_correct / denominator
    return avg_loss, acc, y_true, y_pred

# Set once a checkpoint has been written during THIS run, so a stale file left
# over from an earlier run is never mistaken for the current best model.
checkpoint_saved = False

for epoch in range(EPOCHS):
    print(f'Epoch {epoch + 1}/{EPOCHS}')

    # Switching the model to training mode
    model.train(True)
    # Run one pass over the training data
    avg_loss = train_one_epoch(epoch)

    # Switching the model to evaluation mode
    model.eval()

    # Score both splits with the freshly updated weights.
    # Note: train_loader still applies the random training augmentations.
    train_loss, train_acc, y_true_train, y_pred_train = evaluate_loss_acc(train_loader)
    val_loss,  val_acc, y_true_val, y_pred_val  = evaluate_loss_acc(val_loader)

    print(f"Training  - loss: {train_loss:.4f}, accuracy: {train_acc*100:.2f}%")
    print(f"Validation - loss: {val_loss:.4f},  accuracy: {val_acc*100:.2f}%")

    # Saving the best model: checkpoint only when validation loss improves,
    # and announce it only when a file was actually written.
    if val_loss < best_test_loss:
        best_test_loss = val_loss
        torch.save(model.state_dict(), "models/meds_classifier.pt")
        checkpoint_saved = True
        print("Best model saved as models/meds_classifier.pt")

    if val_acc >= 0.75:
        print(f"ðŸŽ¯ Target validation accuracy reached ({val_acc*100:.2f}%). Stopping.")
        break

# Report test metrics for the model that was actually saved (lowest validation
# loss), which is not necessarily the weights left after the last epoch.
if checkpoint_saved:
    model.load_state_dict(
        torch.load("models/meds_classifier.pt", map_location=device, weights_only=True)
    )

print("\nðŸ“Š Final evaluation on test set:")
test_loss, test_acc, y_true_test, y_pred_test = evaluate_loss_acc(test_loader)
print(f"Test - loss: {test_loss:.4f}, accuracy: {test_acc*100:.2f}%")

# Pass the full label list explicitly: without it the report raises when some
# class never appears in the test labels or predictions, because the number of
# target names no longer matches the number of observed classes.
print(classification_report(
    y_true_test,
    y_pred_test,
    labels=list(range(len(test_dataset_origin.classes))),
    target_names=test_dataset_origin.classes,
    digits=3,
    zero_division=0
))

Epoch 1/20


KeyboardInterrupt: 