<a href="https://colab.research.google.com/github/ramires666/machine-learning-zoomcamp-homework/blob/main/clothing_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/alexeygrigorev/clothing-dataset-small.git

Cloning into 'clothing-dataset-small'...
remote: Enumerating objects: 3839, done.[K
remote: Counting objects: 100% (400/400), done.[K
remote: Compressing objects: 100% (400/400), done.[K
remote: Total 3839 (delta 9), reused 385 (delta 0), pack-reused 3439 (from 1)[K
Receiving objects: 100% (3839/3839), 100.58 MiB | 26.38 MiB/s, done.
Resolving deltas: 100% (10/10), done.


In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Side length (pixels) of the square images fed to the Keras model.
input_size = 299

# Training generator: Xception preprocessing plus light random augmentation.
train_gen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range=10,
    zoom_range=0.1,
    horizontal_flip=True
)

train_ds = train_gen.flow_from_directory(
    './clothing-dataset-small/train',
    target_size=(input_size, input_size),
    batch_size=32
)


# Validation generator: preprocessing only, no augmentation.
val_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Fix: read the validation set through val_gen. It was previously read through
# train_gen, which randomly sheared/zoomed/flipped the validation images and
# made val_accuracy noisy and non-reproducible.
val_ds = val_gen.flow_from_directory(
    './clothing-dataset-small/validation',
    target_size=(input_size, input_size),
    batch_size=32,
    shuffle=False
)

Found 3068 images belonging to 10 classes.
Found 341 images belonging to 10 classes.


In [None]:
# Keras callback that writes the model to disk only when validation accuracy
# improves (save_best_only + monitor='val_accuracy' + mode='max').
# The epoch number and the validation accuracy are embedded in the file name.
checkpoint = keras.callbacks.ModelCheckpoint(
    'xception_v4_1_{epoch:02d}_{val_accuracy:.3f}.keras',
    save_best_only=True,
    monitor='val_accuracy',
    mode='max'
)

In [None]:
def make_model(input_size=150, learning_rate=0.01, size_inner=100,
               droprate=0.5, num_classes=10):
    """Build and compile a transfer-learning classifier on a frozen Xception base.

    Args:
        input_size: Side length of the square RGB input images.
        learning_rate: Learning rate passed to the Adam optimizer.
        size_inner: Number of units in the hidden dense layer.
        droprate: Dropout rate applied after the hidden dense layer.
        num_classes: Number of output logits (defaults to 10, the value that
            was previously hard-coded).

    Returns:
        A compiled ``keras.Model`` that outputs raw logits (no softmax); the
        loss is configured with ``from_logits=True`` accordingly.
    """
    input_shape = (input_size, input_size, 3)

    # Convolutional base with ImageNet weights and without its classifier top.
    base_model = Xception(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )

    # Freeze the base so that only the new head is trained.
    base_model.trainable = False

    #########################################

    inputs = keras.Input(shape=input_shape)
    # training=False: the base is always called in inference mode.
    base = base_model(inputs, training=False)
    vectors = keras.layers.GlobalAveragePooling2D()(base)

    inner = keras.layers.Dense(size_inner, activation='relu')(vectors)
    drop = keras.layers.Dropout(droprate)(inner)

    # One logit per class.
    outputs = keras.layers.Dense(num_classes)(drop)

    model = keras.Model(inputs, outputs)

    #########################################

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    # Labels are expected one-hot encoded (CategoricalCrossentropy).
    loss = keras.losses.CategoricalCrossentropy(from_logits=True)

    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=['accuracy']
    )

    return model

In [None]:
# Hyperparameters for this run; they override make_model's defaults.
learning_rate = 0.0005
size = 100
droprate = 0.2

model = make_model(
    input_size=input_size,
    learning_rate=learning_rate,
    size_inner=size,
    droprate=droprate
)

# Train for up to 50 epochs, evaluating on val_ds after each epoch; the
# `checkpoint` callback is invoked by Keras during training.
history = model.fit(train_ds, epochs=50, validation_data=val_ds,
                   callbacks=[checkpoint])

Epoch 1/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 843ms/step - accuracy: 0.5803 - loss: 1.3574 - val_accuracy: 0.8446 - val_loss: 0.5358
Epoch 2/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 752ms/step - accuracy: 0.8104 - loss: 0.5661 - val_accuracy: 0.8622 - val_loss: 0.4536
Epoch 3/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 755ms/step - accuracy: 0.8565 - loss: 0.4200 - val_accuracy: 0.8680 - val_loss: 0.4051
Epoch 4/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 751ms/step - accuracy: 0.8690 - loss: 0.3779 - val_accuracy: 0.8475 - val_loss: 0.4545
Epoch 5/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 754ms/step - accuracy: 0.8803 - loss: 0.3737 - val_accuracy: 0.8886 - val_loss: 0.3829
Epoch 6/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 747ms/step - accuracy: 0.8879 - loss: 0.3280 - val_accuracy: 0.8915 - val_loss: 0.3561
Epoch 7/50
[1m96/96[

KeyboardInterrupt: 

# Task
Convert the provided TensorFlow/Keras code for an image classification model using Xception to PyTorch.

## Data loading

### Subtask:
Convert the data loading and preprocessing steps from `ImageDataGenerator` to PyTorch's `Dataset` and `DataLoader`.


**Reasoning**:
Import the necessary modules from PyTorch for data handling and image transformations.



In [37]:
import os

import torch
import numpy as np

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from PIL import Image

**Reasoning**:
Define a custom PyTorch Dataset class to load images from directories, apply transformations, and provide image-label pairs. Implement the `__len__` and `__getitem__` methods.



In [38]:
class ClothingDataset(Dataset):
    """Image-folder dataset with one sub-directory of ``data_dir`` per class.

    Class indices are assigned in alphabetical order of the sub-directory
    names. Samples are ordered by class and then by file name, so the order
    is reproducible across runs and filesystems.

    Args:
        data_dir: Directory that contains one sub-directory per class.
        transform: Optional callable applied to each PIL image before it is
            returned.

    Attributes are plain lists/dicts: ``classes``, ``class_to_idx``,
    ``image_paths`` and ``labels`` (``labels[i]`` belongs to ``image_paths[i]``).
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Only real, non-hidden sub-directories are classes. Stray files
        # (README, .DS_Store) used to crash the listing below, and hidden
        # directories such as .ipynb_checkpoints used to become bogus classes.
        self.classes = sorted(
            entry for entry in os.listdir(data_dir)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(data_dir, entry))
        )
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            # os.listdir order is arbitrary; sort it for determinism.
            for img_name in sorted(os.listdir(label_dir)):
                img_path = os.path.join(label_dir, img_name)
                if img_name.startswith('.') or not os.path.isfile(img_path):
                    continue
                self.image_paths.append(img_path)
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        """Return the number of images found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was given; the label is the integer class index.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # The context manager closes the file handle promptly; convert()
        # returns a fully loaded copy that stays valid afterwards.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, label

**Reasoning**:
Define image transformations for both training and validation datasets, including preprocessing based on Xception's requirements. Then, create instances of the `ClothingDataset` and `DataLoader` for both sets.



In [39]:
# Side length (pixels) of the square images fed to the PyTorch model.
# Note: this rebinds input_size, which the Keras section above set to 299.
input_size = 224

In [40]:
# Image transformations for training: random augmentation, then preprocessing.

# Per-channel mean/std used to normalise the [0, 1] tensors produced by
# ToTensor (the values commonly used with ImageNet-pretrained torchvision models).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

train_transforms = transforms.Compose([
    # image transformations
    transforms.RandomRotation(10), # Random rotation within +/-10 degrees; stands in for Keras shear_range (not an exact equivalent)
    transforms.RandomResizedCrop(input_size, scale=(0.9, 1.0)), # Random crop of 90-100% of the image area, resized to input_size; approximates zoom_range (zoom-in only)
    transforms.RandomHorizontalFlip(), # Counterpart of horizontal_flip=True

    transforms.ToTensor(),

    # preprocessing
    transforms.Normalize(mean=mean, std=std)
])

# Image transformations for validation: deterministic resize plus the same
# preprocessing, no augmentation.
val_transforms = transforms.Compose([
    # image transformations
    transforms.Resize((input_size, input_size)), # Resize to the target size

    transforms.ToTensor(),

    # preprocessing
    transforms.Normalize(mean=mean, std=std)
])

# Create instances of the custom dataset for training and validation
train_dataset = ClothingDataset(
    data_dir='./clothing-dataset-small/train',
    transform=train_transforms
)

val_dataset = ClothingDataset(
    data_dir='./clothing-dataset-small/validation',
    transform=val_transforms
)

# DataLoaders: training batches are reshuffled every epoch, validation
# batches keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

print(f"Number of training batches: {len(train_loader)}")
print(f"Number of validation batches: {len(val_loader)}")

Number of training batches: 96
Number of validation batches: 11


## Model Definition

### Subtask:
Translate the Xception model and the custom layers from TensorFlow/Keras to PyTorch modules.

**Reasoning**:
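Import the PyTorch modules needed to define the network: `torch.nn` for the layers and `torchvision.models` for pre-trained backbones. Note that `torchvision.models` does not provide an Xception implementation, so the model actually built further below uses MobileNetV2.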

In [41]:
import torch.nn as nn
import torchvision.models as models

**Reasoning**:
Define the PyTorch model based on the structure of the Keras model: load a pre-trained Xception model, remove its original classification head, add a global average pooling layer, a dense inner layer with ReLU activation and dropout, and a final dense output layer.

**Reasoning**:
Create an instance of the PyTorch model and move it to the appropriate device (GPU if available).

## Model Definition (using MobileNetV2)

### Subtask:
Translate the model definition to use MobileNetV2 from `torchvision.models`.

**Reasoning**:
Import the necessary modules from PyTorch for defining the neural network and MobileNetV2 from `torchvision.models`.

**Reasoning**:
Define the PyTorch model using a pre-trained MobileNetV2 model, replace its classifier, and add the necessary layers for the clothing classification task.

In [42]:
class ClothingClassifierMobileNet(nn.Module):
    """MobileNetV2 feature extractor followed by a small classification head.

    Pipeline: ``base_model.features`` -> global average pooling -> flatten ->
    Linear(1280, size_inner) -> ReLU -> Dropout -> Linear(size_inner, num_classes).
    The forward pass returns raw logits (no softmax).

    Args:
        size_inner: Number of units in the hidden linear layer.
        droprate: Dropout probability applied after the hidden layer.
        num_classes: Number of output logits.
        weights: Passed to ``models.mobilenet_v2``. The default loads the
            ImageNet weights; pass ``None`` to skip the download, e.g. when a
            saved ``state_dict`` is loaded right afterwards.
        freeze_base: When True, the backbone's parameters are excluded from
            gradient updates and the backbone is kept in eval mode, mirroring
            ``base_model.trainable = False`` in Keras. The default (False)
            fine-tunes the whole network, as before.

    Sub-module attribute names are unchanged, so previously saved
    ``state_dict`` checkpoints still load.
    """

    def __init__(self, size_inner=100, droprate=0.2, num_classes=10,
                 weights='IMAGENET1K_V1', freeze_base=False):
        super().__init__()
        self.freeze_base = freeze_base

        # Pre-trained MobileNetV2 backbone.
        self.base_model = models.mobilenet_v2(weights=weights)

        # torchvision's stock head (Dropout + Linear to 1000 ImageNet classes)
        # is not used: forward() calls only `features`. Replacing it with
        # Identity keeps its weights out of the model and its checkpoints.
        self.base_model.classifier = nn.Identity()

        if freeze_base:
            for param in self.base_model.parameters():
                param.requires_grad = False

        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        # 1280 = number of channels produced by MobileNetV2's `features`.
        self.inner = nn.Linear(1280, size_inner)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(droprate)
        self.output_layer = nn.Linear(size_inner, num_classes)

    def train(self, mode=True):
        """Switch train/eval mode; a frozen backbone always stays in eval mode.

        Keeping the frozen backbone in eval mode stops its batch-norm running
        statistics from drifting while only the head is being trained.
        """
        super().train(mode)
        if self.freeze_base:
            self.base_model.eval()
        return self

    def forward(self, x):
        """Map a batch of images (N, 3, H, W) to class logits (N, num_classes)."""
        x = self.base_model.features(x)   # convolutional feature maps
        x = self.global_avg_pooling(x)    # -> (N, 1280, 1, 1)
        x = torch.flatten(x, 1)           # -> (N, 1280)
        x = self.inner(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.output_layer(x)
        return x

**Reasoning**:
Create an instance of the MobileNetV2-based PyTorch model and move it to the appropriate device (GPU if available).

In [43]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

size = 32 # Passed as size_inner (hidden layer width); note the Keras run above used 100
droprate = 0.2 # Dropout rate of the head; same value as in the Keras run above

In [44]:
# Build the classifier with one output per class folder found in the training
# set, then move its parameters to the selected device.
model = ClothingClassifierMobileNet(size_inner=size, droprate=droprate, num_classes=len(train_dataset.classes))
model.to(device)

print(f"Model is on: {device}")

Model is on: cuda


## Training Loop

### Subtask:
Adapt the training loop, including the optimizer, loss function, and model training steps, from Keras's `model.fit` to a PyTorch training loop.

**Reasoning**:
Import the necessary PyTorch modules for optimization and loss calculation.

In [45]:
import torch.optim as optim
import torch.nn as nn

**Reasoning**:
Define the optimizer and the loss function for the model training.

In [46]:
learning_rate = 0.0001

# Adam over every parameter returned by model.parameters(), i.e. the backbone
# as well as the new head.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# CrossEntropyLoss takes raw logits and integer class indices as targets.
criterion = nn.CrossEntropyLoss()

## Model Checkpointing

### Subtask:
Implement model checkpointing in PyTorch equivalent to the Keras `ModelCheckpoint` callback.

**Reasoning**:
Define the training loop, including iterating over epochs and batches, calculating loss and accuracy, performing backpropagation, and updating model weights. Also, include the logic for saving the best model based on validation accuracy.

In [47]:
num_epochs = 65
best_val_accuracy = 0.0
# File-name template, filled with the 1-based epoch and the validation accuracy.
checkpoint_path = 'mobilenet_v2_v1_{epoch:02d}_{val_accuracy:.3f}.pth'

for epoch in range(num_epochs):
    # ---------------- training pass ----------------
    model.train()  # Set the model to training mode
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # Zero the parameter gradients

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The loss is a per-batch mean (assumes criterion uses mean reduction,
        # the nn.CrossEntropyLoss default). Weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        n_batch = labels.size(0)
        running_loss += loss.item() * n_batch
        total_predictions += n_batch
        correct_predictions += (outputs.argmax(dim=1) == labels).sum().item()

    epoch_loss = running_loss / total_predictions
    epoch_accuracy = correct_predictions / total_predictions
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.4f}')

    # ---------------- validation pass ----------------
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0
    val_correct_predictions = 0
    val_total_predictions = 0

    with torch.no_grad():  # Disable gradient calculation during validation
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            n_batch = labels.size(0)
            val_loss += loss.item() * n_batch
            val_total_predictions += n_batch
            val_correct_predictions += (outputs.argmax(dim=1) == labels).sum().item()

    val_loss /= val_total_predictions
    val_accuracy = val_correct_predictions / val_total_predictions
    print(f'Epoch {epoch+1}/{num_epochs}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

    # Checkpoint logic: save only when validation accuracy strictly improves,
    # so the most recently written file is the best one of this run.
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        # Format the checkpoint filename with epoch and accuracy
        current_checkpoint_path = checkpoint_path.format(epoch=epoch+1, val_accuracy=val_accuracy)
        torch.save(model.state_dict(), current_checkpoint_path)
        print(f'Checkpoint saved to {current_checkpoint_path}')

print('Finished Training')

Epoch 1/65, Train Loss: 1.4000, Train Accuracy: 0.5831
Epoch 1/65, Val Loss: 0.6715, Val Accuracy: 0.8211
Checkpoint saved to mobilenet_v2_v1_01_0.821.pth
Epoch 2/65, Train Loss: 0.5745, Train Accuracy: 0.8325
Epoch 2/65, Val Loss: 0.3538, Val Accuracy: 0.8827
Checkpoint saved to mobilenet_v2_v1_02_0.883.pth
Epoch 3/65, Train Loss: 0.3631, Train Accuracy: 0.8947
Epoch 3/65, Val Loss: 0.3028, Val Accuracy: 0.9003
Checkpoint saved to mobilenet_v2_v1_03_0.900.pth
Epoch 4/65, Train Loss: 0.2622, Train Accuracy: 0.9257
Epoch 4/65, Val Loss: 0.2495, Val Accuracy: 0.9150
Checkpoint saved to mobilenet_v2_v1_04_0.915.pth
Epoch 5/65, Train Loss: 0.1822, Train Accuracy: 0.9485
Epoch 5/65, Val Loss: 0.2480, Val Accuracy: 0.9179
Checkpoint saved to mobilenet_v2_v1_05_0.918.pth
Epoch 6/65, Train Loss: 0.1398, Train Accuracy: 0.9671
Epoch 6/65, Val Loss: 0.2555, Val Accuracy: 0.9150
Epoch 7/65, Train Loss: 0.1258, Train Accuracy: 0.9645
Epoch 7/65, Val Loss: 0.2076, Val Accuracy: 0.9326
Checkpoint sa

## Export PyTorch model to ONNX

### Subtask:
Convert the trained PyTorch model to the ONNX format.

**Reasoning**:
Load the best saved PyTorch model, set it to evaluation mode, and export it to an ONNX file using `torch.onnx.export`.

In [48]:
!pip install onnx



In [49]:
# Locate the checkpoint to load. The most recently created file is selected;
# the training loop only saves on improvement, so within a single training
# run this is also the best checkpoint.
import glob
import os

list_of_files = glob.glob('mobilenet_v2_v1_*.pth')
if not list_of_files:
    raise FileNotFoundError(
        "No checkpoint matching 'mobilenet_v2_v1_*.pth' was found; "
        "run the training cell first."
    )
latest_file = max(list_of_files, key=os.path.getctime)
print(f"Loading the best model from: {latest_file}")

# Manual override, e.g. to load a specific checkpoint:
# latest_file = 'mobilenet_0.889.pth'

model = ClothingClassifierMobileNet(size_inner=size, droprate=droprate, num_classes=len(train_dataset.classes))
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime.
model.load_state_dict(torch.load(latest_file, map_location=device))
model.to(device)

# Set the model to evaluation mode
model.eval();

Loading the best model from: mobilenet_v2_v1_62_0.947.pth


In [50]:
# Held-out test split, preprocessed with the deterministic validation transforms.
test_dataset = ClothingDataset(
    data_dir='./clothing-dataset-small/test',
    transform=val_transforms,
)

# shuffle=True: each pass over the loader yields a different random ordering,
# so a single batch drawn from it contains a mix of classes.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [51]:
# Take only the first batch from test_loader and move it to the device; the
# loop exits after one iteration.
# NOTE(review): if the loader were empty, `inputs`/`labels` would keep
# whatever values earlier cells left in them.
for inputs, labels in test_loader:
    inputs, labels = inputs.to(device), labels.to(device)
    break

In [52]:
inputs.shape

torch.Size([32, 3, 224, 224])

In [53]:
labels

tensor([4, 4, 3, 0, 2, 3, 4, 6, 2, 0, 5, 2, 8, 9, 5, 6, 3, 7, 1, 2, 2, 6, 5, 3,
        4, 4, 9, 6, 6, 9, 2, 3], device='cuda:0')

In [54]:
# Inference only: no_grad avoids building the autograd graph for this batch.
with torch.no_grad():
    outputs = model(inputs)
pred_labels = torch.max(outputs, 1).indices
# Fraction of correct predictions in this single batch.
(labels == pred_labels).sum() / len(pred_labels)

tensor(0.8750, device='cuda:0')

In [22]:
!pip install keras-image-helper

Collecting keras-image-helper
  Downloading keras_image_helper-0.0.2-py3-none-any.whl.metadata (3.5 kB)
Collecting numpy>=2.3.2 (from keras-image-helper)
  Downloading numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Downloading keras_image_helper-0.0.2-py3-none-any.whl (5.4 kB)
Downloading numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.6/16.6 MB[0m [31m120.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, keras-image-helper
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the s

In [33]:
!pip install numpy==2.0.2

Collecting numpy==2.0.2
  Downloading numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/60.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.2/19.2 MB[0m [31m74.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.3.5
    Uninstalling numpy-2.3.5:
      Successfully uninstalled numpy-2.3.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
keras-image-helper 0.0.2 requires numpy>=2.3.2, but

In [55]:
from keras_image_helper import create_preprocessor

In [56]:
def preprocess_pytorch_style(X):
    """Turn a batch of channels-last images into normalised channels-first float32.

    Steps: divide by 255, reorder the axes from (batch, height, width,
    channels) to (batch, channels, height, width), then subtract the
    per-channel mean and divide by the per-channel std.

    Args:
        X: NumPy array of shape (batch, height, width, 3); pixel values are
            expected in [0, 255].

    Returns:
        ``np.float32`` array of shape (batch, 3, height, width).
    """
    # Per-channel statistics, shaped to broadcast over an NCHW batch.
    channel_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    channel_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)

    unit_range = X / 255.0
    # NHWC -> NCHW
    channels_first = np.transpose(unit_range, (0, 3, 1, 2))
    normalised = (channels_first - channel_mean) / channel_std

    return normalised.astype(np.float32)

In [57]:
from keras_image_helper import create_preprocessor

In [58]:
# Image preprocessor that uses preprocess_pytorch_style as its preprocessing
# function and 224x224 as the target size (same side length as input_size).
preprocessor = create_preprocessor(preprocess_pytorch_style, target_size=(224, 224))

In [59]:
# Fetch a sample image from the URL and run it through the preprocessor
# (requires network access).
url = 'http://bit.ly/mlbookcamp-pants'
X = preprocessor.from_url(url)

In [60]:
X.shape

(1, 3, 224, 224)

In [61]:
# Convert the preprocessed array to a tensor on the model's device.
# NOTE(review): this rebinds X, so the cell is not idempotent; re-run the
# cell that creates X before running this one again.
X = torch.Tensor(X).to(device)

In [62]:
# Inference only: no_grad skips autograd bookkeeping, which also removes the
# need for the legacy `.data` accessor. `pred` holds the logits of the first
# (and only) image as a NumPy array.
with torch.no_grad():
    pred = model(X)[0].cpu().numpy()

In [63]:
# Class names in alphabetical order, one per model output.
# NOTE(review): the order must match the class indices used during training
# (ClothingDataset sorts the class folder names) — confirm against
# train_dataset.classes.
classes = [
    "dress",
    "hat",
    "longsleeve",
    "outwear",
    "pants",
    "shirt",
    "shoes",
    "shorts",
    "skirt",
    "t-shirt",
]

# Pair each class name with its raw score (a logit, not a probability).
dict(zip(classes, pred.tolist()))

{'dress': -0.05191972106695175,
 'hat': -5.399651050567627,
 'longsleeve': -1.9374825954437256,
 'outwear': -1.8340309858322144,
 'pants': 14.948799133300781,
 'shirt': -6.481940746307373,
 'shoes': 0.08216299116611481,
 'shorts': 0.5064080357551575,
 'skirt': -4.075653553009033,
 't-shirt': -4.513818740844727}

In [64]:
# Define dummy input for ONNX export
# The input shape should match the input shape of your model (batch_size, channels, height, width)
# Use a batch size of 1 for simplicity when exporting
# The dummy tensor is moved to the same device the model was moved to.
dummy_input = torch.randn(1, 3, input_size, input_size).to(device)

# Export the model to ONNX format
onnx_path = "clothing_classifier_mobilenet_v2_latest.onnx"

# Install onnxscript if not already installed
!pip install onnxscript

# Axis 0 of both the input and the output is declared dynamic, so the exported
# graph is not tied to the batch size of 1 used for the dummy input.
torch.onnx.export(
    model,                     # PyTorch Model
    dummy_input,               # Dummy input tensor
    onnx_path,                 # Path to save the ONNX model
    verbose=True,              # Print export details
    input_names=['input'],     # Input layer name
    output_names=['output'],   # Output layer name
    dynamic_axes={             # Dynamic batch size
        'input' : {0 : 'batch_size'},
        'output' : {0 : 'batch_size'}
    }
)

print(f"Model exported to {onnx_path}")



  torch.onnx.export(


[torch.onnx] Obtain model graph for `ClothingClassifierMobileNet([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `ClothingClassifierMobileNet([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 104 of general pattern rewrite rules.
Model exported to clothing_classifier_mobilenet_v2_latest.onnx
