In [1]:
# Download the small clothing dataset; later cells read images from
# clothing-dataset-small/train and clothing-dataset-small/validation.
!git clone https://github.com/alexeygrigorev/clothing-dataset-small.git

Cloning into 'clothing-dataset-small'...
remote: Enumerating objects: 3839, done.[K
remote: Counting objects: 100% (400/400), done.[K
remote: Compressing objects: 100% (400/400), done.[K
remote: Total 3839 (delta 9), reused 385 (delta 0), pack-reused 3439 (from 1)[K
Receiving objects: 100% (3839/3839), 100.58 MiB | 22.48 MiB/s, done.
Resolving deltas: 100% (10/10), done.
Updating files: 100% (3783/3783), done.


In [2]:
from PIL import Image
import numpy as np

# One sample picture from the "pants" training folder
SAMPLE_IMAGE_PATH = 'clothing-dataset-small/train/pants/0098b991-e36e-4ef1-b5ee-4154b21e2a92.jpg'
TARGET_SIZE = (224, 224)

# Open the file and scale it to the target size in a single expression
img = Image.open(SAMPLE_IMAGE_PATH).resize(TARGET_SIZE)

# Pixel data as a numpy array
x = np.array(img)
print(x.shape)  # (224, 224, 3)

(224, 224, 3)


In [3]:
import torch
import torchvision.models as models
from torchvision import transforms

In [4]:
# ImageNet-pretrained MobileNetV2, switched to inference mode right away
# (eval() returns the module itself, so the call can be chained).
model = models.mobilenet_v2(weights='IMAGENET1K_V1').eval()

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


100%|██████████| 13.6M/13.6M [00:00<00:00, 106MB/s] 


In [5]:
# Preprocessing for MobileNetV2; the steps are applied in list order
_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # Per-channel normalization constants
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
preprocess = transforms.Compose(_preprocess_steps)

In [6]:
# NOTE(review): when cells run in order, `img` is still the 224x224 copy produced by
# img.resize((224, 224)) earlier, so Resize(256) upsamples it before the center crop.
x = preprocess(img)

In [7]:
# Re-open the sample image from disk and run it through the preprocessing pipeline
img = Image.open('clothing-dataset-small/train/pants/0098b991-e36e-4ef1-b5ee-4154b21e2a92.jpg')
img_t = preprocess(img)
batch_t = img_t.unsqueeze(0)  # add a leading batch dimension

# Forward pass without gradient tracking
with torch.no_grad():
    output = model(batch_t)

# Class indices ordered from highest to lowest score
_, indices = output.sort(descending=True)

In [8]:
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt -O imagenet_classes.txt

# Load ImageNet class names
with open("imagenet_classes.txt", "r") as f:
    categories = [s.strip() for s in f.readlines()]

# Get top 5 predictions
top5_indices = indices[0, :5].tolist()
top5_classes = [categories[i] for i in top5_indices]

print("Top 5 predictions:")
for i, class_name in enumerate(top5_classes):
    print(f"{i+1}: {class_name}")

--2025-12-04 05:18:11--  https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10472 (10K) [text/plain]
Saving to: ‘imagenet_classes.txt’


2025-12-04 05:18:11 (67.7 MB/s) - ‘imagenet_classes.txt’ saved [10472/10472]

Top 5 predictions:
1: jean
2: suit
3: cardigan
4: sweatshirt
5: overskirt


In [9]:
import os
from torch.utils.data import Dataset

class ClothingDataset(Dataset):
    """Image-folder dataset: every sub-directory of `data_dir` is one class.

    Args:
        data_dir: Directory that contains one sub-directory per class.
        transform: Optional callable applied to each loaded RGB PIL image.

    Attributes:
        classes: Sorted list of class (sub-directory) names.
        class_to_idx: Mapping from class name to its integer label.
        image_paths: Path of every sample, grouped by class and sorted by file name.
        labels: Integer label of every sample, aligned with `image_paths`.
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Only visible sub-directories are classes. Stray files and hidden
        # folders such as `.ipynb_checkpoints` would otherwise become bogus
        # classes or make the inner os.listdir() call fail.
        self.classes = sorted(
            entry for entry in os.listdir(data_dir)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(data_dir, entry))
        )
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            # os.listdir() order is filesystem dependent; sorting keeps the
            # sample order reproducible.
            for img_name in sorted(os.listdir(label_dir)):
                img_path = os.path.join(label_dir, img_name)
                if img_name.startswith('.') or not os.path.isfile(img_path):
                    continue
                self.image_paths.append(img_path)
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return `(image, label)` for sample `idx`; the image is converted to RGB."""
        img_path = self.image_paths[idx]
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted into a new image object.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')
        label = self.labels[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [26]:
# Side length (in pixels) of the square images fed to the network.
# Both pipelines below derive their output size from this single value.
input_size = 224

# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
# train_transforms = transforms.Compose([
#     transforms.Resize((input_size, input_size)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=mean, std=std)
# ])

# Training transforms WITH augmentation
train_transforms = transforms.Compose([
    transforms.RandomRotation(10),           # Rotate up to 10 degrees
    # Crop size follows input_size (was a hard-coded 224) so training and
    # validation images always have the same shape.
    transforms.RandomResizedCrop(input_size, scale=(0.9, 1.0)),  # Zoom
    transforms.RandomHorizontalFlip(),       # Horizontal flip
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Validation transforms - NO augmentation, same as before
val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [27]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32

# Training split: augmented samples, shuffling enabled
train_dataset = ClothingDataset('./clothing-dataset-small/train', transform=train_transforms)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Validation split: deterministic preprocessing, fixed order
val_dataset = ClothingDataset('./clothing-dataset-small/validation', transform=val_transforms)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [12]:
import torch.nn as nn

class ClothingClassifierMobileNet(nn.Module):
    """Frozen MobileNetV2 feature extractor with a single linear output layer.

    The forward pass returns raw class scores of shape (batch, num_classes);
    no softmax is applied.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # ImageNet-pretrained backbone whose parameters are excluded from training
        backbone = models.mobilenet_v2(weights='IMAGENET1K_V1')
        backbone.requires_grad_(False)
        # forward() only calls backbone.features, so the ImageNet classifier
        # is swapped for a no-op
        backbone.classifier = nn.Identity()
        self.base_model = backbone

        # Trainable head
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.output_layer = nn.Linear(1280, num_classes)

    def forward(self, x):
        feature_maps = self.base_model.features(x)
        pooled = self.global_avg_pooling(feature_maps).flatten(1)
        return self.output_layer(pooled)

In [13]:
import torch.optim as optim

# Use the GPU when one is available, the CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Module.to() returns the module itself, so construction and placement chain
model = ClothingClassifierMobileNet(num_classes=10).to(device)

In [14]:
# Smoke test: push one random image-sized batch through the network.
# The module is called directly instead of .forward() so that any
# registered forward hooks run as well.
model(torch.rand(1, 3, 224, 224).to(device))

tensor([[ 0.0283,  0.0306, -0.3546, -0.4617, -0.4808, -0.0478, -0.1942, -0.0039,
          0.3301, -0.0070]], grad_fn=<AddmmBackward0>)

In [15]:
# Classification loss computed from the model's raw class scores
criterion = nn.CrossEntropyLoss()
# Adam over the model's parameters
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

In [16]:
# # Training loop
# num_epochs = 10

# for epoch in range(num_epochs):
#     # Training phase
#     model.train()  # Set the model to training mode
#     running_loss = 0.0
#     correct = 0
#     total = 0

#     # Iterate over the training data
#     for inputs, labels in train_loader:
#         # Move data to the specified device (GPU or CPU)
#         inputs, labels = inputs.to(device), labels.to(device)

#         # Zero the parameter gradients to prevent accumulation
#         optimizer.zero_grad()
#         # Forward pass
#         outputs = model(inputs)
#         # Calculate the loss
#         loss = criterion(outputs, labels)
#         # Backward pass and optimize
#         loss.backward()
#         optimizer.step()

#         # Accumulate training loss
#         running_loss += loss.item()
#         # Get predictions
#         _, predicted = torch.max(outputs.data, 1)
#         # Update total and correct predictions
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

#     # Calculate average training loss and accuracy
#     train_loss = running_loss / len(train_loader)
#     train_acc = correct / total

#     # Validation phase
#     model.eval()  # Set the model to evaluation mode
#     val_loss = 0.0
#     val_correct = 0
#     val_total = 0

#     # Disable gradient calculation for validation
#     with torch.no_grad():
#         # Iterate over the validation data
#         for inputs, labels in val_loader:
#             # Move data to the specified device (GPU or CPU)
#             inputs, labels = inputs.to(device), labels.to(device)
#             # Forward pass
#             outputs = model(inputs)
#             # Calculate the loss
#             loss = criterion(outputs, labels)

#             # Accumulate validation loss
#             val_loss += loss.item()
#             # Get predictions
#             _, predicted = torch.max(outputs.data, 1)
#             # Update total and correct predictions
#             val_total += labels.size(0)
#             val_correct += (predicted == labels).sum().item()

#     # Calculate average validation loss and accuracy
#     val_loss /= len(val_loader)
#     val_acc = val_correct / val_total

#     # Print epoch results
#     print(f'Epoch {epoch+1}/{num_epochs}')
#     print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
#     print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

In [17]:
def make_model(learning_rate=0.01):
    """Create a fresh 10-class classifier on `device` plus its Adam optimizer.

    Returns:
        A `(model, optimizer)` tuple.
    """
    net = ClothingClassifierMobileNet(num_classes=10).to(device)
    return net, optim.Adam(net.parameters(), lr=learning_rate)

In [18]:
# learning_rates = [0.0001, 0.001, 0.01, 0.1]

# for lr in learning_rates:
#     print(f'\n=== Learning Rate: {lr} ===')
#     model, optimizer = make_model(learning_rate=lr)
#     train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)

In [19]:
import torch.nn as nn

class ClothingClassifierMobileNet(nn.Module):
    """Frozen MobileNetV2 feature extractor followed by a small trainable head.

    Head: global average pooling -> Linear(1280, size_inner) -> ReLU ->
    Dropout(droprate) -> Linear(size_inner, num_classes). The forward pass
    returns raw class scores; no softmax is applied.
    """

    def __init__(self, size_inner=100, droprate=0.2, num_classes=10):
        super().__init__()

        # ImageNet-pretrained backbone whose parameters are excluded from training
        backbone = models.mobilenet_v2(weights='IMAGENET1K_V1')
        backbone.requires_grad_(False)
        # forward() only calls backbone.features, so the ImageNet classifier
        # is swapped for a no-op
        backbone.classifier = nn.Identity()
        self.base_model = backbone

        # Trainable head
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.inner = nn.Linear(1280, size_inner)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(droprate)
        self.output_layer = nn.Linear(size_inner, num_classes)

    def forward(self, x):
        feature_maps = self.base_model.features(x)
        pooled = self.global_avg_pooling(feature_maps).flatten(1)
        hidden = self.dropout(self.relu(self.inner(pooled)))
        return self.output_layer(hidden)

def make_model(
        learning_rate=0.001,
        size_inner=100,
        droprate=0.2,
):
    """Create a fresh 10-class classifier on `device` plus its Adam optimizer.

    Args:
        learning_rate: Step size passed to Adam.
        size_inner: Width of the hidden layer in the classifier head.
        droprate: Dropout probability used in the classifier head.

    Returns:
        A `(model, optimizer)` tuple.
    """
    head_config = dict(num_classes=10, size_inner=size_inner, droprate=droprate)
    net = ClothingClassifierMobileNet(**head_config).to(device)
    adam = optim.Adam(net.parameters(), lr=learning_rate)
    return net, adam

In [28]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over `loader` and return `(mean_loss, accuracy)`.

    With an optimizer the model is put in training mode and updated after
    every batch. Without one it is put in evaluation mode and gradient
    tracking is disabled. Both metrics are averaged over samples; an empty
    loader yields `(0.0, 0.0)` instead of a ZeroDivisionError.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    num_correct = 0
    num_samples = 0

    with torch.set_grad_enabled(is_training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # Weight the batch loss by its size so a smaller last batch does
            # not skew the epoch average (assumes `criterion` returns the
            # batch mean, which is the default reduction).
            loss_sum += loss.item() * batch_size
            num_samples += batch_size
            num_correct += (outputs.argmax(dim=1) == labels).sum().item()

    denominator = max(num_samples, 1)
    return loss_sum / denominator, num_correct / denominator


def train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device):
    """Train `model` for `num_epochs` epochs, validating after each one.

    Per-epoch metrics are printed. Whenever the validation accuracy exceeds
    the best value seen so far, the model's state_dict is saved to
    `mobilenet_v4_<epoch>_<val_acc>.pth` in the current working directory.

    Args:
        model: Network to train; expected to already live on `device`.
        optimizer: Optimizer that updates the model's parameters.
        train_loader: Iterable of `(inputs, labels)` training batches.
        val_loader: Iterable of `(inputs, labels)` validation batches.
        criterion: Loss callable invoked as `criterion(outputs, labels)`.
        num_epochs: Number of passes over the training data.
        device: Device every batch is moved to.

    Returns:
        dict with `best_val_accuracy` (float) and `best_checkpoint` (path of
        the last saved checkpoint, or None when nothing was saved).
    """
    best_val_accuracy = 0.0  # best validation accuracy seen so far
    best_checkpoint = None

    for epoch in range(num_epochs):
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            checkpoint_path = f'mobilenet_v4_{epoch+1:02d}_{val_acc:.3f}.pth'
            torch.save(model.state_dict(), checkpoint_path)
            best_checkpoint = checkpoint_path
            print(f'Checkpoint saved: {checkpoint_path}')

    return {'best_val_accuracy': best_val_accuracy, 'best_checkpoint': best_checkpoint}

In [21]:
# import torchvision.models as models

# sizes_inner = [1000, 500, 100]

# for size_inner in sizes_inner:
#     print(f'\n=== Size inner: {size_inner} ===')
#     model, optimizer = make_model(
#         learning_rate=0.001,
#         size_inner=size_inner
#     )
#     train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)

In [22]:
# learning_rate = 0.001
# size_inner = 100

# model, optimizer = make_model(
#         learning_rate=learning_rate,
#         size_inner=size_inner
#     )
# train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)

In [23]:
# num_epochs = 50
# learning_rate = 0.001
# size_inner = 100

# droprates = [0.1, 0.2, 0.5, 0.7]

# for droprate in droprates:
#     print(f'\n=== Droprate: {droprate} ===')
#     model, optimizer = make_model(
#         learning_rate=learning_rate,
#         size_inner=size_inner,
#         droprate=droprate,
#     )
#     train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)

In [24]:
droprate = 0.2  # best value found in the dropout-rate experiment above

In [25]:
# # Training transforms WITH augmentation
# train_transforms = transforms.Compose([
#     transforms.RandomRotation(10),           # Rotate up to 10 degrees
#     transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),  # Zoom
#     transforms.RandomHorizontalFlip(),       # Horizontal flip
#     transforms.ToTensor(),
#     transforms.Normalize(mean=mean, std=std)
# ])

# # Validation transforms - NO augmentation, same as before
# val_transforms = transforms.Compose([
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=mean, std=std)
# ])

In [29]:
# Hyper-parameters for the final training run
num_epochs = 50
learning_rate, size_inner, droprate = 0.001, 100, 0.2

# Fresh model/optimizer pair, then train with checkpointing
model, optimizer = make_model(learning_rate=learning_rate, size_inner=size_inner, droprate=droprate)
train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)

Epoch 1/50
  Train Loss: 1.4163, Train Acc: 0.5310
  Val Loss: 0.9237, Val Acc: 0.7566
Checkpoint saved: mobilenet_v4_01_0.757.pth
Epoch 2/50
  Train Loss: 0.9045, Train Acc: 0.7014
  Val Loss: 0.6702, Val Acc: 0.7830
Checkpoint saved: mobilenet_v4_02_0.783.pth
Epoch 3/50
  Train Loss: 0.7627, Train Acc: 0.7347
  Val Loss: 0.7654, Val Acc: 0.7566
Epoch 4/50
  Train Loss: 0.6614, Train Acc: 0.7738
  Val Loss: 0.6546, Val Acc: 0.7771
Epoch 5/50
  Train Loss: 0.6194, Train Acc: 0.7894
  Val Loss: 0.5960, Val Acc: 0.7918
Checkpoint saved: mobilenet_v4_05_0.792.pth
Epoch 6/50
  Train Loss: 0.5864, Train Acc: 0.8008
  Val Loss: 0.5932, Val Acc: 0.7859
Epoch 7/50
  Train Loss: 0.5609, Train Acc: 0.8067
  Val Loss: 0.5976, Val Acc: 0.7889
Epoch 8/50
  Train Loss: 0.5310, Train Acc: 0.8194
  Val Loss: 0.6289, Val Acc: 0.7771
Epoch 9/50
  Train Loss: 0.5081, Train Acc: 0.8240
  Val Loss: 0.6487, Val Acc: 0.7918
Epoch 10/50
  Train Loss: 0.4872, Train Acc: 0.8370
  Val Loss: 0.5950, Val Acc: 0.79

KeyboardInterrupt: 

In [30]:
# NOTE(review): hard-coded absolute path to a checkpoint written by train_and_evaluate
# (file name pattern: mobilenet_v4_<epoch>_<val_acc>.pth). The training output shown in
# this notebook stops at epoch 10 and never reports saving this file, so it presumably
# comes from a different run — update the path after retraining.
path = '/content/mobilenet_v4_12_0.824.pth'

In [31]:
# Rebuild the architecture with the hyper-parameters used for training
model = ClothingClassifierMobileNet(size_inner=100, droprate=0.2, num_classes=10)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU also loads on a CPU-only machine.
# weights_only=True restricts unpickling to tensors and plain containers.
state_dict = torch.load(path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.to(device)
# Inference mode: disables dropout and uses the stored batch-norm statistics
model.eval();

In [32]:
# Preprocess the sample image with the validation pipeline
x = val_transforms(img)
batch_t = x.unsqueeze(0).to(device)  # add a batch dimension, move to the model's device

# Inference only, so gradient tracking is switched off
with torch.no_grad():
    output = model(batch_t)

# Class indices ordered from highest to lowest score
_, indices = output.sort(descending=True)

In [34]:
# Raw class scores for the sample image (the model applies no softmax)
output

tensor([[-0.0512, -5.5840, -0.1152, -1.5063,  7.9622, -1.1374, -2.0074, -1.5383,
         -1.8389, -5.6806]])

In [35]:
# Class name -> integer label mapping that ClothingDataset built from the sorted folder names
train_dataset.class_to_idx

{'dress': 0,
 'hat': 1,
 'longsleeve': 2,
 'outwear': 3,
 'pants': 4,
 'shirt': 5,
 'shoes': 6,
 'shorts': 7,
 'skirt': 8,
 't-shirt': 9}

In [36]:
# Class names listed in label-index order (position in the list == label)
classes = [
    "dress",       # 0
    "hat",         # 1
    "longsleeve",  # 2
    "outwear",     # 3
    "pants",       # 4
    "shirt",       # 5
    "shoes",       # 6
    "shorts",      # 7
    "skirt",       # 8
    "t-shirt",     # 9
]

In [37]:
# Pair every class name with its score for the single image in the batch
{class_name: score for class_name, score in zip(classes, output[0].cpu())}

{'dress': tensor(-0.0512),
 'hat': tensor(-5.5840),
 'longsleeve': tensor(-0.1152),
 'outwear': tensor(-1.5063),
 'pants': tensor(7.9622),
 'shirt': tensor(-1.1374),
 'shoes': tensor(-2.0074),
 'shorts': tensor(-1.5383),
 'skirt': tensor(-1.8389),
 't-shirt': tensor(-5.6806)}

In [38]:
# pants class has the highest value - correct

In [41]:
!pip install onnx onnxscript

Collecting onnxscript
  Downloading onnxscript-0.5.6-py3-none-any.whl.metadata (13 kB)
Collecting onnx_ir<2,>=0.1.12 (from onnxscript)
  Downloading onnx_ir-0.1.12-py3-none-any.whl.metadata (3.2 kB)
Downloading onnxscript-0.5.6-py3-none-any.whl (683 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m683.0/683.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx_ir-0.1.12-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.3/129.3 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx_ir, onnxscript
Successfully installed onnx_ir-0.1.12 onnxscript-0.5.6


In [44]:
# Export in inference mode no matter what state earlier cells left the model in;
# otherwise dropout would be traced in its training behaviour.
model.eval()

# Create dummy input
dummy_input = torch.randn(1, 3, 224, 224).to(device)

# Export to ONNX
onnx_path = "clothing_classifier_mobilenet_v4.onnx"

torch.onnx.export(
    model,
    dummy_input,
    onnx_path,
    verbose=True,
    input_names=['input'],
    output_names=['output'],
    # Mark the first axis of input and output as variable so the exported
    # model accepts any batch size
    dynamic_axes={
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'}
    }
)

print(f"Model exported to {onnx_path}")

  torch.onnx.export(


[torch.onnx] Obtain model graph for `ClothingClassifierMobileNet([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `ClothingClassifierMobileNet([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 105 of general pattern rewrite rules.
Model exported to clothing_classifier_mobilenet_v4.onnx
