# Training a Fashion Image Classifier with PyTorch

## Environment Setup

In [5]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


## Downloading the Dataset
We use the torchvision.datasets API to pull the FashionMNIST data. This dataset consists of 70,000 grayscale images of clothing across 10 categories.

In [6]:
# Build both FashionMNIST splits under ./data, fetching the files when needed
# (download=True). The training split is created first, then the test split;
# each gets its own ToTensor() transform.
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train_split,
        download=True,
        transform=ToTensor(),
    )
    for is_train_split in (True, False)
)

100%|██████████| 26.4M/26.4M [00:02<00:00, 9.66MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 149kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 2.75MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 14.5MB/s]


### Preparing the DataLoaders

In [7]:
batch_size = 64

# Wrap each split in a DataLoader that serves mini-batches of `batch_size` samples.
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

# Peek at the first test batch only, to confirm the tensor layout.
# The None default keeps an empty loader silent, like a loop that never runs.
first_batch = next(iter(test_dataloader), None)
if first_batch is not None:
    X, y = first_batch
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


### Basic Tensor Manipulation

In [8]:
import torch

# Draw a 2x3 tensor of uniform random samples from [0, 1)
tensor = torch.rand((2, 3))

# Show the values themselves
print("Tensor :\n", tensor)

# Show the dimensions (`.shape` is the attribute form of `.size()`)
print("Tensor Shape:", tensor.shape)

# Show the element data type
print("Tensor Type:", tensor.dtype)

Tensor :
 tensor([[0.5672, 0.8801, 0.0197],
        [0.9543, 0.9752, 0.1437]])
Tensor Shape: torch.Size([2, 3])
Tensor Type: torch.float32


In [9]:
import torch

# Two hand-written 2x2 integer matrices
tensor1 = torch.tensor([[1, 2], [3, 4]])
tensor2 = torch.tensor([[5, 6], [7, 8]])

# `@` is the matrix-multiplication operator: rows of tensor1 times columns of tensor2
matmul_result = tensor1 @ tensor2

print("Matrix Product :\n", matmul_result)

Matrix Product :
 tensor([[19, 22],
        [43, 50]])


## Implementing a Custom Neural Network Class

In [10]:
import torch.nn as nn
import torch.optim as optim

# Smallest possible network: one linear unit inside a Sequential container
model = nn.Sequential(
    nn.Linear(in_features=1, out_features=1),  # 1 input feature -> 1 output neuron
)

# Mean-squared-error objective, optimised by Adam with its default settings
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters())

**DESIGN NOTE:**  
The `nn.Module` subclass in the next cell relies on PyTorch's imperative (eager) execution.  

- `__init__`: Encapsulates the structural state and learnable parameters.  
- `forward`: Defines the dynamic transformation logic.  

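Because PyTorch builds the computation graph as the code runs (define-by-run), `forward` can contain ordinary Python control flow. The **DynamicCNN** model defined later in this notebook depends on this: its `forward` inspects the number of input channels at run time and routes grayscale and RGB batches through different layers. This kind of flexible data flow is essential for complex research architectures.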


In [11]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleModel(nn.Module):
    """One dense layer followed by ReLU: maps 5 input features to 10 outputs."""

    def __init__(self):
        super().__init__()
        # Structural state: the weight and bias of a single fully connected layer
        self.fc = nn.Linear(in_features=5, out_features=10)

    def forward(self, x):
        """Project `x` through the dense layer, then zero out negatives with ReLU."""
        projected = self.fc(x)
        return F.relu(projected)


# Build an instance and print its layer summary
model = SimpleModel()
print(model)

SimpleModel(
  (fc): Linear(in_features=5, out_features=10, bias=True)
)


### Defining the CNN Architecture for FashionMNIST

In [12]:
class CNNFashionMNIST(nn.Module):
    """Single-convolution CNN for 1x28x28 FashionMNIST images.

    Pipeline: 3x3 convolution -> ReLU -> 2x2 max pooling -> flatten ->
    dense(128) -> ReLU -> dense(10). The output is raw class logits.
    """

    def __init__(self):
        super().__init__()
        # 1. Feature extraction: convolution
        # Input: (1, 28, 28) -> Output: (32, 26, 26)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)

        # 2. Dimensionality reduction: max pooling
        # Input: (32, 26, 26) -> Output: (32, 13, 13)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # 3. Classification: fully connected (dense) layers
        # Flattening: 32 channels * 13 wide * 13 high = 5408 features
        self.fc1 = nn.Linear(32 * 13 * 13, 128)
        self.fc2 = nn.Linear(128, 10)  # 10 classes for FashionMNIST

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: tensor of shape (N, 1, 28, 28); the size of `fc1` is tied to
                28x28 inputs.

        Returns:
            Tensor of shape (N, 10) holding unnormalised class scores.
        """
        # Without this method nn.Module raises NotImplementedError on model(x).
        x = self.pool(torch.relu(self.conv1(x)))  # (N, 32, 13, 13)
        x = torch.flatten(x, start_dim=1)         # keep the batch dimension
        x = torch.relu(self.fc1(x))
        return self.fc2(x)  # no activation: CrossEntropyLoss consumes logits


# Initialisation
model = CNNFashionMNIST()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

## Calculating Total Parameters

In [13]:
# Show the layer-by-layer structure of the current `model`
print(model)

# Count parameters by summing the element count of every parameter tensor.
# Trainable parameters are those with requires_grad set.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

CNNFashionMNIST(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=5408, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


## Deep CNN Architecture for FashionMNIST

In [14]:
import torch.nn as nn
import torch.nn.functional as F

# NOTE: this definition rebinds the name CNNFashionMNIST, replacing the
# single-convolution class defined earlier in the notebook.
class CNNFashionMNIST(nn.Module):
    """Two-block CNN for 1x28x28 FashionMNIST images, ending in 10 class logits."""

    def __init__(self):
        super().__init__()
        # Stage 1 - low-level features (edges): 1 -> 32 channels, then 2x2 downsampling
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.pool1 = nn.MaxPool2d(2, stride=2)

        # Stage 2 - mid-level features (shapes): 32 -> 64 channels, then 2x2 downsampling
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.pool2 = nn.MaxPool2d(2, stride=2)

        # Classifier head: 64 * 5 * 5 flattened features -> 128 hidden units -> 10 classes
        self.fc1 = nn.Linear(in_features=64 * 5 * 5, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return logits of shape (N, 10); per-sample shapes below assume 28x28 input."""
        stage1 = self.pool1(F.relu(self.conv1(x)))       # (32, 26, 26) -> (32, 13, 13)
        stage2 = self.pool2(F.relu(self.conv2(stage1)))  # (64, 11, 11) -> (64, 5, 5)

        # Collapse everything except the batch dimension
        flat = stage2.view(stage2.size(0), -1)

        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)  # raw logits, as CrossEntropyLoss expects

## Dynamic Modality Switching: Grayscale vs. RGB

In [15]:
class DynamicCNN(nn.Module):
    """CNN whose forward pass picks a branch from the input's channel count.

    * 1 channel: convNG -> pool1 -> fcNG -> fc2 (single convolution block).
    * otherwise: convRGB -> pool1 -> conv2 -> pool2 -> fcRGB -> fc2.

    Both branches share `pool1` and the final `fc2` layer and return 10 logits
    per sample. The flattened sizes hard-coded in `fcNG` and `fcRGB` correspond
    to 28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        # Entry convolutions, one per modality (each followed by ReLU in forward)
        self.convNG = nn.Conv2d(1, 32, kernel_size=3)   # grayscale: (1, 28, 28) -> (32, 26, 26)
        self.convRGB = nn.Conv2d(3, 32, kernel_size=3)  # RGB: (3, 28, 28) -> (32, 26, 26)

        # 2x2 max pooling with stride 2: (32, 26, 26) -> (32, 13, 13)
        self.pool1 = nn.MaxPool2d(2, stride=2)

        # Second block, used by the RGB branch only
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)   # (32, 13, 13) -> (64, 11, 11)
        self.pool2 = nn.MaxPool2d(2, stride=2)          # (64, 11, 11) -> (64, 5, 5)

        # Hidden dense layers: the input size is the flattened feature map of each branch
        self.fcNG = nn.Linear(in_features=32 * 13 * 13, out_features=128)
        self.fcRGB = nn.Linear(in_features=64 * 5 * 5, out_features=128)
        # Shared classification head
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return class logits of shape (N, 10) for a batch `x` of shape (N, C, 28, 28)."""
        single_channel = x.shape[1] == 1

        if not single_channel:
            # Every input that is not single-channel takes the two-block RGB path
            features = self.pool1(F.relu(self.convRGB(x)))
            features = self.pool2(F.relu(self.conv2(features)))
            # Flatten: keep the batch dimension, merge all others into one
            flat = features.view(features.size(0), -1)
            hidden = F.relu(self.fcRGB(flat))
            # No ReLU on the classification head: its outputs are the class scores
            return self.fc2(hidden)

        features = self.pool1(F.relu(self.convNG(x)))
        flat = features.view(features.size(0), -1)
        hidden = F.relu(self.fcNG(flat))
        return self.fc2(hidden)



## Real-Time Modality Testing

In [16]:
model = DynamicCNN()

# Grayscale input: batch of 2 single-channel 28x28 images
gray_img = torch.randn(2, 1, 28, 28)
out_gray = model(gray_img)
print("Grayscale output shape:", out_gray.shape)
# Check the expectation instead of only stating it: one row of 10 logits per image
assert out_gray.shape == (2, 10), f"unexpected grayscale output shape: {tuple(out_gray.shape)}"

# RGB input: batch of 2 three-channel 28x28 images
rgb_img = torch.randn(2, 3, 28, 28)
print("RGB input shape:", rgb_img.shape)
out_rgb = model(rgb_img)
print("RGB output shape:", out_rgb.shape)
assert out_rgb.shape == (2, 10), f"unexpected RGB output shape: {tuple(out_rgb.shape)}"

Grayscale output shape: torch.Size([2, 10])
RGB input shape: torch.Size([2, 3, 28, 28])
RGB output shape: torch.Size([2, 10])
