# Imports

In [1]:
import json
import math
import os

import copy
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from tqdm import tqdm


import kornia.augmentation as K
import kornia.color as C

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import torchvision
from torchvision.transforms import ToTensor
from torchvision.transforms import v2
from torchvision.transforms.functional import pil_to_tensor
from torchvision.datasets import INaturalist
from torchvision import models

from torchinfo import summary

import webdataset as wds

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
  warn(


# Checking device

In [2]:
print("CUDA available?", torch.cuda.is_available())
print("Device name:", torch.cuda.get_device_name(0))
print("Current device:", torch.cuda.current_device())

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

print("Using:", device)

CUDA available? True
Device name: NVIDIA GeForce RTX 4090
Current device: 0
Using: cuda:0


# Experiment setup

In [3]:
setup = {
    "experiment": "efficientnet_b4-10000-SGD-CELoss",
    "num_classes": 10000,
    "batch_size": 64,
    "num_workers": 8,
    "prefetch_factor": 2,
    "criterion": nn.CrossEntropyLoss(),
    "lr": 1e-3,
    "lambda_reg": 1e-3,
    "momentum": 0.9,
    "max_epochs": 5
}

folder = f"./experiments/{setup['experiment']}"
os.makedirs(folder, exist_ok=True)
file_path = os.path.join(folder, "setup.txt")

with open(file_path, "w", encoding="utf-8") as f:
    json.dump(setup, f, indent=4, ensure_ascii=False, default=str)

print(f"Saved in: {file_path}")

os.makedirs(os.path.join(f"./experiments/{setup['experiment']}/tensorboard"), exist_ok=True)
os.makedirs(os.path.join(f"./experiments/{setup['experiment']}/models"), exist_ok=True)

Saved in: ./experiments/efficientnet_b4-10000-SGD-CELoss/setup.txt


# Functions

In [4]:
def plot_layers(efficientnet_b4, writer, epoch):
    layers = list(efficientnet_b4.modules())

    layer_id = 1
    for layer in layers:
        if isinstance(layer, nn.Linear):
            writer.add_histogram(f'Bias/linear-{layer_id}', layer.bias, epoch)
            writer.add_histogram(f'Weight/linear-{layer_id}', layer.weight, epoch)
            writer.add_histogram(f'Grad/linear-{layer_id}', layer.weight.grad, epoch)
            layer_id += 1

# Augmentations

In [5]:
mean = [0.4650194027653909, 0.48128506681789435, 0.37711871442015105]
std = [0.24237112423460933, 0.23366727265227194, 0.25144634756835477]

class Div255(nn.Module):
    def forward(self, x):
        return x / 255.0

class ToTensorModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.to_tensor = ToTensor()
    def forward(self, x):
        return self.to_tensor(x)

train_transform = torch.nn.Sequential(
    ToTensorModule(),
    
    K.RandomCrop((224,224)),
    
    K.RandomHorizontalFlip(p=0.5),
    K.RandomVerticalFlip(p=0.5),
    K.RandomPerspective(p=0.2),
    
    K.RandomGaussianBlur((3,3), sigma=(0.3, 1.0)),
    K.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    K.RandomBrightness(brightness=(0.8,1.2), p=0.3),
 
    K.Normalize(mean=mean, std=std, p=1.0)
)

test_transform = torch.nn.Sequential(
    ToTensorModule(),
    
    K.CenterCrop((224,224)),
    
    K.Normalize(mean=mean, std=std, p=1.0)
)

## View

In [6]:
# train_transform_view = torch.nn.Sequential(
#     ToTensorModule(),
    
#     K.Resize((224,224)),

#     K.RandomHorizontalFlip(p=1.0),
#     K.RandomVerticalFlip(p=1.0),
#     K.RandomPerspective(p=1.0),
    
#     K.RandomGaussianBlur((3,3), sigma=(0.3, 1.0)),
#     K.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
#     K.RandomBrightness(brightness=(0.8,1.2), p=1.0),
# )

# img_dir  = "../dataset/2021_train_mini/00000_Animalia_Annelida_Clitellata_Haplotaxida_Lumbricidae_Lumbricus_terrestris"

# img_files = [f for f in os.listdir(img_dir) if f.endswith(".jpg")]

# N = 4
# img_files = img_files[:N]

# cols = 2
# rows = N // cols + int(N % cols != 0)

# fig, axes = plt.subplots(rows, cols, figsize=(6*cols, 4*rows))

# for idx, img_fname in enumerate(img_files):
#     pil = Image.open(os.path.join(img_dir, img_fname)).convert("RGB")
#     t = pil_to_tensor(pil).float().to(device).unsqueeze(0)

#     out = train_transform_view(t)
#     out = out.squeeze(0).permute(1,2,0).cpu().numpy()

#     r, c = divmod(idx, cols)
#     ax = axes[r, c]
#     ax.imshow(out)
#     ax.set_title(f"Image {idx+1}")
#     ax.axis("off")
    
# plt.tight_layout()
# plt.show()

# Dataset

## Make dataloaders

In [7]:
shard_dir = os.path.join("../dataset", "shards")

def decode_label(b):
    if isinstance(b, (bytes, bytearray)):
        return torch.tensor(int(b.decode('utf-8')), dtype=torch.long)
    elif isinstance(b, int):
        return torch.tensor(b, dtype=torch.long)
    else:
        raise TypeError(f"Tipo inesperado: {type(b)}")

def make_dataloader(train, filenames):
    if train:
        split = "train"
        transforms = train_transform
        num_workers = setup['num_workers']
    else:
        split = "test"
        transforms = test_transform
        num_workers = int(setup['num_workers'] / 2)
        
    pattern = os.path.join(shard_dir, split, filenames)
    
    dataset = (
        wds.WebDataset(pattern, shardshuffle=False)
        .shuffle(2000)
        .decode("pil")
        .to_tuple("jpg", "cls")
        .map_tuple(transforms, decode_label)
        .batched(setup['batch_size'], partial=torch.stack)
    )
    
    return DataLoader(
        dataset,
        batch_size=None,
        num_workers=num_workers,
        pin_memory=True,
        prefetch_factor=setup['prefetch_factor'],
        persistent_workers=True,
    )

train_dataloader = make_dataloader(train=True, filenames="data-{000000..000049}.tar")

test_dataloader = make_dataloader(train=False, filenames="data-{000000..000009}.tar")

## Get class names and size of data loaders

In [8]:
train_dataset = INaturalist(
    root='../dataset/',
    version='2021_train_mini',
    transform=train_transform,
    download=False
)

test_dataset = INaturalist(
    root='../dataset/',
    version='2021_valid',
    transform=test_transform,
    download=False
)

class_names = train_dataset.all_categories

short_class_names = [c.split("_")[-2] + " " + c.split("_")[-1] 
               for c in train_dataset.all_categories]

train_dataset_size = len(train_dataset)
train_dataloader_size = math.ceil(train_dataset_size / setup['batch_size'])

test_dataset_size = len(test_dataset)
test_dataloader_size = math.ceil(test_dataset_size / setup['batch_size'])

# Architecture

In [9]:
weights = models.EfficientNet_B4_Weights.IMAGENET1K_V1

efficientnet_b4 = models.efficientnet_b4(weights=weights)

for param in efficientnet_b4.parameters():
    param.requires_grad = False

num_features = efficientnet_b4.classifier[1].in_features
efficientnet_b4.classifier[1] = nn.Linear(num_features, setup['num_classes'])

## View

In [10]:
print(efficientnet_b4.classifier)

Sequential(
  (0): Dropout(p=0.4, inplace=True)
  (1): Linear(in_features=1792, out_features=10000, bias=True)
)


In [11]:
for name, param in efficientnet_b4.named_parameters():
    if param.requires_grad == True:
        print(name, param.requires_grad)

classifier.1.weight True
classifier.1.bias True


In [12]:
summary(efficientnet_b4, input_size=(setup['batch_size'], 3, 224, 224))

Layer (type:depth-idx)                                  Output Shape              Param #
EfficientNet                                            [64, 10000]               --
├─Sequential: 1-1                                       [64, 1792, 7, 7]          --
│    └─Conv2dNormActivation: 2-1                        [64, 48, 112, 112]        --
│    │    └─Conv2d: 3-1                                 [64, 48, 112, 112]        (1,296)
│    │    └─BatchNorm2d: 3-2                            [64, 48, 112, 112]        (96)
│    │    └─SiLU: 3-3                                   [64, 48, 112, 112]        --
│    └─Sequential: 2-2                                  [64, 24, 112, 112]        --
│    │    └─MBConv: 3-4                                 [64, 24, 112, 112]        (2,940)
│    │    └─MBConv: 3-5                                 [64, 24, 112, 112]        (1,206)
│    └─Sequential: 2-3                                  [64, 32, 56, 56]          --
│    │    └─MBConv: 3-6                    

# Train

In [13]:
def train():
    tensorboard_path = f'experiments/{setup["experiment"]}/tensorboard/'
    
    writer = SummaryWriter(log_dir=tensorboard_path)
    
    efficientnet_b4.to(device)

    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, efficientnet_b4.parameters()),
        lr=setup['lr'],
        weight_decay=setup['lambda_reg'],
        momentum=setup['momentum'])
    
    criterion = setup['criterion']
    
    accuracies = []
    max_accuracy = -1.0

    writer.add_graph(efficientnet_b4, input_to_model=next(iter(train_dataloader))[0].to(device))
    
    for epoch in tqdm(range(setup["max_epochs"])):

        accumulated_loss_train = 0.0

        train_accuracies = []

        efficientnet_b4.train()
        
        for train_batch in train_dataloader:
            optimizer.zero_grad()

            train_x, train_label = train_batch
            train_x = train_x.to(device)
            train_label = train_label.to(device)

            predicts = efficientnet_b4(train_x)

            loss = criterion(predicts, train_label.long())
            accumulated_loss_train += loss.item()

            loss.backward()
            optimizer.step()
            
            predict_labels = torch.max(predicts, axis=1)[1]
            correct = torch.sum(predict_labels == train_label)
            accuracy_train = correct/train_label.size(0)

            train_accuracies.append(accuracy_train)

        full_loss_train = accumulated_loss_train / train_dataloader_size

        avg_train_acurracie = sum(train_accuracies) / train_dataloader_size

        test_loss, accuracy_test = validate(efficientnet_b4, criterion, test_dataloader, writer, epoch)
        accuracies.append(accuracy_test)

        if accuracy_test > max_accuracy:
            best_model = copy.deepcopy(efficientnet_b4)
            max_accuracy = accuracy_test
            print(f'Saving: Best model at epoch {epoch+1:3d} | Accuracy: {accuracy_test:8.4f}%')
    
        print(f'Epoch: {epoch + 1:3d} | Accuracy Test: {accuracy_test:3.4f}%')

        writer.add_scalar('Loss/train', full_loss_train, epoch)
        writer.add_scalar('Loss/test', test_loss, epoch)
        writer.add_scalar('Accuracy/train', avg_train_acurracie, epoch)
        writer.add_scalar('Accuracy/test', accuracy_test, epoch)

        plot_layers(efficientnet_b4, writer, epoch)
   
    models_path = f"./experiments/{setup['experiment']}/models/"
    path = f'{models_path}{setup["experiment"]}-{max_accuracy:.2f}.pkl'
    torch.save(best_model, path)
    print(f'Model saved in: {path}')

    writer.flush()
    writer.close()

    return best_model

# Validation

In [14]:
def validate(net, criterion, test_dataloader, writer, epoch):

    accumulated_loss_test = 0.0
    test_accuracies = []

    net.eval()

    images_to_plot = []
    
    with torch.no_grad():
        for idx, test_batch in enumerate(test_dataloader):
            
            test_x, test_label = test_batch
            test_x = test_x.to(device)
            test_label = test_label.to(device)
            
            predicts = net(test_x).detach()
            loss = criterion(predicts, test_label)
            accumulated_loss_test += loss.item()
    
            predict_labels = torch.max(predicts, axis=1)[1]
            correct = torch.sum(predict_labels == test_label)
            accuracy_val = correct/test_label.size(0)
    
            test_accuracies.append(accuracy_val.to('cpu').numpy())

    avg_loss = accumulated_loss_test / test_dataloader_size
    accuracy = (sum(test_accuracies) * 100) / test_dataloader_size
    
    return avg_loss, accuracy 

# Fit

In [15]:
best_model = train()

Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [64, 1, 3, 224, 224]
Error occurs, No graph saved


RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [64, 1, 3, 224, 224]