In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pathlib import Path

import lightning as pl
import torch
from torch.utils.data import DataLoader

from dataset import WatermarkedDataset
from trainer import DifficultyScheduler, WatermarkRemovalModel

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'


device

'cpu'

In [4]:
def calculate_difficulty_step(
    initial_difficulty: float,
    max_difficulty: float,
    max_epochs: int,
) -> float:
    """
    Calculate the step size for difficulty increase based on epochs.

    Parameters
    ----------
    initial_difficulty: float
        Starting difficulty level between ``0`` and ``1``
    max_difficulty: float
        Target difficulty level between ``0`` and ``1``
    max_epochs: int
        Number of epochs to train for

    Returns
    -------
    step_size: float
        Amount to increase difficulty by each epoch

    """
    total_difficulty_increase = max_difficulty - initial_difficulty
    return total_difficulty_increase / max_epochs

In [5]:
initial_difficulty = 0.25
max_difficulty = 0.95
max_epochs = 5

image_size = 512

In [6]:
# calculate difficulty step size
difficulty_step = calculate_difficulty_step(
    initial_difficulty=initial_difficulty,
    max_difficulty=max_difficulty,
    max_epochs=max_epochs,
)

In [7]:
# create datasets
train_dataset = WatermarkedDataset(
    root_dir='/Users/nathancooperjones/GitHub/multi_label_pigeon/assets',
    difficulty=initial_difficulty,
    image_size=image_size,
)

# create dataloaders
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=2,
    num_workers=os.cpu_count(),
    shuffle=True,
    pin_memory=True,
)

In [8]:
# create model
model = WatermarkRemovalModel(
    learning_rate=1e-3,
)

In [9]:
# create trainer
trainer = pl.Trainer(
    accelerator=device,
    max_epochs=max_epochs,
    enable_checkpointing=False,
    # TODO: add CSV logger
    callbacks=[
        # pl.callbacks.ModelCheckpoint(
        #     dirpath=output_path,
        #     filename='watermark-removal-{epoch:02d}-{val_loss:.2f}',
        #     monitor='val_loss',
        #     mode='min',
        #     save_top_k=3,
        # ),
        # pl.callbacks.EarlyStopping(
        #     monitor='val_loss',
        #     patience=10,
        #     mode='min',
        # ),
        DifficultyScheduler(
            initial_difficulty=initial_difficulty,
            max_difficulty=max_difficulty,
            step_size=difficulty_step,
        ),
    ],
)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/nathancooperjones/miniconda3/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/Users/nathancooperjones/miniconda3/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


In [10]:
# train the model
trainer.fit(model, train_loader)

/Users/nathancooperjones/miniconda3/lib/python3.11/site-packages/lightning/pytorch/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.

   | Name       | Type       | Params | Mode 
---------------------------------------------------
0  | enc_1      | DoubleConv | 39.0 K | train
1  | enc_2      | DoubleConv | 221 K  | train
2  | enc_3      | DoubleConv | 886 K  | train
3  | enc_4      | DoubleConv | 3.5 M  | train
4  | enc_5      | DoubleConv | 14.2 M | train
5  | dec_5      | DoubleConv | 7.1 M  | train
6  | dec_4      | DoubleConv | 3.0 M  | train
7  | dec_3      | DoubleConv | 738 K  | train
8  | dec_2      | DoubleConv | 184 K  | train
9  | dec1       | DoubleConv | 110 K  | train
10 | pool       | MaxPool2d  | 0      | train
11 | upsample   | Upsample   | 0      | train
12 | final_conv | Conv2d     | 195    | train
13 | activation | Sigmoid    | 0      | train
14 | vgg        | Sequential | 1.7 M  | eval 
--------

Training: |                                               | 0/? [00:00<?, ?it/s]

MisconfigurationException: ReduceLROnPlateau conditioned on metric val_loss which is not available. Available metrics are: ['train_l1_loss', 'train_perceptual_loss', 'train_total_loss']. Condition can be set using `monitor` key in lr scheduler dict

----- 