In [1]:
import numpy as np
import torch
# torch.version.cuda is None on CPU-only PyTorch builds; fall back to "" so this
# line does not raise AttributeError there. CUDA builds still yield e.g. "113".
cuda_ver = (torch.version.cuda or "").replace(".", "")
import time

import composer
from composer.models import ComposerResNetCIFAR
from torchvision import datasets, transforms

torch.manual_seed(42) # For replicability


from composer.datasets.ffcv_utils import ffcv_monkey_patches
from composer.datasets.ffcv_utils import write_ffcv_dataset

ffcv_monkey_patches()

# Initial run settings. batch_size and num_workers are assigned again in the
# dataset cell further down, so these two values only apply until then.
num_workers, batch_size, device = 1, 32, "gpu"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
cuda_ver

'113'

# Dataset and Loader
Next, we instantiate our CIFAR10 dataset and dataloader. We'll use the Torchvision CIFAR10 and PyTorch dataloader for the sake of familiarity.

In [3]:
# Per-channel statistics handed to transforms.Normalize below.
mean, std = (0.507, 0.487, 0.441), (0.267, 0.256, 0.276)

data_directory = "/tmp"
num_workers = 2
batch_size = 1024

# Convert each image to a tensor, then normalize it channel-wise.
cifar10_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Train split first, then test split; both are requested with download=True.
train_dataset, test_dataset = (
    datasets.CIFAR10(data_directory, train=is_train, download=True, transform=cifar10_transforms)
    for is_train in (True, False)
)

# Settings shared by the two loaders; only shuffling and drop_last differ.
shared_loader_args = dict(num_workers=num_workers, batch_size=batch_size, pin_memory=True)

train_dataloader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, drop_last=True, **shared_loader_args
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, shuffle=False, drop_last=False, **shared_loader_args
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
type(train_dataset)

torchvision.datasets.cifar.CIFAR10

# Model
Next, we create our model. We're using Composer's built-in ResNet-20 for CIFAR. To use your own custom model, please see the [custom models tutorial](https://docs.mosaicml.com/en/stable/tutorials/adding_models_datasets.html#models).

In [5]:
# Composer's CIFAR ResNet, 'resnet_20' variant, configured for 10 classes.
model = ComposerResNetCIFAR(
    num_classes=10,
    model_name='resnet_20',
)

# Optimizer and Scheduler
The trainer will drive the optimizer and LR scheduler during training, but first we need to create them. We're using [MosaicML's SGD with decoupled weight decay](https://arxiv.org/abs/1711.05101):

In [6]:
# Hyperparameters for SGD with decoupled weight decay.
sgdw_hparams = dict(
    lr=0.05,              # peak learning rate
    momentum=0.9,
    weight_decay=2.0e-3,  # looks large because, being decoupled, it is not scaled by the LR
)
# The optimizer updates every parameter of the model.
optimizer = composer.optim.DecoupledSGDW(model.parameters(), **sgdw_hparams)

To keep the runtime short, we'll train our baseline model for five epochs. The first epoch will be linear warmup, followed by four epochs of constant LR. We achieve this by instantiating a `LinearWithWarmupScheduler` class. Feel free to increase the number of epochs in case you want to see the impact of running it for a longer duration.

In [7]:
# One epoch of warmup; alpha_i == alpha_f == 1.0 keeps the schedule flat afterwards.
lr_scheduler = composer.optim.LinearWithWarmupScheduler(
    alpha_f=1.0,
    alpha_i=1.0,
    t_warmup="1ep",
)

# Train a baseline model
And now we create our trainer. Note: we use a GPU as the device because FFCV works best on GPU-capable machines.

In [8]:
device = "gpu"
train_epochs = "5ep"  # total training duration: five epochs

# Baseline trainer: PyTorch dataloaders for training and evaluation, with the
# optimizer and LR scheduler created in the cells above.
trainer = composer.trainer.Trainer(
    device=device,
    schedulers=lr_scheduler,
    optimizers=optimizer,
    max_duration=train_epochs,
    eval_dataloader=test_dataloader,
    train_dataloader=train_dataloader,
    model=model,
)

Python 3.9.13 | packaged by conda-forge | (main, May 27 2022, 16:56:21) 
Type 'copyright', 'credits' or 'license' for more information
IPython 8.4.0 -- An enhanced Interactive Python. Type '?' for help.


check self.logger



In [1]:  





In [1]:  exit





We train and measure the training time below.

In [9]:
# Time the baseline run (the trainer built on the PyTorch dataloaders).
start_time = time.perf_counter()
trainer.fit()
end_time = time.perf_counter()
# Keep the elapsed seconds in a variable, as the FFCV cell does with
# `accelerated_time`, so the two runs can be compared after both have finished.
baseline_time = end_time - start_time
print(f"It took {baseline_time:0.4f} seconds to train")

Epoch     0 train 100%|█████████████████████████| 48/48 [00:13<00:00,  3.67ba/s, loss/train=1.5937]         

Epoch     0 val     0%|                         | 0/10 [00:00<?, ?ba/s]         [A
Epoch     0 val    10%|██▌                      | 1/10 [00:00<00:02,  3.36ba/s]         [A
Epoch     0 val    30%|███████▌                 | 3/10 [00:00<00:00,  8.03ba/s]         [A
Epoch     0 val    50%|████████████▌            | 5/10 [00:00<00:00, 10.12ba/s]         [A
Epoch     0 val    70%|█████████████████▌       | 7/10 [00:00<00:00, 11.25ba/s]         [A
Epoch     0 val    90%|██████████████████████▌  | 9/10 [00:00<00:00, 12.03ba/s]         [A
Epoch     0 val   100%|█████████████████████████| 10/10 [00:00<00:00, 12.03ba/s]         [A
Epoch     0 val   100%|█████████████████████████| 10/10 [00:00<00:00, 12.03ba/s]         [A
Epoch     0 val   100%|█████████████████████████| 10/10 [00:00<00:00, 10.50ba/s, metrics/eval/Accuracy=0.3409]         [A
Epoch     1 train 100%|██████████████

It took 36.4631 seconds to train





# Use FFCV dataloaders to Speed Up Training
Next, we convert the dataset to the format used by FFCV. FFCV uses its own data format, which is designed for faster data loading. Once this cell executes successfully, you can find ```cifar_train.ffcv``` and ```cifar_val.ffcv``` in the ```data_directory``` directory.

In [None]:
from composer.datasets.ffcv_utils import write_ffcv_dataset
from torchvision.datasets import CIFAR10

# Convert both CIFAR10 splits to FFCV files under data_directory:
# the train split first, then the validation (train=False) split.
for is_train, ffcv_name in ((True, "/cifar_train.ffcv"), (False, "/cifar_val.ffcv")):
    ds = CIFAR10(root=data_directory, train=is_train, download=True)
    write_ffcv_dataset(dataset=ds, write_path=data_directory + ffcv_name)

The current version of ffcv (0.0.3) has a bug where calling [len(dataloader) shuffles](https://github.com/libffcv/ffcv/issues/163) the image indices to load, so calls to len are expensive. Composer calls len(dataloader) in the training loop for every batch, which hurts performance. We fix this by patching the len function using ffcv_monkey_patches.

In [None]:
# Apply Composer's FFCV patches, which work around the expensive len(dataloader)
# call described in the note above.
# NOTE(review): the first cell already imports and calls this function; repeating
# it here is presumably harmless — confirm.
from composer.datasets.ffcv_utils import ffcv_monkey_patches
ffcv_monkey_patches()



Now let us construct the FFCV train and test dataloaders. We use transformations similar to those used for the TorchVision datasets.


In [None]:
import ffcv
from ffcv.fields.decoders import IntDecoder, SimpleRGBImageDecoder

# These constants differ from the mean/std used for the regular PyTorch dataloader
# because ToTensor does the rescaling there; here Normalize sees raw pixel values.
# NOTE(review): they are not exactly 255x the PyTorch constants
# (e.g. 0.507 * 255 = 129.285 vs 125.307), so the two pipelines do not normalize
# identically — confirm whether that is intended.
cifar10_mean_ffcv = np.array([125.307, 122.961, 113.8575])
cifar10_std_ffcv = np.array([51.5865, 50.847, 51.255])

# Labels: decode the integer, convert to a tensor, squeeze.
label_pipeline = [IntDecoder(), ffcv.transforms.ToTensor(), ffcv.transforms.Squeeze()]
# Images: decode RGB, convert to a channels-first float32 torch image, normalize.
image_pipeline = [SimpleRGBImageDecoder(), ffcv.transforms.ToTensor(),
                ffcv.transforms.ToTorchImage(channels_last=False, convert_back_int16=False),
                ffcv.transforms.Convert(torch.float32),
                transforms.Normalize(cifar10_mean_ffcv, cifar10_std_ffcv),
            ]

# Training loader: random order and incomplete final batch dropped, matching
# shuffle=True / drop_last=True on the PyTorch train loader.
ffcv_train_dataloader = ffcv.Loader(
                data_directory + "/cifar_train.ffcv",
                batch_size=batch_size,
                num_workers=num_workers,
                order=ffcv.loader.OrderOption.RANDOM,
                pipelines={
                    'image': image_pipeline,
                    'label': label_pipeline
                },
                drop_last=True,
            )
# Evaluation loader: sequential order, matching shuffle=False on the PyTorch test
# loader. Evaluation does not need shuffling, and both runs now evaluate the same way.
ffcv_test_dataloader = ffcv.Loader(
                data_directory + "/cifar_val.ffcv",
                batch_size=batch_size,
                num_workers=num_workers,
                order=ffcv.loader.OrderOption.SEQUENTIAL,
                pipelines={
                    'image': image_pipeline,
                    'label': label_pipeline
                },
                drop_last=False,
            )



Now let's instantiate our model, optimizer, and trainer again but with FFCV dataloaders. No need to instantiate our scheduler again because it's stateless!


In [None]:
# Fresh model so the FFCV run starts from newly initialized weights.
model = ComposerResNetCIFAR(num_classes=10, model_name="resnet_20")

# Same optimizer settings as the baseline run.
sgdw_hparams = dict(lr=0.05, momentum=0.9, weight_decay=2.0e-3)
optimizer = composer.optim.DecoupledSGDW(model.parameters(), **sgdw_hparams)

# Same trainer configuration as before, except that both dataloaders are the FFCV ones.
trainer = composer.trainer.Trainer(
    device=device,
    schedulers=lr_scheduler,
    optimizers=optimizer,
    max_duration=train_epochs,
    eval_dataloader=ffcv_test_dataloader,
    train_dataloader=ffcv_train_dataloader,
    model=model,
)



And let's get training!


In [None]:
# Time the training run of the trainer built on the FFCV dataloaders.
start_time = time.perf_counter()
trainer.fit()
end_time = time.perf_counter()
# Elapsed seconds spent in the trainer.fit() call above.
accelerated_time = end_time - start_time
print(f"It took {accelerated_time:0.4f} seconds to train with FFCV dataloaders")

# GeoLife dataset

In [3]:
import os
import sys
import inspect

# A notebook cell has no source file of its own, so inspect.getfile() on the current
# frame yields an IPython-generated name rather than this notebook's location.
# Use the kernel's working directory instead.
# NOTE(review): assumes the kernel was started in the directory that holds the
# `dataset` package imported below — confirm.
CURR_DIR = os.getcwd()
PARENT_DIR = os.path.dirname(CURR_DIR)
# Keep CURR_DIR at the front of sys.path without stacking duplicates on re-runs.
if sys.path[:1] != [CURR_DIR]:
    sys.path.insert(0, CURR_DIR)

In [4]:
from dataset.ffcv_loader.dataset_ffcv import GeoLifeCLEF2022DatasetFFCV
from dataset.pytorch_dataset import GeoLifeCLEF2022Dataset

In [5]:
# Directory where the GeoLife .ffcv files are written and read.
# Set the GEOLIFE_FFCV_DIR environment variable to run on another machine or
# account; without it the original hard-coded location is used.
save_dir = os.environ.get(
    "GEOLIFE_FFCV_DIR",
    "/home/mila/s/sara.ebrahim-elkafrawy/scratch/ecosystem_project/tmp_geo",
)

# GeoLife + Pytorch

In [6]:
# Root directory and options shared by the two GeoLife splits.
geolife_root = "/network/scratch/s/sara.ebrahim-elkafrawy"
geolife_options = dict(
    region="both",
    patch_data="all",  # self.opts.data.bands,
    use_rasters=False,
    patch_extractor=None,
    transform=None,
    target_transform=None,
)

train_dataset = GeoLifeCLEF2022Dataset(geolife_root, "train", **geolife_options)
val_dataset = GeoLifeCLEF2022Dataset(geolife_root, "val", **geolife_options)

# Loader settings common to both splits; only shuffling and drop_last differ.
shared_loader_args = dict(num_workers=num_workers, batch_size=batch_size, pin_memory=True)

train_dataloader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, drop_last=True, **shared_loader_args
)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, shuffle=False, drop_last=False, **shared_loader_args
)

In [7]:
# Same CIFAR ResNet variant as before, now with 17037 output classes.
# NOTE(review): 17037 is presumably the size of the GeoLife label space — confirm.
model = ComposerResNetCIFAR(
    num_classes=17037,
    model_name='resnet_20',
)

In [8]:
# Hyperparameters for SGD with decoupled weight decay.
sgdw_hparams = dict(
    lr=0.05,              # peak learning rate
    momentum=0.9,
    weight_decay=2.0e-3,  # looks large because, being decoupled, it is not scaled by the LR
)
# The optimizer updates every parameter of the model.
optimizer = composer.optim.DecoupledSGDW(model.parameters(), **sgdw_hparams)

In [9]:
# Two-epoch run. No `schedulers` argument is passed; the original line stays disabled.
# NOTE(review): the validation loader is used for training as well as evaluation —
# presumably a quick timing run; confirm.
train_epochs = "2ep"
trainer = composer.trainer.Trainer(
    device=device,
    optimizers=optimizer,
    max_duration=train_epochs,
    eval_dataloader=val_dataloader,
    train_dataloader=val_dataloader,
    model=model,
    # schedulers=lr_scheduler,
)

INFO:composer.utils.reproducibility:Setting seed to 2359799908
INFO:composer.trainer.trainer:Run name: 1658191072-fair-oarfish
INFO:composer.trainer.trainer:Stepping schedulers every batch. To step schedulers every epoch, set `step_schedulers_every_batch=False`.
INFO:composer.trainer.trainer:Setting seed to 2359799908
INFO:composer.utils.reproducibility:Setting seed to 2359799908


In [None]:
# Time the GeoLife run of the trainer built on the PyTorch DataLoader above.
start_time = time.perf_counter()
trainer.fit()
end_time = time.perf_counter()
# Report the elapsed seconds spent in trainer.fit().
print(f"It took {end_time - start_time:0.4f} seconds to train")

# GeoLife + FFCV

In [10]:
import ffcv
import torch
from ffcv.fields.decoders import (
    IntDecoder,
    NDArrayDecoder,
    SimpleRGBImageDecoder,
    CenterCropRGBImageDecoder,
)
from ffcv.loader import Loader, OrderOption
from ffcv.transforms import (
    RandomHorizontalFlip,
    Cutout,
    NormalizeImage,
    RandomTranslate,
    Convert,
    ToDevice,
    ToTensor,
    ToTorchImage,
    ImageMixup,
)

In [11]:
# Root directory and options shared by the two FFCV-flavoured GeoLife splits.
geolife_root = "/network/scratch/s/sara.ebrahim-elkafrawy"
geolife_options = dict(
    region="both",
    patch_data="all",  # self.opts.data.bands,
    use_rasters=False,
    patch_extractor=None,
    transform=None,
    target_transform=None,
)

ffcv_train_dataset = GeoLifeCLEF2022DatasetFFCV(geolife_root, "train", **geolife_options)
# NOTE(review): the visible later cells build their paths as save_dir + "/geo_*.ffcv"
# and do not reference train_write_path / val_write_path — confirm which names are intended.
train_write_path = os.path.join(save_dir, "geolife_train_data.ffcv")

ffcv_val_dataset = GeoLifeCLEF2022DatasetFFCV(geolife_root, "val", **geolife_options)
val_write_path = os.path.join(save_dir, "geolife_val_data.ffcv")

In [12]:
# Only the validation split is written here; the train-split write below is disabled.
# NOTE(review): a later cell opens save_dir + "/geo_train.ffcv", so that file
# presumably has to exist from an earlier run — confirm.
# write_ffcv_dataset(dataset=ffcv_train_dataset, write_path=save_dir + "/geo_train.ffcv")
write_ffcv_dataset(dataset=ffcv_val_dataset, write_path=save_dir + "/geo_val.ffcv")

INFO:composer.datasets.ffcv_utils:Writing dataset in FFCV <file>.ffcv format to /home/mila/s/sara.ebrahim-elkafrawy/scratch/ecosystem_project/tmp_geo/geo_val.ffcv.
100%|██████████| 256/256 [00:01<00:00, 212.40it/s]


In [13]:
# Labels: decode the integer, convert to a tensor, squeeze.
label_pipeline = [IntDecoder(), ffcv.transforms.ToTensor(), ffcv.transforms.Squeeze()]

# Per-channel statistics handed to Normalize at the end of the image pipeline.
geo_mean = np.array([106.9413, 114.8729, 104.5280])
geo_std = np.array([51.0005, 44.8594, 43.2014])

# Images: decode RGB, convert to a channels-first float32 torch image, normalize.
image_pipeline = [
    SimpleRGBImageDecoder(),
    ffcv.transforms.ToTensor(),
    ffcv.transforms.ToTorchImage(channels_last=False, convert_back_int16=False),
    ffcv.transforms.Convert(torch.float32),
    transforms.Normalize(geo_mean, geo_std),
]


def _make_geo_loader(file_name, drop_last):
    """Return a randomly ordered ffcv.Loader over save_dir + file_name.

    Uses the image/label pipelines defined above together with the current
    batch_size and num_workers; a new pipelines dict is built on every call.
    """
    return ffcv.Loader(
        save_dir + file_name,
        batch_size=batch_size,
        num_workers=num_workers,
        order=ffcv.loader.OrderOption.RANDOM,
        pipelines={'image': image_pipeline, 'label': label_pipeline},
        drop_last=drop_last,
    )


ffcv_train_dataloader = _make_geo_loader("/geo_train.ffcv", drop_last=True)
ffcv_val_dataloader = _make_geo_loader("/geo_val.ffcv", drop_last=False)

In [14]:
# type(val_dataloader), type(ffcv_val_dataloader)

In [15]:
#data2 = next(iter(ffcv_val_dataloader))

In [16]:
# Fresh CIFAR ResNet ('resnet_20') with 17037 output classes for the FFCV run.
# NOTE(review): 17037 is presumably the size of the GeoLife label space — confirm.
model = ComposerResNetCIFAR(
    num_classes=17037,
    model_name='resnet_20',
)

In [17]:
# Hyperparameters for SGD with decoupled weight decay.
sgdw_hparams = dict(
    lr=0.05,              # peak learning rate
    momentum=0.9,
    weight_decay=2.0e-3,  # looks large because, being decoupled, it is not scaled by the LR
)
# The optimizer updates every parameter of the model.
optimizer = composer.optim.DecoupledSGDW(model.parameters(), **sgdw_hparams)

In [18]:
device

'gpu'

In [19]:
# Two-epoch run on the FFCV loaders. No `schedulers` argument is passed; the
# original line stays disabled.
# NOTE(review): the FFCV validation loader is used for training as well as
# evaluation — presumably a quick timing run; confirm.
train_epochs = "2ep"
trainer = composer.trainer.Trainer(
    device=device,
    optimizers=optimizer,
    max_duration=train_epochs,
    eval_dataloader=ffcv_val_dataloader,
    train_dataloader=ffcv_val_dataloader,
    model=model,
    # schedulers=lr_scheduler,
)

INFO:composer.utils.reproducibility:Setting seed to 1769560530
INFO:composer.trainer.trainer:Run name: 1658191086-famous-kagu
INFO:composer.trainer.trainer:Stepping schedulers every batch. To step schedulers every epoch, set `step_schedulers_every_batch=False`.
INFO:composer.trainer.trainer:Setting seed to 1769560530
INFO:composer.utils.reproducibility:Setting seed to 1769560530


In [20]:
# Time the GeoLife run of the trainer built on the FFCV loaders above.
start_time = time.perf_counter()
trainer.fit()
end_time = time.perf_counter()
# Report the elapsed seconds spent in trainer.fit().
print(f"It took {end_time - start_time:0.4f} seconds to train")

INFO:composer.trainer.trainer:Using precision Precision.FP32
Epoch     0 train 100%|█████████████████████████| 8/8 [00:09<00:00,  1.14s/ba, loss/train=9.8233]         

Epoch     0 val     0%|                         | 0/8 [00:00<?, ?ba/s]         [A
Epoch     0 val    25%|██████▎                  | 2/8 [00:00<00:00, 15.83ba/s]         [A
Epoch     0 val    62%|███████████████▋         | 5/8 [00:00<00:00, 20.93ba/s]         [A
Epoch     0 val   100%|█████████████████████████| 8/8 [00:00<00:00, 22.69ba/s]         [A
Epoch     0 val   100%|█████████████████████████| 8/8 [00:00<00:00, 22.69ba/s]         [A
Epoch     0 val   100%|█████████████████████████| 8/8 [00:00<00:00, 22.69ba/s]         [A
Epoch     0 val   100%|█████████████████████████| 8/8 [00:00<00:00, 20.56ba/s, metrics/eval/Accuracy=0.0000]         [A
Epoch     1 train 100%|█████████████████████████| 8/8 [00:01<00:00,  5.85ba/s, loss/train=8.8551]         

Epoch     1 val     0%|                         | 0/8 [00:00<?, 

It took 12.0103 seconds to train





In [None]:
245.5064 +  269.8520 / 2 seconds
886.5012 It took 894.2513 seconds to train

In [None]:
PL model:
ffcv ->    10 epochs 58.09489850606769
no   ->    10 epochs 35.13264705892652    