In [1]:
# --- Imports: standard library first, then third-party ---
import time

import numpy as np
import torch

import composer
from composer.datasets.ffcv_utils import ffcv_monkey_patches, write_ffcv_dataset
from composer.models import ComposerResNetCIFAR
from torchvision import datasets, transforms

# CUDA version PyTorch was built with, dots stripped (e.g. "11.3" -> "113").
# torch.version.cuda is None on CPU-only builds, so fall back to an empty
# string instead of raising AttributeError on `.replace`.
cuda_ver = (torch.version.cuda or "").replace(".", "")

torch.manual_seed(42)  # For replicability

# Apply Composer's FFCV patches up front (the len(dataloader) workaround
# described in the FFCV section of this notebook).
ffcv_monkey_patches()

# Notebook-wide defaults. The CIFAR-10 cell rebinds batch_size / num_workers,
# so later cells see whichever assignment ran last.
device = "gpu"
batch_size = 32
num_workers = 1

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Display the CUDA version string computed in the setup cell.
cuda_ver

'113'

# Dataset and Loader
Next, we instantiate our CIFAR10 dataset and dataloader. We'll use the Torchvision CIFAR10 and PyTorch dataloader for the sake of familiarity.

In [None]:
# Per-channel mean / std handed to transforms.Normalize (applied after ToTensor).
mean = (0.507, 0.487, 0.441)
std = (0.267, 0.256, 0.276)

# Loader settings for the CIFAR-10 runs; these rebind the setup-cell defaults.
batch_size = 1024
num_workers = 2
data_directory = "/tmp"

cifar10_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Train split first, then test split; both are downloaded into data_directory if missing.
train_dataset, test_dataset = (
    datasets.CIFAR10(data_directory, train=is_train, download=True, transform=cifar10_transforms)
    for is_train in (True, False)
)

# Options common to both loaders; only drop_last / shuffle differ per split.
_shared_loader_options = dict(num_workers=num_workers, batch_size=batch_size, pin_memory=True)

train_dataloader = torch.utils.data.DataLoader(
    train_dataset, drop_last=True, shuffle=True, **_shared_loader_options
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, drop_last=False, shuffle=False, **_shared_loader_options
)

In [None]:
# Quick inspection: show the class of the training dataset object.
type(train_dataset)

# Model
Next, we create our model. We're using Composer's built-in ResNet-20 for CIFAR (`ComposerResNetCIFAR` with `model_name='resnet_20'`). To use your own custom model, please see the [custom models tutorial](https://docs.mosaicml.com/en/stable/tutorials/adding_models_datasets.html#models).

In [None]:
# Composer's CIFAR ResNet ('resnet_20') with 10 output classes for CIFAR-10.
model = ComposerResNetCIFAR(model_name='resnet_20', num_classes=10)

# Optimizer and Scheduler
The trainer will drive the optimizer and LR scheduler during training, but first we need to create them. We're using [MosaicML's SGD with decoupled weight decay](https://arxiv.org/abs/1711.05101):

In [None]:
# SGD with decoupled weight decay over all parameters of the baseline model.
optimizer = composer.optim.DecoupledSGDW(
    model.parameters(), # Model parameters to update
    lr=0.05, # Peak learning rate
    momentum=0.9,
    weight_decay=2.0e-3 # If this looks large, it's because it's not scaled by the LR as in non-decoupled weight decay
)

To keep the runtime short, we'll train our baseline model for five epochs. The first epoch will be linear warmup, followed by four epochs of constant LR. We achieve this by instantiating a `LinearWithWarmupScheduler` class. Feel free to increase the number of epochs in case you want to see the impact of running it for a longer duration.

In [None]:
# Linear warmup for one epoch, then a constant learning rate.
# The scheduler is built without reference to a specific optimizer and is
# reused by the FFCV trainer later in the notebook.
lr_scheduler = composer.optim.LinearWithWarmupScheduler(
    t_warmup="1ep", # Warm up over 1 epoch
    alpha_i=1.0, # Flat LR schedule achieved by having alpha_i == alpha_f
    alpha_f=1.0
)

# Train a baseline model
And now we create our trainer. Note: we use the GPU as the device because FFCV works best on GPU-capable machines.

In [None]:
# Baseline trainer: PyTorch dataloaders, evaluated on the CIFAR-10 test loader.
# train_epochs and device are module-level and are reused by the FFCV trainer below.
train_epochs = "5ep" # Train for 5 epochs
device = "gpu"

trainer = composer.trainer.Trainer(
    model=model,
    train_dataloader=train_dataloader,
    eval_dataloader=test_dataloader,
    max_duration=train_epochs,
    optimizers=optimizer,
    schedulers=lr_scheduler,
    device=device
)

We train and measure the training time below.

In [None]:
# Time the baseline (PyTorch dataloader) training run with a monotonic clock.
start_time = time.perf_counter()
trainer.fit()
end_time = time.perf_counter()
# Keep the elapsed time in a named variable, mirroring `accelerated_time` in
# the FFCV run, so the two runs can be compared after both have finished.
baseline_time = end_time - start_time
print(f"It took {baseline_time:0.4f} seconds to train")

# Use FFCV dataloaders to Speed Up Training
Next, we convert the dataset to the format used by FFCV. FFCV uses its own data format, designed for faster data loading. Once this cell executes successfully, you can find ```cifar_train.ffcv``` and ```cifar_val.ffcv``` in the ```data_directory``` directory.

In [None]:
from composer.datasets.ffcv_utils import write_ffcv_dataset
from torchvision.datasets import CIFAR10


# Convert each CIFAR-10 split (train first, then validation) to an FFCV file
# inside data_directory. The datasets are created without a transform here.
for is_train, ffcv_name in ((True, "/cifar_train.ffcv"), (False, "/cifar_val.ffcv")):
    ds = CIFAR10(root=data_directory, train=is_train, download=True)
    write_ffcv_dataset(dataset=ds, write_path=data_directory + ffcv_name)

The current version of ffcv (0.0.3) has a bug where calling [len(dataloader) shuffles](https://github.com/libffcv/ffcv/issues/163) the image indices to load, so calls to `len` are expensive. Composer calls `len(dataloader)` in the training loop for every batch, which hurts performance. We fix this by patching the `len` function using `ffcv_monkey_patches`.

In [None]:
# Apply Composer's FFCV patches before constructing any ffcv.Loader.
# The same import and call also appear in the first cell of this notebook;
# they are repeated here next to the explanation of why the patch is needed.
from composer.datasets.ffcv_utils import ffcv_monkey_patches
ffcv_monkey_patches()



Now let us construct the FFCV train and test dataloaders. We use transformations similar to those used for the TorchVision datasets.


In [None]:
import ffcv
from ffcv.fields.decoders import IntDecoder, SimpleRGBImageDecoder

# Please note that this mean/std is different from the mean/std used for the regular
# PyTorch dataloader: there, ToTensor rescales the pixels before Normalize runs, whereas
# the values below are expressed on the 0-255 pixel scale.
# NOTE(review): these equal (0.4914, 0.4822, 0.4465) * 255 and (0.2023, 0.1994, 0.2010) * 255,
# which is not the (0.507, 0.487, 0.441) / (0.267, 0.256, 0.276) used in the torchvision
# pipeline scaled by 255 - confirm whether the two pipelines are meant to match.
cifar10_mean_ffcv = np.array([125.307, 122.961, 113.8575])
cifar10_std_ffcv = np.array([51.5865, 50.847, 51.255])

# Label pipeline: IntDecoder -> ToTensor -> Squeeze.
label_pipeline = [IntDecoder(), ffcv.transforms.ToTensor(), ffcv.transforms.Squeeze()]

# Image pipeline: decode, convert to a channels-first float32 torch image, then normalize.
image_pipeline = [
    SimpleRGBImageDecoder(),
    ffcv.transforms.ToTensor(),
    ffcv.transforms.ToTorchImage(channels_last=False, convert_back_int16=False),
    ffcv.transforms.Convert(torch.float32),
    transforms.Normalize(cifar10_mean_ffcv, cifar10_std_ffcv),
]

# Training loader: random order and drop the last partial batch, matching
# shuffle=True / drop_last=True on the PyTorch training loader.
ffcv_train_dataloader = ffcv.Loader(
    data_directory + "/cifar_train.ffcv",
    batch_size=batch_size,
    num_workers=num_workers,
    order=ffcv.loader.OrderOption.RANDOM,
    pipelines={
        'image': image_pipeline,
        'label': label_pipeline
    },
    drop_last=True,
)

# Evaluation loader: sequential order and keep every sample, matching
# shuffle=False / drop_last=False on the PyTorch test loader. Shuffling the
# evaluation set adds work without changing what is evaluated.
ffcv_test_dataloader = ffcv.Loader(
    data_directory + "/cifar_val.ffcv",
    batch_size=batch_size,
    num_workers=num_workers,
    order=ffcv.loader.OrderOption.SEQUENTIAL,
    pipelines={
        'image': image_pipeline,
        'label': label_pipeline
    },
    drop_last=False,
)



Now let's instantiate our model, optimizer, and trainer again but with FFCV dataloaders. No need to instantiate our scheduler again because it's stateless!


In [None]:
# Fresh model instance for the FFCV run, so it does not continue from the
# baseline model trained above.
model = ComposerResNetCIFAR(model_name="resnet_20", num_classes=10)

# New optimizer bound to the new model's parameters; same hyperparameters as the baseline.
optimizer = composer.optim.DecoupledSGDW(
    model.parameters(),
    lr=0.05,
    momentum=0.9,
    weight_decay=2.0e-3
)

# Same trainer configuration as the baseline, with the FFCV loaders swapped in.
# train_epochs, lr_scheduler and device come from earlier cells.
trainer = composer.trainer.Trainer(
    model=model,
    train_dataloader=ffcv_train_dataloader,
    eval_dataloader=ffcv_test_dataloader,
    max_duration=train_epochs,
    optimizers=optimizer,
    schedulers=lr_scheduler,
    device=device,
)



And let's get training!


In [None]:
# Time the FFCV-backed training run; start_time / end_time are rebound here.
start_time = time.perf_counter()
trainer.fit()
end_time = time.perf_counter()
# Elapsed wall-clock seconds for the FFCV run.
accelerated_time = end_time - start_time
print(f"It took {accelerated_time:0.4f} seconds to train with FFCV dataloaders")

# GeoLife dataset

In [2]:
import os
import sys
import inspect

# Directory added to sys.path so that project-local packages (the `dataset`
# package imported in the next cell) can be resolved.
# inspect.getfile(inspect.currentframe()) is unreliable inside a notebook kernel:
# the frame's "file" is a synthetic name for the cell, not the notebook, so its
# dirname can point at a temporary directory. The kernel's working directory is
# used instead.
# NOTE(review): assumes the kernel is started in the notebook's own directory - confirm.
CURR_DIR = os.getcwd()
PARENT_DIR = os.path.dirname(CURR_DIR)
sys.path.insert(0, CURR_DIR)

In [3]:
from dataset.ffcv_loader.dataset_ffcv import GeoLifeCLEF2022DatasetFFCV
from dataset.pytorch_dataset import GeoLifeCLEF2022Dataset

In [4]:
# Directory where the GeoLife FFCV files are written and later read back.
# Defaults to the original scratch location; set the GEOLIFE_FFCV_DIR environment
# variable to run on another machine without editing the notebook.
save_dir = os.environ.get(
    "GEOLIFE_FFCV_DIR",
    "/home/mila/s/sara.ebrahim-elkafrawy/scratch/ecosystem_project/tmp_geo",
)

# GeoLife + Pytorch

In [5]:
# Plain-PyTorch GeoLifeCLEF 2022 datasets and loaders.
# Both splits are built with identical options; only the split name differs.
_geolife_root = "/network/scratch/s/sara.ebrahim-elkafrawy"
_geolife_options = dict(
    region="both",
    patch_data="all",  # self.opts.data.bands,
    use_rasters=False,
    patch_extractor=None,
    transform=None,
    target_transform=None,
)

# These rebind train_dataset / train_dataloader from the CIFAR-10 section.
train_dataset = GeoLifeCLEF2022Dataset(_geolife_root, "train", **_geolife_options)
val_dataset = GeoLifeCLEF2022Dataset(_geolife_root, "val", **_geolife_options)

# batch_size / num_workers are whatever earlier cells left bound.
# NOTE(review): the recorded progress bars (1253 batches) look consistent with
# batch_size=32 from the setup cell rather than the 1024 set in the CIFAR cell,
# so results depend on which cells were executed - confirm the intended value.
_geolife_loader_options = dict(num_workers=num_workers, batch_size=batch_size, pin_memory=True)

train_dataloader = torch.utils.data.DataLoader(
    train_dataset, drop_last=True, shuffle=True, **_geolife_loader_options
)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, drop_last=False, shuffle=False, **_geolife_loader_options
)

In [6]:
# Same CIFAR ResNet ('resnet_20') architecture, with a 17037-way output.
# NOTE(review): 17037 is presumably the number of GeoLifeCLEF 2022 target classes - confirm.
model = ComposerResNetCIFAR(model_name='resnet_20', num_classes=17037)

In [7]:
# SGD with decoupled weight decay for the GeoLife model; same hyperparameters as the CIFAR runs.
optimizer = composer.optim.DecoupledSGDW(
    model.parameters(), # Model parameters to update
    lr=0.05, # Peak learning rate
    momentum=0.9,
    weight_decay=2.0e-3 # If this looks large, it's because it's not scaled by the LR as in non-decoupled weight decay
)

In [8]:
# One-epoch timing run on the PyTorch GeoLife loader; no LR scheduler is passed
# (the schedulers argument is commented out).
# NOTE(review): val_dataloader is used for BOTH training and evaluation, and
# train_dataloader is not used. This mirrors the FFCV run below, which also uses
# its validation loader for both - looks intentional for the benchmark; confirm.
train_epochs = "1ep"
trainer = composer.trainer.Trainer(
    model=model,
    train_dataloader=val_dataloader,
    eval_dataloader=val_dataloader,
    max_duration=train_epochs,
    optimizers=optimizer,
#     schedulers=lr_scheduler,
    device=device,
)

In [10]:
# Time the GeoLife run that uses the PyTorch dataloader (wall-clock seconds).
start_time = time.perf_counter()
trainer.fit()
end_time = time.perf_counter()
print(f"It took {end_time - start_time:0.4f} seconds to train")

Epoch     0 train 100%|█████████████████████████| 1253/1253 [07:34<00:00,  2.76ba/s, loss/train=8.4862]         

Epoch     0 val     0%|                         | 0/1253 [00:00<?, ?ba/s]         [A
Epoch     0 val     0%|                         | 1/1253 [00:00<11:33,  1.80ba/s]         [A
Epoch     0 val     0%|                         | 2/1253 [00:00<08:31,  2.45ba/s]         [A
Epoch     0 val     0%|                         | 3/1253 [00:01<07:57,  2.62ba/s]         [A
Epoch     0 val     0%|                         | 4/1253 [00:01<07:43,  2.70ba/s]         [A
Epoch     0 val     0%|                         | 5/1253 [00:01<07:30,  2.77ba/s]         [A
Epoch     0 val     0%|                         | 6/1253 [00:02<07:27,  2.78ba/s]         [A
Epoch     0 val     1%|▏                        | 7/1253 [00:02<07:23,  2.81ba/s]         [A
Epoch     0 val     1%|▏                        | 8/1253 [00:02<07:22,  2.81ba/s]         [A
Epoch     0 val     1%|▏                        

It took 894.2513 seconds to train





# GeoLife + FFCV

In [9]:
import ffcv
import torch
from ffcv.fields.decoders import (
    IntDecoder,
    NDArrayDecoder,
    SimpleRGBImageDecoder,
    CenterCropRGBImageDecoder,
)
from ffcv.loader import Loader, OrderOption
from ffcv.transforms import (
    RandomHorizontalFlip,
    Cutout,
    NormalizeImage,
    RandomTranslate,
    Convert,
    ToDevice,
    ToTensor,
    ToTorchImage,
    ImageMixup,
)

In [10]:
# FFCV-flavoured GeoLifeCLEF 2022 datasets, built with the same options as the
# plain-PyTorch datasets above; only the split name differs between the two.
_geolife_ffcv_root = "/network/scratch/s/sara.ebrahim-elkafrawy"
_geolife_ffcv_options = dict(
    region="both",
    patch_data="all",  # self.opts.data.bands,
    use_rasters=False,
    patch_extractor=None,
    transform=None,
    target_transform=None,
)

ffcv_train_dataset = GeoLifeCLEF2022DatasetFFCV(_geolife_ffcv_root, "train", **_geolife_ffcv_options)

# NOTE(review): train_write_path / val_write_path are not referenced by the write
# and loader cells visible in this notebook, which build their paths from
# save_dir + "/geo_*.ffcv" instead - confirm which file names are intended.
train_write_path = os.path.join(save_dir, "geolife_train_data.ffcv")

ffcv_val_dataset = GeoLifeCLEF2022DatasetFFCV(_geolife_ffcv_root, "val", **_geolife_ffcv_options)

val_write_path = os.path.join(save_dir, "geolife_val_data.ffcv")

In [11]:
# Convert the GeoLife validation split to an FFCV file under save_dir.
# NOTE(review): the training-split conversion below is commented out, yet a later
# cell opens save_dir + "/geo_train.ffcv"; that file must already exist from an
# earlier run for the later cell to work on a fresh machine - confirm.
# write_ffcv_dataset(dataset=ffcv_train_dataset, write_path=save_dir + "/geo_train.ffcv")
write_ffcv_dataset(dataset=ffcv_val_dataset, write_path=save_dir + "/geo_val.ffcv")

100%|██████████| 40080/40080 [00:49<00:00, 803.34it/s] 


In [13]:
# Label pipeline: IntDecoder -> ToTensor -> Squeeze.
label_pipeline = [IntDecoder(), ffcv.transforms.ToTensor(), ffcv.transforms.Squeeze()]

# Per-channel mean / std handed to Normalize for the GeoLife images.
_geolife_mean_ffcv = np.array([106.9413, 114.8729, 104.5280])
_geolife_std_ffcv = np.array([51.0005, 44.8594, 43.2014])

# Image pipeline: decode, convert to a channels-first float32 torch image, then normalize.
image_pipeline = [
    SimpleRGBImageDecoder(),
    ffcv.transforms.ToTensor(),
    ffcv.transforms.ToTorchImage(channels_last=False, convert_back_int16=False),
    ffcv.transforms.Convert(torch.float32),
    transforms.Normalize(_geolife_mean_ffcv, _geolife_std_ffcv),
]

# Settings shared by both loaders; batch_size / num_workers come from earlier cells.
_geolife_ffcv_loader_options = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    order=ffcv.loader.OrderOption.RANDOM,
)

# Training-split loader: drops the last partial batch.
ffcv_train_dataloader = ffcv.Loader(
    save_dir + "/geo_train.ffcv",
    pipelines={'image': image_pipeline, 'label': label_pipeline},
    drop_last=True,
    **_geolife_ffcv_loader_options,
)

# Validation-split loader: keeps every sample.
ffcv_val_dataloader = ffcv.Loader(
    save_dir + "/geo_val.ffcv",
    pipelines={'image': image_pipeline, 'label': label_pipeline},
    drop_last=False,
    **_geolife_ffcv_loader_options,
)

In [14]:
# Quick inspection: show the classes of the PyTorch and FFCV validation loaders.
type(val_dataloader), type(ffcv_val_dataloader)

(torch.utils.data.dataloader.DataLoader, ffcv.loader.loader.Loader)

In [16]:
# Pull the first batch from the PyTorch validation loader for inspection.
data = next(iter(val_dataloader))

In [18]:
# Pull one batch from the FFCV validation loader for inspection.
data2 = next(iter(ffcv_val_dataloader))

In [21]:
# Display the FFCV batch. This prints the full tensors; inspecting shapes
# would keep the recorded output shorter.
data2

(tensor([[[[ 7.0703e-01,  6.2860e-01,  5.8938e-01,  ...,  1.3541e+00,
             1.1188e+00,  1.1776e+00],
           [ 6.8742e-01,  6.6781e-01,  6.4820e-01,  ...,  1.3149e+00,
             1.0796e+00,  1.0992e+00],
           [ 6.4820e-01,  6.6781e-01,  6.6781e-01,  ...,  1.4325e+00,
             1.1580e+00,  1.0992e+00],
           ...,
           [ 1.1776e+00,  2.0207e+00,  1.4913e+00,  ...,  4.3252e-01,
             2.9527e-01,  2.3644e-01],
           [ 1.5109e+00,  1.6286e+00,  1.3345e+00,  ...,  5.3056e-01,
             1.7762e-01, -3.8064e-02],
           [ 1.7658e+00,  1.0011e+00,  1.1188e+00,  ...,  3.7370e-01,
             4.1291e-01, -7.7280e-02]],
 
          [[ 2.0346e-01,  1.1429e-01,  6.9709e-02,  ...,  5.8242e-01,
             3.1492e-01,  4.2638e-01],
           [ 1.8117e-01,  1.5888e-01,  1.3658e-01,  ...,  5.3784e-01,
             2.7034e-01,  3.3721e-01],
           [ 1.3658e-01,  1.5888e-01,  1.5888e-01,  ...,  6.4930e-01,
             3.5950e-01,  3.1492e-01],


In [10]:
# Fresh model for the FFCV timing run, same configuration as the PyTorch GeoLife run.
# NOTE(review): 17037 is presumably the number of GeoLifeCLEF 2022 target classes - confirm.
model = ComposerResNetCIFAR(model_name='resnet_20', num_classes=17037)

In [11]:
# New optimizer bound to the fresh model's parameters; same hyperparameters as the other runs.
optimizer = composer.optim.DecoupledSGDW(
    model.parameters(), # Model parameters to update
    lr=0.05, # Peak learning rate
    momentum=0.9,
    weight_decay=2.0e-3 # If this looks large, it's because it's not scaled by the LR as in non-decoupled weight decay
)

In [12]:
# One-epoch timing run on the FFCV GeoLife loader; no LR scheduler is passed
# (the schedulers argument is commented out).
# NOTE(review): ffcv_val_dataloader is used for BOTH training and evaluation,
# and ffcv_train_dataloader is not used. This matches the PyTorch GeoLife run,
# which also uses its validation loader for both - confirm this is intended.
train_epochs = "1ep"
trainer = composer.trainer.Trainer(
    model=model,
    train_dataloader=ffcv_val_dataloader,
    eval_dataloader=ffcv_val_dataloader,
    max_duration=train_epochs,
    optimizers=optimizer,
#     schedulers=lr_scheduler,
    device=device,
)

In [13]:
# Time the GeoLife run that uses the FFCV dataloader (wall-clock seconds).
start_time = time.perf_counter()
trainer.fit()
end_time = time.perf_counter()
print(f"It took {end_time - start_time:0.4f} seconds to train")

Epoch     0 train 100%|█████████████████████████| 1253/1253 [03:37<00:00,  5.75ba/s, loss/train=8.2338]         

Epoch     0 val     0%|                         | 0/1253 [00:00<?, ?ba/s]         [A
Epoch     0 val     0%|                         | 2/1253 [00:00<01:20, 15.54ba/s]         [A
Epoch     0 val     0%|                         | 5/1253 [00:00<01:00, 20.69ba/s]         [A
Epoch     0 val     1%|▏                        | 8/1253 [00:00<00:55, 22.25ba/s]         [A
Epoch     0 val     1%|▏                        | 11/1253 [00:00<00:53, 23.19ba/s]         [A
Epoch     0 val     1%|▎                        | 14/1253 [00:00<00:52, 23.56ba/s]         [A
Epoch     0 val     1%|▎                        | 17/1253 [00:00<00:52, 23.72ba/s]         [A
Epoch     0 val     2%|▍                        | 20/1253 [00:00<00:51, 24.00ba/s]         [A
Epoch     0 val     2%|▍                        | 23/1253 [00:00<00:50, 24.25ba/s]         [A
Epoch     0 val     2%|▌                   

It took 269.8520 seconds to train





In [None]:
# Summary of recorded wall-clock training times, in seconds.
# The averages are explicitly parenthesized: `a + b / 2` would halve only b.
# NOTE(review): 269.8520 (FFCV) and 894.2513 (PyTorch) appear in the recorded
# outputs of this notebook; 245.5064 and 886.5012 are presumably timings from
# earlier runs of the same two cells - confirm before quoting these numbers.
ffcv_mean_seconds = (245.5064 + 269.8520) / 2
pytorch_mean_seconds = (886.5012 + 894.2513) / 2
print(
    f"FFCV: {ffcv_mean_seconds:0.4f} s, PyTorch: {pytorch_mean_seconds:0.4f} s, "
    f"speedup: {pytorch_mean_seconds / ffcv_mean_seconds:0.2f}x"
)