# Basic training of a CNN on imagenet from tfrecord files using NVidia DALI

Here we will run a simplified training loop for a CNN model on ImageNet. We will create an [NVidia DALI](https://docs.nvidia.com/deeplearning/dali/user-guide/docs/index.html) input pipeline based on the [tfrecord](https://docs.nvidia.com/deeplearning/dali/user-guide/docs/operations/nvidia.dali.fn.readers.tfrecord.html#nvidia-dali-fn-readers-tfrecord) reader to read the ImageNet dataset stored in tfrecord files. Here we will make the input pipeline use bothe the cpu and gpu.

In [1]:
import glob
import time
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
import nvidia.dali.fn as fn
import nvidia.dali.types as types
import nvidia.dali.tfrecord as tfrec
from torchvision import models
from nvidia.dali.pipeline import Pipeline
from nvidia.dali.plugin.pytorch import DALIClassificationIterator, LastBatchPolicy

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data_dir = '/scratch/snx3000/datasets/imagenet/ILSVRC2012_1k/'

tfrec_files = sorted(glob.glob(f'{data_dir}/train/*'))
index_files = sorted(glob.glob(f'{data_dir}/idx_files/train/*'))

In [3]:
batch_size = 128

pipe = Pipeline(batch_size=batch_size,
                num_threads=12,
                device_id=0)

with pipe:
    example = fn.readers.tfrecord(
        path=tfrec_files,
        index_path=index_files,
        features={
            'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ''),
            'image/class/label': tfrec.FixedLenFeature((), tfrec.int64, -1),
        },
        # read_ahead=True  # seems causing some troubles
    )
    label = example['image/class/label'] - 1
    image = fn.decoders.image(example['image/encoded'], device='mixed', output_type=types.RGB)
    image = fn.resize(image, device='gpu', size=(224, 224), dtype=types.FLOAT)
    image = fn.transpose(image, perm=(2, 0, 1))
    pipe.set_outputs(image.gpu(), label.gpu())

In [4]:
pipe.build()

In [5]:
train_loader = DALIClassificationIterator(
    pipe,
    last_batch_padded=False,
    auto_reset=True,
    last_batch_policy=LastBatchPolicy.DROP,
)

In [6]:
device = 0

model = models.resnet50()
model.to(device);

In [7]:
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [8]:
def benchmark_step(model, imgs, labels):
    optimizer.zero_grad()
    output = model(imgs)
    loss = F.cross_entropy(output, labels)
    loss.backward()
    optimizer.step()

In [9]:
num_epochs = 5
num_iters = 10
imgs_sec = []
for epoch in range(num_epochs):
    t0 = time.time()
    for step, samples in enumerate(train_loader):
        if step > num_iters:
            break
        
        imgs = samples[0]['data']
        labels = samples[0]['label']
        benchmark_step(model, imgs, labels)

    dt = time.time() - t0
    imgs_sec.append(batch_size * num_iters / dt)

    print(f' * Epoch {epoch:2d}: '
          f'{imgs_sec[epoch]:.2f} images/sec per GPU')

 * Epoch  0: 177.55 images/sec per GPU
 * Epoch  1: 216.81 images/sec per GPU
 * Epoch  2: 188.60 images/sec per GPU
 * Epoch  3: 202.28 images/sec per GPU
 * Epoch  4: 201.57 images/sec per GPU
