In [1]:
# source: https://github.com/fastai/fastbook/blob/master/05_pet_breeds.ipynb
# doc about Data Parallel with fastai v2: https://dev.fast.ai/distributed#ParallelTrainer

# Initialization
from utils import *
from fastai2.distributed import *
from fastai2.vision.all import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Choose a GPU.
#   gpu = <int>  -> pin this process to that CUDA device
#   gpu = None   -> no specific device requested (later cells read this as "use all GPUs")
gpu = 0

if torch.cuda.is_available():
    n_gpu = torch.cuda.device_count()
    # Do not overwrite `gpu`: later cells test `gpu is None` to decide whether to
    # enable data-parallel training, so a None must survive this cell.
    torch.cuda.set_device(0 if gpu is None else gpu)
    active_gpu = torch.cuda.current_device()
    print(f'cuda device: {active_gpu}')
    # Report the name of the device actually selected, not always device 0.
    print(f'cuda device name: {torch.cuda.get_device_name(active_gpu)}')
else:
    # The CUDA queries above raise on a machine without CUDA, so they are only
    # issued inside the CUDA branch.
    n_gpu = None
    print('cuda not available: running on CPU')

cuda device: 0
cuda device name: Tesla V100-PCIE-32GB


In [3]:
# Get the PETS dataset and build the dataloaders
path = untar_data(URLs.PETS)
Path.BASE_PATH = path

# The label is the part of the file name matched before the trailing "_<digits>.jpg"
label_from_name = using_attr(RegexLabeller(r'(.+)_\d+.jpg$'), 'name')

pets = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    splitter=RandomSplitter(seed=42),  # fixed seed for the random split
    get_y=label_from_name,
    item_tfms=Resize(460),
    batch_tfms=aug_transforms(size=224, min_scale=0.75),
)

images_dir = path/"images"
dls = pets.dataloaders(images_dir)

## DataParallel with fastai v2

In [4]:
# Get the learner and, when all GPUs were requested (gpu is None), switch it to
# fastai's Data Parallel (DP) mode.
# NOTE: `learn.parallel_ctx()` is a context manager; calling it without `with`
# never enters it, so DP would not be enabled. `learn.to_parallel()` enables DP
# directly; call `learn.detach_parallel()` after training to go back to normal.
learn = cnn_learner(dls, resnet34, metrics=error_rate).to_fp16()
# Only worth enabling with at least two GPUs (same rule as the PyTorch section below).
if gpu is None and n_gpu and n_gpu > 1: learn.to_parallel()

In [5]:
# Train the learner built in the previous cell: fine_tune for 2 epochs
learn.fine_tune(epochs=2)

epoch,train_loss,valid_loss,error_rate,time
0,1.539825,0.33029,0.103518,00:13


epoch,train_loss,valid_loss,error_rate,time
0,0.513095,0.318171,0.103518,00:15
1,0.324813,0.236592,0.069689,00:15


## DataParallel with PyTorch

In [6]:
# Fresh learner; this time the model is wrapped by hand with PyTorch's nn.DataParallel
learn = cnn_learner(dls, resnet34, metrics=error_rate).to_fp16()

if torch.cuda.device_count() >= 2:
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Wrap the model in DataParallel, then move the wrapper to `device`
    learn.model = nn.DataParallel(learn.model).to(device)

learn.fine_tune(2)

epoch,train_loss,valid_loss,error_rate,time
0,1.702696,0.319656,0.109608,00:20


epoch,train_loss,valid_loss,error_rate,time
0,0.573543,0.277454,0.079838,00:18
1,0.398447,0.23903,0.072395,00:17


## Without DataParallel 

In [7]:
# Baseline: same learner and schedule, with no DataParallel wrapping
learn = cnn_learner(dls, arch=resnet34, metrics=error_rate)
learn = learn.to_fp16()
learn.fine_tune(epochs=2)

epoch,train_loss,valid_loss,error_rate,time
0,1.549658,0.354018,0.113667,00:14


epoch,train_loss,valid_loss,error_rate,time
0,0.545782,0.309262,0.097429,00:16
1,0.340073,0.217408,0.075778,00:16


## END