<a href="https://colab.research.google.com/github/roy-sr/pytorchcustomloader_deepspeed/blob/master/DeepSpeed_PytorchCustomLoader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from google.colab import drive
drive.mount('/content/drive',  force_remount=True)

Mounted at /content/drive


**Import libraries**

In [0]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import argparse
import os
import torchvision.datasets.vision
import cv2
from datetime import datetime
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR

**Network**

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

**Custom Data Loader**

In [0]:
def default_image_loader(path):
     image = cv2.imread(path)
     image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
     return image


class MNISTCustom_Loader(torchvision.datasets.vision.VisionDataset):
    def __init__(self, root, filenames_filename, class_filename, transform=None, target_transform=None):
        super(MNISTCustom_Loader, self).__init__(root, transform=transform,target_transform=target_transform)
        """ filenames_filename: A text file with each line containing the path to an image e.g. images/class/sample.jpg
            class_filename: A text file with each line containing the class of the image """

        self.base_path = root  
        self.loader = default_image_loader
        self.filenamelist = []
        for line in open(filenames_filename):
            self.filenamelist.append(line.rstrip('\n'))

        self.targets = []
        for line in open(class_filename):
            self.targets.append(int(line.rstrip('\n')))   

        self.transform = transform
        self.target_transform = target_transform
        print("Loader Intialized Successfully")
      
    def __getitem__(self, index):
        class_type = int(self.targets[index])
        path = self.filenamelist[index]
        img_path = os.path.join(self.base_path,str(path).replace("\\","/"))
        img = self.loader(img_path)       
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            class_type = self.target_transform(int(class_type))    

        return img, class_type

    def __len__(self):
        return len(self.targets)

**Arguments**

In [7]:
# Training settings
parser = argparse.ArgumentParser(description='DeepSpeed_PytorchCustomLoader')
parser.add_argument('--epochs', type=int, default=12, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='enables CUDA training')

_StoreTrueAction(option_strings=['--no-cuda'], dest='no_cuda', nargs=0, const=True, default=False, type=None, choices=None, help='enables CUDA training', metavar=None)

**State of the art pytorch train model**

In [0]:
def train(args, model, device, train_loader, optimizer, epoch, sum_loss):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        sum_loss += loss
        loss.backward()
        optimizer.step()
        return sum_loss
        

**Main**

In [0]:
args = parser.parse_args([])

use_cuda = not args.no_cuda and torch.cuda.is_available()

device = torch.device("cuda" if use_cuda else "cpu")
print("Device is ", device)

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

train_dataset = MNISTCustom_Loader("/content/drive/My Drive/Colab Notebooks/Digit_Images/",
                                   "/content/drive/My Drive/Colab Notebooks/Digit_Images/image_files.txt" ,
                                   "/content/drive/My Drive/Colab Notebooks/Digit_Images/class.txt",
                                   transform=transforms.Compose([
                                                                 transforms.ToTensor()  ,
                                                                 transforms.Normalize((0.1307,), (0.3081,))                                       
                                                                ]))

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, **kwargs)


model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=1.0)

scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

Device is  cuda
Loader Intialized Successfully


In [0]:
print("Start time : ",datetime.now())
for epoch in range(1, args.epochs + 1):
    sum_loss = 0
    sum_loss = train(args, model, device, train_loader, optimizer, epoch, sum_loss)
    scheduler.step()
    print("Epoch : ", epoch , " Current_epoch_train_sum_loss : " , sum_loss)
print("End time : ",datetime.now())

Start time :  2020-03-03 16:55:03.669965
Epoch :  1  Current_epoch_train_sum_loss :  tensor(2.3463, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  2  Current_epoch_train_sum_loss :  tensor(5353339., device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  3  Current_epoch_train_sum_loss :  tensor(21434644., device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  4  Current_epoch_train_sum_loss :  tensor(3295686.5000, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  5  Current_epoch_train_sum_loss :  tensor(1166831.5000, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  6  Current_epoch_train_sum_loss :  tensor(1401051., device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  7  Current_epoch_train_sum_loss :  tensor(1125358.7500, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  8  Current_epoch_train_sum_loss :  tensor(791744.8125, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  9  Current_epoch_train_sum_loss :  tensor(723905.7500, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  10  Current_epo

**DeepSpeed Installation from  GITHUB**

In [3]:
!git init
!git pull https://github.com/microsoft/DeepSpeed.git
!./install.sh

Initialized empty Git repository in /content/.git/
remote: Enumerating objects: 43, done.[K
remote: Counting objects: 100% (43/43), done.[K
remote: Compressing objects: 100% (35/35), done.[K
remote: Total 546 (delta 15), reused 19 (delta 8), pack-reused 503[K
Receiving objects: 100% (546/546), 406.13 KiB | 1.07 MiB/s, done.
Resolving deltas: 100% (264/264), done.
From https://github.com/microsoft/DeepSpeed
 * branch            HEAD       -> FETCH_HEAD
Updating git hash/branch info
git_hash = '7dbeba3'
git_branch = 'master'
No hostfile exists at /job/hostfile, installing locally
Collecting torch==1.2
[?25l  Downloading https://files.pythonhosted.org/packages/30/57/d5cceb0799c06733eefce80c395459f28970ebb9e896846ce96ab579a3f1/torch-1.2.0-cp36-cp36m-manylinux1_x86_64.whl (748.8MB)
[K     |████████████████████████████████| 748.9MB 24kB/s 
[?25hCollecting torchvision==0.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/06/e6/a564eba563f7ff53aa7318ff6aaa5bd8385cbda39ed55b

**Import DeepSpeed**

In [4]:
import deepspeed



**DeepSpeed Train**

In [0]:
def train_with_deepsped(args, model_engine, device, train_loader,  epoch, sum_loss):
    for batch_idx, (data, target) in enumerate(train_loader):
        #- deepspeed
        data, target = data[0].to(model_engine.local_rank) , target[0].to(model_engine.local_rank)
        #- - deepspeed
        data , target = Variable(data.unsqueeze(0)), Variable(target.unsqueeze(0))
        
        #- deepspeed
        output = model_engine(data)
        loss = F.nll_loss(output, target)
        sum_loss += loss
        #- deepspeed , runs backpropagation
        model_engine.backward(loss)
        #- deepspeed , weight update - deepspeed
        model_engine.step()
        return sum_loss
        

**Deepspeed additional Arguments**

In [8]:
parser.add_argument('--local_rank', type=int, default=0,
                    help='local rank passed from distributed launcher')

parser.add_argument('--deepspeed_config', default="/content/drive/My Drive/Colab Notebooks/Casme/ds_config.json", type=str,
                    help='deepspeed config file')


_StoreAction(option_strings=['--deepspeed_config'], dest='deepspeed_config', nargs=None, const=None, default='/content/drive/My Drive/Colab Notebooks/Casme/ds_config.json', type=<class 'str'>, choices=None, help='deepspeed config file', metavar=None)

**DeepSpeed Main**

In [0]:
args = parser.parse_args([])

use_cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print("Device is ", device)

device = torch.device("cuda" if use_cuda else "cpu")

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

train_dataset = MNISTCustom_Loader("/content/drive/My Drive/Colab Notebooks/Digit_Images/",
                                   "/content/drive/My Drive/Colab Notebooks/Digit_Images/image_files.txt" ,
                                   "/content/drive/My Drive/Colab Notebooks/Digit_Images/class.txt",
                                   transform=transforms.Compose([
                                                                 transforms.ToTensor()  ,
                                                                 transforms.Normalize((0.1307,), (0.3081,))                                       
                                                                ]))


model = Net().to(device)

# Initialize DeepSpeed to use the following features
# 1) Distributed model
# 2) Distributed data loader
# 3) DeepSpeed optimizer

os.environ["RANK"] = "0"
os.environ["WORLD_SIZE"] = "1"
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = "6000"

model_engine, optimizer, train_loader, __ = deepspeed.initialize(args=args, model=model, model_parameters=model.parameters(), training_data=train_dataset)



Device is  cuda
Loader Intialized Successfully
DeepSpeed info: version=0.1.0, git-hash=7e3509b, git-branch=master


[INFO 2020-03-03 17:07:55] Set device to local rank 0 within node.
[INFO 2020-03-03 17:07:55] Using DeepSpeed Optimizer param name adam as basic optimizer
[INFO 2020-03-03 17:07:55] DeepSpeed Basic Optimizer = FusedAdam (
Parameter Group 0
    betas: [0.8, 0.999]
    bias_correction: True
    eps: 1e-08
    lr: 0.001
    max_grad_norm: 0.0
    weight_decay: 3e-07
)
[INFO 2020-03-03 17:07:55] DeepSpeed LR Scheduler = None
[INFO 2020-03-03 17:07:55] rank:0 step=0, skipped=0, lr=[0.001], mom=[[0.8, 0.999]]


 After Train batch 28 micro_batch 28 and grad_acc 1
1 1
DeepSpeedLight configuration:
  allgather_size ............... 500000000
  allreduce_always_fp32 ........ False
  disable_allgather ............ False
  dump_state ................... False
  dynamic_loss_scale_args ...... None
  fp16_enabled ................. False
  global_rank .................. 0
  gradient_accumulation_steps .. 1
  gradient_clipping ............ 0.0
  initial_dynamic_scale ........ 4294967296
  loss_scale ................... 0
  optimizer_legacy_fusion ...... True
  optimizer_name ............... adam
  optimizer_params ............. {'lr': 0.001, 'betas': [0.8, 0.999], 'eps': 1e-08, 'weight_decay': 3e-07}
  prescale_gradients ........... False
  scheduler_name ............... None
  scheduler_params ............. None
  sparse_gradients_enabled ..... False
  steps_per_print .............. 2000
  tensorboard_enabled .......... False
  tensorboard_job_name ......... DeepSpeedJobName
  tensorboard_output_path .

In [0]:
print("Start time : ",datetime.now())

for epoch in range(1, args.epochs + 1):
    sum_loss = 0
    sum_loss = train_with_deepsped(args, model_engine, device, train_loader, epoch, sum_loss)
    print("Epoch : ", epoch , " Current_epoch_train_sum_loss : ", sum_loss )

print("End time : ",datetime.now())

Start time :  2020-03-03 17:08:00.698877
Epoch :  1  Current_epoch_train_sum_loss :  tensor(2.5222, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  2  Current_epoch_train_sum_loss :  tensor(1.5162, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  3  Current_epoch_train_sum_loss :  tensor(0.0890, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  4  Current_epoch_train_sum_loss :  tensor(0.0160, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  5  Current_epoch_train_sum_loss :  tensor(0.0016, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  6  Current_epoch_train_sum_loss :  tensor(0.0017, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  7  Current_epoch_train_sum_loss :  tensor(0.0004, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  8  Current_epoch_train_sum_loss :  tensor(5.7220e-06, device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  9  Current_epoch_train_sum_loss :  tensor(0., device='cuda:0', grad_fn=<AddBackward0>)
Epoch :  10  Current_epoch_train_sum_loss :  tensor(0., dev