In [5]:
import torch.nn as nn

_cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def _make_layers(cfg):
    layers = []
    in_channels = 3
    for layer_cfg in cfg:
        if layer_cfg == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            layers.append(nn.Conv2d(in_channels=in_channels,
                                    out_channels=layer_cfg,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1,
                                    bias=True))
            layers.append(nn.BatchNorm2d(num_features=layer_cfg))
            layers.append(nn.ReLU(inplace=True))
            in_channels = layer_cfg
    return nn.Sequential(*layers)


class _VGG(nn.Module):
    """
    VGG module for 3x32x32 input, 10 classes
    """

    def __init__(self, name='VGG11'):
        super(_VGG, self).__init__()
        cfg = _cfg[name]
        self.layers = _make_layers(cfg)
        flatten_features = 512
        self.fc1 = nn.Linear(flatten_features, 10)

    def forward(self, x):
        y = self.layers(x)
        y = y.view(y.size(0), -1)
        y = self.fc1(y)
        return y


def VGG11():
    return _VGG('VGG11')

In [3]:
# Part 1: 40 minibatches
import os
import torch
import json
import copy
import numpy as np
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import logging
import random
import torch.distributed as dist
import torch.utils.data.distributed



device = "cpu"
torch.set_num_threads(4)

batch_size = 256 # batch for one node
def train_model(model, train_loader, optimizer, criterion, epoch):
    """
    model (torch.nn.module): The model created to train
    train_loader (pytorch data loader): Training data loader
    optimizer (optimizer.*): A instance of some sort of optimizer, usually SGD
    criterion (nn.CrossEntropyLoss) : Loss function used to train the network
    epoch (int): Current epoch number
    """
    for epoch in range(1):  # loop over the dataset multiple times
        running_loss = 0.0
        for batch_idx, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if batch_idx % 40 == 39:    # print every 20 mini-batches
                print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / 20:.3f}')
                running_loss = 0.0
    print('Finished Training')
    return None

def test_model(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target)
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = correct / len(test_loader.dataset) * 100
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset), accuracy))

    return accuracy


def main():
    learning_rate =0.01
    weight_decay = 1e-4
    #weight_decay = 0
   # normalize = transforms.Normalize(mean=[x/255.0 for x in [125.3, 123.0, 113.9]],
                                #std=[x/255.0 for x in [63.0, 62.1, 66.7]])
    #normalize = transforms.Normalize(mean=[x/255.0 for x in [0.4914, 0.4822, 0.4465]],
            #std=[x/255.0 for x in [0.2023, 0.1994, 0.2010]])
    normalize=transforms.Normalize(
        (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) )
    transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
            ])
    # transform_train=transforms.Compose([
    # transforms.Resize(size=(224, 224)),
    # transforms.ToTensor(),normalize,
    #         ])

    transform_test = transforms.Compose([
            transforms.ToTensor(),
            normalize])
    training_set = datasets.CIFAR10(root="./data", train=True,
                                                download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(training_set,
                                                    num_workers=2,
                                                    batch_size=batch_size,
                                                    sampler=None,
                                                    shuffle=True,
                                                    pin_memory=True)
    test_set = datasets.CIFAR10(root="./data", train=False,
                                download=True, transform=transform_test)

    test_loader = torch.utils.data.DataLoader(test_set,
                                              num_workers=2,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              pin_memory=True)
    training_criterion = torch.nn.CrossEntropyLoss().to(device)

    model = VGG11()
    model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay,nesterov=True)
    training_criterion = torch.nn.CrossEntropyLoss().to(device)

    # Learning rate scheduler
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
    #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,'min', factor=0.5)

    # running training for one epoch
    for epoch in range(1):
        train_model(model, train_loader, optimizer, training_criterion, epoch)
        test_acc = test_model(model, test_loader, training_criterion)
        scheduler.step(test_acc)  # Update the learning rate scheduler

    if test_acc is not None:
        print(f"Highest accuracy achieved: {test_acc:.2f}%")


if __name__ == "__main__":
    main()

Files already downloaded and verified
Files already downloaded and verified
[1,    40] loss: 3.613
[1,    80] loss: 2.792
[1,   120] loss: 2.414
[1,   160] loss: 2.195
Finished Training
Test set: Average loss: 0.0047, Accuracy: 6099/10000 (60.99%)

Highest accuracy achieved: 60.99%




In [12]:
cleanup()

In [14]:
#Part2 40mini batches- 40 epoch
import os
import torch
import json
import copy
import numpy as np
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import logging
import random
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data.distributed import DistributedSampler
import torch.distributed as dist
import torch.utils.data.distributed


#import model as mdl
device = "cuda"
torch.set_num_threads(4)

batch_size = 256 # batch for one node

def setup(rank, world_size):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'

    # initialize the process group
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

def cleanup():
    dist.destroy_process_group()


def train_model(model, train_loader, optimizer, criterion, epoch):
    model.train()
    running_loss = 0.0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        output = model(data)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % 40 == 39:
            print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / 40:.3f}')
            running_loss = 0.0

    return None

def test_model(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target)
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def main():
    rank = 0
    world_size = 1
    setup(rank, world_size)

    normalize=transforms.Normalize(
        (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) )
    transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
            ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        normalize])
    training_set = datasets.CIFAR10(root="./data", train=True,
                                    download=True, transform=transform_train)

    # Training sampler
    train_sampler = DistributedSampler(training_set, num_replicas=world_size, rank=rank)

    train_loader = torch.utils.data.DataLoader(training_set,
                                               num_workers=2,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               shuffle=False,
                                               pin_memory=True)
    test_set = datasets.CIFAR10(root="./data", train=False,
                                download=True, transform=transform_test)

    test_loader = torch.utils.data.DataLoader(test_set,
                                              num_workers=2,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              pin_memory=True)
    training_criterion = torch.nn.CrossEntropyLoss().to(device)

    model = VGG11()
    model.to(device)
    # Adding DDP model
    model = DDP(model, device_ids=[rank])
    optimizer = optim.SGD(model.parameters(), lr=0.01,
                          momentum=0.9, weight_decay=1e-4)

    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

    for epoch in range(40):
        train_model(model, train_loader, optimizer, training_criterion, epoch)
        test_model(model, test_loader, training_criterion)

    cleanup()
    scheduler.step()

if __name__ == "__main__":
    main()

Files already downloaded and verified
Files already downloaded and verified
[1,    40] loss: 1.816
[1,    80] loss: 1.514
[1,   120] loss: 1.309
[1,   160] loss: 1.185
Test set: Average loss: 1.1205, Accuracy: 6046/10000 (60%)

[2,    40] loss: 1.050
[2,    80] loss: 0.983
[2,   120] loss: 0.943
[2,   160] loss: 0.881
Test set: Average loss: 0.9883, Accuracy: 6598/10000 (66%)

[3,    40] loss: 0.834
[3,    80] loss: 0.806
[3,   120] loss: 0.756
[3,   160] loss: 0.749
Test set: Average loss: 0.8780, Accuracy: 7033/10000 (70%)

[4,    40] loss: 0.717
[4,    80] loss: 0.703
[4,   120] loss: 0.662
[4,   160] loss: 0.656
Test set: Average loss: 0.7633, Accuracy: 7366/10000 (74%)

[5,    40] loss: 0.624
[5,    80] loss: 0.618
[5,   120] loss: 0.585
[5,   160] loss: 0.599
Test set: Average loss: 0.7730, Accuracy: 7340/10000 (73%)

[6,    40] loss: 0.562
[6,    80] loss: 0.568
[6,   120] loss: 0.528
[6,   160] loss: 0.548
Test set: Average loss: 0.6431, Accuracy: 7807/10000 (78%)

[7,    40] l