In [None]:
import torch, warnings, torchvision, os, h5py, time
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, utils, datasets
from torch.utils.data import DataLoader, Dataset, sampler, SubsetRandomSampler, TensorDataset
import torch.backends.cudnn as cudnn
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Silence every warning category for the rest of the session. This keeps cell output compact,
# but it also hides deprecation notices raised by the imported libraries.
warnings.filterwarnings('ignore')

In [None]:
# checking if cuda is available
# The device is pinned only after CUDA has been confirmed: calling torch.cuda.set_device()
# unconditionally fails on CPU-only machines, which made the CPU branch below unreachable.
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    torch.cuda.set_device(0)
    print('CUDA is available!  Training on Tesla T4 Device {}'.format(str(torch.cuda.current_device())))

In [None]:
# Number of CUDA devices visible to this process (shown as the cell output).
torch.cuda.device_count()

In [None]:
# opening h5 files
# The data root can be overridden with the CIFAR_ROOT_DIR environment variable so the notebook is
# not tied to one machine; the original location remains the default.
ROOT_DIR = os.environ.get("CIFAR_ROOT_DIR", "/home/nikunjlad")
# Opened read-only. The handle is intentionally left open: later cells read datasets from `hf`.
hf = h5py.File(os.path.join(ROOT_DIR,"data/cifar-10/cifar10.h5"), 'r')

In [None]:
# Show the names of the top-level members stored in the HDF5 file.
hf.keys()

In [None]:
# train, test data with labels being converted to numpy array from HDF5 format
# Datasets are looked up with hf[...] instead of hf.get(...): a missing key then raises KeyError
# right here, rather than returning None that np.array would silently turn into a 0-d NaN array
# for the float32 conversions.
x_train = np.array(hf["X_train"], dtype=np.float32)
y_train = np.array(hf["y_train"], dtype=np.int64)
x_test = np.array(hf["X_test"], dtype=np.float32)
y_test = np.array(hf["y_test"], dtype=np.int64)

In [None]:
# Report the array shapes of the training and testing splits.
for caption, array in (
    ("Training data: ", x_train),
    ("Training labels: ", y_train),
    ("Testing data: ", x_test),
    ("Testing labels: ", y_test),
):
    print(caption, array.shape)

In [None]:
class Dataset(torch.utils.data.Dataset):
    """In-memory image dataset pairing per-sample arrays with integer class labels.

    The base class is referenced by its fully qualified name. This class keeps the name
    ``Dataset`` because other cells use it, but writing ``class Dataset(Dataset)`` shadows the
    imported base and makes the class inherit from its own previous definition whenever the
    cell is re-run.

    Args:
        data: indexable collection of per-sample numpy arrays. When ``transform`` is given, each
            sample is treated as channel-first (C, H, W), as implied by the ``transpose(1, 2, 0)``
            in ``__getitem__``.
        targets: sequence of integer class labels; stored as a ``torch.LongTensor``.
        transform: optional callable applied to the PIL image built from a sample.
    """

    def __init__(self, data, targets, transform=None):
        self.data = data
        self.targets = torch.LongTensor(targets)
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(sample, label)`` for ``index``.

        Without a transform the stored array is returned unchanged. With a transform, the sample
        is cast to uint8, rearranged to (H, W, C), wrapped in a PIL image and passed through it.
        """
        sample = self.data[index]
        label = self.targets[index]

        if self.transform:
            # PIL expects channel-last uint8 data, hence the cast and the axis permutation.
            image = Image.fromarray(sample.astype(np.uint8).transpose(1, 2, 0))
            sample = self.transform(image)

        return sample, label

    def __len__(self):
        """Number of samples held by the dataset."""
        return len(self.data)

In [None]:
# Hold out a fixed fraction of the training set for validation.
valid_size = 0.10
split_seed = 42  # fixed seed so the train/validation partition is identical on every run
num_train = len(x_train)
indices = list(range(num_train))
# A dedicated RandomState makes the shuffle reproducible; the previous unseeded
# np.random.shuffle produced a different partition after every kernel restart.
np.random.RandomState(split_seed).shuffle(indices)
split = int(np.floor(valid_size * num_train))
# The first `split` shuffled indices form the validation set, the remainder the training set.
train_idx, valid_idx = indices[split:], indices[:split]
X_train_1 = x_train[train_idx,:,:,:]
y_train_1 = y_train[train_idx]
X_valid_1 = x_train[valid_idx, :, :, :]
y_valid_1 = y_train[valid_idx]
# transpose(0, 3, 1, 2) moves the last axis to position 1 (presumably NHWC -> NCHW; confirm
# against the HDF5 layout). list() then splits each array into per-sample arrays.
train_data = list(X_train_1.transpose(0, 3, 1, 2))
train_targets = list(y_train_1)
valid_data = list(X_valid_1.transpose(0, 3, 1, 2))
valid_targets = list(y_valid_1)
test_data = list(x_test.transpose(0, 3, 1, 2))
test_targets = list(y_test)

In [None]:
# Training-time pipeline: random 32x32 crop after 4-pixel padding and a random horizontal flip,
# followed by tensor conversion and per-channel normalisation with the constants below.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Evaluation pipeline: same tensor conversion and normalisation, no augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])


train_dataset = Dataset(train_data, train_targets, transform_train)
valid_dataset = Dataset(valid_data, valid_targets, transform = transform_test)
test_dataset = Dataset(test_data, test_targets, transform = transform_test)
# shuffle=True reshuffles the training samples at the start of every epoch; without it the loader
# feeds SGD the exact same batch sequence in each epoch. Evaluation order is irrelevant, so the
# validation and test loaders stay sequential.
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=64)
test_dataloader = DataLoader(test_dataset, batch_size=64)

# Human-readable class names, indexed by label id.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
# Sample counts per split ...
train_data_size, valid_data_size, test_data_size = (
    len(ds) for ds in (train_dataset, valid_dataset, test_dataset)
)
# ... and the number of mini-batches each loader yields per pass.
num_train_data_batches, num_valid_data_batches, num_test_data_batches = (
    len(loader) for loader in (train_dataloader, valid_dataloader, test_dataloader)
)

In [None]:
# Print, for every split, the sample count followed by the batch count.
for heading, n_samples, n_batches in (
    ("Number of training samples: ", train_data_size, num_train_data_batches),
    ("Number of validation samples: ", valid_data_size, num_valid_data_batches),
    ("Number of testing samples: ", test_data_size, num_test_data_batches),
):
    print(heading, n_samples)
    print("{} batches each having 64 samples".format(str(n_batches)))

In [None]:
# Preview the first training batch.
batch = next(iter(train_dataloader))
images, labels = batch

# The loader yields mean/std-normalised tensors whose values fall outside [0, 1]; imshow would
# clip them and distort the colours. normalize=True rescales the grid to [0, 1] for display only.
grid = torchvision.utils.make_grid(images[:64], nrow=8, normalize=True)
plt.figure(figsize=(10,10))
# (C, H, W) -> (H, W, C), the channel-last layout imshow expects.
plt.imshow(grid.permute(1, 2, 0).numpy())

# Report the shapes from the batch already fetched instead of iterating the loader a second time.
print("Batch image tensor dimensions: ", images.shape)
print("Batch label tensor dimensions: ", labels.shape)

In [None]:
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages added to a skip connection.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution (and of the projection shortcut, if any).
    """

    # Output channels are ``expansion * planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: an empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()
        else:
            # 1x1 projection so the skip path matches the main path in channels and resolution.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Apply conv-bn-relu, conv-bn, add the shortcut of ``x`` and finish with ReLU."""
        residual = self.bn1(self.conv1(x))
        residual = F.relu(residual)
        residual = self.bn2(self.conv2(residual))
        residual += self.shortcut(x)
        return F.relu(residual)

In [None]:
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions added to a skip connection.

    Args:
        in_planes: number of input channels.
        planes: channel width of the first two convolutions; the block outputs
            ``expansion * planes`` channels.
        stride: stride of the 3x3 convolution (and of the projection shortcut, if any).
    """

    # The final 1x1 convolution widens the block to ``expansion * planes`` channels.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: an empty Sequential acts as the identity.
            self.shortcut = nn.Sequential()
        else:
            # 1x1 projection so the skip path matches the main path in channels and resolution.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Apply the three conv-bn stages, add the shortcut of ``x`` and finish with ReLU."""
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = F.relu(self.bn2(self.conv2(residual)))
        residual = self.bn3(self.conv3(residual))
        residual += self.shortcut(x)
        return F.relu(residual)

In [None]:
class ResNet(nn.Module):
    """ResNet backbone with a 3x3 stem, four residual stages and a linear classifier.

    Args:
        block: residual block class. It must expose an ``expansion`` class attribute and accept
            ``(in_planes, planes, stride)`` in its constructor.
        num_blocks: sequence of four ints, the number of blocks in each stage.
        num_classes: size of the output layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count fed to the next block; updated by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage of ``num_blocks`` blocks.

        Only the first block uses ``stride``; the remaining blocks use stride 1.
        ``self.in_planes`` is advanced to the stage's output width as blocks are created.
        """
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for an image batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling to 1x1. For 32x32 inputs (a 4x4 final feature map) this equals
        # the former avg_pool2d(out, 4), but it also keeps the flattened size at
        # 512 * expansion for other input resolutions, where the fixed 4x4 window produced a
        # feature vector that no longer matched self.linear.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [None]:
def ResNet18(num_classes=10):
    """Build a ResNet from BasicBlock with stage depths [2, 2, 2, 2]."""
    return ResNet(BasicBlock, [2,2,2,2], num_classes=num_classes)

def ResNet34(num_classes=10):
    """Build a ResNet from BasicBlock with stage depths [3, 4, 6, 3]."""
    return ResNet(BasicBlock, [3,4,6,3], num_classes=num_classes)

def ResNet50(num_classes=10):
    """Build a ResNet from Bottleneck with stage depths [3, 4, 6, 3]."""
    return ResNet(Bottleneck, [3,4,6,3], num_classes=num_classes)

def ResNet101(num_classes=10):
    """Build a ResNet from Bottleneck with stage depths [3, 4, 23, 3]."""
    return ResNet(Bottleneck, [3,4,23,3], num_classes=num_classes)

def ResNet152(num_classes=10):
    """Build a ResNet from Bottleneck with stage depths [3, 8, 36, 3]."""
    return ResNet(Bottleneck, [3,8,36,3], num_classes=num_classes)

# Instantiate the network and move it to the GPU only when one is available, so this cell also
# runs on CPU-only machines instead of raising inside .cuda().
# NOTE(review): the Observations section reports ResNet-18 for Run1 while this cell builds
# ResNet-34 - confirm which architecture is intended.
net = ResNet34()
if torch.cuda.is_available():
    net = net.cuda()
print(net)

In [None]:
# if torch.cuda.current_device() in range(torch.cuda.device_count()):
#     net = torch.nn.DataParallel(net)
#     cudnn.benchmark = True

In [None]:
# Classification objective applied to the raw network outputs.
criterion = nn.CrossEntropyLoss()

# SGD with momentum and weight decay over every parameter of the network.
optimizer = optim.SGD(
    params=net.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=5e-4,
)

In [None]:
epochs = 100
# One entry per epoch: [avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc]
history = list()
# Batches are moved to wherever the model's parameters live, so the loop works on GPU and CPU
# alike instead of failing inside an unconditional .cuda() call.
device = next(net.parameters()).device
for epoch in range(epochs):
    epoch_start = time.time()
    print("Epoch: {}/{}".format(epoch+1, epochs))

    # Set to training mode
    net.train()

    # Sample-weighted running sums of loss and accuracy within the epoch
    train_loss = 0.0
    train_acc = 0.0

    valid_loss = 0.0
    valid_acc = 0.0

    for i, (inputs, labels) in enumerate(train_dataloader):

        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clean existing gradients
        optimizer.zero_grad()

        # Forward pass - compute outputs on input data using the model
        outputs = net(inputs)

        # Compute loss
        loss = criterion(outputs, labels)

        # Backpropagate the gradients
        loss.backward()

        # Update the parameters
        optimizer.step()

        # Weight the batch-mean loss by the batch size so the (possibly smaller) last batch
        # contributes proportionally to the epoch average
        train_loss += loss.item() * inputs.size(0)

        # Compute the accuracy; detach() replaces the deprecated .data access
        _, predictions = torch.max(outputs.detach(), 1)
        correct_counts = predictions.eq(labels.view_as(predictions))

        # Fraction of correct predictions in this batch (computed on the tensor's own device)
        acc = correct_counts.float().mean()

        # Compute total accuracy in the whole batch and add to train_acc
        train_acc += acc.item() * inputs.size(0)

        print("Batch number: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}".format(i, loss.item(), acc.item() * 100))

    # Validation - No gradient tracking needed
    with torch.no_grad():

        # Set to evaluation mode
        net.eval()

        # Validation loop
        for j, (inputs, labels) in enumerate(valid_dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass - compute outputs on input data using the model
            outputs = net(inputs)

            # Compute loss
            loss = criterion(outputs, labels)

            # Compute the total loss for the batch and add it to valid_loss
            valid_loss += loss.item() * inputs.size(0)

            # Calculate validation accuracy
            _, predictions = torch.max(outputs, 1)
            correct_counts = predictions.eq(labels.view_as(predictions))

            # Fraction of correct predictions in this batch
            acc = correct_counts.float().mean()

            # Compute total accuracy in the whole batch and add to valid_acc
            valid_acc += acc.item() * inputs.size(0)

            print("Validation Batch number: {:03d}, Validation: Loss: {:.4f}, Accuracy: {:.4f}".format(j, loss.item(), acc.item() * 100))

    # Find average training loss and training accuracy
    avg_train_loss = train_loss/train_data_size
    avg_train_acc = train_acc/float(train_data_size)

    # Find average validation loss and validation accuracy
    avg_valid_loss = valid_loss/valid_data_size
    avg_valid_acc = valid_acc/float(valid_data_size)

    history.append([avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc])

    epoch_end = time.time()

    # The summary reports epoch+1 so it matches the 1-based "Epoch: x/y" header printed above
    # (it previously printed the 0-based loop index).
    print("Epoch : {:03d}, Training: Loss: {:.4f}, \
            Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, \
            Accuracy: {:.4f}%, Time: {:.4f}s".format(epoch+1, avg_train_loss, \
                                                     avg_train_acc*100, avg_valid_loss, \
                                                     avg_valid_acc*100, epoch_end-epoch_start))

In [None]:
# Sample-weighted running sums of loss and accuracy over the test set
test_loss = 0
test_acc = 0

# Batches follow the model's own device, so evaluation also works when the network lives on the
# CPU instead of failing inside an unconditional .cuda() call.
device = next(net.parameters()).device

# Testing - No gradient tracking needed
with torch.no_grad():

    # Set to evaluation mode
    net.eval()

    # Test loop
    for j, (inputs, labels) in enumerate(test_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass - compute outputs on input data using the model
        outputs = net(inputs)

        # Compute loss
        loss = criterion(outputs, labels)

        # Weight the batch-mean loss by the batch size and add it to test_loss
        test_loss += loss.item() * inputs.size(0)

        # Calculate test accuracy
        _, predictions = torch.max(outputs, 1)
        correct_counts = predictions.eq(labels.view_as(predictions))

        # Fraction of correct predictions in this batch
        acc = correct_counts.float().mean()

        # Compute total accuracy in the whole batch and add to test_acc
        test_acc += acc.item() * inputs.size(0)

        print("Test Batch number: {:03d}, Test: Loss: {:.4f}, Accuracy: {:.4f}".format(j, loss.item(), acc.item() * 100))


    # Per-sample averages over the whole test set
    avg_test_loss = test_loss/test_data_size
    avg_test_acc = test_acc/float(test_data_size)

    print("Test: Loss : {:.4f}, Accuracy: {:.4f}%".format(avg_test_loss, avg_test_acc*100))

## Observations

Run1
1. Architecture = ResNet-18
2. Epochs = 100
3. batch size = 64
4. optimizer = SGD
5. alpha = 0.0005
6. training loss = 0.0396
7. training accuracy = 98.7225%
8. validation loss = 0.3397
9. validation accuracy = 91.4%
10. approximate runtime = ~101 minutes (~1.7 hrs), about 61 seconds per epoch
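11. test loss = 3688 (this looks like the un-averaged sum over the test set, i.e. ≈0.3688 per sample if the test set has 10,000 images; to be confirmed)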
12. test accuracy = 91.08%