You can check and explore further methods applied to the tasks we have previously covered at the following link:
https://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#43494641522d313030

Skeleton code is provided from the following tutorial: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import pandas as pd
import cv2
from scipy.ndimage.interpolation import map_coordinates
from scipy.ndimage.filters import gaussian_filter
import matplotlib.pyplot as plt

# Report the library versions this notebook was run with.
print("Versions..........")
for _label, _module in (("torch:       ", torch), ("torchvision: ", torchvision)):
    print(_label, _module.__version__)

D:\Anaconda3\envs\myEnv001\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
D:\Anaconda3\envs\myEnv001\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll


Versions..........
torch:        1.10.1+cu102
torchvision:  0.11.2+cu102


Dataset: https://pytorch.org/docs/stable/_modules/torchvision/datasets/cifar.html#CIFAR100

### Defining and testing custom image transformation

In [None]:
# [DONE] TODO: write at least one data transform or augmentation method yourself ==> DONE
def elastic_transform(image, alpha, sigma, alpha_affine, random_state=None, probability=1.0):
    """
    Randomly apply an elastic deformation to an image, as described in [Simard2003] (with modifications).
    [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
    Convolutional Neural Networks applied to Visual Document Analysis", in
    Proc. of the International Conference on Document Analysis and
    Recognition, 2003.

    The image first receives a small random affine warp, then every pixel is
    displaced by a smoothed random field and resampled with linear interpolation.

    :param image (height,width,channel): A 3-D image as a numpy array compatible with cv2. A torch tensor is
        converted with ``.numpy()`` and is NOT transposed, so it must already be channels-last.
    :param alpha: Size of distortion (pixels). Typical value is between 1.5 and 3 x image height.
    :param sigma: Standard deviation of the Gaussian kernel with which the uniform random displacement fields are convolved. Typical value is between 0.01 and 0.1 image height.
    :param alpha_affine: Size of the affine transformations. Typical value is between 0.01 and 0.1 image height.
    :param random_state: numpy.random.RandomState used for every random draw (including the skip decision), for reproducibility. Default is None, meaning a fresh, unseeded state.
    :param probability: Chance of the transform happening (it is a random deformation). Default is 1.0, meaning the transformation will definitely happen.

    :return The input object itself when the transform is skipped; otherwise a new numpy array with the same shape as the input image.
    """
    if random_state is None:
        random_state = np.random.RandomState(None)

    # Take the skip decision from the same RNG as the deformation itself, so a
    # seeded random_state makes the whole call reproducible.
    if random_state.uniform() > probability:
        return image

    if isinstance(image, torch.Tensor):
        image = image.numpy()

    shape = image.shape
    shape_size = shape[:2]  # (height, width)

    # Random affine: jitter three reference points around the image centre and
    # warp the image with the affine map that sends the originals onto them.
    center_square = np.float32(shape_size) // 2
    square_size = min(shape_size) // 3
    pts1 = np.float32([
        center_square + square_size,
        [center_square[0] + square_size, center_square[1] - square_size],
        center_square - square_size,
    ])
    pts2 = pts1 + random_state.uniform(-alpha_affine, alpha_affine, size=pts1.shape).astype(np.float32)
    M = cv2.getAffineTransform(pts1, pts2)
    # cv2 expects the output size as (width, height), hence the reversal.
    image = cv2.warpAffine(image, M, shape_size[::-1], borderMode=cv2.BORDER_REFLECT_101)

    # Elastic part: uniform noise in [-1, 1) smoothed by a Gaussian and scaled by alpha
    # gives the per-pixel displacement along x and y (the channel index is not displaced).
    dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma) * alpha
    dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma) * alpha

    x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2]))
    indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1)), np.reshape(z, (-1, 1))

    # Resample the warped image at the displaced coordinates (linear interpolation).
    return map_coordinates(image, indices, order=1, mode='reflect').reshape(shape)


def elastic_transform_batch(batch, alpha, sigma, alpha_affine, random_state=None, probability=1.0):
    """Run elastic_transform on every image of a batch and return the results as one numpy array.

    ``batch`` may be a numpy array or an object whose type name contains "tensor"
    (e.g. a torch tensor from a dataloader), which is converted with ``.numpy()`` first.
    All other arguments are forwarded unchanged to elastic_transform for each image,
    so the skip probability is applied per image. The result has the same shape as the batch.
    """
    looks_like_tensor = 'tensor' in type(batch).__name__.lower()
    batch_ = batch.numpy() if looks_like_tensor else batch

    deformed_images = [
        elastic_transform(img, alpha, sigma, alpha_affine, random_state, probability)
        for img in batch_
    ]
    # Concatenation glues the images along the first axis; reshaping restores the batch axis.
    stacked = np.concatenate(deformed_images)
    return stacked.reshape(batch_.shape)


# Visual check of the elastic distorter: deform 10 randomly chosen CIFAR-100 test
# images and plot each original (left column) next to its deformed version (right column).
sampleset = torchvision.datasets.CIFAR100(root='./data/sample', train=False, download=True)
sampledata = sampleset.data[np.random.choice(len(sampleset), 10), ...]
h, w = sampledata.shape[1:3]
# Distortion parameters are expressed relative to the image width.
sampledata_deformed = elastic_transform_batch(sampledata, w*1.1, w*0.1, w*0.02)

plt.figure(figsize = (10,50))
for idx in range(10):
    # Subplot slots are 1-based: odd slots hold originals, even slots the deformed images.
    i = 2 * idx + 1
    plt.subplot(10, 2, i)
    plt.imshow(sampledata[idx, ...])
    plt.axis('off')
    if idx == 0:
        plt.title('Original')

    i += 1
    plt.subplot(10, 2, i)
    plt.imshow(sampledata_deformed[idx, ...])
    if idx == 0:
        plt.title('Deformed')
    plt.axis('off')
plt.show()

In [None]:
# Exercise: converting between cv2-style images (H,W,C) and torch-style tensors (C,H,W) or (B,C,H,W).
# Each colour plane holds a distinct constant so the channel order stays recognisable after reshuffling.
_plane_shape = (1080, 1920)
R = np.full(_plane_shape, 1.0)
G = np.full(_plane_shape, 2.0)
B = np.full(_plane_shape, 3.0)

# Channels-first image (C,H,W).
i3 = np.stack([R, G, B], axis=0)
print("shape of i3: ", i3.shape)

# Batch of five scaled copies (B,C,H,W).
i4 = np.stack([factor * i3 for factor in (1, 10, 100, 1000, 10000)], axis=0)
print("shape of i4: ", i4.shape)

# Pick the third batch element: back to (C,H,W).
i3c = i4[2, ...]
print("shape of i3c: ",i3c.shape)

# Move channels last to obtain the cv2 layout (H,W,C).
i3n = i3c.transpose((1, 2, 0))
print("shape of i3n: ",i3n.shape)
i3n[:, :, 2]

### Preliminaries and Data Loaders

In [2]:
# [DONE] TODO: You may consider applying more transforms such as data augmentation methods, etc.
# TODO: You may consider hyperparameter optimization: in this cell, we have batch_size!
# [DONE] TODO: use the previously defined data transform/augmentation method in the following transform.

# Global constants
# Side length used to scale the elastic-deformation parameters.
_IMAGE_SIDE = 32

# Elastic deformation settings
ELASTIC_ALPHA = _IMAGE_SIDE * 1.1
ELASTIC_SIGMA = _IMAGE_SIDE * 0.1
ELASTIC_ALPHA_AFFINE = _IMAGE_SIDE * 0.02
ELASTIC_PROBABILITY = 0.5

# Random flip probabilities
P_VERT_FLIP = 0.5
P_HORZ_FLIP = 0.5

# Colour jitter strengths
JITTER_BRIGHTNESS = 0.1
JITTER_SATURATION = 0.1
JITTER_CONTRAST = 0.2

# Random affine settings
AFFINE_ROTATION = 50
AFFINE_TRANSLATION = (0.1, 0.1)
AFFINE_SCALE = (0.5, 1.5)
AFFINE_SHEAR = 20

# Per-channel normalization statistics
NORM_MEAN = (0.5,) * 3
NORM_STD = (0.5,) * 3

# Hyperparameters
hparams = dict(
    batch_size=4,
    epochs=10,
    lr=0.001,
    num_cnn_blocks=3,
    num_fc_blocks=3,
)

# Creating dummy transform
class DummyTransform(object):
    """Pass-through transform that only counts how many times it has been called."""

    def __init__(self):
        # Number of images seen so far.
        self.globx = 0

    def __call__(self, image):
        """Increment the call counter and return ``image`` untouched."""
        self.globx = self.globx + 1
        return image


# Generating custom class for elastic deformation
class ElasticDeformation(object):
    """Elastic deformation used as image augmentation for classification tasks, as per the following paper.
    Simard, Steinkraus and Platt, "Best Practices for Convolutional Neural Networks applied to Visual Document Analysis",
    in Proc. of the International Conference on Document Analysis and Recognition, 2003.

    Note that calling an instance treats its image input according to the input's type.
    If the image is a numpy array, it is treated like an OpenCV image (channels last; H,W,C).
    If the image is a torch.Tensor, it is treated accordingly (channels first; C,H,W).

    Args:

    :param alpha: Size of distortion (pixels). Typical value is between 1.5 and 3 x image height.
    :param sigma: Standard deviation of the Gaussian kernel with which the uniform random displacement fields are convolved. Typical value is between 0.01 and 0.1 image height.
    :param alpha_affine: Size of the affine transformations. Typical value is between 0.01 and 0.1 image height.
    :param random_state: numpy.random.RandomState used for every random draw, for reproducibility. Default is None, meaning a fresh, unseeded state is created on each call.
    :param probability: Chance of the transform happening (it is a random deformation). Default is 1.0, meaning the transformation will definitely happen.
    :param output: Type of output of the call. 'tensor' returns a PyTorch tensor (channels first); any other value (default 'array') returns an OpenCV-style array (channels last).
    """
    def __init__(self, alpha, sigma, alpha_affine, random_state=None, probability=1.0, output='array'):
        self.alpha = alpha
        self.sigma = sigma
        self.alpha_affine = alpha_affine
        self.random_state = random_state
        self.probability = probability
        self.output = output

    def __call__(self, image):
        """Randomly apply the elastic transformation to an image using the instance parameters.

        Args:
            :param image (numpy.ndarray or torch.Tensor): Input image.
                A numpy array is treated like an OpenCV image (channels last; H,W,C).
                A torch.Tensor is treated as channels first (C,H,W).

        Returns:
            :return image: A PyTorch tensor (channels first) if output=='tensor' was given to the constructor,
                otherwise an OpenCV-style array (channels last). When the transform is skipped the pixel
                values are unchanged and only the layout/type conversion is applied.
        """
        # Always bind a usable RNG. Previously the local name was only set when
        # self.random_state was None, so a user-supplied state raised UnboundLocalError.
        rng = self.random_state if self.random_state is not None else np.random.RandomState(None)
        is_tensor = isinstance(image, torch.Tensor)
        want_tensor = self.output == 'tensor'

        # The skip decision comes from the same RNG so that seeded runs are reproducible.
        if rng.uniform() > self.probability:
            return self._convert_untouched(image, is_tensor, want_tensor)

        if is_tensor:
            # (C,H,W) -> (H,W,C); copied into a contiguous buffer before it is handed to OpenCV.
            image = np.ascontiguousarray(image.numpy().transpose((1, 2, 0)))

        out = self._deform(image, rng)
        return torch.tensor(out.transpose((2, 0, 1))) if want_tensor else out

    @staticmethod
    def _convert_untouched(image, is_tensor, want_tensor):
        """Return the un-deformed image in the requested output layout."""
        if is_tensor:
            return image if want_tensor else image.numpy().transpose((1, 2, 0))
        return torch.tensor(image.transpose((2, 0, 1))) if want_tensor else image

    def _deform(self, image, rng):
        """Apply the random affine warp followed by the elastic displacement to an (H,W,C) array."""
        shape = image.shape
        shape_size = shape[:2]  # (height, width)

        # Random affine: jitter three reference points around the image centre and
        # warp the image with the affine map that sends the originals onto them.
        center_square = np.float32(shape_size) // 2
        square_size = min(shape_size) // 3
        pts1 = np.float32([
            center_square + square_size,
            [center_square[0] + square_size, center_square[1] - square_size],
            center_square - square_size,
        ])
        pts2 = pts1 + rng.uniform(-self.alpha_affine, self.alpha_affine, size=pts1.shape).astype(np.float32)
        M = cv2.getAffineTransform(pts1, pts2)
        # cv2 expects the output size as (width, height), hence the reversal.
        image = cv2.warpAffine(image, M, shape_size[::-1], borderMode=cv2.BORDER_REFLECT_101)

        # Elastic part: smoothed uniform noise scaled by alpha gives the per-pixel
        # displacement along x and y (the channel index is not displaced).
        dx = gaussian_filter((rng.rand(*shape) * 2 - 1), self.sigma) * self.alpha
        dy = gaussian_filter((rng.rand(*shape) * 2 - 1), self.sigma) * self.alpha

        x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2]))
        indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1)), np.reshape(z, (-1, 1))

        # Resample the warped image at the displaced coordinates (linear interpolation).
        return map_coordinates(image, indices, order=1, mode='reflect').reshape(shape)

    def __repr__(self):
        return self.__class__.__name__+"(\n"+\
            "    alpha = {},\n".format(self.alpha)+\
            "    sigma = {},\n".format(self.sigma)+\
            "    alpha_affine = {},\n".format(self.alpha_affine)+\
            "    random_state = {},\n".format(self.random_state)+\
            "    probability = {},\n".format(self.probability)+\
            "    output = {})".format(self.output)



def elastic_transform_chosen(img):
    """Apply elastic_transform to ``img`` with the notebook-wide ELASTIC_* settings and a fresh RNG."""
    return elastic_transform(
        img,
        ELASTIC_ALPHA,
        ELASTIC_SIGMA,
        ELASTIC_ALPHA_AFFINE,
        random_state=None,
        probability=ELASTIC_PROBABILITY,
    )

# Image augmentation transforms
def _augmentation_tail():
    """Return fresh instances of the random augmentations and the normalization shared by all pipelines below."""
    return [
        transforms.RandomVerticalFlip(P_VERT_FLIP),
        transforms.RandomHorizontalFlip(P_HORZ_FLIP),
        transforms.ColorJitter(brightness=JITTER_BRIGHTNESS, contrast=JITTER_CONTRAST, saturation=JITTER_SATURATION),
        transforms.RandomAffine(AFFINE_ROTATION, AFFINE_TRANSLATION, AFFINE_SCALE, AFFINE_SHEAR),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]


# Standard augmentations only.
transform2 = transforms.Compose([transforms.ToTensor()] + _augmentation_tail())

# Same as transform2, with the call-counting DummyTransform inserted after the tensor conversion.
# ToTensor must appear only once: it accepts PIL images / ndarrays, so a second
# ToTensor applied to the tensor it produced would raise a TypeError.
transform1 = transforms.Compose([transforms.ToTensor(), DummyTransform()] + _augmentation_tail())

# Standard augmentations preceded by the custom elastic deformation (tensor in, tensor out).
transform = transforms.Compose([
        transforms.ToTensor(),
        ElasticDeformation(ELASTIC_ALPHA, ELASTIC_SIGMA, ELASTIC_ALPHA_AFFINE, random_state=None, probability=ELASTIC_PROBABILITY, output='tensor'),
    ] + _augmentation_tail())


# Evaluation data must be deterministic: only tensor conversion and the same
# normalization as training, without any random augmentation.
eval_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD)])

trainset = torchvision.datasets.CIFAR100(root='./data/CIFAR100', train=True, download=True, transform=transform1)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=hparams['batch_size'], shuffle=True, num_workers=0)
testset = torchvision.datasets.CIFAR100(root='./data/CIFAR100', train=False, download=True, transform=eval_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=hparams['batch_size'], shuffle=False, num_workers=0)

# Human-readable names for the 100 CIFAR-100 fine labels, in label-index order.
# The dataset sorts its labels alphabetically by their official names, where the
# keyboard class is called "keyboard" (index 39, right after "kangaroo"), so
# 'computer keyboard' must sit there and not among the c-words.
classes = ('apples', 'aquarium fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottles',
           'bowls', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'cans', 'castle', 'caterpillar', 'cattle', 'chair',
           'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile',
           'cups', 'dinosaur', 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house',
           'kangaroo', 'computer keyboard', 'lamp', 'lawn-mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple', 'motorcycle',
           'mountain', 'mouse', 'mushrooms', 'oak', 'oranges', 'orchids', 'otter', 'palm', 'pears', 'pickup truck',
           'pine', 'plain', 'plates', 'poppies', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket',
           'roses', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel',
           'streetcar', 'sunflowers', 'sweet peppers', 'table', 'tank', 'telephone', 'television', 'tiger', 'tractor',
           'train', 'trout', 'tulips', 'turtle', 'wardrobe', 'whale', 'willow', 'wolf', 'woman', 'worm')

Files already downloaded and verified
Files already downloaded and verified


### Visualizing some normalized images

In [3]:
def imshow(img):
    """Display a normalized channels-first image tensor in a 12x3 inch matplotlib figure.

    The tensor is mapped back with ``img / 2 + 0.5`` (undoing a mean-0.5 / std-0.5
    normalization) and transposed to channels-last before plotting.
    """
    unnormalized = img / 2 + 0.5
    pixels = unnormalized.numpy()
    channels_last = np.transpose(pixels, (1, 2, 0))

    plt.figure(figsize=(12,3))
    plt.imshow(channels_last)
    plt.show()


In [4]:

# Create an iterator over the test loader. The loader was built with shuffle=False,
# so it yields test batches in dataset order rather than random training images.
dataiter = iter(testloader)


In [None]:
# Fetch the next batch. The builtin next() works on every PyTorch version,
# whereas the iterator's .next() method was removed in newer releases.
images, labels = next(dataiter)
print(images.shape)
print(labels.shape)
print(labels)
# show images
imshow(torchvision.utils.make_grid(images))
# print labels (iterate over the actual batch length so a short final batch cannot index out of range)
print(' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))

In [None]:
# base method we provide:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Baseline CNN: two 5x5 conv + 2x2 max-pool stages followed by three fully connected layers.

    fc1 expects 32 * 5 * 5 features, which is what the conv stack yields for
    3-channel 32x32 inputs (32 -> 28 -> 14 -> 10 -> 5). The final layer emits 100 logits.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor (the single pooling layer is reused after each convolution).
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        # Classifier head.
        self.fc1 = nn.Linear(32 * 5 * 5, 200)
        self.fc2 = nn.Linear(200, 128)
        self.fc3 = nn.Linear(128, 100)

    def forward(self, x):
        """Map a batch of images to a (batch, 100) tensor of class logits."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # Flatten the feature maps into one vector per sample.
        x = x.view(-1, 32 * 5 * 5)

        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

In [None]:
# (Optional but highly recommended) TODO: You can define your own neural network to create a better performing model!
class MyOwnNet(Net):
    """Placeholder for a user-defined network; both methods are still empty stubs."""
    def __init__(self):
        # super(Net, self) starts the method lookup *after* Net, so Net.__init__ is
        # skipped and none of Net's layers (conv1, fc1, ...) are created here.
        # NOTE(review): presumably intentional so a custom architecture starts from a
        # clean module — confirm, or use super().__init__() to inherit Net's layers.
        super(Net, self).__init__()
        pass
    
    def forward(self, x):
        # Stub: returns None until a real forward pass is implemented.
        pass

In [None]:
# Pick the compute device: the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print(device)

In [None]:
# Do not forget to initialize your own network if you defined one!
#inzvaNet = MyOwnNet()
inzvaNet = Net()

# Move the parameters to the selected device and switch to training mode.
inzvaNet.to(device)

inzvaNet.train()
# base optimizer with following parameters:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
# Take the learning rate from hparams so that tuning the dictionary actually affects training.
optimizer = optim.SGD(inzvaNet.parameters(), lr=hparams['lr'], momentum=0.9)

# TODO: play with hyperparameters and chosen methods to achieve higher accuracy! You can apply grid or random search.

In [None]:
# Training loop: 4 passes over trainloader with a forward/backward/step per mini-batch;
# the mean loss of the last 2000 mini-batches is printed every 2000 mini-batches.
# NOTE(review): the epoch count is fixed here; hparams['epochs'] is not consulted.
for epoch in range(4):

    running_loss = 0.0
    for i, data in enumerate(trainloader):
        # data is a list of [inputs, labels]; both go to the training device
        inputs = data[0].to(device)
        labels = data[1].to(device)

        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass, loss, backward pass, parameter update
        outputs = inzvaNet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # accumulate the loss and report once per 2000 mini-batches
        running_loss += loss.item()
        if (i + 1) % 2000 != 0:
            continue
        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, i + 1, running_loss / 2000))
        running_loss = 0.0

print('Finished Training')

In [None]:
# Predict one test batch and show it next to the ground truth.
dataiter = iter(testloader)
# The builtin next() works on every PyTorch version (the iterator's .next() method was removed).
images, labels = next(dataiter)

# The model lives on `device`, so its inputs must be moved there too;
# no gradients are needed for inference.
with torch.no_grad():
    outputs = inzvaNet(images.to(device))
_, predicted = torch.max(outputs, 1)
predicted = predicted.cpu()
# print images
imshow(torchvision.utils.make_grid(images))
# Iterate over the actual batch length (the bare name `batch_size` was never defined).
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(len(predicted))))

In [None]:
# test on all test data
inzvaNet.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        # Inputs and labels must live on the same device as the model,
        # otherwise the forward pass fails when training ran on the GPU.
        images, labels = data[0].to(device), data[1].to(device)
        outputs = inzvaNet(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(correct)
print(total)
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

In [None]:
# class-wise accuracy
class_correct = [0.0] * 100
class_total = [0.0] * 100
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # Run the model on its own device, then compare on the CPU where the labels are.
        outputs = inzvaNet(images.to(device))
        _, predicted = torch.max(outputs, 1)
        c = predicted.cpu() == labels
        # Walk over the real batch contents: this handles a short final batch and
        # avoids both the undefined `batch_size` name and squeeze() on a 1-element batch.
        for label, hit in zip(labels.tolist(), c.tolist()):
            class_correct[label] += hit
            class_total[label] += 1


for i in range(100):
    # Guard against classes that never occurred, to avoid a division by zero.
    accuracy = 100 * class_correct[i] / class_total[i] if class_total[i] else 0.0
    print('Accuracy of %5s : %2d %%' % (
        classes[i], accuracy))