For example, let's consider the MNIST dataset and select only 0s and 8s to train a classifier.

In [1]:
! pip install git+https://github.com/airi-industrial-ai/ec23-tutorial -q

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m778.1/778.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.2/840.2 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for ectutorial (setup.py) ... [?25l[?25hdone


In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot as plt
from ectutorial.mnist_utils import *
# NumPy is used by the data-preparation cells below, so import it here with the
# other dependencies instead of relying on a later cell (or the star import).
# Import order is intentionally left as-is: names imported after the star
# import take precedence over anything it exports.
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import random


In [3]:

#Defining a few functions as the images are in a 0-1 scale instead of 0-255 scale
def weighted_grayscale(x, color_index=0):
    """Scale ``x`` by the grayscale weight of one colour channel.

    Args:
        x: Value (or array/tensor) to scale.
        color_index: Index into the weights (0.3, 0.59, 0.11); defaults to 0.

    Returns:
        ``x`` multiplied by the selected weight.
    """
    red_weight, green_weight, blue_weight = 0.3, 0.59, 0.11
    channel_weights = (red_weight, green_weight, blue_weight)
    return x * channel_weights[color_index]
def gen_weighted_grayscale(image):
    """Build four grayscale renderings of ``image``.

    The unweighted image comes first, followed by the image scaled with each of
    the three ``weighted_grayscale`` channel weights. Every variant is passed
    through ``gen_colored`` and only the first of its four outputs is kept.

    Args:
        image: Single image as accepted by ``gen_colored``.

    Returns:
        Tuple of four images: (unweighted, weight 0, weight 1, weight 2).
    """
    variants = [image] + [weighted_grayscale(image, channel) for channel in range(3)]
    rendered = []
    for variant in variants:
        first_output, _, _, _ = gen_colored(variant)
        rendered.append(first_output)
    return tuple(rendered)

In [4]:

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Download the MNIST training and test splits into ./data; ToTensor() converts each
# image to a float tensor.
mnist_train = datasets.MNIST("./data", train=True, download=True,  transform=transforms.ToTensor())
mnist_test = datasets.MNIST("./data", train=False, download=True,  transform=transforms.ToTensor())

# Single-sample loaders (batch size = 1). NOTE: later cells rebind `train_loader`
# and `test_loader` to batched, three-channel versions.
train_loader = DataLoader(mnist_train, batch_size = 1, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size = 1, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 112797857.88it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 105243869.53it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 34153056.08it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 20396711.74it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



## Task 1

In [10]:

def create_custom_dataloader():
  """Create the three shifted (OOD) MNIST test loaders used in Task 1.

  Every image of ``mnist_test`` is turned into three 3-channel variants:

  * colored    - one of outputs 1..3 of ``gen_colored`` (picked at random),
  * grayscale  - one of outputs 1..3 of ``gen_weighted_grayscale`` (picked at
    random),
  * "rotated"  - despite the name this is a ``RandomResizedCrop`` to 28x28,
    repeated over three channels; no rotation is applied here.

  The variants are randomised, so every call yields different datasets.

  Returns:
    Tuple ``(data_color_loader, data_gs_loader, data_rotated_loader)`` of
    DataLoaders with batch size 32 and no shuffling. Labels keep the shape
    produced by the batch-size-1 source loader, i.e. one-element tensors.
  """
  # Use the `transforms` module imported at the top of the notebook: the bare
  # name `torchvision` is not bound by `from torchvision import ...`.
  rr = transforms.RandomResizedCrop((28, 28))
  data_color, data_gs_, data_rotate, label = [], [], [], []

  # Local loader (does not touch the notebook-level `test_loader`).
  single_image_loader = DataLoader(mnist_test, batch_size = 1, shuffle=False)
  for x, label_ in single_image_loader:
    # Drop the batch and channel axes; assumes single-channel images - TODO confirm.
    x = x.squeeze()
    data_rotate.append(np.concatenate([rr(x.unsqueeze(0))] * 3, axis=0))
    # HWC -> CHW so the samples match the layout the CNN expects.
    data_color.append(gen_colored(x)[random.randint(1, 3)].transpose(2,0,1))
    data_gs_.append(gen_weighted_grayscale(x)[random.randint(1, 3)].transpose(2,0,1))
    label.append(label_)

  data_colored = list(zip(data_color, label))
  data_gs = list(zip(data_gs_, label))
  data_rotated = list(zip(data_rotate, label))

  data_color_loader = DataLoader(data_colored, batch_size = 32, shuffle=False)
  data_gs_loader = DataLoader(data_gs, batch_size = 32, shuffle=False)
  data_rotated_loader = DataLoader(data_rotated, batch_size = 32, shuffle=False)

  return data_color_loader,data_gs_loader,data_rotated_loader



## Task 2

In [11]:
def epoch(loader, model, opt=None, augmix_augment_ = False):
    """Optionally train for one pass over ``loader``, then evaluate on it.

    When ``opt`` is given the model is put in train mode and updated once per
    batch. In every case the model is then switched to eval mode and scored on
    the same loader; the returned numbers come from that evaluation pass.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches that exposes
            ``.dataset`` (its length is the accuracy denominator). Labels may
            be shaped ``(B,)`` or ``(B, 1)``.
        model: ``nn.Module`` mapping an input batch to per-class logits.
        opt: Optimizer used for the training pass; ``None`` means
            evaluation only.
        augmix_augment_: When true, batches are routed through
            ``augmix_augment`` - with augmentation during training and with
            ``doaug=False`` (channel fix-up only) during evaluation.

    Returns:
        Tuple ``(accuracy_percent, loss)`` where ``loss`` is the sum over
        batches of the batch-mean cross-entropy.
    """
    criterion = nn.CrossEntropyLoss()

    # Run on whatever device the model lives on instead of relying on a
    # notebook-level `device` global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    def _targets(labels):
        # reshape(-1) (rather than squeeze) keeps a batch of one as shape (1,),
        # and long() gives the integer dtype CrossEntropyLoss expects.
        return labels.reshape(-1).long().to(run_device)

    if opt is not None:
        model.train()
        for inputs, labels in loader:
            if augmix_augment_:
                inputs = augmix_augment(inputs)
            opt.zero_grad()
            logits = model(inputs.to(run_device))
            batch_loss = criterion(logits, _targets(labels))
            batch_loss.backward()
            opt.step()

    correct = 0
    total_loss = 0.0
    model.eval()
    # No gradients are needed for scoring; skipping the graph saves memory.
    with torch.no_grad():
        for inputs, labels in loader:
            if augmix_augment_:
                # doaug=False: only convert to three channels, no augmentation.
                inputs = augmix_augment(inputs, doaug = False)
            targets = _targets(labels)
            logits = model(inputs.to(run_device))
            total_loss += criterion(logits, targets).item()
            # Predicted class = index of the largest logit.
            correct += (logits.argmax(dim=1) == targets).sum().item()
    return correct / len(loader.dataset) * 100, total_loss


In [12]:
# Build the dataloaders for Task 2 (increase channels from 1 --> 3)
def _replicate_channels(dataset):
    """Copy each image of ``dataset`` along the channel axis three times.

    Args:
        dataset: Iterable of ``(image_tensor, label)`` pairs with channel-first
            images (presumably shape (1, H, W) - TODO confirm).

    Returns:
        Tuple ``(images, labels)``: ``images`` is one stacked NumPy array of the
        three-channel images and ``labels`` is the list of labels in order.
    """
    images_3c = []
    targets = []
    for image, label in dataset:
        # Concatenating the same array three times along axis 0 triples the channels.
        images_3c.append(np.concatenate([image.numpy()] * 3, axis=0))
        targets.append(label)
    return np.array(images_3c), targets


# Training split: shuffled batches of 32.
modified_images, labels = _replicate_channels(mnist_train)
mnist_train_3c = list(zip(modified_images, labels))
train_loader = DataLoader(mnist_train_3c, batch_size = 32, shuffle=True)

# Test split: same conversion, fixed order.
modified_images, labels = _replicate_channels(mnist_test)
mnist_test_3c = list(zip(modified_images, labels))
test_loader = DataLoader(mnist_test_3c, batch_size = 32, shuffle=False)


In [13]:
# Seed torch's global RNG before any model is constructed.
torch.manual_seed(0)

class Flatten(nn.Module):
    """Layer that collapses every dimension after the first into one."""

    def forward(self, x):
        """Return ``x`` viewed as ``(x.shape[0], -1)``."""
        leading = x.shape[0]
        return x.view(leading, -1)

# Vanilla CNN for three-channel inputs with ten output classes. Layers are
# created in the same order as they are applied.
_cnn_layers = [
    nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(),
    nn.Conv2d(32, 32, 3, padding=1, stride=2), nn.ReLU(),
    nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
    nn.Conv2d(64, 64, 3, padding=1, stride=2), nn.ReLU(),
    Flatten(),
    # The classifier head expects 64 feature maps of size 7x7.
    nn.Linear(7*7*64, 100), nn.ReLU(),
    nn.Linear(100, 10),
]
model_cnn = nn.Sequential(*_cnn_layers).to(device)

In [14]:
# Plain SGD for the vanilla model.
opt = optim.SGD(model_cnn.parameters(), lr=1e-1)

# Accuracy (%) per repetition on the colored / grayscale / "rotated" test sets.
accuracyC, accuracyG, accuracyR = [], [], []

# Only two epochs: training is slow and accuracy already exceeds 99 percent.
for t in range(2):
    acc, train_loss = epoch(train_loader, model_cnn, opt)
print(f"Train accuracy{acc}" )

acc, test_loss = epoch(test_loader, model_cnn)
print(f" Test accuracy{acc}" )

# The shifted test sets are randomised, so rebuild them three times and score
# the trained model on each fresh set (evaluation only, no optimizer).
for times in range(3):
    data_color_loader, data_gs_loader, data_rotated_loader = create_custom_dataloader()
    for scores, ood_loader in ((accuracyC, data_color_loader),
                               (accuracyG, data_gs_loader),
                               (accuracyR, data_rotated_loader)):
        acc, test_loss = epoch(ood_loader, model_cnn)
        scores.append(acc)

  loss +=  nn.CrossEntropyLoss()(pred,torch.tensor(j).to(device)).item()


Train accuracy99.01666666666667
 Test accuracy98.72999999999999


In [15]:
# Report, for each shifted test set, the mean and standard deviation (NumPy's
# default .std()) of the accuracies collected over the repeated evaluations.
print(f"\n\nColored_MNIST mean Test accuracy {np.array(accuracyC).mean():.2f} standard deviation {np.array(accuracyC).std():.2f} " )
print(f"Grayscale_MNIST Test accuracy {np.array(accuracyG).mean():.2f} standard deviation {np.array(accuracyG).std():.2f}" )
print(f"Rotated_MNIST Test accuracy {np.array(accuracyR).mean():.2f} standard deviation {np.array(accuracyR).std():.2f}" )




Colored_MNIST mean Test accuracy 98.60 standard deviation 0.04 
Grayscale_MNIST Test accuracy 98.43 standard deviation 0.05
Rotated_MNIST Test accuracy 45.71 standard deviation 0.11


## Task 4

In [16]:
# Re-seed torch's global RNG before the Task 4 models are constructed.
torch.manual_seed(0)

# Same layer as the Flatten defined for the Task 2 model, declared again here.
class Flatten(nn.Module):
    """Layer that collapses every dimension after the first into one."""

    def forward(self, x):
        """Return ``x`` viewed as ``(x.shape[0], -1)``."""
        leading = x.shape[0]
        return x.view(leading, -1)



In [None]:
# Task 4: for each corruption type, train a fresh CNN on the corrupted training
# set (with AugMix applied to the training batches) and compare it on the
# corrupted test set against `model_cnn`, the vanilla model trained in Task 2.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Corruption types; each name is a sub-folder of ./drive/MyDrive holding the .npy files.
var = ["motion_blur" , "zigzag" , "fog"]
for type_ in var:
  # New, untrained network per corruption (same architecture as the vanilla model).
  model_cnn_aug = nn.Sequential(nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(),
                          nn.Conv2d(32, 32, 3, padding=1, stride=2), nn.ReLU(),
                          nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
                          nn.Conv2d(64, 64, 3, padding=1, stride=2), nn.ReLU(),
                          Flatten(),
                          nn.Linear(7*7*64, 100), nn.ReLU(),
                          nn.Linear(100, 10)).to(device)
  opt = optim.SGD(model_cnn_aug.parameters(), lr=1e-1)


  print(f"-----------------{type_}-----------------")
  # Load the pre-generated corrupted images and labels for this corruption type.
  loaded_array_test = np.load(f'./drive/MyDrive/{type_}/test_images.npy')
  loaded_array_lab_test = np.load(f'./drive/MyDrive/{type_}/test_labels.npy')

  loaded_array_train = np.load(f'./drive/MyDrive/{type_}/train_images.npy')
  loaded_array_lab_train = np.load(f'./drive/MyDrive/{type_}/train_labels.npy')

  # Divide pixel values by 255 (presumably stored on a 0-255 scale - TODO confirm).
  mnist_train_c = [((i / 255.) ,j) for i,j in zip(loaded_array_train, loaded_array_lab_train)]
  mnist_test_c = [((i / 255.) ,j) for i,j in zip(loaded_array_test, loaded_array_lab_test)]

  train_loader = DataLoader(mnist_train_c, batch_size = 32, shuffle=True)
  test_loader = DataLoader(mnist_test_c, batch_size = 32, shuffle=False)


  # Train for two epochs; augmix_augment_=True applies AugMix to every training batch.
  for t in range(2):
      acc, train_loss = epoch(train_loader, model_cnn_aug, opt , augmix_augment_ = True)
  print(f"Train accuracy{acc}  Train loss : {train_loss}" )

  # NOTE: without an optimizer, augmix_augment_=True only routes the batches through
  # augmix_augment(..., doaug=False), i.e. channel conversion with no augmentation.

  acc, test_loss = epoch(test_loader, model_cnn_aug , augmix_augment_= True)
  print(f"MNIST-C data trained model Test accuracy{acc}  Test loss : {test_loss}" )

  # Baseline: the vanilla model on the same corrupted test set.
  acc, test_loss = epoch(test_loader, model_cnn , augmix_augment_= True)
  print("Vanilla model Test accuracy" , acc, test_loss)


-----------------motion_blur-----------------


  loss +=  nn.CrossEntropyLoss()(pred,torch.tensor(j).to(device)).item()


Train accuracy98.39333333333333  Train loss : 96.79468112639734
MNIST-C data trained model Test accuracy98.16  Test loss : 16.92746583907865
Vanilla model Test accuracy 97.11 28.42546249111183
-----------------zigzag-----------------
Train accuracy98.235  Train loss : 100.63171849783976
MNIST-C data trained model Test accuracy97.64  Test loss : 21.353138562757522
Vanilla model Test accuracy 91.99000000000001 88.41492238239152
-----------------fog-----------------
Train accuracy98.48166666666667  Train loss : 95.63834621245041
MNIST-C data trained model Test accuracy98.07000000000001  Test loss : 17.765403081662953
Vanilla model Test accuracy 86.61 151.55270560085773


## Utilities for AugMix function

In [None]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Reference implementation of AugMix's data augmentation method in numpy."""
import numpy as np
from PIL import Image

# CIFAR-10 constants; read only by normalize(), whose call in augment_and_mix
# is currently commented out.
MEAN = [0.4914, 0.4822, 0.4465]
STD = [0.2023, 0.1994, 0.2010]


def normalize(image):
  """Standardise a channel-last image with the module-level MEAN and STD.

  Args:
    image: np.ndarray of shape (h, w, 3).

  Returns:
    Array of the same shape with ``(value - MEAN[c]) / STD[c]`` applied to
    channel ``c``.
  """
  channel_mean = np.array(MEAN).reshape(-1, 1, 1)
  channel_std = np.array(STD).reshape(-1, 1, 1)
  channels_first = image.transpose((2, 0, 1))  # HWC -> CHW
  standardised = (channels_first - channel_mean) / channel_std
  return standardised.transpose((1, 2, 0))  # back to HWC


def apply_op(image, op, severity):
  """Run one PIL-based augmentation on a float image.

  Args:
    image: np.ndarray on a 0-1 scale; singleton axes are squeezed away before
      the PIL conversion.
    op: Callable ``op(pil_img, severity)`` returning a PIL image.
    severity: Strength value forwarded to ``op``.

  Returns:
    The augmented image as an array divided by 255 (back on a 0-1 scale).
  """
  as_uint8 = np.clip(image * 255., 0, 255).astype(np.uint8).squeeze()
  augmented = op(Image.fromarray(as_uint8), severity)
  return np.asarray(augmented) / 255.


def augment_and_mix(image, severity=3, width=3, depth=-1, alpha=1.):
  """Apply AugMix: blend several random augmentation chains with the input.

  Args:
    image: Raw input image as a float np.ndarray. Each chain's 2-D result gets
      a trailing axis added before mixing, so in practice ``image`` is
      expected to be (h, w, 1).
    severity: Severity passed to every augmentation operator.
    width: Number of augmentation chains that are mixed.
    depth: Number of operators per chain; values <= 0 draw the depth
      uniformly from {1, 2, 3} for each chain.
    alpha: Parameter of the Dirichlet (chain weights) and Beta (blend factor)
      distributions.

  Returns:
    mixed: ``(1 - m) * image + m * mix`` where ``m`` is the Beta sample and
      ``mix`` the Dirichlet-weighted sum of the augmented chains.
  """
  chain_weights = np.float32(np.random.dirichlet([alpha] * width))
  blend = np.float32(np.random.beta(alpha, alpha))

  mix = np.zeros_like(image)
  for weight in chain_weights:
    image_aug = image.copy()
    chain_depth = depth if depth > 0 else np.random.randint(1, 4)
    for _ in range(chain_depth):
      chosen_op = np.random.choice(augmentations)
      image_aug = apply_op(image_aug, chosen_op, severity)
    # The chain output is accumulated as-is; normalize() is deliberately not
    # applied here. Restore the channel axis that apply_op squeezed away.
    mix += weight * np.expand_dims(image_aug, axis=2)

  return (1 - blend) * image + blend * mix


def augmix_augment(data, doaug = True):
  """Convert a batch to channel-first tensors, optionally applying AugMix.

  Args:
    data: Iterable of per-sample tensors (one batch).
    doaug: When true each sample goes through ``augment_and_mix`` first; when
      false the sample is only passed through ``gen_colored``.

  Returns:
    A stacked tensor built from the first output of ``gen_colored`` for each
    sample, transposed from (h, w, c) to (c, h, w).
  """
  def _to_chw(sample):
    source = augment_and_mix(sample.numpy()) if doaug else sample
    return torch.Tensor(gen_colored(source)[0].transpose(2,0,1))

  return torch.stack([_to_chw(sample) for sample in data])



In [None]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base augmentations operators."""

import numpy as np
from PIL import Image, ImageOps, ImageEnhance

# Side length in pixels of the square output used by the shear/translate
# operators, and the basis (IMAGE_SIZE / 3) of the maximum translation.
# ImageNet code should change this value.
IMAGE_SIZE = 28


def int_parameter(level, maxval):
  """Scale ``maxval`` by ``level / 10`` and truncate to an int.

  Args:
    level: Level of the operation; 10 corresponds to the full ``maxval``.
    maxval: Maximum value that the operation can have.

  Returns:
    ``int(level * maxval / 10)``.
  """
  scaled = level * maxval / 10
  return int(scaled)


def float_parameter(level, maxval):
  """Scale ``maxval`` by ``level / 10`` as a float.

  Args:
    level: Level of the operation; 10 corresponds to the full ``maxval``.
    maxval: Maximum value that the operation can have.

  Returns:
    ``float(level) * maxval / 10``.
  """
  level_as_float = float(level)
  return level_as_float * maxval / 10.


def sample_level(n):
  """Draw a random level uniformly between 0.1 and ``n``."""
  lower_bound = 0.1
  return np.random.uniform(lower_bound, n)


def autocontrast(pil_img, _):
  """Apply ``ImageOps.autocontrast``; the level argument is ignored."""
  result = ImageOps.autocontrast(pil_img)
  return result


def equalize(pil_img, _):
  """Apply ``ImageOps.equalize``; the level argument is ignored."""
  result = ImageOps.equalize(pil_img)
  return result


def posterize(pil_img, level):
  """Posterize to ``4 - k`` bits, where ``k`` is a random int scaled from ``level``."""
  bits_dropped = int_parameter(sample_level(level), 4)
  bits_kept = 4 - bits_dropped
  return ImageOps.posterize(pil_img, bits_kept)


def rotate(pil_img, level):
  """Rotate by a random angle (scaled from ``level``, max 30) with a random sign."""
  angle = int_parameter(sample_level(level), 30)
  # Coin flip for the rotation direction.
  angle = -angle if np.random.uniform() > 0.5 else angle
  return pil_img.rotate(angle, resample=Image.BILINEAR)


def solarize(pil_img, level):
  """Solarize with threshold ``256 - k``, ``k`` a random int scaled from ``level``."""
  offset = int_parameter(sample_level(level), 256)
  threshold = 256 - offset
  return ImageOps.solarize(pil_img, threshold)


def shear_x(pil_img, level):
  """Affine transform with a random x-shear coefficient (max 0.3, random sign)."""
  shear = float_parameter(sample_level(level), 0.3)
  shear = -shear if np.random.uniform() > 0.5 else shear
  # Only the second affine coefficient deviates from the identity.
  coefficients = (1, shear, 0, 0, 1, 0)
  output_size = (IMAGE_SIZE, IMAGE_SIZE)
  return pil_img.transform(output_size, Image.AFFINE, coefficients,
                           resample=Image.BILINEAR)


def shear_y(pil_img, level):
  """Affine transform with a random y-shear coefficient (max 0.3, random sign)."""
  shear = float_parameter(sample_level(level), 0.3)
  shear = -shear if np.random.uniform() > 0.5 else shear
  # Only the fourth affine coefficient deviates from the identity.
  coefficients = (1, 0, 0, shear, 1, 0)
  output_size = (IMAGE_SIZE, IMAGE_SIZE)
  return pil_img.transform(output_size, Image.AFFINE, coefficients,
                           resample=Image.BILINEAR)


def translate_x(pil_img, level):
  """Affine transform with a random x-offset (max IMAGE_SIZE / 3, random sign)."""
  shift = int_parameter(sample_level(level), IMAGE_SIZE / 3)
  shift = -shift if np.random.random() > 0.5 else shift
  # Only the third affine coefficient (x offset) deviates from the identity.
  coefficients = (1, 0, shift, 0, 1, 0)
  output_size = (IMAGE_SIZE, IMAGE_SIZE)
  return pil_img.transform(output_size, Image.AFFINE, coefficients,
                           resample=Image.BILINEAR)


def translate_y(pil_img, level):
  """Affine transform with a random y-offset (max IMAGE_SIZE / 3, random sign)."""
  shift = int_parameter(sample_level(level), IMAGE_SIZE / 3)
  shift = -shift if np.random.random() > 0.5 else shift
  # Only the sixth affine coefficient (y offset) deviates from the identity.
  coefficients = (1, 0, 0, 0, 1, shift)
  output_size = (IMAGE_SIZE, IMAGE_SIZE)
  return pil_img.transform(output_size, Image.AFFINE, coefficients,
                           resample=Image.BILINEAR)


# operation that overlaps with ImageNet-C's test set
def color(pil_img, level):
    """Enhance color by a random factor: a float scaled from ``level`` (max 1.8) plus 0.1."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Color(pil_img)
    return enhancer.enhance(factor)


# operation that overlaps with ImageNet-C's test set
def contrast(pil_img, level):
    """Enhance contrast by a random factor: a float scaled from ``level`` (max 1.8) plus 0.1."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Contrast(pil_img)
    return enhancer.enhance(factor)


# operation that overlaps with ImageNet-C's test set
def brightness(pil_img, level):
    """Enhance brightness by a random factor: a float scaled from ``level`` (max 1.8) plus 0.1."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Brightness(pil_img)
    return enhancer.enhance(factor)


# operation that overlaps with ImageNet-C's test set
def sharpness(pil_img, level):
    """Enhance sharpness by a random factor: a float scaled from ``level`` (max 1.8) plus 0.1."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Sharpness(pil_img)
    return enhancer.enhance(factor)


# Operators sampled by augment_and_mix.
augmentations = [
    autocontrast,
    equalize,
    posterize,
    rotate,
    solarize,
    shear_x,
    shear_y,
    translate_x,
    translate_y,
]

# The same operators followed by the four enhancement operators
# (a separate list object, so editing one list does not affect the other).
augmentations_all = augmentations + [color, contrast, brightness, sharpness]