<a href="https://colab.research.google.com/github/rajlm10/D2L-Torch/blob/main/D2L_CIFAR10_Challenge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install d2l -q

[K     |████████████████████████████████| 82 kB 691 kB/s 
[K     |████████████████████████████████| 9.9 MB 83.7 MB/s 
[K     |████████████████████████████████| 15.7 MB 28.4 MB/s 
[K     |████████████████████████████████| 61 kB 8.7 MB/s 
[K     |████████████████████████████████| 11.2 MB 86.9 MB/s 
[K     |████████████████████████████████| 930 kB 84.9 MB/s 
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests~=2.23.0, but you have requests 2.25.1 which is incompatible.
datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.[0m
[?25h

In [None]:
import collections
import math
import os
import shutil
import pandas as pd
import torch
import torchvision
import torchvision.models as models
from torch import nn
from d2l import torch as d2l
import numpy as np

In [None]:
# Register the tiny CIFAR-10 sample (archive URL plus the hash string stored alongside it) in d2l's data hub
d2l.DATA_HUB['cifar10_tiny'] = (
    d2l.DATA_URL + 'kaggle_cifar10_tiny.zip',
    '2068874e4b9a9f0fb07ebe0ad2b29754449ccacd')

# Set `demo` to False to use the full dataset downloaded for the Kaggle competition
demo = True
data_dir = d2l.download_extract('cifar10_tiny') if demo else '../data/cifar-10/'

Downloading ../data/kaggle_cifar10_tiny.zip from http://d2l-data.s3-accelerate.amazonaws.com/kaggle_cifar10_tiny.zip...


In [None]:
# Preview the label file through `data_dir` instead of a hard-coded absolute path
pd.read_csv(os.path.join(data_dir, 'trainLabels.csv')).head()

Unnamed: 0,id,label
0,1,frog
1,2,truck
2,3,truck
3,4,deer
4,5,automobile


In [None]:
# Resolve the directory selected by the `demo` switch to an absolute path rather than
# overwriting it with a hard-coded one (which silently ignored `demo = False`)
data_dir = os.path.abspath(data_dir)

def read_csv_labels(fname):
  """Read `fname` to return a filename to label dictionary.

  The first line is treated as the column header and skipped. Every remaining
  non-blank row must have exactly two comma-separated fields (name, label);
  surrounding whitespace is stripped from both and CSV quoting is honoured.

  Raises:
    ValueError: if a non-blank row does not have exactly two fields.
  """
  import csv  # local import so this cell does not depend on an edit to the import cell

  with open(fname, 'r', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header line (column names); tolerate an empty file
    rows = [[field.strip() for field in row] for row in reader]
  # Blank (or whitespace-only) lines, e.g. a trailing newline at the end of the file, are ignored
  return {name: label for name, label in (row for row in rows if any(row))}

# Load the image-id -> class-name mapping and report its size
labels = read_csv_labels(os.path.join(data_dir, 'trainLabels.csv'))
print(f'# training examples: {len(labels)}')
print(f'# classes: {len(set(labels.values()))}')

# training examples: 1000
# classes: 10


In [None]:
# Show the first five (id, label) pairs
[pair for _, pair in zip(range(5), labels.items())]

[('1', 'frog'),
 ('2', 'truck'),
 ('3', 'truck'),
 ('4', 'deer'),
 ('5', 'automobile')]

Next, we define the `reorg_train_valid` function to split the validation set out of the original training set. The argument `valid_ratio` is the ratio of the number of examples in the validation set to the number of examples in the original training set. More concretely, let $n$ be the number of images in the class with the fewest examples, and let $r$ be the ratio. The validation set will then receive $\max(\lfloor nr \rfloor, 1)$ images from each class.

In [None]:
def copyfile(filename, target_dir):
  """Place a copy of `filename` inside `target_dir`, creating the directory first if needed."""
  if not os.path.isdir(target_dir):
    os.makedirs(target_dir, exist_ok=True)
  shutil.copy(filename, target_dir)

In [None]:
def reorg_train_valid(data_dir, labels, valid_ratio):
  """Split the validation set out of the original training set.

  Every file in `<data_dir>/train` is copied to `train_valid_test/train_valid/<label>`.
  For each class, the first `n_valid_per_label` files (in sorted filename order) are
  also copied to `train_valid_test/valid/<label>`; all remaining files go to
  `train_valid_test/train/<label>`.

  Args:
    data_dir: directory that contains the `train` folder.
    labels: dict mapping a file name without extension to its class label.
    valid_ratio: fraction of the smallest class to reserve for validation.

  Returns:
    The number of validation examples taken from each class.
  """
  # The number of examples of the class that has the fewest examples in the training dataset
  n = min(collections.Counter(labels.values()).values())
  n_valid_per_label = max(1, math.floor(n * valid_ratio))
  split_root = os.path.join(data_dir, 'train_valid_test')
  label_count = collections.Counter()
  # os.listdir returns entries in arbitrary order; sorting makes the train/valid
  # assignment reproducible across runs and machines
  for train_file in sorted(os.listdir(os.path.join(data_dir, 'train'))):
    label = labels[train_file.split('.')[0]]
    fname = os.path.join(data_dir, 'train', train_file)
    copyfile(fname, os.path.join(split_root, 'train_valid', label))
    if label_count[label] < n_valid_per_label:
      copyfile(fname, os.path.join(split_root, 'valid', label))
      label_count[label] += 1
    else:
      copyfile(fname, os.path.join(split_root, 'train', label))
  return n_valid_per_label

In [None]:
def reorg_test(data_dir):
  """Copy every file of `<data_dir>/test` into `train_valid_test/test/unknown` for prediction-time loading."""
  source_dir = os.path.join(data_dir, 'test')
  target_dir = os.path.join(data_dir, 'train_valid_test', 'test', 'unknown')
  for test_file in os.listdir(source_dir):
    copyfile(os.path.join(source_dir, test_file), target_dir)

In [None]:
def reorg_cifar10_data(data_dir, valid_ratio):
  """Read `trainLabels.csv` from `data_dir`, then build the train/valid split and the test folder."""
  label_path = os.path.join(data_dir, 'trainLabels.csv')
  reorg_train_valid(data_dir, read_csv_labels(label_path), valid_ratio)
  reorg_test(data_dir)

In [None]:
# Use a smaller batch for the tiny demo subset than for the full dataset
if demo:
  batch_size = 32
else:
  batch_size = 128
valid_ratio = 0.1
reorg_cifar10_data(data_dir, valid_ratio)

In [None]:
def find_normalization_params(data_dir, batch_size=256):
  """Compute the per-channel mean and standard deviation of the training images.

  The statistics are accumulated over every pixel of every image in
  `<data_dir>/train_valid_test/train`, so the result is exact and does not depend
  on batch size, batch order or a dropped last batch (averaging per-batch
  standard deviations is not the dataset standard deviation).

  Args:
    data_dir: directory that contains the reorganized `train_valid_test` folder.
    batch_size: number of images read per step; only affects memory use.

  Returns:
    A `(mean, std)` tuple of float32 tensors of shape `(3,)`; `std` is the
    population standard deviation.

  Raises:
    ValueError: if the loader yields no images.
  """
  train_ds = torchvision.datasets.ImageFolder(
      os.path.join(data_dir, 'train_valid_test', 'train'),
      transform=torchvision.transforms.ToTensor())
  # No shuffling and no dropped batch: every image must contribute exactly once
  loader = torch.utils.data.DataLoader(train_ds, batch_size, shuffle=False, drop_last=False)

  pixel_count = 0
  # float64 accumulators avoid precision loss when summing many pixel values
  channel_sum = torch.zeros(3, dtype=torch.float64)
  channel_sq_sum = torch.zeros(3, dtype=torch.float64)
  for images, _ in loader:
    images = images.double()
    pixel_count += images.shape[0] * images.shape[2] * images.shape[3]
    channel_sum += images.sum(dim=(0, 2, 3))
    channel_sq_sum += (images ** 2).sum(dim=(0, 2, 3))
  if pixel_count == 0:
    raise ValueError('no training images found to compute normalization statistics')

  mean = channel_sum / pixel_count
  # Var[x] = E[x^2] - E[x]^2; clamp guards against tiny negative values from rounding
  std = (channel_sq_sum / pixel_count - mean ** 2).clamp(min=0).sqrt()
  return mean.float(), std.float()

In [None]:
# Per-channel statistics of the training split, used by the Normalize transforms
mean, std = find_normalization_params(data_dir)
mean, std

(tensor([0.4888, 0.4808, 0.4437]), tensor([0.2439, 0.2410, 0.2591]))

In [None]:
transform_train = torchvision.transforms.Compose([
    # Scale the image up to a square of 40 pixels in both height and width
    # (this call had been swallowed into the comment line and was never applied)
    torchvision.transforms.Resize(40),
    # Randomly crop a square covering 0.64 to 1 times the area of the 40-pixel image,
    # then scale it to a square of 32 pixels in both height and width
    torchvision.transforms.RandomResizedCrop(32, scale=(0.64, 1.0), ratio=(1.0, 1.0)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    # Standardize each channel of the image
    torchvision.transforms.Normalize(mean, std)])

# Evaluation pipeline: no random augmentation, only tensor conversion and the same standardization
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean, std),
])


In [None]:
# Training-time datasets use the augmenting transform
train_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'train'), transform=transform_train)
train_valid_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'train_valid'), transform=transform_train)

# Evaluation-time datasets use the deterministic transform
valid_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'valid'), transform=transform_test)
test_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'test'), transform=transform_test)

During training, we need to specify all the image augmentation operations defined above. 

When the validation set is used for model evaluation during hyperparameter tuning, no randomness from image augmentation should be introduced.

 Before final prediction, we train the model on the combined training set and validation set to make full use of all the labeled data.

In [None]:
# Training loaders shuffle and drop the incomplete last batch
train_iter = torch.utils.data.DataLoader(train_ds, batch_size, shuffle=True, drop_last=True)
# Training + validation data combined; used for the final retraining before prediction
train_valid_iter = torch.utils.data.DataLoader(train_valid_ds, batch_size, shuffle=True, drop_last=True)

# Evaluation loaders keep every example: dropping the last batch would leave part of the
# validation set out of the reported accuracy
valid_iter = torch.utils.data.DataLoader(valid_ds, batch_size, shuffle=False, drop_last=False)
test_iter = torch.utils.data.DataLoader(test_ds, batch_size, shuffle=False, drop_last=False)

In [None]:
# Instantiate the pretrained EfficientNet-B0 and display its layer structure
net = models.efficientnet_b0(pretrained=True) 
net

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-3dd342df.pth


  0%|          | 0.00/20.5M [00:00<?, ?B/s]

EfficientNet(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): ConvNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): ConvNormActivation(
 

In [None]:
def get_net(num_classes=10):
  """Build a pretrained EfficientNet-B0 whose final linear layer outputs `num_classes` scores.

  The replaced layer (`classifier[1]`) keeps the original input width and is
  initialized with Xavier-uniform weights and a zero bias.

  Args:
    num_classes: number of output classes; defaults to 10 (CIFAR-10).

  Returns:
    The modified network.
  """
  net = models.efficientnet_b0(pretrained=True)
  head = nn.Linear(net.classifier[1].in_features, num_classes)
  nn.init.xavier_uniform_(head.weight)
  nn.init.zeros_(head.bias)
  net.classifier[1] = head
  return net
  
# Per-example losses (no reduction): `train_batch` sums them itself and `train`
# divides the accumulated sum by the number of examples seen
loss = nn.CrossEntropyLoss(reduction="none")

In [None]:
def train_batch(net, X, y, loss, optimizer, devices):
  """Run one optimization step on a single batch.

  Args:
    net: model to train; it is put into training mode.
    X, y: input batch and its targets; both are moved to `devices[0]`.
    loss: callable returning the loss of `net(X)` against `y`; its output is summed.
    optimizer: optimizer whose gradients are reset before, and stepped after, the backward pass.
    devices: sequence of devices; only the first one is used for the inputs.

  Returns:
    `(train_loss_sum, train_acc_sum)`: the summed batch loss as a tensor detached
    from the autograd graph, and the value of `d2l.accuracy(pred, y)`.
  """
  X, y = X.to(devices[0]), y.to(devices[0])
  net.train()
  optimizer.zero_grad()
  pred = net(X)
  # Reduce once and reuse the result for both the backward pass and the reported value
  train_loss_sum = loss(pred, y).sum()
  train_loss_sum.backward()
  optimizer.step()
  train_acc_sum = d2l.accuracy(pred, y)
  # Detach so the caller's running total does not keep the autograd graph alive
  return train_loss_sum.detach(), train_acc_sum

In [None]:
def evaluate_accuracy_gpu(net, data_iter, device=None):
  """Compute the classification accuracy of `net` over all batches of `data_iter`.

  Args:
    net: model (or callable) mapping a batch `X` to predictions. An `nn.Module`
      is switched to evaluation mode.
    data_iter: iterable of `(X, y)` batches.
    device: device the batches are moved to. If falsy and `net` is an
      `nn.Module`, the device of its first parameter is used.

  Returns:
    The fraction of correctly predicted examples as a float.

  Raises:
    ValueError: if `data_iter` yields no examples.
  """
  if isinstance(net, nn.Module):
    net.eval()  # Set the model to evaluation mode
    if not device:
      device = next(iter(net.parameters())).device

  num_correct, num_seen = 0, 0
  with torch.no_grad():
    for X, y in data_iter:
      X, y = X.to(device), y.to(device)
      y_hat = net(X)
      # Multi-column outputs are class scores: pick the highest-scoring class
      if y_hat.dim() > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(dim=1)
      num_correct += int((y_hat.type(y.dtype) == y).sum())
      num_seen += y.numel()
  if num_seen == 0:
    raise ValueError('data_iter yielded no examples; cannot compute accuracy')
  return num_correct / num_seen

In [None]:
def train(net, train_iter, valid_iter, num_epochs, lr, wd, devices, lr_period, lr_decay):
  """Fine-tune `net` with SGD and a step learning-rate schedule, printing metrics per epoch.

  Relies on the notebook-level `loss`, `train_batch` and `evaluate_accuracy_gpu`.

  Args:
    net: model with a `classifier` submodule whose parameters are named
      `classifier.1.weight` / `classifier.1.bias`; they get their own parameter group.
    train_iter: iterable of `(features, labels)` training batches.
    valid_iter: validation batches, or None to skip validation.
    num_epochs: number of passes over `train_iter`.
    lr: learning rate (used for both parameter groups).
    wd: weight decay.
    devices: devices handed to `nn.DataParallel`; inputs go to `devices[0]`.
    lr_period, lr_decay: step size and multiplicative factor of the `StepLR` schedule.
  """
  head_names = ("classifier.1.weight", "classifier.1.bias")
  params_body = [param for name, param in net.named_parameters() if name not in head_names]

  optimizer = torch.optim.SGD(
      [{'params': params_body},
       {'params': net.classifier.parameters(), 'lr': lr}],
      lr=lr, momentum=0.9, weight_decay=wd)
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, lr_period, lr_decay)
  timer = d2l.Timer()

  # The wrapper is local to this function; the caller keeps its reference to the underlying module
  net = nn.DataParallel(net, device_ids=devices).to(devices[0])

  metric = None
  for _ in range(num_epochs):
    net.train()
    # Sum of training loss, sum of training accuracy, no. of examples
    metric = d2l.Accumulator(3)

    for features, labels in train_iter:
      timer.start()
      l, acc = train_batch(net, features, labels, loss, optimizer, devices)
      metric.add(l, acc, labels.shape[0])
      timer.stop()
    if valid_iter is not None:
      valid_acc = evaluate_accuracy_gpu(net, valid_iter)
    scheduler.step()

    measures = (f'train loss {metric[0] / metric[2]:.3f}, ' f'train acc {metric[1] / metric[2]:.3f}')
    if valid_iter is not None:
      measures += f', valid acc {valid_acc:.3f}'
    print(measures)

  # Skip the throughput line when nothing was trained (e.g. num_epochs == 0), where
  # `metric` would be undefined and the elapsed time zero
  if metric is not None and timer.sum() > 0:
    print(f'\n{metric[2] * num_epochs / timer.sum():.1f}' f' examples/sec on {str(devices)}')
      

In [None]:
# Hyperparameters for the train/validation run
devices = d2l.try_all_gpus()
num_epochs, lr, wd = 20, 2e-4, 5e-4
lr_period, lr_decay = 4, 0.9
net = get_net()
train(net, train_iter, valid_iter, num_epochs, lr, wd, devices, lr_period, lr_decay)

train loss 2.638, train acc 0.167, valid acc 0.109
train loss 2.458, train acc 0.223, valid acc 0.172
train loss 2.310, train acc 0.237, valid acc 0.156
train loss 2.149, train acc 0.276, valid acc 0.297
train loss 2.017, train acc 0.291, valid acc 0.281
train loss 1.899, train acc 0.315, valid acc 0.266
train loss 1.848, train acc 0.319, valid acc 0.266
train loss 1.747, train acc 0.376, valid acc 0.359
train loss 1.633, train acc 0.434, valid acc 0.422
train loss 1.651, train acc 0.426, valid acc 0.422
train loss 1.645, train acc 0.423, valid acc 0.344
train loss 1.551, train acc 0.449, valid acc 0.312
train loss 1.551, train acc 0.470, valid acc 0.500
train loss 1.421, train acc 0.484, valid acc 0.406
train loss 1.368, train acc 0.506, valid acc 0.328
train loss 1.402, train acc 0.489, valid acc 0.375
train loss 1.335, train acc 0.513, valid acc 0.406
train loss 1.278, train acc 0.529, valid acc 0.344
train loss 1.276, train acc 0.545, valid acc 0.438
train loss 1.245, train acc 0.5

After finding a promising set of hyperparameters, we use all the labeled data (including the validation set) to retrain the model and then classify the testing set.

In [None]:
# Retrain a fresh model on train + validation data; no validation loader is passed
num_epochs = 50
net = get_net()
train(net, train_valid_iter, None, num_epochs, lr, wd, devices, lr_period, lr_decay)

train loss 2.704, train acc 0.142
train loss 2.416, train acc 0.201
train loss 2.241, train acc 0.199
train loss 2.248, train acc 0.231
train loss 2.172, train acc 0.257
train loss 2.049, train acc 0.276
train loss 1.949, train acc 0.293
train loss 1.889, train acc 0.304
train loss 1.875, train acc 0.323
train loss 1.798, train acc 0.346
train loss 1.790, train acc 0.365
train loss 1.658, train acc 0.417
train loss 1.667, train acc 0.414
train loss 1.574, train acc 0.422
train loss 1.537, train acc 0.438
train loss 1.531, train acc 0.453
train loss 1.455, train acc 0.458
train loss 1.439, train acc 0.478
train loss 1.404, train acc 0.500
train loss 1.334, train acc 0.516
train loss 1.276, train acc 0.543
train loss 1.270, train acc 0.523
train loss 1.264, train acc 0.535
train loss 1.231, train acc 0.570
train loss 1.142, train acc 0.593
train loss 1.109, train acc 0.601
train loss 1.171, train acc 0.585
train loss 1.128, train acc 0.594
train loss 1.037, train acc 0.629
train loss 1.0

In [None]:
# Predict a class index for every test image
preds = []
net.eval()  # evaluation mode only needs to be set once, not per batch
with torch.no_grad():  # inference only: do not build the autograd graph
  for X, _ in test_iter:
    y_hat = net(X.to(devices[0]))
    preds.extend(y_hat.argmax(dim=1).type(torch.int32).cpu().numpy())


In [None]:
# Ids are ordered as strings ('1', '10', '100', ...) — presumably to line up with the
# order in which the test dataset enumerates its image files; verify if file naming changes
sorted_ids = sorted(range(1, len(test_ds) + 1), key=str)
df = pd.DataFrame({'id': sorted_ids, 'label': preds})
# Translate predicted class indices into class names
df['label'] = df['label'].map(lambda idx: train_valid_ds.classes[idx])
df.to_csv('submission.csv', index=False)
df.head()

Unnamed: 0,id,label
0,1,ship
1,2,frog
2,3,cat
3,4,frog
4,5,dog
