# Timescapes technical exercise - Peiwen (Scott) Luo

# IMPORTANT!!! please make sure data storaged in: /data/cifar-10-batches-py/ eg. data_batch_1/2/3

## steps:
- Load data
- Load pretrained model
- train the model
- find all red auto
- save images
- save model

In [1]:

import os
import pickle

import torch
from torch import Tensor

from torchvision.transforms import ToTensor, Compose

import numpy as np
from torch.utils.data import TensorDataset, DataLoader, Dataset, random_split
from torch.optim import lr_scheduler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision.transforms as transforms

import cv2

import matplotlib.pyplot as plt
%matplotlib inline

# Setup

In [2]:
data_dir = r"data/cifar-10-batches-py"

train_filenames = ["data_batch_1",
                   "data_batch_2",
                   "data_batch_3",
                   "data_batch_4",
                   "data_batch_5"]
test_filenames = ["test_batch"]

batch_size = 128

total_size = 50000
train_size = 40000
val_size = 10000
test_size = 10000

# total_size = 50000
# train_size = 40000
# val_size = 10000
# test_size = 10000

# Load data

In [3]:
class Cifar10Dataset(Dataset):
    def __init__(self, input_data, targets, transform=None):
        self.data = input_data
        self.targets = torch.LongTensor(targets)
        self.transform = transform

    def __getitem__(self, index):
        image = self.data[index]
        target = self.targets[index]

        image = np.reshape(image,(3,32,32))
        image = np.transpose(image, (1,2,0))

        if self.transform:
            image = self.transform(image)

        return image, target

    def __len__(self):
        return len(self.targets)

In [4]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

def show_img(img_list):
    img = np.reshape(img_list,(3,32,32))
    imgView=np.transpose(img, (1,2,0))
    plt.imshow(imgView)
    plt.show()

def save_img(img_list, filename):
    img = np.reshape(img_list,(3,32,32))
    imgView=np.transpose(img, (1,2,0))
    cv2.imwrite(filename, cv2.cvtColor(imgView, cv2.COLOR_RGB2BGR))

def load_data(data_dir, filenames):
    data_list = []
    label_list = []
    for filename in filenames:
        output = unpickle(os.path.join(data_dir,filename))
        data_list.append(output[b'data'])
        label_list.append(output[b'labels'])

    x = np.concatenate(data_list, axis=0)
    y = np.concatenate(label_list, axis=0)

    print(x.shape, y.shape)

    return x,y

In [5]:
x_train,y_train = load_data(data_dir, train_filenames)
x_test,y_test = load_data(data_dir, test_filenames)

(50000, 3072) (50000,)
(10000, 3072) (10000,)


In [6]:
# generate the pytorch dataloader from raw dataset
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])


train_dataset = Cifar10Dataset(x_train[:total_size],y_train[:total_size],transform=transform)
train_dataset,val_dataset = random_split(train_dataset, [train_size, val_size])
test_dataset = Cifar10Dataset(x_test,y_test,transform=transform)



dataloaders = {'train': DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
               'val': DataLoader(val_dataset, batch_size=batch_size, shuffle=True),
               'test': DataLoader(test_dataset, batch_size=batch_size, shuffle=True)}

dataset_sizes = {'train':train_size, 'val': val_size, 'test':test_size}

## define classes name

In [7]:
metadata = unpickle(os.path.join(data_dir,"batches.meta"))
classes = metadata[b'label_names']

# Train model from pretrained model

# Retrain pretrained model

In [11]:
# define training model function

import time
import copy

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

## load pretrained model

In [46]:
import torchvision.models as models

# vgg16 = models.vgg16(pretrained=True)

# resnet18 = models.resnet18(pretrained=True)
# efficientnet_b7 = models.efficientnet_b7(pretrained=True)

mobilenet_v2 = models.mobilenet_v2(pretrained=True)
# mobilenet_v3_small = models.mobilenet_v3_small(pretrained=True)

In [47]:
model = mobilenet_v2

# for param in model.parameters():
#     param.requires_grad = False

## change the input layer and output layer

In [48]:
model

MobileNetV2(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05,

### mobilenet_v2

In [49]:
model.classifier = nn.Sequential(nn.Dropout(p=0.5),
                                 nn.Linear(in_features=1280, out_features=100, bias=True),
                                 nn.LeakyReLU(),
                                 nn.Dropout(p=0.5),
                                 nn.Linear(in_features=100, out_features=10, bias=True),
                                 )

model

MobileNetV2(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05,

### mobilenet_v3_small

In [50]:
# model.classifier = nn.Sequential(nn.Linear(in_features=576, out_features=256, bias=True),
#                                  nn.Hardswish(),
#                                  nn.Dropout(p=0.5),
#                                  nn.Linear(in_features=256, out_features=10, bias=True),
#                                  )
#
# model

### vgg 16

In [51]:
# model.classifier = nn.Sequential(nn.Linear(in_features=25088, out_features=4096, bias=True),
#                                  nn.LeakyReLU(),
#                                  nn.Dropout(p=0.5),
#                                  nn.Linear(in_features=4096, out_features=512, bias=True),
#                                  nn.LeakyReLU(),
#                                  nn.Dropout(p=0.5),
#                                  nn.Linear(in_features=512, out_features=10, bias=True)
#                                  )
#
# model

### resnet

In [52]:
# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(num_ftrs, 10)

## build own model - pool performance

In [53]:
# class Cifar10Model(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.network = nn.Sequential(
#             nn.Conv2d(3, 32, kernel_size=3, padding=1),
#             nn.ReLU(),
#             nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2), # output: 64 x 16 x 16
#
#             nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
#             nn.ReLU(),
#             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2), # output: 128 x 8 x 8
#
#
#             nn.Flatten(),
#             nn.Linear(128*8*8, 1024),
#             nn.ReLU(),
#             nn.Linear(1024, 512),
#             nn.ReLU(),
#             nn.Linear(512, 10))
#
#
#     def forward(self, xb):
#         return self.network(xb)
#
#
# my_model = Cifar10Model()

# Training

In [54]:
# set up criterion as cross entropy loss
criterion = nn.CrossEntropyLoss()

# set up optimizer as SGD
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.01)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# train model
model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=25)

Epoch 0/24
----------
train Loss: 1.7007 Acc: 0.3810
val Loss: 1.0795 Acc: 0.6221

Epoch 1/24
----------
train Loss: 1.0332 Acc: 0.6487
val Loss: 0.8401 Acc: 0.7091

Epoch 2/24
----------
train Loss: 0.8319 Acc: 0.7207
val Loss: 0.7409 Acc: 0.7438

Epoch 3/24
----------
train Loss: 0.7226 Acc: 0.7577
val Loss: 0.7019 Acc: 0.7574

Epoch 4/24
----------
train Loss: 0.6291 Acc: 0.7918
val Loss: 0.6736 Acc: 0.7705

Epoch 5/24
----------
train Loss: 0.5524 Acc: 0.8184
val Loss: 0.6580 Acc: 0.7712

Epoch 6/24
----------
train Loss: 0.4918 Acc: 0.8395
val Loss: 0.6564 Acc: 0.7825

Epoch 7/24
----------
train Loss: 0.4097 Acc: 0.8672
val Loss: 0.6382 Acc: 0.7917

Epoch 8/24
----------
train Loss: 0.3866 Acc: 0.8774
val Loss: 0.6359 Acc: 0.7926

Epoch 9/24
----------
train Loss: 0.3715 Acc: 0.8789
val Loss: 0.6463 Acc: 0.7906

Epoch 10/24
----------
train Loss: 0.3566 Acc: 0.8857
val Loss: 0.6575 Acc: 0.7887

Epoch 11/24
----------
train Loss: 0.3485 Acc: 0.8869
val Loss: 0.6562 Acc: 0.7872

Ep

# Prediction

In [55]:
import cv2

def predict_images(image, model):
    image = image.unsqueeze(0)
    out = model(image)
    _, pred = torch.max(out, 1)
    return classes[pred[0]]


# Find all red automobile

In [56]:
def check_red(image:np.ndarray) -> bool:
    """
    check if the image is red-ish
    :param image: image
    :return: bool
    """
    num_r = 0
    # crop center
    # for each pixel is red
    mat_r = image.reshape(3,32,32)[0]
    mat_g = image.reshape(3,32,32)[1]
    mat_b = image.reshape(3,32,32)[2]
    # r > 100 and r > g*1.5 and r > b*1.5
    for i in range(4,len(mat_r)-3):
        for j in range(4, len(mat_r[i])-3):
            if (mat_r[i,j] > 100) and \
               (mat_r[i,j] > 2*mat_g[i,j]) and \
               (mat_r[i,j] > 2*mat_b[i,j]) :
                num_r += 1
    if num_r > int(24*24/5):
        print("Number of red pixel:", num_r)
        return True

    return False



### save images

In [59]:
for i in range(len(test_dataset)):
    image,label = test_dataset[i]
    if predict_images(image, model) == b'automobile':
        # print('Label:', classes[label], ', Predicted:', predict_images(image, model))
        image_view = test_dataset.data[i]
        if check_red(image_view):
            print('Index:', str(i),'Label:', classes[label], ', Predicted:', predict_images(image, model))
            # show_img(image_view)
            save_img(image_view, f"images/red_automobile/{str(i)}.jpg")

Number of red pixel: 169
Index: 6 Label: b'automobile' , Predicted: b'automobile'
Number of red pixel: 178
Index: 81 Label: b'automobile' , Predicted: b'automobile'
Number of red pixel: 250
Index: 114 Label: b'automobile' , Predicted: b'automobile'
Number of red pixel: 177
Index: 231 Label: b'automobile' , Predicted: b'automobile'
Number of red pixel: 175
Index: 281 Label: b'airplane' , Predicted: b'automobile'
Number of red pixel: 155
Index: 532 Label: b'truck' , Predicted: b'automobile'
Number of red pixel: 159
Index: 677 Label: b'automobile' , Predicted: b'automobile'
Number of red pixel: 131
Index: 941 Label: b'automobile' , Predicted: b'automobile'
Number of red pixel: 164
Index: 973 Label: b'automobile' , Predicted: b'automobile'
Number of red pixel: 163
Index: 1098 Label: b'automobile' , Predicted: b'automobile'
Number of red pixel: 135
Index: 1177 Label: b'truck' , Predicted: b'automobile'
Number of red pixel: 118
Index: 1234 Label: b'automobile' , Predicted: b'automobile'
Numb

# saving model

In [36]:

PATH = 'models/mobile_v2_1.h5'
torch.save(model, PATH)

# load model

In [20]:

PATH = 'models/mobile_v2_1.h5'
model = torch.load(PATH)
model.eval()

NameError: name 'Net' is not defined