In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import cv2                         # To read and manipulate images
import matplotlib.pyplot as plt    # Python 2D plotting library
%matplotlib inline  
from sklearn.preprocessing import LabelEncoder
import torch
import torchvision
import torchvision.models as models
import torch.nn as nn
from sklearn.model_selection import train_test_split
import pickle

In [2]:
# Dataset locations, relative to the notebook's working directory.
TRAIN_PATH = 'data/CAX_Superhero_Train/'
TEST_PATH = 'data/CAX_Superhero_Test/'

# Training sub-folder name -> label string stored in the data frames.
superhero_dic = {
    'Ant-Man': 'ant_man',
    'Aquaman': 'aqua_man',
    'Avengers': 'avengers',
    'Batman': 'bat_man',
    'Black Panther': 'black_panther',
    'Captain America': 'captain_america',
    'Catwoman': 'cat_woman',
    'Ghost Rider': 'ghostrider',
    'Hulk': 'hulk',
    'Iron Man': 'iron_man',
    'Spiderman': 'spider_man',
    'Superman': 'super_man',
}

# Integer class id -> label string; ids follow the alphabetical order of the
# label strings (0: 'ant_man' ... 11: 'super_man').
labelencoder_dic = dict(enumerate(sorted(superhero_dic.values())))

# Size every image is resized to. Sizes tried earlier: 260x200 and 224x224.
IMG_HEIGHT, IMG_WIDTH = 32, 32

In [3]:
def read_image(filepath, color_mode=cv2.IMREAD_COLOR, target_size=None):
    """Read an image from a file and optionally resize it.

    Args:
        filepath: Path of the image file to load.
        color_mode: Flag forwarded to ``cv2.imread``.
        target_size: Optional size forwarded unchanged to ``cv2.resize`` as
            its ``dsize`` argument, which OpenCV interprets as
            (width, height). A falsy value skips resizing.

    Returns:
        The decoded image array, resized when ``target_size`` is given.

    Raises:
        IOError: If ``cv2.imread`` could not load the file.
    """
    img = cv2.imread(filepath, color_mode)
    if img is None:
        # cv2.imread reports a missing/corrupt file by returning None rather
        # than raising; fail here with the offending path instead of letting
        # a later resize or ``.shape`` access blow up with an opaque error.
        raise IOError('Could not read image: {}'.format(filepath))
    if target_size:
        img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
    return img

def read_train_data_properties(train_dir):
    """Collect one metadata row per training image.

    Every immediate sub-folder of ``train_dir`` is treated as a class folder;
    its name is translated to a label through ``superhero_dic``.

    Returns:
        DataFrame with columns img_name, img_height, img_width, img_ratio
        (height / width), num_channels, superhero and img_path.
    """
    column_names = ['img_name', 'img_height', 'img_width', 'img_ratio', 'num_channels',
                    'superhero', 'img_path']
    rows = []
    class_dirs = next(os.walk(train_dir))[1]
    for dir_name in class_dirs:
        img_dir = os.path.join(train_dir, dir_name)
        file_names = next(os.walk(img_dir))[2]
        for img_name in file_names:
            img_path = os.path.join(img_dir, img_name)
            shape = read_image(img_path).shape
            rows.append([
                img_name,
                shape[0],
                shape[1],
                shape[0] / shape[1],
                shape[2],
                superhero_dic[dir_name],
                img_path,
            ])

    return pd.DataFrame(rows, columns=column_names)

def read_test_data_properties(test_dir):
    """Collect one metadata row per image found directly inside ``test_dir``.

    Returns:
        DataFrame with columns img_name, img_height, img_width, img_ratio
        (height / width), num_channels and img_path.
    """
    column_names = ['img_name', 'img_height', 'img_width', 'img_ratio', 'num_channels', 'img_path']
    rows = []
    file_names = next(os.walk(test_dir))[2]
    for img_name in file_names:
        img_path = os.path.join(test_dir, img_name)
        shape = read_image(img_path).shape
        rows.append([img_name, shape[0], shape[1], shape[0] / shape[1], shape[2], img_path])

    return pd.DataFrame(rows, columns=column_names)

def load_raw_data(image_size=(IMG_HEIGHT, IMG_WIDTH)):
    """Load and resize every train/test image listed in the global data frames.

    Reads the module-level ``train_df`` and ``test_df`` (columns ``img_path``
    and, for train, ``superhero``), so both must exist before calling.

    Args:
        image_size: Target (height, width) of every image. The default is
            bound to IMG_HEIGHT / IMG_WIDTH when the function is defined.
            The argument is now honoured; it used to be overwritten inside
            the body.

    Returns:
        x_train: Array of the resized training images.
        y_train: Training labels encoded by a freshly fitted LabelEncoder.
        x_test: Array of the resized test images.
    """
    # cv2.resize takes its size as (width, height) while image_size is
    # (height, width); swap so non-square sizes give (N, H, W, C) arrays.
    target_size = (image_size[1], image_size[0])

    # Read and resize train images; labels are taken in the same row order.
    print('Loading and resizing train images and labels ...', flush=True)
    x_train = [read_image(path, target_size=target_size)
               for path in tqdm(train_df['img_path'], total=len(train_df))]
    y_train = train_df['superhero'].tolist()

    # Read and resize test images.
    print('Loading and resizing test images ...', flush=True)
    x_test = [read_image(path, target_size=target_size)
              for path in tqdm(test_df['img_path'], total=len(test_df))]

    # Transform lists into numpy arrays and encode the label strings.
    labelencoder = LabelEncoder()
    x_train = np.array(x_train)
    y_train = labelencoder.fit_transform(y_train)
    x_test = np.array(x_test)

    # To persist the id -> label mapping, pickle
    # dict(enumerate(labelencoder.classes_)) to 'labelencoder_dic.pkl'.

    return x_train, y_train, x_test

# Shuffle two numpy arrays in the same order
def randomize(a, b):
    """Reorder ``a`` and ``b`` along their first axis with one shared permutation.

    The permutation has ``a.shape[0]`` entries and is drawn from NumPy's
    global random state.

    Returns:
        Tuple ``(shuffled_a, shuffled_b)`` of newly indexed arrays.
    """
    order = np.arange(a.shape[0])
    # In-place shuffle of the index vector; both arrays are then indexed with it.
    np.random.shuffle(order)
    return a[order], b[order]

In [4]:
# Scan both image folders and summarise the numeric image properties.
train_df = read_train_data_properties(TRAIN_PATH)
print('train_df:', train_df.describe(), sep='\n')
test_df = read_test_data_properties(TEST_PATH)
print('test_df:', test_df.describe(), sep='\n')
print()

train_df:
        img_height    img_width    img_ratio  num_channels
count  5433.000000  5433.000000  5433.000000        5433.0
mean    230.794773   197.981594     1.169293           3.0
std      37.450213    24.658881     0.162639           0.0
min     120.000000    73.000000     0.463636           3.0
25%     200.000000   190.000000     1.000000           3.0
50%     246.000000   200.000000     1.294737           3.0
75%     260.000000   200.000000     1.300000           3.0
max     522.000000   540.000000     3.561644           3.0
test_df:
        img_height    img_width    img_ratio  num_channels
count  3375.000000  3375.000000  3375.000000        3375.0
mean    233.944889   203.209185     1.159080           3.0
std      36.831582    31.001904     0.160660           0.0
min      80.000000    80.000000     0.561873           3.0
25%     200.000000   190.000000     1.000000           3.0
50%     246.000000   200.000000     1.294737           3.0
75%     260.000000   200.000000     1

In [7]:
# Persist the image-property tables as CSV (without the index column).
for frame, csv_path in ((train_df, 'data/train_df.csv'), (test_df, 'data/test_df.csv')):
    frame.to_csv(csv_path, index=False)

In [12]:
# Ten most frequent (height, width) combinations among the train images.
df = pd.DataFrame(
    [[shape] for shape in zip(train_df['img_height'], train_df['img_width'])]
)
df[0].value_counts().head(10)

(260, 200)    1614
(246, 190)    1262
(180, 180)    1113
(200, 200)     600
(253, 253)     146
(225, 225)     140
(218, 218)      42
(400, 400)      21
(259, 194)      19
(480, 360)      18
Name: 0, dtype: int64

In [13]:
# Ten most frequent (height, width) combinations among the test images.
df = pd.DataFrame(
    [[shape] for shape in zip(test_df['img_height'], test_df['img_width'])]
)
df[0].value_counts().head(10)

(260, 200)    958
(246, 190)    729
(180, 180)    656
(253, 253)    263
(200, 200)    202
(225, 225)    123
(400, 400)     36
(218, 218)     27
(224, 224)     18
(260, 230)     17
Name: 0, dtype: int64

In [134]:
# Read images/labels from files and resize them. Each image is stored as a 3-dim array where the number of channels is 3
# Order matters: load_raw_data() takes no data-frame arguments and reads the
# module-level train_df / test_df assigned on the two lines before it.
train_df = read_train_data_properties(TRAIN_PATH)
test_df = read_test_data_properties(TEST_PATH)
x_train, y_train, x_test = load_raw_data()

Loading and resizing train images and labels ...


100%|█████████████████████████████████████████████████████████████████████████████| 5433/5433 [00:09<00:00, 558.56it/s]


Loading and resizing test images ...


100%|█████████████████████████████████████████████████████████████████████████████| 3375/3375 [00:06<00:00, 556.38it/s]


In [125]:
# Hold out 20% of the labelled images for validation, stratified by class
# label and with a fixed random_state.
# NOTE: x_train / y_train are rebound to the training portion, so running this
# cell again without reloading the data would split the already-split arrays.
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state=42, 
                                                      shuffle=True, stratify=y_train)

In [126]:
# Shapes of the train and validation image arrays after the split.
x_train.shape, x_valid.shape

((4346, 32, 32, 3), (1087, 32, 32, 3))

In [129]:
# Save the train/validation split and the test images as NumPy files under data/.
for array_path, array in (
    ('data/x_train', x_train),
    ('data/y_train', y_train),
    ('data/x_valid', x_valid),
    ('data/y_valid', y_valid),
    ('data/x_test', x_test),
):
    np.save(array_path, array)

In [9]:
import torchvision.models as models

In [10]:
# Interactive leftover: the trailing `?` is IPython's help lookup, not plain Python.
alexnet = models.alexnet?

In [None]:
# Never executed (In [None]). Without a call this binds the models.alexnet
# callable itself, not a constructed network.
alexnet = models.alexnet

In [11]:
# Build AlexNet with pretrained weights; the recorded output below shows the
# weight file being downloaded.
alexnet = models.alexnet(pretrained = True)

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to C:\Users\Андрей/.torch\models\alexnet-owt-4df8aa71.pth
100%|███████████████████████████████████████████████████████████████| 244418560/244418560 [00:41<00:00, 5960080.64it/s]


In [12]:
# Show the layer structure (features / classifier) of the loaded AlexNet.
print(alexnet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096,

In [13]:
# The expression was cut off after the dot; the recorded output below is the
# bound method Module.modules, so that attribute is restored here.
alexnet.modules

<bound method Module.modules of AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
 

In [14]:
# SqueezeNet 1.1 built with default arguments (no `pretrained` flag is passed).
model_conv = torchvision.models.squeezenet1_1()

In [15]:
# Print the names of the model's direct sub-modules; named_children() yields
# (name, module) pairs, of which only the name is needed here.
for name, child in model_conv.named_children():
    print(name)

features
classifier


In [20]:
## How many In_channels are there for the conv layer
# classifier[1] is the Conv2d layer of the SqueezeNet classifier (see the
# layer list printed a few cells below).
in_ftrs = model_conv.classifier[1].in_channels
in_ftrs

512

In [19]:
## How many Out_channels are there for the conv layer
# Same Conv2d layer as above; the recorded output below is 1000.
out_ftrs = model_conv.classifier[1].out_channels
out_ftrs

1000

In [21]:
## Converting a sequential layer to list of layers 
# The list is a plain Python list of the classifier's child modules; editing
# the list does not by itself change model_conv.classifier.
features = list(model_conv.classifier.children())
features

[Dropout(p=0.5),
 Conv2d(512, 1000, kernel_size=(1, 1), stride=(1, 1)),
 ReLU(inplace),
 AvgPool2d(kernel_size=13, stride=1, padding=0, ceil_mode=False, count_include_pad=True)]

In [None]:
## Changing the conv layer to required dimension
# n_class, kernel_size and stride were not defined at this point. The new layer
# mirrors the Conv2d(512, 1000, kernel_size=(1, 1), stride=(1, 1)) it replaces
# and only changes the number of output channels to one per class.
n_class = 12  # 12 superhero classes, as listed in labelencoder_dic
features[1] = nn.Conv2d(in_ftrs, n_class, kernel_size=1, stride=1)
# NOTE(review): this edits only the Python list; the model keeps its old layer
# until the list is wrapped back, e.g. model_conv.classifier = nn.Sequential(*features).

In [22]:
# Show the full layer structure of the current model_conv.
print(model_conv)

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace)
    )
    (5): MaxPool2d(kernel_size=(3, 3), stride=(2,

In [23]:
# Rebind model_conv (previously the SqueezeNet instance) to a pretrained AlexNet.
model_conv = models.alexnet(pretrained=True)

In [25]:
# Input width of classifier[6]; the recorded output below is 4096.
num_ftrs = model_conv.classifier[6].in_features
num_ftrs

4096

In [28]:
# Swap the last classifier layer of a pretrained AlexNet for one that emits
# n_class scores.
n_class = 12
model_conv = models.alexnet(pretrained=True)

# Input width of the original final layer (classifier[6]).
num_ftrs = model_conv.classifier[6].in_features

# Keep every classifier layer except the last, then append the new output layer.
features = [layer for layer in model_conv.classifier.children()][:-1]
features.append(nn.Linear(num_ftrs, n_class))

# Wrap the edited layer list in a container and attach it to the model.
model_conv.classifier = nn.Sequential(*features)

In [33]:
# Interactive leftover: the trailing `?` is IPython's help lookup, not plain Python.
model_conv.train?

In [None]:
class Flatten(nn.Module):
    """Collapse all non-batch dimensions into one so conv output can feed nn.Linear."""

    def forward(self, x):
        # Keep the batch dimension, merge everything else.
        return x.view(x.size(0), -1)


# Small baseline CNN for 3-channel 32x32 inputs (IMG_HEIGHT x IMG_WIDTH).
# `Flatten` was referenced but never defined in this notebook, so it is
# defined above; the final layer now emits 12 scores (one per superhero
# class) instead of 10.
model_base = nn.Sequential(
                nn.Conv2d(3, 32, kernel_size=7, stride=1),  # 32x32 -> 26x26
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(32),
                nn.MaxPool2d(kernel_size=2, stride=2),      # 26x26 -> 13x13
                Flatten(),                                  # 32 * 13 * 13 = 5408 features
                nn.Linear(5408, 1024),
                nn.ReLU(inplace=True),
                nn.Linear(1024, 12),                        # affine layer, one score per class
              )