In [1]:
import numpy as np
import os, sys, cv2, time
import mxnet as mx
from mxnet import gluon, init, autograd, nd, image
from mxnet.gluon import data as gdata, utils as gutils, nn, loss as gloss
from mxnet.gluon.data.vision import datasets, transforms

The grayscale validation images were converted to RGB using [this site](https://demos.algorithmia.com/colorize-photos).

In [2]:
# Folder holding the final validation images. The trailing slash matters: file
# names are appended to this string directly when the images are read, and the
# class label is taken from the part of each file name before the first "_".
final_validation_target_folder = 'Scoring_Validation_Data(Gray_2_RGB)/'

In [3]:
# Load every file of the final validation folder into one uint8 image array and
# derive its class label from the file-name prefix ("<label>_<rest>").

# Labels are compared as strings so that only the exact prefixes "0".."5" are accepted.
valid_labels = {'0', '1', '2', '3', '4', '5'}

# os.listdir() returns entries in arbitrary order; sorting makes the sample
# order (and therefore the batches) reproducible across runs and machines.
files = sorted(os.listdir(final_validation_target_folder))
total_final_validation_data = len(files)
# Allocate the image buffer as uint8 right away instead of creating a float32
# array (4x larger) and casting it afterwards.
final_validation_data_x = nd.zeros((total_final_validation_data, 256, 256, 3), dtype=np.uint8)
final_validation_data_y = nd.zeros(total_final_validation_data)
total_num = 0

for index, f in enumerate(files):
    # NOTE(review): assumes every file decodes to a 256x256 image with 3 channels,
    # matching the buffer allocated above — confirm for new data.
    final_validation_data_x[index] = image.imread(final_validation_target_folder+f)

    label = f.split('_',1)[0]
    if label not in valid_labels:
        # Name the offending file so a stray or mis-named entry is easy to find.
        raise RuntimeError('Cannot label validation data: %s' % f)
    final_validation_data_y[index] = int(label)
    total_num += 1

# Sanity check: every listed file received a label.
if total_num == total_final_validation_data:
    print("Sum check pass!")

Sum check pass!


In [4]:
# Per-channel normalization statistics, passed to transforms.Normalize after
# ToTensor has rescaled the pixels, so they are expressed on the 0-1 scale.
# NOTE(review): presumably computed over the whole training set in RGB channel
# order — confirm against the training notebook.
train_mean = [0.5905187523955868, 0.5332879723913606, 0.49680592040529487]  # per-channel mean, over all data
train_std = [0.28736647217403904, 0.28094815765222825, 0.2815392173263535]  # per-channel standard deviation, over all data

In [5]:
batch_size = 64
# Use no worker processes on Windows and four everywhere else.
if sys.platform.startswith('win32'):
    num_workers = 0
else:
    num_workers = 4

final_validation_dataset = gdata.ArrayDataset(final_validation_data_x, final_validation_data_y)

# ToTensor transfers each image from shape (H, W, channel) to (channel, H, W) and
# rescales it to between 0 and 1; Normalize then standardizes every channel with
# the training statistics.
transformer = gdata.vision.transforms.Compose([
    gdata.vision.transforms.ToTensor(),
    gdata.vision.transforms.Normalize(train_mean, train_std),
])

# The transform is applied to the images only (first element of each sample);
# the order of the samples is kept as loaded.
final_validation_iter = gdata.DataLoader(
    final_validation_dataset.transform_first(transformer),
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

In [6]:
def try_gpu():
    """Return mx.gpu() when a GPU can actually be used, otherwise mx.cpu().

    Returns:
        The GPU context if a small array could be allocated on it; the CPU
        context if MXNet raised an MXNetError while trying.
    """
    try:
        device = mx.gpu()
        # Creating the context object alone does not prove the device works;
        # allocating a tiny array on it does.
        nd.array([0], ctx=device)
    except mx.base.MXNetError:
        return mx.cpu()
    return device
def evaluate_accuracy(data_iter, net, ctx=[mx.cpu()]):
    """Compute the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(features, labels)`` batches.
        net: Callable model; its output is reduced with ``argmax(axis=1)``.
        ctx: A single ``mx.Context`` or a list of contexts to evaluate on.
            The list default is never mutated here.

    Returns:
        Number of correct predictions divided by the number of labels seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    devices = [ctx] if isinstance(ctx, mx.Context) else ctx
    correct = nd.array([0])
    seen = 0
    for batch in data_iter:
        feature_parts, label_parts, _ = _get_batch(batch, devices)
        for part_x, part_y in zip(feature_parts, label_parts):
            part_y = part_y.astype('float32')
            hits = net(part_x).argmax(axis=1) == part_y
            # The running count lives on the CPU, so bring each partial sum there.
            correct += hits.sum().copyto(mx.cpu())
            seen += part_y.size
        # Block until this batch's additions have completed before moving on.
        correct.wait_to_read()
    return correct.asscalar() / seen

def _get_batch(batch, ctx):
    """Split one ``(features, labels)`` batch across the given contexts.

    Args:
        batch: A pair ``(features, labels)``.
        ctx: List of contexts handed to ``gutils.split_and_load``.

    Returns:
        A 3-tuple: the features split over ``ctx``, the labels split over
        ``ctx``, and the size of the first dimension of ``features``.
    """
    features, labels = batch
    target_dtype = features.dtype
    # Labels are cast to the feature dtype when the two differ.
    if labels.dtype != target_dtype:
        labels = labels.astype(target_dtype)
    batch_count = features.shape[0]
    feature_parts = gutils.split_and_load(features, ctx)
    label_parts = gutils.split_and_load(labels, ctx)
    return (feature_parts, label_parts, batch_count)

def train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs):
    """Train ``net`` on a single context and report metrics after every epoch.

    Args:
        net: Model to train.
        train_iter: Iterable yielding ``(X, y)`` training batches.
        test_iter: Iterable passed to ``evaluate_accuracy`` after each epoch.
        batch_size: Value passed to ``trainer.step``.
        trainer: Object whose ``step(batch_size)`` updates the parameters.
        ctx: Context the batches are moved to and the model is evaluated on.
        num_epochs: Number of passes over ``train_iter``.

    Prints the per-epoch loss, training accuracy, test accuracy and wall time,
    followed by the average time per epoch.
    """
    print('training on', ctx)
    loss_fn = gloss.SoftmaxCrossEntropyLoss()
    total_time = 0.0

    for epoch in range(num_epochs):
        loss_total = 0.0
        correct_total = 0.0
        sample_count = 0
        tic = time.time()
        for X, y in train_iter:
            X = X.as_in_context(ctx)
            y = y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                batch_loss = loss_fn(y_hat, y).sum()
            batch_loss.backward()
            # The loss is summed over the batch, and batch_size is handed to step().
            trainer.step(batch_size)
            y = y.astype('float32')
            loss_total += batch_loss.asscalar()
            correct_total += (y_hat.argmax(axis=1) == y).sum().asscalar()
            sample_count += y.size

        test_acc = evaluate_accuracy(test_iter, net, ctx)
        # The measured time covers both the training pass and the evaluation.
        elapsed = time.time() - tic
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, loss_total / sample_count,
                 correct_total / sample_count, test_acc, elapsed))
        total_time += elapsed
    print('Average time per epoch is %.4f s.'%(total_time/num_epochs))

In [7]:
# Device on which the saved weights are loaded and the evaluation is run:
# the GPU when try_gpu() can use one, otherwise the CPU.
ctx = try_gpu()

In [8]:
class Residual(nn.Block):
    """Residual unit: two 3x3 conv + batch-norm layers plus a shortcut connection.

    Args:
        num_channels: Number of output channels of every convolution.
        use_1x1conv: When True, the shortcut goes through a 1x1 convolution
            instead of being the identity.
        strides: Stride of the first 3x3 convolution and of the optional 1x1
            shortcut convolution.
        **kwargs: Forwarded to ``nn.Block``.
    """

    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        # Only the first convolution takes `strides`; the second always uses stride 1.
        self.conv1 = nn.Conv2D(num_channels, kernel_size=3, strides=strides, padding=1)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
        # The shortcut convolution uses the same stride as conv1 so that its
        # output matches the feature map produced by conv1 + conv2.
        self.conv3 = (
            nn.Conv2D(num_channels, kernel_size=1, strides=strides)
            if use_1x1conv
            else None
        )
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()

    def forward(self, X):
        """Return relu(main_path(X) + shortcut(X))."""
        out = nd.relu(self.bn1(self.conv1(X)))
        out = self.bn2(self.conv2(out))
        shortcut = X if self.conv3 is None else self.conv3(X)
        return nd.relu(out + shortcut)
    
def resnet_block(num_channels, num_residuals, first_block=False):
    """Stack ``num_residuals`` Residual units into one ``nn.Sequential`` stage.

    Args:
        num_channels: Output channels of every unit in the stage.
        num_residuals: Number of Residual units to stack.
        first_block: When True, no unit in the stage uses stride 2 or a 1x1
            shortcut convolution.

    Returns:
        The ``nn.Sequential`` holding the units in order.
    """
    blk = nn.Sequential()
    for i in range(num_residuals):
        # Only the leading unit of a non-first stage uses stride 2 together
        # with a 1x1 shortcut convolution.
        downsample = (i == 0) and not first_block
        if downsample:
            unit = Residual(num_channels, use_1x1conv=True, strides=2)
        else:
            unit = Residual(num_channels, strides=1)
        blk.add(unit)
    return blk

In [12]:
# Channel width and number of residual units of each of the four stages.
num_of_output_channels = [64, 128, 256, 512]
num_residuals = [3, 4, 6, 3]

net_RESNET = nn.Sequential()

# Stem: 7x7 stride-2 convolution, batch norm, ReLU and a 3x3 stride-2 max pool.
net_RESNET.add(
    nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
    nn.BatchNorm(),
    nn.Activation('relu'),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1),
)

# Residual stages, each followed by a dropout layer. The order of the add()
# calls is part of the model definition: the saved parameters are matched to
# the layers by their position in this Sequential.
for i, num in enumerate(num_of_output_channels):
    net_RESNET.add(resnet_block(num, num_residuals[i], first_block=(i == 0)))
    net_RESNET.add(nn.Dropout(0.1))

# Head: global average pooling followed by a dense layer with 6 outputs.
net_RESNET.add(nn.GlobalAvgPool2D())
net_RESNET.add(nn.Dense(6))
net_RESNET.load_parameters('Weight_Train_Test_Mixed/RESNET34_DROPOUT_train_data_transposed_7_fine_tuned.params', ctx= ctx)

In [13]:
# Evaluate the loaded network on the final validation loader and print the
# accuracy as a percentage (evaluate_accuracy returns a fraction, hence the 100*).
print('Accuracy of model is %.4f %% .'%(100*evaluate_accuracy(final_validation_iter, net_RESNET, ctx)))

Accuracy of model is 93.8272 % .
