In [1]:
import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet.gluon import nn

import h5py
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

ctx = mx.cpu()

In [2]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

def accuracy(output, labels):
    return nd.mean(nd.argmax(output, axis=1) == labels).asscalar()

def evaluate(net, data_iter):
    loss, acc, n = 0., 0., 0.
    steps = len(data_iter)
    for data, label in data_iter:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)
        output = net(data)
        acc += accuracy(output, label)
        loss += nd.mean(softmax_cross_entropy(output, label)).asscalar()
    return loss/steps, acc/steps

# input the features we get by get_feature.ipynb

In [3]:
def load_data(model_name, batch_size=128, train_size=0.8):
    features = nd.load('features_train_%s.nd' % model_name)[0]
    labels = nd.load('labels.nd')[0]

    n_train = int(features.shape[0]*train_size)

    X_train = features[:n_train]
    y_train = labels[:n_train]

    X_val = features[n_train:]
    y_val = labels[n_train:]

    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    dataset_val = gluon.data.ArrayDataset(X_val, y_val)

    data_iter_train = gluon.data.DataLoader(dataset_train, batch_size)
    data_iter_val = gluon.data.DataLoader(dataset_val, batch_size)
    
    return data_iter_train, data_iter_val

In [4]:
def build_model():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(256, activation='relu'))
        net.add(nn.Dropout(0.5))
        net.add(nn.Dense(120))

    net.initialize(ctx=ctx)
    return net

# Transfer learning

In [5]:
def train_model(model_name):
    epochs = 50
    batch_size = 128
    
    data_iter_train, data_iter_val = load_data(model_name, batch_size)
    net = build_model()
 
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 1e-4, 'wd': 1e-5})
    
    for epoch in range(epochs):
        train_loss = 0.
        train_acc = 0.
        steps = len(data_iter_train)
        for data, label in data_iter_train:
            data, label = data.as_in_context(ctx), label.as_in_context(ctx)

            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)

            loss.backward()
            trainer.step(batch_size)

            train_loss += nd.mean(loss).asscalar()
            train_acc += accuracy(output, label)

        val_loss, val_acc = evaluate(net, data_iter_val)
    
    print("Epoch %d. loss: %.4f, acc: %.2f%%, val_loss %.4f, val_acc %.2f%% Model: %s" % (
        epoch+1, train_loss/steps, train_acc/steps*100, val_loss, val_acc*100, model_name))
    
    return val_loss

# Train all the pre_train models

In [6]:
from mxnet.gluon.model_zoo.model_store import _model_sha1

losses = []

for model_name in sorted(_model_sha1.keys()):
#     print(model_name)
    val_loss = train_model(model_name)
    losses.append((model_name, val_loss))


Epoch 50. loss: 0.1197, acc: 97.33%, val_loss 0.9693, val_acc 76.68% Model: alexnet
Epoch 50. loss: 1.3277, acc: 70.57%, val_loss 1.3429, val_acc 75.96% Model: densenet121
Epoch 50. loss: 1.1792, acc: 72.92%, val_loss 1.2187, val_acc 81.93% Model: densenet161
Epoch 50. loss: 1.0941, acc: 76.56%, val_loss 1.1592, val_acc 78.63% Model: densenet169
Epoch 50. loss: 1.0790, acc: 75.52%, val_loss 1.1218, val_acc 81.67% Model: densenet201
Epoch 50. loss: 0.0120, acc: 100.00%, val_loss 1.0326, val_acc 73.70% Model: inceptionv3
Epoch 50. loss: 0.0759, acc: 97.98%, val_loss 0.8530, val_acc 78.43% Model: resnet101_v1
Epoch 50. loss: 0.0996, acc: 96.61%, val_loss 0.6772, val_acc 82.87% Model: resnet152_v1
Epoch 50. loss: 0.1193, acc: 97.20%, val_loss 0.8436, val_acc 74.70% Model: resnet18_v1
Epoch 50. loss: 0.0748, acc: 99.41%, val_loss 0.5078, val_acc 87.18% Model: resnet18_v2
Epoch 50. loss: 0.0858, acc: 97.92%, val_loss 0.7815, val_acc 80.61% Model: resnet34_v1
Epoch 50. loss: 0.0598, acc: 99.4

In [7]:
df = pd.DataFrame(losses, columns=['model', 'val_loss'])
df = df.sort_values('val_loss')
df.head()

Unnamed: 0,model,val_loss
11,resnet34_v2,0.49722
9,resnet18_v2,0.507815
7,resnet152_v1,0.677227
12,resnet50_v1,0.75245
10,resnet34_v1,0.781471


In [8]:
df.to_csv('models.csv', index=None)

In [9]:
df = pd.read_csv('models.csv')

# Sort the l

In [10]:
for i, (model_name, val_loss) in df.iterrows():
    print('%s | %s' % (model_name, val_loss))

resnet34_v2 | 0.4972200890382131
resnet18_v2 | 0.5078153510888418
resnet152_v1 | 0.6772272189458212
resnet50_v1 | 0.7524495720863342
resnet34_v1 | 0.7814714312553406
vgg16_bn | 0.7888141870498657
vgg19_bn | 0.7992375890413921
vgg11_bn | 0.8299247622489929
resnet18_v1 | 0.8435645103454591
resnet101_v1 | 0.8530370990435282
vgg13_bn | 0.8742475112279257
alexnet | 0.969298283259074
squeezenet1.0 | 0.980705757935842
squeezenet1.1 | 1.0228662292162578
inceptionv3 | 1.0326079527537029
vgg19 | 1.091732641061147
densenet201 | 1.1217530965805054
vgg16 | 1.1551291545232136
densenet169 | 1.1591972510019941
densenet161 | 1.218744158744812
vgg11 | 1.2438867886861165
densenet121 | 1.3429047664006548
vgg13 | 1.4020016590754192


In [11]:
def load_models_data(model_names, batch_size=128, train_size=0.8):
    features = [nd.load('features_train_%s.nd' % model_name)[0] for model_name in model_names]
    features = nd.concat(*features, dim=1)
    labels = nd.load('labels.nd')[0]
    
    n_train = int(features.shape[0]*train_size)
    
    X_train = features[:n_train]
    y_train = labels[:n_train]

    X_val = features[n_train:]
    y_val = labels[n_train:]

    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    dataset_val = gluon.data.ArrayDataset(X_val, y_val)

    data_iter_train = gluon.data.DataLoader(dataset_train, batch_size, shuffle=True)
    data_iter_val = gluon.data.DataLoader(dataset_val, batch_size)
    
    return data_iter_train, data_iter_val

In [12]:
df.head(10)

Unnamed: 0,model,val_loss
0,resnet34_v2,0.49722
1,resnet18_v2,0.507815
2,resnet152_v1,0.677227
3,resnet50_v1,0.75245
4,resnet34_v1,0.781471
5,vgg16_bn,0.788814
6,vgg19_bn,0.799238
7,vgg11_bn,0.829925
8,resnet18_v1,0.843565
9,resnet101_v1,0.853037


In [13]:
net = build_model()
epochs = 100
batch_size = 128
model_names = ['resnet34_v2', 'resnet18_v2']
data_iter_train, data_iter_val = load_models_data(model_names, batch_size=batch_size)


lr_sch = mx.lr_scheduler.FactorScheduler(step=400, factor=0.9)
trainer = gluon.Trainer(net.collect_params(), 'adam', 
                        {'learning_rate': 1e-4, 'wd': 1e-5, 'lr_scheduler': lr_sch})

for epoch in range(epochs):
    train_loss = 0.
    train_acc = 0.
    steps = len(data_iter_train)
    for data, label in data_iter_train:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)

        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)

        loss.backward()
        trainer.step(batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)

    val_loss, val_acc = evaluate(net, data_iter_val)

    print("Epoch %d. loss: %.4f, acc: %.2f%%, val_loss %.4f, val_acc %.2f%%" % (
        epoch+1, train_loss/steps, train_acc/steps*100, val_loss, val_acc*100))

Epoch 1. loss: 4.6070, acc: 6.32%, val_loss 3.4070, val_acc 19.89%
Epoch 2. loss: 3.3050, acc: 19.27%, val_loss 2.6139, val_acc 46.11%
Epoch 3. loss: 2.6430, acc: 35.29%, val_loss 2.0169, val_acc 57.51%
Epoch 4. loss: 2.0474, acc: 48.11%, val_loss 1.6553, val_acc 65.48%
Epoch 5. loss: 1.6864, acc: 55.40%, val_loss 1.4384, val_acc 69.37%
Epoch 6. loss: 1.3804, acc: 63.87%, val_loss 1.2527, val_acc 74.08%
Epoch 7. loss: 1.1616, acc: 69.79%, val_loss 1.1088, val_acc 76.08%
Epoch 8. loss: 0.9060, acc: 78.19%, val_loss 0.9816, val_acc 78.55%
Epoch 9. loss: 0.7913, acc: 80.34%, val_loss 0.9268, val_acc 79.21%
Epoch 10. loss: 0.6517, acc: 84.77%, val_loss 0.8439, val_acc 79.01%
Epoch 11. loss: 0.5769, acc: 84.51%, val_loss 0.7893, val_acc 80.69%
Epoch 12. loss: 0.4900, acc: 87.83%, val_loss 0.7573, val_acc 80.63%
Epoch 13. loss: 0.4191, acc: 90.69%, val_loss 0.7061, val_acc 82.83%
Epoch 14. loss: 0.3589, acc: 92.19%, val_loss 0.6817, val_acc 81.47%
Epoch 15. loss: 0.3079, acc: 94.27%, val_los

In [14]:
net = build_model()
epochs = 100
batch_size = 128
model_names = ['inceptionv3', 'resnet152_v1']
data_iter_train, data_iter_val = load_models_data(model_names, batch_size=batch_size)


lr_sch = mx.lr_scheduler.FactorScheduler(step=400, factor=0.9)
trainer = gluon.Trainer(net.collect_params(), 'adam', 
                        {'learning_rate': 1e-4, 'wd': 1e-5, 'lr_scheduler': lr_sch})

for epoch in range(epochs):
    train_loss = 0.
    train_acc = 0.
    steps = len(data_iter_train)
    for data, label in data_iter_train:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)

        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)

        loss.backward()
        trainer.step(batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)

    val_loss, val_acc = evaluate(net, data_iter_val)

    print("Epoch %d. loss: %.4f, acc: %.2f%%, val_loss %.4f, val_acc %.2f%%" % (
        epoch+1, train_loss/steps, train_acc/steps*100, val_loss, val_acc*100))

Epoch 1. loss: 7.9950, acc: 6.45%, val_loss 6.2256, val_acc 0.00%
Epoch 2. loss: 3.9955, acc: 13.02%, val_loss 6.9279, val_acc 0.00%
Epoch 3. loss: 3.2837, acc: 24.74%, val_loss 8.0866, val_acc 0.00%
Epoch 4. loss: 2.7520, acc: 32.75%, val_loss 8.3690, val_acc 0.00%
Epoch 5. loss: 2.1339, acc: 44.47%, val_loss 9.2169, val_acc 0.00%
Epoch 6. loss: 1.7401, acc: 52.73%, val_loss 9.7609, val_acc 0.00%
Epoch 7. loss: 1.3937, acc: 63.87%, val_loss 10.5012, val_acc 0.00%
Epoch 8. loss: 1.1288, acc: 68.42%, val_loss 10.5817, val_acc 0.00%
Epoch 9. loss: 0.9430, acc: 72.85%, val_loss 11.8650, val_acc 0.00%
Epoch 10. loss: 0.7794, acc: 77.28%, val_loss 11.2317, val_acc 0.00%
Epoch 11. loss: 0.6034, acc: 82.62%, val_loss 12.3496, val_acc 0.00%
Epoch 12. loss: 0.5591, acc: 83.46%, val_loss 11.9016, val_acc 0.00%
Epoch 13. loss: 0.4916, acc: 85.22%, val_loss 12.0809, val_acc 0.00%
Epoch 14. loss: 0.3946, acc: 88.28%, val_loss 12.3181, val_acc 0.00%
Epoch 15. loss: 0.3555, acc: 89.58%, val_loss 12.8

KeyboardInterrupt: 

In [14]:
features_test = [nd.load('features_test_%s.nd' % model_name)[0] for model_name in model_names]
features_test = nd.concat(*features_test, dim=1)

In [15]:
output = nd.softmax(net(features_test.as_in_context(ctx))).asnumpy()

In [16]:
df_pred = pd.read_csv('sample_submission.csv')

for i, c in enumerate(df_pred.columns[1:]):
    df_pred[c] = output[:,i]

df_pred.to_csv('pred.csv', index=None)

In [19]:
zip(np.argmax(pd.read_csv('pred_0.28.csv').values[:,1:], axis=-1), np.argmax(df_pred.values[:,1:], axis=-1))[:10]

FileNotFoundError: File b'pred_0.28.csv' does not exist