**This notebook fine-tunes a pretrained convolutional neural network (ResNet50, via MXNet Gluon / GluonCV) to do the classification**

The main content is training and tuning the network, then predicting labels for the test set and writing a submission file.


In [None]:
# load data
import os
import mxnet as mx
from mxnet.image import imread, imresize
from mxnet import gluon, init, nd, autograd
from mxnet.gluon import nn
from mxnet.gluon.data.vision import transforms
from mxnet.gluon.loss import SoftmaxCrossEntropyLoss
from gluoncv import model_zoo

# Per-channel RGB statistics of ImageNet. The pretrained GluonCV models were
# trained on inputs normalised with these values, so the same normalisation
# must be applied here. `transforms.Normalize()` with no arguments uses
# mean=0 / std=1, which leaves the image unchanged.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Training-time preprocessing: resize, convert to a float tensor, normalise.
transform_train = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

# Evaluation-time preprocessing (validation and test); same steps as training.
transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

batch_size = 16

# Dataset layout: <path>/{train,validate,test}
path = '../input/case-classification/final_project_dataset'
train_path = os.path.join(path, 'train')
val_path = os.path.join(path, 'validate')
test_path = os.path.join(path, 'test')

# The transform is applied to the image only (`transform_first`), not the label.
# Training batches are shuffled on every pass.
train_loader = gluon.data.DataLoader(
    gluon.data.vision.ImageFolderDataset(train_path).transform_first(transform_train),
    batch_size=batch_size, shuffle=True)

# Validation batches keep the dataset order.
validation_loader = gluon.data.DataLoader(
    gluon.data.vision.ImageFolderDataset(val_path).transform_first(transform_test),
    batch_size=batch_size, shuffle=False)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Read one example image from two of the training class folders.
im1, im2 = (
    imread(train_path + relative_file)
    for relative_file in ("/class_1/B003AOIJ74.jpg", "/class_4/B002GQRGOY.jpg")
)

# Show each example as its own figure with a caption.
for picture, caption in ((im1, "Two Wheels"), (im2, "Zero Wheels")):
    plt.imshow(picture.asnumpy())
    plt.title(caption)
    plt.show()

In [None]:
def FineTuneAlexnet(classes, ctx):
    """Build an AlexNet whose feature layers come from the pretrained model.

    classes: number of the output classes
    ctx: training context (CPU or GPU)

    Returns the new network: its `features` block is taken from the
    pretrained AlexNet, while the rest is Xavier-initialised.
    """
    build_alexnet = gluon.model_zoo.vision.alexnet

    # Fresh network sized for `classes` outputs, initialised with Xavier.
    model = build_alexnet(classes=classes, pretrained=False, ctx=ctx)
    model.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

    # Swap in the feature extractor of the pretrained network.
    model.features = build_alexnet(pretrained=True, ctx=ctx).features

    return model

In [None]:
ctx = mx.gpu() # Set this to CPU or GPU depending on your training instance
epochs = 20
learning_rate = 0.001
num_outputs = 6  # 6 output classes

# Start from the pretrained ResNet50 and replace its output layer with a
# freshly initialised dense layer of `num_outputs` units. Without this the
# network keeps the pretrained model's original output layer and `num_outputs`
# is never used.
net = model_zoo.get_model('ResNet50_v1', pretrained=True, ctx=ctx)
with net.name_scope():
    net.output = nn.Dense(num_outputs)
net.output.initialize(init.Xavier(), ctx=ctx)
net

In [None]:
# Loss function: softmax cross-entropy.
softmax_cross_etropy_loss = SoftmaxCrossEntropyLoss()

# SGD optimiser over every parameter of `net`.
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='sgd',
    optimizer_params={'learning_rate': learning_rate},
)

In [None]:
def finetune_accuracy(output, label):
    """Return the fraction of rows whose highest-scoring column equals the label.

    output: (batch, num_output) float32 ndarray
    label: (batch, ) int32 ndarray
    """
    predicted = output.argmax(axis=1)
    # Compare in float32 so both sides of the equality have a float dtype.
    expected = label.astype('float32')
    matches = predicted == expected
    return matches.mean()

In [None]:
# Outer loop: `epochs` full passes through the training dataset.
for epoch in range(epochs):

    # Metrics are accumulated as per-sample sums and divided by the number of
    # samples seen, so a smaller final batch is not over-weighted (averaging
    # per-batch means would count it as much as a full batch).
    train_loss, val_loss, train_acc, valid_acc = 0., 0., 0., 0.
    train_count, val_count = 0, 0

    # Training loop: (with autograd and trainer steps, etc.)
    # This loop does the training of the neural network (weights are updated)
    for i, (data, label) in enumerate(train_loader):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_etropy_loss(output, label)
        loss.backward()
        n_samples = data.shape[0]
        # Batch accuracy is a mean; scale it back to a count of correct samples.
        train_acc += finetune_accuracy(output, label) * n_samples
        train_loss += loss.sum()
        train_count += n_samples
        # Normalise the gradient by the actual batch size.
        trainer.step(n_samples)

    # Validation loop:
    # This loop tests the trained network on validation dataset
    # No weight updates here
    for i, (data, label) in enumerate(validation_loader):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        n_samples = data.shape[0]
        valid_acc += finetune_accuracy(output, label) * n_samples
        val_loss += softmax_cross_etropy_loss(output, label).sum()
        val_count += n_samples

    # Take per-sample averages
    train_loss /= train_count
    train_acc /= train_count
    val_loss /= val_count
    valid_acc /= val_count

    print("Epoch %d: train loss %.3f, train acc %.3f, val loss %.3f, val acc %.3f" % (
        epoch, train_loss.asnumpy()[0], train_acc.asnumpy()[0], val_loss.asnumpy()[0], valid_acc.asnumpy()[0]))

In [None]:
# NOTE(review): `test_df` is not defined in any cell visible in this notebook;
# it must be loaded (with "ID" and "ASIN" columns) before this cell runs.

# Submission ids, in the table's row order.
test_ids = test_df["ID"].values.tolist()

# Image file name for each row: the ASIN value followed by ".jpg".
asin_values = test_df["ASIN"].values.tolist()
test_image_files = [asin + ".jpg" for asin in asin_values]

In [None]:
# Allocate one 224x224 RGB slot per listed test image. The buffer is sized from
# `test_image_files` (the list that is actually iterated) rather than from the
# directory listing. If the directory held extra or missing files, the buffer
# would contain all-zero images or overflow, and the number of predictions
# would no longer match `test_ids`.
test_images = nd.zeros((len(test_image_files), 224, 224, 3))
for idx, im_name in enumerate(test_image_files):
    test_images[idx] = imresize(imread(os.path.join(test_path, im_name)), 224, 224)

# make sure shuffle=False so predictions stay aligned with `test_image_files`
test_loader = gluon.data.DataLoader(
    transform_test(test_images),
    batch_size=batch_size, shuffle=False)

In [None]:
# Predict a class index for every test image, in loader order.
# `ctx` is the context the network was created on; it is reused here rather
# than re-assigned, so the data always lands on the same device as `net`.
test_preds = []
for i, data in enumerate(test_loader):
    data = data.as_in_context(ctx)
    pred = net(data)
    # argmax yields float32 indices; cast to int so labels are written as
    # "3" rather than "3.0" in the submission file.
    test_preds += pred.argmax(axis=1).astype('int32').asnumpy().tolist()

# Show a short summary instead of dumping the entire prediction list.
print("%d predictions, first 20: %s" % (len(test_preds), test_preds[:20]))

In [None]:
import pandas as pd

# Pair each test id with its predicted label, one row per test image.
submission_columns = {
    "ID": test_ids,
    "label": test_preds,
}
test_subm_df = pd.DataFrame(submission_columns)

# Write the submission file without the DataFrame index column.
test_subm_df.to_csv("final_project.csv", index=False)