# Redes Neuronales Convolucionales MxNet

In [19]:
import numpy as np
import mxnet as mx
import matplotlib.pyplot as plt

from mxnet import gluon, nd, image
from mxnet.gluon.data.vision import transforms
from mxnet.gluon.model_zoo import vision


In [20]:
transform_train = transforms.Compose([
    # Randomly crop an area and resize it to be 32x32, then pad it to be 40x40
    transforms.RandomCrop(32, pad=4),
    # Randomly flip the image horizontally
    transforms.RandomFlipLeftRight(),
    # Transpose the image from height*width*num_channels to num_channels*height*width
    # and map values from [0, 255] to [0,1]
    transforms.ToTensor(),
    # Normalize the image with mean and standard deviation calculated across all images
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

In [21]:
# Number of data loader workers
num_workers = 8
# Calculate effective total batch size
batch_size = 128 

# Set train=True for training data
# Set shuffle=True to shuffle the training data
train_data = gluon.data.DataLoader(
    gluon.data.vision.CIFAR100(train=True).transform_first(transform_train),
    batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)

# Set train=False for validation data
val_data = gluon.data.DataLoader(
    gluon.data.vision.CIFAR100(train=False).transform_first(transform_test),
    batch_size=batch_size, shuffle=False, num_workers=num_workers)

# 1.1 Entrenar modelo from scrath

Descargamos un modelo desde el repositorio de MxNet.

In [2]:
# https://mxnet.apache.org/versions/1.9.1/api/python/docs/api/gluon/model_zoo/index.html

model = vision.resnet18_v1(pretrained=False)

In [3]:
model.collect_params()

{'features.0.weight': Parameter (shape=(64, -1, 7, 7), dtype=<class 'numpy.float32'>),
 'features.1.gamma': Parameter (shape=(-1,), dtype=<class 'numpy.float32'>),
 'features.1.beta': Parameter (shape=(-1,), dtype=<class 'numpy.float32'>),
 'features.1.running_mean': Parameter (shape=(-1,), dtype=<class 'numpy.float32'>),
 'features.1.running_var': Parameter (shape=(-1,), dtype=<class 'numpy.float32'>),
 'features.4.0.body.0.weight': Parameter (shape=(64, -1, 3, 3), dtype=<class 'numpy.float32'>),
 'features.4.0.body.1.gamma': Parameter (shape=(-1,), dtype=<class 'numpy.float32'>),
 'features.4.0.body.1.beta': Parameter (shape=(-1,), dtype=<class 'numpy.float32'>),
 'features.4.0.body.1.running_mean': Parameter (shape=(-1,), dtype=<class 'numpy.float32'>),
 'features.4.0.body.1.running_var': Parameter (shape=(-1,), dtype=<class 'numpy.float32'>),
 'features.4.0.body.3.weight': Parameter (shape=(64, -1, 3, 3), dtype=<class 'numpy.float32'>),
 'features.4.0.body.4.gamma': Parameter (shap

Inicializamos los parametros en la GPU.

In [5]:
from mxnet.gluon import nn

ctx=[mx.gpu()]
model.output = nn.Dense(100)
model.initialize(mx.init.Xavier(), ctx = ctx)

In [11]:
# Learning rate decay factor
lr_decay = 0.1
# Epochs where learning rate decays
lr_decay_epoch = [80, 160, np.inf]

# Nesterov accelerated gradient descent
optimizer = 'sgd'
# Set parameters
optimizer_params = {'learning_rate': 0.1, 'wd': 0.0001, 'momentum': 0.9}

# Define our trainer for net
trainer = gluon.Trainer(model.collect_params(), optimizer, optimizer_params)

In [12]:
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

In [13]:
train_metric = mx.gluon.metric.Accuracy()

In [16]:
def test(ctx, val_data):
    metric = mx.gluon.metric.Accuracy()
    for i, batch in enumerate(val_data):
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        outputs = [model(X) for X in data]
        metric.update(label, outputs)
    return metric.get()

In [17]:
import time 
from mxnet import autograd as ag

epochs = 100
lr_decay_count = 0

for epoch in range(epochs):
    tic = time.time()
    train_metric.reset()
    train_loss = 0

    # Learning rate decay
    if epoch == lr_decay_epoch[lr_decay_count]:
        trainer.set_learning_rate(trainer.learning_rate*lr_decay)
        lr_decay_count += 1

    # Loop through each batch of training data
    for i, batch in enumerate(train_data):
        # Extract data and label
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)

        # AutoGrad
        with ag.record():
            output = [model(X) for X in data]
            loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]

        # Backpropagation
        for l in loss:
            l.backward()

        # Optimize
        trainer.step(batch_size)

        # Update metrics
        train_loss += sum([l.sum().asnumpy() for l in loss])
        train_metric.update(label, output)

    name, acc = train_metric.get()
    # Evaluate on Validation data
    name, val_acc = test(ctx, val_data)
    print('[Epoch %d] train=%f val=%f loss=%f time: %f' %
        (epoch, acc, val_acc, train_loss, time.time()-tic))



[Epoch 0] train=0.277103 val=0.327500 loss=116590.569061 time: 66.905487
[Epoch 1] train=0.325040 val=0.351500 loss=108665.043823 time: 67.415293
[Epoch 2] train=0.368670 val=0.393100 loss=101842.684494 time: 67.596745


In [18]:
train_loss

101842.68449401855