# 图片分类

In [2]:
import mxnet as mx
from mxnet import autograd as ag
from mxnet import gluon
from mxnet import init
from mxnet import nd
from time import time
from mxnet.gluon import nn
from mxnet.gluon.model_zoo import vision as models

import warnings
warnings.filterwarnings('ignore')

## Parameters

In [3]:
# Image list (.lst) and RecordIO (.rec) files for the training and validation
# splits; consumed by get_iter.
path_list = '/home/carsmart/users/xiaoming/yichewang/train_lst/train_train.lst'
path_rec = '/home/carsmart/users/xiaoming/yichewang/train_lst/train_train.rec'
val_path_list = '/home/carsmart/users/xiaoming/yichewang/train_lst/train_val.lst'
val_path_rec = '/home/carsmart/users/xiaoming/yichewang/train_lst/train_val.rec'
# Path prefix that train() prepends to each checkpoint file name.
save_path = '/home/carsmart/users/xiaoming/yichewang/model/resnet50/'

# Resize value and input shape forwarded to ImageRecordIter.
# data_shape is presumably (channels, height, width) -- TODO confirm.
resize = 224
data_shape = (3, 224, 224)
# Per-channel mean / std forwarded to ImageRecordIter.
# NOTE(review): these values look like statistics for pixels scaled to [0, 1];
# confirm they match the pixel scale ImageRecordIter normalises (it may operate
# on raw 0-255 values, in which case these would need rescaling).
(mean_r, mean_g, mean_b) = (0.4914, 0.4822, 0.4465)
(std_r, std_g, std_b) = (0.2023, 0.1994, 0.2010)

# Samples per batch, used by both record iterators.
batch_size = 64
# List of devices the network is initialised on and batches are split across.
ctx = [mx.gpu(1)]
# Optimiser settings passed to gluon.Trainer ('sgd').
learning_rate = 0.1
wd = 0.0001
# Number of updates between learning-rate decays (FactorScheduler `step`).
# NOTE(review): 600000 is presumably the approximate number of training images,
# which would make this "decay every ~40 epochs" -- TODO confirm.
lr_step = 600000 // batch_size * 40
# Multiplier applied to the learning rate at each decay step.
lr_factor = 0.1
# Number of passes over the training data.
num_epochs = 300

## Data iterators

In [4]:
def get_iter(kv):
    """Create the training and validation image-record iterators.

    Both iterators read the module-level paths and preprocessing settings
    (``resize``, ``data_shape``, ``batch_size``, per-channel mean/std) and are
    partitioned using ``kv.num_workers`` and ``kv.rank``.

    Parameters
    ----------
    kv : object exposing ``num_workers`` and ``rank``
        The notebook passes the result of ``mx.kvstore.create``.

    Returns
    -------
    tuple
        ``(train_iter, val_iter)``. The training iterator is shuffled and
        wrapped in ``mx.io.PrefetchingIter``; the validation iterator is the
        bare ``mx.io.ImageRecordIter``.
    """
    # Keyword arguments that are identical for the two iterators.
    shared_kwargs = dict(
        resize=resize,
        data_shape=data_shape,
        batch_size=batch_size,
        rand_mirror=False,
        rand_crop=False,
        mean_r=mean_r,
        mean_g=mean_g,
        mean_b=mean_b,
        std_r=std_r,
        std_g=std_g,
        std_b=std_b,
        num_parts=kv.num_workers,
        part_index=kv.rank
    )

    # Training split: shuffled, then wrapped for prefetching.
    train_records = mx.io.ImageRecordIter(
        path_imglist=path_list,
        path_imgrec=path_rec,
        shuffle=True,
        **shared_kwargs
    )
    prefetched_train = mx.io.PrefetchingIter(train_records)

    # Validation split: same preprocessing, no shuffle argument, no prefetch wrapper.
    val_records = mx.io.ImageRecordIter(
        path_imglist=val_path_list,
        path_imgrec=val_path_rec,
        **shared_kwargs
    )

    return (prefetched_train, val_records)

## Model definition
Use the ResNet-50 (v1) model definition from mxnet.gluon.model_zoo.

In [5]:
# ResNet-50 v1 from the Gluon model zoo, created without pretrained weights.
net_resnet50 = models.get_model('resnet50_v1', pretrained=False)


def get_net():
    """Assemble the classifier: ResNet-50 feature extractor plus a Dense head.

    The feature block is taken from the module-level ``net_resnet50``, so every
    network returned by this function shares that same feature block.

    Returns
    -------
    nn.HybridSequential
        ``net_resnet50.features`` followed by ``nn.Dense(1062)``
        (1062 is presumably the number of target classes -- TODO confirm).
    """
    classifier = nn.HybridSequential()
    with classifier.name_scope():
        # Children are added one at a time, in order: backbone first, head second.
        classifier.add(net_resnet50.features)
        classifier.add(nn.Dense(1062))
    return classifier


net = get_net()

## Training functions

In [7]:
def get_batch(batch, ctx):
    """Return the data and label of one batch, split and loaded onto ``ctx``.

    Parameters
    ----------
    batch : mx.io.DataBatch or (data, label) pair
        For a ``DataBatch`` the first entries of ``batch.data`` and
        ``batch.label`` are used; anything else is unpacked as
        ``data, label``.
    ctx : list of mx.Context
        Devices to split the batch across.

    Returns
    -------
    tuple
        ``(data_parts, label_parts, batch_size)`` where the first two are the
        results of ``gluon.utils.split_and_load`` and ``batch_size`` is
        ``data.shape[0]`` of the unsplit data.
    """
    if isinstance(batch, mx.io.DataBatch):
        data = batch.data[0]
        label = batch.label[0]
    else:
        # Fixed typo: this used to bind `lable`, leaving `label` undefined and
        # raising NameError below for any non-DataBatch input.
        data, label = batch
    return (gluon.utils.split_and_load(data, ctx),
            gluon.utils.split_and_load(label, ctx),
            data.shape[0])

In [8]:
def evaluate_accuracy(data_iterator, net, ctx = [mx.cpu()]):
    """Compute classification accuracy of ``net`` over ``data_iterator``.

    Parameters
    ----------
    data_iterator : iterable of batches
        Each batch must be accepted by ``get_batch``. ``mx.io.PrefetchingIter``
        and ``mx.io.MXDataIter`` instances are reset before iteration.
    net : callable
        Applied to each data slice; ``argmax(axis=1)`` of its output is
        compared with the labels.
    ctx : mx.Context or list of mx.Context
        Devices to evaluate on. The default list is never mutated.

    Returns
    -------
    float
        Number of correct predictions divided by the number of labels seen.

    Raises
    ------
    ZeroDivisionError
        If the iterator yields no samples.
    """
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    correct = nd.array([0])
    total = 0.
    # Reset the same iterator types that train() resets. Previously only
    # MXDataIter was rewound, so an exhausted PrefetchingIter produced no
    # batches on the next evaluation.
    if isinstance(data_iterator, (mx.io.PrefetchingIter, mx.io.MXDataIter)):
        data_iterator.reset()
    for batch in data_iterator:
        # The per-batch size is not needed here; discarding it also avoids
        # shadowing the module-level `batch_size`.
        data, label, _ = get_batch(batch, ctx)
        for x, y in zip(data, label):
            correct += nd.sum(net(x).argmax(axis = 1) == y).copyto(mx.cpu())
            total += y.size
        correct.wait_to_read() # don't push too many operators into backend
    return correct.asscalar() / total

In [9]:
def train(train_data, val_data, net, loss, trainer, ctx, num_epochs, 
          print_batches = None, save_epochs = 10000):
    """Train ``net`` for ``num_epochs`` epochs, evaluating after each one.

    Parameters
    ----------
    train_data, val_data : iterables of batches
        Batches must be accepted by ``get_batch``. ``train_data`` is reset at
        the start of every epoch when it is a ``PrefetchingIter`` or
        ``MXDataIter``.
    net : callable network
    loss : callable ``loss(prediction, label)``
    trainer : object with ``step(batch_size)``
    ctx : mx.Context or list of mx.Context
    num_epochs : int
    print_batches : int or None
        If set, print running statistics every ``print_batches`` batches.
    save_epochs : int
        Parameters are saved whenever ``epoch % save_epochs == 0``, to a path
        built from the module-level ``save_path``.
    """
    print("Start training on ", ctx)
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    for epoch in range(num_epochs):
        # n counts samples passed to trainer.step; m counts labels seen.
        train_loss, train_acc, n, m = 0.0, 0.0, 0.0, 0.0
        if isinstance(train_data, (mx.io.PrefetchingIter, mx.io.MXDataIter)):
            train_data.reset()
        start = time()
        for i, batch in enumerate(train_data):
            # Fixed: the size returned by get_batch used to be bound to `batch`,
            # so the step and the sample counter below silently used the
            # module-level `batch_size` instead of this batch's actual size.
            data, label, batch_size = get_batch(batch, ctx)
            with ag.record():
                outputs = [net(x) for x in data]
                losses = [loss(yhat, y) for yhat, y in zip(outputs, label)]
            for l in losses:
                l.backward()
            train_acc += sum([(yhat.argmax(axis = 1) == y).sum().asscalar() 
                             for yhat, y in zip(outputs, label)])
            train_loss += sum([l.sum().asscalar() for l in losses])
            trainer.step(batch_size)
            n += batch_size
            m += sum([y.size for y in label])
            if print_batches and (i + 1) % print_batches == 0:
                print("Data %d. Loss: %f, Train acc %f, Time %.1f sec" % (
                    n, train_loss / n, train_acc / m, time() - start
                ))
        test_acc = evaluate_accuracy(val_data, net, ctx)
        if epoch % save_epochs == 0:
            # NOTE(review): the literal '0' before '.params' means epoch 3 is
            # written as '30.params' -- confirm this naming is intended.
            net.collect_params().save(save_path + str(epoch) + '0.params')
        print("Epoch %d. Loss: %.3f, Train acc %.2f, Test acc %.2f, Time %.1f sec" %(
            epoch, train_loss / n, train_acc / m, test_acc, time() - start
        ))

## train

In [None]:
# Local key-value store; get_iter reads its num_workers / rank to partition the data.
kv = mx.kvstore.create("local")
(train_iter, val_iter) = get_iter(kv)
loss = gluon.loss.SoftmaxCrossEntropyLoss()
# Multiply the learning rate by lr_factor every lr_step updates.
lr_scheduler = mx.lr_scheduler.FactorScheduler(step = lr_step, factor = lr_factor)
# Parameters must be initialised on the training devices before the Trainer is built.
net.initialize(ctx = ctx, init = init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 
                       'sgd', {'learning_rate': learning_rate, 'wd': wd, 
                               'lr_scheduler': lr_scheduler})
# Print progress every 1000 batches and save parameters after every epoch.
train(train_iter, val_iter, net, loss, trainer, ctx, num_epochs = num_epochs, 
      print_batches = 1000, save_epochs = 1)

Start training on  [gpu(0), gpu(1)]


In [1]:
# NOTE(review): scratch arithmetic that nothing else in the notebook uses --
# presumably converting 7668 seconds to minutes; consider removing before sharing.
7668 / 60

127.8