# cifar-100 conv net with Caffe for DVIA

Experimental layers. Includes NiN, BN and Dropout.

## Download and convert the cifar-100 dataset to LMDB

In [1]:
##--%%time
##--!python download-cifar-100.py
##--!ipython convert-cifar-100-32x32.ipy


Downloading...
Dataset already downloaded. Did not download twice.

Extracting...
Dataset already extracted. Did not extract twice.

Converting...
Conversion was already done. Did not convert twice.

CPU times: user 8 ms, sys: 16 ms, total: 24 ms
Wall time: 1.17 s


## Build the model with Caffe. 

In [1]:
import numpy as np
import os, sys
import re

scriptpath    = os.path.dirname(os.path.realpath( "xxxx" ))
caffe_root    = os.path.sep.join(scriptpath.split(os.path.sep)[:-2])
#caffe_root  = os.path.join(os.environ['HOME'], 'Projects', 'dvcaffe')
cifar_db_root = os.path.join(os.environ['HOME'], 'Projects', 'IMAGES', 'dvia', 'cifar_png.32x32')
dvia_db_root  = os.path.join(os.environ['HOME'], 'Projects', 'IMAGES', 'dvia', 'png.32x32')

import caffe
from caffe import layers as L
from caffe import params as P

print "scriptpath = {}".format(scriptpath)
print "caffe_root = {}".format(caffe_root)
print "cifar_db_root = {}".format(cifar_db_root)
print "dvia_db_root = {}".format(dvia_db_root)

scriptpath = /home/maheriya/Projects/dvcaffe/examples/dvia_32x32
caffe_root = /home/maheriya/Projects/dvcaffe
cifar_db_root = /home/maheriya/Projects/IMAGES/dvia/cifar_png.32x32
dvia_db_root = /home/maheriya/Projects/IMAGES/dvia/png.32x32


## Load and visualise the untrained network's internal structure and shape
The network's structure (graph) visualisation tool of caffe is broken in the current release. We will simply print here the data shapes. 

In [2]:
weight_param = dict(lr_mult=1, decay_mult=1)
bias_param   = dict(lr_mult=2, decay_mult=0)
## Use for training from scratch
learned_param = [weight_param, bias_param]

frozen_weight_param = dict(lr_mult=0.2, decay_mult=0.2)  # *0.2
frozen_bias_param   = dict(lr_mult=0.4, decay_mult=0)    # *0.2
## Use for training from a pretrained model
frozen_param = [frozen_weight_param, frozen_bias_param]

wgt_filler = {'type': 'xavier'}
bn_param = '''param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }
  param {
    lr_mult: 0
  }'''

low_dropout = {'dropout_ratio': 0.3}
mid_dropout = {'dropout_ratio': 0.5}


def cnn_inner_layers(n, param=learned_param):
    '''
    n: caffe.NetSpec instance
    It is assumed that n.data is already created.
    '''
    # First main conv layer
    n.conv1  = L.Convolution(n.data,    kernel_size=5, stride=1, num_output=64, weight_filler=wgt_filler, param=param)
    n.pool1  = L.Pooling(n.conv1,       kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.relu1b = L.ReLU(n.pool1, in_place=True)

    # Second main conv layer
    n.conv2  = L.Convolution(n.relu1b,   kernel_size=3, stride=1, num_output=100, weight_filler=wgt_filler, param=param)
    n.pool2  = L.Pooling(n.conv2,        kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.relu2  = L.ReLU(n.pool2, in_place=True)

    # Third and last main convolution layer.
    n.conv3  = L.Convolution(n.relu2,    kernel_size=3, stride=1, num_output=200, weight_filler=wgt_filler, param=learned_param)
    n.pool3  = L.Pooling(n.conv3,        kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.relu3  = L.ReLU(n.pool3, in_place=True)

    # Last fc converted to convolution.
    n.conv_last = L.Convolution(n.relu3, kernel_size=1, stride=1, num_output=384, weight_filler=wgt_filler, param=learned_param)
    n.relu_last = L.ReLU(n.conv_last, in_place=True)
    #--n.fc_last   = L.InnerProduct(n.relu3, num_output=768, weight_filler=wgt_filler, param=learned_param)
    #--n.relu_last = L.ReLU(n.fc_last, in_place=True)
    return n


# For pre-training
def cnn_cifar(imgdb, mean_file, batch_size, mirror=True):
    n = caffe.NetSpec()
    #n.data, n.label_coarse, n.label_fine = L.HDF5Data(batch_size=batch_size, source=imgdb, ntop=3)
    n.data, n.label_coarse = L.Data(batch_size=batch_size, source=imgdb, backend=P.Data.LMDB, 
                             transform_param=dict(scale=1./256, mirror=mirror, mean_file=mean_file), ntop=2)

    # Create inner layers
    n = cnn_inner_layers(n)

    # Output: 20-class and 100-class classifiers
    n.fc_coarse       = L.InnerProduct(n.relu_last, num_output=20, weight_filler=wgt_filler, param=learned_param)
    n.accuracy_coarse = L.Accuracy(n.fc_coarse, n.label_coarse)
    n.loss_coarse     = L.SoftmaxWithLoss(n.fc_coarse, n.label_coarse)
#     n.fc_coarse       = L.Convolution(n.drop_last, kernel_size=1, stride=1, num_output=20, weight_filler=wgt_filler, param=learned_param)
#     n.fc_avpool_coarse= L.Pooling(n.fc_coarse, kernel_size=2, stride=2, pool=P.Pooling.AVE)
#     n.accuracy_coarse = L.Accuracy(n.fc_avpool_coarse, n.label_coarse)
#     n.loss_coarse     = L.SoftmaxWithLoss(n.fc_avpool_coarse, n.label_coarse, loss_weight=0.65)

#     n.fc_fine         = L.InnerProduct(n.drop_last, num_output=100, weight_filler=wgt_filler, param=learned_param)
#     n.accuracy_fine   = L.Accuracy(n.fc_fine, n.label_fine)
#     n.loss_fine       = L.SoftmaxWithLoss(n.fc_fine, n.label_fine, loss_weight=0.35)
##     n.fc_avpool_fine  = L.Pooling(n.fc_fine, kernel_size=2, stride=2, pool=P.Pooling.AVE)
##     n.accuracy_fine   = L.Accuracy(n.fc_avpool_fine, n.label_fine)
##     n.loss_fine       = L.SoftmaxWithLoss(n.fc_avpool_fine, n.label_fine, loss_weight=0.35)

    return n.to_proto()

with open('dvia_pretrain.prototxt', 'w') as f:
    lmdb      = os.path.join(cifar_db_root, 'data/cifar_32x32/trn_lmdb')
    mean_file = os.path.join(cifar_db_root, 'data/cifar_32x32/trn_mean.binaryproto')
    prto = str(cnn_cifar(lmdb, mean_file, 100, mirror=True))
    prto = re.sub(r'top: "(bn[0-3])"(\s+)param {[^}]+}', 'top: "\\1"\\2{}'.format(bn_param), prto)
    f.write(prto)
    
with open('dvia_pretest.prototxt', 'w') as f:
    lmdb      = os.path.join(cifar_db_root, 'data/cifar_32x32/val_lmdb')
    mean_file = os.path.join(cifar_db_root, 'data/cifar_32x32/val_mean.binaryproto')
    prto = str(cnn_cifar(lmdb, mean_file, 120, mirror=False))
    prto = re.sub(r'top: "(bn[0-3])"(\s+)param {[^}]+}', 'top: "\\1"\\2{}'.format(bn_param), prto)
    f.write(prto)

!python /usr/local/caffe/python/draw_net.py dvia_pretrain.prototxt cifar_net.png

# For training
def cnn(lmdb, mean_file, batch_size, mirror=True):
    n = caffe.NetSpec()
    ## Input LMDB data layer
    n.data, n.label = L.Data(batch_size=batch_size, source=lmdb, backend=P.Data.LMDB, 
                             transform_param=dict(scale=1./256, mirror=True, mean_file=mean_file), ntop=2)

    # Create inner layers
    n = cnn_inner_layers(n, frozen_param)

    # Output 4-class classifier
    n.fc_class         = L.InnerProduct(n.relu_last, num_output=4, weight_filler=wgt_filler, param=learned_param)
    n.accuracy_class   = L.Accuracy(n.fc_class, n.label)
    n.loss_class       = L.SoftmaxWithLoss(n.fc_class, n.label)

##     n.fc_class         = L.Convolution(n.drop_last, kernel_size=1, stride=1, num_output=4, weight_filler=wgt_filler, param=learned_param)
##     n.fc_avpool_class  = L.Pooling(n.fc_class, kernel_size=2, stride=2, pool=P.Pooling.AVE)
##     n.accuracy_class   = L.Accuracy(n.fc_avpool_class, n.label)
##     n.loss_class       = L.SoftmaxWithLoss(n.fc_avpool_class, n.label)

    return n.to_proto()

with open('dvia_train.prototxt', 'w') as f:
    lmdb      = os.path.join(dvia_db_root, 'data/dvia_32x32/trn_lmdb')
    mean_file = os.path.join(dvia_db_root, 'data/dvia_32x32/trn_mean.binaryproto')
    prto = str(cnn(lmdb, mean_file, 100, mirror=True))
    prto = re.sub(r'top: "(bn[0-3])"(\s+)param {[^}]+}', 'top: "\\1"\\2{}'.format(bn_param), prto)
    f.write(prto)
    
with open('dvia_test.prototxt', 'w') as f:
    lmdb      = os.path.join(dvia_db_root, 'data/dvia_32x32/val_lmdb')
    mean_file = os.path.join(dvia_db_root, 'data/dvia_32x32/val_mean.binaryproto')
    prto = str(cnn(lmdb, mean_file, 120, mirror=True))
    prto = re.sub(r'top: "(bn[0-3])"(\s+)param {[^}]+}', 'top: "\\1"\\2{}'.format(bn_param), prto)
    f.write(prto)

!python /usr/local/caffe/python/draw_net.py dvia_train.prototxt dvia_net.png

Drawing net to cifar_net.png
Drawing net to dvia_net.png


In [3]:
caffe.set_mode_gpu()
solver = None
solver = caffe.get_solver('dvia_solver.prototxt')

In [4]:
print("Layers' features:")
[(k, v.data.shape) for k, v in solver.net.blobs.items()]

Layers' features:


[('data', (100, 3, 32, 32)),
 ('label', (100,)),
 ('label_data_1_split_0', (100,)),
 ('label_data_1_split_1', (100,)),
 ('conv1', (100, 64, 28, 28)),
 ('pool1', (100, 64, 14, 14)),
 ('conv2', (100, 100, 12, 12)),
 ('pool2', (100, 100, 6, 6)),
 ('conv3', (100, 200, 4, 4)),
 ('pool3', (100, 200, 2, 2)),
 ('conv_last', (100, 384, 2, 2)),
 ('fc_class', (100, 4)),
 ('fc_class_fc_class_0_split_0', (100, 4)),
 ('fc_class_fc_class_0_split_1', (100, 4)),
 ('accuracy_class', ()),
 ('loss_class', ())]

In [5]:
print("Parameters and shape:")
[(k, v[0].data.shape) for k, v in solver.net.params.items()]

Parameters and shape:


[('conv1', (64, 3, 5, 5)),
 ('conv2', (100, 64, 3, 3)),
 ('conv3', (200, 100, 3, 3)),
 ('conv_last', (384, 200, 1, 1)),
 ('fc_class', (4, 1536))]

## Pre-Train Using Cifar-100 DB (32x32 original images)
The purpose of this pre-training part is to take advantage of the Cifar-100 database to get better feature extractor as a initial condition for later training with our own image database. 

In [7]:
solver = None

In [None]:
%%time
!caffe train -solver dvia_presolver.prototxt

In [1]:
!ls -rt cifar_pretrain_iter*.caffemodel | tail -n1 | xargs -i cp {} cifar_pretrained.caffemodel

## Solver's params

The solver's params for the created net are defined in a `.prototxt` file. 

Notice that because `max_iter: 100000`, the training will loop 2 times on the 50000 training data. Because we train data by minibatches of 100 as defined above when creating the net, there will be a total of `100000*100/50000 = 200` epochs on some of those pre-shuffled 100 images minibatches.

We will test the net on `test_iter: 100` different test images at each `test_interval: 1000` images trained. 
____

Here, **RMSProp** is used, it is SDG-based, it converges faster than a pure SGD and it is robust.
____

In [8]:
!cat dvia_solver.prototxt

train_net: "dvia_train.prototxt"
test_net: "dvia_test.prototxt"

test_iter: 100
test_interval: 1000

base_lr: 0.001
momentum: 0.0
weight_decay: 0.001

lr_policy: "inv"
gamma: 0.0001
power: 0.75

display: 100

max_iter: 100000

snapshot: 25000
snapshot_prefix: "dvia_train"
solver_mode: GPU

type: "RMSProp"
rms_decay: 0.98


## Alternative way to train directly in Python
Since a recent update, there is no output in python by default, which is bad for debugging. 
Skip this cell and train with the second method shown below if needed. It is commented out in case you just chain some `shift+enter` ipython shortcuts. 

In [9]:
# %%time
# solver.solve()
solver = None

## Train by calling caffe in command line
Just set the parameters correctly. Be sure that the notebook is at the root of the ipython notebook server. 
You can run this in an external terminal if you open it in the notebook's directory. 

It is also possible to finetune an existing net with a different solver or different data. Here I do it, because I feel the net could better fit the data. 

In [10]:
%%time
!caffe train -solver dvia_solver.prototxt -weights cifar_pretrained.caffemodel

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 3.1 µs


Caffe brewed. 
## Test the model completely on test data
Let's test directly in command-line:

In [None]:
%%time
!ls -rt dvia_train_iter*.caffemodel | tail -n1 | xargs -i cp {} dvia_trained.caffemodel
!caffe test -model dvia_test.prototxt -weights dvia_trained.caffemodel -iterations 100

I1112 15:22:32.683954 16332 caffe.cpp:279] Use CPU.
I1112 15:22:32.881461 16332 net.cpp:58] Initializing net from parameters: 
state {
  phase: TEST
  level: 0
  stage: ""
}
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  transform_param {
    scale: 0.00390625
    mirror: true
    mean_file: "/home/maheriya/Projects/IMAGES/dvia/png.32x32/data/dvia_32x32/val_mean.binaryproto"
  }
  data_param {
    source: "/home/maheriya/Projects/IMAGES/dvia/png.32x32/data/dvia_32x32/val_lmdb"
    batch_size: 120
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 0.2
    decay_mult: 0.2
  }
  param {
    lr_mult: 0.4
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
l

## The model achieved near 87.91% accuracy
The above is purely test/validation database that is not used for training.

In [2]:
!jupyter nbconvert --to markdown dvia-train-32x32.ipynb

[NbConvertApp] Converting notebook dvia-train-32x32.ipynb to markdown
[NbConvertApp] Writing 1531721 bytes to dvia-train-32x32.md
