In [3]:
import os
import sys
import pandas as pd
import numpy as np

sys.path.insert(0, '/home/xiaojun/ssd_caffe/python')
import caffe

# Raw Kaggle inputs, addressed relative to the notebook's working directory.
# The trailing slashes matter: later cells build file names by plain string concatenation.
dataset_dir = 'dataset/'
label_file, train_set, test_set = (
    dataset_dir + leaf for leaf in ('labels.csv', 'train/', 'test/')
)

# LMDB databases used as the `source` of the network's Data layers (absolute, machine-specific).
val_lmdb = '/home/xiaojun/kaggle/dog-breed-identification/dataset/val_lmdb'
train_lmdb = '/home/xiaojun/kaggle/dog-breed-identification/dataset/train_lmdb'

In [4]:
# Load the id -> breed table. Stop right here when it is missing: everything below needs
# df_csv, so merely printing a message would only defer the failure to a confusing NameError.
if not os.path.exists(label_file):
    raise IOError('{} does not exist'.format(label_file))
df_csv = pd.read_csv(label_file)

# .values replaces DataFrame.as_matrix(), which newer pandas releases no longer provide.
label_list = df_csv.values
print('label size: {}'.format(label_list.shape[0]))

# Directory entry counts, shown as a quick sanity check against the label count.
train_images = os.listdir(train_set)
print('training images: {}'.format(len(train_images)))

test_images = os.listdir(test_set)
print('test images: {}'.format(len(test_images)))

label size: 10222
training images: 10222
test images: 10357


In [5]:
#prepare the class list and image list files for the training dataset
#(the lmdb conversion itself is not performed in this cell)

#create category.txt: one breed name per line, in np.unique's sorted order.
#The position of a breed in unique_label is the integer class index used by the list files.
breed = df_csv.loc[:, 'breed'].values  # .values replaces the removed DataFrame/Series.as_matrix()
unique_label = np.unique(breed)
print('breed size: {}'.format(len(unique_label)))
with open(dataset_dir + 'category.txt', 'w') as f:
    f.writelines(item + '\n' for item in unique_label)
        
#split labels.csv to train.txt and val.txt
def _write_image_list(path, rows):
    """Write one '<image path> <class index>' line per row (row[0] = path, row[1] = index)."""
    with open(path, 'w') as list_file:
        for row in rows:
            list_file.write('{} {}\n'.format(row[0], row[1]))


# First 80% of the rows (rounded down) go to training, the remainder to validation.
# NOTE(review): rows are split in file order without shuffling; confirm labels.csv is not
# grouped by breed, otherwise the two subsets will cover different classes.
NUM_TRAIN = int(np.floor(label_list.shape[0] * 0.8))
NUM_VAL = int(label_list.shape[0] - NUM_TRAIN)

# Class index = position of the breed name in unique_label (the order written to category.txt).
breed_to_index = {name: index for index, name in enumerate(unique_label)}

# Column-wise construction replaces the per-row .loc assignment loop: same values, one pass,
# and no int(np.argwhere(...)) conversion of a 2-D array to a scalar.
df_txt = df_csv.copy()
df_txt['id'] = train_set + df_csv['id'].astype(str) + '.jpg'
df_txt['breed'] = df_csv['breed'].map(breed_to_index).astype(int)

labels = df_txt.values  # .values replaces the removed DataFrame.as_matrix()
train_labels = labels[:NUM_TRAIN]
val_labels = labels[NUM_TRAIN:]

_write_image_list(dataset_dir + 'train.txt', train_labels)
_write_image_list(dataset_dir + 'val.txt', val_labels)

breed size: 120


In [6]:
#define resnet50
from caffe import layers as L, params as P

def conv_bn_scale_relu(in_blob, num_output, kernel_size, stride, pad, mode, bias_term=False):
    """Stack Convolution -> BatchNorm -> Scale -> ReLU on ``in_blob``.

    The first three layers are exactly what ``conv_bn_scale`` builds (same arguments,
    same convolution ``param`` setting, same mode-dependent ``use_global_stats``);
    an in-place ReLU is then appended on top of the Scale output.

    Returns the tuple (conv, bn, scale, relu).
    """
    conv, bn, scale = conv_bn_scale(in_blob, num_output, kernel_size, stride, pad, mode,
                                    bias_term=bias_term)
    activation = L.ReLU(scale, in_place=True)
    return conv, bn, scale, activation

def conv_bn_scale(in_blob, num_output, kernel_size, stride, pad, mode, bias_term=False):
    """Stack Convolution -> BatchNorm -> Scale on ``in_blob`` (no activation).

    in_blob:      bottom blob fed to the convolution.
    num_output, kernel_size, stride, pad, bias_term: forwarded to the Convolution layer.
    mode:         'train' writes use_global_stats=False on the BatchNorm layer;
                  any other value writes use_global_stats=True.

    Returns the tuple (conv, bn, scale). BatchNorm and Scale are created in-place.
    """
    # lr_mult=0 is the learning-rate multiplier of the convolution's parameters --
    # presumably chosen to keep the pretrained filters fixed while fine-tuning.
    frozen_param = dict(lr_mult=0, decay_mult=1)
    conv = L.Convolution(in_blob,
                         num_output=num_output,
                         kernel_size=kernel_size,
                         stride=stride,
                         pad=pad,
                         bias_term=bias_term,
                         param=frozen_param)

    # NOTE(review): the flag is written without a phase restriction, so in the train/val
    # definition it applies to the TEST phase as well -- confirm that is intended.
    use_global_stats = (mode != 'train')
    bn = L.BatchNorm(conv, use_global_stats=use_global_stats, in_place=True)

    scale = L.Scale(bn, bias_term=True, in_place=True)
    return conv, bn, scale

def conv_block(in_blob, kernel_num, stage, mode):
    """Build a bottleneck residual block whose shortcut is a 1x1 convolution.

    in_blob:    bottom blob of the block.
    kernel_num: channel counts, indexed as [1x1 reduce, 3x3, 1x1 expand / shortcut].
    stage:      stage number; stage 2 uses stride 1, every other stage uses stride 2
                in both the shortcut convolution and the 3x3 convolution.
    mode:       forwarded to the conv/bn/scale helpers.

    Returns a flat 16-tuple in this order:
    shortcut (conv, bn, scale), 2a (conv, bn, scale, relu), 2b (conv, bn, scale, relu),
    2c (conv, bn, scale), eltwise sum, final in-place relu.
    """
    stride = 1 if stage == 2 else 2
    reduce_channels, mid_channels, out_channels = kernel_num[0], kernel_num[1], kernel_num[2]

    # Projection shortcut (branch1) is created first, then the three-layer main path (branch2).
    shortcut = conv_bn_scale(in_blob, out_channels, 1, stride, 0, mode)
    branch2a = conv_bn_scale_relu(in_blob, reduce_channels, 1, 1, 0, mode)
    branch2b = conv_bn_scale_relu(branch2a[-1], mid_channels, 3, stride, 1, mode)
    branch2c = conv_bn_scale(branch2b[-1], out_channels, 1, 1, 0, mode)

    # Element-wise sum of main path and shortcut, followed by an in-place ReLU.
    summed = L.Eltwise(branch2c[-1], shortcut[-1], operation=P.Eltwise.SUM)
    activated = L.ReLU(summed, in_place=True)

    return shortcut + branch2a + branch2b + branch2c + (summed, activated)

def identity_block(in_blob, kernel_num, mode):
    """Build a bottleneck residual block whose shortcut is the input blob itself.

    in_blob:    bottom blob of the block; also the second operand of the final sum.
    kernel_num: channel counts, indexed as [1x1 reduce, 3x3, 1x1 expand].
    mode:       forwarded to the conv/bn/scale helpers.

    Returns a flat 13-tuple in this order:
    2a (conv, bn, scale, relu), 2b (conv, bn, scale, relu), 2c (conv, bn, scale),
    eltwise sum, final in-place relu.
    """
    # All three convolutions use stride 1: 1x1 (pad 0), 3x3 (pad 1), 1x1 (pad 0).
    branch2a = conv_bn_scale_relu(in_blob, kernel_num[0], 1, 1, 0, mode)
    branch2b = conv_bn_scale_relu(branch2a[-1], kernel_num[1], 3, 1, 1, mode)
    branch2c = conv_bn_scale(branch2b[-1], kernel_num[2], 1, 1, 0, mode)

    summed = L.Eltwise(branch2c[-1], in_blob, operation=P.Eltwise.SUM)
    activated = L.ReLU(summed, in_place=True)

    return branch2a + branch2b + branch2c + (summed, activated)

def _residual_top_names(stage, block_letter, with_projection):
    """Return the top names of one residual block, in the order its builder returns the tops.

    For stage 2, block 'a' the names are res2a_branch2a, bn2a_branch2a, scale2a_branch2a,
    res2a_branch2a_relu, ... , res2a, res2a_relu. ``with_projection`` prepends the three
    ``*_branch1`` names that belong to the shortcut convolution built by conv_block.
    """
    tag = str(stage) + block_letter
    conv_name = 'res' + tag + '_branch'
    bn_name = 'bn' + tag + '_branch'
    scale_name = 'scale' + tag + '_branch'

    names = []
    if with_projection:
        names += [conv_name + '1', bn_name + '1', scale_name + '1']
    for branch in ('2a', '2b'):
        names += [conv_name + branch, bn_name + branch, scale_name + branch,
                  conv_name + branch + '_relu']
    names += [conv_name + '2c', bn_name + '2c', scale_name + '2c']
    names += ['res' + tag, 'res' + tag + '_relu']
    return names


def resnet50_net(mode, train_data=None, val_data=None, train_batchsize=0, val_batchsize=0):
    """Build the ResNet-50 definition and return it as prototxt text.

    mode:            'train'  -> train/val net: one LMDB Data layer per phase,
                                 SoftmaxWithLoss in TRAIN, top-1 / top-5 Accuracy in TEST.
                     'deploy' -> net fed by an Input layer of shape 1x3x224x224 and
                                 ending in a Softmax layer named 'prob'.
    train_data:      LMDB source of the TRAIN-phase Data layer (read in 'train' mode only).
    val_data:        LMDB source of the TEST-phase Data layer (read in 'train' mode only).
    train_batchsize: batch size of the TRAIN-phase Data layer.
    val_batchsize:   batch size of the TEST-phase Data layer.

    Returns the network definition as a str.
    Raises ValueError when ``mode`` is neither 'train' nor 'deploy'; rejecting it up front
    replaces the earlier behaviour of printing a message and then failing further down
    with an unrelated error.
    """
    if mode not in ('train', 'deploy'):
        raise ValueError("unknown mode: {!r} (expected 'train' or 'deploy')".format(mode))

    n = caffe.NetSpec()
    kernel_num_list = [[64, 64, 256], [128, 128, 512], [256, 256, 1024], [512, 512, 2048]] # kernel num of stage2/3/4/5
    block_num_list = [3, 4, 6, 3] # block num of stage2/3/4/5
    block_list = ['a', 'b', 'c', 'd', 'e', 'f']

    if mode == 'train':
        n.data, n.label = L.Data(name='data', source=train_data, batch_size=train_batchsize, backend=P.Data.LMDB, ntop=2,
                                 include=dict(phase=caffe.TRAIN),
                                 transform_param=dict(mirror=True, mean_value=[104, 117, 123], crop_size=224))
        # Both Data layers produce the tops 'data' and 'label', so the spec can only hold one
        # of them at a time: serialize the TRAIN layer now, then overwrite it with the TEST
        # layer. The saved text is prepended to the final prototxt below.
        train_data_layer_str = str(n.to_proto())

        n.data, n.label = L.Data(name='data', source=val_data, batch_size=val_batchsize, backend=P.Data.LMDB, ntop=2,
                                 include=dict(phase=caffe.TEST),
                                 transform_param=dict(mirror=False, mean_value=[104, 117, 123], crop_size=224))
    else:
        n.data = L.Input(shape=dict(dim=[1, 3, 224, 224]))

    #stage 1
    stem_names = ('conv1', 'bn_conv1', 'scale_conv1', 'conv1_relu')
    stem_tops = conv_bn_scale_relu(n.data, num_output=64, kernel_size=7, stride=2, pad=3, mode=mode, bias_term=True)
    # Tops are registered through NetSpec's item interface, in the order they were built.
    for name, top in zip(stem_names, stem_tops):
        n[name] = top

    n.pool1 = L.Pooling(n['conv1_relu'], pool=P.Pooling.MAX, kernel_size=3, stride=2)

    bottom = n.pool1

    #stage 2/3/4/5
    for s in range(4):
        stage = s + 2
        for i in range(block_num_list[s]):
            # The first block of every stage carries the convolutional shortcut;
            # the remaining blocks add their own input back unchanged.
            first_in_stage = (i == 0)
            if first_in_stage:
                tops = conv_block(bottom, kernel_num_list[s], stage, mode)
            else:
                tops = identity_block(bottom, kernel_num_list[s], mode)

            names = _residual_top_names(stage, block_list[i], with_projection=first_in_stage)
            for name, top in zip(names, tops):
                n[name] = top

            # The block's final ReLU output feeds the next block.
            bottom = n[names[-1]]

    #average pooling and fc layer
    n.pool5 = L.Pooling(bottom, pool=P.Pooling.AVE, kernel_size=7, stride=1)
    n.fc120 = L.InnerProduct(n.pool5, num_output=120, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    if mode == 'train':
        n.loss = L.SoftmaxWithLoss(n.fc120, n.label, include=dict(phase=caffe.TRAIN))
        n.accuracy1 = L.Accuracy(n.fc120, n.label, top_k=1, include=dict(phase=caffe.TEST))
        n.accuracy5 = L.Accuracy(n.fc120, n.label, top_k=5, include=dict(phase=caffe.TEST))
        return 'name: "DogNet-ResNet50"\n' + train_data_layer_str + str(n.to_proto())

    n.prob = L.Softmax(n.fc120)
    return 'name: "DogNet-ResNet50"\n' + str(n.to_proto())
        
# Make sure the output directory exists before anything is written into it.
if not os.path.isdir('resnet50'):
    os.makedirs('resnet50')

# Build each definition BEFORE opening its file: open(..., 'w') truncates immediately, so
# generating inside the `with` would leave an empty prototxt behind if generation failed.
net_spec_str = resnet50_net(mode='train', train_data=train_lmdb, val_data=val_lmdb, train_batchsize=16, val_batchsize=8)
with open('resnet50/resnet50_train_val.prototxt', 'w') as f:
    f.write(net_spec_str)

net_spec_str = resnet50_net(mode='deploy')
with open('resnet50/resnet50_deploy.prototxt', 'w') as f:
    f.write(net_spec_str)

In [7]:
#define solver.prototxt
def create_solver():
    """Return a caffe SolverParameter message filled with the fine-tuning settings.

    The schedule is SGD with momentum 0.9 and weight decay 0.0005, base learning rate
    0.001 multiplied by 0.1 every 1000 iterations ("step" policy), for 4000 iterations,
    with a snapshot every 1000 iterations and a test pass of 250 batches every 500
    iterations. ``average_loss`` and ``test_initialization`` are not set here.
    """
    solver = caffe.proto.caffe_pb2.SolverParameter()

    # NOTE(review): this path has no directory part although the net is written to
    # resnet50/resnet50_train_val.prototxt -- confirm which working directory Caffe is
    # started from so that the file can actually be found.
    scalar_settings = (
        ('net', "resnet50_train_val.prototxt"),
        ('test_interval', 500),
        ('type', 'SGD'),
        ('base_lr', 0.001),
        ('lr_policy', "step"),
        ('gamma', 0.1),
        ('stepsize', 1000),
        ('momentum', 0.9),
        ('weight_decay', 0.0005),
        ('display', 100),
        ('max_iter', 4000),
        ('snapshot', 1000),
        ('solver_mode', P.Solver.GPU),
        ('snapshot_prefix', "snapshot_resnet50"),
    )
    for field_name, value in scalar_settings:
        setattr(solver, field_name, value)

    # test_iter is a repeated field, so it is appended to rather than assigned.
    solver.test_iter.append(250)
    return solver

# Serialize the message in protobuf text format next to the network definitions.
with open("resnet50/resnet50_solver.prototxt", "w") as f:
    solver_text = str(create_solver())
    f.write(solver_text)

In [4]:
#finetune resnet50
weights = '/home/intel/models/caffemodels/resnet/ResNet-50-model.caffemodel'
solver_file = 'resnet50/resnet50_solver.prototxt'

niter = 100
# NOTE(review): the generated solver file requests solver_mode GPU while CPU mode is
# selected here -- confirm which device this run is meant to use.
caffe.set_mode_cpu()

# Verify both inputs from Python first, so a wrong path surfaces as an ordinary exception
# in the notebook instead of a failure inside Caffe's native loading code.
for required_path in (weights, solver_file):
    if not os.path.isfile(required_path):
        raise IOError('{} does not exist'.format(required_path))

solver = caffe.get_solver(solver_file)
solver.net.copy_from(weights)  # initialize the net from the pretrained model

# One solver iteration per call; `_` avoids shadowing the builtin `iter`.
for _ in range(niter):
    solver.step(1)

In [8]:
#evaluation

# Turn a preprocessed network input back into a displayable image.
def deprocess_net_image(image):
    """Undo the input preprocessing of a single image so it can be shown.

    image: array indexed (channel, height, width) with channels in BGR order and the
           per-channel mean already subtracted; it is not modified.

    Returns a new uint8 array indexed (height, width, channel) in RGB order, with the
    mean (123, 117, 104) added back, values limited to [0, 255] and rounded.
    """
    # Work on a private copy: reverse the channel axis (BGR -> RGB), then CHW -> HWC.
    rgb_hwc = image.copy()[::-1].transpose(1, 2, 0)

    # (approximately) undo mean subtraction, per RGB channel
    rgb_hwc += [123, 117, 104]

    # Limit to the displayable range before rounding and narrowing to 8 bits.
    np.clip(rgb_hwc, 0, 255, out=rgb_hwc)
    return np.require(np.round(rgb_hwc), dtype=np.uint8)

# Fine-tuned weights; the file name matches the solver's snapshot_prefix
# ("snapshot_resnet50") at its max_iter of 4000 -- presumably the final training snapshot.
trained_weights = '/home/xiaojun/kaggle/dog-breed-identification/snapshot_resnet50_iter_4000.caffemodel'
# Load the train/val definition in TEST phase, i.e. with the layers that are
# restricted to phase TEST (validation Data layer, accuracy1, accuracy5).
net = caffe.Net('resnet50/resnet50_train_val.prototxt', trained_weights, caffe.TEST)
# A single forward pass processes one batch (the prototxt was generated with
# val_batchsize=8), so the accuracies displayed below describe that batch only,
# not the whole validation set.
net.forward()

{'accuracy1': array(0.625, dtype=float32),
 'accuracy5': array(1., dtype=float32)}

In [9]:
# Load the deploy definition (Input layer in, 'prob' softmax out) with the fine-tuned
# weights; this is the net used for test-set inference further down.
deploy_net = caffe.Net('resnet50/resnet50_deploy.prototxt', trained_weights, caffe.TEST)

In [10]:
# run inference on testset
import numpy as np
mean_file = '/home/xiaojun/kaggle/dog-breed-identification/dataset/ilsvrc_2012_mean.npy'

# Preprocessor for the deploy net's 'data' input, sized from the blob itself.
input_shapes = {'data': deploy_net.blobs['data'].data.shape}
transformer = caffe.io.Transformer(input_shapes)

# The settings are independent of each other; they are listed here from layout changes
# to value changes.
transformer.set_transpose('data', (2, 0, 1))     # move the last image axis to the front
transformer.set_channel_swap('data', (2, 1, 0))  # reverse the channel order
transformer.set_raw_scale('data', 255)           # multiply the loaded pixel values by 255

# Average the stored mean array over its last two axes, leaving one value per entry of
# its first axis (presumably one per colour channel -- verify against the .npy file).
channel_mean = np.load(mean_file).mean(1).mean(1)
transformer.set_mean('data', channel_mean)

In [11]:
import pandas as pd

# Classify every file in the test directory and write one row of class probabilities per
# image, with columns 'id' followed by the breed names in unique_label order.
image_list = os.listdir(test_set)
num_test_image = len(image_list)
file_name_list = []
prob_rows = []

for image_name in image_list:
    image_path = test_set + image_name
    image = caffe.io.load_image(image_path)
    transformed_image = transformer.preprocess('data', image)
    deploy_net.blobs['data'].data[...] = transformed_image
    deploy_net.forward()
    # Copy the probabilities out: .data exposes the net's own buffer, which the next
    # forward pass writes to again.
    prob = deploy_net.blobs['prob'].data[0].copy()
    file_name, ext = os.path.splitext(image_name)
    file_name_list.append(file_name)
    prob_rows.append(prob)

# Build the table once from the collected rows. Growing a DataFrame with .append() inside
# the loop copies the whole frame on every iteration, and newer pandas releases no longer
# provide DataFrame.append at all. The reshape keeps a (0, n_classes) shape when the test
# directory is empty.
prob_matrix = np.array(prob_rows).reshape(-1, len(unique_label))
prob_df = pd.DataFrame(prob_matrix, columns=unique_label)

fn_df = pd.DataFrame(file_name_list, columns=['id'])
result_df = pd.concat([fn_df, prob_df], axis=1)

result_df.to_csv('resnet50_result.csv', index=False)

  warn("The default mode, 'constant', will be changed to 'reflect' in "
