In [1]:
import caffe
import tensorflow as tf
import numpy as np
import cv2
import os
import mxnet as mx
from mxnet import image
import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

  from ._conv import register_converters as _register_converters


In [2]:
num_points = 8

model_stage1 = '../models/FLD_full/stage1.prototxt'
assert(os.path.exists(model_stage1))
weights_stage1 = '../models/FLD_full/FLD_full_models/stage1.caffemodel'
assert(os.path.exists(weights_stage1))

model_stage2 = '../models/FLD_full/cascade.prototxt'
assert(os.path.exists(model_stage2))
weights_stage2 = '../models/FLD_full/FLD_full_models/stage2.caffemodel'
assert(os.path.exists(weights_stage2))

model_stage3 = '../models/FLD_full/cascade.prototxt'
assert(os.path.exists(model_stage3))
weights_stage3_easy = '../models/FLD_full/FLD_full_models/stage3_easy.caffemodel'
assert(os.path.exists(weights_stage3_easy))
weights_stage3_hard = '../models/FLD_full/FLD_full_models/stage3_hard.caffemodel'
assert(os.path.exists(weights_stage3_hard))

In [3]:
# caffe prepare
caffe.set_mode_gpu()
caffe.set_device(0)

In [4]:
# create net and load weights
net_stage1 = caffe.Net(model_stage1, weights_stage1, caffe.TEST)
# display([(k, v.data.shape) for k, v in net_stage1.blobs.items()])

net_stage2 = caffe.Net(model_stage2, weights_stage2, caffe.TEST)
# display([(k, v.data.shape) for k, v in net_stage2.blobs.items()])

net_stage3_easy = caffe.Net(model_stage3, weights_stage3_easy, caffe.TEST)
net_stage3_hard = caffe.Net(model_stage3, weights_stage3_hard, caffe.TEST)
pipeline = {
    'num_points': num_points,
    'net_stage1': net_stage1,
    'net_stage2': net_stage2,
    'net_stage3_easy': net_stage3_easy,
    'net_stage3_hard': net_stage3_hard
}

In [None]:
image_path = '../data/FLD_full/'

In [None]:
def plot_image(img_path, display=True):
    with open(img_path, 'rb') as f:
        img = image.imdecode(f.read())
    if display:
        plt.imshow(img.asnumpy())
    return img

In [None]:
def pipeline_forword(img_orig: mx.ndarray.ndarray.NDArray, pipeline: dict):
    """
    pipeline forward to make image processed
    """
    # preprocess: image resize & pad
    def get_padding_size(image):
        h, w, _ = image.shape
        longest_edge = max(h, w)
        top, bottom, left, right = (0, 0, 0, 0)
        if h < longest_edge:
            dh = longest_edge - h
            top = dh // 2
            bottom = dh - top
        elif w < longest_edge:
            dw = longest_edge - w
            left = dw // 2
            right = dw - left
        else:
            pass
        return top, bottom, left, right
    scale = 224 / max(img_orig.shape)
    s1 = round(img_orig.shape[0] * scale)
    s2 = round(img_orig.shape[1] * scale)
    if img_orig.shape[0] == 224 and img_orig.shape[1] == 224:
        img_resi = img_orig.asnumpy()
    else:
        # https://www.programcreek.com/python/example/86048/cv2.copyMakeBorder
        img_resi = cv2.resize(img_orig.asnumpy(), (s1,s2))
        top, bottom, left, right = get_padding_size(img_resi)
        BLACK = [0, 0, 0]
        constant = cv2.copyMakeBorder(img_resi, top , bottom, left, right, cv2.BORDER_CONSTANT, value=BLACK)
        img_resi = cv2.resize(constant, (224, 224))
    
    offset = np.array([0,0,0])
    assert(img_resi.shape[0] == 224)
    assert(img_resi.shape[1] == 224)
    
    def get_orig_coordinate(p):
        """
        calculate location.
        """
        value = (p + 0.5) * 224
        return ( value - np.tile(np.array([offset[1], offset[0]]).T.reshape(-1, 1), (pipeline['num_points'],1)) \
                .reshape(value.shape) ) / scale
    
    # image normalization
    img_stan = img_resi.astype('float32')
    # normaliziation, refer to https://stackoverflow.com/questions/27970134/what-is-an-equivalent-of-matlab-permutea-3-2-1-in-python
    img_stan = np.transpose(img_stan, (1,0,2))
    img_stan = np.transpose(img_stan, (2,1,0))
    
    visibility_case = np.array(['Visible','Occlude','Inexistent'])
    
    # stage 1 fp
    res_stage1 = pipeline['net_stage1'].forward(**{
        pipeline['net_stage1'].inputs[0]: np.asarray([img_stan]) 
    })
    landmark_stage1 = res_stage1['fc8'][:, 0:pipeline['num_points']*2]
    landmark_stage1_values = res_stage1['fc8'][:, pipeline['num_points']*2:].reshape((3,pipeline['num_points']))
    v1 = np.argmax(landmark_stage1_values, axis=0)
    visibility_stage1 = visibility_case[v1]
    
    prediction_stage1 = {
        'landmark': landmark_stage1, #get_orig_coordinate(landmark_stage1),
        'visibility': visibility_stage1
    };
    
    # stage 2 fp   
    res_stage2 = pipeline['net_stage2'].forward(**{
        pipeline['net_stage2'].inputs[0]: np.asarray([img_stan]), 
        pipeline['net_stage2'].inputs[1]: np.asarray([landmark_stage1])
    });
    landmark_stage2 = landmark_stage1 - res_stage2['fc8'][:, :pipeline['num_points']*2] / 5;
    landmark_stage2_values = res_stage1['fc8'][:, pipeline['num_points']*2:].reshape((3, pipeline['num_points']))
    
    v2 = np.argmax(landmark_stage2_values, axis=0)
    visibility_stage2 = visibility_case[v2]
    
    prediction_stage2 = {
        'landmark': landmark_stage2, #get_orig_coordinate(landmark_stage2), 
        'visibility': visibility_stage2
    };
    
    # stage 3 fp
    res_stage3_easy = pipeline['net_stage3_easy'].forward(**{
        pipeline['net_stage2'].inputs[0]: np.asarray([img_stan]), 
        pipeline['net_stage2'].inputs[1]: np.asarray([landmark_stage2])
    });
    res_stage3_hard = pipeline['net_stage3_hard'].forward(**{
        pipeline['net_stage2'].inputs[0]: np.asarray([img_stan]), 
        pipeline['net_stage2'].inputs[1]: np.asarray([landmark_stage2])
    });
    landmark_stage3 = landmark_stage2 - (res_stage3_easy['fc8'][:, 0:pipeline['num_points']*2] /5  
                                         + res_stage3_hard['fc8'][:, 0:pipeline['num_points']*2]/5) /2
    
    landmark_stage3_easy_values = res_stage3_easy['fc8'][:, pipeline['num_points']*2:] \
            .reshape((3, pipeline['num_points']))
    landmark_stage3_hard_values = res_stage3_hard['fc8'][:, pipeline['num_points']*2:] \
            .reshape((3, pipeline['num_points']))

    v3 = np.argmax(landmark_stage3_easy_values + landmark_stage3_hard_values, axis=0)
    visibility_stage3 = visibility_case[v3]

    prediction_stage3 = {
        'landmark': landmark_stage3,#get_orig_coordinate(landmark_stage3),
        'visibility': visibility_stage3
    };
    
    # output
    prediction = {
        'stage1': prediction_stage1,
        'stage2': prediction_stage2,
        'stage3': prediction_stage3,
        'num_points': pipeline['num_points']
    }
    
    return prediction

In [5]:
def standize_image(img_path):
    with open(img_path, 'rb') as f:
        img = image.imdecode(f.read())
    img_resi = cv2.resize(img.asnumpy(), (224, 224))
    # image normalization
    img_stan = img_resi.astype('float32')
    # normaliziation, refer to https://stackoverflow.com/questions/27970134/what-is-an-equivalent-of-matlab-permutea-3-2-1-in-python
    img_stan = np.transpose(img_stan, (1,0,2))
    img_stan = np.transpose(img_stan, (2,1,0))
    return img_stan

In [6]:
def pipeline_batch_process(img_stan: [], pipeline: dict):
    """
    pipeline forward to make image processed
    """
    visibility_case = np.array(['Visible','Occlude','Inexistent'])
    
    # stage 1 fp
    res_stage1 = pipeline['net_stage1'].forward(**{
        pipeline['net_stage1'].inputs[0]: np.asarray(img_stan) 
    })
    landmark_stage1 = res_stage1['fc8'][:, 0:pipeline['num_points']*2]
    landmark_stage1_values = res_stage1['fc8'][:, pipeline['num_points']*2:].reshape((3,pipeline['num_points']))
    v1 = np.argmax(landmark_stage1_values, axis=0)
    visibility_stage1 = visibility_case[v1]
    
    prediction_stage1 = {
        'landmark': get_orig_coordinate(landmark_stage1),
        'visibility': visibility_stage1
    };
    
    # stage 2 fp   
    res_stage2 = pipeline['net_stage2'].forward(**{
        pipeline['net_stage2'].inputs[0]: np.asarray([img_stan]), 
        pipeline['net_stage2'].inputs[1]: np.asarray([landmark_stage1])
    });
    landmark_stage2 = landmark_stage1 - res_stage2['fc8'][:, :pipeline['num_points']*2] / 5;
    landmark_stage2_values = res_stage1['fc8'][:, pipeline['num_points']*2:].reshape((3, pipeline['num_points']))
    
    v2 = np.argmax(landmark_stage2_values, axis=0)
    visibility_stage2 = visibility_case[v2]
    
    prediction_stage2 = {
        'landmark': get_orig_coordinate(landmark_stage2),
        'visibility': visibility_stage2
    };
    
    # stage 3 fp
    res_stage3_easy = pipeline['net_stage3_easy'].forward(**{
        pipeline['net_stage2'].inputs[0]: np.asarray([img_stan]), 
        pipeline['net_stage2'].inputs[1]: np.asarray([landmark_stage2])
    });
    res_stage3_hard = pipeline['net_stage3_hard'].forward(**{
        pipeline['net_stage2'].inputs[0]: np.asarray([img_stan]), 
        pipeline['net_stage2'].inputs[1]: np.asarray([landmark_stage2])
    });
    landmark_stage3 = landmark_stage2 - (res_stage3_easy['fc8'][:, 0:pipeline['num_points']*2] /5  
                                         + res_stage3_hard['fc8'][:, 0:pipeline['num_points']*2]/5) /2
    
    landmark_stage3_easy_values = res_stage3_easy['fc8'][:, pipeline['num_points']*2:] \
            .reshape((3, pipeline['num_points']))
    landmark_stage3_hard_values = res_stage3_hard['fc8'][:, pipeline['num_points']*2:] \
            .reshape((3, pipeline['num_points']))

    v3 = np.argmax(landmark_stage3_easy_values + landmark_stage3_hard_values, axis=0)
    visibility_stage3 = visibility_case[v3]

    prediction_stage3 = {
        'landmark': get_orig_coordinate(landmark_stage3),
        'visibility': visibility_stage3
    };
    
    # output
    prediction = {
        'stage1': prediction_stage1,
        'stage2': prediction_stage2,
        'stage3': prediction_stage3,
        'num_points': pipeline['num_points']
    }
    
    return prediction

In [7]:
def show_results(img, prediction):
    """
    display landmark in picture.
    """
    plt.imshow(img.asnumpy())
    visibility_case = np.array(['Visible', 'Occlude', 'Inexistent'])
    colors = np.array(['y','b','g'])
    for c in range(3):
        pairs = prediction['stage1']['landmark'].reshape(-1, 2)\
            [np.where(prediction['stage1']['visibility'] == visibility_case[c])]
        plt.plot(pairs[:, 0], pairs[:, 1], marker='s', color=colors[c], ls='')

In [None]:
# network processing
# http://caffe.berkeleyvision.org/tutorial/interfaces.html
# http://christopher5106.github.io/deep/learning/2015/09/04/Deep-learning-tutorial-on-Caffe-Technology.html
files = [os.path.join(image_path, f) for f in os.listdir(image_path) if os.path.isfile(os.path.join(image_path, f))]

In [8]:
# network processing
# http://caffe.berkeleyvision.org/tutorial/interfaces.html
# http://christopher5106.github.io/deep/learning/2015/09/04/Deep-learning-tutorial-on-Caffe-Technology.html
image_path = '../../FashionAI-Attributes/data/web/Images/skirt_length_labels'
files = [os.path.join(image_path, f) for f in os.listdir(image_path) if os.path.isfile(os.path.join(image_path, f))]

In [20]:
def pipeline_single_process(tuple_data):
    """
    pipeline forward to make image processed
    """
    i, img_stan = tuple_data
    visibility_case = np.array(['Visible','Occlude','Inexistent'])
    
    # stage 1 fp
    res_stage1 = pipeline['net_stage1'].forward(**{
        pipeline['net_stage1'].inputs[0]: np.asarray([img_stan]) 
    })
    landmark_stage1 = res_stage1['fc8'][:, 0:pipeline['num_points']*2]
    landmark_stage1_values = res_stage1['fc8'][:, pipeline['num_points']*2:].reshape((3,pipeline['num_points']))
    v1 = np.argmax(landmark_stage1_values, axis=0)
    visibility_stage1 = visibility_case[v1]
    
    prediction_stage1 = {
        'landmark': res_stage1, #get_orig_coordinate(landmark_stage1),
        'visibility': visibility_stage1
    };
    
    # stage 2 fp   
    res_stage2 = pipeline['net_stage2'].forward(**{
        pipeline['net_stage2'].inputs[0]: np.asarray([img_stan]), 
        pipeline['net_stage2'].inputs[1]: np.asarray([landmark_stage1])
    });
    landmark_stage2 = landmark_stage1 - res_stage2['fc8'][:, :pipeline['num_points']*2] / 5;
    landmark_stage2_values = res_stage1['fc8'][:, pipeline['num_points']*2:].reshape((3, pipeline['num_points']))
    
    v2 = np.argmax(landmark_stage2_values, axis=0)
    visibility_stage2 = visibility_case[v2]
    
    prediction_stage2 = {
        'landmark': landmark_stage2, #get_orig_coordinate(landmark_stage2),
        'visibility': visibility_stage2
    };
    
    # stage 3 fp
    res_stage3_easy = pipeline['net_stage3_easy'].forward(**{
        pipeline['net_stage2'].inputs[0]: np.asarray([img_stan]), 
        pipeline['net_stage2'].inputs[1]: np.asarray([landmark_stage2])
    });
    res_stage3_hard = pipeline['net_stage3_hard'].forward(**{
        pipeline['net_stage2'].inputs[0]: np.asarray([img_stan]), 
        pipeline['net_stage2'].inputs[1]: np.asarray([landmark_stage2])
    });
    landmark_stage3 = landmark_stage2 - (res_stage3_easy['fc8'][:, 0:pipeline['num_points']*2] /5  
                                         + res_stage3_hard['fc8'][:, 0:pipeline['num_points']*2]/5) /2
    
    landmark_stage3_easy_values = res_stage3_easy['fc8'][:, pipeline['num_points']*2:] \
            .reshape((3, pipeline['num_points']))
    landmark_stage3_hard_values = res_stage3_hard['fc8'][:, pipeline['num_points']*2:] \
            .reshape((3, pipeline['num_points']))

    v3 = np.argmax(landmark_stage3_easy_values + landmark_stage3_hard_values, axis=0)
    visibility_stage3 = visibility_case[v3]

    prediction_stage3 = {
        'landmark': landmark_stage3, #get_orig_coordinate(landmark_stage3),
        'visibility': visibility_stage3
    };
    
    # output
    prediction = {
        'stage1': prediction_stage1,
        'stage2': prediction_stage2,
        'stage3': prediction_stage3,
        'num_points': pipeline['num_points']
    }
    
    return i, prediction

In [None]:
# multiprocessing for loading image
from tqdm import tqdm
from glob import glob
import multiprocessing

n = len(files)
results = {}

with multiprocessing.Pool(12) as pool:
    # merge parameter as tuple
    args = [(i, standize_image(files[i])) for i in range(n)]
    with tqdm(pool.imap_unordered(pipeline_single_process, args), total=n) as pbar:
        for i, prediction in pbar:
            results[i] = prediction

  6%|▋         | 632/10110 [19:57<4:59:25,  1.90s/it]

In [None]:
# prediction = pipeline_batch_process(imgs, pipeline)
# show_results(img_orig, prediction)