In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import datetime
import os
import sys
import time

In [None]:
# Make the project sources importable; presumably the `data` and `model`
# packages imported in the cells below live under this directory.
PROJECT_ROOT = "/home/caleml/main-pe/"
# Guarded so that re-running this cell does not append the same entry again.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [None]:
import numpy as np
import tensorflow as tf

from tensorflow.keras import Model, Input, Sequential
from tensorflow.keras.layers import MaxPooling2D, UpSampling2D, Convolution2D, Activation, BatchNormalization, Reshape
from tensorflow.keras.layers import Permute, add, concatenate
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.optimizers import RMSprop

from tensorflow.keras.applications import ResNet50

In [None]:
from data.datasets.mpii import MpiiSinglePerson
from data.utils.data_utils import TEST_MODE, TRAIN_MODE, VALID_MODE
from data.loader import BatchLoader

from model import blocks
from model import layers
from model import losses
from model import config
from model import callbacks
from model.models import BaseModel, AppearanceModel
from model.utils import pose_format

# model

In [None]:
class PoseModel(object):
    '''
    Pose branch: a stem followed by `n_blocks` stacked prediction blocks.

    Every block produces one tensor that concatenates the regressed pose and the
    joint visibility, so the Keras model exposed by `model` has one output per block.

    NOTE: `build_softargmax_model` and `build_visibility_model` use
    `lin_interpolation_2d` and `Lambda`, which are defined / imported in a later
    cell of this notebook; that cell must have been run before instantiating this class.
    '''
    
    def __init__(self, input_tensor, n_joints, n_blocks, kernel_size):
        '''
        input_tensor: tensor the whole graph is built on (forwarded to `build`)
        n_joints: number of joints, also used as the number of heatmaps
        n_blocks: number of stacked blocks (one model output per block)
        kernel_size: kernel size forwarded to the separable convolutions of the blocks
        '''
        self.n_joints = n_joints
        self.n_blocks = n_blocks
        self.kernel_size = kernel_size
        
        self.n_heatmaps = self.n_joints   # this seems silly but we will augment with context later
        
        # the graph is built eagerly at construction time
        self.build(input_tensor)
        
    @property
    def model(self):
        '''
        Keras model assembled by `build` (inputs: input tensor, outputs: one pose_vis tensor per block)
        '''
        return self._model
        
    def build(self, inp):
        '''
        1. stem
        2. stacking the blocks

        For each block: reception block -> separable conv -> heatmaps -> (pose, visibility).
        The concatenation [pose, visibility] of every block is appended to the model outputs.
        For all blocks but the last one, the heatmaps are remapped to the block input
        channel count and added to the features fed to the next block.
        '''
        
        outputs = list() 
        x = self.stem(inp)
        
        # static layers
        # built once and shared by every block (see pose_regression_2d)
        num_rows, num_cols, num_filters = x.get_shape().as_list()[1:]
        # print("num rows %s, num cols %s, num filters %s" % (num_rows, num_cols, num_filters))
        pose_input_shape = (num_rows, num_cols, self.n_joints)   # (32, 32, 16)
        self.pose_softargmax_model = self.build_softargmax_model(pose_input_shape)
        self.joint_visibility_model = self.build_visibility_model(pose_input_shape)
        
        # hourglass blocks
        for i_block in range(self.n_blocks):
            
            # shape of the block input, its channel count is reused by the fremap block below
            block_shape = x.get_shape().as_list()[1:]
            x = self.reception_block(x, name='rBlock%d' % (i_block + 1))
            
            # keep the reception block output for the residual sum at the end of the block
            identity_map = x
            x = self.sepconv_block(x, name='SepConv%d' % (i_block + 1))
            h = self.pose_block(x, name='RegMap%d' % (i_block + 1))
            
            pose, visible = self.pose_regression_2d(h, name='PoseReg%s' % (i_block + 1))
            pose_vis = concatenate([pose, visible], axis=-1)
            print("pose shape %s, vis shape %s, concat shape %s" % (str(pose.shape), str(visible.shape), str(pose_vis.shape)))
            
            outputs.append(pose_vis)

            # no need to prepare features for a next block after the last one
            if i_block < self.n_blocks - 1:
                h = self.fremap_block(h, block_shape[-1], name='fReMap%d' % (i_block + 1))
                # next block input: reception output + sepconv output + remapped heatmaps
                x = add([identity_map, x, h])
                
        self._model = Model(inputs=inp, outputs=outputs)
        
    def stem(self, inp):
        '''
        inception v4 stem
        
        input: 256 x 256 x 3
        output: 32 x 32 x 576

        The layers are wrapped in a sub-model named 'Stem' which is then applied to `inp`.
        '''
        xi = Input(shape=inp.get_shape().as_list()[1:]) # 256 x 256 x 3

        x = layers.conv_bn_act(xi, 32, (3, 3), strides=(2, 2))
        x = layers.conv_bn_act(x, 32, (3, 3))
        x = layers.conv_bn_act(x, 64, (3, 3))

        # two parallel downsampling paths (strided conv / max pooling), concatenated
        a = layers.conv_bn_act(x, 96, (3, 3), strides=(2, 2))
        b = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
        x = concatenate([a, b])

        # two parallel conv paths (the second one with 5x1 / 1x5 convolutions), concatenated
        a = layers.conv_bn_act(x, 64, (1, 1))
        a = layers.conv_bn(a, 96, (3, 3))
        b = layers.conv_bn_act(x, 64, (1, 1))
        b = layers.conv_bn_act(b, 64, (5, 1))
        b = layers.conv_bn_act(b, 64, (1, 5))
        b = layers.conv_bn(b, 96, (3, 3))
        x = concatenate([a, b])

        # second pair of parallel downsampling paths, concatenated
        a = layers.act_conv_bn(x, 192, (3, 3), strides=(2, 2))
        b = MaxPooling2D((2, 2), strides=(2, 2))(x)
        x = concatenate([a, b])

        x = blocks.sepconv_residual(x, 3*192, name='sepconv1')

        model = Model(xi, x, name='Stem')
        x = model(inp)
        
        return x
    
    def build_softargmax_model(self, input_shape):
        '''
        Static model for soft argmax

        input_shape: shape of the heatmaps, without the batch dimension
        returns: non trainable model mapping the heatmaps to the concatenation
                 of the two outputs of lin_interpolation_2d (dim=0 and dim=1)
        '''

        inp = Input(shape=input_shape)
        # presumably a softmax normalisation of each heatmap channel (helper defined in model.layers)
        x = layers.act_channel_softmax(inp)

        # lin_interpolation_2d is defined in a later cell of the notebook
        x_x = lin_interpolation_2d(x, dim=0)
        x_y = lin_interpolation_2d(x, dim=1)
        pose = concatenate([x_x, x_y])

        model = Model(inputs=inp, outputs=pose)
        model.trainable = False

        return model
    
    def build_visibility_model(self, input_shape):
        '''
        Static model for joint visibility

        Max pooling over the whole heatmap followed by a sigmoid: one value per channel.
        The two pooled spatial axes are squeezed and a trailing axis is added.
        '''
        num_rows, num_cols = input_shape[0:2]
        inp = Input(shape=input_shape)

        # pooling window = full heatmap size
        x = MaxPooling2D((num_rows, num_cols))(inp)
        x = Activation('sigmoid')(x)

        # Lambda is imported in a later cell of the notebook
        x = Lambda(lambda x: tf.squeeze(x, axis=1))(x)
        x = Lambda(lambda x: tf.squeeze(x, axis=1))(x)
        x = Lambda(lambda x: tf.expand_dims(x, axis=-1))(x)

        model = Model(inputs=inp, outputs=x)

        return model
        
    def reception_block(self, inp, name):
        '''
        each pose block starts with a reception block
        it is u-shaped and relies on separable convolutions
        
        inp ------------------------- a (SR 576) -------------------- + -- out
          |                                                           |
          |                                                           |
          MP --- C 288 -- SR 288 ---- b (SR 288) ---- + -- SR 576 -- US
                            |                         |
                            |                         |
                            MP --- SR -- SR -- SR --- US     <- all 288 channels
                            
          
        SR: Sepconv Residual (all 5x5)
        C: Conv (1x1)
        MP: Max Pooling (2x2 with stride 2x2)
        US: UpSampling (2x2)
        
        input: 32 x 32 x 576
        output: 32 x 32 x 576

        The channel counts above are for a 576 channel input: the code uses `size`
        (input channels) and `size / 2`, and the kernel size is `self.kernel_size`.
        The layers are wrapped in a sub-model called `name`, applied to `inp`.
        '''
        ksize = self.kernel_size
        
        input_shape = inp.get_shape().as_list()[1:]
        size = int(input_shape[-1])

        # first branch
        xi = Input(shape=input_shape)
        a = blocks.sepconv_residual(xi, size, name='sepconv_l1', kernel_size=ksize)

        # second branch
        low1 = MaxPooling2D((2, 2))(xi)
        low1 = layers.act_conv_bn(low1, int(size/2), (1, 1))
        low1 = blocks.sepconv_residual(low1, int(size/2), name='sepconv_l2_1', kernel_size=ksize)
        b = blocks.sepconv_residual(low1, int(size/2), name='sepconv_l2_2', kernel_size=ksize)

        # third branch
        c = MaxPooling2D((2, 2))(low1)
        c = blocks.sepconv_residual(c, int(size/2), name='sepconv_l3_1', kernel_size=ksize)
        c = blocks.sepconv_residual(c, int(size/2), name='sepconv_l3_2', kernel_size=ksize)
        c = blocks.sepconv_residual(c, int(size/2), name='sepconv_l3_3', kernel_size=ksize)
        c = UpSampling2D((2, 2))(c)

        # merge second and third branches
        b = add([b, c])
        b = blocks.sepconv_residual(b, size, name='sepconv_l2_3', kernel_size=ksize)
        b = UpSampling2D((2, 2))(b)
        
        # merge first and second branches
        x = add([a, b])
        model = Model(inputs=xi, outputs=x, name=name)

        return model(inp)
    
    def sepconv_block(self, inp, name):
        '''
        Separable convolution

        Keeps the number of channels of the input, kernel size is `self.kernel_size`.
        Wrapped in a sub-model called `name`, applied to `inp`.
        '''
        input_shape = inp.get_shape().as_list()[1:]

        xi = Input(shape=input_shape)
        x = layers.separable_act_conv_bn(xi, input_shape[-1], self.kernel_size)

        model = Model(inputs=xi, outputs=x, name=name)

        return model(inp)
        
    def pose_block(self, inp, name):
        '''
        input: 32 x 32 x 576
        output: 32 x 32 x 16 (number of heatmaps)

        1x1 convolution producing `self.n_heatmaps` channels, wrapped in a sub-model called `name`.
        '''
        input_shape = inp.get_shape().as_list()[1:]

        xi = Input(shape=input_shape)
        x = layers.act_conv(xi, self.n_heatmaps, (1, 1))

        model = Model(inputs=xi, outputs=x, name=name)

        return model(inp)
    
    def pose_regression_2d(self, heatmaps, name):
        '''
        soft argmax to get the pose from the heatmaps
        joint prob model to get the joint visibility probability
        
        input: 32 x 32 x 16 (number of joints)
        output: 
        - pose (None, 16, 2)
        - visibility (None, 16, 1)

        Uses the two static models built once in `build` (shared by all blocks).
        NOTE(review): `name` is accepted but not used here.
        '''
        pose = self.pose_softargmax_model(heatmaps)
        visibility = self.joint_visibility_model(heatmaps)
        
        return pose, visibility
    
    def fremap_block(self, inp, num_filters, name=None):
        '''
        Remap the heatmaps to `num_filters` channels with a 1x1 convolution,
        so that they can be added to the features of the block (see `build`).
        Wrapped in a sub-model called `name`, applied to `inp`.
        '''
        input_shape = inp.get_shape().as_list()[1:]

        xi = Input(shape=input_shape)
        x = layers.act_conv_bn(xi, num_filters, (1, 1))

        model = Model(inputs=xi, outputs=x, name=name)

        return model(inp)


In [None]:
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import SeparableConv2D
from model.utils import math

def lin_interpolation_2d(inp, dim):
    '''
    Apply a frozen SeparableConv2D whose kernel covers the whole spatial extent of `inp`.

    The depthwise kernel of every channel is filled with the values returned by
    `math.linspace_2d(num_rows, num_cols, dim=dim)` (presumably a coordinate ramp
    along `dim` - confirm in model.utils.math) and the pointwise kernel is the
    identity, so each channel is reduced independently to one weighted sum.

    inp: tensor (None, num_rows, num_cols, num_filters)
    dim: forwarded to `math.linspace_2d`
    returns: tensor with the two spatial axes squeezed and a trailing axis added
    '''
    n_rows, n_cols, n_channels = inp.get_shape().as_list()[1:]
    interp_conv = SeparableConv2D(n_channels, (n_rows, n_cols), use_bias=False)
    # calling the layer builds it, which is required before reading its weights
    out = interp_conv(inp)

    # without bias the layer holds exactly two arrays: depthwise then pointwise kernel
    depthwise_kernel, pointwise_kernel = interp_conv.get_weights()
    depthwise_kernel.fill(0)
    pointwise_kernel.fill(0)
    ramp = math.linspace_2d(n_rows, n_cols, dim=dim)

    for channel in range(n_channels):
        depthwise_kernel[:, :, channel, 0] = ramp[:, :]
        # identity mapping: output channel i only reads input channel i
        pointwise_kernel[0, 0, channel, channel] = 1.

    interp_conv.set_weights([depthwise_kernel, pointwise_kernel])
    interp_conv.trainable = False

    # drop the two spatial axes left by the full-size kernel, then add a last axis
    out = Lambda(lambda t: tf.squeeze(t, axis=1))(out)
    out = Lambda(lambda t: tf.squeeze(t, axis=1))(out)
    out = Lambda(lambda t: tf.expand_dims(t, axis=-1))(out)

    return out

In [None]:
# Inspect the weight layout of a SeparableConv2D configured like the one used in
# lin_interpolation_2d (16 filters, 32 x 32 kernel, no bias).
conv = SeparableConv2D(16, (32, 32), use_bias=False)
# A Keras layer owns no weights until it is built: without this call get_weights()
# returns an empty list and w[0] below raises IndexError.
conv.build((None, 32, 32, 16))
w = conv.get_weights()
print(len(w))
print("%s %s %s" % (len(w), w[0].shape, w[1].shape))

In [None]:
class DecoderModel(object):
    '''
    Decoder turning a latent feature map into an image.

    Five stages of `layers.up` followed by 3x3 conv_bn_act layers, closed by a
    1x1 conv_bn with 3 channels. The Keras model is available through `model`.
    '''

    def __init__(self, input_tensor):
        '''
        input_tensor: tensor whose shape (batch dimension excluded) defines the decoder input
        '''
        self.build(input_tensor)

    @property
    def model(self):
        '''
        Keras model named 'decoder' assembled by `build`
        '''
        return self._model

    def build(self, inp):
        '''
        Create the decoder model for an input shaped like `inp`
        (for now, only the z_a part, 8 x 8 x 2048).
        '''
        latent_shape = inp.get_shape().as_list()[1:]
        z_a = Input(shape=latent_shape)

        # one entry per upsampling stage: filters of the successive 3x3 conv_bn_act
        # layers applied after `layers.up` (spatial sizes assume an 8 x 8 input)
        stage_filters = (
            (512, 512, 512),   # 16 x 16
            (512, 512, 256),   # 32 x 32
            (256, 256, 128),   # 64 x 64
            (128, 64),         # 128 x 128
            (3,),              # 256 x 256
        )

        feat = z_a
        for filters_in_stage in stage_filters:
            feat = layers.up(feat)
            for n_filters in filters_in_stage:
                feat = layers.conv_bn_act(feat, n_filters, (3, 3))

        # final 1x1 projection to 3 channels
        # TODO: should we permute here or have the input formatted with channels first?
        i_hat = layers.conv_bn(feat, 3, (1, 1))

        self._model = Model(inputs=z_a, outputs=i_hat, name='decoder')
        
        

In [None]:
class MultiBranchModel(BaseModel):
    '''
    2 branch model :
    - appearance (z_a)
    - pose (z_p)
    One common decoder to recreate the image

    The compiled model has 1 + nb_pose_blocks outputs: the reconstructed image
    followed by one pose output per pose block.
    '''
    
    def __init__(self, n_joints=16, nb_pose_blocks=8, reception_kernel_size=(5,5)):
        '''
        n_joints: number of joints predicted by the pose branch
        nb_pose_blocks: number of stacked blocks in the pose branch
        reception_kernel_size: kernel size used by the blocks of the pose branch
        '''
        self.n_joints = n_joints
        self.n_blocks = nb_pose_blocks
        self.reception_kernel_size = reception_kernel_size
        
        BaseModel.__init__(self)
        
    def build(self):
        '''
        Build and compile the full model (stored in `self.model`).

        `self.input_shape` and `self.start_lr` are not set in this class,
        they are presumably provided by BaseModel.
        '''
        inp = Input(shape=self.input_shape)
        
        # encoders
        time_1 = time.time()
        z_a = self.appearance_encoder(inp)
        time_2 = time.time()
        z_p = self.pose_encoder(inp)
        time_3 = time.time()
        
        print("Build E_a %s, build E_p %s" % (time_2 - time_1, time_3 - time_2))
        print(type(z_a), type(z_p))
        print("Shape z_a %s" % str(z_a.shape))
        
        # decoder
        concat = self.concat(z_a, z_p)
        print("Shape concat %s" % str(concat.shape))
        i_hat = self.decoder(concat)
        
        outputs = [i_hat]
        outputs.extend(z_p)
        self.model = Model(inputs=inp, outputs=outputs)
        # str() so that the formatting also works when output_shape is a tuple
        print("Outputs shape %s" % str(self.model.output_shape))
       
        # one loss per output, in the same order as `outputs`
        # (named loss_fns to avoid shadowing the `losses` module imported from `model`)
        loss_fns = [reconstruction_loss()]
        loss_fns.extend([pose_loss()] * len(z_p))
        # loss = mean_squared_error
        self.model.compile(loss=loss_fns, optimizer=RMSprop(lr=self.start_lr))
        self.model.summary()
        
    def appearance_encoder(self, inp):
        '''
        resnet50 for now
        input: 256 x 256 x 3
        output: 8 x 8 x 2048
        '''
        enc_model = ResNet50(include_top=False, weights='imagenet', input_tensor=inp)
        
        z_a = enc_model.output   # 8 x 8 x 2048
        return z_a
    
    def pose_encoder(self, inp):
        '''
        reception / stacked hourglass
        input: 256 x 256 x 3
        output: list of pose tensors, one per pose block
        '''
        pose_model = PoseModel(inp, self.n_joints, self.n_blocks, self.reception_kernel_size).model
        # `outputs` is always a list, whereas `output` can be a bare tensor when
        # there is a single pose block, which would break extend() / len() in build
        out = list(pose_model.outputs)
        
        return out
    
    def concat(self, z_a, z_p):
        '''
        concat pose and appearance representations before decoding
        input:
            - z_p: list of pose tensors (currently unused)
            - z_a: 8 x 8 x 2048
        output: z_a, unchanged
        
        TODO: This is where the real work should happen
        '''
        return z_a
        
    def decoder(self, concat):
        '''
        from concatenated representations to image reconstruction
        input: 8 x 8 x 2048 (z_a)
        output: 256 x 256 x 3
        '''
        decoder_model = DecoderModel(input_tensor=concat).model
        out = decoder_model(concat)
        
        return out
    
        

In [None]:
def elastic_bce(y_true, y_pred):
    '''
    Elasticnet binary cross entropy for pose estimation

    Per element penalty: |diff| + diff^2 + 0.01 * binary cross entropy, kept only
    where y_true > 0, summed over the last two axes and divided by the size of
    axis 1 of y_pred.

    y_true: same shape as y_pred (entries <= 0 are ignored)
    y_pred: (None, 16, 2)
    '''
    valid = tf.cast(tf.math.greater(y_true, 0.), tf.float32)
    print("Shape %s" % valid.shape)
    # static size of axis 1, used as a constant normaliser
    joint_count = y_pred.get_shape().as_list()[1]
    print("Num joints %s" % joint_count)

    abs_term = tf.math.abs(y_pred - y_true)
    square_term = tf.math.square(y_pred - y_true)
    # doesn't expect logits like tf does
    bce_term = 0.01 * tf.keras.backend.binary_crossentropy(y_true, y_pred)
    penalty = abs_term + square_term + bce_term
    ignored = 0. * y_pred

    masked = tf.where(tf.cast(valid, tf.bool), penalty, ignored)
    return tf.reduce_sum(masked, axis=(-1, -2)) / joint_count
    

In [None]:
def multi_loss():
    '''
    Return a Keras-style loss function (y_true, y_pred) -> loss.

    For now the loss only contains the pose term computed by elastic_bce.
    '''
    
    def _multi_loss(y_true, y_pred):
        print("y_true shape %s" % (str(y_true.shape)))
        print("y_pred shape %s" % (str(y_pred.shape)))
        
        # the arguments were previously referenced as p_true / p_pred, which are
        # not defined anywhere and raised a NameError on first call
        pose_loss = elastic_bce(y_true, y_pred)
        return pose_loss
    
    return _multi_loss


def pose_loss():
    '''
    Return the loss function used for one pose output: logs both shapes and
    delegates to elastic_bce.
    '''

    def _pose_loss(y_true, y_pred):
        print("pose y_true shape %s" % (str(y_true.shape)))
        print("pose y_pred shape %s" % (str(y_pred.shape)))

        return elastic_bce(y_true, y_pred)

    return _pose_loss


def reconstruction_loss():
    '''
    Return the loss function used for the reconstructed image: squared error
    summed over the last two axes, divided by the size of the last axis of y_pred.
    '''

    def _rec_loss(y_true, y_pred):
        print("rec y_true shape %s" % (str(y_true.shape)))
        print("rec y_pred shape %s" % (str(y_pred.shape)))
        # NOTE(review): the log message calls this "Num joints" but it is the size of
        # the last axis of y_pred (presumably the channels of the image) - confirm
        last_dim = y_pred.get_shape().as_list()[-1]
        print("Num joints: %s" % last_dim)

        squared_error = tf.keras.backend.square(y_pred - y_true)
        return tf.math.reduce_sum(squared_error, axis=(-1, -2)) / last_dim

    return _rec_loss
        

# dataset

In [None]:
# Root folders of the datasets (absolute paths on the shared storage, machine specific).
h36m_path = "/share/DEEPLEARNING/datasets/human36m"
mpii_path = "/share/DEEPLEARNING/datasets/mpii"

In [None]:
# h36m dataset loading
# NOTE(review): `Human36M` and `num_predictions` are not imported / defined in the
# cells visible in this notebook; this cell raises NameError on a fresh kernel
# unless they are provided first.
h36m = Human36M(h36m_path, dataconf=config.human36m_dataconf, poselayout=pose_format.pa17j3d, topology='frames')

# Training loader: 'frame' as input key, 'pose' as target key.
# The batch size is the length of the whole training split.
data_tr = BatchLoader(
    [h36m], 
    ['frame'], 
    ['pose'],
    TRAIN_MODE, 
    batch_size=h36m.get_length(TRAIN_MODE),
    num_predictions=num_predictions, 
    shuffle=True)

# batch_size=[batch_size_mpii, batch_size_mpii, batch_size_ar, batch_size_ar], 

In [None]:
# validation
# Validation loader: the batch size is the length of the whole validation split,
# so that item 0 is unpacked below as the complete validation data.
# NOTE(review): the dataset is passed directly here, while the training loader
# above wraps it in a list - confirm BatchLoader accepts both.
h36m_val = BatchLoader(
    h36m, 
    ['frame'],
    ['pose_w', 'pose_uvd', 'afmat', 'camera', 'action'], 
    VALID_MODE,
    batch_size=h36m.get_length(VALID_MODE), 
    shuffle=True)

# one input (frames) and five targets, in the order of the keys given above
[x_val], [pw_val, puvd_val, afmat_val, scam_val, action] = h36m_val[0]

# NOTE(review): `H36MEvalCallback` and `logdir` are not imported / defined in the
# cells visible in this notebook.
h36m_callback = H36MEvalCallback(x_val, pw_val, afmat_val, puvd_val[:,0,2], scam_val, action, logdir=logdir)

In [None]:
# MPII single person dataset, created without an explicit poselayout
# (the variant passing poselayout=pose_format.pa17j3d is kept commented out).
# mpii = MpiiSinglePerson(mpii_path, dataconf=config.mpii_dataconf, poselayout=pose_format.pa17j3d)
mpii = MpiiSinglePerson(mpii_path, dataconf=config.mpii_dataconf)

In [None]:
# MPII training loader: 'frame' as input, and five targets (the frame itself
# followed by four times the pose). Five targets line up with the outputs of
# MultiBranchModel(nb_pose_blocks=4): one reconstruction + one pose per block.
data_tr_mpii = BatchLoader(
    mpii, 
    ['frame'], 
    ['frame', 'pose', 'pose', 'pose', 'pose'], 
    TRAIN_MODE,
    batch_size=20,
    shuffle=False)

In [None]:
# length of the loader (used as steps_per_epoch in the training cells below)
len(data_tr_mpii)

In [None]:
# Sanity check of one raw item fetched through get_data (index 1, training split).
# The prints below treat the result as a mapping holding 'pose' and 'frame' entries.
a = data_tr_mpii.get_data(1, TRAIN_MODE)
print(type(a), a.keys())
print(a['pose'])
print("pose shape %s" % (str(a['pose'].shape)))
print("frame shape %s" % (str(a['frame'].shape)))

In [None]:
# Sanity check of one batch (index 1): b[0] holds the inputs and b[1] the targets.
b = data_tr_mpii[1]
print(type(b), len(b))
print(type(b[0]), len(b[0]))
print(b[0][0].shape)
print(type(b[1]), len(b[1]))
print(b[1][0].shape, b[1][1].shape)

# display the first frame of the first input of the batch
import matplotlib.pyplot as plt
img = b[0][0][0]
print(img.shape)
imgplot = plt.imshow(img)
plt.show()

# training

In [None]:
# NOTE(review): `Encoder` is not defined or imported in the cells visible in this
# notebook; this cell raises NameError on a fresh kernel.
model = Encoder()
model.build()

# steps_per_epoch = h36m.get_length(TRAIN_MODE) // batch_size_h36m
# NOTE(review): `batch_size_mpii` is not defined in the visible cells either
# (the MPII loader above is created with batch_size=20) - confirm the intended value.
steps_per_epoch = mpii.get_length(TRAIN_MODE) // batch_size_mpii

In [None]:
# NOTE(review): `data_tr` is the Human3.6M loader while `steps_per_epoch` is computed
# from the MPII dataset in the cell above - confirm this combination is intended.
model.train(data_tr, steps_per_epoch)

In [None]:
# Appearance-only model (imported from model.models); rebinds `model`.
model = AppearanceModel()
model.build()


In [None]:
# Train the appearance model on MPII; outputs go to a folder stamped with the current time.
model_name = 'appearance'
dataset_name = 'mpii'
model_folder = '/home/caleml/pe_experiments/exp_%s_%s_%s' % (model_name, dataset_name, datetime.datetime.now().strftime("%Y%m%d%H%M")) 
# exist_ok: the timestamp has a one minute resolution, so re-running the cell
# within the same minute would otherwise raise FileExistsError
os.makedirs(model_folder, exist_ok=True)
model.train(data_tr_mpii, steps_per_epoch=len(data_tr_mpii), model_folder=model_folder, n_epochs=60)

In [None]:
# Two-branch model with 4 pose blocks: 1 reconstruction output + 4 pose outputs,
# i.e. as many outputs as target keys in data_tr_mpii. Rebinds `model`.
model = MultiBranchModel(nb_pose_blocks=4)
model.build()

In [None]:
# Train the multi-branch model on MPII; outputs go to a folder stamped with the current time.
model_name = 'multib'
dataset_name = 'mpii'
model_folder = '/home/caleml/pe_experiments/exp_%s_%s_%s' % (model_name, dataset_name, datetime.datetime.now().strftime("%Y%m%d%H%M")) 
# exist_ok: the timestamp has a one minute resolution, so re-running the cell
# within the same minute would otherwise raise FileExistsError
os.makedirs(model_folder, exist_ok=True)
model.train(data_tr_mpii, steps_per_epoch=len(data_tr_mpii), model_folder=model_folder, n_epochs=60)

In [None]:
from tensorflow.python.client import device_lib

# devices TensorFlow can use in this kernel
print(device_lib.list_local_devices())
# .get(): prints None instead of raising KeyError when the variable is not set
print(os.environ.get("CUDA_VISIBLE_DEVICES"))

# NOTE(review): this is set after TensorFlow has already listed its devices;
# presumably it has to be set before TensorFlow initialises them to have an
# effect - confirm, and move to the top of the notebook if so.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

# eval

In [None]:
from model.models import AppearanceModel

In [None]:
# Paths of previous experiments (absolute, machine specific).
# Only `checkpoint_2` is used by the loading cell below.
model_folder = '/home/caleml/pe_experiments/exp_appearance_mpii_201902051901'
model_checkpoint = '/home/caleml/pe_experiments/exp_appearance_mpii_201902051901/weights_mpii_058.h5'  # weights
checkpoint_2 = '/home/caleml/pe_experiments/exp_appearance_mpii_201902061614/weights_mpii_013.h5'  # made with save_model

In [None]:
# Fresh appearance model restored from the checkpoint made with save_model; rebinds `model`.
model = AppearanceModel()
model.load(checkpoint_2)

In [None]:
# eval data
# NOTE(review): this rebinds `mpii`, this time with poselayout=pose_format.pa17j3d,
# whereas the training dataset above was created without poselayout - confirm.
mpii = MpiiSinglePerson(mpii_path, dataconf=config.mpii_dataconf, poselayout=pose_format.pa17j3d)
# validation loader with the frame as both input and target; no batch_size is
# given, so the default of BatchLoader applies
data_val_mpii = BatchLoader(
    mpii, 
    ['frame'], 
    ['frame'], 
    VALID_MODE,
    shuffle=False)

len(data_val_mpii)

# debug

In [None]:
def pouet(definition):
    '''
    Return a tuple holding one 'truc' per element of the iterable `definition`.
    '''
    return tuple('truc' for _ in definition)

# a one character string gives a one element tuple
a = pouet('a')
        
    

In [None]:
# display `a` (bound to the result of pouet('a') in the cell above)
a

In [None]:
# display `b` (last bound to data_tr_mpii[1] in the dataset section)
b

In [None]:
# display `b` again (same value as in the previous cell)
b

In [None]:
# `tf.max` does not exist (AttributeError); the max reduction lives in tf.math,
# next to the tf.math.reduce_sum used by the losses above
tf.math.reduce_max

In [None]:
def wrapper(i):
    '''
    Return a function multiplying its argument by `i`.
    `i` is bound when wrapper is called, so every returned function keeps its own factor.
    '''
    def loss(a):
        return a * i
    return loss

# Named `loss_fns` rather than `losses`, so that the `losses` module imported
# from `model` at the top of the notebook is not overwritten by a list.
loss_fns = [wrapper(factor) for factor in range(10)]

print(len(loss_fns))
for loss_fn in loss_fns:
    print(loss_fn(10))