In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [41]:
import datetime
import os
import sys

In [3]:
# Extend the import search path before the project imports below.
# NOTE(review): hard-coded, user-specific absolute path — presumably the
# repository root that provides the `data` and `model` packages; confirm and
# consider making it configurable.
sys.path.append("/home/caleml/main-pe/")

In [11]:
import numpy as np
import tensorflow as tf

from tensorflow.keras import Model, Input, Sequential
from tensorflow.keras.layers import MaxPooling2D, UpSampling2D, Convolution2D, Activation, BatchNormalization, Reshape
from tensorflow.keras.layers import Permute
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.optimizers import RMSprop

from tensorflow.keras.applications import ResNet50

In [38]:
from data.datasets.mpii import MpiiSinglePerson
from data.data_utils import TEST_MODE, TRAIN_MODE, VALID_MODE
from data.loader import BatchLoader

from model import layers
from model import losses
from model import config
from model import callbacks
from model.utils import pose_format

# model

In [None]:
class Encoder(object):
    '''
    Work-in-progress model with a pose branch (E_p) and an appearance branch
    (E_a) whose outputs are meant to be merged and decoded back into an image.

    NOTE(review): `build` still relies on `self.prepare_concat` and
    `self.decoder`, which are not defined in this class, and on `pose_model`
    returning two values; see the notes inside `build`.
    '''

    def __init__(self):
        self.input_shape = (256, 256, 3)
        self.start_lr = 0.001

    def stem(self, inp):
        '''
        common first stem

        Builds the stem as a sub-model named 'stem' on a fresh Input with the
        same per-sample shape as `inp`, then applies that sub-model to `inp`.

        inp: input tensor (its shape without the batch axis defines the stem input)
        returns: the tensor produced by calling the stem sub-model on `inp`
        '''
        # `concatenate` is not imported at file level; take it from the
        # already-imported `tf` namespace so this method does not NameError.
        concat = tf.keras.layers.concatenate

        print(inp.shape)
        stem_input = Input(shape=inp.shape[1:])  # 256 x 256 x 3

        x = layers.conv_bn_act(stem_input, 32, (3, 3), strides=(2, 2))
        x = layers.conv_bn_act(x, 32, (3, 3))
        x = layers.conv_bn_act(x, 64, (3, 3))

        # two parallel downsampling paths (strided conv / max-pool), merged
        a = layers.conv_bn_act(x, 96, (3, 3), strides=(2, 2))
        b = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
        x = concat([a, b])

        # two parallel conv paths, merged
        a = layers.conv_bn_act(x, 64, (1, 1))
        a = layers.conv_bn(a, 96, (3, 3))
        b = layers.conv_bn_act(x, 64, (1, 1))
        b = layers.conv_bn_act(b, 64, (5, 1))
        b = layers.conv_bn_act(b, 64, (1, 5))
        b = layers.conv_bn(b, 96, (3, 3))
        x = concat([a, b])

        # second downsampling step, again strided conv next to max-pool
        a = layers.act_conv_bn(x, 192, (3, 3), strides=(2, 2))
        b = MaxPooling2D((2, 2), strides=(2, 2))(x)
        x = concat([a, b])

        x = layers.sepconv_residual(x, 3*192, name='sepconv1')

        model = Model(stem_input, x, name='stem')
        return model(inp)

    def pose_model(self, inp):
        '''
        Pose branch: currently only the stem, whose output is returned as-is.

        NOTE(review): `build` unpacks two values (z_p, pred_pose) from this
        method, but only a single tensor is returned here — the pose
        prediction head still has to be written.
        '''
        stem_out = self.stem(inp)

        out = stem_out

        return out

    def appearance_model(self, inp):
        '''
        Appearance branch: ResNet50 feature extractor applied to `inp`.

        The original call `ResNet50(inp)` passed the tensor as the first
        positional parameter (`include_top`); the network is now wired the
        same way as in `AppearanceModel.build`.

        returns: the output tensor of the headless ResNet50 (z_a)
        '''
        enc_model = ResNet50(include_top=False, weights='imagenet', input_tensor=inp)
        return enc_model.output

    def build(self):
        '''
        Input: 256 x 256 x 3 image
        Outputs:
            - pose tensor
            - reconstructed image

        1. E_p is the encoder for the pose estimation
        2. E_a is the encoder for the appearance
        3. concat z_a and z_p to form the input of the decoder
        4. decode into an image

        The assembled and compiled model is stored in `self.model`.
        '''
        inp = Input(shape=self.input_shape)

        # 1. E_p
        # NOTE(review): pose_model returns a single tensor at the moment, so
        # this two-value unpacking cannot work until a pose head exists.
        z_p, pred_pose = self.pose_model(inp)

        # 2. E_a
        z_a = self.appearance_model(inp)

        # 3. reconstruction base
        # NOTE(review): `prepare_concat` is not defined in this class.
        concat = self.prepare_concat(z_p, z_a)

        # 4. decoding
        # NOTE(review): `decoder` is not defined in this class.
        rec_img = self.decoder(concat)

        outputs = [pred_pose, rec_img]
        self.model = Model(inputs=inp, outputs=outputs)

        # compile it
        loss = losses.combined_loss()
        self.model.compile(loss=loss, optimizer=RMSprop(lr=self.start_lr))
        self.model.summary()

    def train(self, data_tr, steps_per_epoch, weights_path=None, extra_callbacks=None):
        '''
        Fit `self.model` on a batch generator for 60 epochs.

        data_tr: generator / batch loader handed to `fit_generator`
        steps_per_epoch: number of batches drawn per epoch
        weights_path: optional checkpoint path; when given, a
            `callbacks.SaveModel(weights_path)` callback is registered
        extra_callbacks: optional iterable of additional Keras callbacks
            (e.g. evaluation callbacks)

        The previous version read `weights_path` and `mpii_callback` from
        notebook globals, shadowed the `callbacks` module with a local list
        and called `fit_generator` on the global `model` (the Encoder wrapper)
        instead of the Keras model held in `self.model`.
        '''
        cb_list = []
        if weights_path is not None:
            cb_list.append(callbacks.SaveModel(weights_path))
        if extra_callbacks:
            cb_list.extend(extra_callbacks)

        self.model.fit_generator(
            data_tr,
            steps_per_epoch=steps_per_epoch,
            epochs=60,
            callbacks=cb_list,
            workers=8,
            initial_epoch=0)
        

In [53]:
class AppearanceModel(object):
    '''
    Appearance branch on its own: a ResNet50 encoder followed by an
    upsampling convolutional decoder, compiled with a mean-squared-error loss
    (only autoencoding z_a for now).
    '''

    def __init__(self):
        self.start_lr = 0.001
        self.input_shape = (256, 256, 3)

    def decoder(self):
        # placeholder: the decoding layers currently live inline in build()
        return None

    def build(self):
        '''
        Assemble encoder + decoder, store the compiled model in `self.model`
        and print the summaries of the encoder and of the full model.
        '''
        image_in = Input(shape=self.input_shape)

        encoder = ResNet50(include_top=False, weights='imagenet', input_tensor=image_in)
        encoder.summary()

        # Filters of the 3x3 conv_bn_act layers applied after each upsampling
        # step; the trailing sizes are the author's spatial-resolution notes.
        decoder_stages = (
            (512, 512, 512),   # 16 x 16
            (512, 512, 256),   # 32 x 32
            (256, 256, 128),   # 64 x 64
            (128, 64),         # 128 x 128
            (64,),             # 256 x 256
        )

        feat = encoder.output   # z_a, annotated as 8 x 8 x 2048
        for stage_filters in decoder_stages:
            feat = layers.up(feat)
            for n_filters in stage_filters:
                feat = layers.conv_bn_act(feat, n_filters, (3, 3))

        # 1x1 projection down to 3 channels.
        # NOTE(review): the earlier comment expected (None, 3, 256, 256) here;
        # with the channels-last input used above this is presumably
        # (None, 256, 256, 3) — confirm.
        # TODO: should we permute here or have the input formatted with channels first?
        i_hat = layers.conv_bn(feat, 3, (1, 1))

        self.model = Model(inputs=image_in, outputs=i_hat)

        # plain MSE for now (losses.combined_loss() is the intended alternative)
        self.model.compile(loss=mean_squared_error, optimizer=RMSprop(lr=self.start_lr))
        self.model.summary()

    def train(self, data_tr, steps_per_epoch, model_folder):
        '''
        Fit `self.model` on `data_tr` for 120 epochs, registering a
        `callbacks.SaveModel` callback built from a per-epoch weights path
        inside `model_folder`.
        '''
        checkpoint_pattern = os.path.join(model_folder, 'weights_mpii_{epoch:03d}.h5')
        cb_list = [callbacks.SaveModel(checkpoint_pattern)]

        fit_options = dict(
            steps_per_epoch=steps_per_epoch,
            epochs=120,
            callbacks=cb_list,
            workers=4,
            initial_epoch=0,
        )
        self.model.fit_generator(data_tr, **fit_options)

        

In [None]:
class AutoEncoder(object):
    '''
    Plain convolutional auto-encoder built as a Keras `Sequential`:
    five conv/pool encoder stages mirrored by five upsample/conv decoder
    stages, compiled with a mean-squared-error loss.
    '''

    # Filters of the conv/BN/ReLU blocks of each encoder stage; every stage
    # is followed by a MaxPooling2D.
    ENCODER_STAGES = (
        (64, 64),
        (128, 128),
        (256, 256, 256),
        (512, 512, 512),
        (512, 512, 512),
    )

    # Filters of the conv/BN/ReLU blocks of each decoder stage; every stage
    # is preceded by an UpSampling2D.
    DECODER_STAGES = (
        (512, 512, 512),
        (512, 512, 256),
        (256, 256, 128),
        (128, 64),
        (64,),
    )

    def __init__(self):
        self.input_shape = (256, 256, 3)
        self.start_lr = 0.001

    @staticmethod
    def _conv_block(filters, kernel_size, **conv_kwargs):
        '''Return [conv, batch-norm, relu] layers for one 'same'-padded conv block.'''
        # The kernel size is passed as an explicit tuple: with tf.keras the
        # third positional argument of Convolution2D is `strides`, so the old
        # `Convolution2D(n, kernel, kernel, ...)` form created stride-3 convs.
        return [
            Convolution2D(filters, (kernel_size, kernel_size), padding='same', **conv_kwargs),
            BatchNormalization(),
            Activation('relu'),
        ]

    def build(self):
        '''
        Assemble the Sequential auto-encoder, store it in `self.model`,
        compile it and print its summary.
        '''
        img_h, img_w, n_channels = self.input_shape
        kernel = 3

        # only the very first layer of the Sequential declares the input shape
        first_layer_kwargs = {'input_shape': self.input_shape}
        encoding_layers = []
        for stage in self.ENCODER_STAGES:
            for filters in stage:
                encoding_layers += self._conv_block(filters, kernel, **first_layer_kwargs)
                first_layer_kwargs = {}
            encoding_layers.append(MaxPooling2D())

        autoencoder = Sequential()
        autoencoder.encoding_layers = encoding_layers
        for layer in autoencoder.encoding_layers:
            autoencoder.add(layer)

        decoding_layers = []
        for stage in self.DECODER_STAGES:
            decoding_layers.append(UpSampling2D())
            for filters in stage:
                decoding_layers += self._conv_block(filters, kernel)

        # Final 1x1 projection back to the number of image channels. It used
        # to emit 64 channels, which cannot be reshaped to (3, H * W) below.
        decoding_layers += self._conv_block(n_channels, 1)

        autoencoder.decoding_layers = decoding_layers
        for layer in autoencoder.decoding_layers:
            autoencoder.add(layer)

        # NOTE(review): the tensor is channels-last at this point, so a plain
        # reshape to (channels, H * W) does not move the channel axis first;
        # confirm the layout the training targets are expected to have.
        autoencoder.add(Reshape((n_channels, img_h * img_w)))
        self.model = autoencoder

        # loss = losses.elastic_loss()
        self.model.compile(loss=mean_squared_error, optimizer=RMSprop(lr=self.start_lr))
        self.model.summary()

# dataset

In [22]:
# Root directories of the two datasets used below.
# NOTE(review): hard-coded absolute paths, so the notebook only runs on a
# machine where these locations exist — consider a configurable data directory.
h36m_path = "/share/DEEPLEARNING/datasets/human36m"
mpii_path = "/share/DEEPLEARNING/datasets/mpii"

In [None]:
# h36m dataset loading
# NOTE(review): `Human36M` is not imported in any cell visible in this
# notebook, and `num_predictions` is not defined in view either — this cell
# presumably relied on state from cells that no longer exist.
h36m = Human36M(h36m_path, dataconf=config.human36m_dataconf, poselayout=pose_format.pa17j3d, topology='frames')

# Training loader: 'frame' as input key, 'pose' as target key.
# NOTE(review): the dataset is wrapped in a list here but passed bare to the
# other BatchLoader calls in this notebook — confirm which form is expected.
# NOTE(review): batch_size is set to the full length of the training split,
# i.e. apparently a single batch per epoch — confirm this is intended.
data_tr = BatchLoader(
    [h36m], 
    ['frame'], 
    ['pose'],
    TRAIN_MODE, 
    batch_size=h36m.get_length(TRAIN_MODE),
    num_predictions=num_predictions, 
    shuffle=True)

# batch_size=[batch_size_mpii, batch_size_mpii, batch_size_ar, batch_size_ar], 

In [None]:
# validation
# Validation loader over the h36m dataset, sized so that the whole validation
# split comes back as one batch.
h36m_val = BatchLoader(
    h36m, 
    ['frame'],
    ['pose_w', 'pose_uvd', 'afmat', 'camera', 'action'], 
    VALID_MODE,
    batch_size=h36m.get_length(VALID_MODE), 
    shuffle=True)

# First (and, given the batch size above, presumably only) batch: one input
# array and five target arrays, in the order of the keys requested above.
[x_val], [pw_val, puvd_val, afmat_val, scam_val, action] = h36m_val[0]

# NOTE(review): `H36MEvalCallback` and `logdir` are not defined or imported in
# any cell visible in this notebook.
h36m_callback = H36MEvalCallback(x_val, pw_val, afmat_val, puvd_val[:,0,2], scam_val, action, logdir=logdir)

In [23]:
# MPII single-person dataset, configured from the project's mpii_dataconf and
# the pa17j3d pose layout.
mpii = MpiiSinglePerson(mpii_path, dataconf=config.mpii_dataconf, poselayout=pose_format.pa17j3d)

In [30]:
# Training loader for auto-encoding: the 'frame' key is requested both as
# input and as target, so each image is its own reconstruction target.
data_tr_mpii = BatchLoader(
    mpii, 
    ['frame'], 
    ['frame'], 
    TRAIN_MODE)

In [32]:
# Length of the loader; the same value is used later as steps_per_epoch.
len(data_tr_mpii)

1074

# training

In [None]:
# Build the combined pose / appearance model.
model = Encoder()
model.build()

# One epoch = one full pass over the MPII batch loader. The previous
# expression divided by `batch_size_mpii`, which is not defined anywhere in
# this notebook; len() of the loader is what the appearance training cell
# already uses as steps_per_epoch.
# steps_per_epoch = h36m.get_length(TRAIN_MODE) // batch_size_h36m
steps_per_epoch = len(data_tr_mpii)

In [None]:
# Launch training of the model built above.
# NOTE(review): `data_tr` is the h36m loader while `steps_per_epoch` was
# derived from the MPII dataset — confirm which dataset is meant here.
model.train(data_tr, steps_per_epoch)

In [54]:
# Build the appearance-only auto-encoder; build() prints the model summaries.
# NOTE(review): this rebinds the name `model`, which earlier cells use for
# the Encoder instance.
model = AppearanceModel()
model.build()




__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 262, 262, 3)  0           input_11[0][0]                   
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 128, 128, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 128, 128, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 262, 262, 3)  0           input_11[0][0]                   
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 128, 128, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 128, 128, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [55]:
# Train the appearance auto-encoder on MPII, checkpointing into a fresh,
# timestamped experiment folder.
model_name = 'appearance'
dataset_name = 'mpii'
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M")
# NOTE(review): hard-coded, user-specific output location — consider making
# the experiments root configurable.
model_folder = '/home/caleml/pe_experiments/exp_%s_%s_%s' % (model_name, dataset_name, timestamp)
# The timestamp only has minute resolution, so re-running this cell within
# the same minute must not fail on an already existing folder.
os.makedirs(model_folder, exist_ok=True)
model.train(data_tr_mpii, steps_per_epoch=len(data_tr_mpii), model_folder=model_folder)

Epoch 1/120
Error loading sample key/mode: 4140/1
Error loading sample key/mode: 24440/1
Error loading sample key/mode: 2713/1
Error loading sample key/mode: 8897/1
Error loading sample key/mode: 20585/1
Error loading sample key/mode: 13664/1


StopIteration: name 'T' is not defined

# debug