In [1]:
# encoding: utf-8
import os
os.environ['KERAS_BACKEND']='tensorflow'

# constants

# Joint names. A joint's id is its position in this list, so
# pnames.index("RHip") == 8 and pnames.index("LHip") == 11; the trailing
# "Bkg" entry occupies id 18.
pnames = [
    "Nose",       # 0
    "Neck",       # 1
    "RShoulder",  # 2
    "RElbow",     # 3
    "RWrist",     # 4
    "LShoulder",  # 5
    "LElbow",     # 6
    "LWrist",     # 7
    "RHip",       # 8
    "RKnee",      # 9
    "RAnkle",     # 10
    "LHip",       # 11
    "LKnee",      # 12
    "LAnkle",     # 13
    "REye",       # 14
    "LEye",       # 15
    "REar",       # 16
    "LEar",       # 17
    "Bkg",        # 18
]

# Joint pairs to connect, in drawing order.
# NOTE(review): the values run 1..18, so these look like 1-based joint ids
# (subtract 1 to index pnames) -- confirm before using them as indices.
limbSeq = [
    [2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
    [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
    [1, 16], [16, 18], [3, 17], [6, 18],
]

# Drawing palette: 18 three-component colours (one fewer than limbSeq entries).
colors = [
    [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0],
    [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255],
    [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85],
]

In [2]:
from cStringIO import StringIO
import numpy as np
import PIL.Image
from IPython.display import clear_output, Image, display
from scipy.misc import imresize

# If your GPU supports CUDA and Caffe was built with CUDA support,
# uncomment the following to run Caffe operations on the GPU.
# caffe.set_mode_gpu()
# caffe.set_device(0) # select GPU device if multiple devices exist

def showarray(a, fmt='jpeg', resize=True, height=360, width=360):
    """Display an image array inline in the notebook.

    a:      array of pixel values; values are clipped to [0, 255] and
            converted to uint8 before encoding.
    fmt:    image format name handed to PIL's ``save`` (e.g. 'jpeg', 'png').
    resize: when True (the default) the image is resized to
            ``height`` x ``width`` with bilinear interpolation; when False
            the array is shown at its own size.
    height, width: target size, used only when ``resize`` is True.
    """
    a = np.uint8(np.clip(a, 0, 255))
    # Honour the flag: previously the image was resized unconditionally and
    # ``resize=False`` had no effect.
    if resize:
        a = imresize(a, [height, width], 'bilinear')
    f = StringIO()
    PIL.Image.fromarray(a).save(f, fmt)
    display(Image(data=f.getvalue()))

In [3]:
import json
import cv2 as cv
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline
import glob

NUM_TOTAL_JOINTS = 18

class Body:

    """ representing one body with joints info

    Attributes set by ``__init__``:
        joints_raw:       the flat input ``[x0, y0, c0, x1, y1, c1, ...]``
        joints_array:     list of ``[x, y, confidence]`` slices of joints_raw
        joints:           the same triples as a float array, one row per joint
        num_joints_valid: number of joints with confidence > conf_thresh
        norm_joints:      normalized (x, y) matrix, or None when unavailable
        is_valid:         True when the body passed every validity check

    A joint is "confident" when its confidence is strictly greater than
    ``conf_thresh``; every method uses this same rule.
    """
    def __init__(self, joints_raw, parts_thresh = 0.0, conf_thresh=0.0, min_size_thresh=0):
        """ initializing with joints array

        parts_thresh: (visible joints/total joints).
                    body with the number of valid joints under this threshold will be regarded as invalid
        conf_thresh: joints with confidence under this threshold will be ignored
        min_size_thresh: max(body width, body height) should be bigger than this threshold
        """
        assert len(joints_raw) == NUM_TOTAL_JOINTS * 3, "Invlid joint array"
        self.joints_raw = joints_raw
        self.parts_thresh = parts_thresh
        self.conf_thresh = conf_thresh

        # Split the flat list into one [x, y, confidence] triple per joint.
        self.joints_array = [joints_raw[i:i+3] for i in range(0, len(joints_raw), 3)]  # keep this for convenience
        # Float dtype so that body-centre coordinates written back during
        # normalization are not truncated when the input holds integers.
        self.joints = np.array(self.joints_array, dtype=float)
        self.num_joints_valid = int(np.sum(self._confident_mask()))

        # Defined on every path so callers can always read both attributes.
        self.norm_joints = None
        self.is_valid = False

        # not enough visible joints
        if self.num_joints_valid / float(NUM_TOTAL_JOINTS) < parts_thresh:
            return

        # get size; without any confident joint there is no bounding box
        bbox = self._confident_bbox()
        if bbox is None:
            return
        min_x, min_y, max_x, max_y = bbox
        size = max(max_x - min_x, max_y - min_y)

        # get normalized joints
        self.norm_joints = self.get_normalized_joints_matrix()

        # enough visible joints and bigger enough
        self.is_valid = bool(size > min_size_thresh and self.norm_joints is not None)

    def _confident_mask(self):
        """ boolean array, True for each joint whose confidence is > conf_thresh """
        return self.joints[:, 2] > self.conf_thresh

    def _confident_bbox(self):
        """ (min_x, min_y, max_x, max_y) over the confident joints, or None if there are none """
        points = self.joints[self._confident_mask(), :2]
        if len(points) == 0:
            return None
        min_x, min_y = points.min(axis=0)
        max_x, max_y = points.max(axis=0)
        return min_x, min_y, max_x, max_y

    def is_valid_joint(self, index1, index2):
        """ check if a joint between index1 to index2 is valid
        (both end points must have confidence > conf_thresh)
        """
        return (self.joints_array[index1][2] > self.conf_thresh and self.joints_array[index2][2] > self.conf_thresh)

    def get_cropped_image_around_body(self, canvas, margin_coef = 1.25):
        """ crop image around find body

        canvas:      image array indexed as [row (y), column (x), ...]
        margin_coef: the square crop has a side of
                     max(body width, body height) * margin_coef
        returns the slice of ``canvas`` centred on the confident joints and
        clamped to the canvas bounds. Asserts that the body is valid.
        """
        assert self.is_valid, "this body has not enough valid joints!"

        min_x, min_y, max_x, max_y = self._confident_bbox()
        center = [0.5*(max_x+min_x),0.5*(max_y+min_y)]
        size  = max(max_x - min_x, max_y - min_y) * margin_coef

        # clamp the square to the image; shape[1] is the width, shape[0] the height
        x1 = int(min(max(0, center[0] - size*0.5), canvas.shape[1]))
        x2 = int(min(max(0, center[0] + size*0.5), canvas.shape[1]))
        y1 = int(min(max(0, center[1] - size*0.5), canvas.shape[0]))
        y2 = int(min(max(0, center[1] + size*0.5), canvas.shape[0]))

        cropped = canvas[y1:y2, x1:x2]
        return cropped

    def get_normalized_joints_matrix(self):
        """
        normalized for learning.
        center = midpoint between the right and left hips

        Returns one (x, y) row per joint, expressed relative to the body
        centre and divided by the largest joint-to-centre distance, or None
        when either hip is not confident or every joint coincides with the
        body centre (nothing to scale by).
        """

        joints = np.copy(self.joints)
        confident = joints[:, 2] > self.conf_thresh
        hip_rows = [pnames.index("RHip"), pnames.index("LHip")]

        # at least you should have valid hips!
        # Same "> conf_thresh" rule as everywhere else, so an undetected hip
        # (confidence 0 with the default threshold 0.0) is rejected here too.
        if not np.all(confident[hip_rows]):
            return None

        # body center is between hips
        body_center = np.mean(joints[hip_rows, :2], axis=0)

        # assume that the less confident parts are at the center of body
        joints[~confident, :2] = body_center
        dists = joints[:, :2] - body_center
        max_dist = np.max(np.linalg.norm(dists, axis=1))
        if max_dist <= 0:
            # all joints sit on the body centre: dividing would produce NaNs
            return None
        return dists / max_dist
        

In [4]:
import random
from tqdm import tqdm

# Collect every valid body from the per-frame JSON files.
# NOTE(review): the shuffle is unseeded, so the row order of body_mat differs
# between runs; seed `random` first if a reproducible order is needed.
filepaths = glob.glob("./video/olympics_frames2/*.json")
random.shuffle(filepaths)

body_rows = []  # one flattened norm_joints vector per valid body
body_info = []  # parallel list: source image path and the Body itself

for jsonpath in tqdm(filepaths):
    with open(jsonpath) as json_data:
        d = json.load(json_data)
    # the frame image sits next to its JSON file: same name, ".bmp" extension
    imagepath = jsonpath[:-5]+".bmp"
    for _b in d['bodies']:
        body = Body(_b['joints'], parts_thresh=0.90, conf_thresh=0.40, min_size_thresh=180)

        if body.is_valid:
            body_rows.append(body.norm_joints.reshape(-1))
            body_info.append({"image": imagepath, "body": body})

# Stack once at the end: calling np.vstack inside the loop re-copies the whole
# matrix for every body found, which is quadratic in the number of bodies.
if body_rows:
    body_mat = np.vstack(body_rows)
else:
    # keep a well-formed 2-D matrix even when nothing was found
    body_mat = np.empty((0, 2 * NUM_TOTAL_JOINTS))
print(body_mat.shape)
print("# of found bodies %d" % len(body_info))


100%|██████████| 91948/91948 [05:39<00:00, 270.92it/s]

(16318, 36)
# of found bodies 16318





In [5]:
# Cache the collected bodies on disk so the slow scan over the JSON files does
# not have to be repeated. body_info holds Python objects, so it is stored as
# a pickled object array inside the .npz archive.
body_mat = np.array(body_mat)
arrays_to_save = dict(body_mat=body_mat, body_info=body_info)
np.savez("body_mat.npz", **arrays_to_save)
print(body_mat.shape)
print("# of found bodies %d" % len(body_info))


(16318, 36)
# of found bodies 16318


In [4]:
# Reload the cached bodies. The archive is opened once and closed again
# (previously it was opened twice and never closed).
# body_info is a pickled object array of dicts holding Body instances, so:
#   * the Body class must already be defined in this kernel, and
#   * allow_pickle=True is required on NumPy versions where it defaults to False.
# SECURITY: unpickling can execute arbitrary code -- only load a body_mat.npz
# that this notebook produced itself.
with np.load("body_mat.npz", allow_pickle=True) as saved:
    body_mat = saved["body_mat"]
    body_info = saved["body_info"]
print(body_mat.shape)
print(len(body_info))

(16318, 36)
16318


In [7]:
# Autoencoder
# encoding: utf-8
import os
os.environ['KERAS_BACKEND']='tensorflow'

from keras.layers import Input, Dense
from keras.models import Model

# encoded representation: input_size values per pose are squeezed down to
# encoding_dim values and then expanded back to input_size
input_size = body_mat.shape[1]
encoding_dim = 12

input_vector = Input(shape=(input_size,)) # one row of body_mat per sample
x1 = Dense(encoding_dim * 2, activation='relu')(input_vector)
encoded = Dense(encoding_dim)(x1)
x2 = Dense(encoding_dim * 2, activation='relu')(encoded)
decoded = Dense(input_size)(x2)

# autoencoder: input vector -> reconstructed vector
autoencoder = Model(input=input_vector, output=decoded)

# encoder only model: input vector -> encoding_dim-sized code
encoder = Model(input=input_vector, output=encoded)

# decoder: re-applies the autoencoder's last two layers to a standalone
# encoding_dim-sized input, so it shares those layers with the autoencoder
encoded_input = Input(shape=(encoding_dim,))
x2_layer = autoencoder.layers[-2]
decoder_layer = autoencoder.layers[-1] # the last layer
decoder = Model(input=encoded_input, output=decoder_layer(x2_layer(encoded_input)))

# compile
autoencoder.compile(optimizer='adadelta', loss='mean_squared_error')


Using TensorFlow backend.


In [9]:
# The autoencoder is trained to reproduce its input, so body_mat is passed as
# both the input and the target.
autoencoder.fit(body_mat, body_mat, nb_epoch=150, batch_size=256, shuffle=True, validation_split=0.1,
                    verbose=2)

Train on 14686 samples, validate on 1632 samples
Epoch 1/150
0s - loss: 0.0032 - val_loss: 0.0031
Epoch 2/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 3/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 4/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 5/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 6/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 7/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 8/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 9/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 10/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 11/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 12/150
0s - loss: 0.0031 - val_loss: 0.0030
Epoch 13/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 14/150
0s - loss: 0.0031 - val_loss: 0.0030
Epoch 15/150
0s - loss: 0.0031 - val_loss: 0.0030
Epoch 16/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 17/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 18/150
0s - loss: 0.0031 - val_loss: 0.0031
Epoch 19/150
0s - loss: 0.0031 - val_loss: 0.0030
Epoch 20/1

<keras.callbacks.History at 0x7f98825d0450>

In [10]:
# Write the three models to disk, each to its own HDF5 file.
for _model, _path in ((autoencoder, "pose_autoencoder.h5"),
                      (encoder, "pose_autoencoder_encoder.h5"),
                      (decoder, "pose_autoencoder_decoder.h5")):
    _model.save(_path)

In [5]:
from keras.models import load_model
# Restore the three saved models, in the same order they were written.
autoencoder, encoder, decoder = [
    load_model(_path)
    for _path in ("pose_autoencoder.h5",
                  "pose_autoencoder_encoder.h5",
                  "pose_autoencoder_decoder.h5")
]

Using TensorFlow backend.


In [6]:
import tensorflow as tf

# Embedding every body would be too heavy, so keep only the first
# num_embedding of them. Clamp to the rows actually available: later cells
# iterate range(num_embedding) over body_info and would otherwise run past
# the end when fewer bodies were found.
num_embedding = min(2000, len(body_mat))

encoded_mat = encoder.predict(body_mat[:num_embedding,:])

In [7]:
from datetime import datetime
# Every run writes to its own timestamped log directory; the printed command
# starts TensorBoard on exactly that directory.
now = datetime.now()
logdir = "/tmp/tf_logs/" + now.strftime("%Y%m%d-%H%M%S") + "/"
cmd = "tensorboard --logdir=" + logdir
# single-argument call form: same output on Python 2, also valid on Python 3
print(cmd)

tensorboard --logdir=/tmp/tf_logs/20170223-155659/


In [9]:
from tensorflow.contrib.tensorboard.plugins import projector
import math
from tqdm import tqdm

# Export the encoded vectors to the TensorBoard embedding projector, with one
# thumbnail per body packed into a single sprite image.
with tf.Session() as sess:
    # create a variable
    # Each item to visualize must be a 1-D vector, i.e. the variable is a
    # [num_items, dim] matrix with one row per body.
    embedding_var = tf.Variable(encoded_mat, trainable=False, name="encoded_vector")
    sess.run(embedding_var.initializer)

    # Take the item count from the matrix itself so the metadata and the
    # sprite always have exactly one entry per embedded row.
    num_items = len(encoded_mat)

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = embedding_var.name
    # created before the files below are written into logdir
    summary_writer = tf.summary.FileWriter(logdir)

    # file id: one metadata line per embedded row (the row index)
    metadata_path = os.path.join(logdir, 'metadata.tsv')
    with open(metadata_path, 'w') as f:
        for name in range(num_items):
            f.write('%s\n' % name)
    embedding.metadata_path = metadata_path

    # image: build the sprite directly with OpenCV/NumPy. Resizing through
    # tf.image inside the loop added new ops to the TF graph for every
    # thumbnail, and again on every re-run of the cell.
    image_dim = 100
    size = max(1, int(math.ceil(math.sqrt(num_items))))  # tiles per sprite row/column
    # unused tiles stay black
    sprite = np.zeros((size * image_dim, size * image_dim, 3), dtype=np.uint8)
    for i in tqdm(range(num_items)):
        imagepath = body_info[i]['image']
        foundbody = body_info[i]['body']
        canvas = cv.imread(imagepath) # B,G,R order
        if canvas is None:
            raise IOError("could not read image: %s" % imagepath)
        cropped = foundbody.get_cropped_image_around_body(canvas)
        if cropped.size == 0:
            continue  # body lies entirely outside the frame: leave the tile black
        row, col = divmod(i, size)  # tiles are laid out row by row
        sprite[row*image_dim:(row+1)*image_dim,
               col*image_dim:(col+1)*image_dim] = cv.resize(
                   cropped, (image_dim, image_dim), interpolation=cv.INTER_LINEAR)

    # cv.imwrite expects B,G,R data, which is what imread produced, so no
    # channel swap is needed.
    image_path = os.path.join(logdir, 'sprite.jpg')
    if not cv.imwrite(image_path, sprite):
        raise IOError("could not write sprite image: %s" % image_path)
    embedding.sprite.image_path = image_path
    embedding.sprite.single_image_dim.extend([image_dim, image_dim])

    projector.visualize_embeddings(summary_writer, config)
    saver = tf.train.Saver([embedding_var])
    saver.save(sess, os.path.join(logdir, 'model.ckpt'))
    summary_writer.close()
    print("saved")

100%|██████████| 2000/2000 [00:14<00:00, 139.86it/s]


saved


In [7]:
from datetime import datetime
# Fresh timestamped log directory for the next embedding export; the printed
# command starts TensorBoard on exactly that directory.
now = datetime.now()
logdir = "/tmp/tf_logs/" + now.strftime("%Y%m%d-%H%M%S") + "/"
cmd = "tensorboard --logdir=" + logdir
# single-argument call form: same output on Python 2, also valid on Python 3
print(cmd)

tensorboard --logdir=/tmp/tf_logs/20170223-161629/


In [8]:
from tensorflow.contrib.tensorboard.plugins import projector
import math
from tqdm import tqdm

# Export the un-encoded (normalized joint) vectors to the TensorBoard
# embedding projector, with one thumbnail per body packed into a sprite image.
body_mat_short = body_mat[:num_embedding,:]

with tf.Session() as sess:
    # create a variable
    # Each item to visualize must be a 1-D vector, i.e. the variable is a
    # [num_items, dim] matrix with one row per body.
    embedding_var = tf.Variable(body_mat_short, trainable=False, name="encoded_vector")
    sess.run(embedding_var.initializer)

    # Take the item count from the matrix itself so the metadata and the
    # sprite always have exactly one entry per embedded row.
    num_items = len(body_mat_short)

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = embedding_var.name
    # created before the files below are written into logdir
    summary_writer = tf.summary.FileWriter(logdir)

    # file id: one metadata line per embedded row (the row index)
    metadata_path = os.path.join(logdir, 'metadata.tsv')
    with open(metadata_path, 'w') as f:
        for name in range(num_items):
            f.write('%s\n' % name)
    embedding.metadata_path = metadata_path

    # image: build the sprite directly with OpenCV/NumPy. Resizing through
    # tf.image inside the loop added new ops to the TF graph for every
    # thumbnail, and again on every re-run of the cell.
    image_dim = 100
    size = max(1, int(math.ceil(math.sqrt(num_items))))  # tiles per sprite row/column
    # unused tiles stay black
    sprite = np.zeros((size * image_dim, size * image_dim, 3), dtype=np.uint8)
    for i in tqdm(range(num_items)):
        imagepath = body_info[i]['image']
        foundbody = body_info[i]['body']
        canvas = cv.imread(imagepath) # B,G,R order
        if canvas is None:
            raise IOError("could not read image: %s" % imagepath)
        cropped = foundbody.get_cropped_image_around_body(canvas)
        if cropped.size == 0:
            continue  # body lies entirely outside the frame: leave the tile black
        row, col = divmod(i, size)  # tiles are laid out row by row
        sprite[row*image_dim:(row+1)*image_dim,
               col*image_dim:(col+1)*image_dim] = cv.resize(
                   cropped, (image_dim, image_dim), interpolation=cv.INTER_LINEAR)

    # cv.imwrite expects B,G,R data, which is what imread produced, so no
    # channel swap is needed.
    image_path = os.path.join(logdir, 'sprite.jpg')
    if not cv.imwrite(image_path, sprite):
        raise IOError("could not write sprite image: %s" % image_path)
    embedding.sprite.image_path = image_path
    embedding.sprite.single_image_dim.extend([image_dim, image_dim])

    projector.visualize_embeddings(summary_writer, config)
    saver = tf.train.Saver([embedding_var])
    saver.save(sess, os.path.join(logdir, 'model.ckpt'))
    summary_writer.close()
    print("saved")

100%|██████████| 2000/2000 [00:14<00:00, 140.34it/s]


saved


In [17]:
# Round-trip the first pose through the encoder and decoder and print the
# original next to its reconstruction for a visual comparison.
first_pose = body_mat[0]
print(first_pose)
encoded = encoder.predict(first_pose.reshape(1,36), 1)  # second argument: batch size
decoded = decoder.predict(encoded, 1)
print(decoded)


[ 0.          0.         -0.09673796 -0.55154003  0.01077628 -0.56159826
  0.10831514 -0.27013608  0.          0.         -0.23649818 -0.55150689
 -0.28001756 -0.25933218 -0.43060929 -0.20487082  0.09716879  0.
  0.07531244  0.46437423  0.19430996  0.82102434 -0.09716879  0.         -0.4524546
  0.3030504  -0.67829249  0.73479201  0.          0.          0.          0.
  0.          0.         -0.29073308 -0.73476992]
[[-0.0368489  -0.06802221 -0.03029673 -0.48934975  0.1107394  -0.48543811
   0.09450564 -0.19057766 -0.09538137 -0.08128609 -0.19582306 -0.46150014
  -0.38449973 -0.21244901 -0.45778859 -0.19601478  0.12433363 -0.00592352
   0.17398603  0.41582155  0.302026    0.88152504 -0.11843937  0.00127022
  -0.4461413   0.39817518 -0.49515843  0.72199726 -0.0525818   0.05771903
  -0.04056739 -0.08941069 -0.00617    -0.02536397 -0.11257882 -0.67260206]]
