In [2]:
%matplotlib inline
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt

muct_data_path = '/Users/azhong/face/clmtools/pdm_builder/data/images/'
muct_annotation_path = '/Users/azhong/face/clmtools/pdm_builder/data/annotations.csv'

landmark_dict = {}
raw_image_dict = {}
image_dict = {}
image_array = []
landmarks_array = []
image_dim_dict = {}
mouth_landmarks = range(44, 62)

with open(muct_annotation_path) as fi:
    for line in fi:
        splitted = line.split(';')
        filename = os.path.join(muct_data_path, splitted[0])
        if os.path.isfile(filename):
            landmark_dict[splitted[0]] = [[float(splitted[i*3+1]), float(splitted[i*3+2])] for i in range(71)]
            landmark_dict[splitted[0]] = np.array(landmark_dict[splitted[0]])
            raw_image_dict[splitted[0]] = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2GRAY)
            image_dim_dict[splitted[0]] = raw_image_dict[splitted[0]].shape



In [3]:
import random
augment_count = 10
random.seed()
for i in sorted(landmark_dict.keys()):
    x2, y2 = np.amax(landmark_dict[i], axis=0)
    x1, y1 = np.amin(landmark_dict[i], axis=0)
    x1 = int(x1)
    x2 = int(x2)
    # extend up further to forehead
    y1 = max(int(y1 - (y2-y1)*0.2), 0)
    y2 = int(y2)
    for j in range(augment_count):
        # randomly translate in x and y direction by 10%
        translate_range_x = (x2-x1)*0.1
        translate_range_y = (y2-y1)*0.1
        translate_x = int((random.random()*2-1)*translate_range_x)
        translate_y = int((random.random()*2-1)*translate_range_y)
#        print("translate x:", translate_x, "y:", translate_y)
        if translate_y < 0:
            translate_y = -min(y1, -translate_y)
        else:
            translate_y = min(image_dim_dict[i][0] - 1 - y2, translate_y)
        if translate_x < 0:
            translate_x = -min(x1, -translate_x)
        else:
            translate_x = min(image_dim_dict[i][1] - 1 - x2, translate_x)

        x11 = x1 + translate_x
        y11 = y1 + translate_y
        x22 = x2 + translate_x
        y22 = y2 + translate_y

        if y22 - y11 > x22 - x11:
            diff = (y22 - y11 - (x22 - x11))/2.0
            x111 = max(int(x11-diff), 0)
            x222 = min(int(x22+diff), image_dim_dict[i][1])
            y111 = int(y11)
            y222 = int(y22)
        else:
            diff = (x22 - x11 - (y22 - y11))/2.0
            y111 = max(int(y11-diff), 0)
            y222 = min(int(y22+diff), image_dim_dict[i][0])
            x111 = int(x11)
            x222 = int(x22)


        # randomly rotate image by -10 to 10 degrees
        angle = (random.random()*2-1)*10
        # randomly scale image 0.9-1.1
        ratio = (random.random()*2-1)*0.1 + 1
        M = cv2.getRotationMatrix2D(((x111+x222)/2,(y111+y222)/2), angle, ratio)
        rotated_image = cv2.warpAffine(raw_image_dict[i], M, (image_dim_dict[i][1], image_dim_dict[i][0]))
#         print('angle ', angle, 'ratio', ratio)
#         plt.imshow(raw_image_dict[i], cmap='gray')
#         plt.show()
#         plt.imshow(rotated_image, cmap='gray')
#         plt.show()

        raw_image_resized = cv2.resize(raw_image_dict[i][y1:y2, x1:x2], (64, 64))
        mod_image_resized = cv2.resize(rotated_image[y111:y222, x111:x222], (64, 64))
#         plt.imshow(raw_image_resized, cmap='gray')
#         plt.show()
#         plt.imshow(mod_image_resized, cmap='gray')
#         plt.show()
        image_dict[i + '_augment_' + str(j)] = mod_image_resized

    for j in range(71):
        landmark_dict[i][j] = (landmark_dict[i][j] - np.array([x111, y111]))/(np.array([x222-x111, y222-y111]))*np.array([64, 64])

In [4]:
mouth_openness = {}

for i in sorted(landmark_dict.keys()):
    center_side = max(np.linalg.norm(landmark_dict[i][44] - landmark_dict[i][60]),
                      np.linalg.norm(landmark_dict[i][50] - landmark_dict[i][60]))
    center_open = np.linalg.norm(landmark_dict[i][57] - landmark_dict[i][60])
    top_open = np.linalg.norm(landmark_dict[i][47] - landmark_dict[i][53])

    mouth_openness[i] = center_open/max(top_open, center_side)
#    print(mouth_openness[i])
#     debug_img = image_dict[i].copy()
#     for j in mouth_landmarks:
#         cv2.circle(debug_img, (int(landmark_dict[i][j][0]), int(landmark_dict[i][j][1])), 1, (255))
#     plt.imshow(debug_img, cmap='gray')
#     plt.show()



In [5]:
import tensorflow as tf
emotion_model_path = '../tensorflow/models/emotion_mini_XCEPTION_64x64_0.66_7ms.hdf5.pb'
graph = tf.Graph()
graph_def = tf.GraphDef()
with open(emotion_model_path, "rb") as f:
    graph_def.ParseFromString(f.read())
with graph.as_default():
    tf.import_graph_def(graph_def)
input_name = 'import/input_1'
output_add_name = 'import/add_4/add'
output_conv_name = 'import/conv2d_7/BiasAdd'
output_name = 'import/output_node0'

input_operation = graph.get_operation_by_name(input_name)
output_add_operation  = graph.get_operation_by_name(output_add_name)
output_conv_operation = graph.get_operation_by_name(output_conv_name)
output_operation = graph.get_operation_by_name(output_name)

input_shape = (int(input_operation.outputs[0].shape.dims[1]),
               int(input_operation.outputs[0].shape.dims[2]),
               1)

add_dict = {}
mean_of_add_dict = {}
conv_dict = {}
output_dict = {}
add_sampled_dict = {}
sess = tf.Session(graph = graph)
for i in sorted(image_dict.keys()):
    gray_face = (image_dict[i] - 127.5) / 127.5
    gray_face = np.expand_dims(gray_face, 0)
    gray_face = np.expand_dims(gray_face, -1)
    prediction = sess.run([output_add_operation.outputs[0], output_conv_operation.outputs[0], output_operation.outputs[0]],
                          {input_operation.outputs[0]: gray_face})
    add_out = prediction[0][0]
    add_out = add_out.reshape(16, 128)
    mean_of_add_out = np.mean(add_out, axis=0)
    add_out = add_out.reshape(2048)
    add_dict[i] = add_out.copy()
    add_sampled_dict[i] = [add_out[j] for j in [728, 1749, 1741, 1914]]
    mean_of_add_dict[i] = mean_of_add_out.copy()
    conv_dict[i] = prediction[1][0].reshape(112)
    output_dict[i] = prediction[2][0]

  from ._conv import register_converters as _register_converters


In [6]:
print(add_sampled_dict['104810657_1ce2931c9f.jpg_augment_0'])
print(mean_of_add_dict['104810657_1ce2931c9f.jpg'].shape)
print(input_shape)
print(len(mouth_openness.keys()))
print(add_sampled_dict['104810657_1ce2931c9f.jpg'])

[0.15874708, 0.32331732, 0.4854281, -0.104132414]


KeyError: '104810657_1ce2931c9f.jpg'

In [10]:
# preparing SVM

X = []
y = []

sorted_keys = sorted(image_dict.keys())
np.random.shuffle(sorted_keys)
# shuffle
for i in sorted_keys:
    X.append(list(add_sampled_dict[i]))
    filename = i.split('_augment_')[0]
    if mouth_openness[filename] < 0.1:
        y.append(-1)
    else:
        y.append(mouth_openness[filename])


from sklearn import svm
from sklearn.model_selection import cross_val_score
clf = svm.LinearSVR()
total_size = len(X)
print('total size is {}'.format(total_size))
train_size = int(0.8*total_size)

print(cross_val_score(clf, X, y, scoring='neg_mean_absolute_error'))
#cross_val_score(clf, X, y, scoring='neg_mean_absolute_error') 
clf.fit(X, y)

total size is 5110
[-0.46462565 -0.44840826 -0.45747143]


LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
     intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
     random_state=None, tol=0.0001, verbose=0)

In [8]:
counter = 0
for i in sorted(image_dict.keys()):
    counter += 1
    debug_img = image_dict[i].copy()
#     for j in mouth_landmarks:
#         cv2.circle(debug_img, (int(landmark_dict[i][j][0]), int(landmark_dict[i][j][1])), 1, (255))
    filename = i.split('_augment_')[0]
    #plt.imshow(debug_img, cmap='gray')
    
    #plt.title('mouth openness {}; predicted {}; {}: {}, {}: {}'.format(mouth_openness[filename], 
#                                                                        clf.predict([list(add_sampled_dict[i])]),
#                                                                       728, add_dict[i][728],
#                                                                       1914, -add_dict[i][1914]))
    #plt.show()
    if counter > 100:
        break

In [97]:
clf

LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
     intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
     random_state=None, tol=0.0001, verbose=0)

In [46]:
print(graph.get_operation_by_name('import/add_4/add').outputs[0].shape)

print(graph.get_operation_by_name('import/conv2d_7/BiasAdd').inputs[0].shape)
print(graph.get_operation_by_name('import/conv2d_7/BiasAdd').outputs[0].shape)
print(graph.get_operation_by_name('import/global_average_pooling2d_1/Mean').inputs[0].shape)

(?, 4, 4, 128)
(?, 4, 4, 7)
(?, 4, 4, 7)
(?, 4, 4, 7)


In [1]:
# print(clf.intercept_)
# print(list(clf.coef_))

In [59]:
graph.get_operations()

[<tf.Operation 'import/input_1' type=Placeholder>,
 <tf.Operation 'import/conv2d_1/kernel' type=Const>,
 <tf.Operation 'import/conv2d_1/kernel/read' type=Identity>,
 <tf.Operation 'import/conv2d_1/convolution' type=Conv2D>,
 <tf.Operation 'import/batch_normalization_1/gamma' type=Const>,
 <tf.Operation 'import/batch_normalization_1/gamma/read' type=Identity>,
 <tf.Operation 'import/batch_normalization_1/beta' type=Const>,
 <tf.Operation 'import/batch_normalization_1/beta/read' type=Identity>,
 <tf.Operation 'import/batch_normalization_1/moving_mean' type=Const>,
 <tf.Operation 'import/batch_normalization_1/moving_mean/read' type=Identity>,
 <tf.Operation 'import/batch_normalization_1/moving_variance' type=Const>,
 <tf.Operation 'import/batch_normalization_1/moving_variance/read' type=Identity>,
 <tf.Operation 'import/batch_normalization_1/batchnorm/add/y' type=Const>,
 <tf.Operation 'import/batch_normalization_1/batchnorm/add' type=Add>,
 <tf.Operation 'import/batch_normalization_1/bat

5110

In [None]:
print(clf.intercept_)
print(list(clf.coef_))