In [1]:
import os
import glob

#from tqdm import tqdm
import numpy as np
import scipy.ndimage
import scipy.misc
import pandas as pd
import pickle
from IPython.display import clear_output

import keras
from keras.layers.core import Dense, Flatten, Dropout
from keras.layers import Concatenate
from keras.layers import Input
from keras.layers.normalization import BatchNormalization
from keras.layers import GlobalAveragePooling2D
from keras.models import Model

from keras.applications.resnet50 import ResNet50
from keras.applications.vgg19 import VGG19
# import the necessary packages
from keras.preprocessing import image as image_utils
from keras.utils import plot_model
from keras import backend as K
from keras.regularizers import Regularizer

from keras.wrappers.scikit_learn import KerasRegressor


from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.linear_model import LinearRegression
from sklearn import cross_validation, grid_search
import keras_metrics

from scipy import stats

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf
import random as rn


# The below is necessary in Python 3.2.3 onwards to
# have reproducible behavior for certain hash-based operations.
# See these references for further details:
# https://docs.python.org/3.4/using/cmdline.html#envvar-PYTHONHASHSEED
# https://github.com/keras-team/keras/issues/2280#issuecomment-306959926

import os
os.environ['PYTHONHASHSEED'] = '0'

# The below is necessary for starting Numpy generated random numbers
# in a well-defined initial state.

np.random.seed(42)

# The below is necessary for starting core Python generated random numbers
# in a well-defined state.

rn.seed(12345)

# Force TensorFlow to use single thread.
# Multiple threads are a potential source of
# non-reproducible results.
# For further details, see: https://stackoverflow.com/questions/42022950/which-seeds-have-to-be-set-where-to-realize-100-reproducibility-of-training-res

session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)

from keras import backend as K

# The below tf.set_random_seed() will make random number generation
# in the TensorFlow backend have a well-defined initial state.
# For further details, see: https://www.tensorflow.org/api_docs/python/tf/set_random_seed

tf.set_random_seed(1234)

sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess) # reference: https://keras.io/getting-started/faq/#how-can-i-obtain-the-output-of-an-intermediate-layer

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
IMG_SIZE = (256, 256)
IN_SHAPE = (*IMG_SIZE, 3)
BATCH_SIZE = 64

In [3]:
openImg_path = '/nfs/juhu/data/rakhasan/bystander-detection/google-img-db/'
survey_path='/nfs/juhu/data/rakhasan/bystander-detection/pilot-study2/'
survey_photo_path = survey_path+'/photos/'

model_output_path = '/nfs/juhu/data/rakhasan/bystander-detection/code-repos/notebooks/model-output/'

print('loading features.')

'''Note: this data frame has all deep features, and the label is binary values (0,1)'''
feature_df = pickle.load(open(os.path.join(survey_path, 'features-df-all.pkl'), 'rb'))
#feature_df = feature_df[(feature_df.label==1)|(feature_df.label==-1)]
#feature_df['label'] = feature_df.apply(lambda row: 1 if row.label==1 else 0, axis=1)
print('dataset:',len(feature_df), 'unique labels:', feature_df.label.unique(),
     'pos:',len(feature_df[feature_df.label==1]),'neg:',len(feature_df[feature_df.label==0]))

loading features.
dataset: 3802 unique labels: [1 0] pos: 2287 neg: 1515


In [4]:
'''Feature names'''

#joint names labeled by openpose
body_joint_names = ['nose', 'neck', 'Rsho', 'Relb', 'Rwri', 'Lsho', 'Lelb',
               'Lwri', 'Rhip', 'Rkne', 'Rank', 'Lhip', 'Lkne', 'Lank', 
               'Leye', 'Reye', 'Lear', 'Rear']

#angles between pairs of body joint, from openpose
link_angle_features = ['angle_'+str(i) for i in range(17)]

#probability of detecting a body joint, from openpose
body_joint_prob_features = [j + '_prob' for j in body_joint_names]

face_exp_feaures = ['angry', 'disgusted', 'fearful', 'happy', 'sad', 'surprised', 'neutral']

img_level_features = ['person_distance', 'person_size', 'num_people']

visual_features = img_level_features +\
    link_angle_features + body_joint_prob_features + face_exp_feaures

features_from_study = ['was_aware_num',  'posing_num',  'comfort_num',  'will_num', 'photographer_intention_num',
     'replacable_num',  'photo_place_num']

resnet_feature_cols = ['resnet_feat_{}'.format(i) for i in range(131071)]

resnet_feat_avg_cols = ['resnet_feat_avg_{}'.format(i) for i in range(2048)]

all_features = features_from_study + visual_features + resnet_feat_avg_cols

In [47]:
from keras import optimizers

def linear_regression_model(input_dim=38, hidden_dims = []):
    '''
    Create a fully connected network with first layer as input with input_dim=input_dim,
    and len(hidden_dims) number of hidden layers.
    
    Currenly default activation is relu for all hidden layers, and a dropout(.5) is added.
    '''
    
    input_layer = Input(shape=(input_dim,), name = 'input_layer')
    hidden_layer = input_layer
    if hidden_dims:
        for hidden_dim in hidden_dims:
            hidden_layer = Dense(hidden_dim, activation='relu')(hidden_layer)
            hidden_layer = Dropout(.5)(BatchNormalization()(hidden_layer))
    
    output_layer = Dense(1, kernel_regularizer=keras.regularizers.l2(1), bias_regularizer=keras.regularizers.l2(1), 
                         activation = 'linear')(hidden_layer)
    
    model = Model(input_layer, output_layer)
    
    keras.backend.set_epsilon(1) 
    
    model.compile(optimizer=optimizers.SGD( lr = 0.01, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=0.5),
                  loss='mse',
                  metrics=['mse','mae','mape'])
    return model

In [43]:
def do_cross_validation(model_func, X,Y, n_splits=5, epochs=20, save_model = False,
                        save_prefix='', model_args=None, verbose=1):
    seed = 1234
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    cvscores = []
    
    fold = 0
    splits = []
    for train, test in kfold.split(X, (Y>0).astype(int)):
        # create model
        if model_args:
            model = model_func(model_args['input_dim'], model_args['hidden_dims'])
        else:
            model = model_func()
        model.fit(X[train], Y[train], epochs=epochs, batch_size=BATCH_SIZE, verbose=verbose)
        #evaluate the model
        score = model.evaluate(X[test], Y[test], verbose=1)
        print('metrics:{}, values:{}'.format(model.metrics_names, score))
        cvscores.append(score)
        
        if save_model:
            model.save_weights(model_output_path+'model_{}_{}.weights'.format(save_prefix, fold))
            fold+=1
            splits.append((train, test))
    
    if save_model:
        pickle.dump(splits, open(model_output_path+'splits_{}'.format(save_prefix), 'wb'))

    return cvscores

def fine_tune_cv(model_func, feature_df, n_splits=5, epochs=20, save_model = True,
                        save_prefix=''):
    seed = 1234
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    cvscores = []
    
    fold = 0
    splits = []

    for train, test in kfold.split(feature_df, feature_df.label):
        model = model_func()
        
        X_train = np.array([x for x in feature_df.resized_cropped_img.values[train]])
        X_test = np.array([x for x in feature_df.resized_cropped_img.values[test]])

        model.fit(X_train, feature_df.label[train], epochs=epochs, batch_size=BATCH_SIZE, verbose=1)
        
        #evaluate the model
        scores = model.evaluate(X_test, feature_df.label[test], verbose=1)
        print(model.metrics_names, scores)
        cvscores.append(scores)
        
        if save_model:
            model.save_weights(model_output_path+'model_{}_{}.weights'.format(save_prefix, fold))
            fold+=1
            splits.append((train, test))
    
    if save_model:
        pickle.dump(splits, open(model_output_path+'splits_{}'.format(save_prefix), 'wb'))

    return cvscores


def fine_tune_cv_mixed_features(model_func, feature_df, other_feat, hidden_dims=[],
                                n_splits=10, epochs=20, save_model = True, save_prefix = ''):
    seed = 1234
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    cvscores = []
    
    fold = 0
    splits = []
    for train, test in kfold.split(feature_df, feature_df.label):
        
        X_train = np.array([x for x in feature_df.resized_cropped_img.values[train]])
        X_test = np.array([x for x in feature_df.resized_cropped_img.values[test]])

        model = resnet_mixed_features(len(other_feat), hidden_dims)
        
        model.fit([X_train, feature_df[other_feat].apply(stats.zscore).values[train]],
                  feature_df.label[train], epochs=epochs, batch_size=BATCH_SIZE, verbose=1)
        
        
        scores = model.evaluate([X_test, feature_df[other_feat].apply(stats.zscore).values[test]],
                                feature_df.label[test], verbose=1)
        print(model.metrics_names, scores)
        cvscores.append(scores)
        
        if save_model:
            model.save_weights(model_output_path+'model_{}_{}.weights'.format(save_prefix,fold))
            fold+=1
            splits.append((train, test))
    
    if save_model:
        pickle.dump(splits, open(model_output_path+'splits_{}'.format(save_prefix), 'wb'))
        
    return cvscores

## Predict pose

### Built in LinearRegression model

In [14]:
'''Built in normalization, which performs a z-score transformation'''

feat = img_level_features
print('Features:',feat)
model = LinearRegression(fit_intercept=True, normalize=True, copy_X=True)
scores = cross_val_score(model, feature_df[feat], feature_df.posing_num, cv=5, scoring='neg_mean_squared_error')
print("score: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print()

feat = img_level_features + link_angle_features + body_joint_prob_features
print('Features:',feat)
model = LinearRegression(fit_intercept=True, normalize=True, copy_X=True)
scores = cross_val_score(model, feature_df[feat], feature_df.posing_num, cv=5, scoring='neg_mean_squared_error')
print("score: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print()

feat = img_level_features + link_angle_features + body_joint_prob_features + face_exp_feaures
print('Features:',feat)
model = LinearRegression(fit_intercept=True, normalize=True, copy_X=True)
scores = cross_val_score(model, feature_df[feat], feature_df.posing_num, cv=5, scoring='neg_mean_squared_error')
print("score: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print()

'''use all features'''
feat = resnet_feat_avg_cols + img_level_features + link_angle_features + body_joint_prob_features + face_exp_feaures
print('Features: all features')
model = LinearRegression(fit_intercept=True, normalize=True, copy_X=True)
scores = cross_val_score(model, feature_df[feat], feature_df.posing_num, cv=5, scoring='neg_mean_squared_error')
print("score: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print()

Features: ['person_distance', 'person_size', 'num_people']
score: -2.12 (+/- 0.34)

Features: ['person_distance', 'person_size', 'num_people', 'angle_0', 'angle_1', 'angle_2', 'angle_3', 'angle_4', 'angle_5', 'angle_6', 'angle_7', 'angle_8', 'angle_9', 'angle_10', 'angle_11', 'angle_12', 'angle_13', 'angle_14', 'angle_15', 'angle_16', 'nose_prob', 'neck_prob', 'Rsho_prob', 'Relb_prob', 'Rwri_prob', 'Lsho_prob', 'Lelb_prob', 'Lwri_prob', 'Rhip_prob', 'Rkne_prob', 'Rank_prob', 'Lhip_prob', 'Lkne_prob', 'Lank_prob', 'Leye_prob', 'Reye_prob', 'Lear_prob', 'Rear_prob']
score: -1.81 (+/- 0.30)

Features: ['person_distance', 'person_size', 'num_people', 'angle_0', 'angle_1', 'angle_2', 'angle_3', 'angle_4', 'angle_5', 'angle_6', 'angle_7', 'angle_8', 'angle_9', 'angle_10', 'angle_11', 'angle_12', 'angle_13', 'angle_14', 'angle_15', 'angle_16', 'nose_prob', 'neck_prob', 'Rsho_prob', 'Relb_prob', 'Rwri_prob', 'Lsho_prob', 'Lelb_prob', 'Lwri_prob', 'Rhip_prob', 'Rkne_prob', 'Rank_prob', 'Lhip_pr

### Keras Linear Regression model

### Test *posing* with different feature sets and network layers

In [12]:
len(img_level_features + link_angle_features + body_joint_prob_features)

38

In [13]:
'''use img_level_features'''
feat = img_level_features
print('Features:',feat)

scores = do_cross_validation(linear_regression_model, 
                            X= feature_df[feat].apply(stats.zscore).values,
                            Y= feature_df.posing_num, epochs= 20, n_splits = 5,
                            model_args={'input_dim': len(feat),'hidden_dims':[]})

metrics=['loss','mse','mae']

print()
for m in range(len(metrics)):
    print("{}:  {:.2f} (+/- {:.2f})".format(metrics[m], np.mean([s[m] for s in scores]), np.std([s[m] for s in scores]) * 2))
print()

Features: ['person_distance', 'person_size', 'num_people']
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.611105448313048, 2.3368749591900078, 1.290946942121378]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.612831606031561, 2.3723089428674844, 1.2969617824830144]
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
E

In [14]:
feat = img_level_features + link_angle_features +body_joint_prob_features #+ resnet_feat_avg_cols
scores = do_cross_validation(linear_regression_model, X= feature_df[feat].apply(stats.zscore).values,
                            Y= feature_df.posing_num, epochs= 30, n_splits = 10,
                            model_args={'input_dim': len(feat),'hidden_dims':[]})

metrics=['loss','mse','mae']

print()
for m in range(len(metrics)):
    print("{}:  {:.2f} (+/- {:.2f})".format(metrics[m], np.mean([s[m] for s in scores]), np.std([s[m] for s in scores]) * 2))
print()

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.278036162296305, 1.9743081177939268, 1.173652930209643]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.377113238407245, 2.0970775843292397, 1.2377342702209793]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 

Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.391264456196835, 2.095827367431239, 1.239889487467314]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.164481378856458, 1.8819997084768194, 1.145196074561069]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch

Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.256021647704275, 1.9661501934653833, 1.1983977305261713]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.307511254360801, 2.0198720743781644, 1.2075703282105295]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epo

In [15]:
feat = resnet_feat_avg_cols
scores = do_cross_validation(linear_regression_model, X= feature_df[feat].apply(stats.zscore).values,
                            Y= feature_df.posing_num, epochs= 30, n_splits = 10,
                            model_args={'input_dim': len(feat),'hidden_dims':[]})

metrics=['loss','mse','mae']

print()
for m in range(len(metrics)):
    print("{}:  {:.2f} (+/- {:.2f})".format(metrics[m], np.mean([s[m] for s in scores]), np.std([s[m] for s in scores]) * 2))
print()

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.1670822709251265, 1.8124743812666164, 1.1001171081710674]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.401290620092958, 2.0468678023871476, 1.1840711720972237]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoc

Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.5728691853974994, 2.2215224240955553, 1.1930500369322927]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.252578333804482, 1.8940986043528507, 1.1084450207258525]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
E

Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.0715432367826763, 1.716464917283309, 1.0785983449534366]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.376863227392498, 2.0298661947250367, 1.1726154114070693]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epo

#### Add a hidden layer

In [16]:
'''Only resnet feature, no normalization'''
feat = resnet_feat_avg_cols
scores = do_cross_validation(linear_regression_model, X= feature_df[feat].values,#.apply(stats.zscore).values,
                            Y= feature_df.posing_num, epochs= 30, n_splits = 10,
                            model_args={'input_dim': len(feat),'hidden_dims':[64]})

metrics=['loss','mse','mae']

print()
for m in range(len(metrics)):
    print("{}:  {:.2f} (+/- {:.2f})".format(metrics[m], np.mean([s[m] for s in scores]), np.std([s[m] for s in scores]) * 2))
print()

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.8254261467400499, 1.6620275683290378, 1.0367726084441338]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.9777466706403597, 1.80434339753599, 1.078600517721001]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 

Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.0394510218971655, 1.8762006445934898, 1.0820095513996324]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.9191405359067415, 1.7432145758679038, 1.052975605036083]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
E

Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.7470006039268091, 1.5690491789265684, 0.9827260312281156]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.9547434681340268, 1.7882855076538888, 1.0829573292481272]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
E

In [28]:
'''All features, normalized, this gives the BEST result'''
min_max_scaler = preprocessing.MinMaxScaler()

feat = resnet_feat_avg_cols + link_angle_features + body_joint_prob_features + face_exp_feaures
scores = do_cross_validation(linear_regression_model, 
                             X= min_max_scaler.fit_transform(feature_df[feat]),
                            Y= feature_df.posing_num, epochs= 30, n_splits = 10,
                            model_args={'input_dim': len(feat),'hidden_dims':[64]})

metrics=['loss','mse','mae']

print()
for m in range(len(metrics)):
    print("{}:  {:.2f} (+/- {:.2f})".format(metrics[m], np.mean([s[m] for s in scores]), np.std([s[m] for s in scores]) * 2))
print()

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.8062866197170548, 1.620312975460463, 1.036225434676243]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.8321809890702014, 1.6492078025822878, 1.0252210696538289]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch

Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[2.0517229782907584, 1.851191718954789, 1.0719406680056924]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.7281342506408692, 1.536104020319487, 0.9964799422966807]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Ep

Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.7155007274527299, 1.5353858320336593, 0.9805997835962396]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.926172005502801, 1.7284457558079769, 1.0434181451797486]
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Ep

### Test how well other high level features can be predicted using the low level predictors that best worked for *posing*

In [48]:
feat = resnet_feat_avg_cols + link_angle_features + body_joint_prob_features + face_exp_feaures #+ img_level_features

high_level_concepts = ['was_aware', 'photographer_intention', 'replacable',  'posing','comfort', 'will']
high_level_concepts_num = [c+'_num' for c in high_level_concepts]
metrics=['loss','mse','mae','mape','mase']
all_scores=[]
X= min_max_scaler.fit_transform(feature_df[feat])
for dv in high_level_concepts_num:
    scores = do_cross_validation(linear_regression_model, X,
                            Y= feature_df[dv], epochs= 30, n_splits = 10,
                            model_args={'input_dim': len(feat),'hidden_dims':[64]}, verbose=0)
    all_scores.append(scores)

    print('predicting :',dv)
    for m in range(len(metrics)):
        print("{}:  {:.2f} (+/- {:.2f})".format(metrics[m], np.mean([s[m] for s in scores]), np.std([s[m] for s in scores]) * 2))
    print()
    
tab = dict()
for i in range(len(high_level_concepts_num)):
    print('predicting :',high_level_concepts_num[i])
    for m in range(len(metrics)):
        scores= all_scores[i]
        print("{}:  {:.2f} (+/- {:.2f})".format(
            metrics[m], np.mean([s[m] for s in scores]), np.std([s[m] for s in scores]) * 2))
        #tab[high_level_concepts_num[i]][]
    print()
    

metrics:['loss', 'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error'], values:[1.7488865520697565, 1.5924175937031824, 1.0352144394646792, 67.15474572769926]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error'], values:[1.6716031533839508, 1.5307797891261383, 0.9912607846610503, 64.58514970994683]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error'], values:[1.7212156032326966, 1.568381793855682, 0.9855427069301055, 65.28281192328986]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error'], values:[1.797455334663391, 1.6536383666490253, 1.0315145222764266, 65.60587744461863]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error'], values:[1.716383189276645, 1.562002590455507, 1.0085523749652663, 64.76159053601717]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percent

IndexError: list index out of range

In [26]:

high_level_concepts_name = {'was_aware_num':'Awareness', 'posing_num':'Pose',
                            'comfort_num':'Comfort', 'will_num':'Willingness', 
                            'photographer_intention_num':'Photographer intention',
                            'replacable_num':'Replaceable', 'photo_place_num':'Photo place'}

In [28]:
'''create table for paper'''    
tab = []
for i in range(len(high_level_concepts_num)):
    d=dict()
    
    for m in range(len(metrics)):
        scores= all_scores[i]
        d['name']=high_level_concepts_name[high_level_concepts_num[i]]
        d[metrics[m]] = np.mean([s[m] for s in scores])
        d[metrics[m]+'-std']=np.std([s[m] for s in scores])
    tab.append(d)
print(pd.DataFrame(tab).set_index('name').round(2).to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} &  loss &  loss-std &   mae &  mae-std &   mse &  mse-std \\
name                   &       &           &       &          &       &          \\
\midrule
Awareness              &  1.79 &      0.07 &  1.04 &     0.02 &  1.65 &     0.06 \\
Photographer intention &  2.65 &      0.15 &  1.30 &     0.04 &  2.47 &     0.15 \\
Replacable             &  1.60 &      0.08 &  0.98 &     0.03 &  1.46 &     0.07 \\
Pose                   &  1.99 &      0.14 &  1.08 &     0.05 &  1.81 &     0.14 \\
Comfort                &  0.81 &      0.05 &  0.67 &     0.03 &  0.72 &     0.05 \\
Willingness            &  0.45 &      0.02 &  0.50 &     0.02 &  0.40 &     0.02 \\
\bottomrule
\end{tabular}



## Predict 'Replaceable'


In [13]:
feat = resnet_feat_avg_cols + img_level_features + link_angle_features + body_joint_prob_features + face_exp_feaures
scores = do_cross_validation(linear_regression_model, X= feature_df[feat].apply(stats.zscore).values,
                            Y= feature_df.replacable_num, epochs= 30, n_splits = 10,
                            model_args={'input_dim': len(feat),'hidden_dims':[64]}, verbose=0)

metrics=['loss','mse','mae']

print()
for m in range(len(metrics)):
    print("{}:  {:.2f} (+/- {:.2f})".format(metrics[m], np.mean([s[m] for s in scores]), np.std([s[m] for s in scores]) * 2))
print()

metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.4937870173942385, 1.3707459881862631, 0.9343677277327209]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.503943408255189, 1.3729439423153094, 0.9324095655926882]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.4647528674658827, 1.328376243120729, 0.9397835080704977]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.4751834318706682, 1.3381649897167376, 0.9337431899518791]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.5740005732208413, 1.4416623869593062, 0.9821044650916352]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.5129932975518734, 1.379386386846307, 0.9463060626520572]
metrics:['loss', 'mean_squared_error', 'mean_absolute_error'], values:[1.6386518997378587, 1.52053723014439, 0.9986895396715733]


KeyboardInterrupt: 