In [1]:
import os
import glob

#from tqdm import tqdm
import numpy as np
import scipy.ndimage
import scipy.misc
import pandas as pd
import pickle
from IPython.display import clear_output

import keras
from keras.layers.core import Dense, Flatten, Dropout
from keras.layers import Input
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.layers import GlobalAveragePooling2D

#from keras.applications.inception_v3 import InceptionV3
#from keras.applications.xception import Xception
from keras.applications.resnet50 import ResNet50
from keras.applications.vgg19 import VGG19
# import the necessary packages
from keras.preprocessing import image as image_utils

from sklearn.model_selection import StratifiedKFold
from sklearn import preprocessing

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf
import random as rn

from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, classification_report

# The below is necessary in Python 3.2.3 onwards to
# have reproducible behavior for certain hash-based operations.
# See these references for further details:
# https://docs.python.org/3.4/using/cmdline.html#envvar-PYTHONHASHSEED
# https://github.com/keras-team/keras/issues/2280#issuecomment-306959926

import os
os.environ['PYTHONHASHSEED'] = '0'

# The below is necessary for starting Numpy generated random numbers
# in a well-defined initial state.

np.random.seed(42)

# The below is necessary for starting core Python generated random numbers
# in a well-defined state.

rn.seed(12345)

# Force TensorFlow to use single thread.
# Multiple threads are a potential source of
# non-reproducible results.
# For further details, see: https://stackoverflow.com/questions/42022950/which-seeds-have-to-be-set-where-to-realize-100-reproducibility-of-training-res

session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)

from keras import backend as K

# The below tf.set_random_seed() will make random number generation
# in the TensorFlow backend have a well-defined initial state.
# For further details, see: https://www.tensorflow.org/api_docs/python/tf/set_random_seed

tf.set_random_seed(1234)

sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess) # reference: https://keras.io/getting-started/faq/#how-can-i-obtain-the-output-of-an-intermediate-layer



Using TensorFlow backend.
  return f(*args, **kwds)


In [159]:
import os
from tqdm import tqdm
import sys

base_path = '/nfs/juhu/data/rakhasan/bystander-detection/'
pose_feature_file = '/nfs/juhu/data/rakhasan/bystander-detection/pilot-study/pose-features.pkl'
data_path= '/nfs/juhu//data/rakhasan/bystander-detection/pilot-study/'
os.chdir('/nfs/juhu/data/rakhasan/bystander-detection/code-repos/keras-openpose-reproduce/')
#sys.path.append('/nfs/juhu/data/rakhasan/bystander-detection/code-repos/keras-openpose-reproduce/')
from openpose_wrapper import *

In [None]:
pose_model = get_pose_model()

In [34]:

IMG_SIZE = (256, 256)
IN_SHAPE = (*IMG_SIZE, 3)
BATCH_SIZE = 64

resnet_model = ResNet50(
                include_top=False,
                input_shape=IN_SHAPE,
                weights='imagenet'
            )



In [29]:
def split_data(X,Y, test_perc = 0.1):
    indices = rn.sample(range(1, len(Y)), int(len(Y)*test_perc))
    Xtest = X[indices]
    Ytest = Y[indices]
    Xtrain = X[list(set(range(len(Y))).difference(set(indices)))]
    Ytrain = Y[list(set(range(len(Y))).difference(set(indices)))]
    
    return (Xtrain,Ytrain),(Xtest,Ytest)

In [151]:
'''Prepare train-test data'''
def load_XY(photo_path = 'pilot1-photos'):
    pilot1_df = pd.read_csv(data_path+'photo_df.csv').groupby('photo_no').mean()
    pilot1_df = pilot1_df[(pilot1_df.subject_bystander_num >0) | (pilot1_df.subject_bystander_num <0) ]
    pilot1_df['sub_by'] = pilot1_df.apply(lambda r: 0 if r.subject_bystander_num < 0 else 1, axis=1)

    files = [data_path+photo_path+'/'+str(p)+'.jpg' for p in pilot1_df.index.values]
    photos = [np.asarray(image_utils.load_img(file, target_size=IMG_SIZE)) for file in files]
    X = np.array(photos)
    Y = pilot1_df.sub_by.values
    return X,Y

def load_features(photo_path = 'pilot1-photos', Y='subject_bystander_num'):
    features = list(set([col+'_num' for col in [ 'posing','uncomfortable', 'willing',
               'photographer_intention', 'replacable']]+['distance',
                'person_size']).difference(set([Y])))
    pilot1_df = pd.read_csv(data_path+'photo_df.csv').groupby('photo_no').mean()
    pilot1_df = pilot1_df[(pilot1_df[Y] >0) | (pilot1_df[Y] <0) ]
    pilot1_df['y'] = pilot1_df.apply(lambda r: 0 if r[Y] < 0 else 1, axis=1)

    Y = pilot1_df.y.values
    return Y,pilot1_df[features]#.values

def load_all_features(path, Y='subject_bystander_num'):
    df = pd.read_csv(path+'photo_df.csv').groupby('photo_no').mean()
    pose_data = pickle.load(open(path+'pose-features.pkl','rb'))
                            
    df = df[(df[Y] >0) | (df[Y] <0) ]
    df['label'] = df.apply(lambda r: 0 if r[Y] < 0 else 1, axis=1)

    df['pose'] = df.apply(lambda r: pose_data[str(r.name)+'.jpg'], axis=1)
    
    df['img_data']= df.apply(lambda r: 
                np.asarray(image_utils.load_img(path+'photos/'+str(r.name)+'.jpg', target_size=IMG_SIZE)), axis=1)
    
    
    return df


def load_photos(df):
    files = [data_path+photo_path+'/'+str(p)+'.jpg' for p in df.index.values]
    photos = [np.asarray(image_utils.load_img(file, target_size=IMG_SIZE)) for file in files]
    return np.array(photos)


In [13]:
load_features(Y='photographer_intention_num')[1].columns

Index(['uncomfortable_num', 'posing_num', 'replacable_num', 'person_size',
       'distance', 'willing_num'],
      dtype='object')

In [9]:
'''Build model using pretrained ImageNet'''
def imagenet():
    pretrained_model = ResNet50(
                include_top=False,
                input_shape=IN_SHAPE,
                weights='imagenet'
            )
    if pretrained_model.output.shape.ndims > 2:
        output = Flatten()(pretrained_model.output)
    else:
        output = pretrained_model.output

    output = BatchNormalization()(output)
    output = Dropout(0.5)(output)
    output = Dense(128, activation='relu')(output)
    output = BatchNormalization()(output)
    output = Dropout(0.5)(output)
    output = Dense(1, activation='sigmoid')(output)
    model = Model(pretrained_model.input, output)

    for layer in pretrained_model.layers:
        layer.trainable = False

    #model.summary(line_length=200)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [10]:
def do_cross_validation(model_func, X,Y, n_splits=5):
    seed = 1234
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    cvscores = []
    for train, test in kfold.split(X, Y):
        # create model
        model = model_func()
        model.fit(X[train], Y[train], epochs=10, batch_size=BATCH_SIZE, verbose=1)
        #evaluate the model
        scores = model.evaluate(X[test], Y[test], verbose=1)
        print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
        cvscores.append(scores[1] * 100)
    return cvscores

In [21]:
'''20% dilated box with ImageNet'''
X,Y= load_XY(photo_path='dilated-box20')
scores = do_cross_validation(imagenet, X, Y)
clear_output()
print("%.2f%% (+/- %.2f%%)" % (np.mean(scores), np.std(scores)))

70.70% (+/- 2.09%)


In [22]:
'''40% dilated box with ImageNet'''
X,Y= load_XY(photo_path='dilated-box40')
scores = do_cross_validation(imagenet, X, Y)
clear_output()
print("%.2f%% (+/- %.2f%%)" % (np.mean(scores), np.std(scores)))

67.51% (+/- 4.15%)


## SVM with high level features

In [4]:
features = [col+'_num' for col in [ 'posing','uncomfortable', 'willing', 'photographer_intention', 'replacable']]+['distance','person_size']
df = pd.read_csv('/nfs/juhu//data/rakhasan/bystander-detection/pilot-study/photo_df.csv').groupby('photo_no').mean()
df = df[(df.subject_bystander_num >0) | (df.subject_bystander_num <0) ]
df['sub_by'] = df.apply(lambda r: 0 if r.subject_bystander_num < 0 else 1, axis=1)
Y = df.sub_by.values

In [5]:
'''add pose info'''
pose_features = pickle.load(open('/nfs/juhu/data/rakhasan/bystander-detection/pilot-study/pose-features.pkl','rb'))

for i in range(18):
    df['pose_'+str(i)] = df.apply(lambda row: pose_features[str(row.name)+'.jpg'][i], axis=1)
df.head(2)

Unnamed: 0_level_0,photo_no.1,photo_place_text,pid,willing_text,person_size,subject_bystander_num,posing_num,photographer_intention_num,photo_place_num,willing_num,...,pose_8,pose_9,pose_10,pose_11,pose_12,pose_13,pose_14,pose_15,pose_16,pose_17
photo_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,,8456.333333,,336635.653455,1.0,-1.0,3.0,-0.333333,0.666667,...,0.135436,0.9984,0.777365,0.4736,0.892393,0.7952,0.799629,0.8992,0.0,0.0
1,1.0,,6265.0,,1613.976428,-0.333333,0.0,0.0,-1.333333,0.666667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.518519,0.403226


In [36]:
rect=mappings[0][4:8]
img_no=0
img=cv2.imread('/nfs/juhu//data/rakhasan/bystander-detection/pilot-study/photos/{}.jpg'.format(img_no))
float(rect[0])*len(img[0]),float(rect[2])*len(img)


(6.4, 57.664324)

In [9]:
min_max_scaler = preprocessing.MinMaxScaler()

feat = features
print('Features:',feat)
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, min_max_scaler.fit_transform(df[feat]), Y, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, min_max_scaler.fit_transform(df[feat]), Y, cv=5)
print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))
print()

feat = features[-2:]
print('Features:',feat)
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, min_max_scaler.fit_transform(df[feat]), Y, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, min_max_scaler.fit_transform(df[feat]), Y, cv=5)
print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))
print()

feat = features[-4:-2]
print('Features:',feat)
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, min_max_scaler.fit_transform(df[feat]), Y, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, min_max_scaler.fit_transform(df[feat]), Y, cv=5)
print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))
print()

feat = features[:5]
print('Features:',feat)
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, min_max_scaler.fit_transform(df[feat]), Y, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, min_max_scaler.fit_transform(df[feat]), Y, cv=5)
print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))


Features: ['posing_num', 'uncomfortable_num', 'willing_num', 'photographer_intention_num', 'replacable_num', 'distance', 'person_size']
Linear kernel accuracy: 87.88 (+/- 8.81)
RBF kernel accuracy: 86.97 (+/- 9.83)

Features: ['distance', 'person_size']
Linear kernel accuracy: 67.51 (+/- 7.90)
RBF kernel accuracy: 70.04 (+/- 10.06)

Features: ['photographer_intention_num', 'replacable_num']
Linear kernel accuracy: 88.11 (+/- 11.45)
RBF kernel accuracy: 88.34 (+/- 11.24)

Features: ['posing_num', 'uncomfortable_num', 'willing_num', 'photographer_intention_num', 'replacable_num']
Linear kernel accuracy: 87.88 (+/- 11.27)
RBF kernel accuracy: 88.57 (+/- 9.82)


## SVM with pose features

In [28]:
f=min_max_scaler.fit_transform(df[feat])
#min_max_scaler.transform([[1613]])
#df.pose_0
f.shape

(437, 20)

In [11]:
min_max_scaler = preprocessing.MinMaxScaler()

feat = ['distance', 'person_size']+['pose_'+str(i) for i in range(18)]

clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, min_max_scaler.fit_transform(df[feat]), Y, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, min_max_scaler.fit_transform(df[feat]), Y, cv=5)
print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

Linear kernel accuracy: 65.93 (+/- 11.91)
RBF kernel accuracy: 65.46 (+/- 9.08)


## SVM for predicting other outcomes

In [24]:
outcomes = [ 'posing','uncomfortable', 'willing', 'photographer_intention', 'replacable']
features = ['person_size','distance']
min_max_scaler = preprocessing.MinMaxScaler()

for outcome in outcomes:
    print('Outcome:',outcome)
    Y,features = load_features(Y=outcome+'_num')
    
    clf = svm.SVC(kernel='linear', C=1)
    scores = cross_val_score(clf, min_max_scaler.fit_transform(features), Y, cv=5)
    print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

    clf = svm.SVC(kernel='rbf', C=1)
    scores = cross_val_score(clf, min_max_scaler.fit_transform(features), Y, cv=5)
    print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))
    print()

Outcome: posing
Linear kernel accuracy: 82.72 (+/- 9.25)
RBF kernel accuracy: 83.40 (+/- 9.94)

Outcome: uncomfortable
Linear kernel accuracy: 88.06 (+/- 7.89)
RBF kernel accuracy: 83.25 (+/- 2.88)

Outcome: willing
Linear kernel accuracy: 92.77 (+/- 2.64)
RBF kernel accuracy: 90.60 (+/- 0.86)

Outcome: photographer_intention
Linear kernel accuracy: 87.56 (+/- 9.38)
RBF kernel accuracy: 86.67 (+/- 7.06)

Outcome: replacable
Linear kernel accuracy: 73.49 (+/- 2.69)
RBF kernel accuracy: 74.38 (+/- 0.66)



In [49]:
feat = features[:5]
print('Features:',feat)
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, df[feat], Y, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, df[feat], Y, cv=5)
print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))


Features: ['posing_num', 'uncomfortable_num', 'willing_num', 'photographer_intention_num', 'replacable_num']
Linear kernel accuracy: 88.34 (+/- 12.12)
RBF kernel accuracy: 86.75 (+/- 13.79)


### ResNet with SVM

In [36]:
X,Y = load_XY(photo_path='photos')
cnn_feats = resnet_model.predict(X, batch_size=BATCH_SIZE)
cnn_feats = np.array([c.flatten() for c in cnn_feats])

'''Normalize CNN features'''
min_max_scaler = preprocessing.MinMaxScaler()
cnn_feats = min_max_scaler.fit_transform(cnn_feats)

In [70]:
cnn_feats.shape

(437, 131072)

In [37]:
'''Feed CNN features directly into SVM'''
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, cnn_feats, Y, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, cnn_feats, Y, cv=5)
print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

Linear kernel accuracy: 64.75 (+/- 5.35)
RBF kernel accuracy: 53.78 (+/- 0.60)


In [133]:
cnn_feats.shape

(437, 131072)

In [None]:
'''Confusion matrix'''
#(Xtrain, Ytrain),(Xtest,Ytest) = split_data(cnn_feats, Y)
clf = svm.SVC(kernel='linear', C=1)
clf.fit(Xtrain,Ytrain)
predictions = clf.predict(Xtest)
print(confusion_matrix(Ytest, predictions))

In [118]:
clf = svm.SVC(kernel='rbf', C=1)
clf.fit(Xtrain,Ytrain)
predictions = clf.predict(Xtest)
print(confusion_matrix(Ytest, predictions))

[[19  4]
 [ 5 15]]


In [156]:
np.array([v for v in all_feat_df['cnn_feat'].values]).shape

(445, 1, 131072)

In [157]:
'How well pose joints can classify whether posing for camera'
# all_feat_df = load_all_features(path='/nfs/juhu//data/rakhasan/bystander-detection/pilot-study/',Y='posing_num')

# all_feat_df['cnn_feat'] = all_feat_df.apply(lambda row: 
#    np.array(resnet_model.predict(row.img_data.reshape(1,*(row.img_data.shape)),batch_size=1).flatten()), axis=1)
    
# all_feat_df['cnn_feat'] = all_feat_df.apply(lambda row:
#                                            row.cnn_feat.reshape(1,row.cnn_feat.shape[0]), axis=1)
#all_feat_df.head()

# min_max_scaler = preprocessing.MinMaxScaler()
# min_max_scaler.fit(all_feat_df['cnn_feat'].values)
# all_feat_df['cnn_feat_transformed'] = all_feat_df.apply(lambda row: min_max_scaler.transform(row.cnn_feat), axis=1)

clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, np.array(
    [v for v in all_feat_df['cnn_feat'].values]).reshape(445, 131072), all_feat_df.label, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, np.array(
    [v for v in all_feat_df['cnn_feat'].values]).reshape(445, 131072), all_feat_df.label, cv=5)

print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

Linear kernel accuracy: 64.94 (+/- 3.40)
RBF kernel accuracy: 60.91 (+/- 4.81)


In [138]:

'''Feed CNN features directly into SVM'''
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, np.array(
    [v for v in all_feat_df['cnn_feat_transformed'].values]).reshape(437, 131072), all_feat_df.label, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, np.array(
    [v for v in all_feat_df['cnn_feat_transformed'].values]).reshape(437, 131072), all_feat_df.label, cv=5)

print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

Linear kernel accuracy: 64.75 (+/- 5.35)
RBF kernel accuracy: 53.78 (+/- 0.60)


In [146]:
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, np.array(
    [v for v in all_feat_df['combined_feat'].values]).reshape(437, 131108), all_feat_df.label, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, np.array(
    [v for v in all_feat_df['combined_feat'].values]).reshape(437, 131108), all_feat_df.label, cv=5)

print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

Linear kernel accuracy: 65.21 (+/- 6.78)
RBF kernel accuracy: 53.78 (+/- 0.60)


In [150]:
#Y,features = load_features(Y='posing_num')
all_feat_df=all_feat_df[(all_feat_df.posing_num>0)&(all_feat_df.posing_num<0)]
#all_feat_df['posing_num']=all_feat_df.apply(lambda r: 0 if r['posing_num'] < 0 else 1, axis=1)
all_feat_df.shape

(0, 19)

In [None]:
'''Combine CNN feature with high level features'''
X,Y,high_features = load_features()
cnn_feats = resnet_model.predict(X, batch_size=BATCH_SIZE)
cnn_feats = np.array([c.flatten() for c in cnn_feats])

In [103]:
'''Concate all high level features (Normalized)'''
min_max_scaler = preprocessing.MinMaxScaler()
feat=list(high_features.columns)
combined_feats = np.concatenate((cnn_feats, min_max_scaler.fit_transform(high_features[feat].values)),axis=1)
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, combined_feats, Y, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, combined_feats, Y, cv=5)
print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

Linear kernel accuracy: 77.13 (+/- 6.77)
RBF kernel accuracy: 73.71 (+/- 12.33)


In [104]:
'''Concate subset of high level features (Normalized)'''
min_max_scaler = preprocessing.MinMaxScaler()
feat=list(high_features.columns)[-2:]
combined_feats = np.concatenate((cnn_feats, min_max_scaler.fit_transform(high_features[feat].values)),axis=1)
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, combined_feats, Y, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, combined_feats, Y, cv=5)
print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

Linear kernel accuracy: 66.60 (+/- 6.22)
RBF kernel accuracy: 73.72 (+/- 13.65)


In [105]:
'''Concate subset of high level features (Normalized)'''
feat=list(high_features.columns)[-2:]
combined_feats = np.concatenate((cnn_feats, preprocessing.scale(high_features[feat].values)),axis=1)
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, combined_feats, Y, cv=5)
print("Linear kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

clf = svm.SVC(kernel='rbf', C=1)
scores = cross_val_score(clf, combined_feats, Y, cv=5)
print("RBF kernel accuracy: %0.2f (+/- %0.2f)" % (scores.mean()*100, scores.std() * 2*100))

(1.0, 0.0, 0.18987177, 0.1512498)

# Two-stream networks

In [6]:
'''Prepare train-test data'''
pilot1_df = pd.read_csv(data_path+'photo_df.csv').groupby('photo_no').mean()
pilot1_df = pilot1_df[(pilot1_df.subject_bystander_num >0) | (pilot1_df.subject_bystander_num <0) ]
pilot1_df['sub_by'] = pilot1_df.apply(lambda r: 0 if r.subject_bystander_num < 0 else 1, axis=1)

cropped_files = [data_path+'cropped/'+str(p)+'.jpg' for p in pilot1_df.index.values]
masked_files = [data_path+'binary-mask/'+str(p)+'.jpg' for p in pilot1_df.index.values]

photos = [np.asarray(image_utils.load_img(file, target_size=IMG_SIZE)) for file in cropped_files]
X1 = np.array(photos)
binary_mask = [data_path+'binary-mask/'+str(p)+'.jpg' for p in pilot1_df.index.values]
photos = [np.asarray(image_utils.load_img(file, target_size=IMG_SIZE)) for file in masked_files]
X2 = np.array(photos)

Y = pilot1_df.sub_by.values

In [90]:
do_cross_validation(imagenet, X1, Y)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 60.23%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 67.05%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 60.92%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 71.26%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 66.67%


[60.22727272727273,
 67.04545508731495,
 60.91954132606243,
 71.26436946035801,
 66.66666728326645]

In [91]:
do_cross_validation(imagenet, X2, Y)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 56.82%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 53.41%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 60.92%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 67.82%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 56.32%


[56.81818290190264,
 53.40909090909091,
 60.91954009286289,
 67.8160933242447,
 56.32183969705954]

In [94]:
a=[56.81818290190264,
 53.40909090909091,
 60.91954009286289,
 67.8160933242447,
 56.32183969705954]
np.mean(a),np.std(a)

(59.056949385032134, 4.991654083131733)

In [17]:
'''Concatenation of CNN features from two identical networks'''
def build_model_multi():
    in1 = Input(shape=IN_SHAPE)
    in2 = Input(shape=IN_SHAPE)

    pretrained_model1 = ResNet50(
                    include_top=False,
                    input_shape=IN_SHAPE,
                    weights='imagenet',

                )

    for layer in pretrained_model1.layers:
            layer.trainable = False
    stream1 = pretrained_model1(in1)
    stream1 = Flatten()(stream1)if stream1.shape.ndims > 2 else  stream1
    stream1 = Dropout(.5)(BatchNormalization()(stream1))

    pretrained_model2 = VGG19(
                    include_top=False,
                    input_shape=IN_SHAPE,
                    weights='imagenet'
                )
    for layer in pretrained_model2.layers:
            layer.trainable = False

    stream2 = pretrained_model2(in2)
    stream2 = Flatten()(stream2)if stream2.shape.ndims > 2 else  stream2
    stream2 = Dropout(.5)(BatchNormalization()(stream2))

    merged_features =  keras.layers.concatenate([stream1, stream2], axis=-1)
    output = Dense(128, activation='relu')(merged_features)
    output = BatchNormalization()(output)
    output = Dropout(0.5)(output)
    output = Dense(1, activation='sigmoid')(output)

    model = Model(inputs=[in1, in2], outputs=output)

    #model.summary(line_length=200)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [18]:
def do_cross_validation_multi(X1, X2 ,Y, n_splits=5):
    seed = 1234
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    cvscores = []
    for train, test in kfold.split(X1, Y):
        # create model
        model = build_model_multi()
        model.fit([X1[train],X2[train]], Y[train], epochs=10, batch_size=BATCH_SIZE, verbose=1)
        #evaluate the model
        scores = model.evaluate([X1[test],X2[test]], Y[test], verbose=1)
        print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
        cvscores.append(scores[1] * 100)
    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

In [19]:
do_cross_validation_multi(X1, X2, Y)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 64.77%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 63.64%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 62.07%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 70.11%
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 65.52%
65.22% (+/- 2.71%)


In [20]:
cvscores

NameError: name 'cvscores' is not defined

In [None]:
model.fit([X1[train],X2[train]], Y[train], epochs=1, batch_size=BATCH_SIZE, verbose=1)

In [109]:
len(Y),len(X1),len(X2)

(437, 437, 437)

In [29]:
pretrained_model=None

In [None]:
import os
 
import sklearn
from sklearn import cross_validation, grid_search
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.svm import SVC
from sklearn.externals import joblib
 
def train_svm_classifer(features, labels, model_output_path):
    """
    train_svm_classifer will train a SVM, saved the trained and SVM model and
    report the classification performance
 
    features: array of input features
    labels: array of labels associated with the input features
    model_output_path: path for storing the trained svm model
    """
    # save 20% of data for performance evaluation
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, labels, test_size=0.2)
 
    param = [
        {
            "kernel": ["linear"],
            "C": [1, 10, 100, 1000]
        },
        {
            "kernel": ["rbf"],
            "C": [1, 10, 100, 1000],
            "gamma": [1e-2, 1e-3, 1e-4, 1e-5]
        }
    ]
 
    # request probability estimation
    svm = SVC(probability=True)
 
    # 10-fold cross validation, use 4 thread as each fold and each parameter set can be train in parallel
    clf = grid_search.GridSearchCV(svm, param,
            cv=10, n_jobs=4, verbose=3)
 
    clf.fit(X_train, y_train)
 
    if os.path.exists(model_output_path):
        joblib.dump(clf.best_estimator_, model_output_path)
    else:
        print("Cannot save trained svm model to {0}.".format(model_output_path))
 
    print("\nBest parameters set:")
    print(clf.best_params_)
 
    y_predict=clf.predict(X_test)
 
    labels=sorted(list(set(labels)))
    print("\nConfusion matrix:")
    print("Labels: {0}\n".format(",".join(labels)))
    print(confusion_matrix(y_test, y_predict, labels=labels))
 
    print("\nClassification report:")
    print(classification_report(y_test, y_predict))

In [3]:
# Build the model of a logistic classifier
import os
import gzip
import six.moves.cPickle as pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.datasets import mnist
from keras.utils import np_utils
import tensorflow as tf

def build_logistic_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(output_dim, input_dim=input_dim))
    model.add(Activation(tf.nn.sigmoid))

    return model

batch_size = 128
nb_classes = 10
nb_epoch = 20
input_dim = 784

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(60000, input_dim)
X_test = X_test.reshape(10000, input_dim)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = build_logistic_model(input_dim, nb_classes)
#model = build_logistic_model(7, 1)
print(model.summary())

# compile the model
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, Y_train,
                    batch_size=batch_size, nb_epoch=nb_epoch,
                    verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)

print('Test score:', score[0])
print('Test accuracy:', score[1])


60000 train samples
10000 test samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 10)                7850      
_________________________________________________________________
activation_2 (Activation)    (None, 10)                0         
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________
None
Train on 60000 samples, validate on 10000 samples
Epoch 1/20
  128/60000 [..............................] - ETA: 33s - loss: 2.3289 - acc: 0.0938



Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test score: 0.3489051495671272
Test accuracy: 0.9084


In [None]:
fea