Run the notebook in two steps:


1.   List the features returned by the image analysis tool (Rekognition/Vision/Azure) by executing one of the three feature-listing code blocks.
2. Run the final code block to evaluate the framework.  



In [None]:
pip install pyDML

In [3]:
import pandas as pd
import numpy as np
from math import *
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
from keras.utils import np_utils
from keras import optimizers
import keras.backend as K
from sklearn.ensemble import RandomForestClassifier
import seaborn as sns
from sklearn.decomposition import KernelPCA
from sklearn.metrics import classification_report
import os
import csv
import matplotlib.pyplot as plt
import fnmatch
import datetime
import shutil
from statistics import mean
from sklearn.feature_selection import f_classif
from itertools import combinations
import math
from numpy import percentile
from dml import kda

Necessary functions to compute self-report features

In [5]:
def find_transition_matrix(values):
    """Estimate label-transition probabilities between two binary columns.

    Column 0 of ``values`` is treated as the source label and column 1 as the
    target label. The result is a flat list
    ``[P(0->0), P(0->1), P(1->0), P(1->1)]``; when a source label never occurs
    in column 0, its two entries are ``0``.
    """
    labels = (0, 1)
    probabilities = []

    for source in labels:
        # Rows whose source label matches.
        from_source = values[values[:, 0] == source, :]
        n_from_source = from_source.shape[0]

        if n_from_source == 0:
            # Source label absent: no evidence, report zero for both targets.
            probabilities.extend([0, 0])
            continue

        for target in labels:
            n_to_target = from_source[from_source[:, 1] == target, :].shape[0]
            probabilities.append(n_to_target / float(n_from_source))

    return probabilities

def find_average_sequence_length(values):
	"""Return a typical run length of label 0 and of label 1 in column 1 of ``values``.

	Despite the name, the statistic is the 75th percentile (not the mean) of
	the recorded run lengths, rounded to the nearest integer.

	Returns:
		np.ndarray of two ints: ``[run length for label 0, run length for label 1]``.

	Quirks of the current bookkeeping (kept as-is):
		* A run that reaches the end of ``values`` is only recorded when its
		  length is > 1, or when no other run of that label was recorded.
		* If a label never occurs, its reported length is 1 (the initial
		  value of ``count``), not 0.
	"""
	seq_length_vector=[]

	# Recorded run lengths for label 0 ("l") and label 1 ("h").
	seq_lengths_l=np.empty((0))
	seq_lengths_h=np.empty((0))


	# Pass 1: runs of label 0.
	count=1
	for i in range(1,values.shape[0]):
		if values[i,1]==0 and values[i,1]==values[i-1,1]:				
				count=count+1
				continue
		else:
			# A run of 0s ended at row i-1: record it and reset the counter.
			if values[i-1,1]==0:
				seq_lengths_l=np.append(seq_lengths_l,count)
				count=1
	# Flush the run still open at the end of the data (see quirks in the docstring).
	if count>1 or seq_lengths_l.shape[0]==0:
		seq_lengths_l=np.append(seq_lengths_l,count)	

	# Pass 2: same procedure for runs of label 1.
	count=1
	for i in range(1,values.shape[0]):
		if values[i,1]==values[i-1,1] and values[i,1]==1:
			count=count+1
			continue
		else:
			if values[i-1,1]==1:			
				seq_lengths_h=np.append(seq_lengths_h,count)
				count=1
	if count>1 or seq_lengths_h.shape[0]==0:
		seq_lengths_h=np.append(seq_lengths_h,count)

		

	# 75th percentile of the run lengths, rounded to an integer.
	seq_length_vector.append(int(round(np.percentile(seq_lengths_l,75))))
	seq_length_vector.append(int(round(np.percentile(seq_lengths_h,75))))
	

	#print('seq vector:',np.array(seq_length_vector))
	return np.array(seq_length_vector)

def find_PRE(transition_vector,previous_label):
    """Predict the next label from the previous label and the transition probabilities.

    Args:
        transition_vector: flat sequence ``[P(0->0), P(0->1), P(1->0), P(1->1)]``
            (the layout returned by ``find_transition_matrix``).
        previous_label: ``0`` (low) or ``1`` (high).

    Returns:
        tuple ``(pre, pre_2, predicted_val_norm)``:
            pre: most likely next label, ``'low'`` or ``'high'`` (ties resolve to ``'low'``).
            pre_2: the second-ranked label, i.e. always the label other than ``pre``.
            predicted_val_norm: float array of the two transition probabilities out of
                ``previous_label``, normalised to sum to 1 (all zeros when both are 0).

    Raises:
        ValueError: if ``previous_label`` is neither 0 nor 1.
    """
    label_names = ('low', 'high')

    if previous_label == 0:
        row = 0
    elif previous_label == 1:
        row = 1
    else:
        # Previously this fell through and crashed later with UnboundLocalError.
        raise ValueError("previous_label must be 0 or 1, got %r" % (previous_label,))

    transition_matrix = np.array(
        [transition_vector[0:2], transition_vector[2:4]], dtype=float
    )

    # Multiplying a one-hot label vector by the matrix just selects one row.
    predicted_val = transition_matrix[row]

    idx = int(np.argmax(predicted_val))
    # With two labels the runner-up is always the other one. Looking the minimum
    # up by value returned the same index as argmax whenever the two tied.
    sec_idx = 1 - idx

    pre = label_names[idx]
    pre_2 = label_names[sec_idx]

    total_wt = np.sum(predicted_val)
    if total_wt == 0:
        # Float zeros so both branches return the same dtype (callers scale in place).
        predicted_val_norm = np.zeros(2, dtype=float)
    else:
        predicted_val_norm = np.divide(predicted_val, total_wt)

    return pre, pre_2, predicted_val_norm

def find_val(label):
    """Map a label name to its numeric code: ``'low'`` -> 0, ``'high'`` -> 1.

    Raises:
        ValueError: if ``label`` is neither ``'low'`` nor ``'high'``
            (previously this surfaced as an UnboundLocalError).
    """
    if label == 'low':
        return 0
    if label == 'high':
        return 1
    raise ValueError("label must be 'low' or 'high', got %r" % (label,))
def find_label(val):
    """Map a numeric code to its label name: 0 -> ``'low'``, 1 -> ``'high'``.

    The comparison uses ``==``, so float codes such as ``1.0`` are accepted.

    Raises:
        ValueError: if ``val`` is neither 0 nor 1
            (previously this surfaced as an UnboundLocalError).
    """
    if val == 0:
        return 'low'
    if val == 1:
        return 'high'
    raise ValueError("val must be 0 or 1, got %r" % (val,))

Return the facial feature values for an image

In [1]:
def facial_feature(photo,common,facial_dir='Data/FacialData/Amazon/'):
  """Return the facial feature values stored for one photo.

  Args:
    photo: value of the ``FileName`` column identifying the image; the part
      before the first ``/`` names the per-user CSV file (``<user>.csv``).
    common: list of column names to return, in order.
    facial_dir: directory holding the per-user CSV files. Defaults to the
      Amazon Rekognition data; pass ``'Data/FacialData/Google/'`` for Vision
      or ``'Data/FacialData/Microsoft/'`` for Azure instead of editing this
      function.

  Returns:
    list with one value per entry of ``common``, taken from the first row
    whose ``FileName`` equals ``photo``.

  Raises:
    IndexError: if the CSV has no row for ``photo`` (same exception type as
      before, now with an explanatory message).
  """
  user_no=photo.split('/')[0]
  user_file=os.path.join(facial_dir,user_no+'.csv')
  # NOTE: the CSV is re-read on every call; cache the frame per user if this becomes slow.
  user_data=pd.read_csv(user_file)

  matches=user_data.loc[user_data['FileName']==photo,common]
  if matches.shape[0]==0:
    raise IndexError("no facial-feature row for %r in %s" % (photo,user_file))

  # First matching row, one value per requested column.
  return list(matches.to_numpy()[0])

Remove duplicate data samples if any

In [7]:
def data_stat(train_data):
  """Return the distinct rows of ``train_data`` and print how many there are.

  Side effect: switches NumPy's global print options to suppress scientific
  notation. ``np.unique`` also returns the rows in sorted order.
  """
  np.set_printoptions(suppress=True)
  distinct_rows=np.unique(train_data,axis=0)
  print("number of unique rows=",distinct_rows.shape)
  return distinct_rows

Build the model and evaluate

In [12]:
def Random_Forest_model(train_data,test_data):
  """Fit a random forest on the training rows and score it on the test rows.

  Both arrays hold the target in column 0 and the features in the remaining
  columns. Duplicate training rows are removed first (via ``data_stat``);
  the test rows are used unchanged.

  Returns:
    tuple ``(train_score, test_score)``: macro-averaged F1 on the
    de-duplicated training set and on the test set.
  """
  print("train size in model function=",train_data.shape[0],"test size in model function=",test_data.shape[0])
  train_data=data_stat(train_data)
  print("train size after removing duplicates=",train_data.shape[0],"test size after removing duplicates=",test_data.shape[0])

  # Column 0 is the target, everything else is a feature.
  X_train,Y_train=train_data[:,1:],train_data[:,0]
  X_test,Y_test=test_data[:,1:],test_data[:,0]

  # define model (fixed seed so repeated runs give the same forest)
  model=RandomForestClassifier(n_estimators=50,random_state=42)
  model.fit(X_train,Y_train)

  Y_pred=model.predict(X_test)
  Y_predTrain=model.predict(X_train)

  print("Y_test=",Y_test)
  print("Y_pred=",Y_pred)

  # Compute each score once and reuse it for both the report and the return value.
  train_score=f1_score(Y_train,Y_predTrain,average="macro")
  test_score=f1_score(Y_test,Y_pred,average="macro")

  # The training figure is a macro-F1; it used to be printed as "Train accuracy=".
  print("Train f1=",train_score)
  print("Test f1=",test_score)
  print("Test accuracy=",accuracy_score(Y_test,Y_pred))
  print(classification_report(Y_test, Y_pred))

  return train_score,test_score


Create train and test set

In [9]:
def _feature_row(sample,prev_label,transition_vector,et_from_last_label,seq_len,common):
  """Build the feature row for one sample, updating ``et_from_last_label`` in place.

  Returns ``(row, seq_len, pre)`` where ``row`` is a ``(1, n)`` array, or
  ``None`` when the sample is skipped because both elapsed times are zero.
  """
  pre,_,label_wt=find_PRE(transition_vector,prev_label)

  # compute elapsed time: reset the clock of the label just seen and
  # accumulate the clock of the other label
  elapsed=sample[4]
  if prev_label==0:
    et_from_last_label[0]=elapsed
    et_from_last_label[1]=et_from_last_label[1]+elapsed
  elif prev_label==1:
    et_from_last_label[1]=elapsed
    et_from_last_label[0]=et_from_last_label[0]+elapsed

  max_et=np.amax(et_from_last_label)
  if max_et==0:
    # Nothing to normalise by; the sample is skipped and the run length kept.
    return None,seq_len,pre

  # influence calculation: transition weight scaled by (1 - normalised elapsed time)
  influence=label_wt*(1-et_from_last_label/float(max_et))

  seq_len=seq_len+1 if prev_label==sample[1] else 1

  row=[sample[1],influence[0],influence[1],seq_len]
  row.extend(facial_feature(sample[3],common))
  return np.array(row).reshape(1,len(row)),seq_len,pre


def feature_file_creation(train_data,test_data,common):
  """Create the train and test feature matrices for one fold.

  Columns read from each data row (as used by the code below; the meaning is
  taken from the variable names, so confirm against the self-report CSV):
  0 = previous label, 1 = self-reported label (the target), 3 = photo file
  name passed to ``facial_feature``, 4 = elapsed time.

  Each output row is ``[target, influence_low, influence_high,
  continuous_seq_len, *facial features]``, i.e. ``len(common)+4`` columns.
  Samples for which both elapsed-time clocks are zero are skipped, so the
  outputs may have fewer rows than the inputs.

  For training rows the true previous label (column 0) drives the features.
  For test rows only the first previous label is read from the data; after
  that the label predicted by ``find_PRE`` is fed back as the previous label.
  The elapsed-time clocks carry over from the training rows to the test rows.

  Returns:
    tuple ``(train_feature, test_feature)`` of 2-D arrays. An empty
    ``test_data`` yields an empty test matrix instead of an IndexError.
  """
  # column_list need to add as argument when using correlation
  width=len(common)+4
  # Float array: an integer array would silently truncate fractional elapsed times.
  et_from_last_label=np.zeros(2,dtype=float)
  transition_matrix=find_transition_matrix(train_data)

  # training data
  train_rows=[]
  continuous_seq_len=1
  for i in range(train_data.shape[0]):
    row,continuous_seq_len,_=_feature_row(
        train_data[i],train_data[i,0],transition_matrix,
        et_from_last_label,continuous_seq_len,common)
    if row is not None:
      train_rows.append(row)

  # for test feature
  test_rows=[]
  continuous_seq_len=1
  if test_data.shape[0]>0:
    old_label=test_data[0,0]
    for p in range(test_data.shape[0]):
      row,continuous_seq_len,pre=_feature_row(
          test_data[p],old_label,transition_matrix,
          et_from_last_label,continuous_seq_len,common)
      if row is None:
        # Skipped sample: keep the previous label unchanged.
        continue
      test_rows.append(row)
      old_label=find_val(pre)

  # One concatenation per matrix instead of re-allocating on every row;
  # the empty seed fixes the column count even when no rows were produced.
  train_feature=np.concatenate([np.empty((0,width))]+train_rows,axis=0)
  test_feature=np.concatenate([np.empty((0,width))]+test_rows,axis=0)

  return train_feature,test_feature

Execute the code block below to list the features obtained from Amazon Rekognition

In [10]:
# driver code: list the facial-feature columns available for one user (Amazon Rekognition)
user_no=12
common=[]
user_name=f'user_{user_no:02}'

dirname=f'Data/FacialData/Amazon/{user_name}.csv'

# Identifier / target columns; these must exist in the file.
id_and_target_columns=['FileName',"TapEmotion",'NewTarget']
# Attribute-value columns, removed only when present.
attribute_value_columns=['SmileValue_ False','SmileValue_ True','EyeglassesValue_ False','SunglassesValue_ False','SunglassesValue_ True',"GenderValue_ 'Female'","GenderValue_ 'Male'","BeardValue_ False",
                            "BeardValue_ True","MustacheValue_ False","EyesOpenValue_ False","EyesOpenValue_ True","MouthOpenValue_ False","MouthOpenValue_ True","MustacheValue_ True",'EyeglassesValue_ True']
# Confidence, bounding-box, age-range and date columns, removed only when present.
metadata_columns=['Confidence','BoundingBoxWidth','BoundingBoxHeight','BoundingBoxLeft','BoundingBoxTop','AgeRangeLow','AgeRangeHigh','SmileConfidence', 'EyesOpenConfidence', 'MouthOpenConfidence','EyeglassesConfidence','SunglassesConfidence','GenderConfidence','BeardConfidence','MustacheConfidence','Date']

array=(
    pd.read_csv(dirname)
    .drop_duplicates(keep='last')
    .drop(columns=id_and_target_columns)
    .drop(columns=attribute_value_columns,errors='ignore')
    .drop(columns=metadata_columns,errors='ignore')
)

# Whatever remains is the feature set used by the evaluation cell.
column_list=list(array.columns)
print(array.shape)
common=column_list
print(common)

(444, 65)
['eyeLeftX', 'eyeLeftY', 'eyeRightX', 'eyeRightY', 'mouthLeftX', 'mouthLeftY', 'mouthRightX', 'mouthRightY', 'noseX', 'noseY', 'leftEyeBrowLeftX', 'leftEyeBrowLeftY', 'leftEyeBrowRightX', 'leftEyeBrowRightY', 'leftEyeBrowUpX', 'leftEyeBrowUpY', 'rightEyeBrowLeftX', 'rightEyeBrowLeftY', 'rightEyeBrowRightX', 'rightEyeBrowRightY', 'rightEyeBrowupX', 'rightEyeBrowupY', 'leftEyeLeftX', 'leftEyeLeftY', 'leftEyeRightX', 'leftEyeRightY', 'leftEyeUpX', 'leftEyeUpY', 'leftEyeDownX', 'leftEyeDownY', 'rightEyeLeftX', 'rightEyeLeftY', 'rightEyeRightX', 'rightEyeRightY', 'rightEyeUpX', 'rightEyeUpY', 'rightEyeDownX', 'rightEyeDownY', 'noseLeftX', 'noseLeftY', 'noseRightX', 'noseRightY', 'mouthUpX', 'mouthUpY', 'mouthDownX', 'mouthDownY', 'leftPupilX', 'leftPupilY', 'rightPupilX', 'rightPupilY', 'upperJawlineLeftX', 'upperJawlineLeftY', 'midJawlineLeftX', 'midJawlineLeftY', 'chinBottomX', 'chinBottomY', 'midJawlineRightX', 'midJawlineRightY', 'upperJawlineRightX', 'upperJawlineRightY', 'Po

Execute the code below to list the features obtained from Google Vision

In [None]:
# for vision data processing
# driver code: list the facial-feature columns available for one user (Google Vision)
user_no=12
common=[]
user_name=f'user_{user_no:02}'
dirname=f'Data/FacialData/Google/{user_name}.csv'

# Bounding-polygon, confidence, likelihood, target and date columns, removed only when present.
non_feature_columns=['bounding_poly1x', 'bounding_poly1y', 'bounding_poly2x', 'bounding_poly2y', 'bounding_poly3x', 'bounding_poly3y', 'bounding_poly4x', 'bounding_poly4y', 'fdbounding_poly1x', 'fdbounding_poly1y', 'fdbounding_poly2x', 'fdbounding_poly2y', 'fdbounding_poly3x', 'fdbounding_poly3y', 'fdbounding_poly4x', 'fdbounding_poly4y','detection_confidence','landmarking_confidence','joy_likelihood','sorrow_likelihood','anger_likelihood','surprise_likelihood','emotion','target','Date']

array=(
    pd.read_csv(dirname)
    .drop_duplicates(keep='last')
    .drop(columns=['FileName'])
    .drop(columns=non_feature_columns,errors='ignore')
)

# Whatever remains is the feature set used by the evaluation cell.
column_list=list(array.columns)
#print(column_list)
print(array.shape)
common=column_list
print(common)

(86, 108)
['LEFT_EYEX', 'LEFT_EYEY', 'LEFT_EYEZ', 'RIGHT_EYEX', 'RIGHT_EYEY', 'RIGHT_EYEZ', 'RIGHT_EYEX.1', 'RIGHT_EYEY.1', 'RIGHT_EYEZ.1', 'LEFT_OF_LEFT_EYEBROWX', 'LEFT_OF_LEFT_EYEBROWY', 'LEFT_OF_LEFT_EYEBROWZ', 'RLX', 'RLY', 'RLZ', 'LRX', 'LRY', 'LRZ', 'RRX', 'RRY', 'RRZ', 'MBEX', 'MBEY', 'MBEZ', 'NTX', 'NTY', 'NTZ', 'ULX', 'ULY', 'ULZ', 'LLX', 'LLY', 'LLZ', 'MLX', 'MLY', 'MLZ', 'MRX', 'MRY', 'MRZ', 'MCX', 'MCY', 'MCZ', 'NBRX', 'NBRY', 'NBRZ', 'NBLX', 'NBLY', 'NBLZ', 'NBCX', 'NBCY', 'NBCZ', 'LETBX', 'LETBY', 'LETBZ', 'LERCX', 'LERCY', 'LERCZ', 'LEBBX', 'LEBBY', 'LEBBZ', 'LELCX', 'LELCY', 'LELCZ', 'RETBX', 'RETBY', 'RETBZ', 'RERCX', 'RERCY', 'RERCZ', 'REBBX', 'REBBY', 'REBBZ', 'RELCX', 'RELCY', 'RELCZ', 'LEUMX', 'LEUMY', 'LEUMZ', 'REUMX', 'REUMY', 'REUMZ', 'LETX', 'LETY', 'LETZ', 'RETX', 'RETY', 'RETZ', 'FGX', 'FGY', 'FGZ', 'CGX', 'CGY', 'CGZ', 'CLGX', 'CLGY', 'CLGZ', 'CRGX', 'CRGY', 'CRGZ', 'ThirtyFiveX', 'ThirtyFiveY', 'ThirtyFiveZ', 'ThirtySixX', 'ThirtySixY', 'ThirtySixZ', 'roll

Execute the code below to list the features obtained from Microsoft Azure

In [None]:
# for Azure Data Processing
# List the facial-feature columns available for one user (Microsoft Azure).
user_no=12
common=[]
user_name=f'user_{user_no:02}'
dirname=f'Data/FacialData/Microsoft/{user_name}.csv'

array=(
    pd.read_csv(dirname)
    .drop_duplicates(keep='last')
    .drop(columns=['FileName'])
)

# Every remaining column is used as a feature by the evaluation cell.
column_list=list(array.columns)
print(array.shape)
common=column_list
print(common)

(90, 40)
['pupil_leftX', 'pupil_leftY', 'pupil_rightX', 'pupil_rightY', 'nose_tipX', 'nose_tipY', 'mouth_leftX', 'mouth_leftY', 'mouth_rightX', 'mouth_rightY', 'eyebrow_left_outerX', 'eyebrow_left_outerY', 'eyebrow_left_innerX', 'eyebrow_left_innerY', 'eye_left_outerX', 'eye_left_outerY', 'eye_left_topX', 'eye_left_topY', 'eye_left_bottomX', 'eye_left_bottomY', 'eye_left_innerX', 'eye_left_innerY', 'eyebrow_right_innerX', 'eyebrow_right_innerY', 'eyebrow_right_outerX', 'eyebrow_right_outerY', 'eye_right_innerX', 'eye_right_innerY', 'eye_right_topX', 'eye_right_topY', 'nose_right_alar_topX', 'nose_right_alar_topY', 'nose_left_alar_out_tipX', 'nose_left_alar_out_tipY', 'nose_right_alar_out_tipX', 'nose_right_alar_out_tipY', 'upper_lip_topX', 'upper_lip_topY', 'upper_lip_bottomX', 'upper_lip_bottomY']


In [14]:
# driver code: sliding-window evaluation of the framework for the selected user.
# Relies on `user_no` and `common` set by one of the feature-listing cells above.
dirname='Data/Self_reports/'

for user in [user_no]:
  # read user file
  test_user='user_'+str(format(user,'02'))
  test_dir=dirname+test_user+'.csv'
  if not os.path.isfile(test_dir):
    # Report the skip instead of silently producing no output.
    print("self-report file not found, skipping:",test_dir)
    continue

  dataset=pd.read_csv(test_dir)
  dataset=dataset.drop_duplicates(keep='last')

  test_data=dataset.values
  data_size=test_data.shape[0]
  print("data size=",data_size)
  # specify training and testing data size for each iteration of cross-validation:
  # a window of 55% train followed by 20% test, advanced by one test block per fold
  test_data_size=math.ceil(data_size*0.20)
  train_data_size=math.ceil(data_size*0.55)
  start=0
  train_data_index=0
  test_data_index=0

  accu_list=[]
  train_accu=[]
  i=1

  array1=np.array([])

  # loop over the folds
  while(start+train_data_size<test_data.shape[0]):
    print("fold no=",i)
    train_data_index=start+train_data_size
    test_data_index=start+train_data_size+test_data_size
    print("train data index=",start,"-->",train_data_index-1,"test data index=",train_data_index,"--->",test_data_index-1)
    print("train_data size=",test_data[start:train_data_index,:].shape[0],"test data size=",test_data[train_data_index:test_data_index,:].shape[0])
    train_dataset=test_data[start:train_data_index,:]
    test_dataset=test_data[train_data_index:test_data_index,:]

    if(test_dataset.shape[0]!=0):
      # calculate number of high and low samples in the train set
      high_train=sum(1 for label in train_dataset[:,1] if label==1)
      low_train=sum(1 for label in train_dataset[:,1] if label==0)
      print("no. of high samples in train=",high_train,"no of low samples in train=",low_train)

      if(low_train==0 or high_train==0):
        # A single-class training window cannot be evaluated. Fall through to
        # the window advance below: a bare `continue` here skipped the
        # `start` update and made the loop spin forever on the same window.
        print("skipping fold: training window contains a single class")
      else:
        train_feature,test_feature=feature_file_creation(train_dataset,test_dataset,common)
        array1=train_dataset

        print("train feature shape:",train_feature.shape)
        print("test feature shape:",test_feature.shape)
        # separate targets from features
        X_train=train_feature[:,1:train_feature.shape[1]]
        Y_train=train_feature[:,0]
        X_test=test_feature[:,1:test_feature.shape[1]]
        Y_test=test_feature[:,0]
        # separate facial features (the first 3 columns are the self-report features)
        new_train=X_train[:,3:X_train.shape[1]]
        new_test=X_test[:,3:X_test.shape[1]]

        # do standardization on facial data (fit on train only, apply to both)
        scaler = StandardScaler()
        #scaler.fit(X_train)
        scaler.fit(new_train)
        new_train=scaler.transform(new_train)
        new_test=scaler.transform(new_test)
        np.set_printoptions(suppress=True)

        # reduce dimension of features

        # apply PCA
        pca=KernelPCA(n_components=1,kernel='rbf',eigen_solver='arpack',remove_zero_eig=True,random_state=32)
        pca.fit(new_train)
        new_train=pca.transform(new_train)
        new_test=pca.transform(new_test)

        # apply KLDA
        # uncomment the below code when KLDA is applied
        # klda=kda.KDA(n_components=1,kernel='rbf')
        # new_train=klda.fit_transform(new_train,Y_train)
        # new_test=klda.transform(new_test)

        # concatenate self-report features and facial features (obtain from feature reduction tool)
        trainX_new=np.concatenate((X_train[:,0:3],new_train),axis=1)
        testX_new=np.concatenate((X_test[:,0:3],new_test),axis=1)

        print(trainX_new.shape)
        print(testX_new.shape)
        Y_train=Y_train.reshape(len(Y_train),1)
        Y_test=Y_test.reshape(len(Y_test),1)

        # Target goes back into column 0, as Random_Forest_model expects.
        final_train=np.concatenate((Y_train,trainX_new),axis=1)
        final_test=np.concatenate((Y_test,testX_new),axis=1)

        train_score,score=Random_Forest_model(final_train,final_test)

        accu_list.append(score)
        train_accu.append(train_score)

    # Always advance the window, whether or not the fold was evaluated.
    start=start+test_data_size
    i+=1

  # print the accuracy
  print("f1 score list=",accu_list)
  if(len(train_accu)!=0):
      print("Train f1-score=",mean(train_accu))
  if(len(accu_list)!=0):
      print("Test f1-score=",mean(accu_list))

  
      


data size= 146
fold no= 1
train data index= 0 --> 80 test data index= 81 ---> 110
train_data size= 81 test data size= 30
no. of high samples in train= 74 no of low samples in train= 7
train feature shape: (80, 69)
test feature shape: (30, 69)
(80, 4)
(30, 4)
train size in model function= 80 test size in model function= 30
number of unique rows= (80, 5)
train size after removing duplicates= 80 test size after removing duplicates= 30
Y_test= [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.]
Y_pred= [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.]
Train accuracy= 1.0
Test f1= 1.0
Test accuracy= 1.0
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00         3
         1.0       1.00      1.00      1.00        27

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00     