In [None]:
# --- Imports -----------------------------------------------------------------
import os
import random
import warnings

# Silence library warnings before the third-party imports below so that any
# warnings they emit at import time are suppressed as well.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from google.colab import drive
from keras.layers import Dense
from keras.models import Sequential
from sklearn import metrics
from sklearn.model_selection import train_test_split

# --- Environment -------------------------------------------------------------
# Mount Google Drive and work from the shared project folder.
drive.mount('/content/drive')
os.chdir("/content/drive/Shareddrives/Facial Recognition/whole dataset/")

# --- Reproducibility ---------------------------------------------------------
# Seeding only Python's `random` module leaves scikit-learn's train_test_split
# unseeded: without an explicit random_state it draws from NumPy's global RNG.
# Seed both generators.
# NOTE(review): Keras weight initialisation uses the backend's own RNG and is
# not covered here — seed the backend too if exact repeatability is required.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def clean_data(data):
  """Drop bookkeeping / label-leaking columns and handle missing values.

  Steps:
    1. Drop the index-like columns and the per-source ``pol_*`` columns,
       which are redundant with the ``pol`` label.
    2. Fill gaps in numeric columns with that column's mean.
    3. Drop every column that still contains a missing value. This covers
       non-numeric columns with gaps and numeric columns that are entirely
       missing (their mean is NaN, so step 2 cannot fill them).

  Args:
    data: pandas DataFrame containing at least the columns listed in
      ``drop_col``. The input frame is not modified.

  Returns:
    A new DataFrame without the dropped columns and without any NaN.

  Raises:
    KeyError: if one of the columns in ``drop_col`` is absent.
  """
  drop_col = [
      'Unnamed: 0',  # index column
      'userid',      # index-equivalent column
      'pol_dat_us',  # redundant with the label
      'pol_dat_ca',  # redundant with the label
      'pol_dat_uk',  # redundant with the label
      'pol_fb_us',   # redundant with the label
  ]
  # Deliberately kept: 'pol' (label), 'gender' / 'age' / 'country'
  # (self-reported), 'database' and 'ethnicity.value' (already filtered).
  data = data.drop(drop_col, axis=1)

  # Mean imputation is only defined for numeric columns; calling .mean() on a
  # text column raises TypeError, so those columns are left for the drop below.
  for col in data.columns[data.isna().any()]:
    if pd.api.types.is_numeric_dtype(data[col]):
      data[col] = data[col].fillna(data[col].mean())

  # Anything that still has gaps cannot be fed to the model: remove it.
  data = data.drop(data.columns[data.isna().any()].tolist(), axis=1)

  return data

In [None]:
def get_model(dimension_input):
  """Build and compile the fully connected binary classifier.

  The network stacks ReLU hidden layers of 1024, 512, 256, 128 and 60 units
  and ends in a single sigmoid unit. It is compiled with binary
  cross-entropy loss, the 'SGD' optimizer and the accuracy metric.

  Args:
    dimension_input: number of input features; passed as ``input_dim`` of
      the first layer.

  Returns:
    The compiled ``Sequential`` model.
  """
  hidden_units = (1024, 512, 256, 128, 60)

  model = Sequential()
  # Only the first layer needs the input size: every later layer takes its
  # input shape from the layer before it, so repeating input_dim there is
  # redundant.
  model.add(Dense(hidden_units[0], input_dim=dimension_input, activation='relu'))
  for units in hidden_units[1:]:
    model.add(Dense(units, activation='relu'))
  model.add(Dense(1, activation='sigmoid'))

  # Compile model
  model.compile(loss='binary_crossentropy', optimizer='SGD', metrics=['accuracy'])

  return model

In [None]:
def get_accuracy(data, random_state=None):
  """Train the network on one feature set and score it on a held-out split.

  Args:
    data: DataFrame with a 'pol' label column ('liberal' / 'conservative')
      and feature columns; every column other than 'pol' is fed to the model.
    random_state: optional seed forwarded to ``train_test_split``. The
      default ``None`` leaves the split unseeded.

  Returns:
    Tuple ``(auc, acc, n_samples)``: test ROC AUC and test accuracy, both as
    percentages rounded to two decimals, and the number of rows in ``data``.

  Raises:
    ValueError: if a label cannot be converted to an integer class.
  """
  label_codes = {'liberal': 1, 'conservative': 0}
  # Map the two known labels to 1/0 and pass anything else through unchanged;
  # astype(int) then rejects values that are not usable as a class id.
  y = data['pol'].map(lambda label: label_codes.get(label, label)).astype(int)
  X = data.drop('pol', axis=1)

  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=0.2, random_state=random_state)

  model = get_model(X.shape[1])
  model.fit(x=X_train, y=y_train, epochs=25, batch_size=128, verbose=0,
            validation_split=0.2)

  # The network ends in a sigmoid unit, so predict() already yields the
  # positive-class probability. predict_proba is a legacy Sequential-only
  # method that recent Keras releases no longer provide.
  y_pred = model.predict(X_test, verbose=0)

  # Scale to percent before rounding, as is done for the accuracy below.
  # Rounding first would cut the AUC down to a whole percent.
  auc = round(metrics.roc_auc_score(y_test, y_pred) * 100, 2)

  _, acc = model.evaluate(X_test, y_test, batch_size=1000, verbose=0)

  return auc, round(acc * 100, 2), data.shape[0]

In [None]:
def save_results(arr, results_file_loc="/content/drive/Shareddrives/Facial Recognition/whole dataset/results/NN_complete.csv"):
  """Write the experiment results to a CSV file.

  Args:
    arr: iterable of ``[features, test_auc, test_accuracy]`` rows.
    results_file_loc: destination CSV path. Defaults to the shared-drive
      results file used by this notebook.

  Side effects:
    Creates the destination folder if it does not exist, overwrites the CSV
    and prints a confirmation message.
  """
  # saving the results
  results_df = pd.DataFrame(arr, columns=["Features", "Test AUC", "Test Accuracy"])

  # to_csv does not create missing folders, so make sure the target exists.
  out_dir = os.path.dirname(results_file_loc)
  if out_dir:
    os.makedirs(out_dir, exist_ok=True)

  results_df.to_csv(results_file_loc, index=False)
  print("Results Saved !!")

In [None]:
data_directory = "/content/drive/Shareddrives/Facial Recognition/data/"

# Read every CSV located one level below data_directory and stack them
# row-wise into a single frame. Listings are sorted so the row order does not
# depend on the order in which the filesystem returns entries.
frames = []
for folder in sorted(os.listdir(data_directory)):
  folder_path = os.path.join(data_directory, folder)
  if not os.path.isdir(folder_path):
    # A stray file next to the sub-folders would make os.listdir fail.
    continue
  for csv_name in sorted(os.listdir(folder_path)):
    # Match the real extension; a bare suffix test also accepts "foocsv".
    if not csv_name.endswith(".csv"):
      continue
    df = pd.read_csv(os.path.join(folder_path, csv_name))
    frames.append(df)

# ignore_index gives the combined frame a unique 0..n-1 index instead of
# repeating each file's own row numbers.
data = pd.concat(frames, axis=0, ignore_index=True)

In [None]:
# Preview the first rows of the concatenated frame.
data.head()

Unnamed: 0.1,Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,age,country,facial_hair,pol,pol_dat_us,pol_dat_ca,pol_dat_uk,pol_fb_us,ext,neu,ope,agr,con,database,emotion.sadness,emotion.neutral,emotion.disgust,emotion.anger,emotion.surprise,emotion.fear,emotion.happiness,gender.value,age.value,headpose.yaw_angle,headpose.pitch_angle,headpose.roll_angle,smile.value,left_eye_status.normal_glass_eye_open,left_eye_status.no_glass_eye_close,left_eye_status.occlusion,left_eye_status.no_glass_eye_open,left_eye_status.normal_glass_eye_close,left_eye_status.dark_glasses,right_eye_status.normal_glass_eye_open,right_eye_status.no_glass_eye_close,right_eye_status.occlusion,right_eye_status.no_glass_eye_open,right_eye_status.normal_glass_eye_close,right_eye_status.dark_glasses,ethnicity.value
0,4766,1.472585,0.779794,-0.633454,-0.704236,2.158832,-0.59597,-0.511159,-0.544154,5.186326,0.382593,2.60792,-0.592334,-0.434515,-0.409975,-0.128592,-0.729221,-0.637264,-0.421282,-0.941553,-0.431933,-0.051772,0.861839,-0.567465,-0.30433,-0.256917,-0.536574,-0.855431,-0.417165,-0.393633,-0.606429,-0.17586,-0.790128,-0.442605,-0.462796,-0.46232,-0.426995,-0.299547,0.055176,-0.520596,...,,canada,0.000884,conservative,,1.0,,,,,,,,dating,0.3,0.1,0.1,12.2,53.4,31.2,2.8,1,53,-8.7,2.1,6.3,25.9,0.0,0.0,0.2,12.1,0.0,87.7,0.0,0.0,0.0,0.0,0.0,100.0,black
1,6007,0.569038,0.606089,1.424348,1.251127,-0.580214,3.486677,-0.52494,-0.543985,-0.564389,0.162516,-0.519859,-0.503988,-0.512228,0.550128,2.153172,-0.560032,-0.633837,-0.466945,-0.5298,-0.496277,-0.357818,0.19644,-0.651562,-0.630523,0.658054,1.778119,0.567798,-0.419127,2.741424,-0.493857,-0.521631,2.495352,-0.513886,-0.46319,-0.401713,-0.453495,-0.360869,2.035817,-0.520503,...,,canada,0.000503,conservative,,1.0,,,,,,,,dating,0.0,0.1,0.0,0.0,0.0,0.0,99.9,1,47,-5.0,-0.2,7.5,100.0,99.9,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,black
2,7262,-0.614499,-0.501963,1.012575,-0.632123,-0.424815,0.102978,1.095525,-0.545542,-0.323655,-0.704627,-0.020135,-0.588596,-0.205615,0.992991,-0.620585,-0.685054,-0.638066,-0.124315,-0.944114,-0.429342,-0.333381,-0.476247,-0.659904,-0.167494,0.698741,-0.641243,-0.7718,-0.411808,-0.378573,0.210422,1.28615,-0.846683,0.520565,1.694491,-0.443189,-0.453847,-0.282269,-0.611541,-0.424771,...,,canada,0.076734,conservative,,1.0,,,,,,,,dating,40.9,0.0,2.1,0.0,0.0,53.1,3.8,1,40,20.4,2.0,-7.6,100.0,0.0,18.0,0.5,81.4,0.0,0.0,0.8,0.0,0.0,99.2,0.0,0.0,black
3,7264,-0.61565,-0.34722,-0.636254,1.048345,-0.500394,-0.567211,-0.300655,-0.477878,1.140184,-0.68165,-0.193651,-0.539176,0.080096,0.50556,0.517847,-0.746036,1.210325,-0.470811,0.665863,-0.459299,-0.300659,-0.386472,-0.637353,-0.627443,-0.687512,-0.638114,-0.514537,-0.417913,-0.393298,-0.634178,-0.503307,-0.806665,-0.533162,-0.449588,-0.466083,-0.255579,-0.224979,-0.614248,3.430897,...,,canada,0.002087,liberal,,0.0,,,,,,,,dating,0.4,9.8,0.0,0.0,26.1,0.0,63.6,1,34,-0.5,7.2,-11.6,52.9,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,black
4,8251,-0.616161,0.24367,0.068842,-0.600636,0.736316,-0.260989,-0.523339,-0.405227,-0.600699,0.036759,-0.402617,0.035069,0.254657,-0.758873,-0.60935,0.273519,-0.62815,-0.003916,1.135159,-0.494815,-0.318183,-0.475512,0.510538,-0.615811,0.731285,1.313995,0.221348,-0.256391,0.367755,2.32131,-0.47004,-0.6459,-0.552897,-0.450061,-0.462151,0.367391,0.282399,1.494986,-0.520792,...,,canada,0.00283,conservative,,1.0,,,,,,,,dating,0.0,0.0,0.0,0.0,0.0,0.0,100.0,1,51,6.1,12.6,0.7,100.0,0.0,0.0,0.0,99.9,0.0,0.0,1.1,0.0,0.0,98.9,0.0,0.0,black


In [None]:
# drop columns and missing value handling
# Clean the full concatenated frame (`data`). `df` only holds the last CSV
# read by the loading loop, so cleaning it would silently discard every other
# file.
# NOTE: clean_data drops columns by name, so re-running this cell without
# re-running the loading cell first raises KeyError.
data = clean_data(data)

data.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,2041,2042,2043,2044,2045,2046,2047,2048,gender,age,country,facial_hair,pol,database,emotion.sadness,emotion.neutral,emotion.disgust,emotion.anger,emotion.surprise,emotion.fear,emotion.happiness,gender.value,age.value,headpose.yaw_angle,headpose.pitch_angle,headpose.roll_angle,smile.value,left_eye_status.normal_glass_eye_open,left_eye_status.no_glass_eye_close,left_eye_status.occlusion,left_eye_status.no_glass_eye_open,left_eye_status.normal_glass_eye_close,left_eye_status.dark_glasses,right_eye_status.normal_glass_eye_open,right_eye_status.no_glass_eye_close,right_eye_status.occlusion,right_eye_status.no_glass_eye_open,right_eye_status.normal_glass_eye_close,right_eye_status.dark_glasses,ethnicity.value
0,-0.582905,-0.087585,-0.636002,-0.551932,0.212712,2.201278,-0.298542,-0.503289,-0.306482,0.121034,2.296046,-0.468428,-0.512522,0.141914,-0.606282,-0.654309,-0.621247,-0.056865,2.59053,-0.491134,-0.359886,-0.471464,-0.364472,-0.476171,0.449897,-0.14695,2.403956,-0.41085,-0.389871,-0.640979,-0.523328,0.775164,-0.555768,-0.44678,-0.458155,-0.420732,-0.368939,0.928361,-0.520536,-0.553569,...,-0.308463,-0.493913,0.413788,-0.389984,-0.495435,3.014848,2.871524,-0.518409,1.0,38.076923,canada,0.000625,liberal,dating,4.7,59.9,2.5,1.3,0.3,0.8,30.4,1,41,-5.0,26.8,0.6,34.4,0.0,0.0,37.0,63.0,0.0,0.0,0.1,0.1,0.2,99.6,0.0,0.0,white
1,0.458995,-0.599687,-0.590477,1.988928,-0.386568,2.938878,-0.518553,-0.507482,2.087345,-0.705629,-0.473976,0.059145,-0.472727,-0.838713,-0.242439,-0.746392,-0.63036,-0.468258,-0.944241,-0.288475,-0.358003,2.98869,-0.635706,-0.571041,-0.690726,-0.627143,-0.852943,-0.172075,3.321081,-0.646941,2.437489,-0.883823,-0.531591,-0.457465,4.980867,-0.188929,-0.355537,-0.590977,2.276038,-0.547694,...,-0.303697,-0.499402,-0.605985,-0.273447,3.054814,-0.550448,-0.656644,-0.479197,1.0,38.076923,canada,0.007247,liberal,dating,91.7,5.4,0.1,1.0,0.2,0.1,1.4,1,24,-22.2,12.9,1.4,20.1,0.2,0.0,0.3,98.6,0.0,0.9,12.0,2.0,10.7,70.3,2.6,2.4,white
2,3.72094,1.201529,-0.471916,1.155495,-0.614558,2.299848,-0.528174,-0.546723,-0.112413,-0.672324,-0.048039,-0.53741,-0.513017,-0.869917,-0.467806,-0.740456,-0.637771,-0.468809,-0.914986,0.724162,-0.35636,0.382519,-0.659664,-0.614366,-0.54979,0.374206,1.459505,-0.413452,-0.394194,-0.647384,-0.481379,2.554599,-0.468268,-0.462059,-0.450517,-0.442029,-0.368811,0.869253,-0.516889,-0.542644,...,-0.308478,-0.497246,0.654121,-0.291998,-0.197306,5.755627,1.123628,-0.511573,1.0,38.076923,canada,0.001584,conservative,dating,0.0,0.0,0.0,0.0,0.0,0.0,99.9,1,36,-3.3,21.9,-12.6,95.6,99.9,0.0,0.0,0.0,0.0,0.0,99.9,0.0,0.0,0.1,0.0,0.0,white
3,-0.611967,-0.686305,2.750901,-0.121398,-0.368361,-0.427338,-0.206659,-0.124426,-0.600414,-0.454984,-0.505183,1.030021,-0.512655,1.010287,-0.220782,-0.247473,-0.612555,-0.470862,2.178795,-0.478991,-0.349461,-0.383029,-0.659957,-0.630468,0.561624,0.902316,1.347824,-0.418981,-0.377537,0.964234,-0.417878,1.814204,-0.178874,1.030432,0.472405,-0.454028,-0.305817,-0.564738,-0.52047,-0.249372,...,-0.308372,-0.498876,-0.227596,-0.39386,-0.518808,-0.311542,-0.654596,-0.518409,1.0,38.076923,canada,0.007437,conservative,dating,0.0,99.8,0.0,0.0,0.0,0.0,0.1,1,50,6.7,-3.5,-9.7,3.4,0.0,0.0,0.0,100.0,0.0,0.0,0.6,0.0,0.0,99.4,0.0,0.0,white
4,0.22831,-0.613203,-0.474178,-0.667091,3.523682,-0.690866,0.07415,-0.537677,0.384843,-0.604588,4.247587,0.092407,-0.505824,0.145424,-0.300181,-0.700481,-0.633362,-0.462469,2.047285,-0.495086,-0.358278,-0.478724,-0.657798,-0.630134,-0.660566,-0.559957,-0.564983,-0.412687,0.056453,0.344064,4.430504,-0.874307,-0.552976,1.82462,-0.3865,0.632983,-0.36874,-0.557592,3.436025,1.199113,...,-0.307313,-0.49831,1.01786,-0.073878,-0.538677,5.450623,-0.391599,-0.198924,1.0,38.076923,canada,0.035491,liberal,dating,0.9,89.6,0.1,0.1,6.9,0.1,2.4,1,46,29.2,-7.6,17.4,97.3,0.0,0.0,44.1,55.9,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,white


In [None]:
def one_hot_encode(frame):
  """Return `frame` with its object-dtype columns replaced by dummy columns.

  The non-object columns come first, in their original order, followed by
  the indicator columns produced by ``pd.get_dummies``.
  """
  cat_cols = frame.select_dtypes(include=['object']).columns
  if len(cat_cols) == 0:
    return frame.copy()
  # dtype=float keeps every feature numeric, so the frame converts to one
  # float array for the model regardless of the pandas default dummy dtype.
  dummies = pd.get_dummies(frame[list(cat_cols)], dtype=float)
  return pd.concat([frame.drop(cat_cols, axis=1), dummies], axis=1)


label_df = data['pol']

# image features
image_cols = list(map(str, range(1, 2049)))
image_feature_df = data[image_cols]
img_df = pd.concat([label_df, image_feature_df], axis=1)

# image and self reported
SR_df = one_hot_encode(data[['age', 'gender', 'country']])
img_SR_df = pd.concat([img_df, SR_df], axis=1)

# image and extracted features
# (every column except the label and the self-reported ones)
extracted_df = pd.concat(
    [label_df,
     one_hot_encode(data.drop(['age', 'gender', 'country', 'pol'], axis=1))],
    axis=1)

# extracted features and self_reported (whole dataset)
EX_SR_df = pd.concat([SR_df, extracted_df], axis=1)

In [None]:
# Accumulator for the experiment rows: one
# [feature-set name, test AUC, test accuracy] entry per run.
results = []

In [None]:
# getting accuracies for every feature-set combination
feature_sets = [
    ("Only Image Features", img_df),
    ("Image and Self-reported Features", img_SR_df),
    ("Image and Extracted Features", extracted_df),
    ("Image, SR and Extracted Features", EX_SR_df),
]

# Start from an empty list so that re-running this cell does not append
# duplicate rows to the saved CSV.
results = []
for feature_name, feature_df in feature_sets:
  feature_auc, feature_acc, feature_samples = get_accuracy(data=feature_df)
  print(feature_auc, feature_acc, feature_samples, sep=" | ")
  results.append([feature_name, feature_auc, feature_acc])

# saving the results
save_results(results)

59.0 | 63.34 | 30398
60.0 | 67.55 | 30398
62.0 | 70.21 | 30398
62.0 | 67.75 | 30398
 Segment Results Saved !!
