In [16]:
import copy
import cv2
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import os
import random
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

In [17]:
# Load the combined train/test/validation table; the first CSV column becomes the row index.
all_df = pd.read_csv("./dataset/all_train_test_val.csv", index_col = 0)

In [18]:
# Partition the table by class label: 1 -> fake, 0 -> real.
fake_df = all_df.loc[all_df['label'].eq(1)]
real_df = all_df.loc[all_df['label'].eq(0)]

In [19]:
# Number of fake rows (non-null 'path_drive' entries) in each value of the 'train' column.
print(fake_df.groupby('train')['path_drive'].count())

train
test          2236
train         6712
validation    2237
Name: path_drive, dtype: int64


In [20]:
# Number of real rows (non-null 'path_drive' entries) in each value of the 'train' column.
print(real_df.groupby('train')['path_drive'].count())

train
test          2210
train         7043
validation    1934
Name: path_drive, dtype: int64


In [None]:
def get_ttv_xy(fake_df, real_df, n_meta_cols=3):
    """Build feature matrices and label vectors for the train/test/validation splits.

    Rows are assigned to a split by the value of their ``'train'`` column
    (``'train'``, ``'test'`` or ``'validation'``). Within each split the fake
    rows come first (label 1.0) followed by the real rows (label 0.0); no
    shuffling is done here.

    Parameters
    ----------
    fake_df, real_df : pandas.DataFrame
        Frames holding the fake and the real samples. Both need a ``'train'``
        column; the feature columns are taken positionally from the left.
    n_meta_cols : int, default 3
        Number of trailing columns that are NOT features and are cut off.
        The default reproduces the original hard-coded ``iloc[:, :-3]``.
        NOTE(review): with the default, the notebook's training matrix ends up
        with 19201 columns per row while the later reshape expects
        40*40*4*3 = 19200 -- check which column of the CSV is not a feature and
        adjust the selection accordingly.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_train, x_test, x_val, y_train, y_test, y_val)``.

    Raises
    ------
    ValueError
        If ``n_meta_cols`` is negative.
    """
    if n_meta_cols < 0:
        raise ValueError("n_meta_cols must be >= 0, got %r" % (n_meta_cols,))

    def _features(df):
        # An explicit stop index (instead of ``:-n``) keeps n_meta_cols=0 meaningful
        # and still yields zero columns when the frame is narrower than n_meta_cols.
        stop = max(df.shape[1] - n_meta_cols, 0)
        return df.iloc[:, :stop].values

    def _split_xy(split_name):
        fake_part = fake_df[fake_df['train'] == split_name]
        real_part = real_df[real_df['train'] == split_name]
        x = np.concatenate((_features(fake_part), _features(real_part)), axis=0)
        y = np.concatenate((np.ones(fake_part.shape[0]), np.zeros(real_part.shape[0])))
        return x, y

    x_train, y_train = _split_xy('train')
    x_test, y_test = _split_xy('test')
    x_val, y_val = _split_xy('validation')
    return x_train, x_test, x_val, y_train, y_test, y_val
    

In [None]:
def shuffle_two_arrays(arr1, arr2, seed=None):
    """Shuffle two equally long arrays with the same random permutation.

    Element ``i`` of both inputs ends up at the same position in both outputs,
    so paired data (e.g. samples and their labels) stays aligned.

    Parameters
    ----------
    arr1, arr2 : numpy.ndarray
        Arrays indexed along their first axis; they must have the same length.
    seed : int or None, default None
        When given, the permutation is drawn from a dedicated generator seeded
        with this value, making the shuffle reproducible. When ``None`` the
        global NumPy random state is used, exactly as before.

    Returns
    -------
    tuple of numpy.ndarray
        ``(arr1_shuffled, arr2_shuffled)`` -- new arrays, inputs are untouched.

    Raises
    ------
    ValueError
        If the two arrays differ in length. Without this check a longer
        ``arr2`` would be silently truncated and mispaired.
    """
    if len(arr1) != len(arr2):
        raise ValueError(
            "arrays must have the same length, got %d and %d" % (len(arr1), len(arr2))
        )

    if seed is None:
        shuffler = np.random.permutation(len(arr1))
    else:
        shuffler = np.random.default_rng(seed).permutation(len(arr1))

    return arr1[shuffler], arr2[shuffler]

In [None]:
# Build the per-split feature matrices and 1.0 (fake) / 0.0 (real) label vectors.
x_train, x_test, x_val, y_train, y_test, y_val = get_ttv_xy(fake_df, real_df)

In [24]:
# get_ttv_xy stacks all fake rows before the real ones; shuffle rows and labels together to mix the classes.
x_train, y_train = shuffle_two_arrays(x_train, y_train)

In [25]:
# Same paired shuffle for the test split (rows and labels stay aligned).
x_test, y_test = shuffle_two_arrays(x_test, y_test)

In [26]:
# Same paired shuffle for the validation split (rows and labels stay aligned).
x_val, y_val = shuffle_two_arrays(x_val, y_val)

In [27]:
def demeaning_local(array_nd):
    """Return a float32 copy of ``array_nd`` centred over its first three axes.

    The mean is taken jointly over axes 0, 1 and 2 (accumulated in float64),
    leaving one mean per position of the remaining axes; that mean is then
    subtracted by broadcasting. The input array itself is not modified.
    """
    centred = array_nd.astype('float32')  # astype copies, so the caller's data stays intact
    axis_means = centred.mean(axis=(0, 1, 2), dtype='float64')
    # In-place subtraction keeps the float32 dtype of the working copy.
    np.subtract(centred, axis_means, out=centred)
    return centred

In [28]:
def demeaning_global(arr):
    """Subtract the single overall mean of ``arr`` from every element.

    The mean is printed with three decimals before the centred result is
    returned; ``arr`` itself is left unchanged.
    """
    overall_mean = arr.mean()
    print('Mean: %.3f' % overall_mean)
    return arr - overall_mean

In [29]:
# View every flat training row as a (40, 40, 4, 3) block, demean, then flatten back to one row per sample.
# NOTE(review): as recorded in this cell's output, the first reshape currently fails: x_train holds
# 264109755 values for 13755 rows, i.e. 19201 per row -- one more than 40*40*4*3 = 19200. The column
# selection in get_ttv_xy therefore keeps one non-feature column; fix the selection there.
x_train_4d = x_train.reshape((len(x_train), 40, 40, 4, 3))
x_train_4d_demean = demeaning_local(x_train_4d)
x_train_flat = x_train_4d_demean.reshape((len(x_train), 40 * 40 * 4 * 3))

ValueError: cannot reshape array of size 264109755 into shape (13755,40,40,4,3)

In [None]:
# Same pipeline for the test split: (samples, 40, 40, 4, 3) view -> demean -> flat rows.
# NOTE(review): demeaning_local computes the means from the array it is given, so the test split is
# centred with its own means, not the training means -- confirm this is intended.
x_test_4d = x_test.reshape((len(x_test), 40, 40, 4, 3))
x_test_4d_demean = demeaning_local(x_test_4d)
x_test_flat = x_test_4d_demean.reshape((len(x_test), 40 * 40 * 4 * 3))

In [None]:
# Same pipeline for the validation split: (samples, 40, 40, 4, 3) view -> demean -> flat rows.
# NOTE(review): demeaning_local computes the means from the array it is given, so the validation split
# is centred with its own means, not the training means -- confirm this is intended.
x_val_4d = x_val.reshape((len(x_val), 40, 40, 4, 3))
x_val_4d_demean = demeaning_local(x_val_4d)
x_val_flat = x_val_4d_demean.reshape((len(x_val), 40 * 40 * 4 * 3))

In [None]:


# Fit a logistic-regression classifier on the demeaned, flattened training rows.
#
# x_val_flat / x_test_flat are intentionally NOT rebuilt here: this cell used to overwrite them with
# plain reshapes of the raw x_val / x_test, so the model was trained on demeaned data but scored on
# non-demeaned data. The demeaned versions produced by the preceding cells are used instead.
# LogisticRegression is already imported in the first cell, so the duplicate import is dropped.

# 'saga' is a stochastic solver; a fixed random_state makes the fit repeatable between runs.
logisticRegr = LogisticRegression(solver = 'saga', random_state = 0)
logit = logisticRegr.fit(x_train_flat, y_train)

# training, test and validation accuracy
print('training acc: {:.2f}'.format(logit.score(x_train_flat, y_train)))
print('test acc: {:.2f}'.format(logit.score(x_test_flat, y_test)))
print('validation acc: {:.2f}'.format(logit.score(x_val_flat, y_val)))

In [None]:
# Hard class predictions for the test and validation splits, in that order.
y_test_pred, y_val_pred = (
    logit.predict(features) for features in (x_test_flat, x_val_flat)
)

In [None]:
# Per-class precision / recall / F1 on the test split.
# (classification_report is already imported in the notebook's first cell.)
print(classification_report(y_true=y_test, y_pred=y_test_pred))

In [None]:
# Test-set confusion matrix, computed once and then printed (it used to be computed twice).
# Rows are the actual class and columns the predicted class, both ordered as labels=[0, 1].
cm = confusion_matrix(y_test, y_test_pred, labels=[0,1])
print(cm)

In [None]:
import seaborn as sns

# Format the actual test accuracy. The previous code formatted the bound method `logit.score`
# itself (never calling it) and then left the resulting string unused.
all_sample_title = 'Accuracy Score: {0:.3f}'.format(logit.score(x_test_flat, y_test))

plt.figure(figsize=(9,9))
# cm holds integer counts, so annotate the cells as integers rather than with three decimals.
sns.heatmap(cm, annot=True, fmt="d", linewidths=.5, square = True, cmap = 'Blues_r');
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
plt.title("Confusion matrix\n" + all_sample_title);

In [None]:
# Per-class precision / recall / F1 on the validation split.
print(classification_report(y_true=y_val, y_pred=y_val_pred))

In [None]:
# Inspect the fitted weights and flatten them to a 1-D vector.
print(logit.coef_)
print(logit.coef_.shape)
coef_lr = logit.coef_.reshape(-1)
print(coef_lr.shape)

# Rescale the coefficient magnitudes linearly to [0, 1].
# The bounds must come from the absolute values themselves: using the min/max of the signed
# coefficients (as before) maps |w| = 0 to a value above 0 and clips large negative weights.
coef_lr_abs = np.abs(coef_lr)
coef_lr_0_1 = np.interp(coef_lr_abs, (coef_lr_abs.min(), coef_lr_abs.max()), (0, 1))

# Four contiguous, equally sized chunks of the flat weight vector.
# NOTE(review): the demeaning cells view each row as (40, 40, 4, 3); under that layout contiguous
# quarters do not correspond to the third (size-4) axis -- confirm which layout the CSV really uses.
coef_lr_split_0_1 = np.array_split(coef_lr_0_1, 4)

In [None]:
# Average every run of three consecutive weights (presumably the colour channels -- TODO confirm),
# cut the result into four contiguous chunks and draw each chunk as a 40x40 greyscale image.
# NOTE(review): the demeaning cells reshape each row as (40, 40, 4, 3); under that layout contiguous
# chunks would not line up with the four features named below -- confirm the real column order.
coef_lr_0_1_3mean = coef_lr_0_1.reshape(-1, 3).mean(axis=1)
coef_lr_split_0_1_3mean = np.array_split(coef_lr_0_1_3mean, 4)
feat = ['eye_left', 'eye_right', 'nose', 'lips']
plt.rcParams['figure.figsize'] = (20.0, 16.0)
for panel, (label, weights) in enumerate(zip(feat, coef_lr_split_0_1_3mean), start=1):
    plt.subplot(1, 4, panel)
    plt.imshow(weights.reshape((40, 40)), cmap='Greys')
    plt.title(label)
    plt.axis('off')