# Import libraries

In [1]:
from tqdm import tqdm
import numpy as np
import pandas as pd
import os
 
# Importing sklearn libraries
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import preprocessing
 
# Importing Keras libraries
import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.applications import VGG16
from keras.applications import imagenet_utils
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.layers import Dense, Conv2D, MaxPooling2D
from keras.layers import Dropout, Flatten, GlobalAveragePooling2D
from keras.preprocessing import image
from keras.layers import merge, Input
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.inception_resnet_v2 import preprocess_input

# Import and preprocess data

In [4]:
# Build the pretrained InceptionResNetV2 convolutional base (ImageNet weights,
# classifier head removed) for 218x178x3 inputs; it is used below purely as a
# fixed feature extractor.
image_input = Input(shape=(218,178,3))
model = InceptionResNetV2(include_top=False,weights="imagenet",input_tensor=image_input)
model.summary()

# Locate the parent of the notebook's working directory WITHOUT changing the
# process working directory. The previous os.chdir("..") moved one level
# further up on every re-run of this cell, and os.system(p) tried to execute
# the directory path as a shell command.
p = os.getcwd()
print(p)
d = os.path.dirname(p)
print(d)

csvfile = os.path.join(d, 'datasets/celeba/labels.csv')
imgfile = os.path.join(d, 'datasets/celeba/img/')

# The multi-character separator is interpreted as a regular expression by the
# python engine, i.e. fields are split on tab characters.
dataset = pd.read_csv(csvfile, sep='\\t', engine='python')

# One flattened feature vector per image, in the row order of `dataset`.
# Iterating over the column values is positional, so it does not depend on
# the DataFrame's index labels (the old dataset['img_name'][i] lookup did).
resnet_feature_list = []
for img_name in tqdm(dataset['img_name']):
    pixels = image.img_to_array(image.load_img(os.path.join(imgfile, img_name)))
    batch = preprocess_input(np.expand_dims(pixels, axis=0))  # add batch axis, then scale

    # predict() is called on a single-image batch; flatten its output to 1-D.
    resnet_feature_list.append(model.predict(batch).flatten())


resnet_feature_list_np = np.array(resnet_feature_list)

resnet_feature_list_np.shape

Model: "inception_resnet_v2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 218, 178, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 108, 88, 32)  864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 108, 88, 32)  96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 108, 88, 32)  0           batch_normalization[0][0]        
________________________________________________________________________________

activation_78 (Activation)      (None, 11, 9, 160)   0           batch_normalization_78[0][0]     
__________________________________________________________________________________________________
conv2d_76 (Conv2D)              (None, 11, 9, 192)   208896      mixed_6a[0][0]                   
__________________________________________________________________________________________________
conv2d_79 (Conv2D)              (None, 11, 9, 192)   215040      activation_78[0][0]              
__________________________________________________________________________________________________
batch_normalization_76 (BatchNo (None, 11, 9, 192)   576         conv2d_76[0][0]                  
__________________________________________________________________________________________________
batch_normalization_79 (BatchNo (None, 11, 9, 192)   576         conv2d_79[0][0]                  
__________________________________________________________________________________________________
activation

  0%|          | 0/5000 [00:00<?, ?it/s]


__________________________________________________________________________________________________
activation_181 (Activation)     (None, 5, 4, 224)    0           batch_normalization_181[0][0]    
__________________________________________________________________________________________________
conv2d_179 (Conv2D)             (None, 5, 4, 192)    399360      block8_4_ac[0][0]                
__________________________________________________________________________________________________
conv2d_182 (Conv2D)             (None, 5, 4, 256)    172032      activation_181[0][0]             
__________________________________________________________________________________________________
batch_normalization_179 (BatchN (None, 5, 4, 192)    576         conv2d_179[0][0]                 
__________________________________________________________________________________________________
batch_normalization_182 (BatchN (None, 5, 4, 256)    768         conv2d_182[0][0]                 
_________

100%|██████████| 5000/5000 [16:48<00:00,  4.96it/s]


(5000, 30720)

# Label and split data

In [5]:
# Assemble the design matrix and the target vector.
# X: the extracted feature vectors stacked row-wise (one row per image).
X = np.asarray(resnet_feature_list)
# y: the 'smiling' column mapped through (v + 1) / 2.
# NOTE(review): presumably the raw labels are -1/+1 so this yields 0.0/1.0 — confirm against labels.csv.
y = np.asarray(dataset['smiling'] + 1) / 2

In [6]:
# Hold out 25% of the samples for validation; the fixed random_state keeps
# the split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=100,
)

# Optimise solver

In [7]:
# Solver comparison: lbfgs. Fit on the training split, score on the held-out split.
model = LogisticRegression(solver='lbfgs', max_iter=10000).fit(X_train, Y_train)
result = model.score(X_test, Y_test)
print("Accuracy: %.2f%%" % (100.0 * result))

Accuracy: 83.12%


In [8]:
# Solver comparison: newton-cg. Fit on the training split, score on the held-out split.
model = LogisticRegression(solver='newton-cg', max_iter=10000).fit(X_train, Y_train)
result = model.score(X_test, Y_test)
print("Accuracy: %.2f%%" % (100.0 * result))

Accuracy: 83.12%


In [9]:
# Solver comparison: liblinear. Fit on the training split, score on the held-out split.
model = LogisticRegression(solver='liblinear', max_iter=10000).fit(X_train, Y_train)
result = model.score(X_test, Y_test)
print("Accuracy: %.2f%%" % (100.0 * result))

Accuracy: 83.36%


# Optimise C value

In [10]:
# Regularisation sweep: C = 10 (liblinear), scored on the held-out split.
model = LogisticRegression(C=10, solver='liblinear', max_iter=10000).fit(X_train, Y_train)
result = model.score(X_test, Y_test)
print("Accuracy: %.2f%%" % (100.0 * result))

Accuracy: 83.04%


In [11]:
# Regularisation sweep: C = 1.0 (liblinear), scored on the held-out split.
model = LogisticRegression(C=1.0, solver='liblinear', max_iter=10000).fit(X_train, Y_train)
result = model.score(X_test, Y_test)
print("Accuracy: %.2f%%" % (100.0 * result))

Accuracy: 83.36%


In [12]:
# Regularisation sweep: C = 0.1 (liblinear), scored on the held-out split.
model = LogisticRegression(C=0.1, solver='liblinear', max_iter=10000).fit(X_train, Y_train)
result = model.score(X_test, Y_test)
print("Accuracy: %.2f%%" % (100.0 * result))

Accuracy: 83.04%


In [13]:
# Regularisation sweep: C = 0.01 (liblinear), scored on the held-out split.
model = LogisticRegression(C=0.01, solver='liblinear', max_iter=10000).fit(X_train, Y_train)
result = model.score(X_test, Y_test)
print("Accuracy: %.2f%%" % (100.0 * result))

Accuracy: 83.60%


# Accuracy of validation set using KFold CV

In [14]:
# 5-fold cross-validation (shuffled, seeded) over the full feature set X / y.
# NOTE(review): C=0.001 and max_iter=20000 differ from the C=0.01 /
# max_iter=10000 used by the other cells — confirm this is intentional.
model_kfold = LogisticRegression(solver='liblinear', max_iter=20000, C=0.001)
kfold = KFold(n_splits=5, shuffle=True, random_state=0)
results_kfold = cross_val_score(model_kfold, X, y, cv=kfold)
# Report the mean of the per-fold scores as a percentage.
print("Accuracy: %.2f%%" % (100.0 * results_kfold.mean()))

Accuracy: 83.22%


# Finding train set accuracy

In [15]:
# Training-set accuracy of the chosen configuration (liblinear, C=0.01):
# the model is scored on the same split it was fitted on.
model = LogisticRegression(C=0.01, solver='liblinear', max_iter=10000).fit(X_train, Y_train)
result = model.score(X_train, Y_train)
print("Accuracy: %.2f%%" % (100.0 * result))

Accuracy: 100.00%


# Accuracy of test set

In [16]:
# Rebuild the pretrained InceptionResNetV2 convolutional base (ImageNet
# weights, classifier head removed) for 218x178x3 inputs. The name `model`
# was rebound to LogisticRegression estimators by earlier cells, so the
# network has to be constructed again here.
image_input = Input(shape=(218,178,3))
model = InceptionResNetV2(include_top=False,weights="imagenet",input_tensor=image_input)
model.summary()

# Test-set locations, relative to the directory `d` resolved when the
# training data was loaded.
csvtest = os.path.join(d, 'test/celeba_test/labels.csv')
imgtest = os.path.join(d, 'test/celeba_test/img/')

# The multi-character separator is interpreted as a regular expression by the
# python engine, i.e. fields are split on tab characters.
test_dataset = pd.read_csv(csvtest, sep='\\t', engine='python')

# One flattened feature vector per test image, in the row order of
# `test_dataset`. Iterating over the column values is positional, so it does
# not depend on the DataFrame's index labels (the old [i] lookup did).
resnet_test_feature_list = []
for img_name in tqdm(test_dataset['img_name']):
    pixels = image.img_to_array(image.load_img(os.path.join(imgtest, img_name)))
    batch = preprocess_input(np.expand_dims(pixels, axis=0))  # add batch axis, then scale

    # predict() is called on a single-image batch; flatten its output to 1-D.
    resnet_test_feature_list.append(model.predict(batch).flatten())


resnet_test_feature_list_np = np.array(resnet_test_feature_list)

resnet_test_feature_list_np.shape

Model: "inception_resnet_v2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 218, 178, 3) 0                                            
__________________________________________________________________________________________________
conv2d_203 (Conv2D)             (None, 108, 88, 32)  864         input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_203 (BatchN (None, 108, 88, 32)  96          conv2d_203[0][0]                 
__________________________________________________________________________________________________
activation_203 (Activation)     (None, 108, 88, 32)  0           batch_normalization_203[0][0]    
________________________________________________________________________________

  0%|          | 0/1000 [00:00<?, ?it/s]

conv2d_325 (Conv2D)             (None, 11, 9, 160)   143360      activation_324[0][0]             
__________________________________________________________________________________________________
batch_normalization_325 (BatchN (None, 11, 9, 160)   480         conv2d_325[0][0]                 
__________________________________________________________________________________________________
activation_325 (Activation)     (None, 11, 9, 160)   0           batch_normalization_325[0][0]    
__________________________________________________________________________________________________
conv2d_323 (Conv2D)             (None, 11, 9, 192)   208896      block17_11_ac[0][0]              
__________________________________________________________________________________________________
conv2d_326 (Conv2D)             (None, 11, 9, 192)   215040      activation_325[0][0]             
__________________________________________________________________________________________________
batch_norm

100%|██████████| 1000/1000 [02:56<00:00,  5.65it/s]


(1000, 30720)

In [17]:
# Assemble the test design matrix and target vector.
# X_TEST: the extracted test feature vectors stacked row-wise.
X_TEST = np.asarray(resnet_test_feature_list)
# Y_TEST: the 'smiling' column mapped through (v + 1) / 2.
# NOTE(review): presumably the raw labels are -1/+1 so this yields 0.0/1.0 — confirm against the test labels.csv.
Y_TEST = np.asarray(test_dataset['smiling'] + 1) / 2

In [19]:
# Final evaluation: fit the chosen configuration (liblinear, C=0.01) on the
# training split and score it on the separate test set.
model = LogisticRegression(C=0.01, solver='liblinear', max_iter=10000).fit(X_train, Y_train)
result = model.score(X_TEST, Y_TEST)
print("Accuracy: %.2f%%" % (100.0 * result))

Accuracy: 82.70%
