# CelebA-Spoof for Anti Spoofing using VGG19

Muhammad Adisatriyo Pratama - May 2021
____________________
## Face Anti-Spoofing using the CelebA-Spoof dataset with TensorFlow

CelebA-Spoof Dataset Reference:
- https://github.com/Davidzhangyuanhan/CelebA-Spoof

```bibtex
@inproceedings{CelebA-Spoof,
  title={CelebA-Spoof: Large-Scale Face Anti-Spoofing Dataset with Rich Annotations},
  author={Zhang, Yuanhan and Yin, Zhenfei and Li, Yidong and Yin, Guojun and Yan, Junjie and Shao, Jing and Liu, Ziwei},
  booktitle={European Conference on Computer Vision (ECCV)},
  year={2020}
}
```

## Use GPU

In [None]:
# IPython shell escape: run nvidia-smi to list the GPUs visible on this machine.
!nvidia-smi

## Import Libraries

In [None]:
# basic library
import os
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
# Apply the ggplot theme to the figures drawn in this notebook.
plt.style.use('ggplot')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

## Dataset Preparation

In [None]:
# Dataset root; the code below expects train/ and test/ folders underneath it,
# each holding a real/ and a spoof/ subfolder.
data_path = 'e:/skripsi/face-anti-spoof-celeba-spoof/dataset/'
TRAIN_DIR, TEST_DIR = data_path + 'train/', data_path + 'test/'

# Per-class folders of the training split.
train_real_dir, train_spoof_dir = (
    os.path.join(TRAIN_DIR, label) for label in ('real', 'spoof')
)

# Per-class folders of the testing split.
testing_real_dir, testing_spoof_dir = (
    os.path.join(TEST_DIR, label) for label in ('real', 'spoof')
)

# File names of the training images, used later to preview samples.
live_train = os.listdir(train_real_dir)
spoof_train = os.listdir(train_spoof_dir)

In [None]:
# Report how many files each class folder holds, split by split.
image_counts = (
    ('total training real images :', train_real_dir),
    ('total training spoof images :', train_spoof_dir),
    ('total testing real images :', testing_real_dir),
    ('total testing spoof images :', testing_spoof_dir),
)
for caption, folder in image_counts:
    print(caption, len(os.listdir(folder)))

## Sample Real and Spoof Images

In [None]:
import cv2


def load_img(path, size=(224, 224)):
    """Read an image file and return it resized, in RGB channel order.

    Args:
        path: Location of the image file on disk.
        size: Target size handed to ``cv2.resize`` as ``(width, height)``.
            Defaults to ``(224, 224)``.

    Returns:
        The resized image with its last axis reversed, which turns OpenCV's
        BGR channel order into RGB.

    Raises:
        OSError: If OpenCV could not read an image from ``path``.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; fail here with the offending path instead of
        # letting cv2.resize raise an unrelated-looking error.
        raise OSError("Could not read image: {}".format(path))
    image = cv2.resize(image, size)
    # Reverse the channel axis: BGR -> RGB.
    return image[..., ::-1]

In [None]:
# Preview up to 25 real-face training images in a 5x5 grid.
fig = plt.figure(figsize=(15, 15))
# The title belongs to the whole figure, so set it once instead of per tile.
plt.suptitle("Real faces", fontsize=20)

# Slicing caps the grid at 25 tiles and avoids an IndexError when the folder
# holds fewer images than that.
for i, file_name in enumerate(live_train[:25]):
    plt.subplot(5, 5, i + 1)
    # load_img returns 3-channel RGB data, for which imshow ignores any
    # colormap, so none is passed.
    plt.imshow(load_img(os.path.join(train_real_dir, file_name)))
    plt.axis('off')

plt.show()

In [None]:
# Preview up to 25 spoof-face training images in a 5x5 grid.
fig = plt.figure(figsize=(15, 15))
# The title belongs to the whole figure, so set it once instead of per tile.
plt.suptitle("Spoof faces", fontsize=20)

# Slicing caps the grid at 25 tiles and avoids an IndexError when the folder
# holds fewer images than that.
for i, file_name in enumerate(spoof_train[:25]):
    plt.subplot(5, 5, i + 1)
    # load_img returns 3-channel RGB data, for which imshow ignores any
    # colormap, so none is passed.
    plt.imshow(load_img(os.path.join(train_spoof_dir, file_name)))
    plt.axis('off')

plt.show()

## Preprocessing

In [None]:
# tensorflow library
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

# Report the library versions and whether TensorFlow can see a GPU.
print("Version: ", tf.__version__)
print("Hub version: ", hub.__version__)

gpu_devices = tf.config.list_physical_devices('GPU')
gpu_status = "available" if gpu_devices else "NOT AVAILABLE"
print("GPU is", gpu_status)

In [None]:
# Ask TensorFlow to grow its GPU memory allocation on demand.
# NOTE(review): this is set after TensorFlow was imported; confirm it still
# takes effect in the TensorFlow version in use.
os.environ.update({'TF_FORCE_GPU_ALLOW_GROWTH': 'true'})

### Variables

In [None]:
# Settings shared by the preprocessing, model and training cells.

# Input geometry: used as the generators' target size (without the channel
# entry) and as the VGG19 input shape.
IMG_SHAPE = (224, 224, 3)

# Number of images per generator batch.
BATCH_SIZE = 50

# Number of training epochs.
EPOCHS = 20

# Learning rate; presumably meant for the optimizer -- confirm where it is used.
INIT_LR = 0.0001
# ALPHA = 1e-5

# SAMPLE_QTD = 1.0
# Seed value; not referenced by the visible cells.
RANDOM_STATE = 42

In [None]:
# Import from tensorflow.keras so the generators come from the same Keras
# implementation as the tf.keras model built further down.
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Fraction of the training folder held out for validation.
val_share = 0.1

# Training images are scaled by 1/255 and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=val_share,
    rotation_range=20,
    # width_shift_range=0.15,
    # height_shift_range=0.15,
    # shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True
)

# Validation images use the same split fraction but are only rescaled, so the
# monitored validation metrics are not perturbed by random augmentation.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=val_share,
)

# Test images are only rescaled.
test_datagen = ImageDataGenerator(
    rescale=1./255,
)

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SHAPE[:-1],
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training',
    shuffle=True
)

val_generator = val_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SHAPE[:-1],
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    shuffle=True
)

# shuffle=False keeps the batches in file order, so predictions line up with
# test_generator.classes.
test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=IMG_SHAPE[:-1],
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

In [None]:
# Show the label mapping each generator derived from the folder names.
for generator in (train_generator, val_generator, test_generator):
    print(generator.class_indices)

# Display names, looked up by integer label when plotting predictions.
class_names = ['real', 'spoof']

# VGG19

In [None]:
# VGG19 base loaded with ImageNet weights; include_top=False leaves out its
# original classifier layers so a new head can be stacked on top.
vgg19 = tf.keras.applications.VGG19(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')

In [None]:
# Stack a small classification head on top of the VGG19 base.
model = tf.keras.models.Sequential()
model.add(vgg19)
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Dropout(0.1))
# Two softmax outputs, one per class.
model.add(tf.keras.layers.Dense(2, activation='softmax'))

# Keep the base (the first layer of the stack) trainable so its weights are
# updated together with the head.
vgg19.trainable = True

In [None]:
# Use the learning rate declared in INIT_LR instead of Adam's default: the
# VGG19 base is trainable, and fine-tuning pretrained weights calls for a
# small step size.
# Sparse categorical cross-entropy pairs the generators' integer class labels
# with the two-unit softmax output.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=INIT_LR),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

## Callbacks

In [None]:
# Save the weights file whenever validation accuracy reaches a new maximum.
filepath = "weights/transferlearning_weights_VGG19.h5"
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

## Training

In [None]:
# Train on the first GPU; the per-epoch metrics are kept in `hist`.
with tf.device('/GPU:0'):
    hist = model.fit(
        x=train_generator,
        epochs=EPOCHS,
        validation_data=val_generator,
        callbacks=[checkpoint],
    )


## Training Results

In [None]:
# Plot the training and validation accuracy/loss curves recorded by model.fit.
plt.style.use('ggplot')
acc = hist.history['accuracy']
val_acc = hist.history['val_accuracy']

loss = hist.history['loss']
# Read from `hist`; the previous `histy` was an undefined name.
val_loss = hist.history['val_loss']

# Size the x-axis from the recorded history so it still matches the curves if
# training ended before EPOCHS epochs.
epochs_range = range(len(acc))

plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

## Testing

In [None]:
# Evaluate on the test set and report how long the evaluation took.
import time

start_time = time.time()
model.evaluate(test_generator)

# Elapsed wall-clock time, first in seconds and then in minutes.
elapsed_seconds = time.time() - start_time
print('---- %s seconds ----' % elapsed_seconds)
elapsed_minutes = (time.time() - start_time) / 60
print('---- %s minutes ----' % elapsed_minutes)

## Testing Results

In [None]:
# Predicted label: index of the largest softmax output for each test image.
y_pred = np.argmax(model.predict(test_generator, verbose=1), axis=1)

# Actual label of each test image, as recorded by the generator.
y_test = test_generator.classes

print(y_pred)
print(y_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Per-class precision, recall and F1, labelled with the class folder names.
report = classification_report(
    y_test,
    y_pred,
    target_names=list(test_generator.class_indices),
)
print(report)

## Plot Predicted Image

In [None]:
def plot_image(i, predictions_array, true_label, img):
    """Draw one image on the current axes, captioned with its prediction.

    Args:
        i: Index of the sample inside ``true_label`` and ``img``.
        predictions_array: Scores for this one sample; the arg-max is taken
            as the predicted label and the max as the confidence.
        true_label: Sequence of actual labels, indexed by ``i``.
        img: Sequence of image arrays, indexed by ``i``; the selected image
            is cast to ``uint8`` for display.

    The caption reads "<predicted> <confidence>% (<actual>)" and is blue when
    the prediction matches the actual label, red otherwise.
    """
    actual = true_label[i]
    picture = img[i]

    # Bare axes: no grid and no ticks around the picture.
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(picture.astype("uint8"))

    predicted = np.argmax(predictions_array)
    color = 'blue' if predicted == actual else 'red'

    caption = "{} {:2.0f}% ({})".format(class_names[predicted],
                                        100 * np.max(predictions_array),
                                        class_names[actual])
    plt.xlabel(caption, color=color)

In [None]:
# Plot the first test images (up to 30) with their predicted and actual labels.
num_rows = 10
num_cols = 3

# test_generator is not shuffled, so its first batch holds the first test
# files in the same order as y_test.
sample_images, _ = test_generator[0]
num_images = min(num_rows * num_cols, len(sample_images))
sample_images = sample_images[:num_images]

# plot_image needs the per-class scores of each sample (y_pred only holds the
# arg-maxed labels) and an indexable array of images (not a single file path).
sample_probs = model.predict(sample_images)

# Undo the generator's 1/255 rescaling so the pixels survive plot_image's
# cast to uint8.
display_images = np.rint(sample_images * 255)

plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
    # Only every other grid slot is used, leaving an empty slot beside each image.
    plt.subplot(num_rows, 2*num_cols, 2*i+1)
    plot_image(i, sample_probs[i], y_test, display_images)
plt.show()

## Save Model

In [None]:
# Save the model in its state after the last training epoch to an HDF5 file
# (the best-val_accuracy weights are written separately by the checkpoint callback).
model.save('models/VGG19.h5')