In [343]:
import tensorflow as tf
from keras import layers
from keras.models import Sequential
from keras.applications.efficientnet import EfficientNetB7
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import load_model
import pandas as pd
import os
import numpy as np
import wandb
from wandb.keras import WandbCallback

In [344]:
config_defaults = {
    'epochs': 3,
    'batch_size': 32,
    'learning_rate': 0.0001,
    'dropout': 0.5,
    'regularization': 0.0001,
}
wandb.init(config=config_defaults, project="deepfake-efficientnet", entity="dat550")
config = wandb.config




VBox(children=(Label(value='5.642 MB of 5.642 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▅█▁
epoch,▁▅█
loss,█▃▁
val_accuracy,▁▁▁
val_loss,▁▄█

0,1
accuracy,0.41667
best_epoch,0.0
best_val_loss,1.38148
epoch,2.0
loss,1.63466
val_accuracy,0.5
val_loss,4.7333


In [345]:
batch_size = config.batch_size
img_size = 128
data_dir = "./data/tester"

In [346]:
train_datagen = ImageDataGenerator(
    rescale = 1/255,    #rescale the tensor values to [0,1]
    rotation_range = 10,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    shear_range = 0.2,
    zoom_range = 0.1,
    horizontal_flip = True,
    fill_mode = 'nearest'
)

In [347]:
train_generator = train_datagen.flow_from_directory(
    directory = f"{data_dir}/train",
    target_size = (img_size, img_size),
    color_mode = "rgb",
    class_mode = "binary",
    batch_size = batch_size,
    shuffle = True
)

Found 12 images belonging to 2 classes.


In [348]:
val_datagen = ImageDataGenerator(
    rescale = 1/255    #rescale the tensor values to [0,1]
)

In [349]:
val_generator = val_datagen.flow_from_directory(
    directory = f"{data_dir}/validation",
    target_size = (img_size, img_size),
    color_mode = "rgb",
    class_mode = "binary",
    batch_size = batch_size,
    shuffle = True
)

Found 12 images belonging to 2 classes.


In [350]:
efficient_net = EfficientNetB7(
    weights = 'imagenet',
    input_shape = (img_size, img_size, 3),
    include_top = False,
    pooling = 'max',
    drop_connect_rate=0.5
)

model = Sequential()
model.add(efficient_net)
model.add(layers.Dense(units = 512, activation = 'relu', kernel_regularizer=tf.keras.regularizers.L2(config.regularization), bias_regularizer=tf.keras.regularizers.L2(config.regularization)))
model.add(layers.Dropout(config.dropout))
model.add(layers.Dense(units = 128, activation = 'relu', kernel_regularizer=tf.keras.regularizers.L2(config.regularization), bias_regularizer=tf.keras.regularizers.L2(config.regularization)))
model.add(layers.Dense(units = 1, activation = 'sigmoid'))

In [351]:
model.summary()

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 efficientnetb7 (Functional)  (None, 2560)             64097687  
                                                                 
 dense_39 (Dense)            (None, 512)               1311232   
                                                                 
 dropout_13 (Dropout)        (None, 512)               0         
                                                                 
 dense_40 (Dense)            (None, 128)               65664     
                                                                 
 dense_41 (Dense)            (None, 1)                 129       
                                                                 
Total params: 65,474,712
Trainable params: 65,163,985
Non-trainable params: 310,727
_________________________________________________________________


In [352]:
# Compile model
model.compile(optimizer = tf.keras.optimizers.Adam(lr=config.learning_rate), loss='binary_crossentropy', metrics=['accuracy'])


  super(Adam, self).__init__(name, **kwargs)


In [353]:
model_file = f'f{wandb.run.name}_model.h5'

custom_callbacks = [
    EarlyStopping(
        monitor = 'val_loss',
        mode = 'min',
        patience = 5,
        verbose = 1
    ),
    ModelCheckpoint(
        filepath = model_file,
        monitor = 'val_loss',
        mode = 'min',
        verbose = 1,
        save_best_only = True
    ),
    WandbCallback()
]

In [354]:
# Train network
num_epochs = config.epochs
history = model.fit_generator(
    train_generator,
    epochs = num_epochs,
    steps_per_epoch = len(train_generator),
    validation_data = val_generator,
    validation_steps = len(val_generator),
    callbacks = custom_callbacks
)
print(history.history)

Epoch 1/3


  history = model.fit_generator(


Epoch 1: val_loss improved from inf to 1.22427, saving model to fglorious-hill-8_model.h5
Epoch 2/3
Epoch 2: val_loss improved from 1.22427 to 0.80566, saving model to fglorious-hill-8_model.h5
Epoch 3/3
Epoch 3: val_loss did not improve from 0.80566
{'loss': [0.9066199660301208, 1.6622676849365234, 1.1691819429397583], 'accuracy': [0.5833333134651184, 0.4166666567325592, 0.5], 'val_loss': [1.2242740392684937, 0.8056626319885254, 1.460307002067566], 'val_accuracy': [0.5, 0.5, 0.5], '_timestamp': [1650310957, 1650310972, 1650310980], '_runtime': [54, 69, 77]}


In [355]:
test_datagen = ImageDataGenerator(
    rescale = 1/255    #rescale the tensor values to [0,1]
)

test_generator = test_datagen.flow_from_directory(
    directory = f"{data_dir}/test",
    classes=['REAL', 'FAKE'],
    target_size = (img_size, img_size),
    color_mode = "rgb",
    class_mode = None,
    batch_size = 1,
    shuffle = False
)

Found 12 images belonging to 2 classes.


In [356]:
# load the saved model that is considered the best
best_model = load_model(model_file)

# Generate predictions
test_generator.reset()

preds = best_model.predict(
    test_generator,
    verbose = 1
)

test_results = pd.DataFrame({
    "Filename": test_generator.filenames,
    "Prediction": preds.flatten()
})

test_results



Unnamed: 0,Filename,Prediction
0,REAL/abqwwspghj1.jpg,0.442409
1,REAL/abqwwspghj2.jpg,0.442409
2,REAL/abqwwspghj3.jpg,0.442415
3,REAL/abqwwspghj4.jpg,0.442492
4,REAL/abqwwspghj5.jpg,0.442335
5,REAL/abqwwspghj6.jpg,0.442469
6,FAKE/abqwwspghj1.jpg,0.442409
7,FAKE/abqwwspghj2.jpg,0.442409
8,FAKE/abqwwspghj3.jpg,0.442415
9,FAKE/abqwwspghj4.jpg,0.442492


In [357]:
# round predictions to either 0 or 1
test_results["Rounded"] = test_results["Prediction"].round()

In [358]:
true_positive_fake = test_results[(test_results['Filename'].str.startswith('FAKE')) & (test_results['Rounded'] == 0)].count()[0]
false_positive_fake = test_results[(test_results['Filename'].str.startswith('REAL')) & (test_results['Rounded'] == 0)].count()[0]

true_positive_real = test_results[(test_results['Filename'].str.startswith('REAL')) & (test_results['Rounded'] == 1)].count()[0]
false_positive_real = test_results[(test_results['Filename'].str.startswith('FAKE')) & (test_results['Rounded'] == 1)].count()[0]

np.matrix([
    [true_positive_fake, false_positive_fake],
    [false_positive_real, true_positive_real]
])

matrix([[6, 6],
        [0, 0]])

In [359]:
# Log the resultmatrix to wandb
wandb.log({
    'true_positive_fake': true_positive_fake,
    'false_positive_fake': false_positive_fake,
    'true_positive_real': true_positive_real,
    'false_positive_real': false_positive_real
})

wandb: Network error (ReadTimeout), entering retry loop.
wandb: ERROR Error while calling W&B API: Error 1040: Too many connections (<Response [500]>)
wandb: ERROR Error while calling W&B API: Error 1040: Too many connections (<Response [500]>)
wandb: Network error (ReadTimeout), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
