In [1]:
# ## Download packages
# !pip install keras-vggface
# !pip install opencv-python
# !pip install tensorflow
# (no install needed: shutil is part of the Python standard library)
# !pip install matplotlib
# !pip install scikit-learn  # "sklearn" on PyPI is only a deprecated alias
# !pip install keras-applications
# !pip install tensorflow-gpu

In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.layers import (Conv2D, BatchNormalization, Activation, MaxPooling2D, GlobalAveragePooling2D, 
                          Dense, Flatten, Dropout)
from keras.optimizers import RMSprop, Adam, SGD
from keras import regularizers
from keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau

In [11]:
import os

# Side length, in pixels, used for the square image size passed to the
# generators and to the network input.
SHAPE = 224

# Every dataset folder is resolved relative to the current working directory.
base_path = os.getcwd()

# Top-level folders of the two splits.
train_location, val_location = (
    os.path.join(base_path, split_name) for split_name in ('train', 'val')
)

# Per-class folders inside each split.
train_fake_image_location, train_real_image_location = (
    os.path.join(train_location, class_name)
    for class_name in ('fake_image', 'real_image')
)
val_fake_image_location, val_real_image_location = (
    os.path.join(val_location, class_name)
    for class_name in ('fake_image', 'real_image')
)


In [12]:
import shutil
import os
def squash_and_merge(directory):
    """Flatten ``directory`` by pulling every entry out of its sub-folders.

    Each entry found one level down (``directory/<sub-folder>/<entry>``) is
    moved to ``directory/<n>.jpg``, where ``n`` is a running counter starting
    at 0. Each sub-folder is removed once its entries have been moved.

    Args:
        directory: path of the folder whose sub-folders should be merged.

    Notes:
        * Entries of ``directory`` that are not folders (for example files
          produced by an earlier run) are left untouched instead of crashing
          the ``os.listdir`` call on them.
        * A counter value whose ``<n>.jpg`` already exists is skipped, so an
          existing file is never overwritten.
        * Folders and files are visited in sorted order so the numbering is
          reproducible; ``os.listdir`` alone gives no ordering guarantee.
    """
    count = 0
    for folder_name in sorted(os.listdir(directory)):
        folder_path = os.path.join(directory, folder_name)
        if not os.path.isdir(folder_path):
            # Loose file directly inside `directory`: nothing to flatten.
            continue
        for entry_name in sorted(os.listdir(folder_path)):
            original = os.path.join(folder_path, entry_name)
            target = os.path.join(directory, f'{count}.jpg')
            # Advance past names that are already taken rather than
            # clobbering whatever is stored there.
            while os.path.exists(target):
                count += 1
                target = os.path.join(directory, f'{count}.jpg')
            count += 1
            shutil.move(original, target)
        shutil.rmtree(folder_path)

In [13]:
# squash_and_merge(train_fake_image_location)
# squash_and_merge(train_real_image_location)
# squash_and_merge(val_fake_image_location)
# squash_and_merge(val_real_image_location)

In [14]:
train_new_location = os.path.join(base_path, 'train_1')
val_new_location = os.path.join(base_path, 'val_1')

# (source folder, destination folder) pairs: each class folder is copied into
# a new tree whose class sub-folders are named 'fake' and 'real'.
copy_plan = [
    (train_fake_image_location, os.path.join(train_new_location, 'fake')),
    (train_real_image_location, os.path.join(train_new_location, 'real')),
    (val_fake_image_location, os.path.join(val_new_location, 'fake')),
    (val_real_image_location, os.path.join(val_new_location, 'real')),
]

for source_dir, target_dir in copy_plan:
    # Check every destination on its own. A single try/except around all four
    # copies would stop at the first existing destination and silently skip
    # the ones after it.
    if os.path.exists(target_dir):
        continue
    shutil.copytree(source_dir, target_dir)

# Reached only when no copy raised, so the message can be trusted.
print('Copied')

Copied


In [15]:
from keras.preprocessing.image import ImageDataGenerator
batch_size = 32
VALIDATION_SPLIT = 0.2  # fraction of train_new_location held out for validation

# Training images are rescaled to [0, 1] and randomly flipped horizontally.
train_datagen = ImageDataGenerator(rescale=1./255,
    horizontal_flip=True,
    validation_split=VALIDATION_SPLIT) # set validation split

# Held-out images get the same rescaling but NO augmentation, so validation
# metrics are computed on unmodified images.
# NOTE(review): this generator must keep the same validation_split as
# train_datagen so the 'training' and 'validation' subsets stay complementary.
holdout_datagen = ImageDataGenerator(rescale=1./255,
    validation_split=VALIDATION_SPLIT)

train_generator = train_datagen.flow_from_directory(
    train_new_location,
    target_size=(SHAPE, SHAPE),
    batch_size=batch_size,
    class_mode='binary',
    subset='training') # set as training data

validation_generator = holdout_datagen.flow_from_directory(
    train_new_location, # same directory as training data
    target_size=(SHAPE, SHAPE),
    batch_size=batch_size,
    class_mode='binary',
    subset='validation') # set as validation data

# Rescale-only generator for data outside the training tree.
val_datagen = ImageDataGenerator(rescale=1./255.)

# train_flow = train_datagen.flow_from_directory(
#     train_new_location,
#     target_size=(SHAPE, SHAPE),
#     batch_size=batch_size,
#     class_mode='binary',
# )

valid_flow = val_datagen.flow_from_directory(
    val_new_location,
    target_size=(SHAPE, SHAPE),
    batch_size=batch_size,
    class_mode='binary',
)


Found 53378 images belonging to 2 classes.
Found 13344 images belonging to 2 classes.
Found 12592 images belonging to 2 classes.


In [16]:
from tensorflow.keras.applications import DenseNet121

def get_model(learning_rate=0.001):
    """Build and compile the DenseNet121-based binary classifier.

    The network is a DenseNet121 backbone (ImageNet weights, no top) followed
    by global average pooling, a 512-unit ReLU layer, batch normalisation,
    dropout (0.3) and a single sigmoid output unit.

    Args:
        learning_rate: step size handed to the Adam optimizer. Defaults to
            0.001, the value that was previously hard-coded.

    Returns:
        The compiled ``tf.keras`` model (binary cross-entropy loss, accuracy
        metric).
    """
    densenet = DenseNet121(weights='imagenet',
                           include_top=False,
                           input_shape=(SHAPE, SHAPE, 3),
                          )
    model = tf.keras.models.Sequential([densenet,
                                        tf.keras.layers.GlobalAveragePooling2D(),
                                        tf.keras.layers.Dense(512, activation='relu'),
                                        tf.keras.layers.BatchNormalization(),
                                        tf.keras.layers.Dropout(0.3),
                                        tf.keras.layers.Dense(1, activation='sigmoid')
                                      ])
    # Use the optimizer from tf.keras, the same package the model is built
    # with, and the `learning_rate` keyword (`lr` is a deprecated alias).
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy']
                  )

    return model

model = get_model()
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet121 (Functional)     (None, 7, 7, 1024)        7037504   
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               524800    
_________________________________________________________________
batch_normalization_1 (Batch (None, 512)               2048      
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 513       
Total params: 7,564,865
Trainable params: 7,480,193
Non-trainable params: 84,672
_______________________________________

In [17]:
# Save the model to disk only when the monitored validation loss reaches a
# new minimum.
checkpoint = ModelCheckpoint(
    filepath='denseNet.v2.1.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Scale the learning rate by `factor` once val_loss has stalled for
# `patience` epochs.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_delta=0.0001,
    verbose=1,
)

# Write per-epoch metrics to a CSV-formatted log file.
csv_logger = CSVLogger('training.log')

callbacks = [
    checkpoint,
    reduce_lr,
    csv_logger,
]

In [27]:
# Number of whole batches available in each subset.
train_steps = train_generator.samples // batch_size
holdout_steps = validation_generator.samples // batch_size

# Train for 10 epochs; the returned History object is kept in `hist`.
hist = model.fit(
    train_generator,
    epochs=10,
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=holdout_steps,
    callbacks=callbacks,
)

Epoch 1/10

Epoch 00001: val_loss did not improve from 0.50405
Epoch 2/10

Epoch 00002: val_loss did not improve from 0.50405
Epoch 3/10

Epoch 00003: val_loss did not improve from 0.50405
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.50405

Epoch 00004: ReduceLROnPlateau reducing learning rate to 8.000000525498762e-06.
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.50405
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.50405
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.50405

Epoch 00007: ReduceLROnPlateau reducing learning rate to 1.6000001778593287e-06.
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.50405
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.50405
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.50405

Epoch 00010: ReduceLROnPlateau reducing learning rate to 3.200000264769187e-07.


In [28]:
def plot_loss(epochs, loss, val_loss):
    """Plot the training and validation loss.

    Args:
        epochs: list of epoch numbers (x axis).
        loss: training loss for each epoch.
        val_loss: validation loss for each epoch.
    """
    plt.plot(epochs, loss, 'bo', label='Training Loss')
    plt.plot(epochs, val_loss, 'orange', label = 'Validation Loss')
    plt.title('Training and Validation Loss')
    # Label the axes so the figure can be read on its own.
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
    
    
def plot_accuracy(epochs, acc, val_acc):
    """Plot the training and validation accuracy.

    Args:
        epochs: list of epoch numbers (x axis).
        acc: training accuracy for each epoch.
        val_acc: validation accuracy for each epoch.
    """
    plt.plot(epochs, acc, 'bo', label='Training accuracy')
    plt.plot(epochs, val_acc, 'orange', label = 'Validation accuracy')
    plt.title('Training and Validation Accuracy')
    # Label the axes so the figure can be read on its own.
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

In [None]:
# Save the in-memory model to 'spoffnet1.h5'. This is a different file from
# the 'denseNet.v2.1.h5' path given to the ModelCheckpoint callback.
model.save('spoffnet1.h5')

In [None]:
# The object returned by model.fit is stored in `hist`; no variable named
# `history` is ever defined, so reading from it raised a NameError.
acc = hist.history['accuracy']
val_acc = hist.history['val_accuracy']
loss = hist.history['loss']
val_loss = hist.history['val_loss']

In [None]:
# Epochs are numbered from 1, one per recorded loss value.
loss_epochs = range(1, len(loss) + 1)
plot_loss(loss_epochs, loss, val_loss)

In [None]:
# Epochs are numbered from 1; the count is taken from the loss history.
accuracy_epochs = range(1, len(loss) + 1)
plot_accuracy(accuracy_epochs, acc, val_acc)

## Evaluation 


In [29]:
from tensorflow import keras
import pandas as pd
# Folder holding the test data and the CSV listing the folders to score.
test_location = os.path.join(base_path, 'test')
predictions_csv = os.path.join(test_location, 'image_predictions.csv')

df = pd.read_csv(predictions_csv)
df

Unnamed: 0,folder,prediction
0,image/00000,
1,image/00001,
2,image/00002,
3,image/00003,
4,image/00004,
...,...,...
785,image/00785,
786,image/00786,
787,image/00787,
788,image/00788,


In [30]:
def predict(row, base, batch_size=1):
    """Return the mean model output over the ``.jpg`` images of one test folder.

    ``flow_from_directory`` reads images from sub-folders of the directory it
    is given, so the folder's ``.jpg`` files are first copied into a
    ``hello`` sub-folder, which is then read through ``val_datagen``.

    Args:
        row: mapping with a ``'folder'`` entry holding a '/'-separated path
            relative to ``base`` (for example ``'image/00000'``).
        base: directory that the ``'folder'`` path is relative to.
        batch_size: number of images per prediction batch. Defaults to 1,
            the value that was previously hard-coded.

    Returns:
        ``np.mean`` of the model's predictions for the folder, or ``np.nan``
        when the generator finds no images.
    """
    folder_dir = os.path.join(base, *row['folder'].split('/'))
    # List before creating the staging folder so a missing test folder still
    # raises instead of being created empty.
    file_names = os.listdir(folder_dir)

    staging_dir = os.path.join(folder_dir, 'hello')
    os.makedirs(staging_dir, exist_ok=True)

    for file_name in file_names:
        if not file_name.endswith('.jpg'):
            continue
        staged_path = os.path.join(staging_dir, file_name)
        # shutil.copy overwrites without raising, so test explicitly to
        # avoid re-copying every image each time the cell is run.
        if not os.path.exists(staged_path):
            shutil.copy(os.path.join(folder_dir, file_name), staged_path)

    test_flow = val_datagen.flow_from_directory(
        folder_dir,
        target_size=(SHAPE, SHAPE),
        batch_size=batch_size,
        class_mode=None,  # inference only: yield images without labels
        shuffle=False,
    )
    if test_flow.samples == 0:
        # Nothing to score; report it explicitly rather than failing inside
        # model.predict.
        return np.nan
    res = model.predict(test_flow)
    return np.mean(res)
    

In [31]:
# Score every listed folder; `test_location` is forwarded to predict as `base`.
df['prediction'] = df.apply(predict, axis=1, args=(test_location,))

Found 16 images belonging to 1 classes.
Found 13 images belonging to 1 classes.
Found 21 images belonging to 1 classes.
Found 20 images belonging to 1 classes.
Found 30 images belonging to 1 classes.
Found 72 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 17 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 13 images belonging to 1 classes.
Found 20 images belonging to 1 classes.
Found 11 images belonging to 1 classes.
Found 20 images belonging to 1 classes.
Found 14 images belonging to 1 classes.
Found 13 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 11 images belonging to 1 classes.
Found 17 images belonging to 1 classes.
Found 18 images belonging to 1 classes.
Found 13 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 17 images belonging to 1 classes.
Found 25 images belonging to 1 classes.
Found 13 images belonging to 1 classes.
Found 16 images belonging to 1 classes.


Found 19 images belonging to 1 classes.
Found 12 images belonging to 1 classes.
Found 10 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 12 images belonging to 1 classes.
Found 12 images belonging to 1 classes.
Found 14 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 17 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 13 images belonging to 1 classes.
Found 11 images belonging to 1 classes.
Found 20 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 11 images belonging to 1 classes.
Found 24 images belonging to 1 classes.
Found 23 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 10 images belonging to 1 classes.
Found 18 images belonging to 1 classes.
Found 11 images belonging to 1 classes.
Found 25 images belonging to 1 classes.
Found 28 images belonging to 1 classes.
Found 12 images belonging to 1 classes.
Found 12 images belonging to 1 classes.


Found 49 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 10 images belonging to 1 classes.
Found 16 images belonging to 1 classes.
Found 12 images belonging to 1 classes.
Found 21 images belonging to 1 classes.
Found 18 images belonging to 1 classes.
Found 10 images belonging to 1 classes.
Found 12 images belonging to 1 classes.
Found 13 images belonging to 1 classes.
Found 14 images belonging to 1 classes.
Found 10 images belonging to 1 classes.
Found 16 images belonging to 1 classes.
Found 14 images belonging to 1 classes.
Found 10 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 16 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 45 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 17 images belonging to 1 classes.
Found 13 images belonging to 1 classes.
Found 12 images belonging to 1 classes.
Found 14 images belonging to 1 classes.
Found 14 images belonging to 1 classes.


Found 12 images belonging to 1 classes.
Found 28 images belonging to 1 classes.
Found 14 images belonging to 1 classes.
Found 10 images belonging to 1 classes.
Found 20 images belonging to 1 classes.
Found 32 images belonging to 1 classes.
Found 16 images belonging to 1 classes.
Found 10 images belonging to 1 classes.
Found 17 images belonging to 1 classes.
Found 18 images belonging to 1 classes.
Found 34 images belonging to 1 classes.
Found 10 images belonging to 1 classes.
Found 15 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 18 images belonging to 1 classes.
Found 20 images belonging to 1 classes.
Found 19 images belonging to 1 classes.
Found 11 images belonging to 1 classes.
Found 12 images belonging to 1 classes.
Found 9 images belonging to 1 classes.
Found 17 images belonging to 1 classes.
Found 20 images belonging to 1 classes.
Found 17 images belonging to 1 classes.
Found 29 images belonging to 1 classes.
Found 24 images belonging to 1 classes.
F

In [32]:
# Display the table now that the 'prediction' column has been filled in.
df

Unnamed: 0,folder,prediction
0,image/00000,0.001280
1,image/00001,0.355724
2,image/00002,0.007989
3,image/00003,0.007067
4,image/00004,0.225806
...,...,...
785,image/00785,0.210561
786,image/00786,0.333628
787,image/00787,0.062243
788,image/00788,0.023783


In [33]:
# Write the table to CSV; index=False leaves out the DataFrame's row index.
df.to_csv('simyujie_e0310402@u.nus.edu.csv' , index=False)

In [34]:
from numba import cuda 
# Reset the current CUDA device through numba.
# NOTE(review): presumably done to release GPU memory held by this kernel;
# confirm, and expect the GPU state used by `model` to be unusable afterwards.
device = cuda.get_current_device()
device.reset()