<h1>Deepfake Detection Challenge</h1>

<h4>Biometric Systems 2019/2020 Project</h4>


In [None]:
# Third-party dependencies for this notebook. Only albumentations is pinned
# to an exact version; the other two install whatever is current.
!pip install albumentations==0.4.5
!pip install efficientnet
!pip install facenet-pytorch

In [None]:
# Upload a file through the Colab upload helper; the shell lines below
# expect it to be named kaggle.json and to sit in the working directory.
from google.colab import files
files.upload()

# Install the Kaggle CLI, copy kaggle.json into ~/.kaggle, list the directory
# as a visual check, then restrict the file to owner read/write.
# NOTE: the chmod targets /root/.kaggle explicitly while the other lines use ~.
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Mount Google Drive at /content/drive (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Download the meraxes10/datadfdc Kaggle dataset, unpack it into ./datadfdc
# and delete the archive. load_models() reads its weight files from datadfdc/.
# `mkdir` has no -p, so it reports an error if the directory already exists.
!kaggle datasets download -d meraxes10/datadfdc
!mkdir datadfdc
!unzip datadfdc.zip -d ./datadfdc
!rm datadfdc.zip

In [None]:
# Download the meraxes10/dfdctrainset Kaggle dataset, unpack it into
# ./dfdctrainset and delete the archive. The train/eval argument cells further
# down point at dfdctrainset/train_data.csv and dfdctrainset/train_set_faces/.
# `mkdir` has no -p, so it reports an error if the directory already exists.
!kaggle datasets download -d meraxes10/dfdctrainset
!mkdir dfdctrainset
!unzip dfdctrainset.zip -d ./dfdctrainset
!rm dfdctrainset.zip

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import numpy as np
import pandas as pd

In [None]:
import gc
import random
import time
from tqdm.notebook import tqdm

In [None]:
import cv2
from PIL import Image

In [None]:
import zipfile
import os

In [None]:
import torch

In [None]:
# Display the name of CUDA device 0.
# NOTE(review): unlike main(), this is not guarded by torch.cuda.is_available();
# presumably it errors on a CPU-only runtime - confirm.
torch.cuda.get_device_name(0)

In [None]:
from facenet_pytorch import MTCNN

In [None]:
import tensorflow as tf
from tensorflow import keras
import efficientnet.tfkeras as efn

In [None]:
from tensorflow.keras import optimizers
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Dense
from tensorflow.keras import metrics

In [None]:
import albumentations
from albumentations.augmentations import transforms

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, roc_curve, classification_report

In [None]:
def unzip_videos(orig, dest):
    """Extract every zip archive found under ``orig`` into ``dest``.

    The tree rooted at ``orig`` is walked recursively. Files that are not
    valid zip archives are skipped. All archives are extracted into the same
    ``dest`` directory.
    """
    candidates = (
        folder + '/' + entry
        for folder, _subdirs, entries in os.walk(orig)
        for entry in entries
    )
    for candidate in candidates:
        if not zipfile.is_zipfile(candidate):
            continue
        with zipfile.ZipFile(candidate, "r") as archive:
            archive.extractall(dest)

In [4]:
def extract_metadata(dest, src='./train_set_videos', n_parts=50):
    """Copy each part's ``metadata.json`` from ``src`` into ``dest``.

    For every ``i`` in ``range(n_parts)`` the file
    ``<src>/dfdc_train_part_<i>/metadata.json`` is read with pandas and
    written to ``<dest>/dfdc_train_part_<i>/metadata.json``.

    Args:
        dest: Output root directory. Created if missing.
        src: Root directory holding the ``dfdc_train_part_<i>`` folders.
        n_parts: Number of parts to process, starting from part 0.

    Target directories are created with ``exist_ok=True`` so the function can
    be re-run; a genuine ``OSError`` still prints a message and stops.
    """
    for i in range(n_parts):
        part = 'dfdc_train_part_' + str(i)
        df = pd.read_json(src + '/' + part + '/metadata.json')
        try:
            # makedirs also creates `dest` itself and tolerates existing
            # directories, which a bare mkdir does not.
            os.makedirs(dest + '/' + part, exist_ok=True)
        except OSError:
            print("Creation of the directory failed!")
            return
        df.to_json(dest + '/' + part + '/metadata.json')

In [5]:
def extract_face_from_frame(frame, fd):
    """Crop the most confident face from a BGR frame and pad it to 224x224.

    Args:
        frame: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before detection).
        fd: Face detector exposing ``detect(pil_image)`` that returns
            ``(boxes, confidences)``, with ``boxes`` being ``None`` when
            nothing is found.

    Returns:
        A 224x224 RGB image: the crop is resized so that its longer side is
        224 (aspect ratio kept) and centred on a zero-valued border.
        ``None`` if no face is detected or the detected box yields an empty
        crop.
    """
    side = 224

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pilimg = Image.fromarray(frame)

    # face detection and extraction
    faces, confs = fd.detect(pilimg)
    if faces is None:
        return None
    best = confs.argmax()
    # Negative coordinates are clamped to the image origin.
    box = [max(0, int(x)) for x in faces[best].tolist()]
    img = frame[box[1]:box[3], box[0]:box[2]]
    if img.size == 0:
        # Degenerate box (zero width or height): cv2.resize would raise.
        return None

    # resize and 0 border; the scale uses height/width only, not the
    # channel axis.
    sf = side / max(img.shape[:2])
    new_w = min(side, max(1, int(img.shape[1] * sf)))
    new_h = min(side, max(1, int(img.shape[0] * sf)))
    img_rs = cv2.resize(img, (new_w, new_h))

    bottom = (side - new_h) // 2
    top = side - new_h - bottom
    left = (side - new_w) // 2
    right = side - new_w - left
    return cv2.copyMakeBorder(img_rs, top, bottom, left, right,
                              cv2.BORDER_CONSTANT)

In [6]:
def extract_faces_from_video(path, fd, t=None, n=None, transforms=None):
    """Collect face crops from the frames of a video.

    Args:
        path: Video path handed to ``cv2.VideoCapture``.
        fd: Face detector passed through to ``extract_face_from_frame``.
        t: Optional wall-clock budget in seconds; extraction stops once it
            is exceeded.
        n: Optional maximum number of faces to return.
        transforms: Optional callable invoked as ``transforms(image=face)``
            whose ``'image'`` entry is kept. When ``None`` the RGB crop is
            converted back to BGR instead.

    Returns:
        ``np.ndarray`` stacking the collected faces (empty if none found).
        Frames without a detected face are skipped.
    """
    output = []
    cap = cv2.VideoCapture(path)
    begin = time.time()
    try:
        while True:
            if n is not None and len(output) >= n:
                break
            if t is not None and time.time() - begin > t:
                break
            if not cap.grab():
                break

            # next frame extraction
            ret, frame = cap.retrieve()
            if not ret or frame is None:
                break

            face = extract_face_from_frame(frame, fd)
            if face is None:
                # No face in this frame; must be checked before any
                # further processing of the crop.
                continue

            if transforms is None:
                face = cv2.cvtColor(face, cv2.COLOR_RGB2BGR)
            else:
                face = transforms(image=face)['image']
            output.append(face)
    finally:
        # Release the capture even if detection or a transform raises.
        cap.release()
    return np.asarray(output)

In [7]:
def preprocess_videos(n, begin, end):
    """Extract up to ``n`` face crops per video for parts ``begin..end-1``.

    For each part ``i`` the video names are taken from the columns of
    ``./train_set_videos/dfdc_train_part_<i>/metadata.json``. Faces are
    written as PNG files to
    ``./train_set_faces/dfdc_train_part_<i>/<video>/<video>_<j>.png``.

    Args:
        n: Maximum number of faces to extract from each video.
        begin: First part index (inclusive).
        end: Last part index (exclusive).

    Directory creation uses ``os.mkdir``: if a part directory cannot be
    created, processing stops; if a video directory cannot be created, the
    remaining videos of that part are skipped.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    mtcnn = MTCNN(keep_all=False, select_largest=False, device=device, min_face_size = 60)
    for i in range(begin, end):
        part = 'dfdc_train_part_' + str(i)
        videos_dir = './train_set_videos/' + part
        faces_dir = './train_set_faces/' + part

        df = pd.read_json(videos_dir + '/metadata.json')
        print(str(i), 'TOT:', df.shape[1])
        try:
            os.mkdir(faces_dir)
        except OSError:
            print("Creation of the directory failed!")
            break

        count = 0
        # The metadata frame has one column per video file.
        for index in df.columns:
            filename = str(index).split('.')[0]
            video_faces_dir = faces_dir + '/' + filename
            try:
                os.mkdir(video_faces_dir)
            except OSError:
                print("Creation of the directory failed!")
                break
            # `n` must go by keyword: the third positional parameter of
            # extract_faces_from_video is the time budget `t`.
            faces = extract_faces_from_video(videos_dir + '/' + index, mtcnn, n=n)
            print(count)
            if len(faces) == 0:
                continue
            for j, face in enumerate(faces):
                path = video_faces_dir + '/' + filename + '_' + str(j) + '.png'
                cv2.imwrite(path, face)
            count += 1
        print(str(i), 'EXT:', count)

In [8]:
def train_valid_split(filename, valid_balanced=True):
    """Split the face metadata CSV into paired training rows and a validation set.

    The CSV must provide the columns ``video``, ``face``, ``original``,
    ``chunk`` and ``label``. Rows with ``40 <= chunk < 50`` form the
    validation split; all other rows form the training split.

    Training set: every FAKE row is joined with the REAL row whose ``video``
    equals the fake's ``original``. The fake's own columns become ``fake``
    and ``face_fake``; fakes without a matching REAL face are dropped.

    Validation set: the REAL rows plus the rows whose ``original`` is one of
    those REAL videos. With ``valid_balanced=True`` only one such row per
    ``original`` is kept (sampled with ``random_state=42``). The result is
    shuffled (unseeded) and re-indexed from 0.

    Returns:
        ``(train_set, valid_set)`` as DataFrames.
    """
    data = pd.read_csv(filename)[['video', 'face', 'original', 'chunk', 'label']]

    in_valid = (data.chunk >= 40) & (data.chunk < 50)
    valid_set = data[in_valid]
    train_set = data[~in_valid]

    # Pair each fake with its source real video.
    fakes = (
        train_set.loc[train_set.label == 'FAKE', ['video', 'face', 'original']]
        .rename(columns={'video': 'fake', 'face': 'face_fake', 'original': 'video'})
    )
    train_set = (
        fakes.merge(train_set[train_set.label == 'REAL'], how='left', on='video')
        .dropna(subset=['face'])
    )

    real_valid = valid_set[valid_set.label == 'REAL']
    derived = valid_set[valid_set.original.isin(real_valid.video.tolist())]

    if valid_balanced and not derived.empty:
        # Sample per group explicitly instead of groupby.apply so the
        # 'original' column is always kept, whatever pandas does with
        # grouping columns inside apply.
        derived = pd.concat(
            [group.sample(1, replace=False, random_state=42)
             for _, group in derived.groupby('original')]
        )
    derived = derived.reset_index(drop=True)

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    valid_set = pd.concat([derived, real_valid])
    valid_set = valid_set.sample(frac=1).reset_index(drop=True)
    return train_set, valid_set

In [9]:
def get_transfoms():
    """Build the albumentations pipelines used for training and validation.

    Returns:
        ``(train_transforms, valid_transforms)``. The training pipeline
        applies random augmentations followed by ``Normalize`` and declares
        an extra ``image2`` target treated as an image, so a second image can
        be passed in the same call. The validation pipeline only normalizes.
    """
    augmentations = [
        transforms.ShiftScaleRotate(p=0.2, scale_limit=0.25,
                                    border_mode=1, rotate_limit=25),
        transforms.HorizontalFlip(p=0.1),
        transforms.Cutout(p=.1),
        transforms.RandomContrast(p=.1),
        transforms.RandomBrightness(p=.1, limit=0.3),
        transforms.JpegCompression(p=.2, quality_lower=15, quality_upper=60),
        transforms.Downscale(scale_min=0.25, scale_max=0.25, p=0.2),
        transforms.GaussNoise(p=0.1),
        transforms.Normalize(),
    ]
    train_pipeline = albumentations.Compose(
        augmentations, additional_targets={'image2': 'image'}
    )
    valid_pipeline = albumentations.Compose([transforms.Normalize()])
    return train_pipeline, valid_pipeline

In [10]:
def get_test_transfoms():
    """Build the albumentations pipeline applied at prediction time.

    The pipeline always applies ``RandomBrightness`` (``p=1.0``, limit 0.3)
    and then ``Normalize``.
    """
    steps = [
        transforms.RandomBrightness(p=1.0, limit=0.3),
        transforms.Normalize(),
    ]
    return albumentations.Compose(steps)

In [11]:
def generate(dataset, batch_size, transforms, root_dir, train=True, drop_last=True):
    """Yield ``(X, y)`` batches of transformed face images.

    Args:
        dataset: DataFrame with the columns ``face`` and ``label`` and, when
            ``train`` is true, ``face_fake``. Paths are relative to
            ``root_dir``.
        batch_size: Number of samples per yielded batch (must be >= 1).
        transforms: Callable invoked as ``transforms(image=...)`` or, in
            train mode, ``transforms(image=..., image2=...)``; the result is
            indexed with ``'image'`` / ``'image2'``.
        root_dir: Prefix prepended to every image path.
        train: When true each row contributes two samples: the ``face``
            image labelled from ``label`` and the ``face_fake`` image
            labelled 1.
        drop_last: When true (default) a trailing incomplete batch is
            discarded; pass ``False`` to also receive it.

    Yields:
        ``(np.ndarray, np.ndarray)`` with exactly ``batch_size`` samples,
        except possibly the last batch when ``drop_last`` is false.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1')

    X, y = [], []
    for _, row in dataset.iterrows():
        img = cv2.imread(root_dir + str(row.face))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if train:
            img_fake = cv2.imread(root_dir + str(row.face_fake))
            img_fake = cv2.cvtColor(img_fake, cv2.COLOR_BGR2RGB)
            augmented = transforms(image=img, image2=img_fake)
        else:
            augmented = transforms(image=img)

        X.append(augmented['image'])
        y.append(1 if str(row.label) == 'FAKE' else 0)

        if train:
            X.append(augmented['image2'])
            y.append(1)

        # Train mode adds two samples per row, so an equality test never
        # fires for odd batch sizes; emit full batches and carry the rest.
        while len(X) >= batch_size:
            yield np.array(X[:batch_size]), np.array(y[:batch_size])
            X, y = X[batch_size:], y[batch_size:]

    if X and not drop_last:
        yield np.array(X), np.array(y)

In [12]:
def evaluate(models, valid_set, batch_size, transforms, root_dir, clips, verbose=0):
    """Score an ensemble of models on the validation set.

    Batches come from ``generate(..., train=False)``. For each batch the
    predictions of all ``models`` are averaged.

    Args:
        models: Iterable of models exposing ``predict``.
        valid_set, batch_size, transforms, root_dir: Forwarded to ``generate``.
        clips: Iterable of ``(low, high)`` pairs; the loss is recomputed with
            predictions clipped to each range.
        verbose: When truthy, print a classification report.

    Returns:
        ``(valid_loss, accuracy, auc, confusion_matrix, clipped_losses,
        roc_curve)`` where ``clipped_losses`` has one entry per ``clips`` pair.
    """
    bce = keras.losses.BinaryCrossentropy()
    n_models = len(models)

    y_pred = np.array([])
    target = np.array([])
    batches = generate(valid_set, batch_size, transforms, root_dir, train=False)
    for X_valid, Y_valid in tqdm(batches):
        target = np.append(target, Y_valid, axis=0)

        ensemble = np.zeros(batch_size)
        for member in models:
            member_out = member.predict(np.array(X_valid), batch_size=batch_size)
            ensemble += member_out.reshape(-1) / n_models

        y_pred = np.append(y_pred, ensemble, axis=0)

    valid_loss = bce(target, y_pred).numpy()

    # One loss per clipping range, kept as a float64 array.
    clipped_losses = np.array(
        [bce(target, np.clip(y_pred, low, high)).numpy() for low, high in clips],
        dtype=np.float64,
    )

    hard_pred = np.round(y_pred)
    acc = accuracy_score(target, hard_pred)
    auc = roc_auc_score(target, y_pred)
    cm = confusion_matrix(target, hard_pred)
    curve = roc_curve(target, y_pred)
    if verbose:
        print(classification_report(target, hard_pred))
    return valid_loss, acc, auc, cm, clipped_losses, curve

In [13]:
def train(model, name, train_set, valid_set, early_stopping, 
          train_transforms, valid_transforms, root_dir,
          batch_size, epochs, eval_every=1000):
    """Train ``model`` batch by batch with periodic validation and early stopping.

    Args:
        model: Compiled model exposing ``train_on_batch``, ``save_weights``
            and ``load_weights``; ``train_on_batch`` must return
            ``(loss, accuracy)``.
        name: Suffix of the checkpoint file ``checkpoint_<name>.h5``.
        train_set: Training DataFrame; reshuffled at the start of each epoch.
        valid_set: Validation DataFrame passed to ``evaluate``.
        early_stopping: Number of consecutive evaluations without a better
            validation loss after which training stops.
        train_transforms, valid_transforms: Pipelines for ``generate`` /
            ``evaluate``.
        root_dir: Image root directory.
        batch_size: Training batch size.
        epochs: Maximum number of epochs.
        eval_every: Number of training batches between two evaluations.

    Returns:
        ``(model, history)``. ``history`` is a list with one dict per
        evaluation. If at least one checkpoint was written during this call,
        the best weights are reloaded into ``model`` before returning.
    """
    model_name = 'checkpoint_' + name + '.h5'
    patience = 0
    best_val_loss = None
    history = list()

    def _restore_best():
        # Only reload when this run actually saved a checkpoint.
        if best_val_loss is not None:
            model.load_weights(model_name)

    for epoch_n in range(0, epochs):
        print('EPOCH:', epoch_n, '/', epochs)
        train_set = train_set.sample(frac=1).reset_index(drop=True)

        nbatches = 0
        loss_sum = 0
        acc_sum = 0
        batches = generate(train_set, batch_size, train_transforms, root_dir,
                           train=True)
        for i, (X_train, Y_train) in tqdm(enumerate(batches, 1)):
            batch_loss, batch_acc = model.train_on_batch(X_train, Y_train, 
                                                         reset_metrics=True)
            loss_sum += batch_loss
            acc_sum += batch_acc
            nbatches += 1

            if i % eval_every != 0:
                continue

            train_loss = loss_sum / nbatches
            train_acc = acc_sum / nbatches
            # Reset the running sums so the next report only averages the
            # batches seen since this one.
            loss_sum = 0
            acc_sum = 0
            nbatches = 0

            # evaluate
            valid_loss, val_acc, val_auc, val_cm, clipped_loss, _ = evaluate(
                [model], valid_set, 32, valid_transforms, root_dir,
                [(0.1, 0.9), (0.15, 0.85)])
            if (best_val_loss is None) or (valid_loss < best_val_loss):
                patience = 0
                best_val_loss = valid_loss
                model.save_weights(model_name)
            else:
                patience += 1

            print('\nTRAIN LOSS:', train_loss, 'VALID LOSS:', valid_loss, 
                  'VALID LOSS CLIPPED:', clipped_loss)
            history.append({'epoch': epoch_n, 'chunk': i, 'train_loss': train_loss, 
                            'train_accuracy': train_acc,
                            'valid_loss': valid_loss, "val_loss_clip": clipped_loss,
                            'valid_accuracy': val_acc, 'valid_auc': val_auc,
                            'valid_cm': val_cm})

            # early stopping
            if patience >= early_stopping:
                _restore_best()
                return model, history
    _restore_best()
    return model, history

In [None]:
def build_model():
    """Create the trainable classifier.

    An EfficientNetB7 backbone (ImageNet weights, no top, average pooling,
    224x224x3 input) is followed by a single sigmoid unit.
    """
    backbone = efn.EfficientNetB7(
        input_shape=(224, 224, 3),
        include_top=False,
        pooling='avg',
        weights='imagenet',
    )
    head = Dense(1, activation='sigmoid')
    return Model(inputs=backbone.input, outputs=head(backbone.output))

In [None]:
def load_models():
    """Rebuild the four ensemble members and load their saved weights.

    Three members use an EfficientNetB7 backbone and one uses Xception. All
    are created without pretrained weights, without top, with average pooling
    and a 224x224x3 input, and end in a single sigmoid unit. The second
    EfficientNet member has two extra ReLU layers (128 and 64 units) before
    the output. Every model is compiled with binary cross-entropy, Adam
    (learning rate 0.00005) and binary accuracy.

    Returns:
        List ``[eff_model1, eff_model2, eff_model3, xcep_model]``.
    """
    def _assemble(backbone_factory, weights_path, hidden_units=()):
        # Backbone -> optional ReLU layers -> sigmoid output, then weights.
        backbone = backbone_factory(weights=None,
                                    include_top=False,
                                    pooling='avg',
                                    input_shape=(224, 224, 3))
        x = backbone.output
        for units in hidden_units:
            x = Dense(units, activation='relu')(x)
        predicted = Dense(1, activation='sigmoid')(x)
        classifier = Model(inputs=backbone.input, outputs=predicted)
        classifier.load_weights(weights_path)
        return classifier

    # Built in the same order as the returned list.
    specs = [
        (efn.EfficientNetB7, 'datadfdc/effic_student_weights.h5', ()),
        (efn.EfficientNetB7, 'datadfdc/effic_best_acc_weights (1).h5', (128, 64)),
        (efn.EfficientNetB7, 'datadfdc/effic_sgd_weights.h5', ()),
        (keras.applications.xception.Xception, 'datadfdc/xcep_weights.h5', ()),
    ]
    models = [_assemble(factory, path, hidden) for factory, path, hidden in specs]

    for member in models:
        member.compile(loss='binary_crossentropy',
                       optimizer=keras.optimizers.Adam(learning_rate=0.00005),
                       metrics=[metrics.binary_accuracy])
    return models

In [None]:
def predict(models, df, root_dir, d_type='video', extract='No'):
    """Predict an ensemble score for every row of ``df``.

    Args:
        models: Iterable of models exposing ``predict``.
        df: DataFrame with a ``filename`` column when ``d_type == 'video'``,
            otherwise a ``face`` column. Paths are relative to ``root_dir``.
        root_dir: Prefix prepended to every path.
        d_type: ``'video'`` to sample faces from a video for up to one
            second; any other value treats each row as a single image.
        extract: For images only: ``'Yes'`` runs face detection on the
            image, anything else uses the image as is.

    Returns:
        List with one score per row, in row order. Scores are the mean model
        output clipped to ``[0.01, 0.99]``. Rows with no usable face (or an
        unreadable image) receive ``0.51``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    mtcnn = MTCNN(keep_all=False, select_largest=False, device=device, min_face_size = 60)
    test_transforms = get_test_transfoms()
    preds = []
    b = time.time()
    for index, row in tqdm(df.iterrows()):
        if d_type == 'video':
            imgs = extract_faces_from_video(root_dir + row.filename, mtcnn, t=1, 
                                            transforms=test_transforms)
        else:
            img = cv2.imread(root_dir + row.face)
            if img is None:
                # cv2.imread returns None instead of raising.
                print('Could not read image:', root_dir + row.face)
            elif extract == 'Yes':
                img = extract_face_from_frame(img, mtcnn)
            else:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # extract_face_from_frame returns None when no face is found,
            # so test for None before calling len().
            if img is None or len(img) == 0:
                preds.append(0.51)
                continue                
            imgs = test_transforms(image=img)['image'].reshape((-1, 224, 224, 3))
        if len(imgs) == 0:
            preds.append(0.51)
            continue
        output = 0
        for model in models:
            output += model.predict(imgs).mean() / len(models)
        preds.append(np.clip(output, 0.01, 0.99))
        print(index, len(imgs), output)
    print(time.time() - b)
    return preds

In [None]:
def simple_predict(models, filename):
    """Predict the ensemble score for one image file and display the face crop.

    Args:
        models: Iterable of models exposing ``predict``.
        filename: Path of the image to analyse.

    Returns:
        Mean model output (not clipped), or ``0.51`` when no face is
        detected. When a face is found the crop is shown with matplotlib and
        the score is written in the x-axis label.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    mtcnn = MTCNN(keep_all=False, select_largest=False, device=device, min_face_size = 60)
    test_transforms = get_test_transfoms()
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread returns None instead of raising.
        raise FileNotFoundError('Could not read image: ' + str(filename))

    img_crop = extract_face_from_frame(img, mtcnn)

    # extract_face_from_frame returns None when no face is found.
    if img_crop is None or len(img_crop) == 0:
        return 0.51
    img = test_transforms(image=img_crop)['image'].reshape((-1, 224, 224, 3))
    output = 0
    
    for model in models:
        output += model.predict(img).mean() / len(models)
    plt.xlabel('FAKE; PREDICTION:' + "{:.2f}".format(output))
    plt.imshow(img_crop)
    return output

In [None]:
def plot_confusion_matrix(cm):
    """Draw ``cm`` as an annotated heatmap and save it to ``cm.png``.

    Both axes are labelled REAL / FAKE. The current figure is closed after
    saving.
    """
    class_names = ['REAL', 'FAKE']

    ax = plt.subplot()
    sns.heatmap(cm, annot=True, fmt='g', ax=ax)

    ax.set(xlabel='Predicted labels',
           ylabel='True labels',
           title='Confusion Matrix')
    ax.xaxis.set_ticklabels(class_names)
    ax.yaxis.set_ticklabels(class_names)

    plt.savefig('cm.png')
    plt.close()

In [None]:
def plot_roc_curve(x, y, auc):
    """Plot a ROC curve with the chance diagonal and save it to ``roc.png``.

    Args:
        x: Values for the horizontal axis (labelled FAR).
        y: Values for the vertical axis (labelled 1 - FRR).
        auc: Area under the curve, shown in the legend.

    The current figure is closed after saving.
    """
    curve_label = 'ROC curve (area = %0.3f)' % auc

    plt.plot(x, y, color='darkorange', lw=2, label=curve_label)
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

    plt.xlabel('FAR')
    plt.ylabel('1 - FRR')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")

    plt.savefig('roc.png')
    plt.close()

In [None]:
# A `global` statement at module level has no effect; the declaration that
# matters is the one inside main().
global models

In [None]:
# Cache for the loaded ensemble: main() calls load_models() only while this
# is None. Re-running this cell forces a reload on the next main() call.
models = None

In [None]:
def main(**args):
    """Run one notebook task selected by ``args['--mode']``.

    Required keys:
        ``--mode``: ``'train'``, ``'eval'``, ``'predict'`` or
            ``'predict frame'``.
        ``--path``: Data root directory (``'predict frame'``: the image path).

    Optional / mode-specific keys:
        ``--metadata``: CSV path. Required for ``'eval'`` and ``'predict'``;
            for ``'train'`` it defaults to ``'dfdctrainset/train_data.csv'``.
        ``--type``: ``'video'`` (default) or another value for single images
            (``'predict'`` only).
        ``--extract``: ``'Yes'`` / ``'No'`` (default ``'No'``), forwarded to
            ``predict``.
        ``--indices``: Row labels used to subset the metadata (``'predict'``).

    Returns:
        ``None`` for ``'train'``, the validation loss for ``'eval'``, the
        submission DataFrame (also written to ``submission.csv``) for
        ``'predict'``, and the score for ``'predict frame'``.

    Raises:
        ValueError: If ``--mode`` is not one of the supported values.
    """
    global models
    print(args)
    #unzip_videos("../dfdc_down", "../dfdc/train_set_videos")
    #extract_metadata('./train_metadata')
    #preprocess_videos(1, 0, 50)

    PATH = args['--path']
    mode = args['--mode']
    clips = [(0.1, 0.9), (0.15, 0.85)]

    if torch.cuda.is_available():
        print(torch.cuda.get_device_name(0))

    if mode == 'train':
        train_transforms, valid_transforms = get_transfoms()
        train_set, valid_set = train_valid_split(
            args.get('--metadata', 'dfdctrainset/train_data.csv'))
        model = build_model()
        model.compile(loss='binary_crossentropy',
                      optimizer=keras.optimizers.SGD(learning_rate=0.001, decay=1e-6,
                                                     momentum=0.8, nesterov=False),
                      metrics=[metrics.binary_accuracy])
        # Baseline score before any training step.
        best_val_loss, val_acc, val_auc, val_cm, clipped_loss, _ = evaluate(
            [model], valid_set, 32, valid_transforms, PATH, clips)
        print(best_val_loss, val_acc, val_auc, clipped_loss)
        EPOCHS = 10
        EARLY_STOPPING = 5
        BATCH_SIZE = 16
        model, history = train(model, '0', train_set, valid_set, EARLY_STOPPING, 
                               train_transforms, valid_transforms, PATH, BATCH_SIZE, EPOCHS)
        best_val_loss, val_acc, val_auc, val_cm, clipped_loss, _ = evaluate(
            [model], valid_set, 32, valid_transforms, PATH, clips)
        print(best_val_loss, val_acc, val_auc, clipped_loss)
    elif mode == 'eval':
        if models is None:
            models = load_models()

        _, valid_transforms = get_transfoms()

        train_set, valid_set = train_valid_split(args['--metadata'], valid_balanced=False)

        loss, acc, auc, cm, clipped_loss, curve = evaluate(
            models, valid_set, 32, valid_transforms, PATH, clips, verbose=1)

        print('LOSS:', loss, 'ACCURACY:', acc, 'AUC:', auc, 'CLIPPED LOSSES:', clipped_loss)
        plot_confusion_matrix(cm)
        plot_roc_curve(curve[0], curve[1], auc)
        return loss
    elif mode == 'predict':
        if models is None:
            models = load_models()

        df = pd.read_csv(args['--metadata'])

        if '--indices' in args:
            df = df.loc[args['--indices']]

        # Work on a copy so the label column is not assigned onto a slice.
        submit = df.copy()

        # '--type' and '--extract' are optional; fall back to predict()'s
        # own defaults instead of raising KeyError.
        sub_preds = predict(models, df, PATH,
                            d_type=args.get('--type', 'video'),
                            extract=args.get('--extract', 'No'))

        submit['label'] = sub_preds
        submit = submit.reset_index(drop=True)

        submit.to_csv('submission.csv', index=False)
        return submit
    elif mode == 'predict frame':
        if models is None:
            models = load_models()
        return simple_predict(models, PATH)
    else:
        raise ValueError("Unknown --mode: " + repr(mode))

In [None]:
# Training configuration: '--path' is the root directory prepended to the
# face image paths. Like every cell that assigns `args`, it overwrites any
# previously selected configuration.
args = {'--mode': 'train',
        '--path': 'dfdctrainset/train_set_faces/train_set_faces/'}

In [None]:
# Video prediction configuration: '--metadata' is the CSV listing the videos
# and '--path' the directory prepended to each listed filename.
# No '--extract' key is set in this configuration.
args = {'--mode': 'predict',
        '--type': 'video',
        '--metadata': 'deepfake-detection-challenge/sample_submission.csv',
        '--path': 'deepfake-detection-challenge/test_videos/'}

In [None]:
# Evaluation configuration: the validation split is derived from '--metadata'
# and the face images are read from below '--path'.
args = {'--mode': 'eval',
        '--metadata': 'dfdctrainset/train_data.csv',
        '--path': 'dfdctrainset/train_set_faces/train_set_faces/'}

In [None]:
# Subset prediction configuration: predict on single face images for the
# metadata rows selected by '--indices' (used with DataFrame.loc in main).
# '--extract': 'No' uses the images as stored, without face detection.
args = {'--mode': 'predict',
        '--type': 'image',
        '--extract': 'No',
        '--indices': [3, 4],
        '--metadata': 'dfdctrainset/train_data.csv',
        '--path': 'dfdctrainset/train_set_faces/train_set_faces/'}

In [None]:
# Single-frame prediction configuration: in this mode '--path' is the image
# file itself.
# NOTE: the first assignment is overwritten immediately by the second, so
# only 'Nixon FAKE.png' takes effect when the cell runs as written.
args = {'--mode': 'predict frame',
        '--path': 'dfdctrainset/train_set_faces/train_set_faces/dfdc_train_part_0/acagallncj.png'}
args = {'--mode': 'predict frame',
        '--path': 'Nixon FAKE.png'}

In [None]:
# Run main() with whichever `args` dictionary was assigned most recently and
# keep its return value in `out`.
if __name__ == "__main__":
    out = main(**args)

In [None]:
# Rebuild the train/validation split at module level for the inspection
# cells below (valid_balanced keeps its default of True).
train_set, valid_set = train_valid_split('dfdctrainset/train_data.csv')

In [None]:
# Draw one random validation row (unseeded, so it changes between runs) and
# display it.
sample =valid_set.sample()
sample

In [None]:
# Look up the validation rows whose `video` equals the sampled row's
# `original`. The result is empty when `original` is missing or the
# referenced video is not in the validation set.
real = valid_set[valid_set.video == sample.original.values[0]]
real