In [None]:
!pip install -U seaborn

# Global variables

In [None]:
# Placeholders only: both names are assigned real values later in the notebook,
# once the frame rate and duration of every video have been measured.
SECONDS = FPS = None

# Libraries

In [None]:
import pandas as pd
import numpy as np

from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation, Dropout, LSTM,Conv3D, MaxPooling3D, Conv2D, MaxPooling2D
# from tensorflow.keras.layers.convolutional import 
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

import matplotlib.pyplot as plt
import seaborn as sns

import cv2

import os
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split

# Seed NumPy's and TensorFlow's global random generators with a fixed value
# to make repeated runs of the notebook more repeatable.
np.random.seed(30)
tf.random.set_seed(30)

In [None]:
# Show which OpenCV build is installed; cv2 is used below to open and decode the videos.
print(cv2.__version__)

# View Dataset
---

**Metadata with video label**

In [None]:
# Read the metadata JSON, transpose it so that every former column becomes a
# row, and turn the resulting index into a regular "name" column (used below
# as the video file name). Built as one chain so the cell can be re-run safely.
train_metadata = (
    pd.read_json("../input/deepfake-detection-challenge/train_sample_videos/metadata.json")
    .T
    .reset_index()
    .rename({"index":"name"}, axis=1)
)
train_metadata.head()

## Fake/Real videos count

In [None]:
# Choose the plot theme
sns.set_style("dark")

# Configure the figure size
fig, ax = plt.subplots(figsize=(10,8))
bar = sns.countplot(data=train_metadata,x="label",ax=ax)
ax.set_title("Real and Fake Videos Split")

# Write each bar's count just above the bar. Bar heights may come back as
# floats, so they are formatted without decimals ("77" rather than "77.0"),
# matching the integer labels used in the width/height plots.
for p in bar.patches:
    _x = p.get_x() + p.get_width() / 2
    # +4 leaves a small vertical gap between the top of the bar and the text
    _y = p.get_y() + p.get_height() + 4
    value = f"{p.get_height():.0f}"
    ax.text(_x, _y, value, ha="center")

plt.savefig("labels_dist.png")
plt.show()

## Video FPS

In [None]:
def get_fps_all(train_metadata:pd.DataFrame)->list:
    """Collect the frame rate OpenCV reports for every listed video.

    Args:
        train_metadata: table whose ``name`` column holds the video file
            names inside the ``train_sample_videos`` input directory.

    Returns:
        A list with one ``CAP_PROP_FPS`` value per row, in row order.
    """
    def read_fps(file_name):
        # Open the file only long enough to query its frame-rate property.
        capture = cv2.VideoCapture(
            f"../input/deepfake-detection-challenge/train_sample_videos/{file_name}"
        )
        rate = capture.get(cv2.CAP_PROP_FPS)
        capture.release()
        return rate

    names = tqdm(train_metadata.name, total=train_metadata.shape[0])
    return [read_fps(file_name) for file_name in names]

# Store the per-video frame rate as a new "fps" column.
train_metadata["fps"] = get_fps_all(train_metadata)
# Keep the lowest frame rate found, truncated to a whole number; it is
# multiplied with SECONDS later to obtain FRAMES.
FPS = int(train_metadata["fps"].min())

**Plot Graph**

In [None]:
# Count how many videos share each frame rate and draw one bar per distinct rate.
data = train_metadata["fps"].value_counts()

fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(x=data.index, y=data.tolist(), ax=ax)
ax.set(title="Frame rate per second")
plt.savefig("fps_dist.png")
plt.show()

## Video Time

In [None]:
def getDuration(train_metadata:pd.DataFrame)->list:
    """Compute the length in seconds of every listed video.

    The duration is the frame count reported by OpenCV divided by the frame
    rate already stored in the ``fps`` column, rounded to 4 decimals.

    Args:
        train_metadata: table with a ``name`` column (video file names inside
            the ``train_sample_videos`` input directory) and an ``fps`` column.

    Returns:
        A list with one duration per row, in row order.
    """
    duration = []
    rows = zip(train_metadata.name, train_metadata["fps"])
    for file_name, frame_rate in tqdm(rows, total=train_metadata.shape[0]):
        capture = cv2.VideoCapture(
            f"../input/deepfake-detection-challenge/train_sample_videos/{file_name}"
        )
        frame_total = capture.get(cv2.CAP_PROP_FRAME_COUNT)
        duration.append(round(float(frame_total) / float(frame_rate), 4))
        capture.release()

    return duration

# Store the per-video length (in seconds) as a new "duration" column.
train_metadata["duration"] = getDuration(train_metadata)
# Keep the shortest duration found, truncated to whole seconds; it is
# multiplied with FPS later to obtain FRAMES.
SECONDS = int(train_metadata["duration"].min())

**Plot Graph**

In [None]:
# Histogram of the "duration" column, i.e. how long the videos are.
fig, ax = plt.subplots(figsize=(10, 8))
sns.histplot(data=train_metadata, x="duration", ax=ax)
ax.set(title="Seconds per video")
plt.show()

## Width and Height

In [None]:
def get_width_height(train_metadata:pd.DataFrame)->tuple:
    """Read the frame width and height OpenCV reports for every listed video.

    Only the ``name`` column is used; the function no longer iterates over
    an ``fps`` column it never needed, so it also works before that column
    has been added.

    Args:
        train_metadata: table whose ``name`` column holds the video file
            names inside the ``train_sample_videos`` input directory.

    Returns:
        ``(width, height)``: two lists with one value per row, in row order.
    """
    height = []
    width = []

    for video_name in tqdm(train_metadata.name,total=train_metadata.shape[0]):
        path_video = f"../input/deepfake-detection-challenge/train_sample_videos/{video_name}"

        vidcapture = cv2.VideoCapture(path_video)
        try:
            height.append(vidcapture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            width.append(vidcapture.get(cv2.CAP_PROP_FRAME_WIDTH))
        finally:
            # Release the capture even if querying a property fails.
            vidcapture.release()

    return (width, height)

# Measure every video once and store the results as "width" and "height" columns.
width, height = get_width_height(train_metadata)
train_metadata["width"] = width
train_metadata["height"] = height

**Plot Graph**

In [None]:
# Two side-by-side bar charts: how many videos have each frame width (left)
# and each frame height (right). The right panel used to be titled "Lenght";
# it shows the "height" column and is now labelled accordingly.
fig, ax = plt.subplots(1,2,figsize=(10,5))
for axis, column, title in zip(ax, ("width", "height"), ("Width", "Height")):
    data = train_metadata[column].value_counts()
    bar = sns.barplot(ax=axis,x=data.index,y=list(data))
    # Annotate bar number i with its count, anchored at x = i on top of the bar.
    for i,pat in enumerate(bar.patches):
        axis.annotate(f"{int(pat.get_height())}",
                      (i,pat.get_height()))
    axis.set_title(title)

plt.show()

# Train Variables

In [None]:
# Size of the frame window that sampling indices are drawn from:
# SECONDS (shortest duration) times FPS (lowest frame rate), both computed above.
FRAMES = SECONDS * FPS
print(f"Frames Totais: {FRAMES}")

# Side length, in pixels, that every sampled frame is resized to.
IMG_SIZE = 224
# Number of videos requested per batch from the generators.
BATCH_SIZE = 10
# Number of epochs passed to the training call.
EPOCHS = 10

# Normalize Videos

In [None]:
def pre_process_video(path_video:str,img_index,resize:tuple)->np.ndarray:
    """Decode selected frames of a video as normalised grayscale images.

    The video is read sequentially from its first frame. Every frame whose
    position appears in ``img_index`` is converted to grayscale, resized and
    kept. Decoding stops as soon as the last wanted frame has been collected
    or the video ends, and the capture is always released.

    Args:
        path_video: path of the video file to open.
        img_index: sequence of zero-based frame positions to keep. It must be
            strictly increasing: positions are matched one after another
            against a counter that only moves forward.
        resize: size tuple handed to ``cv2.resize`` (OpenCV's ``dsize``,
            i.e. ``(width, height)``).

    Returns:
        Float array of shape ``(n_kept, height, width, 1)`` with values
        divided by 255. ``n_kept`` is smaller than ``len(img_index)`` when the
        video ends before all requested positions are reached, and 0 when no
        frame could be read at all.
    """
    frames = []
    vidcapture = cv2.VideoCapture(path_video)
    try:
        index = 0  # position of the frame about to be decoded
        j = 0      # position in img_index of the next frame to keep
        # Checking j first avoids decoding one extra frame after the last match.
        while j < len(img_index) and vidcapture.isOpened():
            rent, frame = vidcapture.read()
            if not rent:
                break
            if index == img_index[j]:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame = cv2.resize(frame, resize)
                frames.append(frame)
                j += 1
            index += 1
    finally:
        # Free the decoder/file handle even if decoding raised.
        vidcapture.release()

    if not frames:
        # np.array([]) is 1-D, so the reshape below would fail; return an
        # empty batch with the expected trailing dimensions instead.
        return np.zeros((0, resize[1], resize[0], 1))

    frames = np.array(frames) / 255
    return frames.reshape(frames.shape[0], frames.shape[1], frames.shape[2], 1)

# Getting Image Tensor

In [None]:
def getImgTensor(n_frames:int)->list:
    """Describe the tensor built for one video.

    Args:
        n_frames: how many frames to sample per video.

    Returns:
        ``[img_idx, IMG_SIZE, IMG_SIZE, 1]`` where ``img_idx`` holds
        ``n_frames`` evenly spaced, rounded frame positions between 0 and
        ``FRAMES - 1`` inclusive.
    """
    # Frame positions are zero-based, so the last valid one is FRAMES - 1;
    # sampling up to FRAMES itself would request a frame that a FRAMES-long
    # video does not have. max() keeps the range valid when FRAMES is 0.
    last_frame = max(FRAMES - 1, 0)
    img_idx = np.round(np.linspace(0, last_frame, n_frames)).astype(int)
    return [img_idx, IMG_SIZE, IMG_SIZE, 1]

In [None]:
# Sample 50 frames per video; img_tensor is reused by the model and the generators below.
img_tensor = getImgTensor(50)
print ('# img_tensor =', img_tensor)

# Generate Data

In [None]:
def getBatchData(train_metadata,batch,batch_size,img_tensor)->tuple:
    """Build the input tensor and one-hot labels for one batch of videos.

    Args:
        train_metadata: table with a ``name`` column (video file names inside
            the ``train_sample_videos`` input directory) and a ``label``
            column.
        batch: zero-based number of the batch to build.
        batch_size: number of rows per batch.
        img_tensor: ``[frame_indices, width, length, channels]``; the first
            three entries are used here.

    Returns:
        ``(batch_data, batch_labels)``. ``batch_data`` has shape
        ``(n_videos, len(frame_indices), width, length, 1)`` and
        ``batch_labels`` has shape ``(n_videos, 2)``, with column 0 set for
        ``"FAKE"`` and column 1 set for every other label. ``n_videos`` equals
        ``batch_size`` except for a final partial batch, which is returned at
        its real size instead of being padded with all-zero samples.
    """
    img_idx = img_tensor[0] # frame positions sampled from every video
    len_frames, width, length = len(img_idx), img_tensor[1], img_tensor[2]

    # iloc clamps a slice that runs past the last row, so the final (possibly
    # shorter) batch needs no special case.
    start = batch * batch_size
    train_metadata_ = train_metadata.iloc[start:start + batch_size, :]
    n_videos = train_metadata_.shape[0]

    batch_data = np.zeros((n_videos,len_frames,width,length,1)) # batch data that will pass forward
    batch_labels = np.zeros((n_videos,2)) # batch labels that will pass forward

    rows = zip(train_metadata_['name'].to_list(), train_metadata_["label"].to_list())
    for video_posi,(name,label) in enumerate(rows):
        path_ = f"../input/deepfake-detection-challenge/train_sample_videos/{name}"
        batch_data[video_posi] = pre_process_video(path_,
                                                   img_idx,
                                                   (width,length))

        # Compare this row's label. Comparing the whole label list to "FAKE"
        # was always False, which marked every video as the second class.
        if(label == "FAKE"):
            batch_labels[video_posi][0] = 1
        else:
            batch_labels[video_posi][1] = 1

    return batch_data, batch_labels

In [None]:
def generator(train_metadata, batch_size, img_tensor):
    """Yield batches from ``getBatchData`` forever, cycling over the table.

    One pass yields batch numbers 0, 1, ... until every row of
    ``train_metadata`` has been covered (a partial last batch counts as a
    batch), then starts again at batch 0. The number of batches is
    recomputed at the start of every pass.

    Args:
        train_metadata: table with a ``name`` column; forwarded unchanged.
        batch_size: rows per batch; forwarded unchanged.
        img_tensor: tensor description; forwarded unchanged.
    """
    while True:
        n_videos = len(train_metadata["name"])
        full_batches, leftover = divmod(n_videos, batch_size)
        # A non-empty remainder needs one extra, shorter batch.
        num_batches = full_batches + 1 if leftover else full_batches

        batch = 0
        while batch < num_batches:
            yield getBatchData(train_metadata, batch, batch_size, img_tensor)
            batch += 1

# Train Model

In [None]:
def plotModelHistory(h):
    """Plot training vs. validation curves and print the best accuracies.

    Args:
        h: object whose ``history`` mapping contains the series ``loss``,
            ``val_loss``, ``categorical_accuracy`` and
            ``val_categorical_accuracy``.

    Draws the loss curves on the left panel and the accuracy curves on the
    right panel, shows the figure, then prints the maximum training and
    validation accuracy.
    """
    history = h.history
    # (training series, validation series, panel title), left to right
    panels = (
        ('loss', 'val_loss', "Train loss vs Validation loss"),
        ('categorical_accuracy', 'val_categorical_accuracy', "Train accuracy vs Validation accuracy"),
    )

    fig, ax = plt.subplots(1, 2, figsize=(15,4))
    for axis, (train_key, val_key, title) in zip(ax, panels):
        axis.plot(history[train_key])
        axis.plot(history[val_key])
        axis.legend([train_key, val_key])
        axis.set_title(title)
    plt.show()

    print("Max. Training Accuracy", max(history['categorical_accuracy']))
    print("Max. Validaiton Accuracy", max(history['val_categorical_accuracy']))

In [None]:
def make3dFilter(x):
    """Return the 3-tuple ``(x, x, x)``, e.g. a cubic kernel or pool size."""
    return (x, x, x)

def make2dFilter(x):
    """Return the 2-tuple ``(x, x)``, e.g. a square kernel or pool size."""
    return (x, x)

In [None]:
# 3D-convolutional classifier over a stack of sampled video frames
def defineModel(img_tensor):
    """Build and compile the Conv3D network.

    Args:
        img_tensor: ``[frame_indices, dim1, dim2, channels]``; the input shape
            is ``(len(frame_indices), dim1, dim2, channels)`` and is printed.

    Returns:
        A compiled ``Sequential`` model: three Conv3D / MaxPooling3D /
        BatchNormalization stages (16, 32 and 64 filters), two Dense / 
        BatchNormalization / Dropout(0.25) stages (128 and 64 units) and a
        2-unit softmax output, using Adam, categorical cross-entropy and the
        categorical-accuracy metric.
    """
    inputShape = (len(img_tensor[0]), img_tensor[1], img_tensor[2], img_tensor[3])
    print(inputShape)

    model = Sequential()

    # First stage: 5x5x5 kernels, pooling over time and space
    model.add(Conv3D(16, make3dFilter(5), activation='relu', input_shape=inputShape))
    model.add(MaxPooling3D(make3dFilter(2), padding='same'))
    model.add(BatchNormalization())

    # Later stages: 3x3x3 kernels, pooling over the last two spatial axes only
    for n_filters in (32, 64):
        model.add(Conv3D(n_filters, make3dFilter(3), activation='relu'))
        model.add(MaxPooling3D(pool_size=(1,2,2), padding='same'))
        model.add(BatchNormalization())

    model.add(Flatten())

    # Fully connected head
    for n_units in (128, 64):
        model.add(Dense(n_units, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.25))

    model.add(Dense(2, activation='softmax'))

    model.compile(optimizer=optimizers.Adam(), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    return model

# Build the network for the sampled-frame tensor defined above and print its layer summary.
model = defineModel(img_tensor)
model.summary()

In [None]:
# Hold out 33% of the videos for validation, keeping the label proportions
# equal in both parts (stratified split with a fixed random_state).
train, test = train_test_split(
    train_metadata,
    test_size=0.33,
    random_state=42,
    stratify=train_metadata["label"],
)

train_generator = generator(train, BATCH_SIZE, img_tensor)
val_generator = generator(test, BATCH_SIZE, img_tensor)

# Batches needed to cover each split once: ceiling division, so a partial
# last batch counts as one step.
steps_per_epoch = -(-train.shape[0] // BATCH_SIZE)
validation_steps = -(-test.shape[0] // BATCH_SIZE)

## Checkpoints

In [None]:
import datetime

curr_dt_time = datetime.datetime.now()

# One output folder per run, named after the current time
# (spaces removed and ':' replaced by '_').
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

# Create the folder in a single call; exist_ok makes a second run harmless.
os.makedirs(model_name, exist_ok=True)

filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# `period` is deprecated in tf.keras and not accepted by newer Keras releases;
# save_freq='epoch' is the supported way to save after every epoch.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')

# Multiply the learning rate by 0.2 when val_loss has not improved for 4 epochs.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2, verbose=1, patience=4)

# Checkpointing is switched off for now; use [checkpoint, LR] to also save
# the model after every epoch.
callbacks_list = [LR]

**Fit Model**

In [None]:
# Model.fit accepts Python generators directly; fit_generator is deprecated in
# TF 2.x and missing from newer Keras releases. Arguments that only restated
# defaults (class_weight=None, workers=1, initial_epoch=0) are dropped.
model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1,
          callbacks=callbacks_list, validation_data=val_generator,
          validation_steps=validation_steps)

In [None]:
# Plot the curves recorded during training (model.history is expected to hold
# the history object of the training call above).
plotModelHistory(model.history)