In [1]:
# Install cvlib offline from source trees shipped in the attached
# "cvlib-python-package" Kaggle dataset. `--no-index` stops pip from
# contacting a package index and `-f ./` only adds the current directory as
# an extra place to look for archives.
# progressbar and imutils are installed first, presumably because cvlib
# requires them and cannot download them without an index (TODO confirm).
!pip install /kaggle/input/cvlib-python-package/progressbar-2.5/progressbar-2.5 -f ./ --no-index
!pip install /kaggle/input/cvlib-python-package/imutils-0.5.3/imutils-0.5.3 -f ./ --no-index
!pip install /kaggle/input/cvlib-python-package/cvlib-0.2.3/cvlib-0.2.3 -f ./ --no-index

Looking in links: ./
Processing /kaggle/input/cvlib-python-package/progressbar-2.5/progressbar-2.5
Building wheels for collected packages: progressbar
  Building wheel for progressbar (setup.py) ... [?25l- \ done
[?25h  Created wheel for progressbar: filename=progressbar-2.5-cp36-none-any.whl size=12073 sha256=b8d2ba0ac65582ed9ae7aaac3d6156e3e336cb78aca1c6e7ceed212f5acc91a6
  Stored in directory: /root/.cache/pip/wheels/70/b8/da/6a7c36dab225b61b697caf74a1c365329bb7a9c80e818d6331
Successfully built progressbar
Installing collected packages: progressbar
Successfully installed progressbar-2.5
Looking in links: ./
Processing /kaggle/input/cvlib-python-package/imutils-0.5.3/imutils-0.5.3
Building wheels for collected packages: imutils
  Building wheel for imutils (setup.py) ... [?25l- \ done
[?25h  Created wheel for imutils: filename=imutils-0.5.3-cp36-none-any.whl size=25851 sha256=3015e8acbe0e2889717acc2119724dd4201a9669ed7d8012573eac525b71cca2
  Stored in direct

In [2]:
import os
from multiprocessing import Pool
from typing import List

import cv2
import cvlib as cv
import numpy as np
import pandas as pd
import tensorflow as tf

# Number of face crops used per video: read_faces_from_video stops collecting
# once it has this many, the generator is built with frames_per_movie set to
# it, and the predictions are reshaped into rows of this length.
FRAMES_PER_VIDEO = 6
# Directory that is listed for *.mp4 files to score.
TEST_VIDEOS_DIRECTORY = '/kaggle/input/deepfake-detection-challenge/test_videos'
# Saved Keras model (.h5) loaded with tf.keras.models.load_model.
PRETRAINED_MODEL_PATH = '/kaggle/input/deepfake-3-faces-resnet50-balanced-classes-weights/deepfake_3_faces_resnet50_balanced_classes.h5'
# Videos per generator batch (each batch therefore holds
# BATCH_SIZE * FRAMES_PER_VIDEO images); also reused as max_queue_size.
BATCH_SIZE = 16
# Side length, in pixels, that every face crop is resized to.
IMG_SIZE = 200

Using TensorFlow backend.


In [4]:
def extend_rect_to_square(start_x, start_y, end_x, end_y, image_width, image_height):
    """Grow a bounding box along its shorter side until it is square, then clip it.

    The box is padded symmetrically around its centre. When the difference
    between width and height is odd, the extra pixel goes to the far
    (end) side so that the padded box is exactly square rather than one
    pixel short.

    Args:
        start_x, start_y: top-left corner of the box (inclusive).
        end_x, end_y: bottom-right corner of the box (exclusive, slice style).
        image_width, image_height: image bounds used for clipping.

    Returns:
        Tuple ``(start_x, start_y, end_x, end_y)`` of the padded box, limited
        to ``[0, image_width]`` x ``[0, image_height]``. Clipping happens
        after padding, so a box close to the image border can still come out
        non-square.
    """
    width = end_x - start_x
    height = end_y - start_y

    difference = abs(width - height)
    # The two halves must add up to the full difference; using
    # `difference // 2` on both sides would lose one pixel for odd values.
    pad_before = difference // 2
    pad_after = difference - pad_before

    if width > height:
        start_y -= pad_before
        end_y += pad_after
    else:
        start_x -= pad_before
        end_x += pad_after

    return (
        max(0, start_x),
        max(0, start_y),
        min(image_width, end_x),
        min(image_height, end_y),
    )


def read_faces_from_video(path: str, img_size=None, swap_channels=True) -> List[np.ndarray]:
    """Extract up to FRAMES_PER_VIDEO square face crops from a video file.

    Every 10th frame is decoded and passed to ``cv.detect_face``. For each
    such frame with at least one detection, the box with the highest
    confidence is extended to a square, cropped, and optionally converted and
    resized.

    Args:
        path: path of the video file to open with ``cv2.VideoCapture``.
        img_size: when truthy, each crop is resized to ``(img_size, img_size)``.
        swap_channels: when true, crops are converted with
            ``cv2.COLOR_BGR2RGB``.

    Returns:
        A non-empty list of face crops (image arrays), at most
        FRAMES_PER_VIDEO long.

    Raises:
        AssertionError: if no face crop could be extracted (unreadable video,
            zero reported frames, or no detections).
    """
    capture = cv2.VideoCapture(path)
    faces_to_save = []
    try:
        num_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in range(num_frames):
            # grab() advances without decoding; a failure means no more
            # frames can be read, so keep whatever was collected so far.
            if not capture.grab():
                break
            if i % 10 != 0:
                continue

            ret, frame = capture.retrieve()
            if not ret or frame is None:
                # Skip a frame that could not be decoded instead of handing
                # None to the face detector.
                continue

            faces, confidences = cv.detect_face(frame)
            if len(confidences) > 0:
                most_confident_face_index = np.argmax(confidences)
                (start_x, start_y, end_x, end_y) = faces[most_confident_face_index]
                (start_x, start_y, end_x, end_y) = extend_rect_to_square(
                    start_x,
                    start_y,
                    end_x,
                    end_y,
                    frame.shape[1],
                    frame.shape[0])
                face_crop = frame[start_y:end_y, start_x:end_x]
                # A box lying outside the image produces an empty crop.
                if face_crop.shape[0] > 0 and face_crop.shape[1] > 0:
                    if swap_channels:
                        face_crop = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
                    if img_size:
                        face_crop = cv2.resize(face_crop, (img_size, img_size))
                    faces_to_save.append(face_crop)

            if len(faces_to_save) == FRAMES_PER_VIDEO:
                break
    finally:
        # Always free the decoder, including when detection raises.
        capture.release()

    if not faces_to_save:
        # Raised explicitly (not via `assert`) so the check survives `python -O`
        # while keeping the exception type existing callers may rely on.
        raise AssertionError("no faces could be extracted from " + str(path))
    return faces_to_save

In [5]:
class FacesDataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that yields batches of face crops read from video files.

    Each batch covers ``batch_size`` consecutive videos and contributes
    exactly ``frames_per_movie`` images per video, in the order of
    ``video_file_names``. No labels are produced, so the sequence is suitable
    for ``model.predict``.

    Videos that cannot be processed are recorded in ``broken_files`` and
    replaced by all-zero images so the output keeps a fixed number of rows
    per video.

    NOTE(review): when this sequence is consumed with
    ``use_multiprocessing=True`` the worker processes presumably operate on
    their own copies of the object, in which case names appended to
    ``broken_files`` there would not show up on the instance held by the
    caller -- confirm before relying on it.
    """

    def __init__(
            self,
            video_file_names: List[str],
            videos_directory: str,
            video_groups: List[str] = None,
            batch_size: int = 64,
            frames_per_movie: int = 3,
            image_size: int = 200):
        """Store the configuration.

        Args:
            video_file_names: file names of the videos, one entry per video.
            videos_directory: directory containing the videos (or the group
                sub-directories when ``video_groups`` is given).
            video_groups: optional sub-directory name for each video, aligned
                with ``video_file_names``.
            batch_size: number of videos per batch.
            frames_per_movie: number of images emitted per video.
            image_size: side length every face crop is resized to.
        """
        self.batch_size = batch_size
        self.frames_per_movie = frames_per_movie
        self.image_size = image_size
        self.video_file_names = video_file_names
        self.videos_directory = videos_directory
        self.video_groups = video_groups
        # File names of videos for which face extraction raised.
        self.broken_files = []

    def __len__(self) -> int:
        """Return the number of batches (the last one may be partial)."""
        return int(np.ceil(len(self.video_file_names) / self.batch_size))

    def _match_frame_count(self, frames: list) -> list:
        """Trim or pad ``frames`` to exactly ``frames_per_movie`` entries."""
        frames = list(frames[:self.frames_per_movie])
        if not frames:
            # Nothing usable: fall back to black images of the configured size.
            blank = np.zeros(shape=(self.image_size, self.image_size, 3), dtype=np.uint8)
            return [blank] * self.frames_per_movie
        # Too few faces found: repeat the last one to fill the slots.
        while len(frames) < self.frames_per_movie:
            frames.append(frames[-1])
        return frames

    def __getitem__(self, index) -> np.ndarray:
        """Return batch ``index`` as an array of images scaled by 1/255.

        The result has ``frames_per_movie`` rows for every video in the
        batch, grouped per video.
        """
        x = []
        batch_start = index * self.batch_size
        # Use the instance's own file list rather than a same-named global.
        batch_end = min((index + 1) * self.batch_size, len(self.video_file_names))
        for video_index in range(batch_start, batch_end):
            filename = self.video_file_names[video_index]
            if self.video_groups is not None:
                video_path = os.path.join(
                    self.videos_directory, self.video_groups[video_index], filename)
            else:
                video_path = os.path.join(self.videos_directory, filename)

            try:
                video_frames = list(
                    read_faces_from_video(video_path, img_size=self.image_size))
            except Exception:
                # Deliberate best effort: one unreadable video must not abort
                # the whole run; it is recorded and replaced by blank frames.
                self.broken_files.append(filename)
                video_frames = []

            x.extend(self._match_frame_count(video_frames))
        x = np.array(x) / 255.0

        return x

In [6]:
# Sorted list of the .mp4 files to score; the order fixes which row of the
# predictions belongs to which video.
video_file_names = np.array(sorted(
    name for name in os.listdir(TEST_VIDEOS_DIRECTORY) if name.endswith(".mp4")
))

# Generator producing FRAMES_PER_VIDEO face crops for each of those videos.
data_generator = FacesDataGenerator(
    video_file_names=video_file_names,
    videos_directory=TEST_VIDEOS_DIRECTORY,
    batch_size=BATCH_SIZE,
    frames_per_movie=FRAMES_PER_VIDEO,
    image_size=IMG_SIZE,
)

model = tf.keras.models.load_model(PRETRAINED_MODEL_PATH)
model.run_eagerly = False

# NOTE(review): with use_multiprocessing=True the generator presumably runs in
# worker processes on copies of `data_generator`, so `broken_files` on this
# object may stay empty even when videos failed -- confirm.
predictions = model.predict(
    data_generator,
    verbose=1,
    workers=4,
    use_multiprocessing=True,
    max_queue_size=BATCH_SIZE,
)



In [None]:
# Show which videos the generator recorded as failed during face extraction.
print('Broken files:', data_generator.broken_files)

In [None]:
# Sanity check: at least one frame-level prediction must be below 0.9.
assert np.any(predictions < 0.9)

In [7]:
# One row per video, one column per face crop, then average across the crops
# to get a single score for each video.
predictions_grouped = np.asarray(predictions).reshape(
    (len(video_file_names), FRAMES_PER_VIDEO))
predictions_mean = predictions_grouped.mean(axis=1)

In [8]:
# Videos recorded as broken were scored on blank frames, so their averaged
# score carries no information; replace it with the neutral value 0.5.
broken_file_indexes = np.isin(video_file_names, data_generator.broken_files)
predictions_mean[broken_file_indexes] = 0.5
# Keep every score inside [0.1, 0.9]. The original line clipped the undefined
# name `all_predictions`, which raised NameError and left the values that go
# into the submission unclipped.
predictions_mean = np.clip(predictions_mean, 0.1, 0.9)

In [9]:
# Two-column table (filename, label) written without the index column.
submission_df = pd.DataFrame({"filename": video_file_names}).assign(label=predictions_mean)
submission_df.to_csv("submission.csv", index=False)