In [None]:
# Install facenet-pytorch from a wheel file under /kaggle/input (a local path,
# not a package index).
!pip install /kaggle/input/facenet-torch-model/facenet_pytorch-2.2.7-py3-none-any.whl

# Ask the package for its torch home directory; the checkpoints below are
# copied underneath it.
from facenet_pytorch.models.inception_resnet_v1 import get_torch_home
torch_home = get_torch_home()

# Copy model checkpoints to torch cache so they are loaded automatically by the package
!mkdir -p $torch_home/checkpoints/
!cp /kaggle/input/facenet-torch-pretrained/vggface2_DG3kwML46X.pt $torch_home/checkpoints/vggface2_DG3kwML46X.pt
!cp /kaggle/input/facenet-torch-pretrained/vggface2_G5aNV2VSMn.pt $torch_home/checkpoints/vggface2_G5aNV2VSMn.pt

import os
import glob
import time
import torch
import cv2
from PIL import Image
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
import gc
import math

# See github.com/timesler/facenet-pytorch:
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face

# Run on the first CUDA device when one is available, otherwise on the CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(f'Running on device: {device}')

In [None]:
# Paths of every .mp4 file in the challenge's test_videos directory; the
# inference loop iterates over this list.
filenames = glob.glob('/kaggle/input/deepfake-detection-challenge/test_videos/*.mp4')

In [None]:
# Sample submission frame; its 'filename' column is read and its 'label'
# column is overwritten when the final predictions are assembled.
df = pd.read_csv('/kaggle/input/deepfake-detection-challenge/sample_submission.csv')

In [None]:
# MTCNN face detector, placed on `device` and switched to eval mode. It is the
# default detector of face_crop, which calls its detect() method.
# (See the facenet-pytorch MTCNN docs for the meaning of margin/keep_all/factor.)
mtcnn = MTCNN(margin=14, keep_all=True, factor=0.5, device=device).eval()

In [None]:
def face_crop(filepath, batch_size, detector = mtcnn):
    """Decode every second frame of a video and run face detection on them.

    Parameters
    ----------
    filepath : str
        Path of the video, opened with ``cv2.VideoCapture``.
    batch_size : int
        Number of frames handed to each ``detector.detect`` call.
    detector : object, optional
        Object exposing ``detect(frames) -> (boxes, probs)``. Defaults to the
        notebook-level ``mtcnn``. The argument is now actually honoured;
        previously the global ``mtcnn`` was always used.

    Returns
    -------
    result_dict : dict
        ``'boxes'`` / ``'probs'``: the detector outputs of all batches,
        concatenated (assumed to be one entry per kept frame).
        ``'ptype2'``: True when at least one entry of ``'boxes'`` is None or empty.
        ``'ptype1'``: True when at least one entry of ``'boxes'`` holds more
        than one box.
    video_frames : list
        The kept frames converted from BGR to RGB, in decoding order.
    """
    result_dict = {'ptype1' : False,
                   'ptype2': False,
                   'boxes' : [],
                   'probs' : []}
    video_frames = []

    v_cap = cv2.VideoCapture(filepath)
    try:
        v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        for j in range(v_len):
            # grab() advances the stream for every frame; the (expensive)
            # decode in retrieve() is only needed for the frames we keep.
            v_cap.grab()
            if j % 2:
                continue
            success, frame = v_cap.retrieve()
            if not success:
                continue
            video_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even if decoding raised.
        v_cap.release()

    # Detect faces batch by batch to bound the detector's memory use.
    for start in range(0, len(video_frames), batch_size):
        boxes, probs = detector.detect(video_frames[start:start + batch_size])
        result_dict['boxes'].extend(boxes)
        result_dict['probs'].extend(probs)

    # len()-based emptiness test: `a == []` is not reliable when the detector
    # returns numpy arrays.
    result_dict['ptype2'] = any(a is None or len(a) == 0 for a in result_dict['boxes'])
    result_dict['ptype1'] = any(a is not None and len(a) > 1 for a in result_dict['boxes'])

    return result_dict, video_frames

In [None]:
def check_overlap(box1, box2, frac=0.5):
    """Tell whether ``box2`` lies close enough to ``box1`` to be the same face.

    Both boxes are ``(x1, y1, x2, y2)`` sequences. Each corner coordinate of
    ``box2`` must fall strictly within ``frac`` times the width (for x) or
    height (for y) of ``box1`` around the matching coordinate of ``box1``.

    Returns True when all four coordinates satisfy that, False otherwise.
    """
    ref_x1, ref_y1, ref_x2, ref_y2 = box1[:]
    cand_x1, cand_y1, cand_x2, cand_y2 = box2[:]

    # Tolerances are derived from the reference box only.
    tol_x = frac * abs(ref_x1 - ref_x2)
    tol_y = frac * abs(ref_y1 - ref_y2)

    checks = (
        (ref_x1, cand_x1, tol_x),
        (ref_x2, cand_x2, tol_x),
        (ref_y1, cand_y1, tol_y),
        (ref_y2, cand_y2, tol_y),
    )
    return all(ref - tol < cand < ref + tol for ref, cand, tol in checks)

def get_known_faces(boxes):
    """Group per-frame face boxes into per-face tracks.

    Parameters
    ----------
    boxes : sequence
        One entry per frame: either None or an iterable of
        ``(x1, y1, x2, y2)`` boxes. Coordinates are truncated to int.

    Returns
    -------
    dict
        ``{'faceN': {frame_index: [x1, y1, x2, y2], ...}, ...}``. A box joins
        every existing track whose most recently added box overlaps it
        (per ``check_overlap``); otherwise it starts a new track.

    Notes
    -----
    Boxes with any coordinate <= 0 are ignored.

    The new-track key is derived from the dictionary size at insertion time.
    Previously the size was sampled once per frame, so two unmatched faces in
    the same frame received the same key and the second silently replaced the
    first.
    """
    boxes_int = [
        [list(map(int, box)) for box in fboxes] if fboxes is not None else None
        for fboxes in boxes
    ]
    known_faces_dict = {}
    for i, frame_boxes in enumerate(boxes_int):
        if not frame_boxes:
            continue
        for box in frame_boxes:
            # exclude boxes that have a non-positive coordinate
            if not (np.array(box) > 0).all():
                continue
            exist = False
            for track in known_faces_dict.values():
                # Compare against the latest box recorded for this track.
                if check_overlap(list(track.values())[-1], box):
                    track[i] = box
                    exist = True
            if not exist:
                # Tracks are never removed, so size + 1 is always a fresh key.
                known_faces_dict['face' + str(1 + len(known_faces_dict))] = {i: box}
    return known_faces_dict


def filter_known_faces(known_faces_dict, len_thresh=51):
    """Keep only the tracks that contain more than ``len_thresh`` entries.

    Returns a new dict; the kept values are the same objects as in the input.
    """
    long_tracks = {}
    for face_id, frame_boxes in known_faces_dict.items():
        if len(frame_boxes) > len_thresh:
            long_tracks[face_id] = frame_boxes
    return long_tracks

def filter_known_faces_by_size(known_faces_dict, dim_thresh=51):
    """Keep only the tracks whose average box is larger than ``dim_thresh``.

    For each track the ``(x1, y1, x2, y2)`` boxes are averaged coordinate-wise;
    the track is kept when both the mean width and the mean height strictly
    exceed ``dim_thresh``. Returns a new dict in the input's order.
    """
    def _is_large(track):
        mean_x1, mean_y1, mean_x2, mean_y2 = np.array(list(track.values())).mean(axis=0)
        wide_enough = np.abs(mean_x1 - mean_x2) > dim_thresh
        tall_enough = np.abs(mean_y1 - mean_y2) > dim_thresh
        return wide_enough and tall_enough

    return {face_id: track
            for face_id, track in known_faces_dict.items()
            if _is_large(track)}


def get_boxes_filter_ptype1(filtered_known_faces_dict, len_boxes):
    """Turn per-face tracks back into a per-frame list of boxes.

    Returns a list of ``len_boxes`` lists; entry ``k`` collects, in track
    order, the box each track recorded for frame index ``k`` (empty when no
    track has a box for that frame).
    """
    per_frame = [[] for _ in range(len_boxes)]
    for track in filtered_known_faces_dict.values():
        for frame_idx, box in track.items():
            per_frame[frame_idx].append(box)
    return per_frame

In [None]:
def fill_missing_box(boxes_ori):
    """Fill a single frame that has no box from its neighbouring frames.

    Parameters
    ----------
    boxes_ori : list
        One entry per frame; a missing frame is an empty sequence (or None).

    Returns
    -------
    boxes_filled : list
        A shallow copy of ``boxes_ori`` with the gap filled when possible.
        The input list itself is left untouched.
    processed : bool
        True only when exactly one frame was missing and it could be filled.

    Notes
    -----
    A gap at either end copies the adjacent frame's entry; an interior gap
    gets the element-wise floor-mean of the previous and next entries, stored
    as nested lists like the other entries. Any other number of gaps (or a
    list too short to have a neighbour) prints a message and returns
    ``processed=False``.
    """
    boxes_filled = list(boxes_ori)
    # len()-based test so entries that are numpy arrays are handled as well.
    missing_idx = [i for i, box in enumerate(boxes_filled)
                   if box is None or len(box) == 0]

    if len(missing_idx) != 1 or len(boxes_filled) < 2:
        print("Too many missing boxes, not processed")
        return boxes_filled, False

    gap = missing_idx[0]
    if gap == 0:
        boxes_filled[0] = boxes_filled[1]
    elif gap == len(boxes_filled) - 1:
        boxes_filled[-1] = boxes_filled[-2]
    else:
        midpoint = (np.array(boxes_filled[gap - 1]) + np.array(boxes_filled[gap + 1])) // 2
        boxes_filled[gap] = midpoint.tolist()
    return boxes_filled, True

In [None]:
class Combine(torch.nn.Module):
    """Per-frame CNN followed by an LSTM and a single-unit sigmoid head.

    Parameters
    ----------
    CNN_model : torch.nn.Module
        Applied to each frame independently; must produce 512 features per
        frame (the LSTM input size).
    """

    def __init__(self, CNN_model):
        super(Combine, self).__init__()
        # Attribute names are part of the checkpoint's state_dict keys.
        self.cnn = CNN_model
        self.rnn = torch.nn.LSTM(512, 64, 1, batch_first=True)
        self.fc = torch.nn.Linear(64,1)

    def forward(self, x):
        """Score a batch of frame sequences.

        ``x`` has five dimensions, unpacked as ``(b, t, c, w, h)``. Returns a
        ``(b, 1)`` tensor of sigmoid outputs computed from the LSTM's last
        time step.
        """
        b, t, c, w, h = x.size()
        # Fold time into the batch axis so the CNN sees individual frames.
        c_in = x.reshape(b*t, c, w, h)
        c_out = self.cnn(c_in)
        # Restore the (batch, time, features) layout using the actual sequence
        # length rather than a hard-coded 50.
        r_in = c_out.view(b, t, -1)
        self.rnn.flatten_parameters()
        r_out, _ = self.rnn(r_in)
        fc_in = r_out[:,-1,:]
        fc_out = self.fc(fc_in)
        return torch.sigmoid(fc_out)

In [None]:
# InceptionResnetV1 backbone built with pretrained=None; the weights come
# from the checkpoint that is loaded into the combined model further down.
resnet = InceptionResnetV1(pretrained=None, num_classes=8631, device=device)

In [None]:
# Wrap the backbone in the CNN + LSTM + linear head defined by Combine.
model = Combine(resnet)

In [None]:
# Report the visible GPU count, wrap the model in DataParallel and move it to `device`.
# NOTE(review): the checkpoint is loaded into the wrapped model afterwards, so
# its keys presumably carry DataParallel's 'module.' prefix — confirm before
# removing this wrapper.
print("Let's use", torch.cuda.device_count(), "GPUs!")
model = torch.nn.DataParallel(model)
model.to(device)

In [None]:
# Load the trained weights. map_location remaps the stored tensors onto the
# device chosen above, so a checkpoint saved on a GPU still loads on the CPU
# fallback.
# NOTE: torch.load unpickles the file; only use it on checkpoints you trust.
checkpoint = torch.load('/kaggle/input/cnn-rnn-trained/CNN_RNN_model_to_submit.pth', map_location=device)

In [None]:
# Restore the trained weights; load_state_dict raises if the keys do not match.
model.load_state_dict(checkpoint)
# Switch to inference mode so batch-norm uses its stored running statistics
# and dropout is disabled while predicting (trailing ';' hides the repr).
model.eval();

In [None]:
def _crop_face_square(frame, box, size):
    """Cut a ``size`` x ``size`` window centred on ``box`` out of ``frame``, zero-padding past the image border."""
    cx1, cy1, cx2, cy2 = list(map(int, box))
    width, height = cx2 - cx1, cy2 - cy1

    # Grow (or shrink, for boxes larger than `size`) symmetrically; an odd
    # remainder goes to the left / top edge so the span is exactly `size`.
    marginx = (size - width) // 2
    x_lo = cx1 - marginx - (size - width) % 2
    x_hi = cx2 + marginx
    marginy = (size - height) // 2
    y_lo = cy1 - marginy - (size - height) % 2
    y_hi = cy2 + marginy

    # Clamp the window to the image and remember how much fell outside.
    rows, cols = frame.shape[0], frame.shape[1]
    pad_y1, pad_y2 = max(-y_lo, 0), max(y_hi - rows, 0)
    pad_x1, pad_x2 = max(-x_lo, 0), max(x_hi - cols, 0)
    y_lo, y_hi = max(y_lo, 0), min(y_hi, rows)
    x_lo, x_hi = max(x_lo, 0), min(x_hi, cols)

    window = frame[y_lo:y_hi, x_lo:x_hi, :]
    padded_frame = np.pad(window, ((pad_y1, pad_y2), (pad_x1, pad_x2), (0, 0)), mode='constant')
    assert padded_frame.shape == (size, size, 3), \
    'the coords are {}, {}, {},{}, frame shape {}'.format(x_lo, x_hi, y_lo, y_hi, window.shape)
    return padded_frame


def build_sample_for_estimation(video_frames, boxes, n_frames=50, face_size=160, stride=6, offset=3):
    """Build the fixed-length stack of face crops that is fed to the model.

    Parameters
    ----------
    video_frames : sequence of ndarray
        Frames as ``(height, width, 3)`` arrays.
    boxes : sequence
        ``boxes[j][0]`` is the ``(x1, y1, x2, y2)`` box used for frame ``j``.
    n_frames : int, optional
        Number of crops in the output (default 50).
    face_size : int, optional
        Side length of each square crop (default 160).
    stride, offset : int, optional
        Frames ``offset, offset + stride, ...`` are sampled (defaults 6 and 3).

    Returns
    -------
    ndarray
        ``float64`` array of shape ``(n_frames, 3, face_size, face_size)``,
        scaled as ``(pixel - 127.5) * 0.0078125``. When fewer than
        ``n_frames`` frames can be sampled, the last crop is repeated.

    Raises
    ------
    ValueError
        If the clip is too short for even one frame to be sampled
        (previously an opaque IndexError).
    """
    v_len = len(video_frames)
    sample = np.arange(0, np.minimum(v_len - 1, n_frames * stride - 1), stride) + offset

    # Visit only the sampled indices that exist instead of scanning every frame.
    faces = [_crop_face_square(video_frames[j], boxes[j][0], face_size)
             for j in sample if j < v_len]
    if not faces:
        raise ValueError('No frame could be sampled, len: {}, sample: {}'.format(v_len, sample))

    # Pad short clips by repeating the last available crop.
    faces.extend([faces[-1]] * (n_frames - len(faces)))
    assert len(faces) == n_frames, 'Not enough faces, len: {}, sameple: {}'.format(v_len, sample)

    faces = np.moveaxis(np.array(faces), 3, 1)   # (N, H, W, C) -> (N, C, H, W)
    faces = (faces - 127.5) * 0.0078125

    return faces.astype('float')

In [None]:
# Maps a video's base filename to its predicted score; filled by the inference loop.
submission = {}
# Counts the videos that were not scored by the model and got the 0.5 default.
false_counter = 0

In [None]:
# Score every test video. A video is sent to the model only when exactly one
# face track survives the filters and every sampled frame has a box for it;
# every other video receives the neutral score 0.5.
for filename in tqdm(filenames):
    video_name = os.path.basename(filename)
    with torch.no_grad():
        result_dict, vid_frames = face_crop(filename, 64)
    boxes_ori = result_dict['boxes']

    # Track faces across frames, then drop short-lived and small tracks.
    known_faces_dict = get_known_faces(boxes_ori)
    filtered_faces_1 = filter_known_faces(known_faces_dict)
    filtered_faces_2 = filter_known_faces_by_size(filtered_faces_1)
    boxes_filtered = get_boxes_filter_ptype1(filtered_faces_2, len(boxes_ori))
    result_dict['boxes'] = boxes_filtered

    # ptype1: the number of surviving tracks is not exactly one.
    result_dict['ptype1'] = len(filtered_faces_2) != 1
    # ptype2: at least one frame has no box left after filtering.
    result_dict['ptype2'] = any(a is None or len(a) == 0 for a in result_dict['boxes'])

    # A single missing frame can be filled from its neighbours. (This step
    # used to be written twice; the second copy could never succeed where the
    # first had failed.)
    if result_dict['ptype2'] and not result_dict['ptype1']:
        boxes_filled, processed = fill_missing_box(result_dict['boxes'])
        if processed:
            result_dict['boxes'] = boxes_filled
            result_dict['ptype2'] = False

    if not result_dict['ptype2'] and not result_dict['ptype1']:
        print('Estimating by model')
        sample_to_train = build_sample_for_estimation(vid_frames, result_dict['boxes'])
        with torch.no_grad():
            prob = model(torch.from_numpy(sample_to_train.reshape(1,50,3,160,160)).type(torch.FloatTensor).to(device))
        prob_val = prob.item()
        # NaN fails both comparisons, so it falls back to 0.5 like any other
        # out-of-range value (the old NaN branch called .append on a dict).
        submission[video_name] = prob_val if 0 <= prob_val <= 1 else 0.5
    else:
        false_counter += 1
        submission[video_name] = 0.5

In [None]:
def fill_df(x):
    """Look up the score stored for filename ``x`` in ``submission``.

    Returns the stored value as a float when it lies strictly between 0 and 1.
    Returns 0.5 in every other case: unknown filename, NaN, a value that
    cannot be converted to float, or a value outside (0, 1). The last case
    previously fell through and returned None.
    """
    try:
        val = float(submission[x])
    except (KeyError, TypeError, ValueError):
        return 0.5
    # NaN fails both comparisons and therefore also ends up at 0.5.
    if 0 < val < 1:
        return val
    return 0.5

In [None]:
# Look up each filename's stored score and write its complement (1 - score)
# into the 'label' column.
# NOTE(review): presumably the model score and the competition label have
# opposite polarity — confirm against the training code.
df['label'] = 1. - df['filename'].apply(fill_df)

In [None]:
# Write the submission file to the working directory, without the index column.
df.to_csv('submission.csv', index=False)

In [None]:
# Sanity check: 20-bin histogram of the labels that were written out.
plt.hist(df.label, 20)
plt.show()