In [None]:
!pip install facenet-pytorch

In [None]:
from facenet_pytorch.models.inception_resnet_v1 import get_torch_home
# NOTE(review): presumably the directory where facenet-pytorch caches downloaded
# model weights -- confirm. The value is not referenced by any later visible cell.
torch_home = get_torch_home()

In [None]:
import os
import glob
import time
import torch
import cv2
from PIL import Image
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm

# See github.com/timesler/facenet-pytorch:
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face

# Use the first CUDA GPU when one is visible to torch; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
print(f'Running on device: {device}')

In [None]:
# Face detector, switched to inference mode; DetectionPipeline calls it on
# batches of frames.
mtcnn = MTCNN(
    margin=14,
    keep_all=True,
    factor=0.5,
    device=device,
).eval()

# Facial recognition model with 'vggface2' pretrained weights, also in
# inference mode; it produces the embeddings consumed by process_faces.
resnet = InceptionResnetV1(
    pretrained='vggface2',
    device=device,
).eval()

In [None]:
class DetectionPipeline:
    """Pipeline class for detecting faces in the frames of a video file."""

    def __init__(self, detector, n_frames=None, batch_size=60, resize=None):
        """Constructor for DetectionPipeline class.

        Arguments:
            detector {callable} -- Face detector. It is called with a list of PIL images
                (one batch of frames) and must return an iterable of per-batch results.

        Keyword Arguments:
            n_frames {int} -- Total number of frames to load. These will be evenly spaced
                throughout the video. If not specified (i.e., None), all frames will be loaded.
                (default: {None})
            batch_size {int} -- Batch size to use with the face detector. (default: {60})
            resize {float} -- Fraction by which to resize frames from original prior to face
                detection. A value less than 1 results in downsampling and a value greater than
                1 results in upsampling. (default: {None})
        """
        self.detector = detector
        self.n_frames = n_frames
        self.batch_size = batch_size
        self.resize = resize

    def __call__(self, filename):
        """Load frames from a video file and detect faces.

        Arguments:
            filename {str} -- Path to video.

        Returns:
            list -- Detector outputs for every sampled frame that could be decoded,
                concatenated across batches in frame order.
        """
        v_cap = cv2.VideoCapture(filename)
        faces = []
        try:
            # The reported frame count can exceed the number of decodable frames,
            # so individual reads below may still fail.
            v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # Pick 'n_frames' evenly spaced frames to sample
            if self.n_frames is None:
                sample = np.arange(0, v_len)
            else:
                sample = np.linspace(0, v_len - 1, self.n_frames).astype(int)
            # A set gives constant-time membership tests inside the frame loop.
            wanted = set(sample.tolist())

            frames = []
            for j in range(v_len):
                # grab() advances the reader cheaply; only sampled frames are decoded.
                v_cap.grab()
                if j not in wanted:
                    continue

                success, frame = v_cap.retrieve()
                if not success:
                    continue
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = Image.fromarray(frame)

                # Resize frame to desired size
                if self.resize is not None:
                    frame = frame.resize(tuple(int(d * self.resize) for d in frame.size))
                frames.append(frame)

                # When batch is full, detect faces and reset frame list
                if len(frames) >= self.batch_size:
                    faces.extend(self.detector(frames))
                    frames = []

            # Flush the final partial batch. Doing this after the loop (instead of when
            # the last sampled index is reached) keeps pending frames from being lost
            # when that last frame fails to decode.
            if frames:
                faces.extend(self.detector(frames))
        finally:
            # Always free the capture handle, even if decoding or detection raises.
            v_cap.release()

        return faces

In [None]:
import copy
def process_faces(faces, resnet, n_faces=290, device=None):
    """Turn the detected faces of one video into a fixed-length feature vector.

    The face tensors are concatenated, truncated to the first ``n_faces`` rows,
    embedded with ``resnet``, and each embedding is reduced to its Euclidean
    distance from the mean embedding (centroid) of the video.

    Arguments:
        faces {list} -- Per-frame detector outputs; ``None`` entries (frames without
            faces) are skipped. The remaining entries are assumed to be tensors that
            can be concatenated along dim 0 -- TODO confirm against the detector.
        resnet {callable} -- Embedding model mapping a batch of faces to a 2-D tensor
            of feature vectors.

    Keyword Arguments:
        n_faces {int} -- Number of faces used per video. Videos with fewer detected
            faces are rejected so every feature vector has the same length.
            (default: {290})
        device {str or torch.device} -- Device the faces are moved to before
            embedding. If None, the device of the model's first parameter is used;
            if the model exposes no parameters the faces stay where they are.
            (default: {None})

    Returns:
        numpy.ndarray of shape ``(n_faces,)`` with the distances, or an empty list
        when no face was detected or fewer than ``n_faces`` faces are available.
    """
    # Filter out frames without faces
    detected = [f for f in faces if f is not None]
    if len(detected) == 0:
        return []

    if device is None:
        # Follow the model rather than depending on a notebook-level global.
        params = resnet.parameters() if hasattr(resnet, 'parameters') else iter(())
        first_param = next(params, None)
        if first_param is not None:
            device = first_param.device

    batch = torch.cat(detected)
    if device is not None:
        batch = batch.to(device)

    if len(batch) < n_faces:
        return []
    batch = batch[:n_faces]

    # Inference only: disabling autograd also lets the result be converted to numpy.
    with torch.no_grad():
        # Generate facial feature vectors using a pretrained model
        embeddings = resnet(batch)

        # Calculate centroid for video and distance of each face's feature vector from centroid
        centroid = embeddings.mean(dim=0)
        distances = (embeddings - centroid).norm(dim=1)

    return distances.cpu().numpy()

In [None]:
import json

# Define face detection pipeline
detection_pipeline = DetectionPipeline(detector=mtcnn, batch_size=60, resize=0.25)

# Labels for the sample videos, keyed by video file name.
with open('../input/deepfake-detection-challenge/train_sample_videos/metadata.json') as f:
    data = json.load(f)

# Get all sample videos. glob returns paths in arbitrary order, so sort them to make
# the order of X / y (and therefore the later train/test split) reproducible.
filenames = sorted(glob.glob('/kaggle/input/deepfake-detection-challenge/train_sample_videos/*.mp4'))
total_files = len(filenames)

X = []  # one feature vector per usable video
y = []  # matching labels: 1 for 'FAKE', 0 otherwise
start = time.time()
n_processed = 0

with torch.no_grad():
    for i, filename in tqdm(enumerate(filenames), total=len(filenames)):
        print(i, filename)
        # Reset per video so a failure never counts the previous video's faces.
        faces = []
        try:
            # Load frames and find faces
            faces = detection_pipeline(filename)

            # Calculate embeddings
            z = process_faces(faces, resnet)
            if len(z) != 0:
                # Look the label up by file name (not by a fixed string offset) and do
                # it before appending, so X and y always grow together.
                label = data[os.path.basename(filename)]['label']
                X.append(z)
                y.append(1 if label == 'FAKE' else 0)

        except KeyboardInterrupt:
            print('\nStopped.')
            break

        except Exception as e:
            # Skip the video entirely; appending a placeholder to X alone would leave
            # X and y with different lengths.
            print(f'Skipping {filename}: {e!r}')

        n_processed += len(faces)

In [None]:
# Discard the first collected sample and its label.
# NOTE(review): the reason for dropping it is not visible here, and the cell is not
# idempotent -- every re-run removes one more sample. Confirm it is still needed.
X, y = X[1:], y[1:]

In [None]:
from sklearn.model_selection import train_test_split

# Split features and labels with a fixed seed.
# NOTE(review): test_size=0.85 leaves only 15% of the samples for training --
# confirm this is intended rather than a swapped train/test fraction.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.85,
    random_state=5,
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline: fit on the training split, predict the test split.
clf = LogisticRegression(random_state=0)
clf.fit(X_train, y_train)
y_pred_lr = clf.predict(X_test)


In [None]:
from sklearn.metrics import accuracy_score
# Accuracy of the logistic-regression predictions on the test split.
print(accuracy_score(y_true=y_test, y_pred=y_pred_lr))

In [None]:
# for i in range(len(y_test)):
#     if y_test[i] == y_pred_lr[i] and y_test[i]==0:
#         print(y_test[i])

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes baseline: fit on the training split, predict the test split.
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred_gnb = gnb.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score
# Accuracy of the naive Bayes predictions on the test split.
print(accuracy_score(y_true=y_test, y_pred=y_pred_gnb))

In [None]:
from sklearn import svm

# Support-vector classifier baseline. Note that this rebinds `clf`, which held the
# logistic-regression model above.
clf = svm.SVC(gamma='auto').fit(X_train, y_train)
y_pred_svm = clf.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score
# Accuracy of the SVM predictions on the test split.
print(accuracy_score(y_true=y_test, y_pred=y_pred_svm))