# Multiple Instance Learning (MIL) for DeepFake Detection

This project was conceived in a conversation with Chris Farnan at PaigeAI. The idea was to apply MIL to DeepFake detection, using the technique that PaigeAI used successfully to find tumorous slides among the thousands of samples available to them. This script was used to prepare the data consumed by the training script.

In [None]:
import matplotlib.pyplot as plt
import cv2
import os
import pandas as pd
import numpy as np
import json
from sklearn.model_selection import train_test_split

In [None]:
# Seed NumPy's global random number generator so NumPy-based randomness is repeatable between runs
np.random.seed(42)

In [None]:
# More than 475GB of data was available, described by the metadata in 10 json files.
# Only a subset is used here because the hard drive did not have enough space for all of it.
json_filenames=[
    '../train_sample_videos/metadata.json',
    '../train_sample_videos/metadata_prt2.json',
    '../train_sample_videos/metadata_prt3.json',
]
lstSlides=[]   # one video filename per metadata entry
lstTargets=[]  # matching labels: 1 for 'FAKE', 0 for anything else
for json_filename in json_filenames:
    with open(json_filename) as json_file:
        data=json.load(json_file)
    # Each top-level key of a metadata file is a video filename
    for video_name, video_info in data.items():
        is_fake=video_info['label']=='FAKE'
        lstSlides.append(video_name)
        lstTargets.append(1 if is_fake else 0)

In [None]:
# Stratified split with scikit-learn: 90% of the videos for training, 10% for testing
(
    lstSlides_train,
    lstSlides_test,
    lstTargets_train,
    lstTargets_test,
) = train_test_split(
    lstSlides,
    lstTargets,
    test_size=0.1,
    random_state=42,
    stratify=lstTargets,
)

In [None]:
#Number of FAKE-labelled (target == 1) files in the train data-set
lstTargets_train.count(1)

In [None]:
import argparse

from yolo.yolo import YOLO

In [None]:
class Args:
    """Plain attribute container holding the settings object passed to ``YOLO(...)``."""

    # Detector files (presumably weights, anchor boxes and class names - confirm against yolo.yolo)
    model = 'model-weights/YOLO_Face.h5'
    anchors = 'cfg/yolo_anchors.txt'
    classes = 'cfg/face_classes.txt'

    # Numeric detector settings
    score = 0.5
    iou = 0.45
    img_size = (416, 416)

    # Input / output selection
    image = False
    video = 'samples/abofeumbvv.mp4'  # alternative sample: 'samples/acqfdwsrhi.mp4'
    output = 'outputs/'


# Instantiate the settings object used when constructing the detector
args = Args()

In [None]:
import os
import colorsys
import numpy as np
import cv2
import matplotlib.pyplot as plt

from yolo.model import eval

from keras import backend as K
from keras.models import load_model
from timeit import default_timer as timer
from PIL import ImageDraw, Image

In [None]:
def write_image(filename,frame):
    """Write ``frame`` to ``filename`` by delegating to the frame's own ``save`` method.

    Args:
        filename: Destination path handed unchanged to ``frame.save``.
        frame: Any object exposing ``save(path)``.
    """
    frame.save(filename)

In [None]:
import json
import numpy as np
from PIL import Image

Frames_per_row=10  # number of face tiles placed side by side in one mosaic row
Max_Frame=100      # number of face tiles collected per video before its mosaic is written


#if not os.path.exists(output_dir):
#    os.mkdir(output_dir)
# Directory prefix that is prepended to every video filename read from the metadata
mpg_inputdir='../train_sample_videos/'
#mpg_inputdir='../difficult_videos/'
#This function processes the videos to create a Mosaic. It uses YOLO to detect a Face and then places it in a Mosaic.
#Each video will create a Mosaic of multiple faces detected in the video.
_TILE_SIZE = 224  # side length, in pixels, of every face tile in the mosaic


def _crop_bounds(lo, hi, limit, size=_TILE_SIZE):
    """Return ``(start, stop)`` of a ``size``-long window centred on ``[lo, hi]`` inside ``[0, limit]``.

    The window is shifted (never stretched) when it would cross either edge, so
    it is exactly ``size`` long whenever ``limit >= size``.
    """
    start = (lo + hi - size) // 2
    start = max(0, min(start, limit - size))
    return start, min(start + size, limit)


def _build_mosaic(model, capture):
    """Fill one mosaic with the first ``Max_Frame`` face crops found in ``capture``.

    Returns:
        ``(mosaic_image, tile_origins)`` once ``Max_Frame`` tiles were pasted, or
        ``None`` when the video ran out of frames before that.
    """
    n_rows = -(-Max_Frame // Frames_per_row)  # ceiling division keeps a partial last row
    mosaic = Image.new('RGB', (Frames_per_row * _TILE_SIZE, n_rows * _TILE_SIZE))
    origins = []

    while len(origins) < Max_Frame:
        ret, frame = capture.read()
        if not ret:
            return None

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)
        # Snapshot the pixels before detection, as the original did; detect_image
        # returns an image of its own and may draw on the one it is given (TODO confirm).
        np_image = np.asarray(image)
        _, faces = model.detect_image(image)
        print("Face:", faces)

        n_img_rows, n_img_cols = np_image.shape[:2]
        for face in faces:
            # The box is consumed as (row_start, col_start, row_end, col_end): width and
            # height are reversed in the face structure, so face[0]/face[2] run along the
            # frame height. Presumably (top, left, bottom, right) - confirm against YOLO.
            r_lo, c_lo, r_hi, c_hi = (int(np.round(v)) for v in face[:4])
            r0, r1 = _crop_bounds(r_lo, r_hi, n_img_rows)
            c0, c1 = _crop_bounds(c_lo, c_hi, n_img_cols)
            print("Face info:", r0, c0, r1, c1, r_hi - r_lo, c_hi - c_lo)

            tile = Image.fromarray(np_image[r0:r1, c0:c1, :])
            index = len(origins)
            origin = ((index % Frames_per_row) * _TILE_SIZE,
                      (index // Frames_per_row) * _TILE_SIZE)
            mosaic.paste(tile, origin)
            origins.append(origin)
            if len(origins) >= Max_Frame:
                print("WARNING: FRAME COUNT BREACHED...")
                break

        if len(origins) < Max_Frame:
            print("****Frame:", len(origins))

    return mosaic, origins


def process_video_mosaic(model,json_filename,lstFiles,output_dir=None):
    """Create one face mosaic per listed video and return the MIL library dictionary.

    For every video named in the metadata file that is also in ``lstFiles``, the
    frames are read in order, ``model.detect_image`` finds faces, and a 224x224
    crop around each face is pasted into a mosaic of ``Frames_per_row`` tiles per
    row. When ``Max_Frame`` tiles have been collected the mosaic is saved as
    ``<output_dir>/<video name>.png``.

    A video contributes to the result only if its mosaic was written, so the five
    returned lists always have the same length and stay aligned by index.

    Args:
        model: Object exposing ``detect_image(pil_image) -> (image, faces)``.
        json_filename: Path of a metadata JSON file mapping video filename to a
            dict with a ``'label'`` entry.
        lstFiles: Video filenames to process; all others are skipped.
        output_dir: Directory for the mosaics. Defaults to
            ``'MosaicFiles_Strat<Max_Frame>/'``. Created if missing.

    Returns:
        dict with the keys ``"slides"`` (mosaic paths), ``"grid"`` (per mosaic, the
        top-left pixel of every tile), ``"targets"`` (1 for ``'FAKE'``, else 0),
        ``"mult"`` (always 1) and ``"level"`` (always 0).
    """
    if output_dir is None:
        output_dir='MosaicFiles_Strat'+str(Max_Frame)+'/'
    os.makedirs(output_dir, exist_ok=True)

    wanted=set(lstFiles)  # O(1) membership test per metadata entry
    lstSlides=[]
    lstGrid=[]
    lstTargets=[]
    lstMult=[]
    lstLevel=[]

    with open(json_filename) as json_file:
        data=json.load(json_file)

    for Filename, info in data.items():
        print('===================================================')
        print(Filename,'->',info)
        #Processing testing and training separately
        if Filename not in wanted:
            print("Skipping as not in list...")
            continue
        Label=info['label']

        mpg_File=cv2.VideoCapture(os.path.join(mpg_inputdir, Filename))
        try:
            if not mpg_File.isOpened():
                print('Error: reading the input file: ',Filename)
                continue
            print("Reading file...")
            built=_build_mosaic(model, mpg_File)
        finally:
            # Always free the decoder, including on the early `continue` above
            mpg_File.release()

        if built is None:
            # No mosaic was written, so no target may be recorded either
            print("Skipping as fewer than Max_Frame faces were found: ",Filename)
            continue

        mosaic_frm, lstIndices=built
        strFilename=os.path.join(output_dir, os.path.splitext(Filename)[0]+'.png')
        write_image(strFilename,mosaic_frm)

        lstSlides.append(strFilename)
        lstGrid.append(lstIndices)
        lstTargets.append(1 if Label=='FAKE' else 0)
        lstMult.append(1)
        lstLevel.append(0)

    print(lstSlides)
    print(lstGrid)
    print(lstTargets)

    return {
        "slides": lstSlides,
        "grid": lstGrid,
        "targets": lstTargets,
        "mult": lstMult,
        "level": lstLevel,
    }

In [None]:
model=YOLO(args)
# `json_filename` only holds the last path left over from the metadata loading loop,
# while the train split was drawn from every metadata file. Process each file and merge.
dictMILTrain={"slides": [], "grid": [], "targets": [], "mult": [], "level": []}
for metadata_path in json_filenames:
    dictPart=process_video_mosaic(model,metadata_path, lstSlides_train)
    for key, values in dictPart.items():
        dictMILTrain.setdefault(key, []).extend(values)
#model.close_session()

In [None]:
import json

In [None]:
# Use a context manager so the file is flushed and closed as soon as the dump finishes
with open('MIL_data_dict_train.json','w') as train_file:
    json.dump(dictMILTrain,train_file)

In [None]:
# `json_filename` only holds the last path left over from the metadata loading loop,
# while the test split was drawn from every metadata file. Process each file and merge.
dictMILTest={"slides": [], "grid": [], "targets": [], "mult": [], "level": []}
for metadata_path in json_filenames:
    dictPart=process_video_mosaic(model,metadata_path, lstSlides_test)
    for key, values in dictPart.items():
        dictMILTest.setdefault(key, []).extend(values)
model.close_session()

In [None]:
# Use a context manager so the file is flushed and closed as soon as the dump finishes
with open('MIL_data_dict_test.json','w') as test_file:
    json.dump(dictMILTest,test_file)