# Mean Anomaly Detection

In [120]:
import cv2
import numpy as np 
import os
from joblib import Parallel, delayed
from PIL import Image
import torch
from facenet_pytorch import MTCNN

# Run the detector on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# MTCNN face detector: 224px crops with a 40px margin.
# NOTE(review): post_process=False presumably keeps the crops un-normalised
# (the mean pixel values printed further down are in the 75-95 range) —
# confirm against the facenet_pytorch documentation.
detector = MTCNN(
    image_size=224,
    margin=40,
    select_largest=False,
    post_process=False,
    device=device,
)

# list of videos
video_path = '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos'
video_fnames = os.listdir(video_path)
# The directory also contains metadata.json, so keep only the .mp4 entries:
# a non-video file cannot be decoded and would only yield unreadable frames.
videos = [
    os.path.join(video_path, fname)
    for fname in video_fnames
    if fname.lower().endswith('.mp4')
]


# set output path (created on first run, reused afterwards)
path = '/home/ec2-user/SageMaker/data/frames_17x'
os.makedirs(path, exist_ok=True)

# Frame positions sampled from every video: each 30th frame, from 30 up to and including 300.
frame_idxs = list(range(30, 301, 30))

In [99]:
import pandas as pd
# metadata.json lives next to the videos, so derive its location from
# video_path instead of repeating the absolute directory a second time.
metadata_file = os.path.join(video_path, 'metadata.json')

# The JSON is keyed by video filename; transposing gives one row per video
# with the label / original / split columns.
metadata_df = pd.read_json(metadata_file).T

In [100]:
metadata_df.head()

Unnamed: 0,label,original,split
aagfhgtpmv.mp4,FAKE,vudstovrck.mp4,train
aapnvogymq.mp4,FAKE,jdubbvfswz.mp4,train
abarnvbtwb.mp4,REAL,,train
abofeumbvv.mp4,FAKE,atvmxvwyns.mp4,train
abqwwspghj.mp4,FAKE,qzimuostzz.mp4,train


In [108]:
# Look up the metadata row of one specific video by its filename.
metadata_df.loc[metadata_df.index == 'dkwjwbwgey.mp4']

Unnamed: 0,label,original,split
dkwjwbwgey.mp4,FAKE,rfzzrftgco.mp4,train


In [101]:
# Rows of the metadata table whose label marks the video as genuine.
real_df = metadata_df.loc[metadata_df['label'] == "REAL"]

In [103]:
real_df.index[0]

'abarnvbtwb.mp4'

In [104]:
# Rows of the metadata table whose label marks the video as manipulated.
fake_df = metadata_df.loc[metadata_df['label'] == "FAKE"]

In [105]:
fake_df.index[0]

'aagfhgtpmv.mp4'

In [107]:
len(real_df), len(fake_df)

(77, 323)

In [25]:
len(frame_idxs)

10

In [26]:
frame_idxs

[30, 60, 90, 120, 150, 180, 210, 240, 270, 300]

In [121]:
def read_frames_at_indices(videos, frame_idxs):
    """Grab the requested frames from every video, one after another.

    Args:
        videos: iterable of video file paths.
        frame_idxs: iterable of frame numbers to read from each video.

    Returns:
        dict merging the dicts returned by ``grab_frames`` for every
        (video, frame number) pair; a later entry replaces an earlier one
        that has the same key.
    """
    collected = {}
    # Lazily walk the (video, frame) pairs in the same order as a nested loop.
    per_frame = (
        grab_frames(video_file, idx)
        for video_file in videos
        for idx in frame_idxs
    )
    for entry in per_frame:
        collected.update(entry)
    return collected


def grab_frames(video, frame_num):
    """Read a single frame from a video file.

    Args:
        video: path of the video file.
        frame_num: index of the frame to decode (passed to OpenCV as
            ``CAP_PROP_POS_FRAMES``, which is 0-based).

    Returns:
        dict with one entry mapping
        ``<video path without extension>_<frame_num>.jpg`` to the frame
        exactly as ``cv2.VideoCapture.read`` returned it (no colour
        conversion is applied here). The dict is empty when the video
        cannot be opened or the frame cannot be read, e.g. because
        ``frame_num`` lies past the end of the video, so callers never
        receive a ``None`` image.
    """
    # splitext instead of video[:-4]: does not assume a 4-character extension.
    stem, _ext = os.path.splitext(video)
    filename = stem + '_' + str(frame_num) + '.jpg'

    images = {}
    reader = cv2.VideoCapture(video)
    try:
        if reader.isOpened():
            # Seek to the requested frame, then decode it.
            reader.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            success, image = reader.read()
            if success and image is not None:
                images[filename] = image
    finally:
        # Always give the capture handle back, even if decoding raised.
        reader.release()
    return images


def multiprocess_read_frames_at_indices(videos, frame_idxs, job_num):
    """Grab the requested frames from every video using joblib workers.

    Args:
        videos: iterable of video file paths.
        frame_idxs: iterable of frame numbers to read from each video.
        job_num: value forwarded to ``joblib.Parallel`` as ``n_jobs``.

    Returns:
        dict merging the dicts returned by ``grab_frames`` for every
        (video, frame number) pair.
    """
    # One delayed grab_frames call per (video, frame) pair.
    tasks = (
        delayed(grab_frames)(video, frame_num)
        for video in videos
        for frame_num in frame_idxs
    )
    merged = {}
    for partial in Parallel(n_jobs=job_num)(tasks):
        merged.update(partial)
    return merged


def detect_facenet_pytorch(detector, images):
    """Run the face detector on every frame.

    Args:
        detector: callable that takes a PIL image and returns the detection
            result for it (the notebook passes its MTCNN instance).
        images: dict mapping a frame name to an image array.

    Returns:
        dict mapping frame name to whatever ``detector`` returned for that
        frame. Frames whose image is ``None`` are skipped, and frames for
        which conversion or detection raises are left out after a warning.
    """
    import warnings

    faces = {}
    for key, frame in images.items():
        # A frame that could not be read has nothing to detect on.
        if frame is None:
            continue
        try:
            faces[key] = detector(Image.fromarray(frame))
        except Exception as exc:
            # Best effort: keep going with the other frames, but say why this
            # one was dropped. Exception (not a bare except) lets
            # KeyboardInterrupt / SystemExit propagate.
            warnings.warn('face detection failed for {}: {}'.format(key, exc))
    return faces


def write_images_to_disk(path, faces):
    """Save every detected face crop as an image file inside ``path``.

    Args:
        path: output directory.
        faces: dict mapping a frame name (possibly a full path ending in
            ``.jpg``) to the detector result for that frame. The dict is
            modified in place: every entry that cannot be written has its
            value replaced by the string ``'no face detected'``.

    Returns:
        None.
    """
    import warnings

    # Snapshot the items so values can be reassigned while looping.
    for key, face in list(faces.items()):
        # No detection (None) or an entry already marked by an earlier call.
        if face is None or isinstance(face, str):
            faces[key] = 'no face detected'
            continue
        try:
            # assumes face is a (channels, height, width) tensor — the permute
            # moves the channel axis last before handing the array to OpenCV.
            image = face.permute(1, 2, 0).int().numpy()
            filename = os.path.join(path, os.path.basename(key))
            written = cv2.imwrite(filename, image)
        except Exception as exc:
            # Same marker as before for backward compatibility, but make the
            # real cause visible instead of swallowing it silently.
            faces[key] = 'no face detected'
            warnings.warn('could not write face for {}: {}'.format(key, exc))
            continue
        if not written:
            # cv2.imwrite reports failure through its return value.
            warnings.warn('cv2.imwrite failed for {}'.format(filename))

In [None]:
# Sequential alternative, kept for reference:
# images = read_frames_at_indices(videos, frame_idxs)

# Parallel read: one grab_frames task per (video, frame) pair, 4 joblib jobs.
images = multiprocess_read_frames_at_indices(videos, frame_idxs, job_num=4)


In [30]:
# Drop the reference to the frame dict so its memory can be reclaimed.
del images

In [31]:
videos[0]

'/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/cyboodqqyr.mp4'

In [37]:
# First five video paths — presumably a small subset for quick experiments.
sample_vids = videos[:5]

In [38]:
sample_vids

['/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/cyboodqqyr.mp4',
 '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dkwjwbwgey.mp4',
 '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/esckbnkkvb.mp4',
 '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/cttqtsjvgn.mp4',
 '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dzqwgqewhu.mp4']

In [85]:
# Grab the sampled frames of a single video (the second entry of `videos`).
images = read_frames_at_indices([videos[1]], frame_idxs)


In [86]:
# Run the face detector on every grabbed frame.
faces = detect_facenet_pytorch(detector=detector, images=images)
    

In [88]:
# NOTE: despite the name this is a live dict view, not a list; it follows
# later changes made to this particular `faces` dict.
list_of_faces = faces.keys()


In [89]:
list_of_faces

dict_keys(['/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dkwjwbwgey_30.jpg', '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dkwjwbwgey_60.jpg', '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dkwjwbwgey_90.jpg', '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dkwjwbwgey_120.jpg', '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dkwjwbwgey_150.jpg', '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dkwjwbwgey_180.jpg', '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dkwjwbwgey_210.jpg', '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dkwjwbwgey_240.jpg', '/home/ec2-user/SageMaker/data/deepfake-samples/sample/train_sample_videos/dkwjwbwgey_270.jpg'])

In [94]:
# Mean pixel value of every detected face crop. Frames without a tensor
# (no face detected) are skipped by an explicit check rather than by a
# bare except that would also hide unrelated errors.
frame_avg_pixels = [
    torch.mean(faces[face])
    for face in list_of_faces
    if torch.is_tensor(faces.get(face))
]

In [95]:
frame_avg_pixels

[tensor(76.3063),
 tensor(75.9641),
 tensor(75.9244),
 tensor(77.3486),
 tensor(77.5707),
 tensor(77.7624),
 tensor(76.4414),
 tensor(76.8857),
 tensor(78.5662)]

In [133]:
# REAL video sample

# Resolve the path once: it does not depend on the frame number.
full_path = os.path.join(video_path, real_df.index[8])
images = read_frames_at_indices([full_path], frame_idxs)

faces = detect_facenet_pytorch(detector, images)

list_of_faces = faces.keys()
# Mean pixel value per detected face; entries without a tensor (no face
# detected) are skipped explicitly instead of through a bare except.
frame_avg_pixels = [
    torch.mean(faces[face])
    for face in list_of_faces
    if torch.is_tensor(faces.get(face))
]

print(len(frame_avg_pixels))
frame_avg_pixels

9


[tensor(93.9107),
 tensor(90.0922),
 tensor(92.0537),
 tensor(93.2568),
 tensor(92.9295),
 tensor(92.6925),
 tensor(91.7141),
 tensor(94.6976),
 tensor(93.4106)]

In [None]:
# FAKE video sample

# Resolve the path once: it does not depend on the frame number.
full_path = os.path.join(video_path, fake_df.index[8])
images = read_frames_at_indices([full_path], frame_idxs)

faces = detect_facenet_pytorch(detector, images)

list_of_faces = faces.keys()
# Mean pixel value per detected face; entries without a tensor (no face
# detected) are skipped explicitly instead of through a bare except.
frame_avg_pixels = [
    torch.mean(faces[face])
    for face in list_of_faces
    if torch.is_tensor(faces.get(face))
]

print(len(frame_avg_pixels))
frame_avg_pixels

In [20]:
# detect faces and crop
# NOTE(review): the AttributeError recorded under this cell looks like stale
# output from a run where a None frame reached Image.fromarray — confirm and
# clear the output.
faces = detect_facenet_pytorch(detector=detector, images=images)

AttributeError: 'NoneType' object has no attribute '__array_interface__'

In [5]:


# Persist the detected face crops as image files under `path`.
write_images_to_disk(path=path, faces=faces)

In [6]:

import boto3

# Publish a completion notice to the 'deepfake' SNS topic.
sns = boto3.client('sns')
publish_args = {
    'TopicArn': 'arn:aws:sns:us-east-1:364430515305:deepfake',
    'Message': 'finished processing v3',
}
response = sns.publish(**publish_args)


"\nimport boto3\n\nsns = boto3.client('sns')\nresponse = sns.publish(\n    TopicArn='arn:aws:sns:us-east-1:364430515305:deepfake',\n    Message='finished processing v3'\n)\n"