In [38]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
%matplotlib inline 
import cv2 as cv
from PIL import Image
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision import datasets, transforms

100%|██████████| 93/93 [02:43<00:00,  1.76s/it]


In [32]:
from elaInceptionResnet import ElaInceptionResnet

elainceptionresnet = ElaInceptionResnet(classify=True,pretrained=True,num_classes=2)

OrderedDict([('conv2d_1a.conv.weight',
              tensor([[[[ 5.5483e-02,  1.7572e-01,  2.8577e-01],
                        [-4.1948e-02,  6.6362e-02,  1.3249e-01],
                        [-2.2289e-01, -9.0222e-02, -1.6760e-02]],
              
                       [[ 2.0688e-02,  1.8402e-01,  3.0928e-01],
                        [-9.3705e-02,  5.3426e-02,  1.3936e-01],
                        [-3.0190e-01, -1.3297e-01, -4.0702e-02]],
              
                       [[ 9.0688e-02,  1.9830e-01,  3.2082e-01],
                        [-2.0841e-02,  7.2127e-02,  1.4974e-01],
                        [-2.5312e-01, -1.1804e-01, -4.1512e-02]]],
              
              
                      [[[-2.0466e-01, -8.9272e-02, -9.1269e-02],
                        [ 1.4927e-02,  8.2109e-02,  9.1968e-02],
                        [ 1.5996e-01,  2.2482e-01,  2.7860e-01]],
              
                       [[-2.4678e-01, -1.2144e-01, -1.3110e-01],
                        [-5.3458e-03

In [26]:
import torch.nn as nn
import cv2
from PIL import Image as pil_image
from sklearn.metrics import classification_report, confusion_matrix

def test_crop_image(image_path, model, cuda=False):
    """
    Reads a video and evaluates a subset of frames with the a detection network
    that takes in a full frame. Outputs are only given if a face is present
    and the face is highlighted using dlib.
    :param video_path: path to video file
    :param model_path: path to model file (should expect the full sized image)
    :param output_path: path where the output video is stored
    :param start_frame: first frame to evaluate
    :param end_frame: last frame to evaluate
    :param cuda: enable cuda
    :return:
    """
    print('Starting: {}'.format(image_path))

    DATA_FOLDER = image_path
    DATA_FOLDER_FAKE = image_path+'\\fake'
    DATA_FOLDER_REAL = image_path+'\\real'
    fake_image = pd.DataFrame(list(os.listdir(DATA_FOLDER_FAKE)), columns=['Image'])
    real_image = pd.DataFrame(list(os.listdir(DATA_FOLDER_REAL)), columns=['Image'])
    label = []
    for e in fake_image['Image']:
        label.append('fake')
    fake_image['Label'] = label

    label2 = []
    for e in real_image['Image']:
        label2.append('real')
    real_image['Label'] = label2
    train_image = pd.concat([fake_image,real_image],join='outer',ignore_index= True)
    image_path_list = train_image['Image']

    true_label = []
    predict_label =[]
    pbar = tqdm(total=len(image_path_list), position=0, leave=True)
    for i, image_file in enumerate(image_path_list):
        label = train_image.loc[train_image.Image == image_file, 'Label'].values[0]
        if label == 'fake':
            image_folder = DATA_FOLDER_FAKE
            true_label.append(0)
            # continue
        elif label == 'real':
            image_folder = DATA_FOLDER_REAL
            true_label.append(1)

        image_path = image_folder+'\\'+image_file

        cropped_face = cv2.imread(image_path)
    # Actual prediction using our model
        prediction, output = predict_with_model(cropped_face, model,
                                            cuda=cuda)
    # ------------------------------------------------------------------
        output_list = ['{0:.3f}'.format(float(x)) for x in
                        output.detach().cpu().numpy()[0]]
        predict_label.append(prediction)
        pbar.update(1)
    
    label_name = ["Fake","Real"]
    cmtx = pd.DataFrame(
    confusion_matrix(true_label, predict_label, labels=[0,1]), 
    index=['True:Fake', 'True:Real'], 
    columns=['Pred:Fake', 'Pred:Real']
    )
    print(cmtx)
    print(classification_report(true_label, predict_label, target_names=label_name))

def predict_with_model(image, model, post_function=nn.Softmax(dim=1),
                       cuda=True):
    """
    Predicts the label of an input image. Preprocesses the input image and
    casts it to cuda if required

    :param image: numpy image
    :param model: torch model with linear layer at the end
    :param post_function: e.g., softmax
    :param cuda: enables cuda, must be the same parameter as the model
    :return: prediction (1 = fake, 0 = real)
    """
    # Preprocess
    preprocessed_image = preprocess_image(image, cuda)

    # Model prediction
    output = model(preprocessed_image)
    output = post_function(output)

    # Cast to desired
    _, prediction = torch.max(output, 1)    # argmax
    prediction = float(prediction.cpu().numpy())

    return int(prediction), output

def preprocess_image(image, cuda=True):
    """
    Preprocesses the image such that it can be fed into our network.
    During this process we envoke PIL to cast it into a PIL image.

    :param image: numpy image in opencv form (i.e., BGR and of shape
    :return: pytorch tensor of shape [1, 3, image_size, image_size], not
    necessarily casted to cuda
    """
    # Revert from BGR
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Preprocess using the preprocessing function used during training and
    # casting it to PIL image
    preprocess = transforms.Compose([
                    transforms.Resize(256),
                    np.float32,
                    transforms.ToTensor(),
                ])
    preprocessed_image = preprocess(pil_image.fromarray(image))
    # Add first dimension as the network expects a batch
    preprocessed_image = preprocessed_image.unsqueeze(0)
    if cuda:
        preprocessed_image = preprocessed_image.cuda()
    return preprocessed_image

100%|██████████| 100/100 [06:04<00:00,  3.64s/it]


In [37]:
IMAGEPATH = r".\Kittiwat_dataset\dataset\crop\test"

test_crop_image(IMAGEPATH, elainceptionresnet, cuda=False)

  0%|          | 0/93 [00:00<?, ?it/s]

Starting: .\Kittiwat_dataset\dataset\crop\test


100%|██████████| 93/93 [05:19<00:00,  3.44s/it]
100%|██████████| 93/93 [00:18<00:00,  6.11it/s]

           Pred:Fake  Pred:Real
True:Fake         48          0
True:Real         17         28
              precision    recall  f1-score   support

        Fake       0.74      1.00      0.85        48
        Real       1.00      0.62      0.77        45

    accuracy                           0.82        93
   macro avg       0.87      0.81      0.81        93
weighted avg       0.87      0.82      0.81        93



In [28]:
!pip install ipywidgets



same_original_fake_train_sample_video = list(meta_train_df.loc[meta_train_df.original=='xmkwsnuzyq.mp4'].index)
for video_file in same_original_fake_train_sample_video[1:4]:
    print(video_file)
    extract_image_and_face(video_file)