In [1]:
import json
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms.functional as t_F
import torchvision.models as models
import torchvision.transforms as transforms
import torch.utils.data as data
import torchvision
from torch.autograd import Variable
from torch.utils.data import Dataset
import cv2

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score

In [57]:
# set path
data_path = "kaggle/input/deepfake-detection-challenge/train_sample_videos"
save_model_path = "kaggle/input/single-frame/"
meta_data = "metadata.json"

res_size = 224        # ResNet image size

# training parameters
k = 2             # number of target category
epochs = 30        # training epochs
batch_size = 32
learning_rate = 1e-3
log_interval = 10   # interval for displaying training info

In [37]:
def get_X(data_folder, valid=False):
    X = []
    y = []
    videos = os.listdir(data_folder)
    if valid:
         with open(os.path.join(data_folder, meta_data)) as json_file:
            label_data = json.load(json_file)
    for v in videos:
        if v.endswith('mp4'):
            X.append(os.path.join(data_folder, v))
            if valid:
                if label_data[v]['label'] == 'FAKE':
                    y.append(1)
                else:
                    y.append(0)
    return X, y

In [5]:
videos = os.listdir(data_path)

In [21]:
videos[0]

'rvvpazsffd.mp4'

In [22]:
results = videos[0].endswith('mp4')

In [23]:
results

True

In [13]:
if False:
    with open(os.path.join(data_path, meta_data)) as json_file:
        label_data = json.load(json_file)

In [19]:
with open(os.path.join(data_path, meta_data)) as json_file:
    label_data = json.load(json_file)

In [24]:
label_data['aagfhgtpmv.mp4']

{'label': 'FAKE', 'split': 'train', 'original': 'vudstovrck.mp4'}

In [98]:
data_folder = data_path
valid = False
X = []
y = []
videos = os.listdir(data_folder)
if valid:
     with open(os.path.join(data_folder, meta_data)) as json_file:
        label_data = json.load(json_file)
for v in videos:
    if v.endswith('mp4'):
        X.append(os.path.join(data_folder, v))
        if label_data[v]['label'] == 'FAKE':
            y.append(1)
        else:
            y.append(0)

In [97]:
label_data['aagfhgtpmv.mp4']['split']

'train'

In [39]:
train_X, train_y = get_X(data_path, valid=True)

In [41]:
# Detect devices
use_cuda = torch.cuda.is_available()                   # check if GPU exists
device = torch.device("cuda" if use_cuda else "cpu")   # use CPU or GPU

# Data loading parameters
params = {'batch_size': batch_size, 'shuffle': True, 'pin_memory': True} if use_cuda else {}



In [42]:
params

{'batch_size': 32, 'shuffle': True, 'pin_memory': True}

In [43]:
use_cuda

True

In [44]:
device

device(type='cuda')

In [45]:
models.resnet50

<function torchvision.models.resnet.resnet50(pretrained=False, progress=True, **kwargs)>

In [49]:
resnet50 = models.resnet50()

In [None]:
dir(resnet50)

In [54]:
from torchsummary import summary

In [53]:
!pip3 install torchsummary --user

Collecting torchsummary
  Using cached https://files.pythonhosted.org/packages/7d/18/1474d06f721b86e6a9b9d7392ad68bed711a02f3b61ac43f13c719db50a6/torchsummary-1.5.1-py3-none-any.whl
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
[33mYou are using pip version 19.0.2, however version 19.3.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [58]:
# Create model
model_ft = models.resnet18()
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 2)
model_ft = model_ft.to(device)
#Load model
encoder_model_path = os.path.join(save_model_path, 'cnn_encoder_epoch1.pth')
model_ft.load_state_dict(torch.load(encoder_model_path))

<All keys matched successfully>

In [59]:
help(summary)

Help on function summary in module torchsummary.torchsummary:

summary(model, input_size, batch_size=-1, device='cuda')



In [None]:
summary(model_ft, (3, 224, 224))

In [70]:
class FrameDataset(Dataset):
    """Dataset Class for Loading Video"""

    def __init__(self, files, labels, num_frames, transform=None, test=False):
        """
        """
        self.files = files
        self.labels  = labels
        self.num_frames = num_frames
        self.max_num_frames = 60
        self.transform = transform
        self.test = test
        self.frame_no = num_frames
        self.face_cascade = cv2.CascadeClassifier('kaggle/input/single-frame/haarcascade_frontalface_default.xml')

    def face_detect(self, frame):
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Resize frame of video to 1/4 size for faster face detection processing
        small_frame = cv2.resize(gray, (0, 0), fx=0.25, fy=0.25)
        # Detect the faces
        faces = self.face_cascade.detectMultiScale(small_frame, 1.1, 4)
        return faces


    def __len__(self):
        return len(self.files)


    def readVideo(self, videoFile):

        # Load the cascade

        # Open the video file
        cap = cv2.VideoCapture(videoFile)
        # cap.set(1, self.frame_no)
        # nFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # frames = torch.FloatTensor(self.channels, self.timeDepth, self.xSize, self.ySize)

        attempts = 0
        while attempts < self.max_num_frames:
            ret, frame = cap.read()
            attempts += 1
            if ret:
                last_good_frame = frame
                try:
                    faces = self.face_detect(frame)
                    # Face detected
                    if len(faces) > 0:
                        # Get the face, if more than two, use the whole frame
                        if len(faces) > 1:
                            break
                        x, y, w, h = faces[0] * 4
                        face_img = frame[y: y + h, x: x + w]
                        frame = torch.from_numpy(face_img)
                        # HWC2CHW
                        frame = frame.permute(2, 0, 1)
                        if self.transform is not None:
                            frame = t_F.to_pil_image(frame)
                            frame = self.transform(frame)
                            cap.release()
                            return frame
                except:
                    print("Face detection error")
            else:
                break

        frame = torch.from_numpy(last_good_frame)
        # HWC2CHW
        frame = frame.permute(2, 0, 1)
        if self.transform is not None:
            frame = t_F.to_pil_image(frame)
            frame = self.transform(frame)
        cap.release()
        return frame

    def __getitem__(self, index):

        file = self.files[index]
        X = self.readVideo(file)
        if self.test:
            y = self.labels[index]
        else:
            y = torch.LongTensor([self.labels[index]])  # (labels) LongTensor are for int64 instead of FloatTensor

        return X, y

In [71]:
# Detect devices
use_cuda = torch.cuda.is_available()                   # check if GPU exists
device = torch.device("cuda" if use_cuda else "cpu")   # use CPU or GPU

# Data loading parameters
params = {'batch_size': batch_size, 'shuffle': True, 'pin_memory': True} if use_cuda else {}

train_X, train_y = get_X(data_path, valid=True)

In [None]:
print(train_X, train_y)

In [86]:
transform = transforms.Compose([transforms.Resize([res_size, res_size]),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

# selected_frames = np.arange(begin_frame, end_frame, skip_frame).tolist()
num_frames = 60

train_set = FrameDataset(train_X, train_y, num_frames, transform=transform)
train_loader = data.DataLoader(train_set, **params)

In [74]:
type(train_set)

__main__.FrameDataset

In [102]:
dir(train_set)

['__add__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'face_cascade',
 'face_detect',
 'files',
 'frame_no',
 'labels',
 'max_num_frames',
 'num_frames',
 'readVideo',
 'test',
 'transform']

In [88]:
train_set.transform

Compose(
    Resize(size=[224, 224], interpolation=PIL.Image.BILINEAR)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)

In [106]:
train_set.frame_no

60

In [118]:
train_set.__getitem__(0)[0].shape

torch.Size([3, 224, 224])

In [119]:
train_set.__getitem__(0)[1]

tensor([1])