In [1]:
# Import utilities
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Import Image manipulation
from PIL import Image

# Import data visualization
from matplotlib import pyplot as plt
import matplotlib.patches as patches
import matplotlib

# Import PyTorch
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset, DataLoader
# import the library and helpers
import imageio
import imgaug as ia
from imgaug import augmenters as iaa

# CELEB DF Dataset

In [2]:
#CELEB-DF dataset

#image = imageio.imread('Dataset/Celeb-DF-v2/dataset_24/real/id0_0004_0.jpg')
# Folder holding the real Celeb-DF frames and the '.jpg' file names found in it.
data_dir = 'Dataset/Celeb-DF-v2/dataset_24/real/'
# os.listdir returns entries in arbitrary order; sorting gives every later
# consumer of this list the same, reproducible file order.
real_data = sorted(f for f in os.listdir(data_dir) if f.endswith('.jpg'))
# Disabled experiment: keep only the files whose name starts with id0_ or id1_.
# new_real = []
# for i in real_data:
#     str = i
#     chunks = str.split('_')
#     if(chunks[0] == "id0"):
#         new_real.append(i)
#     elif(chunks[0] == "id1"):
#         new_real.append(i)


In [10]:
# import albumentations package
import albumentations as A

# import pytorch utilities from albumentations
from albumentations.pytorch.transforms import img_to_tensor

# Augmentation pipeline shared by all datasets in this notebook.
# Every stage is applied independently with the probability given by its `p`.
augmentation_pipeline = A.Compose(
    transforms=[
        # Mirror left-right for 50% of images.
        A.HorizontalFlip(p=0.5),
        # For 50% of images, apply exactly one of: random gamma or
        # random brightness/contrast.
        A.OneOf(
            transforms=[A.RandomGamma(), A.RandomBrightnessContrast()],
            p=0.5,
        ),
        # Hue/saturation/value shift, ISO noise and Gaussian noise,
        # each for 50% of images.
        A.HueSaturationValue(p=0.5),
        A.ISONoise(p=0.5),
        A.GaussNoise(p=0.5),
    ],
    p=1,
)


In [9]:
#REAL_IMAGE APPLYING TO THE PIPELINE
# Write 20 randomly augmented copies of every real Celeb-DF image. Output files
# are numbered consecutively (0000.jpg, 0001.jpg, ...) across the whole folder.
out_dir = 'Dataset/Celeb-DF-v2/data_aug/real/'
# Create the output folder up front so a missing directory does not abort the run.
os.makedirs(out_dir, exist_ok=True)

count = 0  # running index used as the output file name
for j in real_data:
    im = imageio.imread(os.path.join(data_dir, j))
    for _ in range(20):
        # Each call draws a fresh random augmentation of the same source image.
        image_aug = augmentation_pipeline(image=im)['image']
        imageio.imwrite(os.path.join(out_dir, "%04d.jpg" % count), image_aug)
        count += 1

In [10]:
# GETTING FAKE IMAGE
# Folder holding the fake Celeb-DF frames and the '.jpg' file names found in it.
data_dir_f = 'Dataset/Celeb-DF-v2/dataset_24/fake/'
# os.listdir returns entries in arbitrary order; sorting gives every later
# consumer of this list the same, reproducible file order.
fake_data = sorted(f for f in os.listdir(data_dir_f) if f.endswith('.jpg'))
# Disabled experiment: keep only the files whose name starts with id0_ or id1_.
# new_fake = []
# for i in fake_data:
#     str = i
#     chunks = str.split('_')
#     if(chunks[0] == "id0"):
#         new_fake.append(i)
#     elif(chunks[0] == "id1"):
#         new_fake.append(i)
        


In [11]:
# FAKE_IMAGE APPLYING IT TO THE PIPELINE
# Write one randomly augmented copy of every fake Celeb-DF image. Output files
# are numbered consecutively (0000.jpg, 0001.jpg, ...) across the whole folder.
out_dir = 'Dataset/Celeb-DF-v2/data_aug/fake/'
# Create the output folder up front so a missing directory does not abort the run.
os.makedirs(out_dir, exist_ok=True)

count = 0  # running index used as the output file name
for j in fake_data:
    im = imageio.imread(os.path.join(data_dir_f, j))
    # A single copy per source image; the loop keeps the copy count easy to tune.
    for _ in range(1):
        image_aug = augmentation_pipeline(image=im)['image']
        imageio.imwrite(os.path.join(out_dir, "%04d.jpg" % count), image_aug)
        count += 1

print("COMPLETED!")

COMPLETED!


# Deepfake Detection Challenge Dataset

In [68]:
#REAL IMAGE DATASET
# Folder holding the real DFDC frames and the '.jpg' file names found in it.
ddff_real = 'Dataset/deepfake-detection-challenge/dataset_24/real/'
# os.listdir returns entries in arbitrary order; sorting gives every later
# consumer of this list the same, reproducible file order.
ddff_real_data = sorted(f for f in os.listdir(ddff_real) if f.endswith('.jpg'))

In [70]:
#PIPELINE FOR REAL IMAGE
# Write 15 randomly augmented copies of every real DFDC image. Output files
# are numbered consecutively (0000.jpg, 0001.jpg, ...) across the whole folder.
out_dir = 'Dataset/deepfake-detection-challenge/data_aug/real/'
# Create the output folder up front so a missing directory does not abort the run.
os.makedirs(out_dir, exist_ok=True)

count = 0  # running index used as the output file name
for j in ddff_real_data:
    im = imageio.imread(os.path.join(ddff_real, j))
    for _ in range(15):
        # Each call draws a fresh random augmentation of the same source image.
        image_aug = augmentation_pipeline(image=im)['image']
        # The file index is `count` alone. Adding a per-copy index on top of the
        # running counter (and reusing one variable for both loops) made the
        # numbering jump by two per copy and interleave neighbouring images.
        imageio.imwrite(os.path.join(out_dir, "%04d.jpg" % count), image_aug)
        count += 1

print("COMPLETED!")

COMPLETED!


In [73]:
#FAKE IMAGE DATASET
# Folder holding the fake DFDC frames and the '.jpg' file names found in it.
ddff_fake = 'Dataset/deepfake-detection-challenge/dataset_24/fake/'
# os.listdir returns entries in arbitrary order; sorting gives every later
# consumer of this list the same, reproducible file order.
ddff_fake_data = sorted(f for f in os.listdir(ddff_fake) if f.endswith('.jpg'))

In [74]:
#PIPELINE FOR FAKE IMAGE
# Write 15 randomly augmented copies of every fake DFDC image. Output files
# are numbered consecutively (0000.jpg, 0001.jpg, ...) across the whole folder.
out_dir = 'Dataset/deepfake-detection-challenge/data_aug/fake/'
# Create the output folder up front so a missing directory does not abort the run.
os.makedirs(out_dir, exist_ok=True)

count = 0  # running index used as the output file name
for j in ddff_fake_data:
    im = imageio.imread(os.path.join(ddff_fake, j))
    for _ in range(15):
        # Each call draws a fresh random augmentation of the same source image.
        image_aug = augmentation_pipeline(image=im)['image']
        # The file index is `count` alone. Adding a per-copy index on top of the
        # running counter (and reusing one variable for both loops) made the
        # numbering jump by two per copy and interleave neighbouring images.
        imageio.imwrite(os.path.join(out_dir, "%04d.jpg" % count), image_aug)
        count += 1

print("COMPLETED!")

COMPLETED!


# FF++ Dataset

In [12]:
#REAL IMAGE DATASET
# Folder holding the real FF++ frames and the '.jpg' file names found in it.
ff_real = 'Dataset/extract_data/dataset_24/real/'
# os.listdir returns entries in arbitrary order; sorting gives every later
# consumer of this list the same, reproducible file order.
ff_real_data = sorted(f for f in os.listdir(ff_real) if f.endswith('.jpg'))

In [13]:
#PIPELINE FOR REAL IMAGE
# Write 25 randomly augmented copies of every real FF++ image. Output files
# are numbered consecutively (0000.jpg, 0001.jpg, ...) across the whole folder.
out_dir = 'Dataset/extract_data/data_aug/real/'
# Create the output folder up front so a missing directory does not abort the run.
os.makedirs(out_dir, exist_ok=True)

count = 0  # running index used as the output file name
for j in ff_real_data:
    im = imageio.imread(os.path.join(ff_real, j))
    for _ in range(25):
        # Each call draws a fresh random augmentation of the same source image.
        image_aug = augmentation_pipeline(image=im)['image']
        imageio.imwrite(os.path.join(out_dir, "%04d.jpg" % count), image_aug)
        count += 1

In [14]:
#FAKE IMAGE DATASET
# Folder holding the fake FF++ frames and the '.jpg' file names found in it.
ff_fake = 'Dataset/extract_data/dataset_24/fake/'
# os.listdir returns entries in arbitrary order; sorting gives every later
# consumer of this list the same, reproducible file order.
ff_fake_data = sorted(f for f in os.listdir(ff_fake) if f.endswith('.jpg'))
# Disabled experiment: keep only the files whose name starts with 01_ or 02_.
#ff_new_fake = []
# for i in ff_fake_data:
#     str = i
#     chunks = str.split('_')
#     if(chunks[0] == "01"):
#         ff_new_fake.append(i)
#     elif(chunks[0] == "02"):
#         ff_new_fake.append(i)
        


In [15]:
#PIPELINE FOR FAKE IMAGE
# Write 2 randomly augmented copies of every fake FF++ image. Output files
# are numbered consecutively (0000.jpg, 0001.jpg, ...) across the whole folder.
out_dir = 'Dataset/extract_data/data_aug/fake/'
# Create the output folder up front so a missing directory does not abort the run.
os.makedirs(out_dir, exist_ok=True)

count = 0  # running index used as the output file name
for j in ff_fake_data:
    im = imageio.imread(os.path.join(ff_fake, j))
    for _ in range(2):
        # Each call draws a fresh random augmentation of the same source image.
        image_aug = augmentation_pipeline(image=im)['image']
        imageio.imwrite(os.path.join(out_dir, "%04d.jpg" % count), image_aug)
        count += 1

print("Completed!!")

Completed!!


In [6]:
# Number of '.jpg' files listed from the FF++ real folder (ff_real_data).
len(ff_real_data) 

234

In [7]:
# Number of '.jpg' files listed from the FF++ fake folder (ff_fake_data).
len(ff_fake_data) 

2818