**NOTES**

Using this notebook with the CASIA v2.0 dataset requires a runtime with at least 4 GB of system RAM and 64 GB of disk space. On a Google Colab CPU runtime, running the whole notebook took around 1 hour (about 0.07 compute units).

In [None]:
# Mount Google Drive so the dataset paths under /content/gdrive are reachable.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# All dataset folders hang off a single CASIA2 root on the mounted drive.
_CASIA2_ROOT = '/content/gdrive/MyDrive/TFM/datasets/CASIA2'

CNN_AUTHENTIC_DIRECTORY = f"{_CASIA2_ROOT}/Au"
CNN_TAMPERED_DIRECTORY = f"{_CASIA2_ROOT}/Tp"
CNN_GROUND_TRUTH_DIRECTORY = f"{_CASIA2_ROOT}/gt_all_grayscale"
# Masks that belong to augmented images are kept in a sub-folder of the ground truth.
AUGMENTED_MASK_DIRECTORY = f"{CNN_GROUND_TRUTH_DIRECTORY}/augment"

In [None]:
import numpy as np
import os
import random
import shutil
import cv2
from PIL import Image

# Resize target as (width, height); every image and mask is resized to this.
IMAGE_SIZE = (384, 256)
# Matching NumPy array shape as (height, width); derived so the two cannot drift apart.
ARRAY_SHAPE = IMAGE_SIZE[::-1]
# Radius handed to generate_masked_image_with_contrast_bands when widening edge bands.
BAND_WIDTH = 10

def process_images(directory, is_tampered):
    """Convert the images of `directory` to .npy files and randomly augment them.

    Args:
        directory: Folder holding the source images.
        is_tampered: True for the tampered set; such images are skipped when
            they have no ground-truth mask.
    """
    reset_npy_dir(directory)
    entries = os.listdir(directory)
    print("list_of_files", len(entries), " ", directory)
    for name in entries:
        # Sub-folders (e.g. the npy output folder) are not images.
        if not os.path.isfile(os.path.join(directory, name)):
            continue
        # A tampered image is only usable together with its mask.
        if is_tampered and not cnn_groundtruth_exists_dir(name):
            continue
        cnn_process_file(directory, name)
        augment_image(directory, name, is_tampered)

def process_masks(directory):
    """Convert every ground-truth mask file in `directory` to a .npy array.

    Also writes the all-zero authentic mask before the per-file conversion.

    Args:
        directory: Folder holding the ground-truth mask images.
    """
    reset_npy_dir(directory)
    generate_authentic_mask_file()
    entries = os.listdir(directory)
    print("list_of_files", len(entries), " ", directory)
    # Lazily keep only regular files; sub-folders are skipped.
    mask_files = (
        name for name in entries
        if os.path.isfile(os.path.join(directory, name))
    )
    for name in mask_files:
        cnn_process_groundtruth(directory, name)

def generate_authentic_mask_file():
    """Save an all-zero uint8 array of ARRAY_SHAPE as the authentic mask file.

    Presumably this single file serves as the mask for every untampered image;
    confirm against the code that loads it.
    """
    blank_mask = np.zeros(ARRAY_SHAPE, dtype=np.uint8)
    np.save(get_npy_authentic_file(), blank_mask)

def get_augmented_image_directory(directory):
    """Return the `augment` sub-folder path of `directory`."""
    return f"{directory}/augment"

def augment_image(directory, file, is_tampered):
    """Randomly create one augmented copy of an image (and its mask if tampered).

    Tampered images are augmented with probability 0.53 and authentic ones with
    probability 0.15. The augmentation is a rotation or a flip, picked with
    equal probability. Augmented images are saved under
    ``<directory>/augment/npy`` and augmented masks under
    ``AUGMENTED_MASK_DIRECTORY/npy``.

    Args:
        directory: Folder holding the source image.
        file: File name of the source image inside `directory`.
        is_tampered: True when the image has a ground-truth mask that must be
            augmented with the same transform.
    """
    probabilities = [0.47, 0.53] if is_tampered else [0.85, 0.15]
    if not np.random.choice([0, 1], p=probabilities):
        return

    augmented_image_directory = get_augmented_image_directory(directory)
    os.makedirs(get_npy_directory(augmented_image_directory), exist_ok=True)
    os.makedirs(get_npy_directory(AUGMENTED_MASK_DIRECTORY), exist_ok=True)

    gt_image_filename = CNN_GROUND_TRUTH_DIRECTORY + '/' + get_cnn_groundtruth_filename(file)
    main_image_filename = directory + '/' + file

    for i in range(0, 1):  # Apply 1x augmentation
        # Outputs are stored under the indexed name, so that is the name to
        # look for when deciding whether this augmentation already exists.
        augmented_file = file[:-4] + str(i) + file[-4:]
        image_done = file_exists(get_npy_file(augmented_image_directory, augmented_file))
        # Authentic images never get an augmented mask, so none is required.
        mask_done = (not is_tampered) or file_exists(
            get_npy_file(AUGMENTED_MASK_DIRECTORY, augmented_file)
        )
        if image_done and mask_done:
            continue

        augment_method_is_rotate = np.random.choice([0, 1], p=[0.5, 0.5])
        if augment_method_is_rotate:
            augment_with_rotation(main_image_filename, gt_image_filename, directory, augmented_file, is_tampered)
        else:
            augment_with_flip(main_image_filename, gt_image_filename, directory, augmented_file, is_tampered)

def augment_with_flip(main_image_filename, gt_image_filename, directory, augmented_file, is_tampered):
    """Save a randomly flipped, band-masked copy of an image.

    The flip direction is drawn with equal probability: flip code 1 when
    `left_right` is set, flip code 0 otherwise. Nothing is done when the
    augmented .npy already exists. For tampered images the ground-truth mask
    is flipped the same way.

    Args:
        main_image_filename: Path of the source image.
        gt_image_filename: Path of its ground-truth mask (used if tampered).
        directory: Folder of the source image; output goes to its augment folder.
        augmented_file: File name to store the augmented result under.
        is_tampered: Whether a flipped mask must be produced as well.
    """
    left_right = np.random.choice([0, 1], p=[0.5, 0.5])
    target_npy = get_npy_file(get_augmented_image_directory(directory), augmented_file)
    if file_exists(target_npy):
        return

    flip_code = 1 if left_right else 0
    resized = cv2.resize(cv2.imread(main_image_filename), IMAGE_SIZE, interpolation=cv2.INTER_LANCZOS4)
    flipped = cv2.flip(resized, flip_code)
    banded = generate_masked_image_with_contrast_bands(flipped, BAND_WIDTH)
    np.save(target_npy, banded.astype(np.uint8))

    if is_tampered:
        generate_augmented_flipped_mask(gt_image_filename, left_right, augmented_file)

def generate_augmented_flipped_mask(gt_image_filename, left_right, augmented_file):
    """Save the ground-truth mask flipped the same way as its augmented image.

    Args:
        gt_image_filename: Path of the ground-truth mask image.
        left_right: Truthy for flip code 1, falsy for flip code 0.
        augmented_file: File name to store the mask under in
            AUGMENTED_MASK_DIRECTORY.
    """
    flip_code = 1 if left_right else 0
    mask = cv2.resize(cv2.imread(gt_image_filename), IMAGE_SIZE, interpolation=cv2.INTER_LANCZOS4)
    # Flip first, then reduce the three-channel read to a single grayscale plane.
    gray_mask = cv2.cvtColor(cv2.flip(mask, flip_code), cv2.COLOR_BGR2GRAY)
    target_npy = get_npy_file(AUGMENTED_MASK_DIRECTORY, augmented_file)
    np.save(target_npy, gray_mask.astype(np.uint8))

def augment_with_rotation(main_image_filename, gt_image_filename, directory, augmented_file, is_tampered):
    """Save a randomly rotated, band-masked copy of an image.

    The angle is a random integer from 30 to 45 degrees (inclusive), applied
    around the image centre without scaling and with the output kept at the
    resized image's dimensions. Nothing is done when the augmented .npy
    already exists. For tampered images the ground-truth mask is rotated by
    the same angle.

    Args:
        main_image_filename: Path of the source image.
        gt_image_filename: Path of its ground-truth mask (used if tampered).
        directory: Folder of the source image; output goes to its augment folder.
        augmented_file: File name to store the augmented result under.
        is_tampered: Whether a rotated mask must be produced as well.
    """
    angle = random.randint(30, 45)
    target_npy = get_npy_file(get_augmented_image_directory(directory), augmented_file)
    if file_exists(target_npy):
        return

    resized = cv2.resize(cv2.imread(main_image_filename), IMAGE_SIZE, interpolation=cv2.INTER_LANCZOS4)
    height, width = resized.shape[:2]
    # Rotate about the centre pixel; a scale of 1.0 leaves the size untouched.
    rotation_matrix = cv2.getRotationMatrix2D((width // 2, height // 2), angle, 1.0)
    rotated = cv2.warpAffine(resized, rotation_matrix, (width, height))

    banded = generate_masked_image_with_contrast_bands(rotated, BAND_WIDTH)
    np.save(target_npy, banded.astype(np.uint8))

    if is_tampered:
        generate_augmented_rotated_mask(gt_image_filename, angle, augmented_file)

def generate_augmented_rotated_mask(gt_image_filename, angle, augmented_file):
    """Save the ground-truth mask rotated by the same angle as its augmented image.

    Args:
        gt_image_filename: Path of the ground-truth mask image.
        angle: Rotation angle in degrees, as used for the image.
        augmented_file: File name to store the mask under in
            AUGMENTED_MASK_DIRECTORY.
    """
    mask = cv2.resize(cv2.imread(gt_image_filename), IMAGE_SIZE, interpolation=cv2.INTER_LANCZOS4)
    gray_mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

    height, width = gray_mask.shape[:2]
    # Same centre, angle and unit scale as the image rotation.
    rotation_matrix = cv2.getRotationMatrix2D((width // 2, height // 2), angle, 1.0)
    rotated_mask = cv2.warpAffine(gray_mask, rotation_matrix, (width, height))

    target_npy = get_npy_file(AUGMENTED_MASK_DIRECTORY, augmented_file)
    np.save(target_npy, rotated_mask.astype(np.uint8))

def reset_npy_dir(directory):
    """Make sure the npy output folder of `directory` exists.

    Despite the name, existing contents are left in place.
    """
    os.makedirs(get_npy_directory(directory), exist_ok=True)

def cnn_process_file(directory, file):
    """Convert one image to its .npy form unless that file already exists.

    Args:
        directory: Folder holding the source image.
        file: File name of the source image.
    """
    target = get_npy_file(directory, file)
    if not os.path.exists(target):
        np.save(target, process_image(directory, file))

def process_image(directory, file):
    """Load an image, resize it to IMAGE_SIZE and keep only its contrast bands.

    Args:
        directory: Folder holding the image.
        file: File name of the image.

    Returns:
        The band-masked image as a uint8 array.
    """
    source = cv2.imread(directory + '/' + file)
    resized = cv2.resize(source, IMAGE_SIZE, interpolation=cv2.INTER_LANCZOS4)
    banded = generate_masked_image_with_contrast_bands(resized, BAND_WIDTH)
    return banded.astype(np.uint8)

def cnn_process_groundtruth(directory, file):
    """Convert one ground-truth mask to .npy, reporting when it is already there.

    Args:
        directory: Folder holding the mask image.
        file: File name of the mask image.
    """
    target = get_npy_file(directory, file)
    if not os.path.exists(target):
        np.save(target, process_groundtruth_image(directory, file))
        return
    print("file, ", target, "already exists")

def process_groundtruth_image(directory, file):
    """Load a ground-truth mask, resize it to IMAGE_SIZE and return it as an array.

    Args:
        directory: Folder holding the mask image.
        file: File name of the mask image.

    Returns:
        A uint8 array of shape (height, width) with the resized mask values.

    Raises:
        Whatever ``Image.open`` raises when the file cannot be opened, and
        ValueError from the reshape when the image is not single-band.
    """
    image_filename = directory + '/' + file
    # The context manager closes the opened file even when a later step fails,
    # and an error from Image.open itself propagates unchanged.
    with Image.open(image_filename, 'r') as source:
        resized = source.resize(IMAGE_SIZE, Image.LANCZOS)
        width, height = resized.size
        # One value per pixel is expected here (grayscale mask).
        return np.array(resized.getdata(), dtype=np.uint8).reshape((height, width))

def cnn_groundtruth_exists_dir(file):
    """Tell whether a ground-truth mask is present for image `file`.

    Not every image has one,
    e.g. Tp_S_NNN_S_N_sec00054_sec00055_11341_gt.png does not exist.
    """
    groundtruth_path = get_cnn_groundtruth_path_dir(file)
    return os.path.exists(groundtruth_path)

def get_cnn_groundtruth_path_dir(file):
    """Return the full path of the ground-truth mask belonging to image `file`."""
    return f"{CNN_GROUND_TRUTH_DIRECTORY}/{get_cnn_groundtruth_filename(file)}"

def get_cnn_groundtruth_filename(file):
    """Return the mask file name for image `file`.

    The last four characters (a dot plus a three-letter extension) are
    replaced by ``_gt.png``.
    """
    stem = file[:-4]
    return f"{stem}_gt.png"

In [None]:
import numpy as np

def generate_masked_image_with_contrast_bands(image, extend_radius=3):
    """Black out everything except bands around the strongest edges of `image`.

    Canny thresholds are set to 85% and 95% of the peak Sobel gradient
    magnitude. The resulting edge map is dilated with a square kernel of side
    ``2 * extend_radius + 1``, and pixels outside it are zeroed.

    Args:
        image: Input image array as accepted by cv2.Sobel and cv2.Canny.
        extend_radius: How far, in pixels, each edge is widened on every side.

    Returns:
        An array like `image` holding the original pixels inside the bands and
        zeros elsewhere.
    """
    # Peak gradient magnitude drives both Canny thresholds.
    grad_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)
    peak = np.sqrt(grad_x**2 + grad_y**2).max()
    edges = cv2.Canny(image, threshold1=peak * 0.85, threshold2=peak * 0.95)

    # Widen each edge into a band by dilating with a square kernel.
    side = 2 * extend_radius + 1
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (side, side))
    keep = cv2.dilate(edges, kernel) > 0

    # Copy only the pixels that fall inside the bands.
    banded = np.zeros_like(image)
    banded[keep] = image[keep]
    return banded

In [None]:
def get_npy_directory(directory):
    """Return the `npy` sub-folder path of `directory`."""
    return f"{directory}/npy"

def get_npy_file(directory, file):
    """Return the .npy path for `file` inside the npy folder of `directory`.

    The original file name, extension included, is kept and ``.npy`` appended.
    """
    npy_directory = get_npy_directory(directory)
    return f"{npy_directory}/{file}.npy"

def get_npy_authentic_file():
    """Return the path of `authentic.npy` in the ground-truth npy folder."""
    npy_directory = get_npy_directory(CNN_GROUND_TRUTH_DIRECTORY)
    return f"{npy_directory}/authentic.npy"

def file_exists(file: str) -> bool:
  """Return True when the path `file` exists (file or directory)."""
  return os.path.exists(file)

In [None]:
# Convert the tampered and authentic image sets concurrently, then the masks.
import concurrent.futures

with concurrent.futures.ThreadPoolExecutor() as executor:
    # One task per image directory; both are submitted before either is awaited.
    future_tampered, future_authentic = (
        executor.submit(process_images, image_directory, tampered_flag)
        for image_directory, tampered_flag in (
            (CNN_TAMPERED_DIRECTORY, True),
            (CNN_AUTHENTIC_DIRECTORY, False),
        )
    )

    # result() blocks until the task is done and re-raises any error it hit.
    result_tampered = future_tampered.result()
    result_authentic = future_authentic.result()

# Masks are converted once both image sets are finished.
process_masks(CNN_GROUND_TRUTH_DIRECTORY)

list_of_files 7492   /content/gdrive/MyDrive/TFM/datasets/CASIA2/Au
list_of_files 5124   /content/gdrive/MyDrive/TFM/datasets/CASIA2/Tp
list_of_files 5125   /content/gdrive/MyDrive/TFM/datasets/CASIA2/gt_all_grayscale
