# Creating data splits

## create-unpaired_casia_oulu-casia_data
### Create unpaired split
Built from the Oulu-CASIA and CASIA2 datasets.

In [None]:
import json
from sklearn.model_selection import train_test_split
from pathlib import Path
import random

# Build a balanced, unpaired train/val/test split of the CASIA NIR and VIS
# images and store it (plus the left-over images) in one JSON file.

# Specify the directories
nir_path = Path('data/CASIA_NISVIR/NIR-VIS/NIR/')
vis_path = Path('data/CASIA_NISVIR/NIR-VIS/VIS/')

# Get the contents of the directories. glob() yields entries in an arbitrary,
# filesystem-dependent order, so sort them to start from the same list on
# every run and every machine.
nir_images = sorted(nir_path.glob('*.bmp'))  # replace with your image extension
vis_images = sorted(vis_path.glob('*.jpg'))  # replace with your image extension

# Determine the size of the smaller dataset
min_size = min(len(nir_images), len(vis_images))

# Randomly select 'min_size' images from both NIR and VIS datasets so that
# both spectra contribute the same number of images. A dedicated, seeded
# generator keeps the selection reproducible without touching the global
# `random` state.
sample_rng = random.Random(42)
nir_images_selected = sample_rng.sample(nir_images, min_size)
vis_images_selected = sample_rng.sample(vis_images, min_size)

# Get the images that are not selected (kept in sorted order rather than in
# the arbitrary order a set difference would give)
nir_selected_lookup = set(nir_images_selected)
vis_selected_lookup = set(vis_images_selected)
nir_images_unselected = [p for p in nir_images if p not in nir_selected_lookup]
vis_images_unselected = [p for p in vis_images if p not in vis_selected_lookup]

# Split the NIR images into train, val, and test sets.
# 20% goes to test, then 25% of the remaining 80% goes to val -> 60/20/20.
nir_train_val, nir_test = train_test_split(nir_images_selected, test_size=0.2, random_state=42)
nir_train, nir_val = train_test_split(nir_train_val, test_size=0.25, random_state=42)

# Split the VIS images into train, val, and test sets (same proportions)
vis_train_val, vis_test = train_test_split(vis_images_selected, test_size=0.2, random_state=42)
vis_train, vis_val = train_test_split(vis_train_val, test_size=0.25, random_state=42)

# Convert the Path objects to strings (Path is not JSON serializable)
nir_train = [str(path) for path in nir_train]
nir_val = [str(path) for path in nir_val]
nir_test = [str(path) for path in nir_test]
vis_train = [str(path) for path in vis_train]
vis_val = [str(path) for path in vis_val]
vis_test = [str(path) for path in vis_test]
nir_images_unselected = [str(path) for path in nir_images_unselected]
vis_images_unselected = [str(path) for path in vis_images_unselected]

# Prepare the metadata (image counts per group)
metadata = {
    'nir_images_selected': len(nir_images_selected),
    'vis_images_selected': len(vis_images_selected),
    'nir_images_unselected': len(nir_images_unselected),
    'vis_images_unselected': len(vis_images_unselected),
    'nir_train': len(nir_train),
    'nir_val': len(nir_val),
    'nir_test': len(nir_test),
    'vis_train': len(vis_train),
    'vis_val': len(vis_val),
    'vis_test': len(vis_test),
}

# Prepare the data to be stored in the JSON file
data = {
    'nir_train': nir_train,
    'nir_val': nir_val,
    'nir_test': nir_test,
    'nir_rest': nir_images_unselected,
    'vis_train': vis_train,
    'vis_val': vis_val,
    'vis_test': vis_test,
    'vis_rest': vis_images_unselected,
    'metadata': metadata,
}

# Write the data to the JSON file; create the target folder first so a fresh
# checkout does not fail on the open() call.
Path('splits/new').mkdir(parents=True, exist_ok=True)
with open('splits/new/casia2_data_splits.json', 'w') as f:
    json.dump(data, f)


In [None]:
from skeleton.data.splitter import DatasetSplitter
import json
import os

# Split the Oulu-CASIA images with the project's DatasetSplitter and repack
# its three per-split JSON files into one file using the same key layout as
# the CASIA split file.

# Temporary files handed to the splitter; they are read back and deleted below.
_tmp_split_files = {
    'train': '_train_tmp.json',
    'test': '_test_tmp.json',
    'val': '_val_tmp.json',
}

# define splitter
splitter = DatasetSplitter(
    vl_data_path=Path('data/B_OriginalImg/VL/Strong/'),
    ni_data_path=Path('data/B_OriginalImg/NI/Strong/'),
    train_n_img_picked=5,
    test_n_img_picked=2,
    val_n_img_picked=2,
    json_train_split_pth=Path(_tmp_split_files['train']),
    json_test_split_pth=Path(_tmp_split_files['test']),
    json_val_split_pth=Path(_tmp_split_files['val']),
)

# split the files (presumably this writes the three JSON files configured
# above -- they are read back right below; verify against DatasetSplitter)
splitter()

# Load the temporary JSON files, in train/test/val order
_loaded_splits = {}
for _split_name, _tmp_pth in _tmp_split_files.items():
    with open(_tmp_pth, 'r') as f:
        _loaded_splits[_split_name] = json.load(f)

train_data = _loaded_splits['train']
test_data = _loaded_splits['test']
val_data = _loaded_splits['val']

# Count the images per split and spectrum ('ni' = NIR, 'vl' = VIS)
_nir_counts = {name: len(part['ni']) for name, part in _loaded_splits.items()}
_vis_counts = {name: len(part['vl']) for name, part in _loaded_splits.items()}

# Prepare the metadata
metadata = dict(
    nir_images=sum(_nir_counts.values()),
    vis_images=sum(_vis_counts.values()),
    nir_train=_nir_counts['train'],
    nir_val=_nir_counts['val'],
    nir_test=_nir_counts['test'],
    vis_train=_vis_counts['train'],
    vis_val=_vis_counts['val'],
    vis_test=_vis_counts['test'],
)

# Prepare the data to be stored in the JSON file
data = dict(
    nir_train=train_data['ni'],
    nir_val=val_data['ni'],
    nir_test=test_data['ni'],
    vis_train=train_data['vl'],
    vis_val=val_data['vl'],
    vis_test=test_data['vl'],
    metadata=metadata,
)

# Write the data to the JSON file
with open('splits/new/oulucasia_data_splits.json', 'w') as f:
    json.dump(data, f)

# Remove the temporary files
for _tmp_pth in _tmp_split_files.values():
    os.remove(_tmp_pth)


In [None]:
# Merge the CASIA and Oulu-CASIA split files into one split file.

# Load the existing data from the two files
with open('splits/new/casia2_data_splits.json', 'r') as f:
    casia2_data = json.load(f)
with open('splits/new/oulucasia_data_splits.json', 'r') as f:
    oulucasia_data = json.load(f)

# Merge the data: concatenate the path lists key by key. A key that exists in
# only one file (e.g. 'nir_rest') is treated as an empty list in the other.
# Iterating a merged dict instead of a set keeps the key order of the source
# files, so the written JSON is identical from run to run (set iteration order
# of strings changes with Python's hash randomization).
merged_data = {}
for key in {**casia2_data, **oulucasia_data}:
    if key == 'metadata':
        continue
    merged_data[key] = casia2_data.get(key, []) + oulucasia_data.get(key, [])

# Merge the metadata: add up the counts, treating a missing count as 0
casia2_metadata = casia2_data['metadata']
oulucasia_metadata = oulucasia_data['metadata']
merged_metadata = {
    key: casia2_metadata.get(key, 0) + oulucasia_metadata.get(key, 0)
    for key in {**casia2_metadata, **oulucasia_metadata}
}

# Add the merged metadata to the merged data
merged_data['metadata'] = merged_metadata

# Save the merged data back to the file
with open('splits/new/merged_data_splits.json', 'w') as f:
    json.dump(merged_data, f)

### Preprocess

In [None]:
import json
import pathlib
import os
import click
import cv2

from deepface import DeepFace


class FacePreprocessor:
    """Extract one face per image for the train, test and val splits.

    Every split file is a JSON object with a "vl" and an "ni" list of image
    paths. Each image is passed to ``DeepFace.extract_faces``; the resulting
    face is written to the matching target folder. Optionally, the paths of
    the stored faces are written to new split files of the same layout.

    Args:
        train_split_pth, test_split_pth, val_split_pth: Input split files.
        new_train_vl_pth, new_train_ni_pth, new_test_vl_pth, new_test_ni_pth,
        new_val_ni_pth, new_val_vl_pth: Target folders for the stored faces,
            one per split and spectrum.
        detector_backend: Forwarded to ``DeepFace.extract_faces``.
        target_size: Forwarded to ``DeepFace.extract_faces``.
        new_train_split_pth, new_test_split_pth, new_val_split_pth: Where to
            write the split files listing the stored faces. A falsy value
            (the default ``None``) skips writing that file.
    """

    def __init__(
        self,
        train_split_pth,
        test_split_pth,
        val_split_pth,
        new_train_vl_pth,
        new_train_ni_pth,
        new_test_vl_pth,
        new_test_ni_pth,
        new_val_ni_pth,
        new_val_vl_pth,
        detector_backend,
        target_size,
        new_train_split_pth=None,
        new_test_split_pth=None,
        new_val_split_pth=None,
    ):
        self.train_split_pth = train_split_pth
        self.test_split_pth = test_split_pth
        self.val_split_pth = val_split_pth
        self.new_train_vl_pth = new_train_vl_pth
        self.new_train_ni_pth = new_train_ni_pth
        self.new_test_vl_pth = new_test_vl_pth
        self.new_test_ni_pth = new_test_ni_pth
        self.new_val_ni_pth = new_val_ni_pth
        self.new_val_vl_pth = new_val_vl_pth
        self.detector_backend = detector_backend
        self.target_size = target_size
        # NOTE: these attributes drop the "_pth" suffix of their parameters.
        self.new_train_split = new_train_split_pth
        self.new_test_split = new_test_split_pth
        self.new_val_split = new_val_split_pth

    def detect_and_align_face(self, image_fp):
        """Return the single face found in ``image_fp``, or ``None``.

        ``None`` is returned (after printing a message) when DeepFace raises
        or when it does not report exactly one face.
        """
        try:
            face_objs = DeepFace.extract_faces(
                img_path=image_fp,
                target_size=self.target_size,
                detector_backend=self.detector_backend,
                # NOTE(review): with enforce_detection=False DeepFace may
                # return an entry even when no face is detected -- confirm
                # that the "not found" branch below can actually trigger.
                enforce_detection=False,
            )
        except Exception as e:
            print(f"ERROR at {image_fp}", e)
            return None

        if len(face_objs) != 1:
            print("NOT FOUND OR MULTIPLE FACES!")
            return None

        return face_objs[0]["face"]

    def preprocess_part(self, fps, target_fp, spectra):
        """Store the faces of all images in ``fps`` inside ``target_fp``.

        Args:
            fps: Iterable of source image paths.
            target_fp: Target folder; created if it does not exist.
            spectra: Label that is only used in the progress output.

        Returns:
            List of the paths that were actually written. Images without a
            usable face, or whose file could not be written, are left out.
        """
        # prepare filepath
        os.makedirs(target_fp, exist_ok=True)

        # align faces for all images
        preprocessed_fps = []
        for fp in fps:
            # The last three path components become the file name, which keeps
            # names unique across source sub-folders.
            new_filename = "-".join(pathlib.PurePath(fp).parts[-3:])
            target_path = os.path.join(target_fp, new_filename)

            aligned_face = self.detect_and_align_face(fp)
            if aligned_face is None:
                continue

            # Reverse the channel axis and scale by 255 before writing
            # (presumably DeepFace returns RGB floats in [0, 1] while
            # cv2.imwrite expects BGR in [0, 255] -- TODO confirm).
            aligned_face = 255 * aligned_face[:, :, ::-1]

            # cv2.imwrite reports failure through its return value instead of
            # raising; do not list files that were never written.
            if not cv2.imwrite(target_path, aligned_face):
                print(f"ERROR writing {target_path}")
                continue

            print(f"#{len(preprocessed_fps)} {spectra} Stored: {new_filename}")
            preprocessed_fps.append(target_path)

        return preprocessed_fps

    def preprocess_split(self, split_pth, new_vl_path, new_ni_pth):
        """Preprocess both spectra of one split file.

        Returns:
            ``{"vl": [...], "ni": [...]}`` with the paths of the stored faces.
        """
        with open(split_pth, "r") as f:
            paths = json.load(f)

        vl_preproc_fps = self.preprocess_part(paths["vl"], new_vl_path, "vl")
        ni_preproc_fps = self.preprocess_part(paths["ni"], new_ni_pth, "ni")

        return {"vl": vl_preproc_fps, "ni": ni_preproc_fps}

    def preprocess(self):
        """Preprocess the train, test and val splits, in that order.

        After each split, the paths of the stored faces are written to the
        corresponding new split file if one was configured.
        """
        jobs = (
            (self.train_split_pth, self.new_train_vl_pth,
             self.new_train_ni_pth, self.new_train_split),
            (self.test_split_pth, self.new_test_vl_pth,
             self.new_test_ni_pth, self.new_test_split),
            (self.val_split_pth, self.new_val_vl_pth,
             self.new_val_ni_pth, self.new_val_split),
        )

        for split_pth, new_vl_pth, new_ni_pth, new_split_pth in jobs:
            stored_fps = self.preprocess_split(split_pth, new_vl_pth, new_ni_pth)

            if new_split_pth:
                with open(new_split_pth, "w") as f:
                    json.dump(stored_fps, f)

In [None]:
# Reload the merged CASIA + Oulu-CASIA split written earlier in this notebook.
with open('splits/new/merged_data_splits.json') as merged_file:
    merged_data = json.loads(merged_file.read())

In [None]:
# Run the face preprocessing on the merged split.
# FacePreprocessor reads one JSON file per split with the keys 'ni' and 'vl',
# so the merged split is first written out in that layout to temporary files.
_tmp_split_pths = {
    'train': '_train_split_tmp.json',
    'test': '_test_split_tmp.json',
    'val': '_val_split_tmp.json',
}

try:
    for _split_name, _tmp_pth in _tmp_split_pths.items():
        with open(_tmp_pth, 'w') as f:
            json.dump(
                {
                    'ni': merged_data[f'nir_{_split_name}'],
                    'vl': merged_data[f'vis_{_split_name}'],
                },
                f,
            )

    # VIS faces go to .../A/<split>, NIR faces to .../B/<split>
    preprocessor = FacePreprocessor(
        train_split_pth=_tmp_split_pths['train'],
        test_split_pth=_tmp_split_pths['test'],
        val_split_pth=_tmp_split_pths['val'],
        new_train_vl_pth='data/casia-oulucasia-unpaired/A/train',
        new_train_ni_pth='data/casia-oulucasia-unpaired/B/train',
        new_test_vl_pth='data/casia-oulucasia-unpaired/A/test',
        new_test_ni_pth='data/casia-oulucasia-unpaired/B/test',
        new_val_vl_pth='data/casia-oulucasia-unpaired/A/val',
        new_val_ni_pth='data/casia-oulucasia-unpaired/B/val',
        detector_backend='retinaface',
        target_size=(224, 224),
        new_train_split_pth='splits/new/preproc-merged-train',
        new_test_split_pth='splits/new/preproc-merged-test',
        new_val_split_pth='splits/new/preproc-merged-val',
    )

    preprocessor.preprocess()
finally:
    # Remove the temporary files even when preprocessing fails half-way, so a
    # crashed run does not leave stale split files in the working directory.
    for _tmp_pth in _tmp_split_pths.values():
        if os.path.exists(_tmp_pth):
            os.remove(_tmp_pth)

In [None]:
# Inspect which keys the merged split contains.
merged_data.keys()

## Create-buaa-w_and_wo_stripes-train_test

In [None]:
import os
import numpy as np
import random
from sklearn.model_selection import train_test_split
from PIL import Image
from deepface import DeepFace
from skeleton.inference import CenterFace
import cv2
import glob

# Folder with one sub-folder of images per subject id.
__DATA_FOLDER = 'data/BUAA/BUAAVISNIR/'
# Size every face crop is resized to (passed to cv2.resize and DeepFace).
__TARGET_SIZE = (224, 224)
# Prefix of the output folders; the split name and 'A'/'B' are appended to it.
__TARGET_FOLDER = 'data/buaa-w_and_wo_stripes-train_test/'

# Face detector used by operation_X; created once and reused for all images.
centerface = CenterFace()


def fill_black(image):
    """Pad ``image`` with black pixels to a centered square.

    The result is a uint8 array of shape ``(side, side, channels)`` where
    ``side`` is the larger of the image's height and width. When the padding
    is odd, the extra pixel goes to the bottom/right.
    """
    rows, cols = image.shape[0], image.shape[1]
    side = max(rows, cols)

    canvas = np.zeros((side, side, image.shape[-1]), dtype=np.uint8)

    # Offset of the image inside the canvas (rounded down)
    top = int((side - rows) / 2)
    left = int((side - cols) / 2)

    canvas[top:top + rows, left:left + cols, :] = image

    return canvas


def operation_X(image_pth_A, image_pth_B):
    """Crop the same square face region from image A and image B.

    The face is detected with CenterFace on image A only; the first detection
    is extended to a square and the identical pixel box is cut out of both
    images (this assumes A and B are pixel-aligned -- TODO confirm). Both
    crops are resized to ``__TARGET_SIZE``; crop A is additionally converted
    to a 3-channel gray image by averaging its channels.

    Args:
        image_pth_A: Path of the image the face is detected on.
        image_pth_B: Path of the image that is cropped with the same box.

    Returns:
        Tuple ``(face_A, face_B)`` of uint8 arrays.

    Raises:
        IndexError: If CenterFace reports no detection (``dets[0]``).
        Other errors from OpenCV, e.g. when an image cannot be read.
    """
    # load (OpenCV reads BGR; convert to RGB)
    image_np_A = cv2.imread(str(image_pth_A))
    image_np_A = cv2.cvtColor(image_np_A, cv2.COLOR_BGR2RGB)
    image_np_B = cv2.imread(str(image_pth_B))
    image_np_B = cv2.cvtColor(image_np_B, cv2.COLOR_BGR2RGB)

    # expects 3 channels
    if image_np_A.shape[2] == 1:
        image_np_A = np.concatenate([image_np_A] * 3, axis=-1)

    # predict
    dets, _ = centerface(image_np_A, threshold=0.35)

    # det[0], det[1] / det[2], det[3] are used as left, top / right, bottom
    det = dets[0]
    width = det[2] - det[0]
    height = det[3] - det[1]
    bigger_dimension = max([width, height])
    width_padding = bigger_dimension - width
    height_padding = bigger_dimension - height

    # Extend the shorter side so the box becomes square: rows grow by the
    # height padding, columns by the width padding. (Previously the width
    # padding was applied to both axes, which left the box non-square and
    # distorted the face on resize.) Starts are clamped at 0 because a
    # negative slice start would wrap around to the other image border.
    top = max(0, int(det[1] - height_padding // 2))
    bottom = int(det[3] + height_padding // 2)
    left = max(0, int(det[0] - width_padding // 2))
    right = int(det[2] + width_padding // 2)

    face_A = image_np_A[top:bottom, left:right, :]
    face_B = image_np_B[top:bottom, left:right, :]

    face_A = cv2.resize(face_A, __TARGET_SIZE)
    face_B = cv2.resize(face_B, __TARGET_SIZE).astype(np.uint8)

    # Average the channels of A and replicate the result to 3 channels
    face_A_np = np.mean(face_A, axis=2)
    face_A = np.stack([face_A_np, face_A_np, face_A_np],
                      axis=2).astype(np.uint8)

    # Show both crops side by side in the notebook
    display(Image.fromarray(np.concatenate([face_A, face_B], axis=1)))

    return face_A, face_B


def operation_Y(image_pth_A, image_pth_B):
    """Crop the face from image A with DeepFace and the same area from B.

    DeepFace (retinaface backend, no alignment) extracts the face from image
    A at ``__TARGET_SIZE``. The reported ``facial_area`` is then cut out of
    the channel-averaged image B, padded to a square with black pixels and
    resized to ``__TARGET_SIZE``. Crop A is converted to a 3-channel gray
    image as well.

    Returns:
        Tuple ``(face_A, face_B)`` of uint8 arrays.
    """
    detections = DeepFace.extract_faces(
        img_path=image_pth_A,
        target_size=__TARGET_SIZE,
        detector_backend="retinaface",
        enforce_detection=False,
        align=False
    )

    # Image B: average the channels, then replicate to 3 channels
    gray_B = np.mean(cv2.imread(image_pth_B), axis=2)
    gray_B = np.stack([gray_B] * 3, axis=2)

    # Only the first detection is used
    first = detections[0]
    face_A = (first['face'] * 255).astype(np.uint8)
    area = first['facial_area']
    row_start, col_start = area['y'], area['x']
    face_B = gray_B[row_start:row_start + area['h'],
                    col_start:col_start + area['w']]

    # Square the crop of B before resizing
    face_B = cv2.resize(fill_black(face_B), __TARGET_SIZE).astype(np.uint8)

    gray_A = np.mean(face_A, axis=2)
    face_A = np.stack([gray_A] * 3, axis=2).astype(np.uint8)

    # Show both crops side by side in the notebook
    side_by_side = np.concatenate([face_A, face_B], axis=1)
    display(Image.fromarray(side_by_side))

    return face_A, face_B


# Get the list of patient ids (one sub-folder of the data folder per id).
# os.listdir returns entries in arbitrary order, so the ids are sorted;
# otherwise the seeded split below could differ between machines or runs.
patient_ids = sorted(
    name for name in os.listdir(__DATA_FOLDER)
    if os.path.isdir(os.path.join(__DATA_FOLDER, name))
)

# Split the patient ids into train and test sets (80/20). Splitting by id
# keeps all images of one id in the same set.
train_ids, test_ids = train_test_split(
    patient_ids, test_size=0.2, random_state=42)
# Validation split is currently disabled:
# train_ids, val_ids = train_test_split(train_ids, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2


# Function to process and save images
def process_and_save_images(ids, split):
    """Create and store the face-crop pairs for the given ids.

    For every id, the ``.bmp`` files numbered up to 28 are sorted by number
    and paired consecutively (1st with 2nd, 3rd with 4th, ...). Each pair is
    cropped with ``operation_X`` or ``operation_Y``, chosen at random, and the
    two crops are saved to ``split + 'A'`` and ``split + 'B'``.

    Args:
        ids: Iterable of sub-folder names inside ``__DATA_FOLDER``.
        split: Path prefix of the output folders, e.g. ``'.../train'``.

    Returns:
        Tuple ``(filepaths_A, filepaths_B, invalid)``: the paths of the saved
        A and B crops and the A source paths of pairs that failed.
    """
    out_dir_A = split + 'A'
    out_dir_B = split + 'B'
    # Image.save does not create missing folders
    os.makedirs(out_dir_A, exist_ok=True)
    os.makedirs(out_dir_B, exist_ok=True)

    filepaths_A = []
    filepaths_B = []
    invalid = []
    for _id in sorted(ids):
        patient_folder = os.path.join(__DATA_FOLDER, _id)
        dir_content = [i for i in os.listdir(
            patient_folder) if i[-3:] == 'bmp']
        image_files = sorted(dir_content, key=lambda x: int(
            x.split('.')[0]))  # Sort the files to ensure pairs
        image_files = [i for i in image_files if int(i.split('.')[0]) <= 28]
        for file_A, file_B in zip(image_files[0::2], image_files[1::2]):
            img_path_A = os.path.join(patient_folder, file_A)
            img_path_B = os.path.join(patient_folder, file_B)
            print(img_path_A, img_path_B)

            try:
                if random.choice([True, False]):
                    img_A, img_B = operation_X(img_path_A, img_path_B)
                else:
                    img_A, img_B = operation_Y(img_path_A, img_path_B)
            except Exception:
                # Skip pairs that cannot be processed (e.g. no face found),
                # but let KeyboardInterrupt/SystemExit through so a long run
                # can still be stopped.
                invalid.append(img_path_A)
                continue

            # Save the images
            target_A = os.path.join(out_dir_A, f'{_id}_{file_A}')
            target_B = os.path.join(out_dir_B, f'{_id}_{file_B}')
            Image.fromarray(img_A).save(target_A)
            Image.fromarray(img_B).save(target_B)

            filepaths_A.append(str(target_A))
            filepaths_B.append(str(target_B))

    return filepaths_A, filepaths_B, invalid

In [None]:
# Process and save images for each split (train first, then test).
# Each result is the tuple (filepaths_A, filepaths_B, invalid).
train_prefix = __TARGET_FOLDER + 'train'
out_train = process_and_save_images(train_ids, train_prefix)
# out_val = process_and_save_images(val_ids, __TARGET_FOLDER + 'val')
test_prefix = __TARGET_FOLDER + 'test'
out_test = process_and_save_images(test_ids, test_prefix)

## AffectNet prep

In [None]:
import json

In [None]:
# Load the merged CASIA + Oulu-CASIA split file.
with open('splits/new/merged_data_splits.json', 'r') as split_file:
    casia_oulucasia_split = json.load(split_file)

In [None]:
# Load the existing data from the two per-dataset split files
with open('splits/new/casia2_data_splits.json', 'r') as casia2_file:
    casia2_data = json.loads(casia2_file.read())
with open('splits/new/oulucasia_data_splits.json', 'r') as oulucasia_file:
    oulucasia_data = json.loads(oulucasia_file.read())

In [None]:
# Merge the data: concatenate the path lists key by key. A key that exists in
# only one dataset (e.g. 'nir_rest') is treated as an empty list in the other.
# Iterating a merged dict instead of a set keeps the key order of the source
# data, so the result is identical from run to run (set iteration order of
# strings changes with Python's hash randomization).
merged_data = {}
for key in {**casia2_data, **oulucasia_data}:
    if key == 'metadata':
        continue
    merged_data[key] = casia2_data.get(key, []) + oulucasia_data.get(key, [])

# Merge the metadata: add up the counts, treating a missing count as 0
casia2_metadata = casia2_data['metadata']
oulucasia_metadata = oulucasia_data['metadata']
merged_metadata = {
    key: casia2_metadata.get(key, 0) + oulucasia_metadata.get(key, 0)
    for key in {**casia2_metadata, **oulucasia_metadata}
}

# Add the merged metadata to the merged data
merged_data['metadata'] = merged_metadata

In [None]:
# Inspect the NIR images that were not picked for any split.
merged_data['nir_rest']

In [None]:
import json
import pathlib
import os
import click
import cv2

from deepface import DeepFace


def detect_and_align_face(image_fp, target_size=(256, 256), detector_backend="retinaface"):
    """Return the single face found in ``image_fp``, or ``None``.

    Args:
        image_fp: Path of the image, forwarded to ``DeepFace.extract_faces``.
        target_size: Forwarded to ``DeepFace.extract_faces``. Defaults to the
            previously hard-coded ``(256, 256)``.
        detector_backend: Forwarded to ``DeepFace.extract_faces``. Defaults
            to the previously hard-coded ``"retinaface"``.

    Returns:
        The ``"face"`` entry of the only detection, or ``None`` (after
        printing a message) when DeepFace raises or does not report exactly
        one face.
    """
    try:
        face_objs = DeepFace.extract_faces(
            img_path=image_fp,
            target_size=target_size,
            detector_backend=detector_backend,
            # NOTE(review): with enforce_detection=False DeepFace may return
            # an entry even when no face is detected -- confirm that the
            # "not found" branch below can actually trigger.
            enforce_detection=False,
        )
    except Exception as e:
        print(f"ERROR at {image_fp}", e)
        return None

    if len(face_objs) != 1:
        print("NOT FOUND OR MULTIPLE FACES!")
        return None

    return face_objs[0]["face"]

def preprocess_part(fps, target_fp, spectra):
    """Store the faces of all images in ``fps`` inside ``target_fp``.

    Args:
        fps: Iterable of source image paths.
        target_fp: Target folder; created if it does not exist.
        spectra: Label that is only used in the progress output.

    Returns:
        List of the paths that were actually written. Images without a usable
        face, or whose file could not be written, are left out.
    """
    # prepare filepath
    os.makedirs(target_fp, exist_ok=True)

    # align faces for all images
    preprocessed_fps = []
    for fp in fps:
        # The last three path components become the file name, which keeps
        # names unique across source sub-folders.
        new_filename = "-".join(pathlib.PurePath(fp).parts[-3:])
        target_path = os.path.join(target_fp, new_filename)

        aligned_face = detect_and_align_face(fp)
        if aligned_face is None:
            continue

        # Reverse the channel axis and scale by 255 before writing
        # (presumably DeepFace returns RGB floats in [0, 1] while cv2.imwrite
        # expects BGR in [0, 255] -- TODO confirm).
        aligned_face = 255 * aligned_face[:, :, ::-1]

        # cv2.imwrite reports failure through its return value instead of
        # raising; do not list files that were never written.
        if not cv2.imwrite(target_path, aligned_face):
            print(f"ERROR writing {target_path}")
            continue

        print(f"#{len(preprocessed_fps)} {spectra} Stored: {new_filename}")
        preprocessed_fps.append(target_path)

    return preprocessed_fps

In [None]:
# Extract the faces of the left-over NIR images; the result lists the files
# that were actually stored.
preproc_casia_fps = preprocess_part(
    fps=merged_data['nir_rest'],
    target_fp="data/for_unpaired-casia_preprocessed_rest_of_NIR/",
    spectra="NIR",
)

In [None]:
import os
__BUAA_PREPROC_FOLDER = 'data/Z_PreprocImg-BUAA-centerface-gray-averaged/BUAAVISNIR'

# Collect every file below the folder whose numeric name is even
# (presumably the even-numbered files are the NIR images -- TODO confirm).
buaa_nir_images = []
for root, dirs, files in os.walk(__BUAA_PREPROC_FOLDER):
    # os.walk yields entries in arbitrary order; sorting `dirs` in place fixes
    # the traversal order so the resulting list is the same on every run.
    dirs.sort()
    for file in sorted(files):
        try:
            image_number = int(file.split('.')[0])
        except ValueError:
            # Not an image named by number (e.g. a hidden system file)
            continue
        if image_number % 2 == 0:
            buaa_nir_images.append(os.path.join(root, file))

In [None]:
# Inspect the collected BUAA image paths.
buaa_nir_images

In [None]:
__AFFECTNET_PREPROC_TRAIN_FOLDER = 'data/AffectNet-8Labels/train_set/images'

# Number of VIS images to take: one per available NIR image.
# NOTE: the variable keeps its original (misspelled) name so that other cells
# referring to it keep working.
retrieve_frist_n_images = len(buaa_nir_images) + len(merged_data['nir_rest'])

# Take the first n files in numeric file-name order. Slicing yields exactly n
# entries; the previous `i > n` check collected n + 1.
affectnet_file_names = sorted(
    os.listdir(__AFFECTNET_PREPROC_TRAIN_FOLDER),
    key=lambda x: int(x.split('.')[0]),
)
affectnet_vis_images = [
    os.path.join(__AFFECTNET_PREPROC_TRAIN_FOLDER, f_nms)
    for f_nms in affectnet_file_names[:retrieve_frist_n_images]
]

In [None]:
# Inspect the selected AffectNet image paths.
affectnet_vis_images

In [None]:
import math
# Scratch computation on a 2-D point `a`: print its distance from the origin,
# whether it lies within the unit circle, the remaining distance to the
# circle, and the point scaled by 1 / (1 - a[0]).
a = [0.,0.5]
print(a)

a_first, a_second = a

x = math.sqrt(a_first * a_first + a_second * a_second)
if x <= 1:
    position = "in"
else:
    position = "out"
print("radius", x, "-", position)

r = 1 - x
print("rest to boundary", r)

# NOTE(review): divides by zero when a[0] == 1
enl = 1 / (1 - a_first)
print("enlarge", enl)
print([a_first * enl, a_second * enl])

In [None]:
# Create the output folders for the additional unpaired training data.
# makedirs(exist_ok=True) lets the cell be re-run: os.mkdir raised
# FileExistsError as soon as the folders already existed.
os.makedirs('data/unpaired_additional_train-affectnet_vis_and_buaa_casia_nir', exist_ok=True)
os.makedirs('data/unpaired_additional_train-affectnet_vis_and_buaa_casia_nir/vis', exist_ok=True)
os.makedirs('data/unpaired_additional_train-affectnet_vis_and_buaa_casia_nir/nir', exist_ok=True)

In [None]:
import shutil

# Copy the NIR images (BUAA first, then the preprocessed CASIA rest) into the
# output folder under sequential names. Pairing with the AffectNet list limits
# the number of copies to the number of selected VIS images.
# The VIS copy itself is currently disabled (see the commented-out lines).
nir_source_paths = buaa_nir_images + preproc_casia_fps
for i, (vis_pth, nir_pth) in enumerate(zip(affectnet_vis_images, nir_source_paths)):
#     vis_new_fp = 'data/unpaired_additional_train-affectnet_vis_and_buaa_casia_nir/vis/' + "affnet-" + vis_pth.split('/')[-1]
    # Text after the last '.' of the file name is reused as the extension
    nir_extension = nir_pth.rsplit('/', 1)[-1].rsplit('.', 1)[-1]
    nir_new_fp = f'data/unpaired_additional_train-affectnet_vis_and_buaa_casia_nir/nir/buaa-casia-{i}.{nir_extension}'

#     shutil.copyfile(vis_pth, vis_new_fp)
    shutil.copyfile(nir_pth, nir_new_fp)

In [None]:
# Inspect the NIR images that were not picked for any split.
merged_data['nir_rest']