# Creating data splits

In [117]:
import os
import pandas as pd
import random
import math
import numpy as np


# Set seed for random
random.seed(10)


##########################################################################
##########################################################################


oulu_casia_mongo_patient_ids = [7, 8, 9, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                                61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]

e_eval = ['Neutral', 'Happy', 'Sad', 'Surprise', 'Fear', 'Disgust', 'Anger', 'Contempt', 'None']
e_dict = {0:'Neutral', 1:'Happy', 2:'Sad', 3:'Surprise', 4:'Fear', 5:'Disgust', 6:'Anger', 7:'Contempt', 8:'None'}
e_eval_8 = ['Neutral', 'Happy', 'Sad', 'Surprise', 'Fear', 'Disgust', 'Anger', 'Contempt']
e_dict_8 = {0:'Neutral', 1:'Happy', 2:'Sad', 3:'Surprise', 4:'Fear', 5:'Disgust', 6:'Anger', 7:'Contempt'}
e_dict_8_inv = {'Neutral':0, 'Happy':1, 'Sad':2, 'Surprise':3, 'Fear':4, 'Disgust':5, 'Anger':6, 'Contempt':7}
e_eval_8_wrongorder = ['Neutral', 'Happy', 'Anger', 'Sad', 'Fear', 'Surprise', 'Disgust', 'Contempt']
e_dict_8_wrongorder = {0: 'Neutral', 1:'Happy', 2:'Anger', 3:'Sad', 4:'Fear', 5:'Surprise', 6:'Disgust', 7:'Contempt'}
e_eval_8_alphabetic = ['Anger','Contempt','Disgust','Fear','Happy','Neutral','Sad','Surprise']
e_dict_8_alphabetic = {0:'Anger',1:'Contempt',2:'Disgust',3:'Fear',4:'Happy',5:'Neutral',6:'Sad',7:'Surprise'}
e_dict_8_lower2upper = {'neutral':"Neutral", "happiness":"Happy", "sadness":"Sad", "surprise":"Surprise", "fear": "Fear", "disgust": "Disgust", "anger": "Anger", "contempt": "Contempt"}

# Define valence-arousal values for emotions
# anchors inspired by https://arxiv.org/pdf/2311.14816.pdf
valence_arousal = {
    "neutral": (0., 0.),
    "anger": (-0.51, 0.59),
    "disgust": (-0.60, 0.35),
    "fear": (-0.64, 0.6),
    "happiness": (0.81, 0.51),
    "sadness": (-0.63, -0.27),
    "surprise": (0.4, 0.67)
}

va_recalculate2square_range = False


##########################################################################
##########################################################################


def magnitude(vector):
    return math.sqrt(sum(pow(element, 2) for element in vector))


def circumplex2square(a):
    a = np.array(a)
    a_magn = magnitude(a)

    if a_magn > 1:
        print("Not in range")
        return

    if a[0] == 0 or a[1] == 0:
        return a

    a1_sign = -1 if a[0] < 0 else 1
    a2_sign = -1 if a[1] < 0 else 1
    if abs(a[0] / a[1]) > 1:
        point_on_square = (1., abs(1/a[0]*a[1]))
    else:
        point_on_square = (abs(1/a[1]*a[0]), 1.)
    point_on_square = np.array(
        [a1_sign*point_on_square[0], a2_sign*point_on_square[1]])

    b = a_magn * point_on_square

    return b

# Recalculate if range [-1, 1] is used for oulucasiadb
if va_recalculate2square_range:
    for k, v in valence_arousal.items():
        valence_arousal[k] = circumplex2square(v)


def label_emotion_images(imgs, emotion):
    cat_emotion = []
    spat_emotion = []

    ##############
    # STRATEGY 1 #
    ##############
    # for i, im in enumerate(imgs):
    #     # if first quarter -> neutral, else default emotion
    #     cat_emotion.append("neutral" if i < (len(imgs) / 4) else emotion.lower())

    # # spatial emotions
    # spat_emotion = np.linspace(valence_arousal["neutral"], valence_arousal[emotion.lower()], len(imgs))

    ##############
    # STRATEGY 2 #
    ##############
    for i, im in enumerate(imgs):
        # if first quarter -> neutral, else default emotion
        cat_emotion.append("neutral" if i < (
            len(imgs) / 4) else emotion.lower())

        if i < (len(imgs) / 4):
            spat_emotion.append(valence_arousal["neutral"])
        elif i < (len(imgs) / 4 * 3):
            spat_emotion.append((np.array(
                valence_arousal["neutral"]) + np.array(valence_arousal[emotion.lower()])) / 2)
        else:
            spat_emotion.append(valence_arousal[emotion.lower()])

    return cat_emotion, spat_emotion


def get_files_oulucasia(path, spectra, add_annotations=True):
    @staticmethod
    def get_annotations_oulucasia(df):
        grouped = df.groupby(
            ['oulucasia-patient', 'oulucasia-emotion'])['filename'].apply(list)

        patient_emotion_list = [(filenames, emotion) for (
            patient, emotion), filenames in grouped.items()]
        labels = [label_emotion_images(imgs, emotion)
                  for imgs, emotion in patient_emotion_list]
        expression_list = [l[0] for l in labels]
        valence_list = [l[1] for l in labels]
        arousal_list = [l[1] for l in labels]
 
        flat_expression_list = []
        for row in expression_list:
            flat_expression_list.extend(row)
        flat_valence_list = []
        for row in valence_list:
            flat_valence_list.extend([r[0] for r in row])
        flat_arousal_list = []
        for row in arousal_list:
            flat_arousal_list.extend([r[1] for r in row])

        return flat_expression_list, flat_valence_list, flat_arousal_list

    filepath_list = []
    filename_list = []
    adjusted_unique_filename_list = []
    oulucasia_patient_list = []
    oulucasia_emotion_list = []
    race_list = []
    filetype_list = []

    for i, p in enumerate(sorted(os.listdir(path))):
        for j, e in enumerate(sorted(os.listdir(os.path.join(path, p)))):
            for k, f in enumerate(sorted(os.listdir(os.path.join(path, p, e)))):
                fp = os.path.join(path, p, e, f)
                race = "mongo" if int(
                    p[1:]) in oulu_casia_mongo_patient_ids else "cauca"
                adjusted_unique_filename = "-".join(["oulucasia", p, e, f])

                filepath_list.append(str(fp))
                filename_list.append(f)
                adjusted_unique_filename_list.append(adjusted_unique_filename)
                oulucasia_patient_list.append(p)
                oulucasia_emotion_list.append(e)
                race_list.append(race)
                filetype_list.append(f.split('.')[-1])

    df = pd.DataFrame({
        "db": "oulucasia",
        "spectra": spectra,
        "filepath": filepath_list,
        "filename": filename_list,
        "adjusted_unique_filename": adjusted_unique_filename_list,
        "oulucasia-patient": oulucasia_patient_list,
        "oulucasia-emotion": oulucasia_emotion_list,
        "filetype": filetype_list,
        "race": race_list,
    })

    if add_annotations:
        expression_list, valence_list, arousal_list = get_annotations_oulucasia(
            df)
        df["expression"] = expression_list
        df["valence"] = valence_list
        df["arousal"] = arousal_list
        
        df["expression"] = df.expression.apply(lambda x: e_dict_8_lower2upper[x])

    return df


def get_files_casia(path, spectra):
    filenames = os.listdir(path)
    filepaths = [str(os.path.join(path, f)) for f in filenames]
    adjusted_unique_filename_list = ["-".join(["casia", f]) for f in filenames]
    session_list = [int(f.split('_')[0][1:]) for f in filenames]
    patient_list = [int(f.split('_')[2]) for f in filenames]
    emotion_list = [int(f.split('_')[3].split('.')[0].split('-')[0])
                    for f in filenames]
    filetype_list = [f.split('.')[-1] for f in filenames]

    df = pd.DataFrame({
        "db": "casia",
        "spectra": spectra,
        "filepath": filepaths,
        "filename": filenames,
        "adjusted_unique_filename": adjusted_unique_filename_list,
        "casia-session": session_list,
        "casia-patient": patient_list,
        "casia-emotion": emotion_list,
        "filetype": filetype_list,
        "race": "mongo",
    })

    return df


def get_files_buaa(path, labels_fp=None):
    @staticmethod
    def get_buaa_filetype(i):
        type_ = ''
        if i < 2:
            type_ = "neutral"
        elif i < 18:
            type_ = "tilt"
        elif i < 26:
            type_ = "affection"
        else:
            type_ = "low_light"

        return type_

    filepath_list = []
    filename_list = []
    adjusted_unique_filename_list = []
    buaa_patient_list = []
    filetype_list = []
    buaa_image_type = []
    spectra_list = []

    for p in os.listdir(path):
        if not os.path.isdir(os.path.join(path, p)):
            continue
        dir_content_images = [f for f in os.listdir(os.path.join(path, p)) if f.split('.')[-1] == 'bmp']
        for i, f in enumerate(sorted(dir_content_images, key=lambda x: int(x.split(".")[0]))):
            filename_list.append(f)
            filepath_list.append(str(os.path.join(path, p, f)))
            adjusted_unique_filename_list.append("-".join(["buaa", p, f]))
            buaa_patient_list.append(str(p))
            filetype_list.append(f.split(".")[-1])
            buaa_image_type.append(get_buaa_filetype(i))
            spectra_list.append("vis" if i % 2 == 1 else "nir")

    df = pd.DataFrame({
        "db": "buaa",
        "spectra": spectra_list,
        "filepath": filepath_list,
        "filename": filename_list,
        "adjusted_unique_filename": adjusted_unique_filename_list,
        "buaa-patient": buaa_patient_list,
        "buaa-image_type": buaa_image_type,
        "filetype": filetype_list,
        "race": "mongo",
    })
    
    if labels_fp:
        df_annotations = pd.read_csv(labels_fp)
        df_annotations["adjusted_unique_filename"] = df_annotations.filename.apply(lambda x: "-".join(x.split("_")))
        df = df.merge(df_annotations[[
                      'adjusted_unique_filename', 'expression', 'valence', 'arousal']], on='adjusted_unique_filename', how='left')
        df["expression"].fillna(0., inplace=True)
        df["valence"].fillna(0., inplace=True)
        df["arousal"].fillna(0., inplace=True)
        df["expression"] = df["expression"].apply(lambda x: e_dict[int(x)])
        df.loc[0::2, ["expression","valence","arousal"]] = df.loc[1::2,["expression","valence","arousal"]].values
        
    return df


def get_files_customdb(path, labels_fp=None):
    filenames = os.listdir(path)
    filepaths = [str(os.path.join(path, f)) for f in filenames]
    adjusted_unique_filename_list = [
        "-".join(["customdb", f]) for f in filenames]
    patient_list = [int(f.split('-')[0]) for f in filenames]
    filetype_list = [f.split('.')[-1] for f in filenames]

    df = pd.DataFrame({
        "db": "customdb",
        "spectra": "nir",
        "filepath": filepaths,
        "filename": filenames,
        "adjusted_unique_filename": adjusted_unique_filename_list,
        "customdb-patient": patient_list,
        "filetype": filetype_list,
        "race": "cauca",
    })

    if labels_fp:
        df_annotations = pd.read_csv(labels_fp)
        df = df.merge(df_annotations[[
                      'filename', 'expression', 'valence', 'arousal']], on='filename', how='left')
        df["expression"] = df["expression"].apply(lambda x: e_dict[int(x)])

    return df

def get_files_affectnet(path, num_samples, labels_fp):
    df = pd.read_csv(labels_fp)
    df = df[df["exists_in_dir"]==True]
    df = df.rename(columns={"FileType": "filetype"})
    df['filename'] = df['Unnamed: 0'].apply(lambda x: str(x) + ".jpg")
    df = df[["expression", "valence", "arousal", "filetype", "filename"]]
    df["expression"] = df["expression"].apply(lambda x: e_dict[int(x)])
    
    # sample data
    df = df.sample(n=num_samples)
    
    # get filetypes to lower
    df["filetype"] = df["filetype"].apply(lambda x: x.lower())
    
    # add data
    df["filepath"] = [str(os.path.join(path, f)) for f in df.filename]
    df["adjusted_unique_filename"] = [
        "-".join(["affectnet", f]) for f in df.filename]
    df["db"] = "affectnet"
    df["spectra"] = "vis"
    df["race"] = "cauca"

    return df

In [118]:
df_oulucasia_vl = get_files_oulucasia('data/B_OriginalImg/VL/Strong/', "vis")
df_oulucasia_ni = get_files_oulucasia('data/B_OriginalImg/NI/Strong/', "nir")
df_casia_vl = get_files_casia('data/CASIA_NISVIR/NIR-VIS/VIS/', "vis")
df_casia_ni = get_files_casia('data/CASIA_NISVIR/NIR-VIS/NIR/',"nir")
df_buaa_vl_ni = get_files_buaa('data/BUAA/BUAAVISNIR/', labels_fp="Custom_DB/buaa_w_neutral-annotations.csv")
df_customdb_ni = get_files_customdb('Custom_DB/custom_nir/', labels_fp="Custom_DB/custom_nir-annotations.csv")
df_affectnet_vl = get_files_affectnet('data/AffectNet-8Labels/train_set/images/', 9503, labels_fp="data/AffectNet-8Labels/train_custom.csv")

# concatenate all
df = pd.concat([df_oulucasia_vl, df_oulucasia_ni, df_casia_vl, df_casia_ni, df_buaa_vl_ni, df_customdb_ni, df_affectnet_vl])

# Drop all nonimage files
df = df[df["filetype"].isin(['jpeg', 'jpg', 'bmp', 'png'])]

## Create split for CycleGAN - latest version

The whole merged dataset will comprise of:
| Database            | #NIR   | #VIS   | #NIR mongo | #NIR cauca | #VIS mongo | #VIS cauca | #affected             |
|---------------------|--------|--------|------------|------------|------------|------------|-----------------------|
| Oulu-Casia          | 10379 | 10426 | 4170(40%)  | 6255(60%)  | 4170(40%)  | 6255(60%)  | yes                   |
| CASIA               | 12487  | 5093  | 12487      | 0          | 5093      | 0          | no                    |
| BUAA                | 1950   | 0      | 1950       | 0          | 0          | 0          | 1/3                  |
| AffectNet| 0      | 9503   |  0        | 0          | 1930(~10%)     | 7723(80%)  | yes                   |
| Custom DB           | 159    | 0      | 0          | 159        | 0          | 0          | yes                   |
| **Total**           | **24975**  | **24975**   | **18756** (75%)| **6405** (25%)| **11193** (45%)| **13979** (55%)| **-**                 |



Train and test split is following:
<span style="color:red;">TODO</span>

In [59]:
df_cyclegan_all = df[~((df["buaa-image_type"]=="low_light")|((df["db"]=="buaa")&(df["spectra"]=="vis")))]

In [62]:
display(df_cyclegan_all[df_cyclegan_all["spectra"]=="vis"])
display(df_cyclegan_all[df_cyclegan_all["spectra"]=="nir"])
display(df_cyclegan_all[(df_cyclegan_all["spectra"]=="vis") & (df_cyclegan_all["db"]=="affectnet")])
display(df_cyclegan_all[(df_cyclegan_all["spectra"]=="vis") & (df_cyclegan_all["db"]=="oulucasia")])
display(df_cyclegan_all[(df_cyclegan_all["spectra"]=="nir") & (df_cyclegan_all["db"]=="oulucasia")])
display(df_cyclegan_all[(df_cyclegan_all["spectra"]=="vis") & (df_cyclegan_all["db"]=="casia")])
display(df_cyclegan_all[(df_cyclegan_all["spectra"]=="nir") & (df_cyclegan_all["db"]=="casia")])
display(df_cyclegan_all[(df_cyclegan_all["spectra"]=="vis") & (df_cyclegan_all["db"]=="buaa")])
display(df_cyclegan_all[(df_cyclegan_all["spectra"]=="nir") & (df_cyclegan_all["db"]=="buaa")])
display(df_cyclegan_all[(df_cyclegan_all["spectra"]=="nir") & (df_cyclegan_all["db"]=="customdb")])

Unnamed: 0,db,spectra,filepath,filename,adjusted_unique_filename,oulucasia-patient,oulucasia-emotion,filetype,race,expression,valence,arousal,casia-session,casia-patient,casia-emotion,buaa-patient,buaa-image_type,customdb-patient
0,oulucasia,vis,data/B_OriginalImg/VL/Strong/P001/Anger/000.jpeg,000.jpeg,oulucasia-P001-Anger-000.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.000000,,,,,,
1,oulucasia,vis,data/B_OriginalImg/VL/Strong/P001/Anger/001.jpeg,001.jpeg,oulucasia-P001-Anger-001.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.000000,,,,,,
2,oulucasia,vis,data/B_OriginalImg/VL/Strong/P001/Anger/002.jpeg,002.jpeg,oulucasia-P001-Anger-002.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.000000,,,,,,
3,oulucasia,vis,data/B_OriginalImg/VL/Strong/P001/Anger/003.jpeg,003.jpeg,oulucasia-P001-Anger-003.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.000000,,,,,,
4,oulucasia,vis,data/B_OriginalImg/VL/Strong/P001/Anger/004.jpeg,004.jpeg,oulucasia-P001-Anger-004.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.000000,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4930,affectnet,vis,data/AffectNet-8Labels/train_set/images/144735...,144735.jpg,affectnet-144735.jpg,,,jpg,cauca,Happy,0.895277,-0.169377,,,,,,
142892,affectnet,vis,data/AffectNet-8Labels/train_set/images/144549...,144549.jpg,affectnet-144549.jpg,,,jpg,cauca,Happy,0.662989,0.053233,,,,,,
203496,affectnet,vis,data/AffectNet-8Labels/train_set/images/405770...,405770.jpg,affectnet-405770.jpg,,,jpg,cauca,Sad,-0.452899,-0.112146,,,,,,
173922,affectnet,vis,data/AffectNet-8Labels/train_set/images/103930...,103930.jpg,affectnet-103930.jpg,,,jpg,cauca,Happy,0.722255,0.303845,,,,,,


Unnamed: 0,db,spectra,filepath,filename,adjusted_unique_filename,oulucasia-patient,oulucasia-emotion,filetype,race,expression,valence,arousal,casia-session,casia-patient,casia-emotion,buaa-patient,buaa-image_type,customdb-patient
0,oulucasia,nir,data/B_OriginalImg/NI/Strong/P001/Anger/000.jpeg,000.jpeg,oulucasia-P001-Anger-000.jpeg,P001,Anger,jpeg,cauca,Neutral,0.00,0.00,,,,,,
1,oulucasia,nir,data/B_OriginalImg/NI/Strong/P001/Anger/001.jpeg,001.jpeg,oulucasia-P001-Anger-001.jpeg,P001,Anger,jpeg,cauca,Neutral,0.00,0.00,,,,,,
2,oulucasia,nir,data/B_OriginalImg/NI/Strong/P001/Anger/002.jpeg,002.jpeg,oulucasia-P001-Anger-002.jpeg,P001,Anger,jpeg,cauca,Neutral,0.00,0.00,,,,,,
3,oulucasia,nir,data/B_OriginalImg/NI/Strong/P001/Anger/003.jpeg,003.jpeg,oulucasia-P001-Anger-003.jpeg,P001,Anger,jpeg,cauca,Neutral,0.00,0.00,,,,,,
4,oulucasia,nir,data/B_OriginalImg/NI/Strong/P001/Anger/004.jpeg,004.jpeg,oulucasia-P001-Anger-004.jpeg,P001,Anger,jpeg,cauca,Neutral,0.00,0.00,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,customdb,nir,Custom_DB/custom_nir/10-0-neutral.jpg,10-0-neutral.jpg,customdb-10-0-neutral.jpg,,,jpg,cauca,Neutral,-0.02,-0.03,,,,,,10.0
155,customdb,nir,Custom_DB/custom_nir/13-6-contempt.jpg,13-6-contempt.jpg,customdb-13-6-contempt.jpg,,,jpg,cauca,Contempt,-0.55,0.64,,,,,,13.0
156,customdb,nir,Custom_DB/custom_nir/8-7-contempt.jpg,8-7-contempt.jpg,customdb-8-7-contempt.jpg,,,jpg,cauca,Contempt,0.00,0.63,,,,,,8.0
157,customdb,nir,Custom_DB/custom_nir/1-9-disgust.jpg,1-9-disgust.jpg,customdb-1-9-disgust.jpg,,,jpg,cauca,Anger,-0.67,0.49,,,,,,1.0


Unnamed: 0,db,spectra,filepath,filename,adjusted_unique_filename,oulucasia-patient,oulucasia-emotion,filetype,race,expression,valence,arousal,casia-session,casia-patient,casia-emotion,buaa-patient,buaa-image_type,customdb-patient
116160,affectnet,vis,data/AffectNet-8Labels/train_set/images/368758...,368758.jpg,affectnet-368758.jpg,,,jpg,cauca,Neutral,0.164537,-0.004839,,,,,,
168730,affectnet,vis,data/AffectNet-8Labels/train_set/images/189218...,189218.jpg,affectnet-189218.jpg,,,jpg,cauca,Happy,0.725900,0.062911,,,,,,
123953,affectnet,vis,data/AffectNet-8Labels/train_set/images/192278...,192278.jpg,affectnet-192278.jpg,,,jpeg,cauca,Anger,-0.047446,0.155280,,,,,,
104213,affectnet,vis,data/AffectNet-8Labels/train_set/images/291383...,291383.jpg,affectnet-291383.jpg,,,jpg,cauca,Sad,-0.788547,-0.210534,,,,,,
132578,affectnet,vis,data/AffectNet-8Labels/train_set/images/316741...,316741.jpg,affectnet-316741.jpg,,,jpg,cauca,Neutral,0.077429,0.004839,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4930,affectnet,vis,data/AffectNet-8Labels/train_set/images/144735...,144735.jpg,affectnet-144735.jpg,,,jpg,cauca,Happy,0.895277,-0.169377,,,,,,
142892,affectnet,vis,data/AffectNet-8Labels/train_set/images/144549...,144549.jpg,affectnet-144549.jpg,,,jpg,cauca,Happy,0.662989,0.053233,,,,,,
203496,affectnet,vis,data/AffectNet-8Labels/train_set/images/405770...,405770.jpg,affectnet-405770.jpg,,,jpg,cauca,Sad,-0.452899,-0.112146,,,,,,
173922,affectnet,vis,data/AffectNet-8Labels/train_set/images/103930...,103930.jpg,affectnet-103930.jpg,,,jpg,cauca,Happy,0.722255,0.303845,,,,,,


Unnamed: 0,db,spectra,filepath,filename,adjusted_unique_filename,oulucasia-patient,oulucasia-emotion,filetype,race,expression,valence,arousal,casia-session,casia-patient,casia-emotion,buaa-patient,buaa-image_type,customdb-patient
0,oulucasia,vis,data/B_OriginalImg/VL/Strong/P001/Anger/000.jpeg,000.jpeg,oulucasia-P001-Anger-000.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.00000,,,,,,
1,oulucasia,vis,data/B_OriginalImg/VL/Strong/P001/Anger/001.jpeg,001.jpeg,oulucasia-P001-Anger-001.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.00000,,,,,,
2,oulucasia,vis,data/B_OriginalImg/VL/Strong/P001/Anger/002.jpeg,002.jpeg,oulucasia-P001-Anger-002.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.00000,,,,,,
3,oulucasia,vis,data/B_OriginalImg/VL/Strong/P001/Anger/003.jpeg,003.jpeg,oulucasia-P001-Anger-003.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.00000,,,,,,
4,oulucasia,vis,data/B_OriginalImg/VL/Strong/P001/Anger/004.jpeg,004.jpeg,oulucasia-P001-Anger-004.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.00000,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10381,oulucasia,vis,data/B_OriginalImg/VL/Strong/P080/Surprise/025...,025.jpeg,oulucasia-P080-Surprise-025.jpeg,P080,Surprise,jpeg,cauca,Surprise,0.465863,0.78032,,,,,,
10382,oulucasia,vis,data/B_OriginalImg/VL/Strong/P080/Surprise/026...,026.jpeg,oulucasia-P080-Surprise-026.jpeg,P080,Surprise,jpeg,cauca,Surprise,0.465863,0.78032,,,,,,
10383,oulucasia,vis,data/B_OriginalImg/VL/Strong/P080/Surprise/027...,027.jpeg,oulucasia-P080-Surprise-027.jpeg,P080,Surprise,jpeg,cauca,Surprise,0.465863,0.78032,,,,,,
10384,oulucasia,vis,data/B_OriginalImg/VL/Strong/P080/Surprise/028...,028.jpeg,oulucasia-P080-Surprise-028.jpeg,P080,Surprise,jpeg,cauca,Surprise,0.465863,0.78032,,,,,,


Unnamed: 0,db,spectra,filepath,filename,adjusted_unique_filename,oulucasia-patient,oulucasia-emotion,filetype,race,expression,valence,arousal,casia-session,casia-patient,casia-emotion,buaa-patient,buaa-image_type,customdb-patient
0,oulucasia,nir,data/B_OriginalImg/NI/Strong/P001/Anger/000.jpeg,000.jpeg,oulucasia-P001-Anger-000.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.00000,,,,,,
1,oulucasia,nir,data/B_OriginalImg/NI/Strong/P001/Anger/001.jpeg,001.jpeg,oulucasia-P001-Anger-001.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.00000,,,,,,
2,oulucasia,nir,data/B_OriginalImg/NI/Strong/P001/Anger/002.jpeg,002.jpeg,oulucasia-P001-Anger-002.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.00000,,,,,,
3,oulucasia,nir,data/B_OriginalImg/NI/Strong/P001/Anger/003.jpeg,003.jpeg,oulucasia-P001-Anger-003.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.00000,,,,,,
4,oulucasia,nir,data/B_OriginalImg/NI/Strong/P001/Anger/004.jpeg,004.jpeg,oulucasia-P001-Anger-004.jpeg,P001,Anger,jpeg,cauca,Neutral,0.000000,0.00000,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10420,oulucasia,nir,data/B_OriginalImg/NI/Strong/P080/Surprise/020...,020.jpeg,oulucasia-P080-Surprise-020.jpeg,P080,Surprise,jpeg,cauca,Surprise,0.465863,0.78032,,,,,,
10421,oulucasia,nir,data/B_OriginalImg/NI/Strong/P080/Surprise/021...,021.jpeg,oulucasia-P080-Surprise-021.jpeg,P080,Surprise,jpeg,cauca,Surprise,0.465863,0.78032,,,,,,
10422,oulucasia,nir,data/B_OriginalImg/NI/Strong/P080/Surprise/022...,022.jpeg,oulucasia-P080-Surprise-022.jpeg,P080,Surprise,jpeg,cauca,Surprise,0.465863,0.78032,,,,,,
10423,oulucasia,nir,data/B_OriginalImg/NI/Strong/P080/Surprise/023...,023.jpeg,oulucasia-P080-Surprise-023.jpeg,P080,Surprise,jpeg,cauca,Surprise,0.465863,0.78032,,,,,,


Unnamed: 0,db,spectra,filepath,filename,adjusted_unique_filename,oulucasia-patient,oulucasia-emotion,filetype,race,expression,valence,arousal,casia-session,casia-patient,casia-emotion,buaa-patient,buaa-image_type,customdb-patient
0,casia,vis,data/CASIA_NISVIR/NIR-VIS/VIS/s1_VIS_00040_002...,s1_VIS_00040_002.jpg,casia-s1_VIS_00040_002.jpg,,,jpg,mongo,,,,1.0,40.0,2.0,,,
1,casia,vis,data/CASIA_NISVIR/NIR-VIS/VIS/s2_VIS_10108_003...,s2_VIS_10108_003.jpg,casia-s2_VIS_10108_003.jpg,,,jpg,mongo,,,,2.0,10108.0,3.0,,,
2,casia,vis,data/CASIA_NISVIR/NIR-VIS/VIS/s3_VIS_20489_004...,s3_VIS_20489_004.jpg,casia-s3_VIS_20489_004.jpg,,,jpg,mongo,,,,3.0,20489.0,4.0,,,
3,casia,vis,data/CASIA_NISVIR/NIR-VIS/VIS/s2_VIS_10006_005...,s2_VIS_10006_005.jpg,casia-s2_VIS_10006_005.jpg,,,jpg,mongo,,,,2.0,10006.0,5.0,,,
4,casia,vis,data/CASIA_NISVIR/NIR-VIS/VIS/s2_VIS_10220_001...,s2_VIS_10220_001.jpg,casia-s2_VIS_10220_001.jpg,,,jpg,mongo,,,,2.0,10220.0,1.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5088,casia,vis,data/CASIA_NISVIR/NIR-VIS/VIS/s1_VIS_00105_005...,s1_VIS_00105_005.jpg,casia-s1_VIS_00105_005.jpg,,,jpg,mongo,,,,1.0,105.0,5.0,,,
5089,casia,vis,data/CASIA_NISVIR/NIR-VIS/VIS/s2_VIS_10295_002...,s2_VIS_10295_002.jpg,casia-s2_VIS_10295_002.jpg,,,jpg,mongo,,,,2.0,10295.0,2.0,,,
5090,casia,vis,data/CASIA_NISVIR/NIR-VIS/VIS/s2_VIS_10108_002...,s2_VIS_10108_002.jpg,casia-s2_VIS_10108_002.jpg,,,jpg,mongo,,,,2.0,10108.0,2.0,,,
5091,casia,vis,data/CASIA_NISVIR/NIR-VIS/VIS/s1_VIS_00034_004...,s1_VIS_00034_004.jpg,casia-s1_VIS_00034_004.jpg,,,jpg,mongo,,,,1.0,34.0,4.0,,,


Unnamed: 0,db,spectra,filepath,filename,adjusted_unique_filename,oulucasia-patient,oulucasia-emotion,filetype,race,expression,valence,arousal,casia-session,casia-patient,casia-emotion,buaa-patient,buaa-image_type,customdb-patient
0,casia,nir,data/CASIA_NISVIR/NIR-VIS/NIR/s2_NIR_10192_04-...,s2_NIR_10192_04-B.bmp,casia-s2_NIR_10192_04-B.bmp,,,bmp,mongo,,,,2.0,10192.0,4.0,,,
1,casia,nir,data/CASIA_NISVIR/NIR-VIS/NIR/s2_NIR_10145_001...,s2_NIR_10145_001.bmp,casia-s2_NIR_10145_001.bmp,,,bmp,mongo,,,,2.0,10145.0,1.0,,,
2,casia,nir,data/CASIA_NISVIR/NIR-VIS/NIR/s2_NIR_10122_003...,s2_NIR_10122_003.bmp,casia-s2_NIR_10122_003.bmp,,,bmp,mongo,,,,2.0,10122.0,3.0,,,
3,casia,nir,data/CASIA_NISVIR/NIR-VIS/NIR/s1_NIR_00061_004...,s1_NIR_00061_004.bmp,casia-s1_NIR_00061_004.bmp,,,bmp,mongo,,,,1.0,61.0,4.0,,,
4,casia,nir,data/CASIA_NISVIR/NIR-VIS/NIR/s2_NIR_10227_010...,s2_NIR_10227_010.bmp,casia-s2_NIR_10227_010.bmp,,,bmp,mongo,,,,2.0,10227.0,10.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12482,casia,nir,data/CASIA_NISVIR/NIR-VIS/NIR/s2_NIR_10161_014...,s2_NIR_10161_014.bmp,casia-s2_NIR_10161_014.bmp,,,bmp,mongo,,,,2.0,10161.0,14.0,,,
12483,casia,nir,data/CASIA_NISVIR/NIR-VIS/NIR/s3_NIR_20410_016...,s3_NIR_20410_016.bmp,casia-s3_NIR_20410_016.bmp,,,bmp,mongo,,,,3.0,20410.0,16.0,,,
12484,casia,nir,data/CASIA_NISVIR/NIR-VIS/NIR/s2_NIR_10137_002...,s2_NIR_10137_002.bmp,casia-s2_NIR_10137_002.bmp,,,bmp,mongo,,,,2.0,10137.0,2.0,,,
12485,casia,nir,data/CASIA_NISVIR/NIR-VIS/NIR/s3_NIR_20494_006...,s3_NIR_20494_006.bmp,casia-s3_NIR_20494_006.bmp,,,bmp,mongo,,,,3.0,20494.0,6.0,,,


Unnamed: 0,db,spectra,filepath,filename,adjusted_unique_filename,oulucasia-patient,oulucasia-emotion,filetype,race,expression,valence,arousal,casia-session,casia-patient,casia-emotion,buaa-patient,buaa-image_type,customdb-patient


Unnamed: 0,db,spectra,filepath,filename,adjusted_unique_filename,oulucasia-patient,oulucasia-emotion,filetype,race,expression,valence,arousal,casia-session,casia-patient,casia-emotion,buaa-patient,buaa-image_type,customdb-patient
0,buaa,nir,data/BUAA/BUAAVISNIR/19/1.bmp,1.bmp,buaa-19-1.bmp,,,bmp,mongo,Neutral,,,,,,19,neutral,
2,buaa,nir,data/BUAA/BUAAVISNIR/19/3.bmp,3.bmp,buaa-19-3.bmp,,,bmp,mongo,Neutral,,,,,,19,tilt,
4,buaa,nir,data/BUAA/BUAAVISNIR/19/5.bmp,5.bmp,buaa-19-5.bmp,,,bmp,mongo,Neutral,,,,,,19,tilt,
6,buaa,nir,data/BUAA/BUAAVISNIR/19/7.bmp,7.bmp,buaa-19-7.bmp,,,bmp,mongo,Neutral,,,,,,19,tilt,
8,buaa,nir,data/BUAA/BUAAVISNIR/19/9.bmp,9.bmp,buaa-19-9.bmp,,,bmp,mongo,Neutral,,,,,,19,tilt,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5928,buaa,nir,data/BUAA/BUAAVISNIR/42/17.bmp,17.bmp,buaa-42-17.bmp,,,bmp,mongo,Neutral,,,,,,42,tilt,
5930,buaa,nir,data/BUAA/BUAAVISNIR/42/21.bmp,21.bmp,buaa-42-21.bmp,,,bmp,mongo,Neutral,,,,,,42,affection,
5932,buaa,nir,data/BUAA/BUAAVISNIR/42/23.bmp,23.bmp,buaa-42-23.bmp,,,bmp,mongo,Neutral,,,,,,42,affection,
5934,buaa,nir,data/BUAA/BUAAVISNIR/42/25.bmp,25.bmp,buaa-42-25.bmp,,,bmp,mongo,Neutral,,,,,,42,affection,


Unnamed: 0,db,spectra,filepath,filename,adjusted_unique_filename,oulucasia-patient,oulucasia-emotion,filetype,race,expression,valence,arousal,casia-session,casia-patient,casia-emotion,buaa-patient,buaa-image_type,customdb-patient
0,customdb,nir,Custom_DB/custom_nir/2-3-surprise.jpg,2-3-surprise.jpg,customdb-2-3-surprise.jpg,,,jpg,cauca,Fear,-0.01,0.79,,,,,,2.0
1,customdb,nir,Custom_DB/custom_nir/13-5-anger.jpg,13-5-anger.jpg,customdb-13-5-anger.jpg,,,jpg,cauca,Sad,-0.42,0.76,,,,,,13.0
2,customdb,nir,Custom_DB/custom_nir/6-0-neutral.jpg,6-0-neutral.jpg,customdb-6-0-neutral.jpg,,,jpg,cauca,Neutral,0.01,0.02,,,,,,6.0
3,customdb,nir,Custom_DB/custom_nir/7-7-contempt_.jpg,7-7-contempt_.jpg,customdb-7-7-contempt_.jpg,,,jpg,cauca,Contempt,-0.58,0.63,,,,,,7.0
4,customdb,nir,Custom_DB/custom_nir/1-10-calm.jpg,1-10-calm.jpg,customdb-1-10-calm.jpg,,,jpg,cauca,Neutral,0.22,-0.29,,,,,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,customdb,nir,Custom_DB/custom_nir/10-0-neutral.jpg,10-0-neutral.jpg,customdb-10-0-neutral.jpg,,,jpg,cauca,Neutral,-0.02,-0.03,,,,,,10.0
155,customdb,nir,Custom_DB/custom_nir/13-6-contempt.jpg,13-6-contempt.jpg,customdb-13-6-contempt.jpg,,,jpg,cauca,Contempt,-0.55,0.64,,,,,,13.0
156,customdb,nir,Custom_DB/custom_nir/8-7-contempt.jpg,8-7-contempt.jpg,customdb-8-7-contempt.jpg,,,jpg,cauca,Contempt,0.00,0.63,,,,,,8.0
157,customdb,nir,Custom_DB/custom_nir/1-9-disgust.jpg,1-9-disgust.jpg,customdb-1-9-disgust.jpg,,,jpg,cauca,Anger,-0.67,0.49,,,,,,1.0


Process the images - 

In [60]:
from skeleton.inference import Inference, CenterFace
import cv2

models = {
    "face_detector": {
        "net_type": Inference.net_type.FACE_DETECTOR_CENTERFACE,
        "remove_black_stripes": True,
    },
    "spectrum_translator": {
        "net_type": None, 
        "pth_to_onnx": 'models/cyclegan-snellius-casia-oulucasia-double_gen_op/NIR2VIS_cyclegan_snellius_casia_oulucasia_double_gen_opt-GA-20epoch.onnx',
        "input_as_avg_grayscale": False,
        "output_as_avg_grayscale": True,
    },
    "fer": {
        "net_type": None,
        "pth_to_onnx": "models/mobilenet_NIR/mobilenet_on_AffectNet-NIR/mobilenet_aff_nir-aff_continue.onnx",
        "va_to_circumplex_model": False,
    }
}
inf = Inference(models, None, verbose=True, debug=True)

Using '{'net_type': <net_type.FACE_DETECTOR_CENTERFACE: 'F'>, 'remove_black_stripes': True}' as face detector model


Preprocess all data for 

In [5]:
vis_fps = df_cyclegan_all[((df_cyclegan_all["spectra"]=="vis")&(df_cyclegan_all["db"]!="affectnet"))].filepath.to_list()
vis_target_fps = [str(os.path.join("vis_train", fp)) for fp in df_cyclegan_all[df_cyclegan_all["spectra"]=="vis"].adjusted_unique_filename.to_list()]

nir_fps = df_cyclegan_all[((df_cyclegan_all["spectra"]=="nir")&(df_cyclegan_all["db"]!="affectnet"))].filepath.to_list()
nir_target_fps = [str(os.path.join("nir_train", fp)) for fp in df_cyclegan_all[df_cyclegan_all["spectra"]=="nir"].adjusted_unique_filename.to_list()]

vis_out = inf.infer_instant_from_filenames(vis_fps)
for v, fp_tgt in zip(vis_out, vis_target_fps):
    v[0] = cv2.cvtColor(v[0], cv2.COLOR_RGB2BGR)
    cv2.imwrite(fp_tgt.split('.')[0]+".jpg", v[0])
    
# nir_out = inf.infer_instant_from_filenames(nir_fps)
# for v, fp in zip(nir_out, nir_target_fps):
#     cv2.imwrite(fp.split('.')[0]+".jpg", v[0])

In [61]:
import shutil

fps_aff = df_cyclegan_all[df_cyclegan_all["db"]=='affectnet'].filepath.to_list()
tgt_fps_aff = [str(os.path.join("vis_train", fn)) for fn in df_cyclegan_all[df_cyclegan_all["db"]=='affectnet'].adjusted_unique_filename.to_list()]

for src, dst in zip(fps_aff, tgt_fps_aff):
    shutil.copy(src, dst)

## <span style="color:red;">TODO</span>: Create split for FER

The whole merged dataset comprise of:

...

## create-unpaired_casia_oulu-casia_data
### Create unpaired split
From Oulu-Casia and Casia2

In [None]:
import json
from sklearn.model_selection import train_test_split
from pathlib import Path
import random

# Specify the directories
nir_path = Path('data/CASIA_NISVIR/NIR-VIS/NIR/')
vis_path = Path('data/CASIA_NISVIR/NIR-VIS/VIS/')

# Get the contents of the directories
nir_images = list(nir_path.glob('*.bmp'))  # replace with your image extension
vis_images = list(vis_path.glob('*.jpg'))  # replace with your image extension

# Determine the size of the smaller dataset
min_size = min(len(nir_images), len(vis_images))

# Randomly select 'min_size' images from both NIR and VIS datasets
nir_images_selected = random.sample(nir_images, min_size)
vis_images_selected = random.sample(vis_images, min_size)

# Get the images that are not selected
nir_images_unselected = list(set(nir_images) - set(nir_images_selected))
vis_images_unselected = list(set(vis_images) - set(vis_images_selected))

# Split the NIR images into train, val, and test sets
nir_train_val, nir_test = train_test_split(nir_images_selected, test_size=0.2, random_state=42)
nir_train, nir_val = train_test_split(nir_train_val, test_size=0.25, random_state=42)

# Split the VIS images into train, val, and test sets
vis_train_val, vis_test = train_test_split(vis_images_selected, test_size=0.2, random_state=42)
vis_train, vis_val = train_test_split(vis_train_val, test_size=0.25, random_state=42)

# Convert the Path objects to strings
nir_train = [str(path) for path in nir_train]
nir_val = [str(path) for path in nir_val]
nir_test = [str(path) for path in nir_test]
vis_train = [str(path) for path in vis_train]
vis_val = [str(path) for path in vis_val]
vis_test = [str(path) for path in vis_test]
nir_images_unselected = [str(path) for path in nir_images_unselected]
vis_images_unselected = [str(path) for path in vis_images_unselected]

# Prepare the metadata
metadata = {
    'nir_images_selected': len(nir_images_selected),
    'vis_images_selected': len(vis_images_selected),
    'nir_images_unselected': len(nir_images_unselected),
    'vis_images_unselected': len(vis_images_unselected),
    'nir_train': len(nir_train),
    'nir_val': len(nir_val),
    'nir_test': len(nir_test),
    'vis_train': len(vis_train),
    'vis_val': len(vis_val),
    'vis_test': len(vis_test),
}

# Prepare the data to be stored in the JSON file
data = {
    'nir_train': nir_train,
    'nir_val': nir_val,
    'nir_test': nir_test,
    'nir_rest': nir_images_unselected,
    'vis_train': vis_train,
    'vis_val': vis_val,
    'vis_test': vis_test,
    'vis_rest': vis_images_unselected,
    'metadata': metadata,
}

# Write the data to the JSON file
with open('splits/new/casia2_data_splits.json', 'w') as f:
    json.dump(data, f)


In [None]:
from skeleton.data.splitter import DatasetSplitter
import json
import os

# define splitter
splitter = DatasetSplitter(
    vl_data_path=Path('data/B_OriginalImg/VL/Strong/'),
    ni_data_path=Path('data/B_OriginalImg/NI/Strong/'),
    train_n_img_picked=5,
    test_n_img_picked=2,
    val_n_img_picked=2,
    json_train_split_pth=Path('_train_tmp.json'),
    json_test_split_pth=Path('_test_tmp.json'),
    json_val_split_pth=Path('_val_tmp.json'),
)

# split the files
splitter()


# Load the temporary JSON files
with open('_train_tmp.json', 'r') as f:
    train_data = json.load(f)
with open('_test_tmp.json', 'r') as f:
    test_data = json.load(f)
with open('_val_tmp.json', 'r') as f:
    val_data = json.load(f)

# Prepare the metadata
metadata = {
    'nir_images': len(train_data['ni']) + len(test_data['ni']) + len(val_data['ni']),
    'vis_images': len(train_data['vl']) + len(test_data['vl']) + len(val_data['vl']),
    'nir_train': len(train_data['ni']),
    'nir_val': len(val_data['ni']),
    'nir_test': len(test_data['ni']),
    'vis_train': len(train_data['vl']),
    'vis_val': len(val_data['vl']),
    'vis_test': len(test_data['vl']),
}

# Prepare the data to be stored in the JSON file
data = {
    'nir_train': train_data['ni'],
    'nir_val': val_data['ni'],
    'nir_test': test_data['ni'],
    'vis_train': train_data['vl'],
    'vis_val': val_data['vl'],
    'vis_test': test_data['vl'],
    'metadata': metadata,
}

# Write the data to the JSON file
with open('splits/new/oulucasia_data_splits.json', 'w') as f:
    json.dump(data, f)

# Remove the temporary files
os.remove('_train_tmp.json')
os.remove('_test_tmp.json')
os.remove('_val_tmp.json')


In [None]:
# Load the existing data from the two files
with open('splits/new/casia2_data_splits.json', 'r') as f:
    casia2_data = json.load(f)
with open('splits/new/oulucasia_data_splits.json', 'r') as f:
    oulucasia_data = json.load(f)

# Merge the data
merged_data = {}
for key in set(casia2_data.keys()).union(oulucasia_data.keys()):
    if key != 'metadata':
        merged_data[key] = casia2_data.get(key, []) + oulucasia_data.get(key, [])

# Merge the metadata
merged_metadata = {}
for key in set(casia2_data['metadata'].keys()).union(oulucasia_data['metadata'].keys()):
    merged_metadata[key] = casia2_data['metadata'].get(key, 0) + oulucasia_data['metadata'].get(key, 0)

# Add the merged metadata to the merged data
merged_data['metadata'] = merged_metadata

# Save the merged data back to the file
with open('splits/new/merged_data_splits.json', 'w') as f:
    json.dump(merged_data, f)

### Preprocess

In [None]:
import json
import pathlib
import os
import click
import cv2

from deepface import DeepFace


class FacePreprocessor:
    def __init__(
        self,
        train_split_pth,
        test_split_pth,
        val_split_pth,
        new_train_vl_pth,
        new_train_ni_pth,
        new_test_vl_pth,
        new_test_ni_pth,
        new_val_ni_pth,
        new_val_vl_pth,
        detector_backend,
        target_size,
        new_train_split_pth=None,
        new_test_split_pth=None,
        new_val_split_pth=None,
    ):
        self.train_split_pth = train_split_pth
        self.test_split_pth = test_split_pth
        self.val_split_pth = val_split_pth
        self.new_train_vl_pth = new_train_vl_pth
        self.new_train_ni_pth = new_train_ni_pth
        self.new_test_vl_pth = new_test_vl_pth
        self.new_test_ni_pth = new_test_ni_pth
        self.new_val_ni_pth = new_val_ni_pth
        self.new_val_vl_pth = new_val_vl_pth
        self.detector_backend = detector_backend
        self.target_size = target_size
        self.new_train_split = new_train_split_pth
        self.new_test_split = new_test_split_pth
        self.new_val_split = new_val_split_pth

    def detect_and_align_face(self, image_fp):
        try:
            face_objs = DeepFace.extract_faces(
                img_path=image_fp,
                target_size=self.target_size,
                detector_backend=self.detector_backend,
                enforce_detection=False,
            )
        except Exception as e:
            print(f"ERROR at {image_fp}", e)
            return None

        if len(face_objs) != 1:
            print("NOT FOUND OR MULTIPLE FACES!")
            return None

        face = face_objs[0]["face"]

        return face

    def preprocess_part(self, fps, target_fp, spectra):
        # prepare filepath
        os.makedirs(target_fp, exist_ok=True)

        # align faces for all images
        i = 0
        preprocessed_fps = []
        for fp in fps:
            new_filename = "-".join(pathlib.PurePath(fp).parts[-3:])
            target_path = os.path.join(target_fp, new_filename)

            aligned_face = self.detect_and_align_face(fp)

            if aligned_face is None:
                continue

            aligned_face = 255 * aligned_face[:, :, ::-1]

            cv2.imwrite(target_path, aligned_face)

            print(f"#{i} {spectra} Stored: {new_filename}")
            i += 1
            preprocessed_fps.append(target_path)

        return preprocessed_fps

    def preprocess_split(self, split_pth, new_vl_path, new_ni_pth):
        with open(split_pth, "r") as f:
            paths = json.load(f)

        vl_preproc_fps = self.preprocess_part(paths["vl"], new_vl_path, "vl")
        ni_preproc_fps = self.preprocess_part(paths["ni"], new_ni_pth, "ni")

        return {"vl": vl_preproc_fps, "ni": ni_preproc_fps}

    def preprocess(self):
#         preprocess train split
        train_fps = self.preprocess_split(
            self.train_split_pth, self.new_train_vl_pth, self.new_train_ni_pth
        )

        if self.new_train_split:
            with open(self.new_train_split, "w") as f:
                json.dump(train_fps, f)

        # preprocess test split
        test_fps = self.preprocess_split(
            self.test_split_pth, self.new_test_vl_pth, self.new_test_ni_pth
        )

        if self.new_test_split:
            with open(self.new_test_split, "w") as f:
                json.dump(test_fps, f)

        # preprocess val split
        val_fps = self.preprocess_split(
            self.val_split_pth, self.new_val_vl_pth, self.new_val_ni_pth
        )

        if self.new_val_split:
            with open(self.new_val_split, "w") as f:
                json.dump(val_fps, f)

In [None]:
with open('splits/new/merged_data_splits.json') as f:
    merged_data = json.load(f)

In [None]:
with open('_train_split_tmp.json', 'w') as f:
    json.dump({'ni': merged_data['nir_train'], 'vl': merged_data['vis_train']}, f)
with open('_test_split_tmp.json', 'w') as f:
    json.dump({'ni': merged_data['nir_test'], 'vl': merged_data['vis_test']}, f)
with open('_val_split_tmp.json', 'w') as f:
    json.dump({'ni': merged_data['nir_val'], 'vl': merged_data['vis_val']}, f)

preprocessor = FacePreprocessor(
    train_split_pth='_train_split_tmp.json',
    test_split_pth='_test_split_tmp.json',
    val_split_pth='_val_split_tmp.json',
    new_train_vl_pth='data/casia-oulucasia-unpaired/A/train',
    new_train_ni_pth='data/casia-oulucasia-unpaired/B/train',
    new_test_vl_pth='data/casia-oulucasia-unpaired/A/test',
    new_test_ni_pth='data/casia-oulucasia-unpaired/B/test',
    new_val_vl_pth='data/casia-oulucasia-unpaired/A/val',
    new_val_ni_pth='data/casia-oulucasia-unpaired/B/val',
    detector_backend='retinaface',
    target_size=(224, 224),
    new_train_split_pth='splits/new/preproc-merged-train',
    new_test_split_pth='splits/new/preproc-merged-test',
    new_val_split_pth='splits/new/preproc-merged-val',
)

preprocessor.preprocess()
               
os.remove('_train_split_tmp.json')
os.remove('_test_split_tmp.json')
os.remove('_val_split_tmp.json')

In [None]:
merged_data.keys()

## Create-buaa-w_and_wo_stripes-train_test

In [None]:
import os
import numpy as np
import random
from sklearn.model_selection import train_test_split
from PIL import Image
from deepface import DeepFace
from skeleton.inference import CenterFace
import cv2
import glob

__DATA_FOLDER = 'data/BUAA/BUAAVISNIR/'
__TARGET_SIZE = (224, 224)
__TARGET_FOLDER = 'data/buaa-w_and_wo_stripes-train_test/'

centerface = CenterFace()


def fill_black(image):
    height = image.shape[0]
    width = image.shape[1]
    max_dim = max([width, height])

    black_image = np.zeros((max_dim, max_dim, image.shape[-1]), dtype=np.uint8)

    width_padding = max_dim - width
    height_padding = max_dim - height
    y1 = int(height_padding/2)
    y2 = int(max_dim - height_padding/2)
    x1 = int(width_padding/2)
    x2 = int(max_dim - width_padding/2)

    black_image[y1:y2, x1:x2, :] = image

    return black_image


def operation_X(image_pth_A, image_pth_B):
    # load
    image_np_A = cv2.imread(str(image_pth_A))
    image_np_A = cv2.cvtColor(image_np_A, cv2.COLOR_BGR2RGB)
    image_np_B = cv2.imread(str(image_pth_B))
    image_np_B = cv2.cvtColor(image_np_B, cv2.COLOR_BGR2RGB)

    # expects 3 channels
    if image_np_A.shape[2] == 1:
        image_np_A = np.concatenate([image_np_A] * 3, axis=-1)

    # predict
    dets, lms = centerface(image_np_A, threshold=0.35)

    det = dets[0]
    width = det[2] - det[0]
    height = det[3] - det[1]
    bigger_dimension = max([width, height])
    width_padding = bigger_dimension - width
    height_padding = bigger_dimension - height

    face_A = image_np_A[int(det[1]-width_padding//2):int(det[3]+width_padding//2),
                        int(det[0]-width_padding//2):int(det[2]+width_padding//2), :]
    face_B = image_np_B[int(det[1]-width_padding//2):int(det[3]+width_padding//2),
                        int(det[0]-width_padding//2):int(det[2]+width_padding//2), :]

    face_A = cv2.resize(face_A, __TARGET_SIZE)
    face_B = cv2.resize(face_B, __TARGET_SIZE).astype(np.uint8)
    face_A_np = np.mean(face_A, axis=2)
    face_A = np.stack([face_A_np, face_A_np, face_A_np],
                      axis=2).astype(np.uint8)

    display(Image.fromarray(np.concatenate([face_A, face_B], axis=1)))

    return face_A, face_B


def operation_Y(image_pth_A, image_pth_B):
    face_objs_A = DeepFace.extract_faces(
        img_path=image_pth_A,
        target_size=__TARGET_SIZE,
        detector_backend="retinaface",
        enforce_detection=False,
        align=False
    )

    face_B_np = np.mean(cv2.imread(image_pth_B), axis=2)
    face_B_np = np.stack([face_B_np, face_B_np, face_B_np], axis=2)

    face_A = (face_objs_A[0]['face'] * 255).astype(np.uint8)
    ar = face_objs_A[0]['facial_area']
    face_B = face_B_np[ar['y']:ar['y']+ar['h'], ar['x']:ar['x']+ar['w']]

    face_B = fill_black(face_B)
    face_B = cv2.resize(face_B, __TARGET_SIZE).astype(np.uint8)

    face_A_np = np.mean(face_A, axis=2)
    face_A = np.stack([face_A_np, face_A_np, face_A_np],
                      axis=2).astype(np.uint8)

    display(Image.fromarray(np.concatenate([face_A, face_B], axis=1)))

    return face_A, face_B


# Get the list of patient ids
patient_ids = [name for name in os.listdir(
    __DATA_FOLDER) if os.path.isdir(os.path.join(__DATA_FOLDER, name))]

# Split the patient ids into train, validation, and test sets
train_ids, test_ids = train_test_split(
    patient_ids, test_size=0.2, random_state=42)
# train_ids, val_ids = train_test_split(train_ids, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2


# Function to process and save images
def process_and_save_images(ids, split):
    filepaths_A = []
    filepaths_B = []
    invalid = []
    for _id in sorted(ids):
        patient_folder = os.path.join(__DATA_FOLDER, _id)
        dir_content = [i for i in os.listdir(
            patient_folder) if i[-3:] == 'bmp']
        image_files = sorted(dir_content, key=lambda x: int(
            x.split('.')[0]))  # Sort the files to ensure pairs
        image_files = [i for i in image_files if int(i.split('.')[0]) <= 28]
        for file_A, file_B in zip(image_files[0::2], image_files[1::2]):
            print(str(os.path.join(patient_folder, file_A)),
                  str(os.path.join(patient_folder, file_B)))

            img_path_A = os.path.join(patient_folder, file_A)
            img_path_B = os.path.join(patient_folder, file_B)

            try:
                if random.choice([True, False]):
                    img_A, img_B = operation_X(img_path_A, img_path_B)
                else:
                    img_A, img_B = operation_Y(img_path_A, img_path_B)
            except:
                invalid.append(img_path_A)
                continue

            # Save the images
            Image.fromarray(img_A).save(
                os.path.join(split + 'A', f'{_id}_{file_A}'))
            Image.fromarray(img_B).save(
                os.path.join(split + 'B', f'{_id}_{file_B}'))

            filepaths_A.append(
                str(os.path.join(split + 'A', f'{_id}_{file_A}')))
            filepaths_B.append(
                str(os.path.join(split + 'B', f'{_id}_{file_B}')))

    return filepaths_A, filepaths_B, invalid

In [None]:
# Process and save images for each split
out_train = process_and_save_images(train_ids, __TARGET_FOLDER + 'train')
# out_val = process_and_save_images(val_ids, __TARGET_FOLDER + 'val')
out_test = process_and_save_images(test_ids, __TARGET_FOLDER + 'test')

## AffectNet prep

In [None]:
import json

In [None]:
with open('splits/new/merged_data_splits.json', 'r') as f:
    casia_oulucasia_split = json.loads(f.read())

In [None]:
# Load the existing data from the two files
with open('splits/new/casia2_data_splits.json', 'r') as f:
    casia2_data = json.load(f)
with open('splits/new/oulucasia_data_splits.json', 'r') as f:
    oulucasia_data = json.load(f)

In [None]:
# Merge the data
merged_data = {}
for key in set(casia2_data.keys()).union(oulucasia_data.keys()):
    if key != 'metadata':
        merged_data[key] = casia2_data.get(key, []) + oulucasia_data.get(key, [])

# Merge the metadata
merged_metadata = {}
for key in set(casia2_data['metadata'].keys()).union(oulucasia_data['metadata'].keys()):
    merged_metadata[key] = casia2_data['metadata'].get(key, 0) + oulucasia_data['metadata'].get(key, 0)

# Add the merged metadata to the merged data
merged_data['metadata'] = merged_metadata

In [None]:
merged_data['nir_rest']

In [None]:
import json
import pathlib
import os
import click
import cv2

from deepface import DeepFace


def detect_and_align_face(image_fp):
    try:
        face_objs = DeepFace.extract_faces(
            img_path=image_fp,
            target_size=(256, 256),
            detector_backend="retinaface",
            enforce_detection=False,
        )
    except Exception as e:
        print(f"ERROR at {image_fp}", e)
        return None

    if len(face_objs) != 1:
        print("NOT FOUND OR MULTIPLE FACES!")
        return None

    face = face_objs[0]["face"]

    return face

def preprocess_part( fps, target_fp, spectra):
    # prepare filepath
    os.makedirs(target_fp, exist_ok=True)

    # align faces for all images
    i = 0
    preprocessed_fps = []
    for fp in fps:
        new_filename = "-".join(pathlib.PurePath(fp).parts[-3:])
        target_path = os.path.join(target_fp, new_filename)

        aligned_face = detect_and_align_face(fp)

        if aligned_face is None:
            continue

        aligned_face = 255 * aligned_face[:, :, ::-1]

        cv2.imwrite(target_path, aligned_face)

        print(f"#{i} {spectra} Stored: {new_filename}")
        i += 1
        preprocessed_fps.append(target_path)

    return preprocessed_fps

In [None]:
preproc_casia_fps = preprocess_part(merged_data['nir_rest'], "data/for_unpaired-casia_preprocessed_rest_of_NIR/", "NIR")

In [None]:
import os
__BUAA_PREPROC_FOLDER = 'data/Z_PreprocImg-BUAA-centerface-gray-averaged/BUAAVISNIR'

buaa_nir_images = []
for root, dirs, files in os.walk(__BUAA_PREPROC_FOLDER):
    for file in files:
        if int(file.split('.')[0]) % 2 == 0:
            buaa_nir_images.append(os.path.join(root, file))

In [None]:
buaa_nir_images

In [None]:
__AFFECTNET_PREPROC_TRAIN_FOLDER = 'data/AffectNet-8Labels/train_set/images'

retrieve_frist_n_images = len(buaa_nir_images) + len(merged_data['nir_rest'])
affectnet_vis_images = []
for i, f_nms in enumerate(sorted(os.listdir(__AFFECTNET_PREPROC_TRAIN_FOLDER), key=lambda x: int(x.split('.')[0]))):
    if i > retrieve_frist_n_images:
        break
    affectnet_vis_images.append(os.path.join(__AFFECTNET_PREPROC_TRAIN_FOLDER, f_nms))

In [None]:
affectnet_vis_images

In [None]:
import math
a = [0.,0.5]
print(a)

x = math.sqrt(a[0]*a[0] + a[1]*a[1])
print("radius", x, "-", "in" if x <=1 else "out")
r = 1 - x
print("rest to boundary", r)
enl = 1/(1 - a[0])
print("enlarge", enl)
print([a[0]*enl, a[1]*enl])

In [None]:
os.mkdir('data/unpaired_additional_train-affectnet_vis_and_buaa_casia_nir', )
os.mkdir('data/unpaired_additional_train-affectnet_vis_and_buaa_casia_nir/vis', )
os.mkdir('data/unpaired_additional_train-affectnet_vis_and_buaa_casia_nir/nir', )

In [None]:
import shutil

for i, (vis_pth, nir_pth) in enumerate(zip(affectnet_vis_images, buaa_nir_images + preproc_casia_fps)):
#     vis_new_fp = 'data/unpaired_additional_train-affectnet_vis_and_buaa_casia_nir/vis/' + "affnet-" + vis_pth.split('/')[-1]
    nir_new_fp = 'data/unpaired_additional_train-affectnet_vis_and_buaa_casia_nir/nir/' + "buaa-casia-" + str(i) + "." + (nir_pth.split('/')[-1]).split('.')[-1]
    
#     shutil.copyfile(vis_pth, vis_new_fp)
    shutil.copyfile(nir_pth, nir_new_fp)

In [None]:
merged_data['nir_rest']