In [1]:
import pandas as pd
import numpy as np
import os
from transformers import AutoModelForImageClassification, ViTImageProcessor, Trainer
from transformers import ViTForImageClassification
from datasets import load_dataset, Dataset, load_from_disk
from sklearn.metrics import accuracy_score, classification_report, det_curve
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import torch
import math
import operator
from pathlib import Path
from matplotlib.backends.backend_pdf import PdfPages
from src.DET import DET
from tqdm import tqdm
from ipywidgets import fixed, interact
import ipywidgets as widgets
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.stats import describe, gaussian_kde
import random
from PIL import Image
#from src.auxiliary_utils import Auxiliary
import cv2 

In [74]:
# Locations of the GFI label lists and of the per-eye image folders.
info_path = "./GFI_database_with_labels/info/"
right_eye_path = "./GFI_database_with_labels/Right_eye/"
left_eye_path = "./GFI_database_with_labels/Left_eye/"


def _load_eye_list(list_file, unpaired_filename):
    """Read one eye's label list and drop the image that has no counterpart.

    Parameters
    ----------
    list_file : str
        Path of a text file holding two columns per row: filename and gender.
    unpaired_filename : str
        Filename to remove from the list. Only the first matching row is
        dropped; if the name is not present the list is returned unchanged
        instead of raising IndexError.

    Returns
    -------
    pandas.DataFrame
        Columns 'filename' and 'gender' (converted with ``pd.to_numeric``).
        Remaining rows keep their original index labels.
    """
    frame = pd.DataFrame(np.loadtxt(list_file, dtype=str), columns=['filename', 'gender'])
    frame['gender'] = pd.to_numeric(frame['gender'])
    # Slicing with [:1] yields zero or one label, so a missing file is a no-op.
    unpaired_rows = frame.index[frame['filename'] == unpaired_filename][:1]
    return frame.drop(unpaired_rows)


# Remove the images that have no partner in the other eye's list.
right_eye = _load_eye_list(f'{info_path}List_Right_eye.txt', "04418d870.tiff")
left_eye = _load_eye_list(f'{info_path}List_Left_eye.txt', "04233d1786.tiff")

# Filename-sorted copies with a fresh 0..n-1 index, used for pairing below.
sorted_right_eye = right_eye.sort_values(by=['filename']).reset_index(drop=True)
sorted_left_eye = left_eye.sort_values(by=['filename']).reset_index(drop=True)

# Spot-check the same positional window in both sorted lists.
print(sorted_right_eye['filename'][972:985])
print(sorted_left_eye['filename'][972:985])

right_eye

972    05594d126.tiff
973    05594d159.tiff
974      05594d3.tiff
975     05594d33.tiff
976    05594d350.tiff
977    05594d355.tiff
978    05594d385.tiff
979    05594d416.tiff
980     05594d63.tiff
981     05594d96.tiff
982    05596d148.tiff
983     05596d28.tiff
984    05596d324.tiff
Name: filename, dtype: object
972    05594d158.tiff
973    05594d349.tiff
974    05594d356.tiff
975    05594d386.tiff
976      05594d4.tiff
977    05594d415.tiff
978     05594d62.tiff
979     05594d95.tiff
980    05596d147.tiff
981     05596d27.tiff
982    05596d323.tiff
983     05596d57.tiff
984    05597d249.tiff
Name: filename, dtype: object


Unnamed: 0,filename,gender
0,05485d154.tiff,1
1,05647d3.tiff,1
2,05512d102.tiff,1
3,05484d132.tiff,1
4,05248d493.tiff,1
...,...,...
1495,05663d307.tiff,0
1496,05728d85.tiff,0
1497,05306d479.tiff,0
1498,05305d393.tiff,0


In [76]:
def _split_eye_filename(filename):
    """Split '<subject>d<number>.<ext>' into ``(subject, number)``.

    The subject is the text before the first 'd' of the stem (the part of the
    filename before its first '.'); the number is the integer after the last 'd'.
    """
    parts = filename.split(".")[0].split("d")
    return parts[0], int(parts[-1])


def _first_gender_by_filename(frame):
    """Map each filename to the gender of its first row in ``frame``."""
    return frame.drop_duplicates("filename").set_index("filename")["gender"].to_dict()


right_gender = _first_gender_by_filename(right_eye)
left_gender = _first_gender_by_filename(left_eye)

# Parse every left-eye filename once and bucket it by subject, keeping the
# sorted order so the first acceptable candidate is the same one a full scan
# of sorted_left_eye would find.
left_by_subject = {}
for file_l in sorted_left_eye["filename"]:
    subject_l, num_l = _split_eye_filename(file_l)
    left_by_subject.setdefault(subject_l, []).append((num_l, file_l, left_gender[file_l]))

# A right image is paired with the first left image of the same subject whose
# number differs by exactly 1 and whose gender label agrees.
pairs = []
for file_r in tqdm(sorted_right_eye["filename"], desc="pairing eyes"):
    subject_r, num_r = _split_eye_filename(file_r)
    gender_r = right_gender[file_r]
    for num_l, file_l, gender_l in left_by_subject.get(subject_r, ()):
        if abs(num_r - num_l) == 1 and gender_r == gender_l:
            pairs.append((file_r, file_l, gender_r))
            break  # once matched, stop searching left candidates for this right eye

# Build the frame in one go instead of appending a row per match.
both_eyes = pd.DataFrame(pairs, columns=["right_eye_filename", "left_eye_filename", "gender"])
# Counters kept under their previous names in case other cells read them.
j = num_found = len(both_eyes)




1499it [00:03, 496.75it/s]


In [96]:
# Class balance of the matched right/left pairs.
gender_counts = both_eyes["gender"].value_counts()
gender_counts

gender
1    747
0    733
Name: count, dtype: int64

In [87]:
# Preview which left-eye file was matched to each right-eye file.
filename_columns = ["right_eye_filename", "left_eye_filename"]
both_eyes[filename_columns]

Unnamed: 0,right_eye_filename,left_eye_filename
0,02463d1892.tiff,02463d1893.tiff
1,02463d1910.tiff,02463d1911.tiff
2,02463d1928.tiff,02463d1929.tiff
3,02463d1947.tiff,02463d1946.tiff
4,02463d1965.tiff,02463d1964.tiff
...,...,...
1475,05964d23.tiff,05964d24.tiff
1476,05965d13.tiff,05965d14.tiff
1477,05966d22.tiff,05966d23.tiff
1478,05967d13.tiff,05967d14.tiff


In [97]:

# Hold out 20% of the matched pairs for testing; the fixed seed makes the
# split repeatable.
# NOTE(review): the split is done per image pair, and several pairs share the
# same subject prefix in their filenames, so one subject can end up in both
# train and test - confirm this is intended.
pair_files = both_eyes[["right_eye_filename", "left_eye_filename"]]
pair_gender = both_eyes["gender"]
X_train, X_test, y_train, y_test = train_test_split(
    pair_files,
    pair_gender,
    test_size=0.20,
    random_state=42,
)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (1184, 2)
X_test shape: (296, 2)
y_train shape: (1184,)
y_test shape: (296,)


In [102]:
from PIL import Image

# Build one side-by-side (right | left) RGB image per training pair and store
# it in a per-class folder under ./both_eyes_together/train.
for class_dir in ("male", "female"):
    # Create the output folders up front so save() cannot fail on a fresh checkout.
    os.makedirs(f"./both_eyes_together/train/{class_dir}", exist_ok=True)

for row, label in tqdm(zip(X_train.itertuples(index=False), y_train), total=len(X_train)):
    r_file = row.right_eye_filename
    l_file = row.left_eye_filename

    # Context managers close both source files as soon as they have been pasted.
    with Image.open(os.path.join(right_eye_path, r_file)) as r_image:
        with Image.open(os.path.join(left_eye_path, l_file)) as l_image:
            w = r_image.width + l_image.width
            # Use the taller of the two so neither image is cropped.
            h = max(r_image.height, l_image.height)
            combined_image = Image.new("RGB", (w, h))

            combined_image.paste(r_image, (0, 0))
            combined_image.paste(l_image, (r_image.width, 0))

    # Label 0 is filed under "male", every other label under "female".
    # NOTE(review): confirm that 0 means male in the GFI list files.
    class_dir = "male" if label == 0 else "female"
    # The combined image is stored under the right-eye filename.
    combined_image.save(f"./both_eyes_together/train/{class_dir}/{r_file}")


1184it [05:15,  3.75it/s]


In [104]:
# Build one side-by-side (right | left) RGB image per held-out pair and store
# it in a per-class folder under ./both_eyes_together/test.
for class_dir in ("male", "female"):
    # Create the output folders up front so save() cannot fail on a fresh checkout.
    os.makedirs(f"./both_eyes_together/test/{class_dir}", exist_ok=True)

for row, label in tqdm(zip(X_test.itertuples(index=False), y_test), total=len(X_test)):
    r_file = row.right_eye_filename
    l_file = row.left_eye_filename

    # Context managers close both source files as soon as they have been pasted.
    with Image.open(os.path.join(right_eye_path, r_file)) as r_image:
        with Image.open(os.path.join(left_eye_path, l_file)) as l_image:
            w = r_image.width + l_image.width
            # Use the taller of the two so neither image is cropped.
            h = max(r_image.height, l_image.height)
            combined_image = Image.new("RGB", (w, h))

            combined_image.paste(r_image, (0, 0))
            combined_image.paste(l_image, (r_image.width, 0))

    # Label 0 is filed under "male", every other label under "female".
    # NOTE(review): confirm that 0 means male in the GFI list files.
    class_dir = "male" if label == 0 else "female"
    # The combined image is stored under the right-eye filename.
    combined_image.save(f"./both_eyes_together/test/{class_dir}/{r_file}")


296it [00:27, 10.67it/s]


In [103]:
path = "./both_eyes_together"

def count_files_in_dirs(root_dir):
    """Print how many non-hidden files each directory under ``root_dir`` holds.

    One ``"<dir>: <n> files"`` line is printed per directory visited by
    ``os.walk``, followed by a ``"Total files: <n>"`` line. Files whose name
    starts with '.' are ignored in the per-directory counts and in the total,
    so the total always equals the sum of the lines above it.

    Parameters
    ----------
    root_dir : str
        Directory to walk. A path that does not exist yields no directory
        lines and a total of 0.
    """
    total = 0
    for dirpath, _, filenames in os.walk(root_dir):
        num_files = sum(1 for f in filenames if not f.startswith('.'))  # skip hidden files
        total += num_files
        print(f"{dirpath}: {num_files} files")
    print("Total files:", total)

# `path` already starts with "./", so join it instead of prefixing another "./".
count_files_in_dirs(os.path.join(path, 'train'))
count_files_in_dirs(os.path.join(path, 'test'))


././both_eyes_together/train: 0 files
././both_eyes_together/train\female: 582 files
././both_eyes_together/train\male: 602 files
Total files: 1184
././both_eyes_together/test: 0 files
././both_eyes_together/test\female: 0 files
././both_eyes_together/test\male: 0 files
Total files: 0


In [105]:
from datasets import load_dataset
# Read the train/ and test/ class folders back as an image-classification dataset.
# NOTE(review): the imagefolder builder derives labels from the folder names
# (presumably sorted, i.e. female -> 0, male -> 1), which would be the reverse
# of the 0 -> male coding used when the files were written - confirm with
# dataset["train"].features before training.
path = "./both_eyes_together"
dataset = load_dataset(path="imagefolder", data_dir=path)
dataset

Resolving data files:   0%|          | 0/1184 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/296 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/1184 [00:00<?, ?files/s]

Downloading data:   0%|          | 0/296 [00:00<?, ?files/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 1184
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 296
    })
})

In [135]:
# Persist the loaded DatasetDict so later runs can reload it from disk.
dataset_dir = "./datasets/both_eyes_together"
dataset.save_to_disk(dataset_dir)


Saving the dataset (0/5 shards):   0%|          | 0/1184 [00:00<?, ? examples/s]

KeyboardInterrupt: 