In [1]:
import pandas as pd
import os
import glob
import shutil
import numpy as np
import torch
import torchvision.models as models
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
from tqdm import tqdm

In [None]:
# Split left and right eyes of MMU2 dataset
#
# For every person folder under ../dataset/mmu2, create "left" and "right"
# sub-folders and move each .bmp image into the one selected by its file-name
# prefix: <2-digit folder number> + "01" -> left, + "02" -> right.
persons = glob.glob("../dataset/mmu2/*", recursive=True)

leftPreExp = "01"
rightPreExp = "02"
for person in persons:
  # Skip stray files next to the person folders; sub-folders cannot be created inside them.
  if not os.path.isdir(person):
    continue
  leftPath = os.path.join(person, "left")
  rightPath = os.path.join(person, "right")
  os.makedirs(leftPath, exist_ok=True)
  os.makedirs(rightPath, exist_ok=True)
  # os.path.basename understands the platform's separators; splitting on a
  # literal backslash only yields the folder name on Windows.
  folderNum = int(os.path.basename(person))
  personDir = f"{folderNum:02d}"
  leftExp = personDir + leftPreExp
  rightExp = personDir + rightPreExp
  for image in glob.glob(os.path.join(person, "*.bmp")):
    image_name = os.path.basename(image)
    if image_name.startswith(leftExp):
      shutil.copy2(image, os.path.join(leftPath, image_name))
    elif image_name.startswith(rightExp):
      shutil.copy2(image, os.path.join(rightPath, image_name))
    # NOTE(review): the source image is removed even when it matched neither
    # prefix (behaviour kept from the original) - confirm this is intended.
    os.remove(image)

In [5]:
# Get PAD Scores
#
# NOTE(review): a later cell calls find_in_scores(scores, ...), but the only
# assignment to `scores` visible in this notebook is the commented-out line
# below, and `get_pad_scores` is neither imported nor defined in the visible
# cells. On a fresh kernel `scores` therefore appears to be undefined - confirm
# where it is meant to come from before relying on Restart & Run All.
# `PAD` is imported here but not referenced by any visible cell.
from pad.detector import PAD
# imageFiles = glob.glob("./data/raw/**/*")
# scores = get_pad_scores(imageFiles, "./pad/Model/D-NetPAD_Model.pth")

In [4]:
def find_in_scores(scores, file):
  """Look up the value stored alongside ``file`` in ``scores``.

  Args:
    scores: iterable of indexable records; element 0 is compared to ``file``
      and element 1 is the value returned.
    file: key to search for.

  Returns:
    Element 1 of the first record whose element 0 equals ``file``, or ``None``
    when no record matches.
  """
  matches = (entry[1] for entry in scores if entry[0] == file)
  return next(matches, None)

In [6]:
# Path configuration shared by the cells that follow.
output_dir: str = "./data/processed"
# Root of the raw data; the fold-building cell lists one sub-directory per identity under it.
data_dir: str = "./data/raw"
# Semicolon-separated metadata table located inside the raw data root.
info_csv: str = "./data/raw/info.csv"

In [108]:
# Partition the identities into folds of roughly equal file count.
#
# Identities are visited in a random order and accumulated into the current
# fold until it holds at least `fold_count` files; any leftover identities
# form one final (smaller) fold.
NUM_IDENTITIES = 398  # identity folders are named 1..NUM_IDENTITIES
NUM_FOLDS = 5         # target number of folds used to size each one
FOLD_SEED = 42        # fixed seed so the fold assignment is reproducible across runs

rng = np.random.default_rng(FOLD_SEED)
numbers = rng.choice(NUM_IDENTITIES, size=NUM_IDENTITIES, replace=False) + 1
# Without recursive=True, "**" behaves like "*", so this counts the entries
# directly inside each identity folder.
fold_count = len(glob.glob(data_dir+"/*/**"))//NUM_FOLDS
folds = []
file_count = 0
identities = []
for identity in numbers:
  file_count += len(os.listdir(os.path.join(data_dir, str(identity))))
  identities.append(int(identity))  # store plain Python ints rather than numpy scalars
  if file_count >= fold_count:
    folds.append({"group": len(folds) + 1, "identities": identities})
    file_count = 0
    identities = []
# Only emit the trailing fold when something is left over; appending it
# unconditionally produced an empty extra group whenever the last identity
# happened to close a fold.
if identities:
  folds.append({"group": len(folds) + 1, "identities": identities})

In [63]:
def get_fold(identity):
  """Return the group number of the fold that contains ``identity``.

  Searches the notebook-level ``folds`` list (dicts with ``"group"`` and
  ``"identities"`` keys) and returns the ``"group"`` of the first fold whose
  ``"identities"`` contains ``identity``.

  Returns:
    The matching fold's group, or ``None`` when no fold contains ``identity``
    (previously this case failed with a TypeError from subscripting ``None``).
  """
  for fold in folds:
    if identity in fold["identities"]:
      return fold["group"]
  return None

In [109]:
# Build the per-image metadata table and write it to data.csv.
#
# Output columns, in order: group, identity, <first two columns of info.csv>,
# pad_score. Uses names defined in other cells: info_csv, data_dir,
# find_in_scores, get_fold (which reads folds) and scores.
# NOTE(review): no live assignment to `scores` is visible in this notebook -
# make sure it is defined before running this cell.
info = pd.read_csv(info_csv, sep=";")
# Only the first two CSV columns are carried through to the output.
kept_cols = info.columns.tolist()[:2]

# The "file" column holds "<identity>\<file name>" with a literal backslash;
# this is the CSV's data format, not an OS path, so it is split explicitly.
parts = info["file"].map(lambda rel: rel.split("\\"))
df = (
  info[kept_cols]
  .assign(
    identity=parts.map(lambda p: int(p[0])),
    file=parts.map(lambda p: p[1]),
  )
  .sort_values(by=["identity"])
)

# Scores are looked up by the image path rebuilt from the identity and file name.
filepaths = [
  os.path.join(data_dir, str(identity), name)
  for identity, name in zip(df["identity"], df["file"])
]
df["pad_score"] = [find_in_scores(scores, path) for path in filepaths]
df["group"] = [get_fold(identity) for identity in df["identity"]]

# Name the output order explicitly instead of slicing column positions.
cols = ["group", "identity", *kept_cols, "pad_score"]
df = df[cols]
df.to_csv("data.csv", sep=";", index=False)

In [25]:
# Build every unordered pair of training images, label it genuine/impostor,
# and write the result to train_pairs.json.
def _build_pairs(records):
  """Return one record per unordered pair of rows with different file names.

  Args:
    records: list of dicts with "file", "identity" and "group" keys.

  Returns:
    List of dicts with keys "first"/"second" (the two file names in sorted
    order), "type" ("genuine" when both rows share an identity, otherwise
    "impostor") and "group_first"/"group_second" (the two groups in ascending
    order).
  """
  pair_rows = []
  for idx, row1 in enumerate(records):
    # Starting after idx visits each unordered pair once, in the same order in
    # which the full ordered double loop first encountered it.
    for row2 in records[idx + 1:]:
      if row1["file"] == row2["file"]:
        continue
      # NOTE(review): groups are sorted independently of the file names, so
      # group_first is the smaller group, not necessarily the group of
      # "first" - confirm this is intended.
      groups = sorted([row1["group"], row2["group"]])
      files = sorted([row1["file"], row2["file"]])
      pair_type = "genuine" if row1["identity"] == row2["identity"] else "impostor"
      pair_rows.append({
        'first': files[0],
        'second': files[1],
        'type': pair_type,
        'group_first': groups[0],
        'group_second': groups[1],
      })
  return pair_rows


# Read the table once; df2 is kept as an alias for any cell still using the name.
df1 = pd.read_csv("./data/train.csv", sep=";").to_dict("records")
df2 = df1

# Local names no longer overwrite the notebook-level `folds` list (used by
# get_fold) or shadow the builtin `dict`.
lst = _build_pairs(df1)
df = pd.DataFrame(lst)
# Still needed in case different rows yield identical pair records.
df = df.drop_duplicates(keep='first')
df.to_json("train_pairs.json", orient="records")