# Deepface: Intersection between Benchmark data & Perturbed data

## Import relevant libraries

In [1]:
import os
import pandas as pd

# Read Benchmark and Perturbed data:

In [2]:
# Benchmark CSVs: the full pairs file, then the positive-pair files
# (one for male, one for female).
benchmark_df, benchmark_male_df, benchmark_female_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/LFW-csv/pairs.csv',
        './data/LFW-csv/pairs_positive_male.csv',
        './data/LFW-csv/pairs_positive_female.csv',
    )
)

# PERTURBED DATA: one CSV per gender (male first, then female).
full_perturbed_male, full_perturbed_female = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/LFW-csv/pairs_perturbed_male.csv',
        './data/LFW-csv/pairs_perturbed_female.csv',
    )
)

### Get perturbed data for both genders: 

1. `pairs_perturbed_male.csv`, 
2. `pairs_perturbed_female.csv`

In [3]:
def get_perturbed_data_names(perturbed_gender_dir, full_perturbed_gender, perturbed_gender_csv):
    """Append one row per non-empty person folder to the perturbed CSV.

    Each sub-directory of ``perturbed_gender_dir`` that contains at least one
    entry contributes a row ``{"name": <folder name>, "images": str(<sorted
    list of entry names>)}``. The new rows are appended to
    ``full_perturbed_gender`` and the result is written to
    ``perturbed_gender_csv`` once, after the whole directory was scanned.

    Args:
        perturbed_gender_dir: Directory holding one sub-folder per person.
        full_perturbed_gender: DataFrame with the rows collected so far
            (columns ``name`` and ``images``).
        perturbed_gender_csv: Path of the CSV file to (over)write.

    Returns:
        The merged DataFrame that was written. When no non-empty folder is
        found, ``full_perturbed_gender`` is returned unchanged and no file
        is written.

    Raises:
        OSError: If the directory cannot be listed or the CSV cannot be
            written.
    """
    # NOTE: rows are appended unconditionally, so running this twice against
    # the same CSV stores every name twice.
    new_rows = []

    # os.listdir returns entries in arbitrary order; sorting keeps both the
    # row order and the stored image lists reproducible across machines.
    for name in sorted(os.listdir(perturbed_gender_dir)):
        name_dir = os.path.join(perturbed_gender_dir, name)

        # Stray files (e.g. ".DS_Store") are not person folders; listing
        # them would raise NotADirectoryError.
        if not os.path.isdir(name_dir):
            continue

        images = sorted(os.listdir(name_dir))
        if images:
            new_rows.append({"name": name, "images": str(images)})

    if not new_rows:
        return full_perturbed_gender

    # One concat and one write instead of a CSV round-trip per folder.
    merged_df = pd.concat(
        [full_perturbed_gender, pd.DataFrame(new_rows)],
        ignore_index=True,
    )
    merged_df.to_csv(perturbed_gender_csv, index=False)
    return merged_df


In [4]:
# # get perturbed male names
# get_perturbed_data_names("./data/LFW_gender_makeup_heavy/Male", full_perturbed_male, "./data/LFW-csv/pairs_perturbed_male.csv")

# # get perturbed female names
# get_perturbed_data_names("./data/LFW_gender_makeup_heavy/Female", full_perturbed_female, "./data/LFW-csv/pairs_perturbed_female.csv")

### Get positive pairs from benchmark data:

1. `pairs_positive_male.csv`, 
2. `pairs_positive_female.csv`

In [5]:
def get_positive_pairs(benchmark_df, pairs_positive_df, positive_gender_csv):
    """Append the matching (positive) pairs of ``benchmark_df`` to a CSV.

    A row of ``benchmark_df`` counts as a match when its fourth column is
    missing (NaN). For every such row the first three columns are stored as
    ``name``, ``imagenum1`` and ``imagenum2``. The selected rows are appended
    to ``pairs_positive_df`` and the result is written to
    ``positive_gender_csv`` in a single write.

    Args:
        benchmark_df: DataFrame with at least four columns; columns are
            addressed by position, not by label.
        pairs_positive_df: DataFrame with the positive pairs collected so
            far (columns ``name``, ``imagenum1``, ``imagenum2``).
        positive_gender_csv: Path of the CSV file to (over)write.

    Returns:
        The merged DataFrame that was written. When ``benchmark_df`` has no
        matching row, ``pairs_positive_df`` is returned unchanged and no
        file is written.

    Raises:
        IndexError: If ``benchmark_df`` has fewer than four columns.
        OSError: If the CSV cannot be written. Errors are no longer printed
            and ignored, because a failed write means the data was not saved.
    """
    # NOTE(review): nothing here filters by gender, so calling this with the
    # same benchmark_df for the male and the female CSV appends identical
    # rows to both -- confirm whether a gender filter is applied elsewhere.

    # .iloc selects by position explicitly; plain row[3] on a label-indexed
    # Series relies on a deprecated positional fallback.
    is_match = benchmark_df.iloc[:, 3].isna()
    positives = benchmark_df.loc[is_match].iloc[:, :3].copy()
    positives.columns = ["name", "imagenum1", "imagenum2"]

    if positives.empty:
        return pairs_positive_df

    # One concat and one write instead of a CSV round-trip per matching row.
    merged_df = pd.concat([pairs_positive_df, positives], ignore_index=True)
    merged_df.to_csv(positive_gender_csv, index=False)
    return merged_df
        

In [6]:
# # get positive male pairs
# get_positive_pairs(benchmark_df, benchmark_male_df, "./data/LFW-csv/pairs_positive_male.csv")

# # get positive female pairs
# get_positive_pairs(benchmark_df, benchmark_female_df, "./data/LFW-csv/pairs_positive_female.csv")

### Merge benchmark data with perturbed data (Get intersection): 

- benchmark:
    1. benchmark_df = `pairs.csv`, 
    2. benchmark_male_df = `pairs_positive_male.csv`, 
    3. benchmark_female_df = `pairs_positive_female.csv`

- perturbed data:
    1. full_perturbed_male = `pairs_perturbed_male.csv`, 
    2. full_perturbed_female = `pairs_perturbed_female.csv`

In [7]:
# Male: inner-join the full benchmark pairs with the male perturbed list on
# `name`, then discard every row that still has a missing value in any column.
merged_perturbed_male = benchmark_df.merge(
    full_perturbed_male,
    how='inner',
    on=['name'],
).dropna()

# Bare expression so the notebook renders the frame.
merged_perturbed_male

Unnamed: 0,name,imagenum1,imagenum2,Unnamed: 3,images
1,Abel_Pacheco,1,Jong_Thae_Hwa,2.0,['Abel_Pacheco_0004.jpg']
2,Abel_Pacheco,2,Jean-Francois_Lemounier,1.0,['Abel_Pacheco_0004.jpg']
5,Akhmed_Zakayev,2,Donna_Morrissey,1.0,"['Akhmed_Zakayev_0001.jpg', 'Akhmed_Zakayev_00..."
8,Anders_Fogh_Rasmussen,2,Johnson_Panjaitan,2.0,"['Anders_Fogh_Rasmussen_0002.jpg', 'Anders_Fog..."
13,Anwar_Ibrahim,1,David_Alpay,1.0,"['Anwar_Ibrahim_0001.jpg', 'Anwar_Ibrahim_0002..."
...,...,...,...,...,...
3559,Roger_Corbett,1,Tocker_Pudwill,1.0,['Roger_Corbett_0001.jpg']
3560,Ruth_Harlow,1,Virgina_Ruano_Pascal,1.0,['Ruth_Harlow_0001.jpg']
3561,Sergei_Alexandrovitch_Ordzhonikidze,1,Yolanda_King,1.0,['Sergei_Alexandrovitch_Ordzhonikidze_0001.jpg']
3562,Shane_Loux,1,Val_Ackerman,1.0,['Shane_Loux_0001.jpg']


In [8]:
# Female: inner-join the full benchmark pairs with the female perturbed list
# on `name`, then discard every row that still has a missing value in any
# column.
merged_perturbed_female = benchmark_df.merge(
    full_perturbed_female,
    how='inner',
    on=['name'],
).dropna()

# Bare expression so the notebook renders the frame.
merged_perturbed_female

Unnamed: 0,name,imagenum1,imagenum2,Unnamed: 3,images
6,Ann_Veneman,6,Sergio_Garcia,2.0,"['Ann_Veneman_0001.jpg', 'Ann_Veneman_0002.jpg..."
7,Ann_Veneman,8,Ted_Williams,1.0,"['Ann_Veneman_0001.jpg', 'Ann_Veneman_0002.jpg..."
9,Barbara_Brezigar,2,Doris_Roberts,2.0,"['Barbara_Brezigar_0001.jpg', 'Barbara_Breziga..."
15,Clare_Short,4,Don_Carcieri,1.0,"['Clare_Short_0002.jpg', 'Clare_Short_0003.jpg']"
17,Corinne_Coman,2,Frank_Beamer,1.0,['Corinne_Coman_0001.jpg']
...,...,...,...,...,...
1311,Mira_Sorvino,1,Tom_Tunney,1.0,['Mira_Sorvino_0001.jpg']
1312,Natanaela_Barnova,1,Nuon_Chea,1.0,['Natanaela_Barnova_0001.jpg']
1313,Nova_Esther_Guthrie,1,Stephen_Joseph,1.0,['Nova_Esther_Guthrie_0001.jpg']
1314,Rani_Mukherjee,1,Timothy_McVeigh,1.0,['Rani_Mukherjee_0001.jpg']


In [9]:
# Positive male pairs: keep only the pairs whose `name` also appears in the
# male perturbed list, then drop rows with any missing value.
merged_perturbed_male_pos = benchmark_male_df.merge(
    full_perturbed_male,
    how='inner',
    on=['name'],
).dropna()

# Bare expression so the notebook renders the frame.
merged_perturbed_male_pos

Unnamed: 0,name,imagenum1,imagenum2,images
0,Abel_Pacheco,1,4,['Abel_Pacheco_0004.jpg']
1,Akhmed_Zakayev,1,3,"['Akhmed_Zakayev_0001.jpg', 'Akhmed_Zakayev_00..."
2,Akhmed_Zakayev,2,3,"['Akhmed_Zakayev_0001.jpg', 'Akhmed_Zakayev_00..."
3,Anders_Fogh_Rasmussen,1,3,"['Anders_Fogh_Rasmussen_0002.jpg', 'Anders_Fog..."
4,Anders_Fogh_Rasmussen,1,4,"['Anders_Fogh_Rasmussen_0002.jpg', 'Anders_Fog..."
...,...,...,...,...
2031,Vladimir_Voltchkov,1,2,['Vladimir_Voltchkov_0001.jpg']
2032,Wang_Yi,1,2,"['Wang_Yi_0001.jpg', 'Wang_Yi_0002.jpg']"
2033,Zafarullah_Khan_Jamali,1,2,['Zafarullah_Khan_Jamali_0002.jpg']
2034,Zhu_Rongji,1,3,"['Zhu_Rongji_0002.jpg', 'Zhu_Rongji_0004.jpg',..."


In [10]:
# Positive female pairs: keep only the pairs whose `name` also appears in the
# female perturbed list, then drop rows with any missing value.
merged_perturbed_female_pos = benchmark_female_df.merge(
    full_perturbed_female,
    how='inner',
    on=['name'],
).dropna()

# Bare expression so the notebook renders the frame.
merged_perturbed_female_pos

Unnamed: 0,name,imagenum1,imagenum2,images
0,Angela_Bassett,1,5,"['Angela_Bassett_0001.jpg', 'Angela_Bassett_00..."
1,Angela_Bassett,2,5,"['Angela_Bassett_0001.jpg', 'Angela_Bassett_00..."
2,Angela_Bassett,3,4,"['Angela_Bassett_0001.jpg', 'Angela_Bassett_00..."
3,Ann_Veneman,3,5,"['Ann_Veneman_0001.jpg', 'Ann_Veneman_0002.jpg..."
4,Ann_Veneman,6,10,"['Ann_Veneman_0001.jpg', 'Ann_Veneman_0002.jpg..."
...,...,...,...,...
751,Oprah_Winfrey,1,2,"['Oprah_Winfrey_0002.jpg', 'Oprah_Winfrey_0003..."
752,Oprah_Winfrey,2,3,"['Oprah_Winfrey_0002.jpg', 'Oprah_Winfrey_0003..."
753,Paula_Zahn,1,2,['Paula_Zahn_0002.jpg']
754,Rita_Wilson,1,4,"['Rita_Wilson_0001.jpg', 'Rita_Wilson_0002.jpg..."


### Data summary after merging: 

1. `merged_perturbed_male` = benchmark (negative case) + perturbed_male 
2. `merged_perturbed_female` = benchmark (negative case) + perturbed_female 
3. `merged_perturbed_male_pos` = benchmark (pos case) + perturbed_male
4. `merged_perturbed_female_pos` = benchmark (pos case) + perturbed_female

### Check if (negative case) `imagenum2` exists in perturbed_gender  

In [11]:
# Load fresh copies of the perturbed CSVs (male first, then female) so they
# can be modified independently of the frames read earlier.
full_perturbed_male_copy, full_perturbed_female_copy = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/LFW-csv/pairs_perturbed_male.csv',
        './data/LFW-csv/pairs_perturbed_female.csv',
    )
)

In [12]:
# Relabel the `name` column as `imagenum2` in both copies so they can be
# joined on a column called `imagenum2`.
full_perturbed_male_copy = full_perturbed_male_copy.rename(
    {"name": "imagenum2"}, axis="columns"
)
full_perturbed_female_copy = full_perturbed_female_copy.rename(
    {"name": "imagenum2"}, axis="columns"
)

In [13]:
# Keep only the male rows whose `imagenum2` value is also present in the
# relabelled male perturbed copy. Both inputs carry an `images` column, so
# the result holds them as `images_x` (left) and `images_y` (right).
merged_perturbed_male_neg = merged_perturbed_male.merge(
    full_perturbed_male_copy,
    how='inner',
    on=['imagenum2'],
).dropna()

# Bare expression so the notebook renders the frame.
merged_perturbed_male_neg

Unnamed: 0,name,imagenum1,imagenum2,Unnamed: 3,images_x,images_y
0,Abel_Pacheco,1,Jong_Thae_Hwa,2.0,['Abel_Pacheco_0004.jpg'],['Jong_Thae_Hwa_0002.jpg']
1,Bertrand_Bonello,1,Jong_Thae_Hwa,2.0,['Bertrand_Bonello_0002.jpg'],['Jong_Thae_Hwa_0002.jpg']
2,Anders_Fogh_Rasmussen,2,Johnson_Panjaitan,2.0,"['Anders_Fogh_Rasmussen_0002.jpg', 'Anders_Fog...","['Johnson_Panjaitan_0001.jpg', 'Johnson_Panjai..."
3,Anwar_Ibrahim,1,David_Alpay,1.0,"['Anwar_Ibrahim_0001.jpg', 'Anwar_Ibrahim_0002...",['David_Alpay_0001.jpg']
4,Bill_Frist,5,Jimmy_Kimmel,2.0,"['Bill_Frist_0003.jpg', 'Bill_Frist_0004.jpg',...","['Jimmy_Kimmel_0001.jpg', 'Jimmy_Kimmel_0002.j..."
...,...,...,...,...,...,...
754,Luc_Montagnier,1,Paul_Krueger,1.0,['Luc_Montagnier_0001.jpg'],['Paul_Krueger_0001.jpg']
755,Justin_Wilson,1,Ray_Bradbury,1.0,['Justin_Wilson_0001.jpg'],['Ray_Bradbury_0001.jpg']
756,Michael_J_Fox,1,Ricky_Barnes,1.0,['Michael_J_Fox_0001.jpg'],['Ricky_Barnes_0001.jpg']
757,Paul_Newman,1,Robert_Blake,3.0,['Paul_Newman_0001.jpg'],"['Robert_Blake_0003.jpg', 'Robert_Blake_0004.j..."


In [14]:
# Keep only the female rows whose `imagenum2` value is also present in the
# relabelled female perturbed copy. Both inputs carry an `images` column, so
# the result holds them as `images_x` (left) and `images_y` (right).
merged_perturbed_female_neg = merged_perturbed_female.merge(
    full_perturbed_female_copy,
    how='inner',
    on=['imagenum2'],
).dropna()

# Bare expression so the notebook renders the frame.
merged_perturbed_female_neg

Unnamed: 0,name,imagenum1,imagenum2,Unnamed: 3,images_x,images_y
0,Barbara_Brezigar,2,Doris_Roberts,2.0,"['Barbara_Brezigar_0001.jpg', 'Barbara_Breziga...","['Doris_Roberts_0002.jpg', 'Doris_Roberts_0003..."
1,Elinor_Caplan,1,Hilary_McKay,1.0,['Elinor_Caplan_0002.jpg'],['Hilary_McKay_0001.jpg']
2,Patty_Schnyder,3,Pernilla_Bjorn,1.0,"['Patty_Schnyder_0001.jpg', 'Patty_Schnyder_00...",['Pernilla_Bjorn_0001.jpg']
3,Barbara_Felt-Miller,1,Leticia_Dolera,1.0,['Barbara_Felt-Miller_0001.jpg'],['Leticia_Dolera_0001.jpg']
4,Imelda_Marcos,1,Patty_Schnyder,4.0,['Imelda_Marcos_0001.jpg'],"['Patty_Schnyder_0001.jpg', 'Patty_Schnyder_00..."
...,...,...,...,...,...,...
98,Angie_Martinez,1,Ruth_Pearce,1.0,['Angie_Martinez_0001.jpg'],['Ruth_Pearce_0001.jpg']
99,Emily_Mortimer,1,Maria_Simon,1.0,['Emily_Mortimer_0001.jpg'],['Maria_Simon_0001.jpg']
100,Jada_Pinkett_Smith,2,Jenny_Romero,1.0,['Jada_Pinkett_Smith_0002.jpg'],['Jenny_Romero_0001.jpg']
101,Jeri_Ryan,1,Nova_Esther_Guthrie,1.0,['Jeri_Ryan_0001.jpg'],['Nova_Esther_Guthrie_0001.jpg']


### Merge (negative case) and (positive case) 