# TEST CASE GENERATION

In [2]:
import pandas as pd
import os

In [3]:
LANGUAGES = ["polish"] #["french", "spanish", "portuguese", "hindi", "arabic", "mandarin", "italian", "polish", "german", "dutch"]
STORE_PATH = ".."

## Write Dictionary of Placeholder Values

In [4]:
# LOAD FILES TO DATAFRAMES

import_dict = dict()

for lang in LANGUAGES:
    import_dict[lang] = dict()

    for filename in sorted(os.listdir(f"{STORE_PATH}/2_Placeholders/{lang}/")):
        
            if filename.endswith(".csv"):
                
                # need to handle case-inflected languages separately --> different file structure
                if lang in ["german", "polish"]:
                    import_dict[lang][filename.removesuffix(".csv")] = pd.read_csv(f"{STORE_PATH}/2_Placeholders/{lang}/{filename}")
                    
                else:
                    import_df = pd.read_csv(f"{STORE_PATH}/2_Placeholders/{lang}/{filename}")
                    
                    for col in import_df.columns:
                        if col!="TARGET":
                            import_dict[lang][col] = import_df[["TARGET", col]]
                            import_dict[lang][col].columns = ["TARGET", "PLACEHOLDER"]
                            
                            # for gender-inflected languages, we need to delete rows with empty entries
                            import_dict[lang][col] = import_dict[lang][col].drop(import_dict[lang][col][import_dict[lang][col].PLACEHOLDER=="-"].index)
                            import_dict[lang][col].dropna(inplace=True)
                            
            else:
                print(f"unexpected file: {filename}")

In [5]:
# COLLAPSE COLUMNS IN DATAFRAMES INTO LISTS OF STRINGS IN PLACEHOLDER DICT

placeholder_dict = {}

for lang in LANGUAGES:
    placeholder_dict[lang] = dict()

    for df in import_dict[lang]:
        placeholder_dict[lang][df] = {}
        for column in import_dict[lang][df].columns:
            placeholder_dict[lang][df][column] = import_dict[lang][df][column].to_list()
    print(lang, placeholder_dict[lang].keys(),"\n")

polish dict_keys(['[IDENT_A]', '[IDENT_P]', '[IDENT_P_char_del]', '[IDENT_P_leet]', '[IDENT_P_space_add]', '[IDENT_S]', '[IDENT_S_char_del]', '[IDENT_S_leet]', '[IDENT_S_space_add]', '[SLR_P]', '[SLR_P_leet]', '[SLR_P_space_add]', '[SLR_S]', '[SLR_S_leet]', '[SLR_S_space_add]', '[female_IDENT_P]', '[female_IDENT_P_char_del]', '[female_IDENT_P_leet]', '[female_IDENT_P_space_add]', '[female_IDENT_S]', '[female_IDENT_S_char_del]', '[female_IDENT_S_leet]', '[female_IDENT_S_space_add]', '[female_SLR_P]', '[female_SLR_P_leet]', '[female_SLR_P_space_add]', '[female_SLR_S]', '[female_SLR_S_leet]', '[female_SLR_S_space_add]', '[male_IDENT_P]', '[male_IDENT_P_char_del]', '[male_IDENT_P_leet]', '[male_IDENT_P_space_add]', '[male_IDENT_S]', '[male_IDENT_S_char_del]', '[male_IDENT_S_leet]', '[male_IDENT_S_space_add]', '[male_SLR_P]', '[male_SLR_P_leet]', '[male_SLR_P_space_add]', '[male_SLR_S]', '[male_SLR_S_leet]', '[male_SLR_S_space_add]']) 



## Import Template Data

In [6]:
cases_dict = dict()

for lang in LANGUAGES:
    
    # import data from csv file
    cases_dict[lang] = pd.read_csv(f"{STORE_PATH}/1_Templates/hatecheck_templates_{lang}.csv")

    # drop unneccessary columns
    cases_dict[lang].drop(columns = ["focus_lemma", "trans_deepl", "trans_google", "case_templ", "number"], inplace = True, errors = "ignore")

    # rename manual translation column to be new case_templ column
    cases_dict[lang].rename(columns = {"trans_manual": "case_templ"}, inplace = True)
    
    # create label_gold column if it does not exist already
    if "label_gold" not in cases_dict[lang].columns:
        cases_dict[lang]["label_gold"] = cases_dict[lang].apply(lambda x: "hateful" if x.functionality.endswith("_h") else "non-hateful", axis = 1)
    
    # tidy up column types
    cases_dict[lang] = cases_dict[lang].convert_dtypes()
    cases_dict[lang]['ref_templ_id'] = cases_dict[lang].ref_templ_id.astype('Int64')
    
    print(lang, cases_dict[lang].shape)

polish (755, 8)


## Explode Templates into Individual Cases

In [7]:
# define function for writing lists of test cases and target identities from templates

def write_case_target(row, lang):
    test_case=[]
    target_ident=[] 
    
    # for gender-inflected templates, look at gender-specific columns
    if pd.isnull(row.case_templ):
        for key in placeholder_dict[lang]:
            if key in row.gender_male:
                
                if lang in ["polish", "german"]:
                    for item in placeholder_dict[lang][key][row.case]:
                        test_case.append(row.gender_male.replace(key, item))
                else:
                    for item in placeholder_dict[lang][key]["PLACEHOLDER"]:
                        test_case.append(row.gender_male.replace(key, item))
                        
                for item in placeholder_dict[lang][key]["TARGET"]:
                    target_ident.append(item)
            
            if key in row.gender_female:
                if lang in ["polish", "german"]:
                    for item in placeholder_dict[lang][key][row.case]:
                        test_case.append(row.gender_female.replace(key, item))
                else:
                    for item in placeholder_dict[lang][key]["PLACEHOLDER"]:
                        test_case.append(row.gender_female.replace(key, item))
                        
                for item in placeholder_dict[lang][key]["TARGET"]:
                    target_ident.append(item)
        return test_case, target_ident
    
    # for not gender-inflected templates, use standard column      
    for key in placeholder_dict[lang]:
        if key in row.case_templ:
            
            if lang in ["polish", "german"]:
                for item in placeholder_dict[lang][key][row.case]:
                    test_case.append(row.case_templ.replace(key, item))
            else:
                for item in placeholder_dict[lang][key]["PLACEHOLDER"]:
                    test_case.append(row.case_templ.replace(key, item))
                    
            for item in placeholder_dict[lang][key]["TARGET"]:
                target_ident.append(item)
    
    # for templates without placeholders, the case is just the template itself
    if test_case==[]:
        test_case = row.case_templ
    return test_case, target_ident

In [8]:
# write lists of test cases from templates

for lang in LANGUAGES:

    cases_dict[lang]['test_case'] = cases_dict[lang].apply(lambda x: write_case_target(x, lang), axis=1)
    cases_dict[lang]['target_ident'] = cases_dict[lang].test_case.apply(lambda x: x[1])
    cases_dict[lang]['test_case'] = cases_dict[lang].test_case.apply(lambda x: x[0])
    
    # explode templates 
    cases_dict[lang] = pd.concat([cases_dict[lang].explode('test_case').drop(columns=['target_ident']),
                                  pd.Series.explode(cases_dict[lang].target_ident)], axis=1)
    
    # tidy up column types again
    cases_dict[lang] = cases_dict[lang].convert_dtypes()
    
    print(lang, cases_dict[lang].shape)

polish (3815, 10)


## Create References Between Cases

In [9]:
%%time

def case_id_finder(row, df):
    if pd.notna(row.ref_templ_id) and pd.notna(row.target_ident):
        output = df.case_id[(df.templ_id==row.ref_templ_id)&(df.target_ident==row.target_ident)].values
        if len(output)<2 and len(output)>0:
            return output[0]
    else:
        return pd.NA

for lang in LANGUAGES:

    # create case IDs for every case
    cases_dict[lang].reset_index(inplace=True)
    cases_dict[lang]['case_id']=cases_dict[lang].index + 1
    
    # Match ref_templ_ids to case IDs. Only works for identity terms, not slurs
    cases_dict[lang]['ref_case_id'] = cases_dict[lang].apply(lambda x: case_id_finder(x, cases_dict[lang]), axis = 1)

CPU times: user 1.28 s, sys: 5.43 ms, total: 1.28 s
Wall time: 1.28 s


## Language-specific Tidying

In [10]:
# Capitalise first letter of all test cases in Polish, Dutch, Italian, Portuguese, Spanish, French (needed because of lowercase placeholders)
for lang in ["polish", "dutch", "italian", "portuguese", "spanish", "french"]:
    if lang in LANGUAGES:
        cases_dict[lang]["test_case"] = cases_dict[lang].test_case.apply(lambda s: s[0].upper() + s[1:])
        
# For Italian, adjust articles based on first letter(s) of noun --> only relevant IDENT is immigrati
def italian_articles(test_case, letters):
    for l in letters:
        test_case = test_case.replace(f" i {l}",f" gli {l}")
        test_case = test_case.replace(f" ai {l}",f" agli {l}")
        test_case = test_case.replace(f" il {l}",f" l'{l}")
        test_case = test_case.replace(f"I {l}",f"Gli {l}")
        test_case = test_case.replace(f"Ai {l}",f"Agli {l}")
        test_case = test_case.replace(f"Il {l}",f"L'{l}")
        test_case = test_case.replace(f" sui {l}",f" sugli {l}")
        test_case = test_case.replace(f" quel {l}",f" quell'{l}")
        test_case = test_case.replace(f" quei {l}",f" quegli {l}")
        test_case = test_case.replace(f"Sui {l}",f"Sugli {l}")
        test_case = test_case.replace(f"Quel {l}",f"Quell'{l}")
        test_case = test_case.replace(f"Quei {l}",f"Quegli {l}")
    return test_case

if "italian" in LANGUAGES:
    for rel in ["immigr", "immgra", "i m m i g r", "imm1g"]:
        cases_dict["italian"]["test_case"] = cases_dict["italian"].test_case.apply(lambda x: italian_articles(x, [rel]))

    
# For French, adjust articles based on first letter(s) of noun --> relevant IDENT are homo and handicape, SLR attarde mental
def french_articles(test_case, letters):
    for l in letters:
        test_case = test_case.replace(f" le {l}",f" l'{l}")
        test_case = test_case.replace(f" la {l}",f" l'{l}")
        test_case = test_case.replace(f" de {l}",f" d'{l}")
        test_case = test_case.replace(f" ce {l}",f" cet{l}")
        test_case = test_case.replace(f" quelque {l}",f" quelqu'{l}")
        test_case = test_case.replace(f"Le {l}",f"L'{l}")
        test_case = test_case.replace(f"La {l}",f"L'{l}")
        test_case = test_case.replace(f"De {l}",f"D'{l}")
        test_case = test_case.replace(f"Ce {l}",f"Cet{l}")
        test_case = test_case.replace(f"Quelque {l}",f"Quelqu'{l}")
    return test_case

if "french" in LANGUAGES:
    for rel in ["homo", "hoom", "omo", "h o m", "h0mo",
                "handic", "handci", "h a n d", "h4ndi",
                "attard", "a t t a r", "attrd", "att4rd", "attrad"]:
        cases_dict["french"]["test_case"] = cases_dict["french"].test_case.apply(lambda x: french_articles(x, [rel]))


## Sanity Checks

In [11]:
# Check if any test cases are NA
for lang in LANGUAGES:
    if cases_dict[lang].test_case.isnull().sum()>0:
        print(lang.upper())
        display(cases_dict[lang][cases_dict[lang].test_case.isnull()])
        print()

In [12]:
# Check if any test cases still contain IDENT or SLR placeholders
for lang in LANGUAGES:
    for ph in ["IDENT", "SLR", "_"]:
        if cases_dict[lang].test_case.str.contains(ph).sum()>0:
            print(lang.upper(), ph)
            display(cases_dict[lang][cases_dict[lang].test_case.str.contains(ph)])
            print()

In [13]:
# Check if any gender-inflected templates contain non-inflected placeholders
for lang in LANGUAGES:
    if "gender_male" in cases_dict[lang].columns:
        for ph in ["\[IDENT", "\[SLR"]:
            if cases_dict[lang].gender_male.str.contains(ph).sum()>0:
                print(lang.upper(), "MALE")
                display(cases_dict[lang][cases_dict[lang].gender_male.str.contains(ph)])
                print()
            if cases_dict[lang].gender_female.str.contains(ph).sum()>0:
                print(lang.upper(), "FEMALE")
                display(cases_dict[lang][cases_dict[lang].gender_female.str.contains(ph)])
                print()

## Manual Inspection

In [92]:
lang = "arabic"

for i in cases_dict[lang].test_case.sample(10):
    print(i,"\n")

لازم نعلق كلللللللل سود دول من رقابهم. 

دا مناسب لاللاجئين الحشرات الي زييييييك. 

هي دي العاهره على حقيقتها. 

مش لازم نخلي المعاقيين يعانو اكتر من مهما اصلا. 

ما فيش 7اجة اسواء من اليهودي. 

احنا مجموعة من اليهود. 

المعاقيين زبالة. 

اليهود هم بس اللي جاهلين للدرجة دي. 

هقص رقاب المعاقيين. 

اغتصاب النساء الي متلك لازم يبقى حلال. 



## Export to CSV

In [14]:
for lang in LANGUAGES:
    
    cases_dict[lang]["mhc_case_id"] = cases_dict[lang].case_id.apply(lambda x: lang + "-"+ str(x))
    
    if "gender_male" in cases_dict[lang].columns:
        export_df = cases_dict[lang][['functionality', "mhc_case_id", 'case_id', 'test_case', 'label_gold', 'target_ident', 'ref_case_id',
                                    'ref_templ_id', 'templ_id','case_templ', 'gender_male', 'gender_female']].copy()

    else:
        export_df = cases_dict[lang][['functionality', "mhc_case_id", 'case_id', 'test_case', 'label_gold', 'target_ident', 'ref_case_id',
                            'ref_templ_id', 'templ_id','case_templ']].copy()

    
    export_df.to_csv(f"{STORE_PATH}/3_Generated Cases/hatecheck_cases_{lang}.csv", index=False)

In [33]:
for lang in LANGUAGES:

    export_df = cases_dict[lang][["mhc_case_id", 'test_case']].copy()
    export_df = export_df.sample(frac = 1,random_state=123)

    export_df.to_csv(f"../4_Cases for Annotation/hatecheck_cases_for_annotation_{lang}.csv", index=False)