# IMPORT DATA, PACKAGES, AND FUNCTIONS

## import the relevant functions and packages

In [1]:
import pandas as pd
import numpy as np
from test_case_generator_functions import alphabet

# TEST CASE TYPE REFERENCE INFORMATION

In [2]:
# Reference information for the test case type generated by this notebook.
# These values are copied verbatim into every generated test case row.
uid = "UID-227"                                  # id of the test case type
theme = "Names where name parts are Modified"    # top-level grouping
category = "Repetitions"                         # grouping within the theme
sub_category = "> 2 letters repeated twice"      # grouping within the category
entity_type = "Individual"                       # kind of sanctioned entity covered

## download the OFAC list from the web

In [3]:
# header=None makes pandas read every line as data and label the columns
# with integers (0, 1, 2, ...), which later cells rely on.
sdn_csv_url = 'https://www.treasury.gov/ofac/downloads/sdn.csv'
ofac_list_download = pd.read_csv(sdn_csv_url, header=None)

In [4]:
# Keep only the first three columns of the download and give them
# descriptive names in a single expression.
ofac_list = ofac_list_download[[0, 1, 2]].rename(
    columns={0: 'uid', 1: 'name', 2: 'entity_type'}
)

# FILTER FOR THE REQUIREMENTS OF THE TEST CASE TYPE

## filter for the requirements of the specific test case type

In [5]:
# Only rows whose entity type is 'individual' are evaluated for this test case type.
is_individual = ofac_list['entity_type'] == 'individual'
ofac_list_filtered = ofac_list.loc[is_individual]

## randomly choose 10 rows whose names have more than two name parts

In [6]:
SAMPLE_SIZE = 10      # number of OFAC rows to turn into test cases
MIN_NAME_PARTS = 3    # the test case type needs more than 2 name parts

# alphabet() is imported from test_case_generator_functions and is used here as
# the container of characters that count as letters. It is fetched once instead
# of once per character (assumes it returns the same value on every call --
# TODO confirm).
letters = alphabet()


def count_alphabetic_parts(name):
    """Return how many space-separated parts of ``name`` contain a letter.

    The name is upper-cased before its characters are checked against
    ``letters``. Parts consisting only of non-letter characters (for example
    numbers) and empty parts produced by consecutive spaces are not counted.
    Values that are not strings (such as NaN) count as zero parts.

    The parts are counted without mutating the list being iterated; removing
    items from a list while looping over it skips the item that follows each
    removal, which miscounts names with consecutive non-letter parts.
    """
    if not isinstance(name, str):
        return 0
    return sum(
        1
        for part in name.upper().split(' ')
        if any(char in letters for char in part)
    )


# Filter the eligible rows first and sample once. This yields the same
# distribution as re-drawing 10 rows until all of them qualify, but it always
# terminates: if fewer than SAMPLE_SIZE rows qualify, sample() raises a
# ValueError instead of looping forever.
part_counts = ofac_list_filtered['name'].map(count_alphabetic_parts)
ofac_list_eligible = ofac_list_filtered[part_counts >= MIN_NAME_PARTS]
ofac_list_sampled = ofac_list_eligible.sample(n=SAMPLE_SIZE)

ofac_list_sampled

Unnamed: 0,uid,name,entity_type
9023,34236,"SANCHEZ FARFAN, Wilder Emilio",individual
10554,37729,"LASHKARYOVA, Nadezhda Vitalyevna",individual
8706,32715,"SHANDAROVICH, Oleg Stanislavovich",individual
756,8154,"GIRALDO SERNA, Hernan",individual
6724,26139,"PERALTA, Cesar Emilio",individual
9370,35315,"SHUVALOV, Igor Ivanovich",individual
7119,26947,"LUGO GOMEZ, Eustiquio Jose",individual
5440,23089,"KU, Sung Sop",individual
921,8392,"OMRAN, Karim Dhaidas",individual
1302,10043,"BARAKAT, Hamzi Ahmad",individual


# CREATE THE TEST CASES

## create blank final test cases table

In [7]:
# Column layout of the output table. It starts empty; each generated test
# case is added as one row with values in this order.
test_case_columns = [
    'UID',
    'Theme',
    'Category',
    'Sub-category',
    'Entity-Type',
    'Test Case ID',
    'OFAC List UID',
    'Original Name',
    'Test Case Name',
]
final_test_cases = pd.DataFrame(columns=test_case_columns)
final_test_cases

Unnamed: 0,UID,Theme,Category,Sub-category,Entity-Type,Test Case ID,OFAC List UID,Original Name,Test Case Name


## run loop to generate the test cases

In [8]:
MIN_MODIFIED_PARTS = 3  # more than 2 name parts must receive a repeated letter

# alphabet() is imported from test_case_generator_functions and is used here as
# the container of characters that count as letters. It is fetched once instead
# of once per character (assumes it returns the same value on every call --
# TODO confirm).
letters = alphabet()


def triple_random_letter(word):
    """Return ``word`` with one randomly chosen letter written three times.

    Only characters found in ``letters`` are candidates, so punctuation such
    as a trailing comma is never repeated. ``word`` must contain at least one
    such character.
    """
    letter_positions = [pos for pos, char in enumerate(word) if char in letters]
    pos = int(np.random.choice(letter_positions))
    return word[:pos] + word[pos] * 3 + word[pos + 1:]


for index, row in ofac_list_sampled.iterrows():
    name_parts = row['name'].upper().split(' ')  # split name into parts

    # Positions of the parts that contain at least one letter. Working with
    # positions (rather than removing items from a list while iterating over
    # it) guarantees that number-only parts are never selected, and makes the
    # replacement unambiguous when the same part occurs twice in a name.
    modifiable_positions = [
        pos
        for pos, part in enumerate(name_parts)
        if any(char in letters for char in part)
    ]
    modifiable_count = len(modifiable_positions)

    if modifiable_count < MIN_MODIFIED_PARTS:
        raise ValueError(
            'Name {!r} has only {} part(s) containing letters; at least {} are required.'.format(
                row['name'], modifiable_count, MIN_MODIFIED_PARTS
            )
        )

    if modifiable_count == MIN_MODIFIED_PARTS:
        parts_to_modify = MIN_MODIFIED_PARTS
    else:
        # NOTE(review): randint's upper bound is exclusive, so a name with more
        # than 3 modifiable parts never has all of them modified. Kept as in
        # the original; confirm whether that is intended.
        parts_to_modify = np.random.randint(MIN_MODIFIED_PARTS, modifiable_count)

    # Choose distinct parts at random and repeat one letter in each of them.
    chosen_positions = np.random.choice(
        modifiable_positions, size=parts_to_modify, replace=False
    )
    for pos in chosen_positions:
        pos = int(pos)
        name_parts[pos] = triple_random_letter(name_parts[pos])

    final_test_name = ' '.join(name_parts)

    # Append the new test case as the next row of the results table.
    final_test_cases.loc[len(final_test_cases)] = [
        uid,
        theme,
        category,
        sub_category,
        entity_type,
        uid + ' - ' + str(index),
        row['uid'],
        row['name'],
        final_test_name,
    ]

final_test_cases

Unnamed: 0,UID,Theme,Category,Sub-category,Entity-Type,Test Case ID,OFAC List UID,Original Name,Test Case Name
0,UID-227,Names where name parts are Modified,Repetitions,> 2 letters repeated twice,Individual,UID-227 - 9023,34236,"SANCHEZ FARFAN, Wilder Emilio","SANCHEZZZ FARFAN, WILDERRR EMIIILIO"
1,UID-227,Names where name parts are Modified,Repetitions,> 2 letters repeated twice,Individual,UID-227 - 10554,37729,"LASHKARYOVA, Nadezhda Vitalyevna","LASHHHKARYOVA, NADEZHHHDA VITALLLYEVNA"
2,UID-227,Names where name parts are Modified,Repetitions,> 2 letters repeated twice,Individual,UID-227 - 8706,32715,"SHANDAROVICH, Oleg Stanislavovich","SHANDAAAROVICH, OOOLEG STANISLAVOVIIICH"
3,UID-227,Names where name parts are Modified,Repetitions,> 2 letters repeated twice,Individual,UID-227 - 756,8154,"GIRALDO SERNA, Hernan","GIRALLLDO SERNAAA, HHHERNAN"
4,UID-227,Names where name parts are Modified,Repetitions,> 2 letters repeated twice,Individual,UID-227 - 6724,26139,"PERALTA, Cesar Emilio","PERAAALTA, CEEESAR EMILIIIO"
5,UID-227,Names where name parts are Modified,Repetitions,> 2 letters repeated twice,Individual,UID-227 - 9370,35315,"SHUVALOV, Igor Ivanovich","SHUVAAALOV, IGOOOR IVAAANOVICH"
6,UID-227,Names where name parts are Modified,Repetitions,> 2 letters repeated twice,Individual,UID-227 - 7119,26947,"LUGO GOMEZ, Eustiquio Jose","LUGOOO GOMEZZZ, EUSTTTIQUIO JOSE"
7,UID-227,Names where name parts are Modified,Repetitions,> 2 letters repeated twice,Individual,UID-227 - 5440,23089,"KU, Sung Sop","KUUU, SUNGGG SOPPP"
8,UID-227,Names where name parts are Modified,Repetitions,> 2 letters repeated twice,Individual,UID-227 - 921,8392,"OMRAN, Karim Dhaidas","OOOMRAN, KKKARIM DDDHAIDAS"
9,UID-227,Names where name parts are Modified,Repetitions,> 2 letters repeated twice,Individual,UID-227 - 1302,10043,"BARAKAT, Hamzi Ahmad","BARAKAAAT, HHHAMZI AHMAAAD"
