# IMPORT DATA, PACKAGES, AND FUNCTIONS

## import the relevant functions and packages

In [1]:
import pandas as pd
import numpy as np
from test_case_generator_functions import phonetic

# TEST CASE TYPE REFERENCE INFORMATION

In [2]:
uid = 'UID-214' 
theme = 'Names where name parts are Modified'
category = 'Phonetic'
sub_category = '> 2 phonetic variations - different name part'
entity_type = 'Individual'

## download the OFAC list from the web

In [3]:
ofac_list_download = pd.read_csv('https://www.treasury.gov/ofac/downloads/sdn.csv', header=None)
ofac_list_download

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,36,AEROCARIBBEAN AIRLINES,-0-,CUBA,-0-,-0-,-0-,-0-,-0-,-0-,-0-,-0-
1,173,"ANGLO-CARIBBEAN CO., LTD.",-0-,CUBA,-0-,-0-,-0-,-0-,-0-,-0-,-0-,-0-
2,306,BANCO NACIONAL DE CUBA,-0-,CUBA,-0-,-0-,-0-,-0-,-0-,-0-,-0-,a.k.a. 'BNC'.
3,424,BOUTIQUE LA MAISON,-0-,CUBA,-0-,-0-,-0-,-0-,-0-,-0-,-0-,-0-
4,475,CASA DE CUBA,-0-,CUBA,-0-,-0-,-0-,-0-,-0-,-0-,-0-,-0-
...,...,...,...,...,...,...,...,...,...,...,...,...
10871,39280,TIBALAJI PETROCHEM PRIVATE LIMITED,-0-,IRAN-EO13846,-0-,-0-,-0-,-0-,-0-,-0-,-0-,Website https://www.tibalaji.com/; Additional ...
10872,39281,SIERRA VISTA TRADING LIMITED,-0-,IRAN-EO13846,-0-,-0-,-0-,-0-,-0-,-0-,-0-,Additional Sanctions Information - Subject to ...
10873,39282,SOPHYCHEM HK LIMITED,-0-,IRAN-EO13846,-0-,-0-,-0-,-0-,-0-,-0-,-0-,Additional Sanctions Information - Subject to ...
10874,39283,CLARA SHIPPING LLC,-0-,IRAN-EO13846,-0-,-0-,-0-,-0-,-0-,-0-,-0-,Website http://www.clarashipping.com; Addition...


In [4]:
ofac_list = ofac_list_download[[0,1,2]]
ofac_list.columns = ['uid', 'name', 'entity_type']
ofac_list

Unnamed: 0,uid,name,entity_type
0,36,AEROCARIBBEAN AIRLINES,-0-
1,173,"ANGLO-CARIBBEAN CO., LTD.",-0-
2,306,BANCO NACIONAL DE CUBA,-0-
3,424,BOUTIQUE LA MAISON,-0-
4,475,CASA DE CUBA,-0-
...,...,...,...
10871,39280,TIBALAJI PETROCHEM PRIVATE LIMITED,-0-
10872,39281,SIERRA VISTA TRADING LIMITED,-0-
10873,39282,SOPHYCHEM HK LIMITED,-0-
10874,39283,CLARA SHIPPING LLC,-0-


# FILTER FOR THE REQUIREMENTS OF THE TEST CASE TYPE

## filter for the requirements of the specific test case type

In [5]:
ofac_list_filtered = ofac_list[(ofac_list.entity_type == 'individual')] # only evaluate entities
ofac_list_filtered

Unnamed: 0,uid,name,entity_type
53,2674,"ABBAS, Abu",individual
54,2675,"AL RAHMAN, Shaykh Umar Abd",individual
55,2676,"AL ZAWAHIRI, Dr. Ayman",individual
56,2677,"AL-ZOMOR, Abboud Abdul Latif Hassan",individual
57,2678,"AWDA, Abd Al Aziz",individual
...,...,...,...
10861,39257,"ABNOUSH, Salar",individual
10863,39259,"MIRZAEI, Haj Ahmad",individual
10864,39260,"ROSTAMI CHESHMEH GACHI, Mohammad",individual
10865,39262,"SOBYANINA, Olga Sergeevna",individual


## randomly choose 10 rows

In [6]:
ofac_list_sampled = ofac_list_filtered.sample(n = 10)
ofac_list_sampled

Unnamed: 0,uid,name,entity_type
7360,27787,"NUR-AL-DIN, Jawad",individual
874,8332,"AL-QASIR, Nazar Jumah Ali",individual
1686,10984,"LUCERO DE MARTINEZ, Sandra",individual
1512,10485,"AL-SHEIBANI, Abu Mustafa",individual
676,7915,"RANTISI, Abdel Aziz",individual
1658,10896,"AL-SUBAIY, Khalifa Muhammad Turki",individual
1099,9388,"MELGOZA TORRES, Martin",individual
4295,19138,"BARRIOS HERNANDEZ, Mercedes",individual
8286,30746,"AL-AKHRAS, Firas",individual
4577,20276,"KIVIKO, Irina Valerievna",individual


# CREATE THE TEST CASES

## create blank final test cases table

In [7]:
final_test_cases = pd.DataFrame(columns=['UID', 'Theme','Category','Sub-category','Entity-Type','Test Case ID' , 'OFAC List UID', 'Original Name','Test Case Name'])
final_test_cases

Unnamed: 0,UID,Theme,Category,Sub-category,Entity-Type,Test Case ID,OFAC List UID,Original Name,Test Case Name


## run loop to generate the test cases

In [8]:
for index, row in ofac_list_sampled.iterrows():
    original_name = row['name']
    number_of_words = row['name'].count(' ') + 1 # determine for each name the number of words
    split_original_name = original_name.split(' ') # split name into word
    
    final_test_name = split_original_name.copy()
    
    if number_of_words > 3:
        loop_time = np.random.randint(3, number_of_words)
    else:
        loop_time = number_of_words
        
    for i in range(loop_time):
        modify_original_name = np.random.choice(split_original_name) # randomly choose word to be replaced
        modify_index = original_name.split(' ').index(modify_original_name) # record index of chosen word
        split_original_name.remove(modify_original_name) # remove chosen word
        
        modify_original_name = list(modify_original_name) # split word into letter
        replace_name = modify_original_name.copy()
        
        j = np.random.choice(list(range(len(replace_name))))
        while replace_name[j] == modify_original_name[j]: # rerandom if no letter has been replaced
            j = np.random.choice(list(range(len(replace_name))))
            replace_name[j] = phonetic(replace_name[j]) # replace 1 letter with similar phonetic
        
        final_test_name[modify_index] = ''.join(replace_name)
    
    final_test_name = ' '.join(final_test_name)
    
    final_test_cases.loc[len(final_test_cases)] = [uid, theme, category, sub_category, entity_type, uid + ' - ' + str(index), row['uid'], row['name'], final_test_name] # append to the dataframe

final_test_cases

Unnamed: 0,UID,Theme,Category,Sub-category,Entity-Type,Test Case ID,OFAC List UID,Original Name,Test Case Name
0,UID-214,Names where name parts are Modified,Phonetic,> 2 phonetic variations - different name part,Individual,UID-214 - 7360,27787,"NUR-AL-DIN, Jawad","NUR-OL-DIN, Kawad"
1,UID-214,Names where name parts are Modified,Phonetic,> 2 phonetic variations - different name part,Individual,UID-214 - 874,8332,"AL-QASIR, Nazar Jumah Ali","AL-QAXIR, Mazar Kumah Ali"
2,UID-214,Names where name parts are Modified,Phonetic,> 2 phonetic variations - different name part,Individual,UID-214 - 1686,10984,"LUCERO DE MARTINEZ, Sandra","LOCERO DE MARTWNEZ, Xandra"
3,UID-214,Names where name parts are Modified,Phonetic,> 2 phonetic variations - different name part,Individual,UID-214 - 1512,10485,"AL-SHEIBANI, Abu Mustafa","YL-SHEIBANI, Hbu Nustafa"
4,UID-214,Names where name parts are Modified,Phonetic,> 2 phonetic variations - different name part,Individual,UID-214 - 676,7915,"RANTISI, Abdel Aziz","RYNTISI, Ibdel Iziz"
5,UID-214,Names where name parts are Modified,Phonetic,> 2 phonetic variations - different name part,Individual,UID-214 - 1658,10896,"AL-SUBAIY, Khalifa Muhammad Turki","AL-SUBAIY, Ghalifa Nuhammad Durki"
6,UID-214,Names where name parts are Modified,Phonetic,> 2 phonetic variations - different name part,Individual,UID-214 - 1099,9388,"MELGOZA TORRES, Martin","NELGOZA TERRES, Nartin"
7,UID-214,Names where name parts are Modified,Phonetic,> 2 phonetic variations - different name part,Individual,UID-214 - 4295,19138,"BARRIOS HERNANDEZ, Mercedes","PARRIOS HERNANTEZ, Nercedes"
8,UID-214,Names where name parts are Modified,Phonetic,> 2 phonetic variations - different name part,Individual,UID-214 - 8286,30746,"AL-AKHRAS, Firas","AL-WKHRAS, Viras"
9,UID-214,Names where name parts are Modified,Phonetic,> 2 phonetic variations - different name part,Individual,UID-214 - 4577,20276,"KIVIKO, Irina Valerievna","KIVHKO, Arina Falerievna"
