# IMPORT DATA, PACKAGES, AND FUNCTIONS

## import the relevant functions and packages

In [1]:
import pandas as pd
import numpy as np
from test_case_generator_functions import alphabet, num_of_alphabet, rand_letter

# TEST CASE TYPE REFERENCE INFORMATION

In [2]:
# Metadata identifying this test case type; the generation loop further down
# writes these values into every row of the final test case table.
(uid, theme, category, sub_category, entity_type) = (
    'UID-314',
    'Names where name parts are Modified',
    'Typos',
    'Typo noise name parts',
    'Entity',
)

## download the OFAC list from the web

In [3]:
# header=None: every row is read as data and the columns are numbered 0, 1, 2, ...
ofac_list_download = pd.read_csv(
    filepath_or_buffer='https://www.treasury.gov/ofac/downloads/sdn.csv',
    header=None,
)

In [4]:
# Keep only the first three columns of the download and give them descriptive names.
ofac_list = ofac_list_download[[0, 1, 2]].rename(
    columns={0: 'uid', 1: 'name', 2: 'entity_type'}
)

# FILTER FOR THE REQUIREMENTS OF THE TEST CASE TYPE

## filter for the requirements of the specific test case type

In [5]:
# Entities carry the type marker '-0-' (padded with a trailing space in the
# download this notebook was written against). Strip whitespace before comparing
# so the filter does not silently return nothing if that padding ever changes.
# astype(str) makes missing values compare as non-matching instead of failing.
ofac_list_filtered = ofac_list[ofac_list.entity_type.astype(str).str.strip() == '-0-']  # only evaluate entities

In [6]:
ofac_list_filtered = ofac_list_filtered.reset_index(drop=True)

# Generic business words ("noise") that a name must contain to qualify for this test case type.
noise = ['BANK', 'TRADING', 'STOCK', 'MANUFACTURING', 'SHIPPING', 'TECHNOLOGY', 'PUBLIC', 'MONEY', 'CASH', 'LTD', 'LLC']

# Flag every row whose upper-cased, space-split name contains at least one noise
# word. Tokens are compared whole, so 'LTD.' does not match 'LTD'. Non-string
# names (e.g. missing values) are treated as non-matching rather than raising.
# astype(bool) keeps the mask boolean even when there are no rows at all.
contains_noise = ofac_list_filtered['name'].apply(
    lambda name: isinstance(name, str)
    and any(part in noise for part in name.upper().split(' '))
).astype(bool)

# Select all matching rows in one step (instead of appending them one at a time)
# and renumber them from 0.
ofac_list_filtered_noise = ofac_list_filtered[contains_noise].reset_index(drop=True)

ofac_list_filtered_noise

Unnamed: 0,uid,name,entity_type
0,815,"GALAX TRADING CO., LTD.",-0-
1,906,HAVIN BANK LIMITED,-0-
2,1571,NORDSTRAND MARITIME AND TRADING COMPANY,-0-
3,4632,BANK MARKAZI JOMHOURI ISLAMI IRAN,-0-
4,4633,BANK MASKAN,-0-
...,...,...,...
915,39281,SIERRA VISTA TRADING LIMITED,-0-
916,39283,CLARA SHIPPING LLC,-0-
917,39356,NEW EASTERN SHIPPING CO LTD,-0-
918,39358,ANFASAR TRADING S PTE. LTD.,-0-


## randomly choose 10 rows

In [7]:
# Draw 10 rows at random, without replacement (the pandas default). No
# random_state is passed here, so the selection is not pinned by this cell.
ofac_list_sampled = ofac_list_filtered_noise.sample(10)
ofac_list_sampled

Unnamed: 0,uid,name,entity_type
771,36041,PUBLIC JOINT STOCK COMPANY VYBORG SHIPYARD,-0-
809,37030,JOINT STOCK COMPANY NORTHERN SHIPPING COMPANY,-0-
69,12468,ELLISSA SHIPPING,-0-
552,28546,KOREA DAIZIN TRADING CORP.,-0-
644,32897,ORBIT PETROCHEMICALS TRADING LLC,-0-
306,22486,CENTRAL REPUBLIC BANK,-0-
193,17828,SHADI FOR CARS TRADING,-0-
464,25832,GOLDEN ENTERPRISE SHIPPING LIMITED,-0-
202,18299,ROSNEFT TRADING S.A.,-0-
251,19576,MIRIM SHIPPING CO LTD,-0-


# CREATE THE TEST CASES

## create blank final test cases table

In [8]:
# Empty results table; the generation loop appends one row per test case.
final_test_cases = pd.DataFrame(
    columns=[
        'UID',
        'Theme',
        'Category',
        'Sub-category',
        'Entity-Type',
        'Test Case ID',
        'OFAC List UID',
        'Original Name',
        'Test Case Name',
    ]
)
final_test_cases

Unnamed: 0,UID,Theme,Category,Sub-category,Entity-Type,Test Case ID,OFAC List UID,Original Name,Test Case Name


## run loop to generate the test cases

In [9]:
# For each sampled name, introduce a single-letter typo into one of its noise
# words and record the result as a test case.
for index, row in ofac_list_sampled.iterrows():
    original_name = row['name'].upper()
    split_original_name = original_name.split(' ')  # split name into words

    # Word to corrupt: the last name part that is a noise word. It is resolved
    # afresh for every row so a value left over from an earlier row is never reused.
    modify_original_name = next(
        (word for word in reversed(split_original_name) if word in noise),
        None,
    )
    if modify_original_name is None:
        raise ValueError('no noise word found in name: ' + repr(original_name))

    # Pick the position to change uniformly among the positions that hold a
    # letter. Drawing from this list (rather than redrawing until a letter is hit)
    # fails immediately, instead of looping forever, if the word has no letters.
    letter_positions = [
        position
        for position, character in enumerate(modify_original_name)
        if character in alphabet()
    ]
    j = np.random.choice(letter_positions)

    replace_name = list(modify_original_name)  # split word into letters
    while replace_name[j] == modify_original_name[j]:  # redraw until the letter actually changes
        replace_name[j] = rand_letter()  # replace letter

    # Swap the typo'd word in for the first occurrence of the chosen word.
    final_test_name = list(split_original_name)
    final_test_name[final_test_name.index(modify_original_name)] = ''.join(replace_name)
    final_test_name = ' '.join(final_test_name)

    # The Test Case ID suffix is the row's index label in ofac_list_sampled.
    final_test_cases.loc[len(final_test_cases)] = [
        uid,
        theme,
        category,
        sub_category,
        entity_type,
        uid + ' - ' + str(index),
        row['uid'],
        row['name'],
        final_test_name,
    ]  # append to the dataframe

final_test_cases

Unnamed: 0,UID,Theme,Category,Sub-category,Entity-Type,Test Case ID,OFAC List UID,Original Name,Test Case Name
0,UID-314,Names where name parts are Modified,Typos,Typo noise name parts,Entity,UID-314 - 771,36041,PUBLIC JOINT STOCK COMPANY VYBORG SHIPYARD,PUBLIC JOINT SQOCK COMPANY VYBORG SHIPYARD
1,UID-314,Names where name parts are Modified,Typos,Typo noise name parts,Entity,UID-314 - 809,37030,JOINT STOCK COMPANY NORTHERN SHIPPING COMPANY,JOINT STOCK COMPANY NORTHERN SHSPPING COMPANY
2,UID-314,Names where name parts are Modified,Typos,Typo noise name parts,Entity,UID-314 - 69,12468,ELLISSA SHIPPING,ELLISSA SHIPVING
3,UID-314,Names where name parts are Modified,Typos,Typo noise name parts,Entity,UID-314 - 552,28546,KOREA DAIZIN TRADING CORP.,KOREA DAIZIN TRADIUG CORP.
4,UID-314,Names where name parts are Modified,Typos,Typo noise name parts,Entity,UID-314 - 644,32897,ORBIT PETROCHEMICALS TRADING LLC,ORBIT PETROCHEMICALS TRADING QLC
5,UID-314,Names where name parts are Modified,Typos,Typo noise name parts,Entity,UID-314 - 306,22486,CENTRAL REPUBLIC BANK,CENTRAL REPUBLIC NANK
6,UID-314,Names where name parts are Modified,Typos,Typo noise name parts,Entity,UID-314 - 193,17828,SHADI FOR CARS TRADING,SHADI FOR CARS TRADINY
7,UID-314,Names where name parts are Modified,Typos,Typo noise name parts,Entity,UID-314 - 464,25832,GOLDEN ENTERPRISE SHIPPING LIMITED,GOLDEN ENTERPRISE SHIPPRNG LIMITED
8,UID-314,Names where name parts are Modified,Typos,Typo noise name parts,Entity,UID-314 - 202,18299,ROSNEFT TRADING S.A.,ROSNEFT TIADING S.A.
9,UID-314,Names where name parts are Modified,Typos,Typo noise name parts,Entity,UID-314 - 251,19576,MIRIM SHIPPING CO LTD,MIRIM SHIPPING CO LTL
