# IMPORT DATA, PACKAGES, AND FUNCTIONS

## import the relevant functions and packages

In [1]:
import pandas as pd
import numpy as np
from test_case_generator_functions import alphabet, num_of_alphabet, rand_letter

# TEST CASE TYPE REFERENCE INFORMATION

In [2]:
# Reference metadata for this test case type. Every generated row is stamped
# with these values, and `uid` is also the prefix of each "Test Case ID".
uid = "UID-308"
theme = "Names where name parts are Modified"
category = "Typos"
sub_category = "2 Typos same name part - Adjacent"
entity_type = "Entity"

## download the OFAC list from the web

In [3]:
# Location of the published SDN list. The file is read with header=None, so
# the resulting columns are labelled with integers (0, 1, 2, ...).
OFAC_SDN_CSV_URL = 'https://www.treasury.gov/ofac/downloads/sdn.csv'

ofac_list_download = pd.read_csv(OFAC_SDN_CSV_URL, header=None)
ofac_list_download

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,36,AEROCARIBBEAN AIRLINES,-0-,CUBA,-0-,-0-,-0-,-0-,-0-,-0-,-0-,-0-
1,173,"ANGLO-CARIBBEAN CO., LTD.",-0-,CUBA,-0-,-0-,-0-,-0-,-0-,-0-,-0-,-0-
2,306,BANCO NACIONAL DE CUBA,-0-,CUBA,-0-,-0-,-0-,-0-,-0-,-0-,-0-,a.k.a. 'BNC'.
3,424,BOUTIQUE LA MAISON,-0-,CUBA,-0-,-0-,-0-,-0-,-0-,-0-,-0-,-0-
4,475,CASA DE CUBA,-0-,CUBA,-0-,-0-,-0-,-0-,-0-,-0-,-0-,-0-
...,...,...,...,...,...,...,...,...,...,...,...,...
10892,39438,"NEJAT, Hossein",individual,IRGC] [IFSR] [IRAN-HR,IRGC Brigadier General,-0-,-0-,-0-,-0-,-0-,-0-,"DOB Mar 1955 to Mar 1956; POB Shiraz, Iran; na..."
10893,39439,"SAJEDINIA, Hossein",individual,IRAN-HR,Deputy Operations Commander,-0-,-0-,-0-,-0-,-0-,-0-,"DOB 21 Mar 1962 to 20 Apr 1962; POB Isfahan, I..."
10894,39440,"RAHIMI, Hossein",individual,IRAN-HR,Police chief of Tehran,-0-,-0-,-0-,-0-,-0-,-0-,DOB Mar 1963 to Mar 1964; POB Markazi Province...
10895,39441,"JAVANI, Yadollah",individual,IRGC] [IFSR] [IRAN-HR,Deputy Political Commander,-0-,-0-,-0-,-0-,-0-,-0-,"DOB 1962; POB Isfahan, Iran; nationality Iran;..."


In [4]:
# Keep only the first three positional columns of the raw download and give
# them readable names in a single expression.
column_names = {0: 'uid', 1: 'name', 2: 'entity_type'}
ofac_list = ofac_list_download[list(column_names)].rename(columns=column_names)
ofac_list

Unnamed: 0,uid,name,entity_type
0,36,AEROCARIBBEAN AIRLINES,-0-
1,173,"ANGLO-CARIBBEAN CO., LTD.",-0-
2,306,BANCO NACIONAL DE CUBA,-0-
3,424,BOUTIQUE LA MAISON,-0-
4,475,CASA DE CUBA,-0-
...,...,...,...
10892,39438,"NEJAT, Hossein",individual
10893,39439,"SAJEDINIA, Hossein",individual
10894,39440,"RAHIMI, Hossein",individual
10895,39441,"JAVANI, Yadollah",individual


# FILTER FOR THE REQUIREMENTS OF THE TEST CASE TYPE

## filter for the requirements of the specific test case type

In [5]:
# Only evaluate entities: keep the rows whose type column holds the '-0-'
# placeholder. The comparison is made on the whitespace-stripped value because
# the raw download pads the placeholder with a trailing space ('-0- '); matching
# on the stripped text keeps the filter working whether or not that padding is
# present. astype(str) makes the comparison safe for missing values as well.
is_entity = ofac_list['entity_type'].astype(str).str.strip() == '-0-'
ofac_list_filtered = ofac_list[is_entity]
ofac_list_filtered

Unnamed: 0,uid,name,entity_type
0,36,AEROCARIBBEAN AIRLINES,-0-
1,173,"ANGLO-CARIBBEAN CO., LTD.",-0-
2,306,BANCO NACIONAL DE CUBA,-0-
3,424,BOUTIQUE LA MAISON,-0-
4,475,CASA DE CUBA,-0-
...,...,...,...
10884,39282,SOPHYCHEM HK LIMITED,-0-
10885,39283,CLARA SHIPPING LLC,-0-
10886,39356,NEW EASTERN SHIPPING CO LTD,-0-
10888,39358,ANFASAR TRADING S PTE. LTD.,-0-


## randomly choose 10 rows, keeping only names that contain a word with `num_of_alphabet(word) > 3`

In [6]:
def _has_typo_candidate(name):
    """Return True when `name` passes the sampling rules of this test case type.

    The name is upper-cased and split on single spaces, then scanned word by
    word:

    * if the (remaining) name consists of exactly two words and the first or
      the last of them is at most 3 characters long, the name is rejected;
    * a word containing no character from `alphabet()` is dropped from the
      word list;
    * the first word with `num_of_alphabet(word) > 3` makes the name eligible.

    A name in which no word qualifies is rejected. Non-string values (for
    example missing names) are rejected as well.

    The scan walks over a snapshot of the word list. Removing a word from the
    very list being iterated makes the loop skip the following word, which
    could hide the only qualifying word of a name.
    """
    if not isinstance(name, str):
        return False

    words = name.upper().split(' ')
    for word in list(words):  # snapshot: `words` may shrink inside the loop
        if len(words) == 2 and (len(words[0]) <= 3 or len(words[-1]) <= 3):
            return False

        if all(char not in alphabet() for char in word):
            words.remove(word)  # drop words without any letter (e.g. numbers)
        elif num_of_alphabet(word) > 3:
            return True

    return False


# Sample once from the eligible rows instead of redrawing all ten rows until
# every one of them happens to be eligible. Both approaches pick uniformly
# among the eligible rows, but this one always terminates: `sample` raises a
# ValueError if fewer than 10 eligible rows exist.
eligible_rows = ofac_list_filtered['name'].map(_has_typo_candidate).astype(bool)
ofac_list_sampled = ofac_list_filtered[eligible_rows].sample(n = 10)

ofac_list_sampled

Unnamed: 0,uid,name,entity_type
1587,10639,"CONSORCIO INMOBILIARIO DEL VALLE DE CULIACAN, ...",-0-
3901,17671,CARTEL DE JALISCO NUEVA GENERACION,-0-
5238,22285,MINISTRY OF PEOPLE'S ARMED FORCES,-0-
1267,9975,INMOBILIARIA TIJUANA COSTA S.A. DE C.V.,-0-
4273,19048,WHITE SEAL HOLDINGS LIMITED,-0-
2833,15131,HONAR SHIPPING COMPANY LIMITED,-0-
4752,21088,GRAND CASINO,-0-
8499,31928,LIMITED LIABILITY COMPANY MORTRANSSERVICE,-0-
4560,20252,"PROFIT CORPORATION, C.A.",-0-
8590,32127,EUROGROUP ENGINEERING EAD,-0-


# CREATE THE TEST CASES

## create blank final test cases table

In [7]:
# Column layout of the output table; the generation loop fills it row by row.
test_case_columns = [
    'UID',
    'Theme',
    'Category',
    'Sub-category',
    'Entity-Type',
    'Test Case ID',
    'OFAC List UID',
    'Original Name',
    'Test Case Name',
]
final_test_cases = pd.DataFrame(columns=test_case_columns)
final_test_cases

Unnamed: 0,UID,Theme,Category,Sub-category,Entity-Type,Test Case ID,OFAC List UID,Original Name,Test Case Name


## run loop to generate the test cases (two adjacent characters replaced in one randomly chosen word per name)

In [8]:
def _apply_adjacent_typos(word, position, last_position):
    """Return `word` with two adjacent characters replaced by random letters.

    Parameters
    ----------
    word : str
        The name part that receives the typos.
    position : int
        Index of `word` within the split name.
    last_position : int
        Index of the last word of the split name.

    The characters that may be touched depend on where the word sits:

    * first word of the name: the first character is never changed; the typos
      land on a randomly chosen index and the one after it;
    * last word of the name: the final character is never changed (nor the
      one before it when the final character is not in `alphabet()`, e.g. a
      trailing comma); the typos land on a randomly chosen index and the one
      before it;
    * any other word: the typos land on a randomly chosen index and the one
      after it, anywhere in the word.

    Each replacement is redrawn from `rand_letter()` until it differs from the
    original character, so both positions are guaranteed to change.
    NOTE(review): `rand_letter` is imported from test_case_generator_functions
    and is assumed to return a single random letter - confirm.
    """
    characters = list(word)
    start_candidates = list(range(len(word)))

    if position == 0:  # avoid a typo at the very beginning of the name
        start_candidates = start_candidates[1:-1]
        step = 1
    elif position == last_position:  # avoid a typo at the very end of the name
        protected_tail = 2 if characters[-1] not in alphabet() else 1
        start_candidates = start_candidates[1:-protected_tail]
        step = -1
    else:
        start_candidates = start_candidates[:-1]
        step = 1

    start = np.random.choice(start_candidates)  # first of the two adjacent indices
    for offset in range(2):
        target = start + offset * step
        while characters[target] == word[target]:  # redraw until the letter changes
            characters[target] = rand_letter()

    return ''.join(characters)


# Rows are collected in a list and turned into the table in one step. The
# table is rebuilt from scratch on every run, so re-executing this cell does
# not append a second copy of the test cases.
generated_rows = []

for index, row in ofac_list_sampled.iterrows():
    original_name = row['name'].upper()
    split_original_name = original_name.split(' ')  # split name into words

    # Pick directly among the words that can carry the typos instead of
    # redrawing until a suitable one comes up; a name without any such word
    # now fails with a clear error rather than looping forever.
    eligible_words = [word for word in split_original_name if num_of_alphabet(word) > 2]
    if not eligible_words:
        raise ValueError('No word with more than 2 letters in name: ' + repr(row['name']))
    replace_word = str(np.random.choice(eligible_words))  # word to be modified

    # The first occurrence of the chosen word is the one that gets modified.
    word_position = split_original_name.index(replace_word)

    final_test_name = split_original_name.copy()
    final_test_name[word_position] = _apply_adjacent_typos(
        replace_word, word_position, len(split_original_name) - 1
    )
    final_test_name = ' '.join(final_test_name)

    generated_rows.append([
        uid,
        theme,
        category,
        sub_category,
        entity_type,
        uid + ' - ' + str(index),  # test case id: type uid + row label in the OFAC frame
        row['uid'],
        row['name'],
        final_test_name,
    ])

final_test_cases = pd.DataFrame(generated_rows, columns=final_test_cases.columns)

final_test_cases

Unnamed: 0,UID,Theme,Category,Sub-category,Entity-Type,Test Case ID,OFAC List UID,Original Name,Test Case Name
0,UID-308,Names where name parts are Modified,Typos,2 Typos same name part - Adjacent,Entity,UID-308 - 1587,10639,"CONSORCIO INMOBILIARIO DEL VALLE DE CULIACAN, ...","CONSORCIO INMOBILIARIO DEL VASNE DE CULIACAN, ..."
1,UID-308,Names where name parts are Modified,Typos,2 Typos same name part - Adjacent,Entity,UID-308 - 3901,17671,CARTEL DE JALISCO NUEVA GENERACION,CARTEL DE JALISCO NUEVA GENERACLQN
2,UID-308,Names where name parts are Modified,Typos,2 Typos same name part - Adjacent,Entity,UID-308 - 5238,22285,MINISTRY OF PEOPLE'S ARMED FORCES,MIDXSTRY OF PEOPLE'S ARMED FORCES
3,UID-308,Names where name parts are Modified,Typos,2 Typos same name part - Adjacent,Entity,UID-308 - 1267,9975,INMOBILIARIA TIJUANA COSTA S.A. DE C.V.,INMOBILIARIA TIJOYNA COSTA S.A. DE C.V.
4,UID-308,Names where name parts are Modified,Typos,2 Typos same name part - Adjacent,Entity,UID-308 - 4273,19048,WHITE SEAL HOLDINGS LIMITED,WHITE SEAL HOLDTOGS LIMITED
5,UID-308,Names where name parts are Modified,Typos,2 Typos same name part - Adjacent,Entity,UID-308 - 2833,15131,HONAR SHIPPING COMPANY LIMITED,HOIMR SHIPPING COMPANY LIMITED
6,UID-308,Names where name parts are Modified,Typos,2 Typos same name part - Adjacent,Entity,UID-308 - 4752,21088,GRAND CASINO,GIDND CASINO
7,UID-308,Names where name parts are Modified,Typos,2 Typos same name part - Adjacent,Entity,UID-308 - 8499,31928,LIMITED LIABILITY COMPANY MORTRANSSERVICE,LIMITED LIABILITY COMPANY MODJRANSSERVICE
8,UID-308,Names where name parts are Modified,Typos,2 Typos same name part - Adjacent,Entity,UID-308 - 4560,20252,"PROFIT CORPORATION, C.A.","PRNWIT CORPORATION, C.A."
9,UID-308,Names where name parts are Modified,Typos,2 Typos same name part - Adjacent,Entity,UID-308 - 8590,32127,EUROGROUP ENGINEERING EAD,EURBPROUP ENGINEERING EAD
