# IMPORT DATA, PACKAGES, AND FUNCTIONS

## import the relevant functions and packages

In [1]:
import pandas as pd
import numpy as np
from test_case_generator_functions import alphabet, num_of_alphabet

# TEST CASE TYPE REFERENCE INFORMATION

In [2]:
uid = 'UID-280' 
theme = 'Names where name parts are Modified'
category = 'Truncation'
sub_category = '2 Letter Truncation in a middle name part'
entity_type = 'Individual'

## download the OFAC list from the web

In [3]:
ofac_list_download = pd.read_csv('https://www.treasury.gov/ofac/downloads/sdn.csv', header=None)

In [4]:
ofac_list = ofac_list_download[[0,1,2]]
ofac_list.columns = ['uid', 'name', 'entity_type']

# FILTER FOR THE REQUIREMENTS OF THE TEST CASE TYPE

## filter for the requirements of the specific test case type

In [5]:
ofac_list_filtered = ofac_list[(ofac_list.entity_type == 'individual')] # only evaluate individuals

## randomly choose 10 rows

In [6]:
while True:
    ofac_list_sampled = ofac_list_filtered.sample(n = 10)
    cnt = [0] * 10
    for ind, name in enumerate(ofac_list_sampled['name']):
        split_name = name.upper().split(' ')
        
        for i in split_name:
            if all(j not in alphabet() for j in i):
                split_name.remove(i) # remove word with all numbers
            elif len(i) < 3:
                split_name.remove(i) # remove word with < 3 letter
                
        cnt[ind] += len(split_name)

    if all(i > 2 for i in cnt):
        break

ofac_list_sampled

Unnamed: 0,uid,name,entity_type
10053,36500,"RESHETNIKOV, Maxim Gennadyevich",individual
648,7876,"AL-YASSIN, Husam Muhammad Amin",individual
6021,24541,"KAREEM, Aras Habib",individual
1616,10726,"AL MAZIDIH, Badran Turki Hishan",individual
5664,23516,"PAK, Kwang Hun",individual
911,8379,"JON, Hana Paul",individual
8512,31941,"SAING, Sai Lone",individual
8668,32496,"MARRUFO CABRERA, Miguel Raymundo",individual
3181,15843,"VILLA SANCHEZ, Arnoldo",individual
149,6708,"CARRILLO FUENTES, Vicente",individual


# CREATE THE TEST CASES

## create blank final test cases table

In [7]:
final_test_cases = pd.DataFrame(columns=['UID', 'Theme','Category','Sub-category','Entity-Type','Test Case ID' , 'OFAC List UID', 'Original Name','Test Case Name'])
final_test_cases

Unnamed: 0,UID,Theme,Category,Sub-category,Entity-Type,Test Case ID,OFAC List UID,Original Name,Test Case Name


## run loop to generate the test cases

In [8]:
for index, row in ofac_list_sampled.iterrows():
    original_name = row['name'].upper()
    split_original_name = original_name.split(' ') # split name into word
    final_test_name = split_original_name.copy()
    
    split_edit_name = []
    for i in split_original_name:
        if (not all(j not in alphabet() for j in i)) & (num_of_alphabet(i) > 2):
            split_edit_name.append(i) # remove word with all numbers # remove word with < 3 letter
    
    truncate_word = split_edit_name[len(split_edit_name) // 2] # choose word to be truncated
    truncate_number = 2
    if (truncate_word[-1] not in alphabet()) | (truncate_word[-2] not in alphabet()):
        truncate_number += 1
        
    final_test_name[final_test_name.index(truncate_word)] = truncate_word[:len(truncate_word) - truncate_number]

    final_test_name = ' '.join(final_test_name)
    
    final_test_cases.loc[len(final_test_cases)] = [uid, theme, category, sub_category, entity_type, uid + ' - ' + str(index), row['uid'], row['name'], final_test_name] # append to the dataframe

final_test_cases

Unnamed: 0,UID,Theme,Category,Sub-category,Entity-Type,Test Case ID,OFAC List UID,Original Name,Test Case Name
0,UID-280,Names where name parts are Modified,Truncation,2 Letter Truncation in a middle name part,Individual,UID-280 - 10053,36500,"RESHETNIKOV, Maxim Gennadyevich","RESHETNIKOV, MAX GENNADYEVICH"
1,UID-280,Names where name parts are Modified,Truncation,2 Letter Truncation in a middle name part,Individual,UID-280 - 648,7876,"AL-YASSIN, Husam Muhammad Amin","AL-YASSIN, HUSAM MUHAMM AMIN"
2,UID-280,Names where name parts are Modified,Truncation,2 Letter Truncation in a middle name part,Individual,UID-280 - 6021,24541,"KAREEM, Aras Habib","KAREEM, AR HABIB"
3,UID-280,Names where name parts are Modified,Truncation,2 Letter Truncation in a middle name part,Individual,UID-280 - 1616,10726,"AL MAZIDIH, Badran Turki Hishan","AL MAZIDIH, BADRAN TUR HISHAN"
4,UID-280,Names where name parts are Modified,Truncation,2 Letter Truncation in a middle name part,Individual,UID-280 - 5664,23516,"PAK, Kwang Hun","PAK, KWA HUN"
5,UID-280,Names where name parts are Modified,Truncation,2 Letter Truncation in a middle name part,Individual,UID-280 - 911,8379,"JON, Hana Paul","JON, HA PAUL"
6,UID-280,Names where name parts are Modified,Truncation,2 Letter Truncation in a middle name part,Individual,UID-280 - 8512,31941,"SAING, Sai Lone","SAING, S LONE"
7,UID-280,Names where name parts are Modified,Truncation,2 Letter Truncation in a middle name part,Individual,UID-280 - 8668,32496,"MARRUFO CABRERA, Miguel Raymundo","MARRUFO CABRERA, MIGU RAYMUNDO"
8,UID-280,Names where name parts are Modified,Truncation,2 Letter Truncation in a middle name part,Individual,UID-280 - 3181,15843,"VILLA SANCHEZ, Arnoldo",VILLA SANCH ARNOLDO
9,UID-280,Names where name parts are Modified,Truncation,2 Letter Truncation in a middle name part,Individual,UID-280 - 149,6708,"CARRILLO FUENTES, Vicente",CARRILLO FUENT VICENTE
