In [84]:
import pandas as pd
import numpy as np

# Standardized Options

[Locations](#Locations): `loc_list`, `loc_prob_dict`, `loc_abbr_dict`

[Languages](#Languages): `lg_list`, `lg_prob_dict`

[Services](#Services): `sv_adv_list`, `sv_con_list`

[Names](#Names): `first_nm_list`, `last_nm_list`

## Locations (States) <a id='Locations'></a>

In [85]:
# Load the bank branch master list; the location options are derived from it.
df_st = pd.read_csv(filepath_or_buffer='data/bank_branch_master_list.csv')
# Preview with: df_st.head()

In [86]:
# # find bank name
# names = np.sort(df_st.NAME.unique())
# for name in names:
#     print(name)
# # Bank Of America, National Association

In [87]:
# # find column index
# for i, col in enumerate(df_st.columns):
#     print(i, col)
# # 20 - NAME
# # 24 - SERVTYPE
# # 25 - STALP
# # 27 - STNAME

In [88]:
# Select Bank of America branches that offer one of the full-service types
# and have a non-blank state name.
# SERVTYPE codes kept:
#     11 Full Service Brick and Mortar Office
#     12 Full Service Retail Office
#     15 Full Service Home/Phone Banking
#     16 Full Service Seasonal Office
full_service_types = [11, 12, 15, 16]

is_boa = df_st['NAME'] == 'Bank Of America, National Association'
is_full_service = df_st['SERVTYPE'].isin(full_service_types)
has_state_name = df_st['STNAME'] != ' '

# Pick the four needed columns by name instead of by position (20, 24, 25, 27),
# so the selection survives a change in the CSV's column order. The column
# order NAME, SERVTYPE, STALP, STNAME is kept so positional access to
# df_st_sub stays valid.
df_st_sub = (
    df_st.loc[is_boa & is_full_service & has_state_name,
              ['NAME', 'SERVTYPE', 'STALP', 'STNAME']]
    .reset_index(drop=True)
)
# df_st_sub.head()

In [89]:
# create location objects
states = df_st_sub['STALP'].unique()  # state abbreviations, in order of first appearance
total = len(df_st_sub)

loc_list = list(states) # state list

# Count branches per state in a single pass instead of re-filtering the
# whole frame once per state.
branch_counts = df_st_sub['STALP'].value_counts()
loc_prob_dict = {state: int(branch_counts[state]) / total for state in states} # state:proportion

# The first row seen for each abbreviation supplies its state name; STNAME is
# read by column name rather than by position.
first_row_per_state = df_st_sub.drop_duplicates(subset='STALP')
loc_abbr_dict = dict(zip(first_row_per_state['STALP'], first_row_per_state['STNAME'])) # state:statename

# loc_prob_dict
# loc_abbr_dict
# loc_list

## Languages <a id='Languages'></a>

In [90]:
# Load the 'Probability' sheet of the languages workbook.
df_lg = pd.read_excel(io='data/languages.xlsx', sheet_name='Probability')
# Preview with: df_lg.head()

In [91]:
# Build the language options.
# The first row of the sheet is skipped (presumably English, which the advisor
# generator always adds by default - TODO confirm against the workbook).
lg_list = [language for language in df_lg.iloc[1:, 0]]

# language:proportion, taken from the third column of the first row whose
# 'Language' value matches.
lg_prob_dict = {
    language: df_lg.loc[df_lg['Language'] == language].iloc[0, 2]
    for language in lg_list
}

# lg_list
# lg_prob_dict

## Services <a id='Services'></a>

In [92]:
# Load the 'categories' sheet of the services workbook.
df_sv = pd.read_excel(io='data/services.xlsx', sheet_name='categories')
# Preview with: df_sv.head()

In [93]:
# create service objects
# Column headers are the service categories (what advisors provide).
sv_adv_list = list(df_sv.columns) # list of services provided by advisors, category level

# Each non-empty cell under a category becomes a "<category>-<service>" entry
# (what consumers can ask for).
sv_con_list = [] # list of services available for consumers, service level

for position, category in enumerate(sv_adv_list):
    # Iterate over the values directly. After dropna() the Series keeps its
    # original row labels, so looking entries up by 0..n-1 raises KeyError
    # whenever a blank cell sits above a filled one.
    for service in df_sv.iloc[:, position].dropna():
        sv_con_list.append(category + '-' + service)

# sv_adv_list
# sv_con_list

## Names <a id='Names'></a>

In [94]:
# Load the 'names' sheet of the names workbook.
df_nm = pd.read_excel(io='data/names.xlsx', sheet_name='names')
# Preview with: df_nm.head()

In [95]:
# create name objects
# First column holds first names, second column holds last names.
# Blank cells are dropped: if the two columns differ in length the shorter one
# is padded with NaN, and np.random.choice would turn that into the name 'nan'.
first_nm_list = list(df_nm.iloc[:, 0].dropna())
last_nm_list = list(df_nm.iloc[:, 1].dropna())

# first_nm_list
# last_nm_list

# Mock Data Generation
## Financial Advisors

Output format:

```python
{"financial advisor's name": {'language': [list of three languages],
                              'service': [list of three services],
                              'location': state_abbreviation},
 "financial advisor's name": {'language': [list of three languages],
                              'service': [list of three services],
                              'location': state_abbreviation}}
```

In [96]:
def random_select(array_input, sample_size):
    """Draw ``sample_size`` distinct options at random.

    Parameters
    ----------
    array_input : dict or iterable
        If a dict, its keys are the options and its values are their relative
        weights. Any other iterable is sampled uniformly.
    sample_size : int
        Number of options to draw (without replacement).

    Returns
    -------
    numpy.ndarray
        The selected options, as returned by ``np.random.choice``.

    Raises
    ------
    ValueError
        Propagated from ``np.random.choice``, e.g. when there are fewer
        options with non-zero weight than ``sample_size``.
    """
    optn_list = list(array_input)

    # Decide weighted vs. uniform once, rather than on every item.
    if isinstance(array_input, dict):
        prob_list = np.asarray([array_input[option] for option in optn_list], dtype=float)
    else:
        prob_list = np.ones(len(optn_list), dtype=float)

    # np.random.choice rejects weights that do not sum to 1, so rescale them.
    # A non-positive or NaN total is passed through untouched so numpy reports
    # the invalid input itself.
    weight_total = prob_list.sum()
    if weight_total > 0:
        prob_list = prob_list / weight_total

    return np.random.choice(a=optn_list, size=sample_size, replace=False, p=prob_list)

In [97]:
# Desired total number of financial advisors to generate.
number_of_advisors = 5
# Generated advisors, keyed by name: "name:{languages, services, location}".
adv_record = dict()

In [98]:
# Draw plan, category:(options, sample size). Dict order is the draw order.
# Only two languages are sampled because English is always added first.
input_dict = {'first_name':(first_nm_list,1)
              ,'last_name':(last_nm_list,1)
              ,'language':(lg_prob_dict,2)
              ,'service':(sv_adv_list,3)
              ,'location':(loc_prob_dict,1)}

# Records are keyed by full name, so a repeated name cannot add an advisor.
# Fail fast if the request can never be met instead of looping forever.
max_unique_names = len(set(first_nm_list)) * len(set(last_nm_list))
if number_of_advisors > max_unique_names:
    raise ValueError(
        'number_of_advisors (%d) exceeds the %d distinct names available'
        % (number_of_advisors, max_unique_names)
    )

# Loop until adv_record holds the requested total. This leaves
# number_of_advisors untouched (the cell can be re-run safely) and makes up
# for draws whose name is already taken.
while len(adv_record) < number_of_advisors:
    # Keep each category's picks as a list. Joining everything into one
    # comma-separated string and splitting it again would misplace fields
    # whenever a value itself contains a comma.
    picks = {category: [str(choice) for choice in random_select(options, size)]
             for category, (options, size) in input_dict.items()}

    name = picks['first_name'][0] + ' ' + picks['last_name'][0]
    languages = ['English'] + picks['language']  # English is the default first language
    services = picks['service']
    location = picks['location'][0]

    adv_record[name] = {'language':languages,'service':services,'location':location}
#     print(adv_record)