# GTO_waves

This code turns raw info on GTO characteristics into a dictionary of caracteristics with zip codes.
- holds dictionary of raw data for each GTO wave
- converts NY burroughs into counties
- returns dictionary of GTOs with FIPS

### message when loading this code

In [20]:
# print to console when loaded on another code
print('GTO_waves: dictionary with GTO characteristics:')
print('date issued, date enforcement, target state/county, threshold')
print()
print('GTO_table and GTO_table_long: table with thresholds for (fips, dates)')

GTO_waves: dictionary with GTO characteristics:
date issued, date enforcement, target state/county, threshold

GTO_table and GTO_table_long: table with thresholds for (fips, dates)


--------

In [21]:
import pandas as pd

### Load data to get FIPS x counties
Note: cell magic %%capture prevents output

In [22]:
%%capture 

%run Zillow1_loaddata.ipynb
load_zillow('county', 'prices')

In [23]:
%%capture 

%run functions/add_FIPS.ipynb

In [24]:
# source: zillow dataset that has counties and state/ counties fips codes
df = C_Zhvi_All[['State', 'RegionName', 'StateCodeFIPS', 'MunicipalCodeFIPS']].copy() #select columns
add_FIPS(df) # add 5 digit fips 
df.drop(['StateCodeFIPS', 'MunicipalCodeFIPS'],1, inplace=True) # drop columns
df['RegionName'] = df.RegionName.str.replace(' County', '') # delete word 'County'
df.set_index(['State','RegionName'], inplace=True) # multiindex
df.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,FIPS
State,RegionName,Unnamed: 2_level_1
CA,Los Angeles,6037
IL,Cook,17031
TX,Harris,48201


## Raw data from GTO files
https://www.fincen.gov

In [40]:
# Raw data extracted from each GTO example on FINCEN page - I corrected repeated key for NY1
raw_GTO = {1:{'date issued':'2016-01-13',
                'date enforcement':'2016-03-01',
                'target':{'NY1':{'county':['Manhattan'],
                               'threshold':3000000},
                          'FL':{'county':['Miami-Dade'],
                               'threshold':1000000},}},
             # 2016-2
             2:{'date issued':'2016-07-22',
                'date enforcement':'2016-08-28',
                'target':{'TX':{'county':['Bexar'],
                               'threshold':500000},  
                          'FL':{'county':['Miami-Dade', 'Broward', 'Palm Beach'],
                             'threshold':1000000},
                          'NY':{'county':[ 'Bronx','Brooklyn','Queens', 'Staten Island'],
                               'threshold':1500000},
                          'NY1':{'county':[ 'Manhattan'],
                               'threshold':3000000},
                          'CA':{'county':['San Diego', 'Los Angeles', 'San Francisco',
                                         'San Mateo', 'Santa Clara'],
                                'threshold':2000000}}},
             # 2017-1
              3:{'date issued':'2017-02-21',
                'date enforcement':'2017-02-24',
                'target':{'TX':{'county':['Bexar'],
                               'threshold':500000},  
                          'FL':{'county':['Miami-Dade', 'Broward', 'Palm Beach'],
                             'threshold':1000000},
                          'NY':{'county':[ 'Bronx','Brooklyn','Queens', 'Staten Island'],
                               'threshold':1500000},
                          'NY1':{'county':[ 'Manhattan'],
                               'threshold':3000000},
                          'CA':{'county':['San Diego', 'Los Angeles', 'San Francisco',
                                         'San Mateo', 'Santa Clara'],
                                'threshold':2000000}}},
 
################################################################################################           
  # waves 4 and 5 GTOs are not on FinCen website -  I infered DATE ISSUED
  # coverage and thresholds are the same
################################################################################################           
             
           ########### JULHO 2017 INCLUDES WIRE TRANSFERS ###########
              # 2017-2 MISSING....
              4:{'date issued':'2017-08-21',    ####DATE INFERED ####
                'date enforcement':'2017-08-23',
                'target':{'TX':{'county':['Bexar'],
                               'threshold':500000},  
                          'FL':{'county':['Miami-Dade', 'Broward', 'Palm Beach'],
                             'threshold':1000000},
                          'NY':{'county':[ 'Bronx','Brooklyn','Queens', 'Staten Island'],
                               'threshold':1500000},
                          'NY1':{'county':[ 'Manhattan'],
                               'threshold':3000000},
                          'CA':{'county':['San Diego', 'Los Angeles', 'San Francisco',
                                         'San Mateo', 'Santa Clara'],
                                'threshold':2000000},
                          'HI':{'county':['Honolulu'],'threshold':3000000},}},   
           
             # 2018-1 MISSING....
             5:{'date issued':'2018-02-21',   ####DATE INFERED ####
                'date enforcement':'2018-02-24',
                'target':{'TX':{'county':['Bexar'],
                               'threshold':500000},  
                          'FL':{'county':['Miami-Dade', 'Broward', 'Palm Beach'],
                             'threshold':1000000},
                          'NY':{'county':[ 'Bronx','Brooklyn','Queens', 'Staten Island'],
                               'threshold':1500000},
                          'NY1':{'county':[ 'Manhattan'],
                               'threshold':3000000},
                          'CA':{'county':['San Diego', 'Los Angeles', 'San Francisco',
                                         'San Mateo', 'Santa Clara'],
                                'threshold':2000000},
                          'HI':{'county':['Honolulu'],'threshold':3000000},}},    
           
             # 2018-1
             ########### LOWERED THRESHOLD TO $300.000 ##########
             ########### INCLUDES VIRTUAL CURRENCIES ##########
              6:{'date issued':'2018-11-15',
                'date enforcement':'2018-11-17',
                'target':{'TX':{'county':['Bexar', 'Tarrant', 'Dallas'],'threshold':300000},
                          'FL':{'county':['Miami-Dade', 'Broward', 'Palm Beach',],'threshold':300000},
                          'NY':{'county':['Brooklyn', 'Queens', 'Bronx', 'Staten Island',
                                 'Manhattan'],'threshold':300000},
                          'CA':{'county':['San Diego', 'Los Angeles', 'San Francisco', 
                                 'San Mateo', 'Santa Clara'],'threshold':300000},
                          'HI':{'county':['Honolulu'],'threshold':300000},
                          'NV':{'county':['Clark'], 'threshold':300000},
                          'WA':{'county':['King'],'threshold':300000},
                          'MA':{'county':['Suffolk', 'Middlesex'],'threshold':300000},
                          'IL':{'county':['Cook'],'threshold':300000},}}, # end group4
             
            # 2018-2
             7:{'date issued':'2019-05-14',
                'date enforcement':'2019-05-16',
                'target':{'TX':{'county':['Bexar', 'Tarrant', 'Dallas'],'threshold':300000},
                              'FL':{'county':['Miami-Dade', 'Broward', 'Palm Beach',],'threshold':300000},
                          'NY':{'county':['Brooklyn', 'Queens', 'Bronx', 'Staten Island',
                                 'Manhattan'],'threshold':300000},
                          'CA':{'county':['San Diego', 'Los Angeles', 'San Francisco', 
                                 'San Mateo', 'Santa Clara'],'threshold':300000},
                          'HI':{'county':['Honolulu'],'threshold':300000},
                          'NV':{'county':['Clark'], 'threshold':300000},
                          'WA':{'county':['King'],'threshold':300000},
                          'MA':{'county':['Suffolk', 'Middlesex'],'threshold':300000},
                          'IL':{'county':['Cook'],'threshold':300000},}}, # end group5 
            }

### Convert county names into 5-digit FIPS
Notice that conversion has to take into account the state, as there are multiple states with same county name. There are 2 steps here:
- convert burroughs into counties
- convert county names into 5 digit FIPS

In [32]:
# borough name/county name
NYC_boroughs = {'Bronx':'Bronx',
                'Brooklyn': 'Kings',
                'Manhattan': 'New York',
                'Queens': 'Queens',
                'Staten Island':'Richmond'}

In [38]:
import copy
GTO_waves = copy.deepcopy(raw_GTO)

# Use data from zillow to contruct dict
df1 = (df.reset_index())

## dictionary of characteristics now with FIPS
for i in range(1,len(GTO_waves)+1):
    for state in GTO_waves[i]['target'].keys():  #state on last wave
        
        if state == 'NY':
            GTO_waves[i]['target'][state]['county'] = pd.Series(GTO_waves[i]['target'][state]['county']).map(NYC_boroughs).to_list()
            # take only FIPS for that state
            
        df1 = (df.reset_index())
        df1 = df1.loc[df1.State==state,:].drop('State',1)
        df1.set_index('RegionName', inplace=True)
        DIC = dict(df1)# state in the loop
        
        # correcting for repeated key "NY"
        if state == 'NY1':
            GTO_waves[i]['target'][state]['county'] = ['36061']
        else:
            GTO_waves[i]['target'][state]['county'] = pd.Series(GTO_waves[i]['target'][state]['county']).map(DIC['FIPS']).to_list()


In [39]:
GTO_waves

{1: {'date issued': '2016-01-13',
  'date enforcement': '2016-03-01',
  'target': {'NY1': {'county': ['36061'], 'threshold': 3000000},
   'FL': {'county': ['12086'], 'threshold': 1000000}}},
 2: {'date issued': '2016-07-22',
  'date enforcement': '2016-08-28',
  'target': {'TX': {'county': ['48029'], 'threshold': 500000},
   'FL': {'county': ['12086', '12011', '12099'], 'threshold': 1000000},
   'NY': {'county': ['36005', '36047', '36081', '36085'],
    'threshold': 1500000},
   'NY1': {'county': ['36061'], 'threshold': 3000000},
   'CA': {'county': ['06073', '06037', '06075', '06081', '06085'],
    'threshold': 2000000}}},
 3: {'date issued': '2017-02-21',
  'date enforcement': '2017-02-24',
  'target': {'TX': {'county': ['48029'], 'threshold': 500000},
   'FL': {'county': ['12086', '12011', '12099'], 'threshold': 1000000},
   'NY': {'county': ['36005', '36047', '36081', '36085'],
    'threshold': 1500000},
   'NY1': {'county': ['36061'], 'threshold': 3000000},
   'CA': {'county': ['0

### Information on treated states and county FIPS

##### States treated

In [41]:
# all states that were targeted (maximum occurred on last wave from last wave)
target_states = list(GTO_waves[7]['target'].keys())

# commented not to print when run on another code
target_states # ['TX', 'FL', 'NY', 'CA', 'HI', 'NV', 'WA', 'MA', 'IL']

['TX', 'FL', 'NY', 'CA', 'HI', 'NV', 'WA', 'MA', 'IL']

##### Counties treated per state - FIPS

In [42]:
# treated states and counties OR BUROUGHS
for state in target_states:
    key = state
    value= GTO_waves[7]['target'][state]['county']
    #commented not to print when run on another code
    #print(f"'{key}': {value}")

##### Counties treated per state - Names

In [43]:
# convert back into county names
dic = dict(df.reset_index().set_index('FIPS').drop('State',1))
for state in target_states:
    key = state
    value= (pd.Series(GTO_waves[7]['target'][state]['county'])).map(dic['RegionName']).to_list()
    # commented not to print when run on another code

    #print(f"'{key}': {value}")

##### All counties treated

In [44]:
# FIPS that were treated
treated_counties = []

# iterate over dictionary of announced counties
for state in target_states:
    key = state
    value_list= GTO_waves[7]['target'][state]['county']
    for item in value_list:
        treated_counties.append(item)
print()
print(f"All treated counties: {treated_counties}")


All treated counties: ['48029', '48439', '48113', '12086', '12011', '12099', '36047', '36081', '36005', '36085', '36061', '06073', '06037', '06075', '06081', '06085', '15003', '32003', '53033', '25025', '25017', '17031']


## Make dataframes from dictionary information

### Short table (horizontal)

In [46]:
df = pd.DataFrame.from_dict(GTO_waves)
df.columns = ['wave_' + str(i) for i in list(df.columns)]
df

Unnamed: 0,wave_1,wave_2,wave_3,wave_4,wave_5,wave_6,wave_7
date issued,2016-01-13,2016-07-22,2017-02-21,2017-08-21,2018-02-21,2018-11-15,2019-05-14
date enforcement,2016-03-01,2016-08-28,2017-02-24,2017-08-23,2018-02-24,2018-11-17,2019-05-16
target,"{'NY1': {'county': ['36061'], 'threshold': 300...","{'TX': {'county': ['48029'], 'threshold': 5000...","{'TX': {'county': ['48029'], 'threshold': 5000...","{'TX': {'county': ['48029'], 'threshold': 5000...","{'TX': {'county': ['48029'], 'threshold': 5000...","{'TX': {'county': ['48029', '48439', '48113'],...","{'TX': {'county': ['48029', '48439', '48113'],..."


In [47]:
# unpack information from  GTO dictionary

temp_df_list2 = [] # holds dataframe for each wave

#loop into dictionary keys
for wave in df.columns:
    temp_df_list = [] # hold dataframes for each state
    
    for state in df.loc['target', wave]:
        b = pd.DataFrame(df.loc['target',wave][state]['county'])  # list of counties
        b['threshold']=df.loc['target',wave][state]['threshold']  # tresholds 
        b.set_index(0, inplace=True)   # set index to allow merging (use .concat)
        temp_df_list.append(b)   
        
    temp_df_list2.append(pd.concat(temp_df_list)) 
    
thresholds = pd.concat(temp_df_list2, axis=1, sort=False) # merges all waves

# match again original column names (waves)
thresholds.columns = df.columns

# merge all columns
GTO_table = pd.concat([df.iloc[:-1,:],thresholds], axis=0)

In [48]:
# supress scientifc notation on diplayed dataframe
pd.options.display.float_format = '{:20,.0f}'.format

GTO_table

Unnamed: 0,wave_1,wave_2,wave_3,wave_4,wave_5,wave_6,wave_7
date issued,2016-01-13,2016-07-22,2017-02-21,2017-08-21,2018-02-21,2018-11-15,2019-05-14
date enforcement,2016-03-01,2016-08-28,2017-02-24,2017-08-23,2018-02-24,2018-11-17,2019-05-16
36061,3000000,3000000,3000000,3000000,3000000,300000,300000
12086,1000000,1000000,1000000,1000000,1000000,300000,300000
48029,,500000,500000,500000,500000,300000,300000
12011,,1000000,1000000,1000000,1000000,300000,300000
12099,,1000000,1000000,1000000,1000000,300000,300000
36005,,1500000,1500000,1500000,1500000,300000,300000
36047,,1500000,1500000,1500000,1500000,300000,300000
36081,,1500000,1500000,1500000,1500000,300000,300000


### Long table

In [49]:
GTO_table_long = GTO_table[2:].reset_index().melt(id_vars='index')
GTO_table_long.columns = ['FIPS', 'wave', 'threshold']
GTO_table_long.head(3)

Unnamed: 0,FIPS,wave,threshold
0,36061,wave_1,3000000.0
1,12086,wave_1,1000000.0
2,48029,wave_1,


### Effective date adjustment

Most treatments start later in the month, so I adjust for dates where day of the month > 15, I set next month as reference, because Zillow prices are set on end of month for the whole month.

In [50]:
## get dates, day, months and years
df = (GTO_table.loc['date enforcement',:].
                                str.split('-', expand=True).  # all split columns
                                apply(pd.to_numeric))         # to make additions 
        
df.columns = ['Y', 'M', 'D']  # reasonable names

df

Unnamed: 0,Y,M,D
wave_1,2016,3,1
wave_2,2016,8,28
wave_3,2017,2,24
wave_4,2017,8,23
wave_5,2018,2,24
wave_6,2018,11,17
wave_7,2019,5,16


In [51]:
## make adjustments

# end of month, not December -> next month is reference
df.M[(df.D>15) & (df.M<12)] = df.M[(df.D>15) & (df.M<12)] +1

# end of month and December -> effective January next year.
df.M[(df.D>15) & (df.M==12)] = 1
df.Y[(df.D>15) & (df.M==12)] = df.Y[(df.D>15) & (df.M==12)] + 1

# all days set to 1
df.D = 1

# covert back to string type
df = df.astype(str)

# add back leading zeroes
df.D = [str(n).zfill(2) for n in df.D]
df.M = [str(n).zfill(2) for n in df.M]

# concatenate date
effective_dates = df.Y + '-' + df.M + '-' + df.D

effective_dates

wave_1    2016-03-01
wave_2    2016-09-01
wave_3    2017-03-01
wave_4    2017-09-01
wave_5    2018-03-01
wave_6    2018-01-01
wave_7    2019-06-01
dtype: object

### include date columns to long table
Use dictionaries for correspondence 

In [52]:
GTO_table_long['effective'] = pd.to_datetime(GTO_table_long['wave'].map(dict(effective_dates)))

In [53]:
GTO_table_long['date issued'] = pd.to_datetime(GTO_table_long['wave'].map(dict(GTO_table.loc['date issued'])))

In [54]:
GTO_table_long['date enforcement'] = pd.to_datetime(GTO_table_long['wave'].map(dict(GTO_table.loc['date enforcement'])))

## Results
All saved to CSV, so no need to run this code again.

### Table horizontal form

In [55]:
GTO_table.head(4)

Unnamed: 0,wave_1,wave_2,wave_3,wave_4,wave_5,wave_6,wave_7
date issued,2016-01-13,2016-07-22,2017-02-21,2017-08-21,2018-02-21,2018-11-15,2019-05-14
date enforcement,2016-03-01,2016-08-28,2017-02-24,2017-08-23,2018-02-24,2018-11-17,2019-05-16
36061,3000000,3000000,3000000,3000000,3000000,300000,300000
12086,1000000,1000000,1000000,1000000,1000000,300000,300000


In [56]:
# save local
GTO_table.to_csv('.\output\GTO_table.csv')

### Table long form
I intentionally left dates columns (repetitive data), so there is no need to manipulate it to add these info on other codes.

In [57]:
GTO_table_long.head(4)

Unnamed: 0,FIPS,wave,threshold,effective,date issued,date enforcement
0,36061,wave_1,3000000.0,2016-03-01,2016-01-13,2016-03-01
1,12086,wave_1,1000000.0,2016-03-01,2016-01-13,2016-03-01
2,48029,wave_1,,2016-03-01,2016-01-13,2016-03-01
3,12011,wave_1,,2016-03-01,2016-01-13,2016-03-01


In [58]:
# Save local
GTO_table_long.to_csv('.\output\GTO_table_long.csv',index=False)

### Table waves x dates
Correspondence waves with issue, enforcement and effective (a suitable "initial date", to be used when I attribute dummies on my tables.)

In [59]:
GTO_dates = GTO_table[:2].T
GTO_dates['effective'] = pd.to_datetime(list(GTO_dates.index.map(dict(effective_dates))))

In [60]:
GTO_dates

Unnamed: 0,date issued,date enforcement,effective
wave_1,2016-01-13,2016-03-01,2016-03-01
wave_2,2016-07-22,2016-08-28,2016-09-01
wave_3,2017-02-21,2017-02-24,2017-03-01
wave_4,2017-08-21,2017-08-23,2017-09-01
wave_5,2018-02-21,2018-02-24,2018-03-01
wave_6,2018-11-15,2018-11-17,2018-01-01
wave_7,2019-05-14,2019-05-16,2019-06-01


In [61]:
# save local
GTO_dates.to_csv('.\output\GTO_dates.csv')

_________
## Test loading dfs

In [62]:
pd.read_csv('.\output\GTO_table.csv', index_col=0)

Unnamed: 0,wave_1,wave_2,wave_3,wave_4,wave_5,wave_6,wave_7
date issued,2016-01-13,2016-07-22,2017-02-21,2017-08-21,2018-02-21,2018-11-15,2019-05-14
date enforcement,2016-03-01,2016-08-28,2017-02-24,2017-08-23,2018-02-24,2018-11-17,2019-05-16
36061,3000000.0,3000000.0,3000000.0,3000000.0,3000000.0,300000,300000
12086,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,300000,300000
48029,,500000.0,500000.0,500000.0,500000.0,300000,300000
12011,,1000000.0,1000000.0,1000000.0,1000000.0,300000,300000
12099,,1000000.0,1000000.0,1000000.0,1000000.0,300000,300000
36005,,1500000.0,1500000.0,1500000.0,1500000.0,300000,300000
36047,,1500000.0,1500000.0,1500000.0,1500000.0,300000,300000
36081,,1500000.0,1500000.0,1500000.0,1500000.0,300000,300000


In [63]:
pd.read_csv('.\output\GTO_table_long.csv')

Unnamed: 0,FIPS,wave,threshold,effective,date issued,date enforcement
0,36061,wave_1,3000000,2016-03-01,2016-01-13,2016-03-01
1,12086,wave_1,1000000,2016-03-01,2016-01-13,2016-03-01
2,48029,wave_1,,2016-03-01,2016-01-13,2016-03-01
3,12011,wave_1,,2016-03-01,2016-01-13,2016-03-01
4,12099,wave_1,,2016-03-01,2016-01-13,2016-03-01
...,...,...,...,...,...,...
149,32003,wave_7,300000,2019-06-01,2019-05-14,2019-05-16
150,53033,wave_7,300000,2019-06-01,2019-05-14,2019-05-16
151,25025,wave_7,300000,2019-06-01,2019-05-14,2019-05-16
152,25017,wave_7,300000,2019-06-01,2019-05-14,2019-05-16


In [64]:
pd.read_csv('.\output\GTO_dates.csv', index_col=0)

Unnamed: 0,date issued,date enforcement,effective
wave_1,2016-01-13,2016-03-01,2016-03-01
wave_2,2016-07-22,2016-08-28,2016-09-01
wave_3,2017-02-21,2017-02-24,2017-03-01
wave_4,2017-08-21,2017-08-23,2017-09-01
wave_5,2018-02-21,2018-02-24,2018-03-01
wave_6,2018-11-15,2018-11-17,2018-01-01
wave_7,2019-05-14,2019-05-16,2019-06-01
