In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import fiona

In [14]:
# Load the lookup tables that the WARN log is joined against.
# Identifier columns (FIPS, CBSA, ZIP, ZCTA) go through the `str` converter so
# they are kept as text rather than being parsed as numbers.
warn_workbook = 'data/warnLogs_200720.xlsx'

# County lookup; merged onto the log by 'warnCounty' in a later cell.
warnCounties = pd.read_excel(warn_workbook,
                             sheet_name="warnCounties",
                             converters={'FIPS':str})

# City lookup; merged onto the log by 'warnCity' in a later cell.
warnCities = pd.read_excel(warn_workbook,
                           sheet_name="warnCities",
                           usecols="A,B,E,F")

# Metro-area lookup, renamed to the short column names used in the joins.
msa = pd.read_excel('../data/geographies/cbsa.xlsx',
                    sheet_name="cbsa",
                    usecols="A,D,M",
                    converters={'FIPS_COUNTY':str,
                                'CBSA Code':str})

msa = msa.rename(columns={'CBSA Code':'CBSA','CBSA Title':'MSA','FIPS_COUNTY':'FIPS'})

# ZIP code -> ZCTA crosswalk; merged onto the log by ZIP in a later cell.
zipWalk = pd.read_excel('../data/spatial/zips/Zip_to_zcta_crosswalk_2020.xlsx',
                        sheet_name='ziptozcta2019',
                        usecols='A,E',
                        converters={'ZIP_CODE':str,
                                    'ZCTA':str})

# ZCTA layer used for the ZCTA -> CDRegion lookup.
# Read the source layer, not application/data/gaZips.geojson: that file is
# overwritten by this notebook's own export step, so reading it here would make
# a fresh run depend on the output of a previous run.
zips = gpd.read_file('../data/spatial/ga-custom/gaZips.geojson')

In [3]:
# Read the WARN notices from the "warnLogs" sheet; ZIP goes through the `str`
# converter so it is kept as text.
data = pd.read_excel(
    'data/warnLogs_200720.xlsx',
    sheet_name="warnLogs",
    usecols="A:F,H",
    converters={'ZIP': str},
)

# Constant 1 per notice; summed in a later cell to count notices per city/year.
data['Companies'] = 1

# Strip surrounding whitespace from the text columns.
for text_col in ('Company name', 'City', 'ZIP', 'County'):
    data[text_col] = data[text_col].str.strip()

# Rename to the column names the lookup joins and exports expect.
data = data.rename(columns={
    'County': 'warnCounty',
    'City': 'warnCity',
    'Company name': 'Company',
    'Est. Impact': 'Employees',
})

In [4]:
# Attach the county, city and metro-area lookups with left joins, so every WARN
# row is kept even when a key has no match.
# NOTE(review): the 'FIPS' key of the last join presumably comes from the
# county lookup, which would make the join order significant - confirm.
data = (
    data
    .merge(warnCounties, how='left', on='warnCounty')
    .merge(warnCities, how='left', on='warnCity')
    .merge(msa, how='left', on='FIPS')
)

In [5]:
# Look up the ZCTA for each notice's ZIP code (left join keeps unmatched ZIPs).
data = data.merge(zipWalk, how='left', left_on='ZIP', right_on='ZIP_CODE')

In [6]:
# Keep only the columns needed downstream, in a fixed order.
# .copy() makes the result an independent frame: later cells sort it in place
# and add columns to it, which pandas otherwise reports with a
# SettingWithCopyWarning because the frame was taken as a slice of another.
data = data[['Date','Company','Companies','Employees','City',
             'ZIP','County','FIPS','MSA','CBSA','ZCTA']].copy()

In [7]:
# Order the notices chronologically.
data = data.sort_values(by='Date')

In [8]:
# Renumber the rows 0..n-1 and discard the previous index.
data = data.reset_index(drop=True)

In [9]:
# Derive text Month (abbreviated name) and Year (four digits) from Date.
# Uses the .dt accessor, so Date must still be a datetime column at this point.
notice_dates = data['Date']
data = data.assign(
    Month=notice_dates.dt.strftime('%b'),
    Year=notice_dates.dt.strftime('%Y'),
)

In [10]:
# Replace Date with its MM-DD-YYYY text form. After this cell the column holds
# strings, so datetime operations on Date must happen before it.
parsed_dates = pd.to_datetime(data['Date'], format='%Y-%m-%d')
data['Date'] = parsed_dates.dt.strftime('%m-%d-%Y')

In [11]:
# County layer reduced to its GEOID and CDRegion columns.
# NOTE(review): `counties` is not referenced again in the cells visible here.
county_layer = gpd.read_file('../data/spatial/ga-custom/ga-counties.geojson')
counties = county_layer[['GEOID', 'CDRegion']]

In [15]:
# Reduce the ZCTA layer to the ZCTA -> CDRegion lookup.
# .copy() makes it an independent frame; a later cell assigns into its ZCTA
# column, which would otherwise raise a SettingWithCopyWarning.
zips = zips[['ZCTA','CDRegion']].copy()

In [17]:
# Make the ZCTA join key text on both sides so the merge compares like with like.
zips['ZCTA'] = zips['ZCTA'].astype(str)

# A plain astype(str) would turn a missing ZCTA (a ZIP with no crosswalk match)
# into the literal text 'nan', which then appears in the exports and forms a
# bogus 'nan' group in the per-ZCTA roll-up. Convert only the known values and
# leave missing ones missing.
zcta_known = data['ZCTA'].notna()
data['ZCTA'] = data['ZCTA'].astype(str).where(zcta_known)

In [18]:
# Row/column count before the CDRegion join, for comparison with the count after it.
data.shape

(1836, 13)

In [19]:
# Attach CDRegion by ZCTA; the left join keeps notices whose ZCTA has no match.
data = data.merge(zips, how='left', on='ZCTA')

In [20]:
# Row/column count after the CDRegion join; an unchanged row count means the
# join did not duplicate any notices.
data.shape

(1836, 14)

In [21]:
# Final column order for the export: date fields, then company and geography,
# then the two measures.
export_order = [
    'Date', 'Month', 'Year',
    'Company', 'City', 'ZIP', 'County', 'FIPS', 'MSA', 'CBSA', 'ZCTA', 'CDRegion',
    'Companies', 'Employees',
]
data = data[export_order]

In [61]:
# Write the joined WARN log for the application (row index omitted).
warn_csv_path = 'application/data/warnLogs.csv'
data.to_csv(warn_csv_path, index=False)

In [24]:
# Independent copy used for the JSON export, so adding columns to it leaves
# `data` unchanged.
dataJson = data.copy(deep=True)

In [25]:
# Total affected employees per ZCTA and year, reshaped to one row per ZCTA with
# one column per year. ZCTA/year combinations without notices are NaN here.
# The aggregation is named with the string 'sum': passing the Python builtin
# `sum` to agg is deprecated in recent pandas and emits a FutureWarning.
zctaRoll = data.groupby(['ZCTA','Year']).agg({'Employees':'sum'}).reset_index()

# Each (ZCTA, Year) pair is already unique after the groupby, so an explicit
# 'sum' yields the same values as pivot_table's default 'mean' while stating
# the intent.
zctaRoll = zctaRoll.pivot_table(values='Employees', index=['ZCTA'],
                                columns='Year', aggfunc='sum')
zctaRoll = zctaRoll.reset_index(drop=False)

In [26]:
# Fixed output schema: 'ZCTA' followed by one 'Employees<year>' column for each
# year 2007-2020. `numCols` is reused by later cells for the integer casts.
numCols = ['Employees' + str(year) for year in range(2007, 2021)]
columns = ['ZCTA'] + numCols

# Label every pivoted column from the year it actually holds instead of by
# position. Positional labels break as soon as one year has no notices: the
# assignment either fails on a length mismatch or, if another year happens to
# make up the count, silently shifts the labels.
zctaRoll.columns = ['ZCTA' if label == 'ZCTA' else 'Employees' + str(label)
                    for label in zctaRoll.columns]

# A year outside the fixed schema should be noticed rather than dropped.
unexpected = [label for label in zctaRoll.columns if label not in columns]
if unexpected:
    raise ValueError('Unexpected columns in zctaRoll: ' + ', '.join(unexpected))

# Years missing from the data become all-zero columns; remaining gaps
# (ZCTA/year combinations with no notices) are zero as well.
zctaRoll = zctaRoll.reindex(columns=columns, fill_value=0).fillna(0)

In [27]:
# Cast every yearly count column to integer in a single call.
zctaRoll = zctaRoll.astype({year_col: int for year_col in numCols})

In [28]:
# Source ZCTA layer that the yearly employee totals are attached to.
ga_zips_source = '../data/spatial/ga-custom/gaZips.geojson'
gaZips = gpd.read_file(ga_zips_source)

In [29]:
# Cast the ZCTA join key to text on both frames before merging them.
for keyed_frame in (gaZips, zctaRoll):
    keyed_frame['ZCTA'] = keyed_frame['ZCTA'].astype(str)

In [30]:
# Attach the yearly totals to every ZCTA; the left join keeps ZCTAs that have
# no notices (their yearly columns are NaN until filled).
gaZips = pd.merge(left=gaZips, right=zctaRoll, on='ZCTA', how='left')

In [31]:
# ZCTAs without any notices have NaN in the yearly columns after the left join;
# make those zero and store the counts as integers.
# Only the yearly columns are filled: a frame-wide fillna(0) would also write
# the integer 0 into any missing text attribute (CDRegion, MSA, County) and
# would attempt to fill the geometry column with a non-geometry value.
for col in numCols:
    gaZips[col] = gaZips[col].fillna(0).astype(int)

In [32]:
# Keep the attribute columns, the yearly employee totals for 2007-2020, and the
# geometry, in that order.
employee_cols = ['Employees' + str(year) for year in range(2007, 2021)]
gaZips = gaZips[['ZCTA', 'CDRegion', 'MSA', 'County'] + employee_cols + ['geometry']]

In [59]:
# Preview the first rows of the ZCTA layer with its yearly employee totals.
gaZips.head()

Unnamed: 0,ZCTA,CDRegion,MSA,County,Employees2007,Employees2008,Employees2009,Employees2010,Employees2011,Employees2012,Employees2013,Employees2014,Employees2015,Employees2016,Employees2017,Employees2018,Employees2019,Employees2020,geometry
0,37350,Northwest,"Chattanooga, TN-GA",Walker County,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"POLYGON ((-85.34438 34.98373, -85.35288 34.983..."
1,37362,Northwest,"Dalton, GA",Murray County,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"POLYGON ((-84.73143 34.98809, -84.74509 34.988..."
2,37363,Northeast,"Chattanooga, TN-GA",Catoosa County,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"POLYGON ((-85.06484 34.98677, -85.08190 34.986..."
3,31820,West,"Columbus, GA-AL",Muscogee County,0,0,112,0,0,0,27,0,46,0,0,75,0,0,"MULTIPOLYGON (((-84.90265 32.58319, -84.90278 ..."
4,31821,West,,Stewart County,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"POLYGON ((-85.05875 32.13602, -85.06114 32.134..."


In [58]:
# Export the ZCTA layer with yearly totals as GeoJSON for the application.
ga_zips_export = 'application/data/gaZips.geojson'
gaZips.to_file(ga_zips_export, driver='GeoJSON')

In [68]:
# Combined "Mon YYYY" label built from the Month and Year text columns.
dataJson['date'] = dataJson['Month'] + ' ' + dataJson['Year']

In [71]:
# Column order for the JSON export; the combined 'date' label sits right after
# the individual date fields.
json_order = [
    'Date', 'Month', 'Year', 'date',
    'Company', 'City', 'ZIP', 'County', 'FIPS', 'MSA', 'CBSA', 'ZCTA', 'CDRegion',
    'Companies', 'Employees',
]
dataJson = dataJson[json_order]

In [72]:
# Preview the first rows of the frame that is exported as JSON.
dataJson.head()

Unnamed: 0,Date,Month,Year,date,Company,City,ZIP,County,FIPS,MSA,CBSA,ZCTA,CDRegion,Companies,Employees
0,01-04-2007,Jan,2007,Jan 2007,"Polymer Group, Inc",Gainesville,30504,Hall County,13139,"Gainesville, GA",23580.0,30504,Northeast,1,58
1,01-08-2007,Jan,2007,Jan 2007,Enterprise Fund Distributors,Atlanta,30326,Fulton County,13121,"Atlanta-Sandy Springs-Alpharetta, GA",12060.0,30326,Metro West,1,89
2,01-08-2007,Jan,2007,Jan 2007,Marriott Atlanta Global Reservations Sales & C...,Sandy Springs,30328,Fulton County,13121,"Atlanta-Sandy Springs-Alpharetta, GA",12060.0,30328,Metro North,1,153
3,01-08-2007,Jan,2007,Jan 2007,"Glen Raven Custom Fabrics,llc",Elberton,30635,Elbert County,13105,,,30635,Northeast,1,54
4,01-08-2007,Jan,2007,Jan 2007,Schwan's Bakery Inc.,Suwanee,30024,Gwinnett County,13135,"Atlanta-Sandy Springs-Alpharetta, GA",12060.0,30024,Metro North,1,200


In [73]:
# Export the WARN log as a JSON array with one object per notice.
warn_json_path = 'application/data/warnLogs.json'
dataJson.to_json(warn_json_path, orient='records')

In [49]:
# City attributes and coordinates. This rebinds `warnCities`, replacing the
# city lookup loaded from the workbook earlier. The same CSV path is written
# again at the end of the notebook, so this cell reads a previous run's output.
city_attribute_cols = ['City', 'CDRegion', 'MSA', 'County', 'Latitude', 'Longitude']
warnCities = pd.read_csv('data/warnCitiesFinal.csv')[city_attribute_cols]

In [50]:
# Preview the first rows of the city attribute table.
warnCities.head()

Unnamed: 0,City,CDRegion,MSA,County,Latitude,Longitude
0,Acworth,Metro West,"Atlanta-Sandy Springs-Alpharetta, GA",Cobb County,34.065933,-84.67688
1,Adairsville,Northwest,"Atlanta-Sandy Springs-Alpharetta, GA",Bartow County,34.368702,-84.934109
2,Adel,South,0,Cook County,31.137136,-83.423494
3,Ailey,Central,"Vidalia, GA",Montgomery County,32.187404,-82.565689
4,Albany,South,"Albany, GA",Dougherty County,31.578206,-84.155681


In [51]:
# Number of notices per city and year ('Companies' is 1 per notice), reshaped
# to one row per city with one column per year.
# The aggregation is named with the string 'sum': passing the Python builtin
# `sum` to agg is deprecated in recent pandas and emits a FutureWarning.
cityRoll = data.groupby(['City','Year']).agg({'Companies':'sum'}).reset_index()

# Each (City, Year) pair is already unique after the groupby, so an explicit
# 'sum' yields the same values as pivot_table's default 'mean'.
cityRoll = cityRoll.pivot_table(values='Companies', index=['City'],
                                columns='Year', aggfunc='sum')
cityRoll = cityRoll.reset_index(drop=False)

# Fixed output schema: 'City' followed by one 'Companies<year>' column for each
# year 2007-2020. This rebinds `numCols` and `columns`, replacing the
# 'Employees<year>' lists used for the ZCTA roll-up.
numCols = ['Companies' + str(year) for year in range(2007, 2021)]
columns = ['City'] + numCols

# Label every pivoted column from the year it actually holds instead of by
# position, so a year without notices cannot break or shift the labels.
cityRoll.columns = ['City' if label == 'City' else 'Companies' + str(label)
                    for label in cityRoll.columns]

# A year outside the fixed schema should be noticed rather than dropped.
unexpected = [label for label in cityRoll.columns if label not in columns]
if unexpected:
    raise ValueError('Unexpected columns in cityRoll: ' + ', '.join(unexpected))

# Years missing from the data become all-zero columns; remaining gaps
# (city/year combinations with no notices) are zero as well.
cityRoll = cityRoll.reindex(columns=columns, fill_value=0).fillna(0)

# Counts are whole numbers; store them as integers.
for col in numCols:
    cityRoll[col] = cityRoll[col].astype(int)

In [52]:
# Attach the yearly notice counts to every city; the left join keeps cities
# that have no notices.
warnCities = warnCities.merge(cityRoll, how='left', on='City')

In [54]:
# Cities without any notices have NaN in the yearly columns after the left
# join; make those zero and store the counts as integers.
# Only the yearly columns are filled: a frame-wide fillna(0) would also turn a
# missing MSA, CDRegion or County into the integer 0 and a missing
# Latitude/Longitude into the coordinate 0.
for col in numCols:
    warnCities[col] = warnCities[col].fillna(0).astype(int)

In [55]:
# Keep the city attributes, the yearly notice counts for 2007-2020, and the
# coordinates, in that order.
company_cols = ['Companies' + str(year) for year in range(2007, 2021)]
warnCities = warnCities[
    ['City', 'CDRegion', 'MSA', 'County'] + company_cols + ['Latitude', 'Longitude']
]

In [66]:
# Write the city table with yearly counts (row index omitted). This overwrites
# the same CSV that the city attributes were read from.
city_csv_path = 'data/warnCitiesFinal.csv'
warnCities.to_csv(city_csv_path, index=False)

In [57]:
# Cities whose CDRegion is 'Metro South', kept in `warnCityTest` for inspection.
in_metro_south = warnCities['CDRegion'] == 'Metro South'
warnCityTest = warnCities[in_metro_south]