In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)

import numpy as np

import geopandas as gpd
import fiona

import datetime

In [2]:
# Read every worksheet into a dict of {sheet name: DataFrame}; the first two
# rows and the last three rows of each sheet are skipped.
sheets_dict = pd.read_excel('ui_claims_county_industry.xlsx', sheet_name=None, skiprows=2, skipfooter=3)

# Tag each sheet with a county label derived from its sheet name and keep only
# the last line of multi-line column headers. The frames are collected in a
# list and concatenated once: DataFrame.append was removed in pandas 2.0 and
# re-copied the accumulated data on every iteration.
county_frames = []
for name, sheet in sheets_dict.items():
    # Strip the sheet name before adding the suffix so the label is always
    # "<name> County" (the join key used later), with or without padding.
    sheet['County'] = name.strip() + ' County'
    county_frames.append(sheet.rename(columns=lambda x: x.split('\n')[-1]))

# ignore_index=True gives the same fresh 0..n-1 index as reset_index(drop=True)
df = pd.concat(county_frames, ignore_index=True)

In [3]:
# Drop the two unnamed columns that came through from the spreadsheet
df = df.drop(columns=['Unnamed: 2', 'Unnamed: 4'])

In [4]:
# Reshape wide -> long: every non-id column becomes one (Date, Claims) row
id_columns = ["County", "NAICS", "INDUSTRY NAME"]
df = df.melt(id_vars=id_columns, var_name="Date", value_name="Claims")

# Remove commas from the claim counts, then cast them to integers
claims_no_commas = df['Claims'].replace(',', '', regex=True)
df['Claims'] = claims_no_commas.astype(int)

In [5]:
# Load the WARN log and put county names in the "<name> County" form that the
# later merges on 'County' use as their key.
warns = pd.read_excel('warnLogs_200720.xlsx')
# Strip BEFORE appending the suffix: stripping afterwards can only remove
# leading whitespace, so a padded value such as "Fulton " would become
# "Fulton  County" and silently fail to match.
warns['County'] = warns['County'].str.strip() + ' County'

In [6]:
# Regional-commission lookup: spreadsheet columns A-C, with county names
# suffixed " County" and then cast to str
codes = pd.read_excel('../../data/geographies/regional-commissions.xlsx', usecols='A,B,C')
codes['County'] = (codes['County'] + ' County').astype(str)

# CBSA lookup (sheet 'cbsa', columns A, D, H, I) restricted to Georgia rows
msa = pd.read_excel('../../data/geographies/cbsa.xlsx', sheet_name='cbsa', usecols='A,D,H,I')
in_georgia = msa['State'] == 'Georgia'
msa = msa.loc[in_georgia]

In [7]:
# Left-join the regional-commission lookup onto the claims rows by county
df = df.merge(codes, on='County', how='left')


In [8]:
# Left-join the CBSA lookup onto the claims rows by county
df = df.merge(msa, on='County', how='left')
# Give the WARN notices both lookups: regional commissions first, then CBSA
warns = warns.merge(codes, on='County', how='left')
warns = warns.merge(msa, on='County', how='left')

In [9]:
# Reference dates: today, and the date 365 days earlier
today = np.datetime64('today')
lastYear = today - 365

# Label notices in the current window and in the matching window a year earlier.
# .copy() makes each subset an independent frame, so adding 'Period' no longer
# assigns onto a slice of `warns` (which raised SettingWithCopyWarning).
warnsCurr = warns[(warns['Date'] > '2020-03-01') & (warns['Date'] <= today)].copy()
warnsCurr['Period'] = 'CurrentWarns'
warnsLast = warns[(warns['Date'] > '2019-03-01') & (warns['Date'] <= lastYear)].copy()
warnsLast['Period'] = 'Last Period'
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
warnPeriods = pd.concat([warnsCurr, warnsLast])

warnPeriods = warnPeriods[['ID','Company name','Est. Impact','Date','Period']]

# Bring the period label back onto the full notice table.
# NOTE(review): once `lastYear` is later than 2020-03-01 the two windows overlap,
# and a notice in both would match twice here and be duplicated - confirm whether
# the window start dates should move with `today`.
warns = pd.merge(warns,warnPeriods,how='left',on=['ID','Company name','Est. Impact','Date'])

# Notices outside both windows get the literal label 'NA'. Assigning the result
# back (rather than an inplace fill on a column selection) always updates `warns`.
warns['Period'] = warns['Period'].fillna('NA')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [10]:
# Add a constant 'State' column holding 'Georgia' on every notice
warns = warns.assign(State='Georgia')

In [11]:
def _warns_by_area(frame, area_col):
    """Count WARN notices per area and period, one row per area.

    Parameters
    ----------
    frame : pandas.DataFrame
        Notice table containing `area_col` and a 'Period' column.
    area_col : str
        Column whose values identify the area.

    Returns
    -------
    pandas.DataFrame
        An 'Area' column plus one column per distinct 'Period' value holding
        the notice count for that area (NaN where a period has no notices).
    """
    # size() yields an unnamed count series; reset_index() names that column 0
    counts = frame.groupby([area_col, 'Period']).size().reset_index()
    wide = counts.pivot_table(0, [area_col], 'Period').reset_index(drop=False)
    return wide.rename(columns={area_col: 'Area'})


# The same count at four geographic levels (previously four copy-pasted stanzas)
countyWarns = _warns_by_area(warns, 'County')
msaWarns = _warns_by_area(warns, 'CBSA Title')
rcWarns = _warns_by_area(warns, 'Regional Commission')
stateWarns = _warns_by_area(warns, 'State')
# 'State' was only needed for the state-level grouping above
del warns['State']

# Stack state, regional commission, CBSA and county rows into one lookup.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
areaWarns = pd.concat([stateWarns, rcWarns, msaWarns, countyWarns])
areaWarns = areaWarns.reset_index(drop=True)

# An area with no notices in a period has no count; treat that as zero
areaWarns = areaWarns.fillna(0)

In [12]:
# Keep only the area label and the current-period count, with gaps filled by zero
areaWarns = areaWarns.loc[:, ['Area', 'CurrentWarns']].fillna(0)

In [13]:
# Load the labor-force sheet, discard rows without a title, keep month 3 only
labor = pd.read_excel('county_laborForce.xlsx')
labor = labor.dropna(subset=['title'])
labor = labor.loc[labor['Month'] == 3.0]

# Build a date string "<month>-7-2020" and re-render it as MM/DD/YYYY text
month_text = labor['Month'].astype(int).astype(str)
labor['Date'] = pd.to_datetime(month_text + '-7' + '-2020', format='%m-%d-%Y')
labor['Date'] = labor['Date'].dt.strftime('%m/%d/%Y')

# Remove the ", GA" suffix from the titles
labor['title'] = labor['title'].replace(', GA', '', regex=True)

In [14]:
# Take an explicit copy of the needed columns so that later in-place edits of
# marchLabor act on an independent frame instead of a slice of `labor`
# (which is what triggers pandas' SettingWithCopyWarning).
marchLabor = labor[['title','Date','emp','unemp','laborforce']].copy()

In [15]:
# Rename to the column names used downstream and renumber the rows.
# The result is assigned back instead of using inplace=True: the in-place
# rename on a column subset of `labor` raised SettingWithCopyWarning.
marchLabor = marchLabor.rename(columns={'title'      : 'County',
                                        'emp'        : 'MarchEmployed',
                                        'unemp'      : 'MarchUnemployed',
                                        'laborforce' : 'MarchLaborForce'})
marchLabor = marchLabor.reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [16]:
# Total claims per county and date, summed over every industry row
county_keys = ['area_code', 'County', 'Date']
countyClaims = df.groupby(county_keys)['Claims'].sum().reset_index()

In [17]:
# Remove fully duplicated rows
countyClaims = countyClaims.drop_duplicates()

In [18]:
# Attach the March labor-force baseline; the join is on county AND date, so
# only rows whose date matches the baseline date receive values.
countyClaims = pd.merge(countyClaims,marchLabor,how='left',on=['County','Date'])

# Carry the baseline forward to the later dates of the SAME county. Filling
# per group (instead of down the whole frame) stops a county with no baseline
# row from silently inheriting the previous county's numbers. Assigning the
# result back also avoids the chained inplace fillna(method=...) form.
county_groups = countyClaims.groupby(['area_code','County'])
march_cols = ['MarchEmployed', 'MarchUnemployed', 'MarchLaborForce']
countyClaims[march_cols] = county_groups[march_cols].ffill()

# Running total of claims within each county. GroupBy.cumsum keeps the original
# row index, whereas apply(lambda x: x.cumsum()) returns a group-keyed index on
# pandas >= 2.0 and can no longer be assigned back as a column.
countyClaims['CumClaims'] = county_groups['Claims'].cumsum()

# Estimates: every cumulative claim moves one person from employed to unemployed
countyClaims['Employed'] = countyClaims.MarchEmployed - countyClaims.CumClaims
countyClaims['Unemployed'] = countyClaims.MarchUnemployed + countyClaims.CumClaims
# Percent change in employment relative to the March baseline, to one decimal
countyClaims['PctChngEmployed'] = ( ( countyClaims.Employed - countyClaims.MarchEmployed ) / countyClaims.MarchEmployed ) * 100
countyClaims['PctChngEmployed'] = countyClaims['PctChngEmployed'].round(1)

In [19]:
# Repeat the county calculation for the whole state.
# Statewide March baseline: sum the county figures for each date.
marchLaborState = marchLabor.groupby('Date') \
                            .agg({'MarchEmployed':'sum','MarchUnemployed' : 'sum','MarchLaborForce' : 'sum'}) \
                            .reset_index()

# Statewide claims: sum the county totals for each date
stateClaims = countyClaims[['County','Date','Claims']]

stateClaims = stateClaims.groupby('Date').agg({'Claims':'sum'}).reset_index()

stateClaims = pd.merge(stateClaims, marchLaborState, how='left', on='Date')

# Carry the baseline forward to dates that had no match in the join. The filled
# columns are assigned back rather than filled in place through attribute
# access, which is chained assignment and does not write through under pandas
# Copy-on-Write; fillna(method=...) is also deprecated in favour of ffill().
march_cols = ['MarchEmployed', 'MarchUnemployed', 'MarchLaborForce']
stateClaims[march_cols] = stateClaims[march_cols].ffill()

In [20]:
# Add a constant 'State' column holding 'Georgia'
stateClaims = stateClaims.assign(State='Georgia')

In [21]:
# Running total of claims for the state. GroupBy.cumsum returns a series on the
# original row index; apply(lambda x: x.cumsum()) returns a group-keyed index
# on pandas >= 2.0 and can no longer be assigned back as a column.
stateClaims['CumClaims'] = stateClaims.groupby(['State'])['Claims'].cumsum()

In [22]:
# Employed / unemployed estimates: March baseline shifted by cumulative claims
stateClaims['Employed'] = stateClaims['MarchEmployed'] - stateClaims['CumClaims']
stateClaims['Unemployed'] = stateClaims['MarchUnemployed'] + stateClaims['CumClaims']

# Percent change in employment versus the March baseline, to one decimal
employed_delta = stateClaims['Employed'] - stateClaims['MarchEmployed']
stateClaims['PctChngEmployed'] = ((employed_delta / stateClaims['MarchEmployed']) * 100).round(1)

In [23]:
# Put the columns in a fixed order with the area label first
state_columns = ['State', 'Date', 'Claims',
                 'MarchEmployed', 'MarchUnemployed', 'MarchLaborForce',
                 'CumClaims', 'Employed', 'Unemployed', 'PctChngEmployed']
stateClaims = stateClaims.loc[:, state_columns]

In [24]:
# now do the same with msa claims

In [25]:
# Left-join the CBSA lookup onto the county-level March baseline
marchLaborMsa = marchLabor.merge(msa, on='County', how='left')

In [26]:
# Keep only the counties that matched a CBSA
marchLaborMsa = marchLaborMsa.dropna(subset=['CBSA Code'])

In [27]:
# Sum the county baselines up to one row per CBSA and date
baseline_sums = {'MarchEmployed': 'sum', 'MarchUnemployed': 'sum', 'MarchLaborForce': 'sum'}
marchLaborMsa = marchLaborMsa.groupby(['CBSA Title', 'Date']).agg(baseline_sums).reset_index()

In [28]:
# County claims tagged with their CBSA; counties outside any CBSA are dropped
msaClaims = countyClaims[['County','Date','Claims']]
msaClaims = pd.merge(msaClaims, msa, how='left', on='County')

msaClaims = msaClaims.dropna(subset=['CBSA Code'])

# Total claims per CBSA and date
msaClaims = msaClaims.groupby(['CBSA Title','Date']) \
                             .agg({'Claims':'sum'}) \
                             .reset_index()

# Attach the CBSA-level March baseline (matches only on the baseline date)
msaClaims = pd.merge(msaClaims,marchLaborMsa,how='left',on=['CBSA Title','Date'])

# Carry the baseline forward within each CBSA. Filling per group stops a CBSA
# with no baseline row from inheriting the previous CBSA's numbers, and
# assigning back avoids the chained inplace fillna(method=...) form.
msa_groups = msaClaims.groupby(['CBSA Title'])
march_cols = ['MarchEmployed', 'MarchUnemployed', 'MarchLaborForce']
msaClaims[march_cols] = msa_groups[march_cols].ffill()

# Running total per CBSA; GroupBy.cumsum keeps the original row index, unlike
# apply(lambda x: x.cumsum()) on pandas >= 2.0.
msaClaims['CumClaims'] = msa_groups['Claims'].cumsum()

# Employed / unemployed estimates and percent change versus March, one decimal
msaClaims['Employed'] = msaClaims.MarchEmployed - msaClaims.CumClaims
msaClaims['Unemployed'] = msaClaims.MarchUnemployed + msaClaims.CumClaims
msaClaims['PctChngEmployed'] = ( ( msaClaims.Employed - msaClaims.MarchEmployed ) / msaClaims.MarchEmployed ) * 100
msaClaims['PctChngEmployed'] = msaClaims['PctChngEmployed'].round(1)

In [29]:
# now repeat with regional commissions

In [30]:
# Left-join the regional-commission lookup onto the county-level March baseline
marchLaborRc = marchLabor.merge(codes, on='County', how='left')

In [31]:
# Sum the county baselines up to one row per regional commission and date
baseline_sums = {'MarchEmployed': 'sum', 'MarchUnemployed': 'sum', 'MarchLaborForce': 'sum'}
marchLaborRc = marchLaborRc.groupby(['Regional Commission', 'Date']).agg(baseline_sums).reset_index()

In [32]:
# County claims tagged with their regional commission
rcClaims = countyClaims[['County','Date','Claims']]
rcClaims = pd.merge(rcClaims, codes, how='left', on='County')

# Total claims per regional commission and date
rcClaims = rcClaims.groupby(['Regional Commission','Date']) \
                             .agg({'Claims':'sum'}) \
                             .reset_index()

# Attach the commission-level March baseline (matches only on the baseline date)
rcClaims = pd.merge(rcClaims,marchLaborRc,how='left',on=['Regional Commission','Date'])

# Carry the baseline forward within each commission. Filling per group stops a
# commission with no baseline row from inheriting the previous one's numbers,
# and assigning back avoids the chained inplace fillna(method=...) form.
rc_groups = rcClaims.groupby(['Regional Commission'])
march_cols = ['MarchEmployed', 'MarchUnemployed', 'MarchLaborForce']
rcClaims[march_cols] = rc_groups[march_cols].ffill()

# Running total per commission; GroupBy.cumsum keeps the original row index,
# unlike apply(lambda x: x.cumsum()) on pandas >= 2.0.
rcClaims['CumClaims'] = rc_groups['Claims'].cumsum()

# Employed / unemployed estimates and percent change versus March, one decimal
rcClaims['Employed'] = rcClaims.MarchEmployed - rcClaims.CumClaims
rcClaims['Unemployed'] = rcClaims.MarchUnemployed + rcClaims.CumClaims
rcClaims['PctChngEmployed'] = ( ( rcClaims.Employed - rcClaims.MarchEmployed ) / rcClaims.MarchEmployed ) * 100
rcClaims['PctChngEmployed'] = rcClaims['PctChngEmployed'].round(1)

In [33]:
# Drop the county code column from the county frame
countyClaims = countyClaims.drop(columns='area_code')

# Give all four frames the same 'Area' label column
msaClaims = msaClaims.rename(columns={'CBSA Title': 'Area'})
rcClaims = rcClaims.rename(columns={'Regional Commission': 'Area'})
countyClaims = countyClaims.rename(columns={'County': 'Area'})
stateClaims = stateClaims.rename(columns={'State': 'Area'})

In [34]:
# stack on top of each other and write out as a single data frame for line and bar charts

In [35]:
# Stack the state, county, regional-commission and CBSA frames (in that order).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0; columns
# are aligned by name and follow stateClaims' column order.
data = pd.concat([stateClaims, countyClaims, rcClaims, msaClaims])

In [36]:
# Renumber the stacked rows 0..n-1, discarding the per-frame indexes
data = data.reset_index(drop=True)

In [37]:
# Preview the first five rows of the stacked frame
data.head(5)

Unnamed: 0,Area,Date,Claims,MarchEmployed,MarchUnemployed,MarchLaborForce,CumClaims,Employed,Unemployed,PctChngEmployed
0,Georgia,03/07/2020,4401,4936482.0,223653.0,5160135.0,4401,4932081.0,228054.0,-0.1
1,Georgia,03/14/2020,5248,4936482.0,223653.0,5160135.0,9649,4926833.0,233302.0,-0.2
2,Georgia,03/21/2020,11707,4936482.0,223653.0,5160135.0,21356,4915126.0,245009.0,-0.4
3,Georgia,03/28/2020,130544,4936482.0,223653.0,5160135.0,151900,4784582.0,375553.0,-3.1
4,Georgia,04/04/2020,377105,4936482.0,223653.0,5160135.0,529005,4407477.0,752658.0,-10.7


In [38]:
# Number the rows within each county: after the per-group reset_index the
# second index level ('level_1' once flattened) is the row's position in its county
renumbered = countyClaims.groupby('Area').apply(lambda grp: grp.reset_index(drop=True))
countyClaims = renumbered.drop('Area', axis=1).reset_index()

# Keep only the rows sitting at the highest position found in any county
dateMax = countyClaims['level_1'].max()
at_last_position = countyClaims['level_1'] == dateMax
countyClaims = countyClaims.loc[at_last_position].reset_index(drop=True)

# County polygons for the map layers
countyShape = gpd.read_file("spatial/ga-counties.geojson")

# Re-attach the lookup columns (including area_code) and make the code a string
countyClaims = pd.merge(countyClaims, codes, left_on='Area', right_on='County', how='left')
countyClaims['area_code'] = countyClaims['area_code'].astype(str)

In [39]:
# Left-join the current WARN counts onto each county row by area name
countyClaims = countyClaims.merge(areaWarns, on='Area', how='left')

In [40]:
# Join the county metrics onto the polygons: GEOID on the shapes, area_code on the metrics
countyShape = pd.merge(countyShape, countyClaims, left_on='GEOID', right_on='area_code', how='left')

# Columns carried into the exported map layers
shape_columns = ['area_code', 'County', 'CumClaims',
                 'MarchEmployed', 'MarchUnemployed', 'MarchLaborForce',
                 'Employed', 'Unemployed', 'PctChngEmployed',
                 'CurrentWarns', 'geometry']
countyShapes = countyShape[shape_columns]

In [41]:
# Wrap the selected columns as a GeoDataFrame using the 'geometry' column
countyShapes = gpd.GeoDataFrame(countyShapes, geometry='geometry')

# Second layer with the same attributes, each geometry replaced by its centroid
countyPoints = countyShapes.copy()
centroids = countyPoints['geometry'].centroid
countyPoints['geometry'] = centroids

In [42]:
# Roll the industry claims up to each geographic level (county, regional
# commission, CBSA, state), one row per area and industry.

def _claims_by_industry(frame, area_col):
    """Sum 'Claims' per (area_col, Industry), label the area column 'Area',
    and order rows by area ascending then claims descending."""
    totals = frame.groupby([area_col, 'Industry']).agg({'Claims': 'sum'}).reset_index()
    totals = totals.rename(columns={area_col: 'Area'})
    return totals.sort_values(['Area', 'Claims'], ascending=[True, False])


# Temporary constant column so the state total can be grouped like the rest
df['State'] = 'Georgia'

df = df.rename(columns={'INDUSTRY NAME': 'Industry'})

industryCountyClaims = _claims_by_industry(df, 'County')

industryRcClaims = _claims_by_industry(df, 'Regional Commission')

industryMsaClaims = _claims_by_industry(df, 'CBSA Title')

industryStateClaims = _claims_by_industry(df, 'State')

# Remove the temporary column again
df = df.drop(columns='State')

In [43]:
# Stack the four industry rollups (state, county, regional commission, CBSA)
# and renumber the rows. pd.concat replaces DataFrame.append, which was removed
# in pandas 2.0; ignore_index=True is the same as reset_index(drop=True).
industryClaims = pd.concat(
    [industryStateClaims, industryCountyClaims, industryRcClaims, industryMsaClaims],
    ignore_index=True,
)

# Short display label for each industry, chosen by the FIRST keyword in this
# list that the industry name contains. Order matters: "Waste" is listed before
# "Management", so a name containing both is labelled by "Waste".
# Note: a later cell in this notebook recomputes 'Ind' with the label
# "Waste Management" in place of "Support, Waste Management".
industry_keywords = [
    ("Accommodation", "Food Services"),
    ("Waste", "Support, Waste Management"),
    ("Agriculture", "Agriculture"),
    ("Entertainment", "Entertainment"),
    ("Construction", "Construction"),
    ("Educational", "Education"),
    ("Finance", "Finance"),
    ("Health", "Health Care"),
    ("Information", "Information"),
    ("Management", "Enterprise Management"),
    ("Manufacturing", "Manufacturing"),
    ("Mining", "Oil & Gas"),
    ("Other", "Other"),
    ("Technical", "Technical Services"),
    ("Public", "Public Admin"),
    ("Real Estate", "Real Estate"),
    ("Retail Trade", "Retail"),
    ("Transportation", "Transportation"),
    ("Unclassified", "Unclassified"),
    ("Utilities", "Utilities"),
    ("Wholesale", "Wholesale Trade"),
]

# Build the labels from the last keyword to the first, so earlier keywords
# overwrite later ones - the same precedence as the original nested np.where.
# "ts" is the fallback when no keyword matches.
short_labels = "ts"
for keyword, label in reversed(industry_keywords):
    short_labels = np.where(industryClaims.Industry.str.contains(keyword), label, short_labels)
industryClaims['Ind'] = short_labels

In [44]:
# Fix the column order for the exported industry table
industryClaims = industryClaims.loc[:, ['Area', 'Industry', 'Ind', 'Claims']]

In [45]:
# KPI table: start from a copy of the stacked frame and keep one row per area.
# After the per-group reset_index the second index level ('level_1' once
# flattened) is each row's position within its area.
dataK = data.copy()
renumbered = dataK.groupby('Area').apply(lambda grp: grp.reset_index(drop=True))
dataK = renumbered.drop('Area', axis=1).reset_index()

# Keep only the rows sitting at the highest position found in any area
dateMax = dataK['level_1'].max()
at_last_position = dataK['level_1'] == dateMax
dataK = dataK.loc[at_last_position]

In [46]:
# Order the areas from most to fewest claims
dataK = dataK.sort_values('Claims', ascending=False)

In [47]:
# Labor force = employed + unemployed estimates
dataK['LaborForce'] = dataK['Employed'] + dataK['Unemployed']
# Unemployed share of that labor force as a percentage, to one decimal
dataK['Rate'] = ((dataK['Unemployed'] / dataK['LaborForce']) * 100).round(1)

In [48]:
# Left-join the current WARN counts onto each area
dataK = dataK.merge(areaWarns, on='Area', how='left')

In [49]:
# Keep only the columns that feed the KPI output, in display order
kpi_columns = ['Area', 'MarchEmployed', 'CumClaims', 'Employed', 'Unemployed',
               'PctChngEmployed', 'LaborForce', 'Rate', 'CurrentWarns']
dataK = dataK[kpi_columns]

In [50]:
# Index by area name so the JSON export is keyed by area
dataK = dataK.set_index('Area')

In [51]:
# Areas with no match in the WARN lookup have NaN here; treat that as zero.
# The filled column is assigned back instead of calling fillna(inplace=True) on
# a column selection: that form is chained assignment, which pandas warns about
# and which does not update the parent frame under Copy-on-Write.
dataK['CurrentWarns'] = dataK['CurrentWarns'].fillna(0)
countyShapes['CurrentWarns'] = countyShapes['CurrentWarns'].fillna(0)
countyPoints['CurrentWarns'] = countyPoints['CurrentWarns'].fillna(0)

In [52]:
# Cast the count columns to integers
for count_col in ['MarchEmployed', 'Employed', 'Unemployed', 'LaborForce', 'CurrentWarns']:
    dataK[count_col] = dataK[count_col].astype(int)

In [53]:
def human_format(num):
    """Format a number compactly: three significant figures plus a K/M/B/T suffix.

    Examples: 4936482 -> '4.94M', 1234 -> '1.23K', 320 -> '320'.

    Parameters
    ----------
    num : int or float
        Value to format.

    Returns
    -------
    str
        The rounded value with trailing zeros removed, followed by '', 'K',
        'M', 'B' or 'T'. Values too large for 'T' keep the 'T' suffix with a
        larger number in front (1e15 -> '1000T').
    """
    suffixes = ['', 'K', 'M', 'B', 'T']
    # Round to three significant figures first, so 999999 becomes 1M, not 1000K
    num = float('{:.3g}'.format(num))
    magnitude = 0
    # Stop at the last suffix: without this bound a value of 1e15 or more
    # indexed past the end of `suffixes` and raised IndexError.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    # Fixed-point text with trailing zeros (and a bare trailing '.') removed
    mantissa = '{:f}'.format(num).rstrip('0').rstrip('.')
    return '{}{}'.format(mantissa, suffixes[magnitude])

# Replace each count column with its compact display string
for count_col in ['MarchEmployed', 'CumClaims', 'CurrentWarns',
                  'Employed', 'Unemployed', 'LaborForce']:
    dataK[count_col] = dataK[count_col].apply(human_format)

In [54]:
# Render the percentage KPIs as display strings: one decimal place plus '%'
for pct_col in ('PctChngEmployed', 'Rate'):
    dataK[pct_col] = dataK[pct_col].map('{:,.1f}'.format) + '%'

In [55]:
# Trim the chart data file to the columns d3 needs, adding a short
# "Mon DD" label next to the original date string
parsed_dates = pd.to_datetime(data["Date"])
data["date"] = parsed_dates.dt.strftime('%b %d')
chart_columns = ['Area', 'Date', 'date', 'Claims']
data = data[chart_columns]

In [56]:
# Recompute the short industry label. The first keyword in this list that the
# industry name contains decides the label; "ts" is used when none matches.
label_rules = [
    ("Accommodation", "Food Services"),
    ("Waste", "Waste Management"),
    ("Agriculture", "Agriculture"),
    ("Entertainment", "Entertainment"),
    ("Construction", "Construction"),
    ("Educational", "Education"),
    ("Finance", "Finance"),
    ("Health", "Health Care"),
    ("Information", "Information"),
    ("Management", "Enterprise Management"),
    ("Manufacturing", "Manufacturing"),
    ("Mining", "Oil & Gas"),
    ("Other", "Other"),
    ("Technical", "Technical Services"),
    ("Public", "Public Admin"),
    ("Real Estate", "Real Estate"),
    ("Retail Trade", "Retail"),
    ("Transportation", "Transportation"),
    ("Unclassified", "Unclassified"),
    ("Utilities", "Utilities"),
    ("Wholesale", "Wholesale Trade"),
]

# Fold from the last rule to the first so that earlier rules take precedence,
# reproducing the nesting order of the equivalent np.where chain
ind_labels = "ts"
for needle, short_label in reversed(label_rules):
    ind_labels = np.where(industryClaims.Industry.str.contains(needle), short_label, ind_labels)
industryClaims['Ind'] = ind_labels

In [57]:
# Notices dated on or after 1 January 2020
from_2020 = warns['Date'] >= '2020-01-01'
appWarns = warns[from_2020]

In [58]:
# Keep the columns used by the application and give them short names.
# The rename is assigned back rather than done in place, because appWarns is a
# row/column selection of `warns` and an in-place rename on it is chained
# assignment.
appWarns = appWarns[['Date','Company name','City','ZIP','County','Est. Impact','Regional Commission','CBSA Title']]
# The key was previously spelled 'Regional Commissions' (plural), which matches
# no column, so the rename to 'RC' never took effect.
# NOTE(review): the exported warns.csv header changes from 'Regional Commission'
# to 'RC' - confirm nothing downstream reads the old column name.
appWarns = appWarns.rename(columns={'Company name' : 'Company',
                                    'Regional Commission':'RC',
                                    'CBSA Title':'MSA',
                                    'Est. Impact':'Employees'})

In [61]:
# Replace the notice date with a short "Mon DD" display string
short_dates = pd.to_datetime(appWarns["Date"]).dt.strftime('%b %d')
appWarns = appWarns.assign(Date=short_dates)

In [64]:
# ---- Write the application's data files ----

# Mapbox layers: county polygons (fill layer) and county centroids (circle layer)
mapbox_layers = (
    (countyShapes, "../application/app-data/mapbox/countyShapes.geojson"),
    (countyPoints, "../application/app-data/mapbox/countyPoints.geojson"),
)
for layer, target in mapbox_layers:
    layer.to_file(target, driver='GeoJSON')

# CSV tables: claims series for the d3 charts, industry breakdown, and the
# WARN notices used for the point layer in the application
csv_tables = (
    (data, '../application/app-data/uiClaims.csv'),
    (industryClaims, '../application/app-data/industryClaims.csv'),
    (appWarns, '../application/app-data/warns.csv'),
)
for table, target in csv_tables:
    table.to_csv(target, index=False)

# Full cleaned WARN table as a workbook
warns.to_excel('../application/app-data/warnsClean.xlsx', index=False)

# KPI object keyed by area. No path is passed, so to_json returns the JSON
# string, which is shown as this cell's output rather than written to disk.
dataK.to_json(orient='index')

'{"Georgia":{"MarchEmployed":"4.94M","CumClaims":"1.79M","Employed":"3.14M","Unemployed":"2.02M","PctChngEmployed":"-36.3%","LaborForce":"5.16M","Rate":"39.1%","CurrentWarns":"320"},"Atlanta-Sandy Springs-Alpharetta, GA":{"MarchEmployed":"3M","CumClaims":"1.14M","Employed":"1.86M","Unemployed":"1.27M","PctChngEmployed":"-38.0%","LaborForce":"3.13M","Rate":"40.5%","CurrentWarns":"206"},"ARC":{"MarchEmployed":"2.36M","CumClaims":"922K","Employed":"1.44M","Unemployed":"1.03M","PctChngEmployed":"-39.0%","LaborForce":"2.46M","Rate":"41.6%","CurrentWarns":"181"},"Fulton County":{"MarchEmployed":"539K","CumClaims":"240K","Employed":"298K","Unemployed":"266K","PctChngEmployed":"-44.6%","LaborForce":"564K","Rate":"47.1%","CurrentWarns":"77"},"Gwinnett County":{"MarchEmployed":"475K","CumClaims":"176K","Employed":"299K","Unemployed":"196K","PctChngEmployed":"-37.1%","LaborForce":"494K","Rate":"39.5%","CurrentWarns":"21"},"Northwest":{"MarchEmployed":"406K","CumClaims":"153K","Employed":"253K","U