In [69]:
import pandas as pd
pd.set_option('display.max_columns', None)

import numpy as np

import geopandas as gpd
import fiona

import matplotlib.pyplot as plt
import seaborn as sns

import json

In [70]:
counties = pd.read_excel('covid-recovery-data.xlsx', sheet_name="county-population")
jobs = pd.read_excel('covid-recovery-data.xlsx', sheet_name="county-jobs-summary", usecols="B,C,D,F,G,I")
ccvi = pd.read_excel('covid-recovery-data.xlsx', sheet_name="ccvi-county", usecols="D:K")
countyShape = gpd.read_file("spatial/ga-counties.geojson")
countyClaims = pd.read_excel('covid-recovery-data.xlsx', sheet_name="unemployment-claims-monthly")
hospitals = gpd.read_file("spatial/hospitals.geojson")

In [71]:
msa = pd.read_excel('../../data/geographies/cbsa.xlsx', sheet_name='cbsa',usecols='M,D')
rc = pd.read_excel('../../data/geographies/regional-commissions.xlsx', sheet_name='county-rc',usecols='A,C')
msa.rename(columns={'area_code':'FIPS'}, inplace=True)
rc.rename(columns={'area_code':'FIPS'}, inplace=True)
msa['FIPS'] = msa['FIPS'].astype(str)
rc['FIPS'] = rc['FIPS'].astype(str)

In [72]:
hospitals = gpd.read_file("spatial/hospitals.geojson")

In [73]:
hospitals.sort_values('BEDS', inplace=True)

In [74]:
hospitals.BEDS = np.where(hospitals.BEDS < 0, 82,hospitals.BEDS)

In [75]:
counties = pd.merge(counties,jobs,how='left',on='FIPS')

In [76]:
counties = pd.merge(counties,ccvi,how='left',on='FIPS')

In [77]:
counties['FIPS'] = counties['FIPS'].astype(str)

In [78]:
counties = pd.merge(counties,countyShape, how='left', left_on='FIPS', right_on='GEOID')

In [79]:
del counties['GEOID']

In [80]:
# groupby county fips to get number of hospitals and number of beds per county
countyHospitals = hospitals.groupby('COUNTYFIPS') \
                           .agg({'NAME':'count', 'BEDS': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'NAME':'Hospitals',
                                            'BEDS':'Beds',
                                            'COUNTYFIPS':'FIPS'})

In [81]:
counties = pd.merge(counties,countyHospitals, how='left', on='FIPS')

In [82]:
counties['Hospitals'].fillna(0, inplace=True)
counties['Beds'].fillna(0, inplace=True)

In [83]:
counties['Beds_per_1000'] = ( counties['Beds'] / counties['Total_Population'] ) * 1000
counties['Beds_per_1000_Elderly'] = ( counties['Beds'] / counties['Population_Over_65'] ) * 1000

In [84]:
# get clean covid data
covid = pd.read_csv('../application/app-data/covid-county-clean.csv')
covid = covid[['date','area_name','area_code','cases','deaths']]

covid.date = pd.to_datetime(covid.date)

covid['area_code'] = covid['area_code'].astype(int)
covid['area_code'] = covid['area_code'].astype(str)

# get new data from nyt
covidCountyNew = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')
covidCountyNew = covidCountyNew.loc[covidCountyNew['state'] == 'Georgia']

covidCountyNew = covidCountyNew[['date','county','fips','cases','deaths']]

# get state level data
covidStateNew = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv')
covidStateNew = covidStateNew.loc[covidStateNew['state'] == 'Georgia']

covidCountyNew.rename(columns={'county' : 'area_name',
                               'fips'   : 'area_code'}, inplace=True)

covidStateNew.rename(columns={'state' : 'area_name',
                              'fips'  : 'area_code'}, inplace=True)

covidNew = covidStateNew.append(covidCountyNew)

# the covid data is reported as a cumulative sum
# we want the daily counts as well for different charting options
covidNew.date = pd.to_datetime(covidNew.date)
covidNew.sort_values(['area_name', 'date'], ascending=[True, True], inplace=True)



covidNew.dropna(subset=['area_code'], inplace=True)

covidNew['area_code'] = covidNew['area_code'].astype(int)
covidNew['area_code'] = covidNew['area_code'].astype(str)

covidNew.reset_index(inplace=True, drop=True)

covidNew.sort_values(['area_name', 'date'], ascending=[True, True], inplace=True)

# filter only dates AFTER 4/30
covidNew = covidNew.loc[covidNew['date'] >= '2020-04-30']
# append to cleaned sheet
covid = covid.append(covidNew)
covid.sort_values(['area_code','date'], inplace=True)
covid.reset_index(drop=True,inplace=True)

In [85]:
covid['daily_cases'] = np.where(covid['area_code'] == covid['area_code'].shift(), covid.cases - covid.cases.shift(), np.nan)

covid['daily_cases'].fillna(covid['cases'], inplace=True)

covid['daily_deaths'] = np.where(covid['area_code'] == covid['area_code'].shift(), covid.deaths - covid.deaths.shift(), np.nan)
covid['daily_deaths'].fillna(covid['deaths'], inplace=True)

In [86]:
covid.sort_values(['area_name', 'date'],inplace=True)

In [87]:
# we dont' ned the state here as we'r just trying to mak a fram
# at the county level
covid = covid.loc[covid['area_name'] != 'Georgia']
covid.rename(columns={'area_code':'FIPS'}, inplace=True)

In [88]:
covid['FIPS'] = covid['FIPS'].astype(str)

In [89]:
countyList = counties[['FIPS','County','Total_Population']]

In [90]:
covid = pd.merge(covid,countyList,how='left',on='FIPS')
covidMsa = pd.merge(covid,msa,how='left',on='FIPS')
covidRc = pd.merge(covid,rc,how='left',on='FIPS')
covidGa = covid.copy()
covidGa['State'] = 'Georgia'

In [91]:
covidTotals = covid.groupby('FIPS') \
                   .agg({'daily_cases':'sum', 'daily_deaths': 'sum'}) \
                   .reset_index() \
                   .rename(columns={'daily_cases':'Cases',
                                    'daily_deaths':'Deaths'})

# add total new cases last two weeks
cutOff = covid['date'].max() - pd.Timedelta(days=13)
cutOffOne = covid['date'].max() - pd.Timedelta(days=6)
covidLastTwo = covid.loc[covid['date'] >= cutOff]

In [92]:


covidTwo = covidLastTwo.groupby('FIPS') \
                                .agg({'daily_cases':'sum','daily_deaths':'sum'}) \
                                .reset_index() \
                                .rename(columns={'daily_cases':'Recent_Cases',
                                            'daily_deaths':'Recent_Deaths'})

# add daily growth rate over last 14 days
covidLastTwo['Pct_Chng_Case'] = covidLastTwo.groupby('FIPS')['cases'].pct_change()
covidLastTwo.Pct_Chng_Case = covidLastTwo.Pct_Chng_Case * 100
covidLastTwo['Pct_Chng_Case'].fillna(0, inplace=True)

covidTwoAvgChng = covidLastTwo.groupby('FIPS') \
                                .agg({'Pct_Chng_Case':'mean'}) \
                                .reset_index() \
                                .rename(columns={'Pct_Chng_Case':'avg_daily_change'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


In [93]:
# new cases over the last 7 days
covidThisWk = covid.loc[covid['date'] >= cutOffOne]
covidThisWk = covidThisWk[['date','FIPS','daily_cases','daily_deaths']]
covidThisWk.rename(columns={'daily_cases' : 'daily_cases_this_wk',
                            'daily_deaths': 'daily_deaths_this_wk'}, inplace=True)
# new cases over the previous 7-day period
covidPrevWk = covid[(covid['date'] >= cutOff) & (covid['date'] < cutOffOne)]
covidPrevWk = covidPrevWk[['date','FIPS','daily_cases','daily_deaths']]
covidPrevWk.rename(columns={'daily_cases' : 'daily_cases_prev_wk',
                            'daily_deaths': 'daily_deaths_prev_wk'}, inplace=True)
# total change current week to previous week
covidThisWk = covidThisWk.groupby('FIPS') \
                           .agg({'daily_cases_this_wk':'sum', 'daily_deaths_this_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_this_wk' : 'cases_this_wk',
                                            'daily_deaths_this_wk': 'deaths_this_wk'})

covidPrevWk = covidPrevWk.groupby('FIPS') \
                           .agg({'daily_cases_prev_wk':'sum', 'daily_deaths_prev_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_prev_wk' : 'cases_prev_wk',
                                            'daily_deaths_prev_wk': 'deaths_prev_wk'})

covidWeek = pd.merge(covidThisWk,covidPrevWk,how='left',on='FIPS')

covidWeek['cases_diff'] = covidWeek['cases_this_wk'] - covidWeek['cases_prev_wk']
covidWeek['deaths_diff'] = covidWeek['deaths_this_wk'] - covidWeek['deaths_prev_wk']

In [94]:
# merge all of these covid totaled files at the county level
# covidTwoAvgChng,covidWeek,covidTwo,covidTotals
covid = pd.merge(covidTotals,covidTwo,how='left',on='FIPS')
covid = pd.merge(covid,covidWeek,how='left',on='FIPS')
covid = pd.merge(covid,covidTwoAvgChng,how='left',on='FIPS')

In [95]:
# repeat this process but at the MSA level
covidMsa.dropna(inplace=True)
covidMsa.reset_index(drop=True,inplace=True)

In [96]:
covidMsa = covidMsa.groupby(['CBSA Title','date']) \
                       .agg({'daily_cases':'sum', 'daily_deaths': 'sum'}) \
                       .reset_index() \
                       .rename(columns={'daily_cases':'Cases',
                                        'daily_deaths':'Deaths',
                                        'CBSA Title':'MSA'})

In [97]:
covidMsaTotals = covidMsa.groupby('MSA') \
                           .agg({'Cases':'sum', 'Deaths': 'sum'}) \
                           .reset_index()

In [98]:
covidMsaTotals.sort_values('Cases', inplace=True)

In [99]:
# add total new cases last two weeks
cutOff = covidMsa['date'].max() - pd.Timedelta(days=13)
cutOffOne = covidMsa['date'].max() - pd.Timedelta(days=6)
covidLastTwo = covidMsa.loc[covidMsa['date'] >= cutOff]

covidTwo = covidLastTwo.groupby('MSA') \
                                .agg({'Cases':'sum','Deaths':'sum'}) \
                                .reset_index() \
                                .rename(columns={'Cases':'Recent_Cases',
                                                 'Deaths':'Recent_Deaths'})

In [100]:
# add daily growth rate over last 14 days
covidLastTwo['Pct_Chng_Case'] = covidLastTwo.groupby('MSA')['Cases'].pct_change()
covidLastTwo['Pct_Chng_Case']= covidLastTwo['Pct_Chng_Case'] * 100
covidLastTwo['Pct_Chng_Case'].fillna(0, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [101]:


covidTwoAvgChng = covidLastTwo.groupby('MSA') \
                                .agg({'Pct_Chng_Case':'mean'}) \
                                .reset_index() \
                                .rename(columns={'Pct_Chng_Case':'avg_daily_change'})

# new cases over the last 7 days
covidThisWk = covidMsa.loc[covidMsa['date'] >= cutOffOne]
covidThisWk = covidThisWk[['date','MSA','Cases','Deaths']]
covidThisWk.rename(columns={'Cases' : 'daily_cases_this_wk',
                            'Deaths': 'daily_deaths_this_wk'}, inplace=True)
# new cases over the previous 7-day period
covidPrevWk = covidMsa[(covidMsa['date'] >= cutOff) & (covidMsa['date'] < cutOffOne)]
covidPrevWk = covidPrevWk[['date','MSA','Cases','Deaths']]
covidPrevWk.rename(columns={'Cases' : 'daily_cases_prev_wk',
                            'Deaths': 'daily_deaths_prev_wk'}, inplace=True)
# total change current week to previous week
covidThisWk = covidThisWk.groupby('MSA') \
                           .agg({'daily_cases_this_wk':'sum', 'daily_deaths_this_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_this_wk' : 'cases_this_wk',
                                            'daily_deaths_this_wk': 'deaths_this_wk'})

covidPrevWk = covidPrevWk.groupby('MSA') \
                           .agg({'daily_cases_prev_wk':'sum', 'daily_deaths_prev_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_prev_wk' : 'cases_prev_wk',
                                            'daily_deaths_prev_wk': 'deaths_prev_wk'})

covidWeek = pd.merge(covidThisWk,covidPrevWk,how='left',on='MSA')

covidWeek['cases_diff'] = covidWeek['cases_this_wk'] - covidWeek['cases_prev_wk']
covidWeek['deaths_diff'] = covidWeek['deaths_this_wk'] - covidWeek['deaths_prev_wk']

# merge all of these covid totaled files at the county level
# covidTwoAvgChng,covidWeek,covidTwo,covidTotals
covidMsa = pd.merge(covidMsaTotals,covidTwo,how='left',on='MSA')
covidMsa = pd.merge(covidMsa,covidWeek,how='left',on='MSA')
covidMsa = pd.merge(covidMsa,covidTwoAvgChng,how='left',on='MSA')

In [102]:
# repeat this process but at the regional commission level
covidRc.dropna(inplace=True)
covidRc = covidRc.groupby(['Regional Commission','date']) \
                       .agg({'daily_cases':'sum', 'daily_deaths': 'sum'}) \
                       .reset_index() \
                       .rename(columns={'daily_cases':'Cases',
                                        'daily_deaths':'Deaths',
                                        'Regional Commission':'RC'})

covidRcTotals = covidRc.groupby('RC') \
                           .agg({'Cases':'sum', 'Deaths': 'sum'}) \
                           .reset_index()

# add total new cases last two weeks
cutOff = covidRc['date'].max() - pd.Timedelta(days=13)
cutOffOne = covidRc['date'].max() - pd.Timedelta(days=6)
covidLastTwo = covidRc.loc[covidRc['date'] >= cutOff]

covidTwo = covidLastTwo.groupby('RC') \
                                .agg({'Cases':'sum','Deaths':'sum'}) \
                                .reset_index() \
                                .rename(columns={'Cases':'Recent_Cases',
                                                 'Deaths':'Recent_Deaths'})

# add daily growth rate over last 14 days
covidLastTwo['Pct_Chng_Case'] = covidLastTwo.groupby('RC')['Cases'].pct_change()
covidLastTwo.Pct_Chng_Case = covidLastTwo.Pct_Chng_Case * 100
covidLastTwo['Pct_Chng_Case'].fillna(0, inplace=True)

covidTwoAvgChng = covidLastTwo.groupby('RC') \
                                .agg({'Pct_Chng_Case':'mean'}) \
                                .reset_index() \
                                .rename(columns={'Pct_Chng_Case':'avg_daily_change'})

# new cases over the last 7 days
covidThisWk = covidRc.loc[covidRc['date'] >= cutOffOne]
covidThisWk = covidThisWk[['date','RC','Cases','Deaths']]
covidThisWk.rename(columns={'Cases' : 'daily_cases_this_wk',
                            'Deaths': 'daily_deaths_this_wk'}, inplace=True)
# new cases over the previous 7-day period
covidPrevWk = covidRc[(covidRc['date'] >= cutOff) & (covidRc['date'] < cutOffOne)]
covidPrevWk = covidPrevWk[['date','RC','Cases','Deaths']]
covidPrevWk.rename(columns={'Cases' : 'daily_cases_prev_wk',
                            'Deaths': 'daily_deaths_prev_wk'}, inplace=True)
# total change current week to previous week
covidThisWk = covidThisWk.groupby('RC') \
                           .agg({'daily_cases_this_wk':'sum', 'daily_deaths_this_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_this_wk' : 'cases_this_wk',
                                            'daily_deaths_this_wk': 'deaths_this_wk'})

covidPrevWk = covidPrevWk.groupby('RC') \
                           .agg({'daily_cases_prev_wk':'sum', 'daily_deaths_prev_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_prev_wk' : 'cases_prev_wk',
                                            'daily_deaths_prev_wk': 'deaths_prev_wk'})

covidWeek = pd.merge(covidThisWk,covidPrevWk,how='left',on='RC')

covidWeek['cases_diff'] = covidWeek['cases_this_wk'] - covidWeek['cases_prev_wk']
covidWeek['deaths_diff'] = covidWeek['deaths_this_wk'] - covidWeek['deaths_prev_wk']

# merge all of these covid totaled files at the county level
# covidTwoAvgChng,covidWeek,covidTwo,covidTotals
covidRc = pd.merge(covidRcTotals,covidTwo,how='left',on='RC')
covidRc = pd.merge(covidRc,covidWeek,how='left',on='RC')
covidRc = pd.merge(covidRc,covidTwoAvgChng,how='left',on='RC')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [103]:
# repeat this process but at the regional commission level
covidGa = covidGa.groupby(['State','date']) \
                       .agg({'daily_cases':'sum', 'daily_deaths': 'sum'}) \
                       .reset_index() \
                       .rename(columns={'daily_cases':'Cases',
                                        'daily_deaths':'Deaths',
                                        'State':'GA'})

covidGaTotals = covidGa.groupby('GA') \
                           .agg({'Cases':'sum', 'Deaths': 'sum'}) \
                           .reset_index()

# add total new cases last two weeks
cutOff = covidGa['date'].max() - pd.Timedelta(days=13)
cutOffOne = covidGa['date'].max() - pd.Timedelta(days=6)
covidLastTwo = covidGa.loc[covidGa['date'] >= cutOff]

covidTwo = covidLastTwo.groupby('GA') \
                                .agg({'Cases':'sum','Deaths':'sum'}) \
                                .reset_index() \
                                .rename(columns={'Cases':'Recent_Cases',
                                                 'Deaths':'Recent_Deaths'})

# add daily growth rate over last 14 days
covidLastTwo['Pct_Chng_Case'] = covidLastTwo.groupby('GA')['Cases'].pct_change()
covidLastTwo.Pct_Chng_Case = covidLastTwo.Pct_Chng_Case * 100
covidLastTwo['Pct_Chng_Case'].fillna(0, inplace=True)

covidTwoAvgChng = covidLastTwo.groupby('GA') \
                                .agg({'Pct_Chng_Case':'mean'}) \
                                .reset_index() \
                                .rename(columns={'Pct_Chng_Case':'avg_daily_change'})

# new cases over the last 7 days
covidThisWk = covidGa.loc[covidGa['date'] >= cutOffOne]
covidThisWk = covidThisWk[['date','GA','Cases','Deaths']]
covidThisWk.rename(columns={'Cases' : 'daily_cases_this_wk',
                            'Deaths': 'daily_deaths_this_wk'}, inplace=True)
# new cases over the previous 7-day period
covidPrevWk = covidGa[(covidGa['date'] >= cutOff) & (covidGa['date'] < cutOffOne)]
covidPrevWk = covidPrevWk[['date','GA','Cases','Deaths']]
covidPrevWk.rename(columns={'Cases' : 'daily_cases_prev_wk',
                            'Deaths': 'daily_deaths_prev_wk'}, inplace=True)
# total change current week to previous week
covidThisWk = covidThisWk.groupby('GA') \
                           .agg({'daily_cases_this_wk':'sum', 'daily_deaths_this_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_this_wk' : 'cases_this_wk',
                                            'daily_deaths_this_wk': 'deaths_this_wk'})

covidPrevWk = covidPrevWk.groupby('GA') \
                           .agg({'daily_cases_prev_wk':'sum', 'daily_deaths_prev_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_prev_wk' : 'cases_prev_wk',
                                            'daily_deaths_prev_wk': 'deaths_prev_wk'})

covidWeek = pd.merge(covidThisWk,covidPrevWk,how='left',on='GA')

covidWeek['cases_diff'] = covidWeek['cases_this_wk'] - covidWeek['cases_prev_wk']
covidWeek['deaths_diff'] = covidWeek['deaths_this_wk'] - covidWeek['deaths_prev_wk']

# merge all of these covid totaled files at the county level
# covidTwoAvgChng,covidWeek,covidTwo,covidTotals
covidGa = pd.merge(covidGaTotals,covidTwo,how='left',on='GA')
covidGa = pd.merge(covidGa,covidWeek,how='left',on='GA')
covidGa = pd.merge(covidGa,covidTwoAvgChng,how='left',on='GA')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [104]:
rc = pd.merge(rc,countyList,how='left',on='FIPS')
rc = rc.groupby('Regional Commission').agg({'Total_Population':'sum'}).reset_index().rename(columns={'Regional Commission':'RC'})
covidRc = pd.merge(covidRc,rc,how='left',on='RC')

msa = pd.merge(msa,countyList,how='left',on='FIPS')
msa = msa.groupby('CBSA Title').agg({'Total_Population':'sum'}).reset_index().rename(columns={'CBSA Title':'MSA'})
covidMsa = pd.merge(covidMsa,msa,how='left',on='MSA')

gaPop = countyList.copy()
gaPop['GA'] = 'Georgia'
gaPop = gaPop.groupby('GA').agg({'Total_Population':'sum'}).reset_index()
covidGa = pd.merge(covidGa,gaPop,how='left',on='GA')

In [105]:
covid = pd.merge(countyList,covid,how='left',on='FIPS')

In [106]:
covid.fillna(0, inplace=True)

In [107]:
# the calcs that are performed on the counties frame below on covid stuff
covid['CasesPer1K'] = ( covid['Cases'] / covid['Total_Population'] ) * 1000
covid['CasesWeekChng100k'] = ( covid['cases_diff'] / covid['Total_Population'] ) * 100000
covid['DeathsWeekChng100k'] = ( covid['deaths_diff'] / covid['Total_Population'] ) * 100000

covidRc['CasesPer1K'] = ( covidRc['Cases'] / covidRc['Total_Population'] ) * 1000
covidRc['CasesWeekChng100k'] = ( covidRc['cases_diff'] / covidRc['Total_Population'] ) * 100000
covidRc['DeathsWeekChng100k'] = ( covidRc['deaths_diff'] / covidRc['Total_Population'] ) * 100000

covidMsa['CasesPer1K'] = ( covidMsa['Cases'] / covidMsa['Total_Population'] ) * 1000
covidMsa['CasesWeekChng100k'] = ( covidMsa['cases_diff'] / covidMsa['Total_Population'] ) * 100000
covidMsa['DeathsWeekChng100k'] = ( covidMsa['deaths_diff'] / covidMsa['Total_Population'] ) * 100000

covidGa['CasesPer1K'] = ( covidGa['Cases'] / covidGa['Total_Population'] ) * 1000
covidGa['CasesWeekChng100k'] = ( covidGa['cases_diff'] / covidGa['Total_Population'] ) * 100000
covidGa['DeathsWeekChng100k'] = ( covidGa['deaths_diff'] / covidGa['Total_Population'] ) * 100000
# then shape those frames to only have the necessary columns
# stack and push to json

In [108]:
covid.Cases = covid.Cases.astype(int)
covid.Deaths = covid.Deaths.astype(int)
covid.Recent_Cases = covid.Recent_Cases.astype(int)
covid.Recent_Deaths = covid.Recent_Deaths.astype(int)
covid.cases_this_wk = covid.cases_this_wk.astype(int)
covid.deaths_this_wk = covid.deaths_this_wk.astype(int)
covid.cases_prev_wk = covid.cases_prev_wk.astype(int)
covid.deaths_prev_wk = covid.deaths_prev_wk.astype(int)
covid.cases_diff = covid.cases_diff.astype(int)
covid.deaths_diff = covid.deaths_diff.astype(int)
covid.avg_daily_change = covid.avg_daily_change.round(1)
covid.CasesPer1K = covid.CasesPer1K.round(1)
covid.CasesWeekChng100k = covid.CasesWeekChng100k.round(1)
covid.DeathsWeekChng100k = covid.DeathsWeekChng100k.round(1)

covidRc.Cases = covidRc.Cases.astype(int)
covidRc.Deaths = covidRc.Deaths.astype(int)
covidRc.Recent_Cases = covidRc.Recent_Cases.astype(int)
covidRc.Recent_Deaths = covidRc.Recent_Deaths.astype(int)
covidRc.cases_this_wk = covidRc.cases_this_wk.astype(int)
covidRc.deaths_this_wk = covidRc.deaths_this_wk.astype(int)
covidRc.cases_prev_wk = covidRc.cases_prev_wk.astype(int)
covidRc.deaths_prev_wk = covidRc.deaths_prev_wk.astype(int)
covidRc.cases_diff = covidRc.cases_diff.astype(int)
covidRc.deaths_diff = covidRc.deaths_diff.astype(int)
covidRc.avg_daily_change = covidRc.avg_daily_change.round(1)
covidRc.CasesPer1K = covidRc.CasesPer1K.round(1)
covidRc.CasesWeekChng100k = covidRc.CasesWeekChng100k.round(1)
covidRc.DeathsWeekChng100k = covidRc.DeathsWeekChng100k.round(1)

covidMsa.Cases = covidMsa.Cases.astype(int)
covidMsa.Deaths = covidMsa.Deaths.astype(int)
covidMsa.Recent_Cases = covidMsa.Recent_Cases.astype(int)
covidMsa.Recent_Deaths = covidMsa.Recent_Deaths.astype(int)
covidMsa.cases_this_wk = covidMsa.cases_this_wk.astype(int)
covidMsa.deaths_this_wk = covidMsa.deaths_this_wk.astype(int)
covidMsa.cases_prev_wk = covidMsa.cases_prev_wk.astype(int)
covidMsa.deaths_prev_wk = covidMsa.deaths_prev_wk.astype(int)
covidMsa.cases_diff = covidMsa.cases_diff.astype(int)
covidMsa.deaths_diff = covidMsa.deaths_diff.astype(int)
covidMsa.avg_daily_change = covidMsa.avg_daily_change.round(1)
covidMsa.CasesPer1K = covidMsa.CasesPer1K.round(1)
covidMsa.CasesWeekChng100k = covidMsa.CasesWeekChng100k.round(1)
covidMsa.DeathsWeekChng100k = covidMsa.DeathsWeekChng100k.round(1)

covidGa.Cases = covidGa.Cases.astype(int)
covidGa.Deaths = covidGa.Deaths.astype(int)
covidGa.Recent_Cases = covidGa.Recent_Cases.astype(int)
covidGa.Recent_Deaths = covidGa.Recent_Deaths.astype(int)
covidGa.cases_this_wk = covidGa.cases_this_wk.astype(int)
covidGa.deaths_this_wk = covidGa.deaths_this_wk.astype(int)
covidGa.cases_prev_wk = covidGa.cases_prev_wk.astype(int)
covidGa.deaths_prev_wk = covidGa.deaths_prev_wk.astype(int)
covidGa.cases_diff = covidGa.cases_diff.astype(int)
covidGa.deaths_diff = covidGa.deaths_diff.astype(int)
covidGa.avg_daily_change = covidGa.avg_daily_change.round(1)
covidGa.CasesPer1K = covidGa.CasesPer1K.round(1)
covidGa.CasesWeekChng100k = covidGa.CasesWeekChng100k.round(1)
covidGa.DeathsWeekChng100k = covidGa.DeathsWeekChng100k.round(1)

In [109]:
covid.rename(columns={'County':'area'}, inplace=True)
covidRc.rename(columns={'RC':'area'}, inplace=True)
covidMsa.rename(columns={'MSA':'area'}, inplace=True)
covidGa.rename(columns={'GA':'area'}, inplace=True)

counties = pd.merge(counties,covid,how='left',on='FIPS')
counties['Cases'].fillna(0, inplace=True)
counties['Deaths'].fillna(0, inplace=True)

del covid['FIPS']

covid = covid.append( [covidRc,covidMsa,covidGa] )

covid.reset_index(drop=True, inplace=True)

In [110]:
del counties['area']
del counties['Total_Population_y']
counties.rename(columns={'Total_Population_x':'Total_Population'}, inplace=True)

In [111]:
# add custom columns on rates, etc.
counties['COVID_Death_Rate'] = ( counties['Deaths'] / counties['Cases'] ) * 100
counties['COVID_Deaths_Per_1000'] = ( counties['Deaths'] / counties['Total_Population'] ) * 1000
counties['COVID_Cases_Per_1000'] = ( counties['Cases'] / counties['Total_Population'] ) * 1000
# add total new cases per 100000
counties['Cases_Wk_Chng_100k'] = ( counties['cases_diff'] / counties['Total_Population'] ) * 100000
# add total new daths per 100000
counties['Deaths_Wk_Chng_100k'] = ( counties['deaths_diff'] / counties['Total_Population'] ) * 100000

In [112]:
countyClaims = countyClaims.melt(id_vars=["FIPS", "County","Year"], var_name="Month", value_name="Claims")

In [113]:
countyClaimsNew = countyClaims.loc[countyClaims['Year'] == 2020]
countyClaimsNew = countyClaimsNew.loc[countyClaimsNew['Month'] == "March"]
countyClaimsLast = countyClaims.loc[countyClaims['Year'] == 2020]
countyClaimsLast = countyClaimsLast.loc[countyClaimsLast['Month'] == "February"]

claims = pd.merge(countyClaimsNew, countyClaimsLast, how='left', on='FIPS')

In [114]:
claims.rename(columns={'Claims_x' : 'March_Claims',
                       'Claims_y' : 'February_Claims'}, inplace=True)

claims['Claims_Pct_Change'] = ( ( claims['March_Claims'] - claims['February_Claims'] ) / claims['February_Claims'] ) * 100

In [115]:
claims['Claims_Pct_Change'] = claims['Claims_Pct_Change'].round(1)

In [116]:
claims['FIPS'] = claims['FIPS'].astype(str)

In [117]:
claims = claims[['FIPS','March_Claims','February_Claims','Claims_Pct_Change']]

In [118]:
claims.sort_values('February_Claims', inplace=True)

In [119]:
counties = pd.merge(counties, claims, how='left', on='FIPS')

In [120]:
counties.sort_values('February_Claims', inplace=True)

In [121]:
counties['Cases_1000_Norm'] = (counties.iloc[ : , 29 ]-counties.iloc[ : , 29 ].min())/(counties.iloc[ : , 29 ].max()-counties.iloc[ : , 29 ].min()) * 100

In [122]:
counties = counties[['FIPS','County','Total_Population','Population_Under_18','Population_Over_18','Population_Over_65',
                     'Population_Pct_Over_65','Jobs','Jobs_Frequent_Disease_Exposure','Jobs_Pct_Disease_Exposure',
                     'Jobs_Frequent_Physical_Proximity','Jobs_Pct_Prox','Socioeconomic_Status',
                     'Household_Comp_Disability','Minority_Status_Language','Housing_Transportation',
                     'Epidemiology','Healthcare_System','CCVI_Score','Hospitals','Beds','Beds_per_1000','Pct_Uninsured',
                     'Beds_per_1000_Elderly','Cases','Deaths','COVID_Death_Rate','COVID_Deaths_Per_1000','COVID_Cases_Per_1000','Cases_1000_Norm',
                     'Deaths_Wk_Chng_100k','Cases_Wk_Chng_100k','avg_daily_change',
                     'March_Claims','February_Claims','Claims_Pct_Change','geometry']]

counties.rename(columns={'avg_daily_change' : 'Cases_Avg_Pct_Chng_Daily'}, inplace=True)

In [123]:
counties['Cases_1000_Norm'].fillna(0,inplace=True)
counties['Deaths_Wk_Chng_100k'].fillna(0,inplace=True)
counties['Cases_Wk_Chng_100k'].fillna(0,inplace=True)
counties['Cases_Avg_Pct_Chng_Daily'].fillna(0,inplace=True)
counties['COVID_Death_Rate'].fillna(0,inplace=True)
counties['COVID_Deaths_Per_1000'].fillna(0,inplace=True)

In [124]:
counties = gpd.GeoDataFrame(counties, geometry='geometry')

In [125]:
# round all values in your tilesets
counties.Population_Pct_Over_65 = counties.Population_Pct_Over_65.round(1)
counties.Jobs_Frequent_Disease_Exposure = counties.Jobs_Frequent_Disease_Exposure.round(1)
counties.Jobs_Pct_Disease_Exposure = counties.Jobs_Pct_Disease_Exposure.round(1)
counties.Jobs_Frequent_Physical_Proximity = counties.Jobs_Frequent_Physical_Proximity.round(1)
counties.Jobs_Pct_Prox = counties.Jobs_Pct_Prox.round(1)
counties.Socioeconomic_Status = counties.Socioeconomic_Status.round(2)
counties.Household_Comp_Disability = counties.Household_Comp_Disability.round(2)
counties.Minority_Status_Language = counties.Minority_Status_Language.round(2)
counties.Housing_Transportation = counties.Housing_Transportation.round(2)
counties.Epidemiology = counties.Epidemiology.round(2)
counties.Healthcare_System = counties.Healthcare_System.round(2)
counties.CCVI_Score = counties.CCVI_Score.round(2)
counties.Hospitals = counties.Hospitals.astype(int)
counties.Beds = counties.Beds.astype(int)
counties.Beds_per_1000 = counties.Beds_per_1000.round(1)
counties.Pct_Uninsured = counties.Pct_Uninsured.round(1)
counties.Beds_per_1000_Elderly = counties.Beds_per_1000_Elderly.round(1)
counties.COVID_Cases_Per_1000 = counties.COVID_Cases_Per_1000.round(1)
counties.Cases_1000_Norm = counties.Cases_1000_Norm.round(1)
counties.Deaths_Wk_Chng_100k = counties.Deaths_Wk_Chng_100k.round(1)
counties.Cases_Wk_Chng_100k = counties.Cases_Wk_Chng_100k.round(1)
counties.Cases_Avg_Pct_Chng_Daily = counties.Cases_Avg_Pct_Chng_Daily.round(1)
counties.COVID_Death_Rate = counties.COVID_Death_Rate.round(1)
counties.COVID_Deaths_Per_1000 = counties.COVID_Deaths_Per_1000.round(1)

In [126]:
counties.head()

Unnamed: 0,FIPS,County,Total_Population,Population_Under_18,Population_Over_18,Population_Over_65,Population_Pct_Over_65,Jobs,Jobs_Frequent_Disease_Exposure,Jobs_Pct_Disease_Exposure,Jobs_Frequent_Physical_Proximity,Jobs_Pct_Prox,Socioeconomic_Status,Household_Comp_Disability,Minority_Status_Language,Housing_Transportation,Epidemiology,Healthcare_System,CCVI_Score,Hospitals,Beds,Beds_per_1000,Pct_Uninsured,Beds_per_1000_Elderly,Cases,Deaths,COVID_Death_Rate,COVID_Deaths_Per_1000,COVID_Cases_Per_1000,Cases_1000_Norm,Deaths_Wk_Chng_100k,Cases_Wk_Chng_100k,Cases_Avg_Pct_Chng_Daily,March_Claims,February_Claims,Claims_Pct_Change,geometry
49,13101,Echols County,3994,861,3133,646,16.2,854.267832,6.4,0.8,118.7,13.9,0.86,0.27,0.88,0.26,0.07,0.54,0.48,0,0,0.0,23.3,0.0,5,0,0.0,0.0,1.3,0.0,0.0,-25.0,0.4,28,1,2700.0,"MULTIPOLYGON (((-83.13662 30.62524, -83.13510 ..."
117,13239,Quitman County,2276,421,1855,608,26.7,440.061898,3.5,0.8,81.8,18.6,0.98,0.97,0.87,0.37,0.95,0.72,0.99,0,0,0.0,12.3,0.0,5,1,20.0,0.4,2.2,0.0,0.0,87.9,1.3,9,1,800.0,"MULTIPOLYGON (((-85.14181 31.78213, -85.13713 ..."
31,13065,Clinch County,6743,1810,4933,1023,15.2,2513.42463,84.6,3.4,369.1,14.7,0.98,0.97,0.78,0.97,0.66,0.54,0.98,1,25,3.7,13.6,24.4,8,0,0.0,0.0,1.2,0.0,0.0,14.8,0.1,29,1,2800.0,"MULTIPOLYGON (((-82.97070 30.93645, -82.97050 ..."
151,13307,Webster County,2613,625,1988,547,20.9,580.132929,5.8,1.0,95.0,16.4,0.7,0.96,0.91,0.21,0.68,0.38,0.84,0,0,0.0,12.4,0.0,10,2,20.0,0.8,3.8,4.2,76.5,38.3,1.9,15,3,400.0,"MULTIPOLYGON (((-84.65501 31.96153, -84.64536 ..."
130,13265,Taliaferro County,1665,242,1423,411,24.7,303.208929,3.9,1.3,57.3,18.9,0.98,0.91,0.63,0.97,0.73,0.41,0.96,0,0,0.0,14.3,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,29,4,625.0,"MULTIPOLYGON (((-83.01087 33.58560, -83.00047 ..."


In [127]:
countyPoints = counties.copy()

In [128]:
countyPoints['geometry'] = counties['geometry'].centroid

In [129]:
countyPoints.sort_values('Cases_1000_Norm', inplace=True)
countyPoints['Cases_1000_Norm'].round(1)
counties['COVID_Death_Rate'].fillna(0, inplace=True)
countyPoints['COVID_Death_Rate'].fillna(0, inplace=True)

In [130]:
countyData = counties[['FIPS','County','Cases','Deaths','Total_Population','Population_Under_18','Population_Over_18','Population_Over_65',
                     'Population_Pct_Over_65','Jobs','Jobs_Frequent_Disease_Exposure','Jobs_Pct_Disease_Exposure',
                     'Jobs_Frequent_Physical_Proximity','Jobs_Pct_Prox','Socioeconomic_Status',
                     'Household_Comp_Disability','Minority_Status_Language','Housing_Transportation',
                     'Epidemiology','Healthcare_System','CCVI_Score','Hospitals','Beds','Beds_per_1000','Pct_Uninsured',
                     'Beds_per_1000_Elderly','COVID_Death_Rate','COVID_Deaths_Per_1000','COVID_Cases_Per_1000','Cases_1000_Norm',
                     'Deaths_Wk_Chng_100k','Cases_Wk_Chng_100k','Cases_Avg_Pct_Chng_Daily',
                     'March_Claims','February_Claims','Claims_Pct_Change']]

In [131]:
hospitals = hospitals[['ID','NAME','ADDRESS','CITY','STATE','ZIP','TYPE','STATUS','COUNTY','COUNTYFIPS',
                       'LATITUDE','LONGITUDE','NAICS_CODE','NAICS_DESC','BEDS','TRAUMA',
                       'HELIPAD','geometry']]

In [132]:
# covid neds to be restructured again into a kpi file for the kpi portion of the tool
# so only keep necessary columns to reduce size
covidKpi = covid[['area','Cases','Deaths','CasesPer1K','Recent_Cases','avg_daily_change']]

In [133]:
covidKpi.set_index('area', inplace=True)

In [134]:
covidKpi.to_json(orient='index')

'{"Appling County":{"Cases":72,"Deaths":6,"CasesPer1K":3.9,"Recent_Cases":54,"avg_daily_change":6.6},"Atkinson County":{"Cases":9,"Deaths":1,"CasesPer1K":1.1,"Recent_Cases":4,"avg_daily_change":0.6},"Bacon County":{"Cases":25,"Deaths":1,"CasesPer1K":2.2,"Recent_Cases":8,"avg_daily_change":-0.2},"Baker County":{"Cases":23,"Deaths":2,"CasesPer1K":7.2,"Recent_Cases":7,"avg_daily_change":0.2},"Baldwin County":{"Cases":226,"Deaths":8,"CasesPer1K":5.0,"Recent_Cases":118,"avg_daily_change":1.6},"Banks County":{"Cases":25,"Deaths":0,"CasesPer1K":1.4,"Recent_Cases":12,"avg_daily_change":0.1},"Barrow County":{"Cases":141,"Deaths":4,"CasesPer1K":1.8,"Recent_Cases":56,"avg_daily_change":0.7},"Bartow County":{"Cases":323,"Deaths":31,"CasesPer1K":3.1,"Recent_Cases":78,"avg_daily_change":0.1},"Ben Hill County":{"Cases":26,"Deaths":0,"CasesPer1K":1.5,"Recent_Cases":13,"avg_daily_change":1.7},"Berrien County":{"Cases":17,"Deaths":0,"CasesPer1K":0.9,"Recent_Cases":7,"avg_daily_change":1.6},"Bibb County"

In [135]:
countyPoints.sort_values('COVID_Death_Rate')

Unnamed: 0,FIPS,County,Total_Population,Population_Under_18,Population_Over_18,Population_Over_65,Population_Pct_Over_65,Jobs,Jobs_Frequent_Disease_Exposure,Jobs_Pct_Disease_Exposure,Jobs_Frequent_Physical_Proximity,Jobs_Pct_Prox,Socioeconomic_Status,Household_Comp_Disability,Minority_Status_Language,Housing_Transportation,Epidemiology,Healthcare_System,CCVI_Score,Hospitals,Beds,Beds_per_1000,Pct_Uninsured,Beds_per_1000_Elderly,Cases,Deaths,COVID_Death_Rate,COVID_Deaths_Per_1000,COVID_Cases_Per_1000,Cases_1000_Norm,Deaths_Wk_Chng_100k,Cases_Wk_Chng_100k,Cases_Avg_Pct_Chng_Daily,March_Claims,February_Claims,Claims_Pct_Change,geometry
49,13101,Echols County,3994,861,3133,646,16.2,854.267832,6.4,0.8,118.7,13.9,0.86,0.27,0.88,0.26,0.07,0.54,0.48,0,0,0.0,23.3,0.0,5,0,0.0,0.0,1.3,0.0,0.0,-25.0,0.4,28,1,2700.0,POINT (-82.89396 30.71005)
148,13301,Warren County,5346,1128,4218,1119,20.9,1723.063264,28.8,1.7,228.9,13.3,0.95,0.73,0.78,0.72,0.18,0.43,0.71,0,0,0.0,11.9,0.0,12,0,0.0,0.0,2.2,0.0,0.0,-37.4,0.2,98,22,345.5,POINT (-82.67675 33.40895)
139,13283,Treutlen County,6777,1582,5195,1299,19.2,1298.822431,60.5,4.7,324.1,25.0,0.84,0.91,0.67,0.94,0.63,0.56,0.93,0,0,0.0,12.4,0.0,3,0,0.0,0.0,0.4,0.0,0.0,0.0,6.0,72,22,227.3,POINT (-82.56729 32.40387)
60,13123,Gilmer County,29922,5904,24018,6989,23.4,8267.905725,186.2,2.3,1815.1,22.0,0.69,0.59,0.67,0.62,0.91,0.59,0.82,1,50,1.7,17.3,7.2,69,0,0.0,0.0,2.3,0.0,0.0,56.8,8.1,695,64,985.9,POINT (-84.45563 34.69116)
83,13169,Jones County,28548,6760,21788,4748,16.6,5006.288496,296.9,5.9,1255.3,25.1,0.59,0.67,0.36,0.12,0.29,0.50,0.36,0,0,0.0,10.6,0.0,29,0,0.0,0.0,1.0,0.0,0.0,-10.5,1.2,460,46,900.0,POINT (-83.56049 33.02512)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26,13055,Chattooga County,24817,5614,19203,4201,16.9,6833.848508,151.0,2.2,1054.0,15.4,0.95,0.56,0.49,0.90,0.36,0.77,0.81,1,31,1.2,12.9,7.4,16,2,12.5,0.1,0.6,2.1,4.0,-4.0,1.3,68,37,83.8,POINT (-85.34534 34.47500)
101,13207,Monroe County,27010,5652,21358,4736,17.5,17836.536786,340.6,1.9,1969.2,11.0,0.56,0.17,0.58,0.86,0.96,0.50,0.71,1,25,0.9,10.9,5.3,24,3,12.5,0.1,0.9,4.2,7.4,3.7,0.9,576,65,786.2,POINT (-83.91866 33.01392)
29,13061,Clay County,3001,584,2417,681,22.7,685.096116,43.7,6.4,149.7,21.9,1.00,0.96,0.65,0.99,0.88,0.72,1.00,0,0,0.0,10.9,0.0,23,3,13.0,1.0,7.7,2.1,33.3,66.6,-0.2,115,11,945.5,POINT (-84.98009 31.62624)
151,13307,Webster County,2613,625,1988,547,20.9,580.132929,5.8,1.0,95.0,16.4,0.70,0.96,0.91,0.21,0.68,0.38,0.84,0,0,0.0,12.4,0.0,10,2,20.0,0.8,3.8,4.2,76.5,38.3,1.9,15,3,400.0,POINT (-84.55105 32.04664)


In [136]:
# write out files
hospitals.to_excel('hospitals.xlsx',index=False)
countyHospitals.to_excel('hospitals.xlsx',index=False)
hospitals.to_file("spatial/mapbox/hospitals.geojson", driver='GeoJSON')
counties.to_file("spatial/mapbox/countyData.geojson", driver='GeoJSON')
countyPoints.to_file("spatial/mapbox/countyPoints.geojson", driver='GeoJSON')
countyData.to_excel('countyData.xlsx', index=False)
covid.to_excel('../application/app-data/covidDataWrapper.xlsx', index=False)