In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)

import numpy as np

import geopandas as gpd
import fiona

import matplotlib.pyplot as plt
import seaborn as sns

import json

In [2]:
counties = pd.read_excel('covid-recovery-data.xlsx', sheet_name="county-population")
jobs = pd.read_excel('covid-recovery-data.xlsx', sheet_name="county-jobs-summary", usecols="B,C,D,F,G,I")
ccvi = pd.read_excel('covid-recovery-data.xlsx', sheet_name="ccvi-county", usecols="D:K")
countyShape = gpd.read_file("spatial/ga-counties.geojson")
countyClaims = pd.read_excel('covid-recovery-data.xlsx', sheet_name="unemployment-claims-monthly")
hospitals = gpd.read_file("spatial/hospitals.geojson")

In [3]:
msa = pd.read_excel('../../data/geographies/cbsa.xlsx', sheet_name='cbsa',usecols='M,D')
rc = pd.read_excel('../../data/geographies/regional-commissions.xlsx', sheet_name='county-rc',usecols='A,C')
msa.rename(columns={'area_code':'FIPS'}, inplace=True)
rc.rename(columns={'area_code':'FIPS'}, inplace=True)
msa['FIPS'] = msa['FIPS'].astype(str)
rc['FIPS'] = rc['FIPS'].astype(str)

In [4]:
hospitals = gpd.read_file("spatial/hospitals.geojson")

In [5]:
hospitals.sort_values('BEDS', inplace=True)

In [6]:
hospitals.BEDS = np.where(hospitals.BEDS < 0, 82,hospitals.BEDS)

In [7]:
counties = pd.merge(counties,jobs,how='left',on='FIPS')

In [8]:
counties = pd.merge(counties,ccvi,how='left',on='FIPS')

In [9]:
counties['FIPS'] = counties['FIPS'].astype(str)

In [10]:
counties = pd.merge(counties,countyShape, how='left', left_on='FIPS', right_on='GEOID')

In [11]:
del counties['GEOID']

In [12]:
# groupby county fips to get number of hospitals and number of beds per county
countyHospitals = hospitals.groupby('COUNTYFIPS') \
                           .agg({'NAME':'count', 'BEDS': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'NAME':'Hospitals',
                                            'BEDS':'Beds',
                                            'COUNTYFIPS':'FIPS'})

In [13]:
counties = pd.merge(counties,countyHospitals, how='left', on='FIPS')

In [14]:
counties['Hospitals'].fillna(0, inplace=True)
counties['Beds'].fillna(0, inplace=True)

In [15]:
counties['Beds_per_1000'] = ( counties['Beds'] / counties['Total_Population'] ) * 1000
counties['Beds_per_1000_Elderly'] = ( counties['Beds'] / counties['Population_Over_65'] ) * 1000

In [16]:
# get clean covid data
covid = pd.read_csv('../application/app-data/covid-county-clean.csv')
covid = covid[['date','area_name','area_code','cases','deaths']]

covid.date = pd.to_datetime(covid.date)

covid['area_code'] = covid['area_code'].astype(int)
covid['area_code'] = covid['area_code'].astype(str)

# get new data from nyt
covidCountyNew = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')
covidCountyNew = covidCountyNew.loc[covidCountyNew['state'] == 'Georgia']

covidCountyNew = covidCountyNew[['date','county','fips','cases','deaths']]

# get state level data
covidStateNew = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv')
covidStateNew = covidStateNew.loc[covidStateNew['state'] == 'Georgia']

covidCountyNew.rename(columns={'county' : 'area_name',
                               'fips'   : 'area_code'}, inplace=True)

covidStateNew.rename(columns={'state' : 'area_name',
                              'fips'  : 'area_code'}, inplace=True)

covidNew = covidStateNew.append(covidCountyNew)

# the covid data is reported as a cumulative sum
# we want the daily counts as well for different charting options
covidNew.date = pd.to_datetime(covidNew.date)
covidNew.sort_values(['area_name', 'date'], ascending=[True, True], inplace=True)



covidNew.dropna(subset=['area_code'], inplace=True)

covidNew['area_code'] = covidNew['area_code'].astype(int)
covidNew['area_code'] = covidNew['area_code'].astype(str)

covidNew.reset_index(inplace=True, drop=True)

covidNew.sort_values(['area_name', 'date'], ascending=[True, True], inplace=True)

# filter only dates AFTER 4/30
covidNew = covidNew.loc[covidNew['date'] >= '2020-05-07']
# append to cleaned sheet
covid = covid.append(covidNew)
covid.sort_values(['area_code','date'], inplace=True)
covid.reset_index(drop=True,inplace=True)

In [17]:
covid['daily_cases'] = np.where(covid['area_code'] == covid['area_code'].shift(), covid.cases - covid.cases.shift(), np.nan)

covid['daily_cases'].fillna(covid['cases'], inplace=True)

covid['daily_deaths'] = np.where(covid['area_code'] == covid['area_code'].shift(), covid.deaths - covid.deaths.shift(), np.nan)
covid['daily_deaths'].fillna(covid['deaths'], inplace=True)

In [18]:
covid.sort_values(['area_name', 'date'],inplace=True)

In [19]:
# we dont' ned the state here as we'r just trying to mak a fram
# at the county level
covid = covid.loc[covid['area_name'] != 'Georgia']
covid.rename(columns={'area_code':'FIPS'}, inplace=True)

In [20]:
covid['FIPS'] = covid['FIPS'].astype(str)

In [21]:
countyList = counties[['FIPS','County','Total_Population']]

In [22]:
covid = pd.merge(covid,countyList,how='left',on='FIPS')
covidMsa = pd.merge(covid,msa,how='left',on='FIPS')
covidRc = pd.merge(covid,rc,how='left',on='FIPS')
covidGa = covid.copy()
covidGa['State'] = 'Georgia'

In [23]:
covidTotals = covid.groupby('FIPS') \
                   .agg({'daily_cases':'sum', 'daily_deaths': 'sum'}) \
                   .reset_index() \
                   .rename(columns={'daily_cases':'Cases',
                                    'daily_deaths':'Deaths'})

# add total new cases last two weeks
cutOff = covid['date'].max() - pd.Timedelta(days=13)
cutOffOne = covid['date'].max() - pd.Timedelta(days=6)
covidLastTwo = covid.loc[covid['date'] >= cutOff]

In [24]:


covidTwo = covidLastTwo.groupby('FIPS') \
                                .agg({'daily_cases':'sum','daily_deaths':'sum'}) \
                                .reset_index() \
                                .rename(columns={'daily_cases':'Recent_Cases',
                                            'daily_deaths':'Recent_Deaths'})

# add daily growth rate over last 14 days
covidLastTwo['Pct_Chng_Case'] = covidLastTwo.groupby('FIPS')['cases'].pct_change()
covidLastTwo.Pct_Chng_Case = covidLastTwo.Pct_Chng_Case * 100
covidLastTwo['Pct_Chng_Case'].fillna(0, inplace=True)

covidTwoAvgChng = covidLastTwo.groupby('FIPS') \
                                .agg({'Pct_Chng_Case':'mean'}) \
                                .reset_index() \
                                .rename(columns={'Pct_Chng_Case':'avg_daily_change'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


In [25]:
# new cases over the last 7 days
covidThisWk = covid.loc[covid['date'] >= cutOffOne]
covidThisWk = covidThisWk[['date','FIPS','daily_cases','daily_deaths']]
covidThisWk.rename(columns={'daily_cases' : 'daily_cases_this_wk',
                            'daily_deaths': 'daily_deaths_this_wk'}, inplace=True)
# new cases over the previous 7-day period
covidPrevWk = covid[(covid['date'] >= cutOff) & (covid['date'] < cutOffOne)]
covidPrevWk = covidPrevWk[['date','FIPS','daily_cases','daily_deaths']]
covidPrevWk.rename(columns={'daily_cases' : 'daily_cases_prev_wk',
                            'daily_deaths': 'daily_deaths_prev_wk'}, inplace=True)
# total change current week to previous week
covidThisWk = covidThisWk.groupby('FIPS') \
                           .agg({'daily_cases_this_wk':'sum', 'daily_deaths_this_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_this_wk' : 'cases_this_wk',
                                            'daily_deaths_this_wk': 'deaths_this_wk'})

covidPrevWk = covidPrevWk.groupby('FIPS') \
                           .agg({'daily_cases_prev_wk':'sum', 'daily_deaths_prev_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_prev_wk' : 'cases_prev_wk',
                                            'daily_deaths_prev_wk': 'deaths_prev_wk'})

covidWeek = pd.merge(covidThisWk,covidPrevWk,how='left',on='FIPS')

covidWeek['cases_diff'] = covidWeek['cases_this_wk'] - covidWeek['cases_prev_wk']
covidWeek['deaths_diff'] = covidWeek['deaths_this_wk'] - covidWeek['deaths_prev_wk']

In [26]:
# merge all of these covid totaled files at the county level
# covidTwoAvgChng,covidWeek,covidTwo,covidTotals
covid = pd.merge(covidTotals,covidTwo,how='left',on='FIPS')
covid = pd.merge(covid,covidWeek,how='left',on='FIPS')
covid = pd.merge(covid,covidTwoAvgChng,how='left',on='FIPS')

In [27]:
# repeat this process but at the MSA level
covidMsa.dropna(inplace=True)
covidMsa.reset_index(drop=True,inplace=True)

In [28]:
covidMsa = covidMsa.groupby(['CBSA Title','date']) \
                       .agg({'daily_cases':'sum', 'daily_deaths': 'sum'}) \
                       .reset_index() \
                       .rename(columns={'daily_cases':'Cases',
                                        'daily_deaths':'Deaths',
                                        'CBSA Title':'MSA'})

In [29]:
covidMsaTotals = covidMsa.groupby('MSA') \
                           .agg({'Cases':'sum', 'Deaths': 'sum'}) \
                           .reset_index()

In [30]:
covidMsaTotals.sort_values('Cases', inplace=True)

In [31]:
# add total new cases last two weeks
cutOff = covidMsa['date'].max() - pd.Timedelta(days=13)
cutOffOne = covidMsa['date'].max() - pd.Timedelta(days=6)
covidLastTwo = covidMsa.loc[covidMsa['date'] >= cutOff]

covidTwo = covidLastTwo.groupby('MSA') \
                                .agg({'Cases':'sum','Deaths':'sum'}) \
                                .reset_index() \
                                .rename(columns={'Cases':'Recent_Cases',
                                                 'Deaths':'Recent_Deaths'})

In [32]:
# add daily growth rate over last 14 days
covidLastTwo['Pct_Chng_Case'] = covidLastTwo.groupby('MSA')['Cases'].pct_change()
covidLastTwo['Pct_Chng_Case']= covidLastTwo['Pct_Chng_Case'] * 100
covidLastTwo['Pct_Chng_Case'].fillna(0, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [33]:


covidTwoAvgChng = covidLastTwo.groupby('MSA') \
                                .agg({'Pct_Chng_Case':'mean'}) \
                                .reset_index() \
                                .rename(columns={'Pct_Chng_Case':'avg_daily_change'})

# new cases over the last 7 days
covidThisWk = covidMsa.loc[covidMsa['date'] >= cutOffOne]
covidThisWk = covidThisWk[['date','MSA','Cases','Deaths']]
covidThisWk.rename(columns={'Cases' : 'daily_cases_this_wk',
                            'Deaths': 'daily_deaths_this_wk'}, inplace=True)
# new cases over the previous 7-day period
covidPrevWk = covidMsa[(covidMsa['date'] >= cutOff) & (covidMsa['date'] < cutOffOne)]
covidPrevWk = covidPrevWk[['date','MSA','Cases','Deaths']]
covidPrevWk.rename(columns={'Cases' : 'daily_cases_prev_wk',
                            'Deaths': 'daily_deaths_prev_wk'}, inplace=True)
# total change current week to previous week
covidThisWk = covidThisWk.groupby('MSA') \
                           .agg({'daily_cases_this_wk':'sum', 'daily_deaths_this_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_this_wk' : 'cases_this_wk',
                                            'daily_deaths_this_wk': 'deaths_this_wk'})

covidPrevWk = covidPrevWk.groupby('MSA') \
                           .agg({'daily_cases_prev_wk':'sum', 'daily_deaths_prev_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_prev_wk' : 'cases_prev_wk',
                                            'daily_deaths_prev_wk': 'deaths_prev_wk'})

covidWeek = pd.merge(covidThisWk,covidPrevWk,how='left',on='MSA')

covidWeek['cases_diff'] = covidWeek['cases_this_wk'] - covidWeek['cases_prev_wk']
covidWeek['deaths_diff'] = covidWeek['deaths_this_wk'] - covidWeek['deaths_prev_wk']

# merge all of these covid totaled files at the county level
# covidTwoAvgChng,covidWeek,covidTwo,covidTotals
covidMsa = pd.merge(covidMsaTotals,covidTwo,how='left',on='MSA')
covidMsa = pd.merge(covidMsa,covidWeek,how='left',on='MSA')
covidMsa = pd.merge(covidMsa,covidTwoAvgChng,how='left',on='MSA')

In [34]:
# repeat this process but at the regional commission level
covidRc.dropna(inplace=True)
covidRc = covidRc.groupby(['Regional Commission','date']) \
                       .agg({'daily_cases':'sum', 'daily_deaths': 'sum'}) \
                       .reset_index() \
                       .rename(columns={'daily_cases':'Cases',
                                        'daily_deaths':'Deaths',
                                        'Regional Commission':'RC'})

covidRcTotals = covidRc.groupby('RC') \
                           .agg({'Cases':'sum', 'Deaths': 'sum'}) \
                           .reset_index()

# add total new cases last two weeks
cutOff = covidRc['date'].max() - pd.Timedelta(days=13)
cutOffOne = covidRc['date'].max() - pd.Timedelta(days=6)
covidLastTwo = covidRc.loc[covidRc['date'] >= cutOff]

covidTwo = covidLastTwo.groupby('RC') \
                                .agg({'Cases':'sum','Deaths':'sum'}) \
                                .reset_index() \
                                .rename(columns={'Cases':'Recent_Cases',
                                                 'Deaths':'Recent_Deaths'})

# add daily growth rate over last 14 days
covidLastTwo['Pct_Chng_Case'] = covidLastTwo.groupby('RC')['Cases'].pct_change()
covidLastTwo.Pct_Chng_Case = covidLastTwo.Pct_Chng_Case * 100
covidLastTwo['Pct_Chng_Case'].fillna(0, inplace=True)

covidTwoAvgChng = covidLastTwo.groupby('RC') \
                                .agg({'Pct_Chng_Case':'mean'}) \
                                .reset_index() \
                                .rename(columns={'Pct_Chng_Case':'avg_daily_change'})

# new cases over the last 7 days
covidThisWk = covidRc.loc[covidRc['date'] >= cutOffOne]
covidThisWk = covidThisWk[['date','RC','Cases','Deaths']]
covidThisWk.rename(columns={'Cases' : 'daily_cases_this_wk',
                            'Deaths': 'daily_deaths_this_wk'}, inplace=True)
# new cases over the previous 7-day period
covidPrevWk = covidRc[(covidRc['date'] >= cutOff) & (covidRc['date'] < cutOffOne)]
covidPrevWk = covidPrevWk[['date','RC','Cases','Deaths']]
covidPrevWk.rename(columns={'Cases' : 'daily_cases_prev_wk',
                            'Deaths': 'daily_deaths_prev_wk'}, inplace=True)
# total change current week to previous week
covidThisWk = covidThisWk.groupby('RC') \
                           .agg({'daily_cases_this_wk':'sum', 'daily_deaths_this_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_this_wk' : 'cases_this_wk',
                                            'daily_deaths_this_wk': 'deaths_this_wk'})

covidPrevWk = covidPrevWk.groupby('RC') \
                           .agg({'daily_cases_prev_wk':'sum', 'daily_deaths_prev_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_prev_wk' : 'cases_prev_wk',
                                            'daily_deaths_prev_wk': 'deaths_prev_wk'})

covidWeek = pd.merge(covidThisWk,covidPrevWk,how='left',on='RC')

covidWeek['cases_diff'] = covidWeek['cases_this_wk'] - covidWeek['cases_prev_wk']
covidWeek['deaths_diff'] = covidWeek['deaths_this_wk'] - covidWeek['deaths_prev_wk']

# merge all of these covid totaled files at the county level
# covidTwoAvgChng,covidWeek,covidTwo,covidTotals
covidRc = pd.merge(covidRcTotals,covidTwo,how='left',on='RC')
covidRc = pd.merge(covidRc,covidWeek,how='left',on='RC')
covidRc = pd.merge(covidRc,covidTwoAvgChng,how='left',on='RC')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [35]:
# repeat this process but at the regional commission level
covidGa = covidGa.groupby(['State','date']) \
                       .agg({'daily_cases':'sum', 'daily_deaths': 'sum'}) \
                       .reset_index() \
                       .rename(columns={'daily_cases':'Cases',
                                        'daily_deaths':'Deaths',
                                        'State':'GA'})

covidGaTotals = covidGa.groupby('GA') \
                           .agg({'Cases':'sum', 'Deaths': 'sum'}) \
                           .reset_index()

# add total new cases last two weeks
cutOff = covidGa['date'].max() - pd.Timedelta(days=13)
cutOffOne = covidGa['date'].max() - pd.Timedelta(days=6)
covidLastTwo = covidGa.loc[covidGa['date'] >= cutOff]

covidTwo = covidLastTwo.groupby('GA') \
                                .agg({'Cases':'sum','Deaths':'sum'}) \
                                .reset_index() \
                                .rename(columns={'Cases':'Recent_Cases',
                                                 'Deaths':'Recent_Deaths'})

# add daily growth rate over last 14 days
covidLastTwo['Pct_Chng_Case'] = covidLastTwo.groupby('GA')['Cases'].pct_change()
covidLastTwo.Pct_Chng_Case = covidLastTwo.Pct_Chng_Case * 100
covidLastTwo['Pct_Chng_Case'].fillna(0, inplace=True)

covidTwoAvgChng = covidLastTwo.groupby('GA') \
                                .agg({'Pct_Chng_Case':'mean'}) \
                                .reset_index() \
                                .rename(columns={'Pct_Chng_Case':'avg_daily_change'})

# new cases over the last 7 days
covidThisWk = covidGa.loc[covidGa['date'] >= cutOffOne]
covidThisWk = covidThisWk[['date','GA','Cases','Deaths']]
covidThisWk.rename(columns={'Cases' : 'daily_cases_this_wk',
                            'Deaths': 'daily_deaths_this_wk'}, inplace=True)
# new cases over the previous 7-day period
covidPrevWk = covidGa[(covidGa['date'] >= cutOff) & (covidGa['date'] < cutOffOne)]
covidPrevWk = covidPrevWk[['date','GA','Cases','Deaths']]
covidPrevWk.rename(columns={'Cases' : 'daily_cases_prev_wk',
                            'Deaths': 'daily_deaths_prev_wk'}, inplace=True)
# total change current week to previous week
covidThisWk = covidThisWk.groupby('GA') \
                           .agg({'daily_cases_this_wk':'sum', 'daily_deaths_this_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_this_wk' : 'cases_this_wk',
                                            'daily_deaths_this_wk': 'deaths_this_wk'})

covidPrevWk = covidPrevWk.groupby('GA') \
                           .agg({'daily_cases_prev_wk':'sum', 'daily_deaths_prev_wk': 'sum'}) \
                           .reset_index() \
                           .rename(columns={'daily_cases_prev_wk' : 'cases_prev_wk',
                                            'daily_deaths_prev_wk': 'deaths_prev_wk'})

covidWeek = pd.merge(covidThisWk,covidPrevWk,how='left',on='GA')

covidWeek['cases_diff'] = covidWeek['cases_this_wk'] - covidWeek['cases_prev_wk']
covidWeek['deaths_diff'] = covidWeek['deaths_this_wk'] - covidWeek['deaths_prev_wk']

# merge all of these covid totaled files at the county level
# covidTwoAvgChng,covidWeek,covidTwo,covidTotals
covidGa = pd.merge(covidGaTotals,covidTwo,how='left',on='GA')
covidGa = pd.merge(covidGa,covidWeek,how='left',on='GA')
covidGa = pd.merge(covidGa,covidTwoAvgChng,how='left',on='GA')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [36]:
rc = pd.merge(rc,countyList,how='left',on='FIPS')
rc = rc.groupby('Regional Commission').agg({'Total_Population':'sum'}).reset_index().rename(columns={'Regional Commission':'RC'})
covidRc = pd.merge(covidRc,rc,how='left',on='RC')

msa = pd.merge(msa,countyList,how='left',on='FIPS')
msa = msa.groupby('CBSA Title').agg({'Total_Population':'sum'}).reset_index().rename(columns={'CBSA Title':'MSA'})
covidMsa = pd.merge(covidMsa,msa,how='left',on='MSA')

gaPop = countyList.copy()
gaPop['GA'] = 'Georgia'
gaPop = gaPop.groupby('GA').agg({'Total_Population':'sum'}).reset_index()
covidGa = pd.merge(covidGa,gaPop,how='left',on='GA')

In [37]:
covid = pd.merge(countyList,covid,how='left',on='FIPS')

In [38]:
covid.fillna(0, inplace=True)

In [39]:
# the calcs that are performed on the counties frame below on covid stuff
covid['CasesPer1K'] = ( covid['Cases'] / covid['Total_Population'] ) * 1000
covid['CasesWeekChng100k'] = ( covid['cases_diff'] / covid['Total_Population'] ) * 100000
covid['DeathsWeekChng100k'] = ( covid['deaths_diff'] / covid['Total_Population'] ) * 100000

covidRc['CasesPer1K'] = ( covidRc['Cases'] / covidRc['Total_Population'] ) * 1000
covidRc['CasesWeekChng100k'] = ( covidRc['cases_diff'] / covidRc['Total_Population'] ) * 100000
covidRc['DeathsWeekChng100k'] = ( covidRc['deaths_diff'] / covidRc['Total_Population'] ) * 100000

covidMsa['CasesPer1K'] = ( covidMsa['Cases'] / covidMsa['Total_Population'] ) * 1000
covidMsa['CasesWeekChng100k'] = ( covidMsa['cases_diff'] / covidMsa['Total_Population'] ) * 100000
covidMsa['DeathsWeekChng100k'] = ( covidMsa['deaths_diff'] / covidMsa['Total_Population'] ) * 100000

covidGa['CasesPer1K'] = ( covidGa['Cases'] / covidGa['Total_Population'] ) * 1000
covidGa['CasesWeekChng100k'] = ( covidGa['cases_diff'] / covidGa['Total_Population'] ) * 100000
covidGa['DeathsWeekChng100k'] = ( covidGa['deaths_diff'] / covidGa['Total_Population'] ) * 100000
# then shape those frames to only have the necessary columns
# stack and push to json

In [40]:
covid.Cases = covid.Cases.astype(int)
covid.Deaths = covid.Deaths.astype(int)
covid.Recent_Cases = covid.Recent_Cases.astype(int)
covid.Recent_Deaths = covid.Recent_Deaths.astype(int)
covid.cases_this_wk = covid.cases_this_wk.astype(int)
covid.deaths_this_wk = covid.deaths_this_wk.astype(int)
covid.cases_prev_wk = covid.cases_prev_wk.astype(int)
covid.deaths_prev_wk = covid.deaths_prev_wk.astype(int)
covid.cases_diff = covid.cases_diff.astype(int)
covid.deaths_diff = covid.deaths_diff.astype(int)
covid.avg_daily_change = covid.avg_daily_change.round(1)
covid.CasesPer1K = covid.CasesPer1K.round(1)
covid.CasesWeekChng100k = covid.CasesWeekChng100k.round(1)
covid.DeathsWeekChng100k = covid.DeathsWeekChng100k.round(1)

covidRc.Cases = covidRc.Cases.astype(int)
covidRc.Deaths = covidRc.Deaths.astype(int)
covidRc.Recent_Cases = covidRc.Recent_Cases.astype(int)
covidRc.Recent_Deaths = covidRc.Recent_Deaths.astype(int)
covidRc.cases_this_wk = covidRc.cases_this_wk.astype(int)
covidRc.deaths_this_wk = covidRc.deaths_this_wk.astype(int)
covidRc.cases_prev_wk = covidRc.cases_prev_wk.astype(int)
covidRc.deaths_prev_wk = covidRc.deaths_prev_wk.astype(int)
covidRc.cases_diff = covidRc.cases_diff.astype(int)
covidRc.deaths_diff = covidRc.deaths_diff.astype(int)
covidRc.avg_daily_change = covidRc.avg_daily_change.round(1)
covidRc.CasesPer1K = covidRc.CasesPer1K.round(1)
covidRc.CasesWeekChng100k = covidRc.CasesWeekChng100k.round(1)
covidRc.DeathsWeekChng100k = covidRc.DeathsWeekChng100k.round(1)

covidMsa.Cases = covidMsa.Cases.astype(int)
covidMsa.Deaths = covidMsa.Deaths.astype(int)
covidMsa.Recent_Cases = covidMsa.Recent_Cases.astype(int)
covidMsa.Recent_Deaths = covidMsa.Recent_Deaths.astype(int)
covidMsa.cases_this_wk = covidMsa.cases_this_wk.astype(int)
covidMsa.deaths_this_wk = covidMsa.deaths_this_wk.astype(int)
covidMsa.cases_prev_wk = covidMsa.cases_prev_wk.astype(int)
covidMsa.deaths_prev_wk = covidMsa.deaths_prev_wk.astype(int)
covidMsa.cases_diff = covidMsa.cases_diff.astype(int)
covidMsa.deaths_diff = covidMsa.deaths_diff.astype(int)
covidMsa.avg_daily_change = covidMsa.avg_daily_change.round(1)
covidMsa.CasesPer1K = covidMsa.CasesPer1K.round(1)
covidMsa.CasesWeekChng100k = covidMsa.CasesWeekChng100k.round(1)
covidMsa.DeathsWeekChng100k = covidMsa.DeathsWeekChng100k.round(1)

covidGa.Cases = covidGa.Cases.astype(int)
covidGa.Deaths = covidGa.Deaths.astype(int)
covidGa.Recent_Cases = covidGa.Recent_Cases.astype(int)
covidGa.Recent_Deaths = covidGa.Recent_Deaths.astype(int)
covidGa.cases_this_wk = covidGa.cases_this_wk.astype(int)
covidGa.deaths_this_wk = covidGa.deaths_this_wk.astype(int)
covidGa.cases_prev_wk = covidGa.cases_prev_wk.astype(int)
covidGa.deaths_prev_wk = covidGa.deaths_prev_wk.astype(int)
covidGa.cases_diff = covidGa.cases_diff.astype(int)
covidGa.deaths_diff = covidGa.deaths_diff.astype(int)
covidGa.avg_daily_change = covidGa.avg_daily_change.round(1)
covidGa.CasesPer1K = covidGa.CasesPer1K.round(1)
covidGa.CasesWeekChng100k = covidGa.CasesWeekChng100k.round(1)
covidGa.DeathsWeekChng100k = covidGa.DeathsWeekChng100k.round(1)

In [41]:
covid.rename(columns={'County':'area'}, inplace=True)
covidRc.rename(columns={'RC':'area'}, inplace=True)
covidMsa.rename(columns={'MSA':'area'}, inplace=True)
covidGa.rename(columns={'GA':'area'}, inplace=True)

counties = pd.merge(counties,covid,how='left',on='FIPS')
counties['Cases'].fillna(0, inplace=True)
counties['Deaths'].fillna(0, inplace=True)

del covid['FIPS']

covid = covid.append( [covidRc,covidMsa,covidGa] )

covid.reset_index(drop=True, inplace=True)

In [42]:
del counties['area']
del counties['Total_Population_y']
counties.rename(columns={'Total_Population_x':'Total_Population'}, inplace=True)

In [43]:
# add custom columns on rates, etc.
counties['COVID_Death_Rate'] = ( counties['Deaths'] / counties['Cases'] ) * 100
counties['COVID_Deaths_Per_1000'] = ( counties['Deaths'] / counties['Total_Population'] ) * 1000
counties['COVID_Cases_Per_1000'] = ( counties['Cases'] / counties['Total_Population'] ) * 1000
# add total new cases per 100000
counties['Cases_Wk_Chng_100k'] = ( counties['cases_diff'] / counties['Total_Population'] ) * 100000
# add total new daths per 100000
counties['Deaths_Wk_Chng_100k'] = ( counties['deaths_diff'] / counties['Total_Population'] ) * 100000

In [44]:
countyClaims = countyClaims.melt(id_vars=["FIPS", "County","Year"], var_name="Month", value_name="Claims")

In [45]:
countyClaimsNew = countyClaims.loc[countyClaims['Year'] == 2020]
countyClaimsNew = countyClaimsNew.loc[countyClaimsNew['Month'] == "March"]
countyClaimsLast = countyClaims.loc[countyClaims['Year'] == 2020]
countyClaimsLast = countyClaimsLast.loc[countyClaimsLast['Month'] == "February"]

claims = pd.merge(countyClaimsNew, countyClaimsLast, how='left', on='FIPS')

In [46]:
claims.rename(columns={'Claims_x' : 'March_Claims',
                       'Claims_y' : 'February_Claims'}, inplace=True)

claims['Claims_Pct_Change'] = ( ( claims['March_Claims'] - claims['February_Claims'] ) / claims['February_Claims'] ) * 100

In [47]:
claims['Claims_Pct_Change'] = claims['Claims_Pct_Change'].round(1)

In [48]:
claims['FIPS'] = claims['FIPS'].astype(str)

In [49]:
claims = claims[['FIPS','March_Claims','February_Claims','Claims_Pct_Change']]

In [50]:
claims.sort_values('February_Claims', inplace=True)

In [51]:
counties = pd.merge(counties, claims, how='left', on='FIPS')

In [52]:
counties.sort_values('February_Claims', inplace=True)

In [53]:
counties['Cases_1000_Norm'] = (counties.iloc[ : , 29 ]-counties.iloc[ : , 29 ].min())/(counties.iloc[ : , 29 ].max()-counties.iloc[ : , 29 ].min()) * 100

In [54]:
counties = counties[['FIPS','County','Total_Population','Population_Under_18','Population_Over_18','Population_Over_65',
                     'Population_Pct_Over_65','Jobs','Jobs_Frequent_Disease_Exposure','Jobs_Pct_Disease_Exposure',
                     'Jobs_Frequent_Physical_Proximity','Jobs_Pct_Prox','Socioeconomic_Status',
                     'Household_Comp_Disability','Minority_Status_Language','Housing_Transportation',
                     'Epidemiology','Healthcare_System','CCVI_Score','Hospitals','Beds','Beds_per_1000','Pct_Uninsured',
                     'Beds_per_1000_Elderly','Cases','Deaths','COVID_Death_Rate','COVID_Deaths_Per_1000','COVID_Cases_Per_1000','Cases_1000_Norm',
                     'Deaths_Wk_Chng_100k','Cases_Wk_Chng_100k','avg_daily_change',
                     'March_Claims','February_Claims','Claims_Pct_Change','geometry']]

counties.rename(columns={'avg_daily_change' : 'Cases_Avg_Pct_Chng_Daily'}, inplace=True)

In [55]:
counties['Cases_1000_Norm'].fillna(0,inplace=True)
counties['Deaths_Wk_Chng_100k'].fillna(0,inplace=True)
counties['Cases_Wk_Chng_100k'].fillna(0,inplace=True)
counties['Cases_Avg_Pct_Chng_Daily'].fillna(0,inplace=True)
counties['COVID_Death_Rate'].fillna(0,inplace=True)
counties['COVID_Deaths_Per_1000'].fillna(0,inplace=True)

In [56]:
counties = gpd.GeoDataFrame(counties, geometry='geometry')

In [57]:
# round all values in your tilesets
counties.Population_Pct_Over_65 = counties.Population_Pct_Over_65.round(1)
counties.Jobs_Frequent_Disease_Exposure = counties.Jobs_Frequent_Disease_Exposure.round(1)
counties.Jobs_Pct_Disease_Exposure = counties.Jobs_Pct_Disease_Exposure.round(1)
counties.Jobs_Frequent_Physical_Proximity = counties.Jobs_Frequent_Physical_Proximity.round(1)
counties.Jobs_Pct_Prox = counties.Jobs_Pct_Prox.round(1)
counties.Socioeconomic_Status = counties.Socioeconomic_Status.round(2)
counties.Household_Comp_Disability = counties.Household_Comp_Disability.round(2)
counties.Minority_Status_Language = counties.Minority_Status_Language.round(2)
counties.Housing_Transportation = counties.Housing_Transportation.round(2)
counties.Epidemiology = counties.Epidemiology.round(2)
counties.Healthcare_System = counties.Healthcare_System.round(2)
counties.CCVI_Score = counties.CCVI_Score.round(2)
counties.Hospitals = counties.Hospitals.astype(int)
counties.Beds = counties.Beds.astype(int)
counties.Beds_per_1000 = counties.Beds_per_1000.round(1)
counties.Pct_Uninsured = counties.Pct_Uninsured.round(1)
counties.Beds_per_1000_Elderly = counties.Beds_per_1000_Elderly.round(1)
counties.COVID_Cases_Per_1000 = counties.COVID_Cases_Per_1000.round(1)
counties.Cases_1000_Norm = counties.Cases_1000_Norm.round(1)
counties.Deaths_Wk_Chng_100k = counties.Deaths_Wk_Chng_100k.round(1)
counties.Cases_Wk_Chng_100k = counties.Cases_Wk_Chng_100k.round(1)
counties.Cases_Avg_Pct_Chng_Daily = counties.Cases_Avg_Pct_Chng_Daily.round(1)
counties.COVID_Death_Rate = counties.COVID_Death_Rate.round(1)
counties.COVID_Deaths_Per_1000 = counties.COVID_Deaths_Per_1000.round(1)

In [58]:
countyPoints = counties.copy()

In [59]:
countyPoints['geometry'] = counties['geometry'].centroid

In [60]:
countyPoints.sort_values('Cases_1000_Norm', inplace=True)
countyPoints['Cases_1000_Norm'].round(1)
counties['COVID_Death_Rate'].fillna(0, inplace=True)
countyPoints['COVID_Death_Rate'].fillna(0, inplace=True)

In [61]:
countyData = counties[['FIPS','County','Cases','Deaths','Total_Population','Population_Under_18','Population_Over_18','Population_Over_65',
                     'Population_Pct_Over_65','Jobs','Jobs_Frequent_Disease_Exposure','Jobs_Pct_Disease_Exposure',
                     'Jobs_Frequent_Physical_Proximity','Jobs_Pct_Prox','Socioeconomic_Status',
                     'Household_Comp_Disability','Minority_Status_Language','Housing_Transportation',
                     'Epidemiology','Healthcare_System','CCVI_Score','Hospitals','Beds','Beds_per_1000','Pct_Uninsured',
                     'Beds_per_1000_Elderly','COVID_Death_Rate','COVID_Deaths_Per_1000','COVID_Cases_Per_1000','Cases_1000_Norm',
                     'Deaths_Wk_Chng_100k','Cases_Wk_Chng_100k','Cases_Avg_Pct_Chng_Daily',
                     'March_Claims','February_Claims','Claims_Pct_Change']]

In [62]:
hospitals = hospitals[['ID','NAME','ADDRESS','CITY','STATE','ZIP','TYPE','STATUS','COUNTY','COUNTYFIPS',
                       'LATITUDE','LONGITUDE','NAICS_CODE','NAICS_DESC','BEDS','TRAUMA',
                       'HELIPAD','geometry']]

In [63]:
# covid neds to be restructured again into a kpi file for the kpi portion of the tool
# so only keep necessary columns to reduce size
covidKpi = covid[['area','Cases','Deaths','CasesPer1K','Recent_Cases','avg_daily_change']]

In [64]:
covidKpi.set_index('area', inplace=True)

In [65]:
covidKpi.to_json(orient='index')

'{"Appling County":{"Cases":118,"Deaths":12,"CasesPer1K":6.4,"Recent_Cases":30,"avg_daily_change":2.1},"Atkinson County":{"Cases":26,"Deaths":2,"CasesPer1K":3.1,"Recent_Cases":7,"avg_daily_change":2.0},"Bacon County":{"Cases":73,"Deaths":2,"CasesPer1K":6.5,"Recent_Cases":41,"avg_daily_change":5.7},"Baker County":{"Cases":32,"Deaths":1,"CasesPer1K":10.0,"Recent_Cases":3,"avg_daily_change":0.3},"Baldwin County":{"Cases":299,"Deaths":16,"CasesPer1K":6.6,"Recent_Cases":29,"avg_daily_change":0.7},"Banks County":{"Cases":68,"Deaths":0,"CasesPer1K":3.7,"Recent_Cases":33,"avg_daily_change":5.4},"Barrow County":{"Cases":276,"Deaths":9,"CasesPer1K":3.6,"Recent_Cases":85,"avg_daily_change":2.7},"Bartow County":{"Cases":424,"Deaths":36,"CasesPer1K":4.1,"Recent_Cases":65,"avg_daily_change":1.2},"Ben Hill County":{"Cases":53,"Deaths":1,"CasesPer1K":3.1,"Recent_Cases":15,"avg_daily_change":2.3},"Berrien County":{"Cases":30,"Deaths":0,"CasesPer1K":1.6,"Recent_Cases":11,"avg_daily_change":3.5},"Bibb Co

In [66]:
covid.head()

Unnamed: 0,area,Total_Population,Cases,Deaths,Recent_Cases,Recent_Deaths,cases_this_wk,deaths_this_wk,cases_prev_wk,deaths_prev_wk,cases_diff,deaths_diff,avg_daily_change,CasesPer1K,CasesWeekChng100k,DeathsWeekChng100k
0,Appling County,18454.0,118,12,30,2,10,2,20,0,-10,2,2.1,6.4,-54.2,10.8
1,Atkinson County,8265.0,26,2,7,1,6,0,1,1,5,-1,2.0,3.1,60.5,-12.1
2,Bacon County,11228.0,73,2,41,1,22,0,19,1,3,-1,5.7,6.5,26.7,-8.9
3,Baker County,3189.0,32,1,3,-1,-1,-1,4,0,-5,-1,0.3,10.0,-156.8,-31.4
4,Baldwin County,45286.0,299,16,29,6,19,4,10,2,9,2,0.7,6.6,19.9,4.4


In [67]:
covid.shape

(211, 16)

In [68]:
msaCode = pd.read_excel('../../data/geographies/cbsa.xlsx', sheet_name='cbsa',usecols='A,D')
msaCode.drop_duplicates(inplace=True)
rcCode = pd.read_excel('../../data/geographies/regional-commissions.xlsx', sheet_name='county-rc',usecols='C,D')
rcCode.drop_duplicates(inplace=True)
msaCode.rename(columns={'CBSA Title':'area','CBSA Code':'area_code'},inplace=True)
rcCode.rename(columns={'Regional Commission':'area','RC_ID':'area_code'},inplace=True)
countyList.rename(columns={'County':'area','FIPS':'area_code'},inplace=True)
msaCode = msaCode[['area','area_code']]
rcCode = rcCode[['area','area_code']]
countyList = countyList[['area','area_code']]
areaCodes = msaCode.append([rcCode,countyList])


In [69]:
covid = pd.merge(covid,areaCodes,how='left',on='area')
covid['area_code'].fillna(13, inplace=True)

In [70]:
covid.shape

(211, 17)

In [71]:
# write out files
hospitals.to_excel('hospitals.xlsx',index=False)
countyHospitals.to_excel('hospitals.xlsx',index=False)
hospitals.to_file("spatial/mapbox/hospitals.geojson", driver='GeoJSON')
counties.to_file("spatial/mapbox/countyData.geojson", driver='GeoJSON')
countyPoints.to_file("spatial/mapbox/countyPoints.geojson", driver='GeoJSON')
countyData.to_excel('countyData.xlsx', index=False)
covid.to_excel('../application/app-data/covidDataWrapper.xlsx', index=False)

In [72]:
countyPoints.shape

(159, 37)

In [73]:
countyPoints.sort_values('County', inplace=True)