In [1]:
import pandas as pd
import seaborn as sns
import statsmodels.api as sm

## Import Mobility Data

In [2]:
# keep FIPS as string to preserve leading zeroes

In [3]:
mob = pd.read_csv(r"/Users/philip.ballentine/Downloads/Region_Mobility_Report_CSVs/2020_US_Region_Mobility_Report.csv", dtype={'census_fips_code': 'str'} )

In [4]:
mob['date'] = mob['date'].astype('datetime64')

In [6]:
mob_renam_col = [i.replace('percent_change_from_baseline', 'PCT_CFB') for i in mob.columns]

In [7]:
mob.columns = mob_renam_col

In [9]:
mob_renam_col

['country_region_code',
 'country_region',
 'sub_region_1',
 'sub_region_2',
 'metro_area',
 'iso_3166_2_code',
 'census_fips_code',
 'date',
 'retail_and_recreation_PCT_CFB',
 'grocery_and_pharmacy_PCT_CFB',
 'parks_PCT_CFB',
 'transit_stations_PCT_CFB',
 'workplaces_PCT_CFB',
 'residential_PCT_CFB']

In [8]:
mob.dtypes

country_region_code                      object
country_region                           object
sub_region_1                             object
sub_region_2                             object
metro_area                              float64
iso_3166_2_code                          object
census_fips_code                         object
date                             datetime64[ns]
retail_and_recreation_PCT_CFB           float64
grocery_and_pharmacy_PCT_CFB            float64
parks_PCT_CFB                           float64
transit_stations_PCT_CFB                float64
workplaces_PCT_CFB                      float64
residential_PCT_CFB                     float64
dtype: object

In [14]:
mob.sort_values(by="date", inplace=True)

In [16]:
mob.head(5)

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,date,retail_and_recreation_PCT_CFB,grocery_and_pharmacy_PCT_CFB,parks_PCT_CFB,transit_stations_PCT_CFB,workplaces_PCT_CFB,residential_PCT_CFB
0,US,United States,,,,,,2020-02-15,6.0,2.0,15.0,3.0,2.0,-1.0
72735,US,United States,Florida,Brevard County,,,12009.0,2020-02-15,4.0,0.0,6.0,18.0,1.0,0.0
527841,US,United States,Tennessee,,,US-TN,,2020-02-15,7.0,4.0,54.0,4.0,3.0,-1.0
528114,US,United States,Tennessee,Anderson County,,,47001.0,2020-02-15,8.0,5.0,,,1.0,-1.0
72487,US,United States,Florida,Bradford County,,,12007.0,2020-02-15,4.0,1.0,,,-2.0,


## Compute Rolling Average of Mobility Data

In [17]:
list_all_locations = list(mob.census_fips_code.unique())
list_frames = []
for i in list_all_locations:
    frame = mob[mob.census_fips_code == i]
    frame['retail_and_recreation_PCT_CFB_RollingAvg'] = frame.rolling(7, min_periods=7, center=False, on="date")['retail_and_recreation_PCT_CFB'].mean()
    frame['grocery_and_pharmacy_PCT_CFB_RollingAvg'] = frame.rolling(7, min_periods=7, center=False, on="date")['grocery_and_pharmacy_PCT_CFB'].mean()
    frame['parks_PCT_CFB_RollingAvg'] = frame.rolling(7, min_periods=7, center=False, on="date")['parks_PCT_CFB'].mean()
    frame['transit_stations_PCT_CFB_RollingAvg'] = frame.rolling(7, min_periods=7, center=False, on="date")['transit_stations_PCT_CFB'].mean()
    frame['workplaces_PCT_CFB_RollingAvg'] = frame.rolling(7, min_periods=7, center=False, on="date")['workplaces_PCT_CFB'].mean()
    frame['residential_PCT_CFB_RollingAvg'] = frame.rolling(7, min_periods=7, center=False, on="date")['residential_PCT_CFB'].mean()
    list_frames.append(frame)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/p

In [18]:
recombine = pd.concat(list_frames)

In [19]:
mobility_final = recombine

In [28]:
mobility_final[(mobility_final.census_fips_code == "12009") & (mobility_final.date > '2020-10-01')].head(-18)

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,date,retail_and_recreation_PCT_CFB,grocery_and_pharmacy_PCT_CFB,parks_PCT_CFB,transit_stations_PCT_CFB,workplaces_PCT_CFB,residential_PCT_CFB,retail_and_recreation_PCT_CFB_RollingAvg,grocery_and_pharmacy_PCT_CFB_RollingAvg,parks_PCT_CFB_RollingAvg,transit_stations_PCT_CFB_RollingAvg,workplaces_PCT_CFB_RollingAvg,residential_PCT_CFB_RollingAvg
72965,US,United States,Florida,Brevard County,,,12009,2020-10-02,-18.0,-11.0,-6.0,-16.0,-26.0,6.0,-19.571429,-12.285714,-13.428571,-20.142857,-24.142857,6.142857
72966,US,United States,Florida,Brevard County,,,12009,2020-10-03,-23.0,-15.0,-34.0,-32.0,-17.0,5.0,-20.0,-12.857143,-18.857143,-20.285714,-24.428571,6.285714
72967,US,United States,Florida,Brevard County,,,12009,2020-10-04,-19.0,-13.0,-32.0,-24.0,-19.0,4.0,-20.142857,-13.0,-22.0,-19.714286,-24.571429,6.285714
72968,US,United States,Florida,Brevard County,,,12009,2020-10-05,-22.0,-14.0,-27.0,-14.0,-28.0,7.0,-19.714286,-12.428571,-20.714286,-19.0,-24.571429,6.142857
72969,US,United States,Florida,Brevard County,,,12009,2020-10-06,-19.0,-12.0,-10.0,-9.0,-28.0,7.0,-19.571429,-12.428571,-18.428571,-18.0,-24.714286,6.142857
72970,US,United States,Florida,Brevard County,,,12009,2020-10-07,-19.0,-11.0,-15.0,-15.0,-27.0,7.0,-19.714286,-12.571429,-18.571429,-17.571429,-24.571429,6.142857
72971,US,United States,Florida,Brevard County,,,12009,2020-10-08,-20.0,-13.0,-3.0,-8.0,-27.0,7.0,-20.0,-12.714286,-18.142857,-16.857143,-24.571429,6.142857
72972,US,United States,Florida,Brevard County,,,12009,2020-10-09,-19.0,-12.0,-14.0,-16.0,-26.0,7.0,-20.142857,-12.857143,-19.285714,-16.857143,-24.571429,6.285714
72973,US,United States,Florida,Brevard County,,,12009,2020-10-10,-21.0,-14.0,-19.0,-30.0,-16.0,5.0,-19.857143,-12.714286,-17.142857,-16.571429,-24.428571,6.285714
72974,US,United States,Florida,Brevard County,,,12009,2020-10-11,-20.0,-15.0,-18.0,-23.0,-18.0,4.0,-20.0,-13.0,-15.142857,-16.428571,-24.285714,6.285714


## Join Full County-Level Dataset

In [29]:
counties = pd.read_csv(r"/Users/philip.ballentine/Documents/hia_covid_repo/hia_covid_data_assets/counties_dataset_full.csv",dtype={'County_FIPS':'str'})

In [30]:
counties.dtypes

Unnamed: 0                 int64
County_FIPS               object
StateCD                   object
Name                      object
HS_DIPLOMA_ONLY_2018     float64
SOME_COLLEGE_2018        float64
BACHELORS_PLUS_2018      float64
Poverty_PCT_2018         float64
UrbanCD                    int64
Median_Income_2018       float64
Population_ACS           float64
Population_Black_ACS     float64
Population_Hisp_ACS      float64
Population_Native_ACS    float64
PCT_Black_ACS            float64
PCT_Hisp_ACS             float64
PCT_Native_ACS           float64
UrbanCD_Description       object
Metro                     object
dtype: object

In [31]:
counties['census_fips_code'] = counties['County_FIPS'].astype('str')

In [32]:
mobility_counties = mobility_final.merge(counties, on = "census_fips_code")

In [33]:
mobility_counties.dtypes

country_region_code                                 object
country_region                                      object
sub_region_1                                        object
sub_region_2                                        object
metro_area                                         float64
iso_3166_2_code                                     object
census_fips_code                                    object
date                                        datetime64[ns]
retail_and_recreation_PCT_CFB                      float64
grocery_and_pharmacy_PCT_CFB                       float64
parks_PCT_CFB                                      float64
transit_stations_PCT_CFB                           float64
workplaces_PCT_CFB                                 float64
residential_PCT_CFB                                float64
retail_and_recreation_PCT_CFB_RollingAvg           float64
grocery_and_pharmacy_PCT_CFB_RollingAvg            float64
parks_PCT_CFB_RollingAvg                           float

In [34]:
mobility_counties[mobility_counties.census_fips_code == ""]

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,date,retail_and_recreation_PCT_CFB,grocery_and_pharmacy_PCT_CFB,...,Median_Income_2018,Population_ACS,Population_Black_ACS,Population_Hisp_ACS,Population_Native_ACS,PCT_Black_ACS,PCT_Hisp_ACS,PCT_Native_ACS,UrbanCD_Description,Metro


In [35]:
mobility_counties.head(5)

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,date,retail_and_recreation_PCT_CFB,grocery_and_pharmacy_PCT_CFB,...,Median_Income_2018,Population_ACS,Population_Black_ACS,Population_Hisp_ACS,Population_Native_ACS,PCT_Black_ACS,PCT_Hisp_ACS,PCT_Native_ACS,UrbanCD_Description,Metro
0,US,United States,Florida,Brevard County,,,12009,2020-02-15,4.0,0.0,...,57612.0,568183.0,57728.0,55042.0,1945.0,10.160107,9.687372,0.342319,In small metro area of less than 1 million res...,Smaller_metro
1,US,United States,Florida,Brevard County,,,12009,2020-02-16,6.0,1.0,...,57612.0,568183.0,57728.0,55042.0,1945.0,10.160107,9.687372,0.342319,In small metro area of less than 1 million res...,Smaller_metro
2,US,United States,Florida,Brevard County,,,12009,2020-02-17,6.0,1.0,...,57612.0,568183.0,57728.0,55042.0,1945.0,10.160107,9.687372,0.342319,In small metro area of less than 1 million res...,Smaller_metro
3,US,United States,Florida,Brevard County,,,12009,2020-02-18,4.0,1.0,...,57612.0,568183.0,57728.0,55042.0,1945.0,10.160107,9.687372,0.342319,In small metro area of less than 1 million res...,Smaller_metro
4,US,United States,Florida,Brevard County,,,12009,2020-02-19,3.0,0.0,...,57612.0,568183.0,57728.0,55042.0,1945.0,10.160107,9.687372,0.342319,In small metro area of less than 1 million res...,Smaller_metro


## Bring in the COVID Data to be Joined In 

In [36]:
covid = pd.read_csv(r"/Users/philip.ballentine/Documents/hia_covid_repo/hia_covid_data_assets/covid_dataset_full.csv",dtype={'County_FIPS':'str'})

In [37]:
covid.Date = covid.Date.astype("datetime64")

In [38]:
mobility_counties['Date'] = mobility_counties['date']



In [39]:
mobility_covid_urban = mobility_counties.merge(covid, on =['Date','County_FIPS'])

In [40]:
# Can't deal with the + signs, so remove these 

large_columns = list(mobility_covid_urban.columns)
large_columns = [x.replace('+','') for x in large_columns]
mobility_covid_urban.columns = large_columns

In [43]:
list(mobility_covid_urban.columns)

['country_region_code',
 'country_region',
 'sub_region_1',
 'sub_region_2',
 'metro_area',
 'iso_3166_2_code',
 'census_fips_code',
 'date',
 'retail_and_recreation_PCT_CFB',
 'grocery_and_pharmacy_PCT_CFB',
 'parks_PCT_CFB',
 'transit_stations_PCT_CFB',
 'workplaces_PCT_CFB',
 'residential_PCT_CFB',
 'retail_and_recreation_PCT_CFB_RollingAvg',
 'grocery_and_pharmacy_PCT_CFB_RollingAvg',
 'parks_PCT_CFB_RollingAvg',
 'transit_stations_PCT_CFB_RollingAvg',
 'workplaces_PCT_CFB_RollingAvg',
 'residential_PCT_CFB_RollingAvg',
 'Unnamed: 0_x',
 'County_FIPS',
 'StateCD',
 'Name',
 'HS_DIPLOMA_ONLY_2018',
 'SOME_COLLEGE_2018',
 'BACHELORS_PLUS_2018',
 'Poverty_PCT_2018',
 'UrbanCD',
 'Median_Income_2018',
 'Population_ACS',
 'Population_Black_ACS',
 'Population_Hisp_ACS',
 'Population_Native_ACS',
 'PCT_Black_ACS',
 'PCT_Hisp_ACS',
 'PCT_Native_ACS',
 'UrbanCD_Description',
 'Metro',
 'Date',
 'Unnamed: 0_y',
 'Admin2',
 'State',
 'Confirmed',
 'Deaths',
 'Country/Region',
 'Deaths_Previou

In [44]:
columns = [ 'date',
 'County_FIPS',
 'StateCD',
 'Name',
 'retail_and_recreation_PCT_CFB',
 'grocery_and_pharmacy_PCT_CFB',
 'parks_PCT_CFB',
 'transit_stations_PCT_CFB',
 'workplaces_PCT_CFB',
 'residential_PCT_CFB',
 'retail_and_recreation_PCT_CFB_RollingAvg',
 'grocery_and_pharmacy_PCT_CFB_RollingAvg',
 'parks_PCT_CFB_RollingAvg',
 'transit_stations_PCT_CFB_RollingAvg',
 'workplaces_PCT_CFB_RollingAvg',
 'residential_PCT_CFB_RollingAvg',
 'HS_DIPLOMA_ONLY_2018',
 'SOME_COLLEGE_2018',
 'BACHELORS_PLUS_2018',
 'Poverty_PCT_2018',
 'UrbanCD',
 'Median_Income_2018',
 'Population_ACS',
 'Population_Black_ACS',
 'Population_Hisp_ACS',
 'Population_Native_ACS',
 'PCT_Black_ACS',
 'PCT_Hisp_ACS',
 'PCT_Native_ACS',
 'UrbanCD_Description',
 'Metro',
 'Date',
 'State',
 'Confirmed',
 'Deaths',
 'Country/Region',
 'Deaths_Previous',
 'Confirmed_Previous',
 'Deaths_New',
 'Deaths_New_7',
 'Deaths_New_14',
 'Deaths_New_21',
 'Deaths_New_28',
 'Confirmed_New',
 'locationcol',
 'Confirmed_New_RollingAvg',
 'Deaths_New_RollingAvg',
 'Deaths_New_7_RollingAvg',
 'Deaths_New_14_RollingAvg',
 'Deaths_New_21_RollingAvg',
 'Deaths_New_28_RollingAvg',
 ]

In [45]:
combined_dataset_sub = mobility_covid_urban[columns]

In [46]:
#check to see matching
combined_dataset_sub[['State','StateCD']].sample(frac=.00004)


Unnamed: 0,State,StateCD
415065,Kentucky,KY
64361,Connecticut,CT
464163,Iowa,IA
87528,Oklahoma,OK
543164,Georgia,GA
451682,Iowa,IA
92990,Oklahoma,OK
311792,Utah,UT
453002,Louisiana,LA
367060,Mississippi,MS


In [48]:
def create_percapita_measures(dataframe):
    list_col = list(dataframe.columns)
    for i in list_col:
        if dataframe[i].dtype == "O":
            continue
        if "percent" in i or "PCT" in i:
            continue
        if "death" in i.lower() or "confirmed" in i.lower() and "previous" not in i.lower() :
            try:
                new_col_name = i+'_PER_100K'
                dataframe[new_col_name] = dataframe[i]/(dataframe['Population_ACS']/100000)
            except:
                print("error occurred for {i}".format(i=i)) 

In [49]:
create_percapita_measures(combined_dataset_sub)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


In [51]:
combined_dataset_sub.Deaths_New_7_RollingAvg_PER_100K.isna().value_counts()

False    670260
Name: Deaths_New_7_RollingAvg_PER_100K, dtype: int64

In [52]:
combined_dataset_sub.Population_ACS.isna().value_counts()

False    670260
Name: Population_ACS, dtype: int64

In [53]:
combined_dataset_sub.Deaths_New_7_RollingAvg_PER_100K.isna().value_counts()

False    670260
Name: Deaths_New_7_RollingAvg_PER_100K, dtype: int64

In [54]:
combined_dataset_sub.Metro.isna().value_counts()

False    670260
Name: Metro, dtype: int64

## Export to File

In [55]:
import os
os.chdir(r'/Users/philip.ballentine/Documents/hia_covid_repo/hia_covid_data_assets/')
path = str(os.getcwd())
filename = "hia_covid_combined.csv"
combined_dataset_sub.to_csv(filename)
print("{filename} has been created in {path}".format(filename=filename, path=path))

hia_covid_combined.csv has been created in /Users/philip.ballentine/Documents/hia_covid_repo/hia_covid_data_assets


In [56]:
import os
os.chdir(r'/Users/philip.ballentine/Documents/hia_covid_repo/hia_covid_data_assets/')
path = str(os.getcwd())
filename = "hia_covid_combined_sample.csv"
combined_dataset_sub.sample(frac=.10).to_csv(filename)
print("{filename} has been created in {path}".format(filename=filename, path=path))

hia_covid_combined_sample.csv has been created in /Users/philip.ballentine/Documents/hia_covid_repo/hia_covid_data_assets


In [57]:
# ratio to check join 
combined_dataset_sub.shape[0]/mobility_counties.shape[0]

0.9970056807279186

In [58]:
import os
os.chdir(r'/Users/philip.ballentine/Documents/hia_covid_repo/hia_covid_data_assets/')
path = str(os.getcwd())
filename = "hia_covid_combined_post0601.csv"
combined_dataset_sub[combined_dataset_sub.Date >= '2020-06-01'].to_csv(filename)
print("{filename} has been created in {path}".format(filename=filename, path=path))

hia_covid_combined_post0601.csv has been created in /Users/philip.ballentine/Documents/hia_covid_repo/hia_covid_data_assets
