In [2]:
import importlib
import os
from pathlib import Path
import sys

from arcgis.features import GeoAccessor, GeoSeriesAccessor
from arcgis.gis import GIS
from dotenv import load_dotenv, find_dotenv
import janitor
import pandas as pd

# import arcpy if available
if importlib.util.find_spec("arcpy") is not None:
    import arcpy
    
# load environment variables from .env
load_dotenv(find_dotenv())

# create a GIS object instance; if you did not enter any information here, it defaults to anonymous access to ArcGIS Online
gis = GIS(
    url=os.getenv('ESRI_GIS_URL'), 
    username=os.getenv('ESRI_GIS_USERNAME'),
    password=os.getenv('ESRI_GIS_PASSWORD')
)

# paths to common data locations - NOTE: to convert any path to a raw string, simply use str(path_instance)
project_parent = Path('./').absolute().parent

data_dir = project_parent/'data'

data_raw = data_dir/'raw'
data_ext = data_dir/'external'
data_int = data_dir/'interim'
data_out = data_dir/'processed'

gdb_raw = data_raw/'raw.gdb'
gdb_int = data_int/'interim.gdb'
gdb_out = data_out/'processed.gdb'

# import the project package from the project package path
sys.path.append(str(project_parent/'src'))
import unacast

# load the "autoreload" extension so that code can change, & always reload modules so that as you change code in src, it gets loaded
%load_ext autoreload
%autoreload 2

In [3]:
county_fc_pth = gdb_int/'county_gen4'
unacast_csv_pth = data_raw/'covid_sds_full_2020-03-27.csv'
itm_id = '7566e0221e5646f99ea249a197116605'

In [14]:
# def get_county_geometry_series(county_fc_pth):
#     county_fc_pth = str(county_fc_pth) if isinstance(county_fc_pth, Path) else county_fc_pth
#     county_df = GeoAccessor.from_featureclass(county_fc_pth)
#     county_df.rename(columns={'ID': 'FIPS'}, inplace=True)
#     county_df = county_df[['FIPS', 'SHAPE']].copy()
#     geom_srs = county_df.set_index('FIPS')['SHAPE']
#     return geom_srs

def get_county_geometry_series():
    cty_lyr = gis.content.get(itm_id).layers[0]
    cty_df = cty_lyr.query(out_fields=['FIPS'], out_sr=4326, as_df=True)
    cty_df.set_index(['FIPS'], inplace=True)
    cty_srs = cty_df['SHAPE']
    return cty_srs
    

def load_unacast_csv(unacast_csv_path, existing_data_end_date=None):
    una_df = pd.read_csv(unacast_csv_pth).clean_names()
    una_df.county_fips = una_df.county_fips.astype(str).str.zfill(5)
    una_df.last_updated = pd.to_datetime(una_df.last_updated)
    una_df.localeventdate = pd.to_datetime(una_df.localeventdate)
    una_df.drop(columns=['county_centroid'], inplace=True)
    if existing_data_end_date:
        una_df = una_df[una_df['localeventdate'] > existing_data_end_date].copy()
    return una_df

def create_update_dataframe(county_fc_pth, unacast_csv_path, existing_data_end_date=None):
    county_srs = get_county_geometry_series()
    una_df = load_unacast_csv(unacast_csv_pth, existing_data_end_date)
    full_df = una_df.join(county_srs, on='county_fips')
    return full_df

In [15]:
full_df = create_update_dataframe(county_fc_pth, unacast_csv_pth)
full_df.head()

Unnamed: 0,localeventdate,weekday,county_fips,county_name,state_fips,state_name,state_code,grade,n_grade,covid,daily_distance_diff,county_population,last_updated,SHAPE
0,2020-02-24,2,1001,Autauga,1,Alabama,AL,F,1,PRE,-0.042038,55601,2020-03-27 15:24:40.826361+00:00,"{'rings': [[[-86.8206698922349, 32.34730961636..."
1,2020-02-28,6,1001,Autauga,1,Alabama,AL,F,1,PRE,0.01741,55601,2020-03-27 15:24:40.826361+00:00,"{'rings': [[[-86.8206698922349, 32.34730961636..."
2,2020-03-06,6,1001,Autauga,1,Alabama,AL,F,1,PRE,0.021396,55601,2020-03-27 15:24:40.826361+00:00,"{'rings': [[[-86.8206698922349, 32.34730961636..."
3,2020-03-09,2,1001,Autauga,1,Alabama,AL,F,1,POST,-0.018811,55601,2020-03-27 15:24:40.826361+00:00,"{'rings': [[[-86.8206698922349, 32.34730961636..."
4,2020-03-02,2,1001,Autauga,1,Alabama,AL,F,1,PRE,-0.030956,55601,2020-03-27 15:24:40.826361+00:00,"{'rings': [[[-86.8206698922349, 32.34730961636..."


In [16]:
full_df.iloc[:100].spatial.plot(gis=GIS())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._data[col] = GeoArray(self._data[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._data['OBJECTID'] = list(range(1, self._data.shape[0] + 1))


MapView(layout=Layout(height='400px', width='100%'))

In [17]:
full_df.spatial.to_featureclass(str(gdb_int/'unacast'))

'D:\\projects\\covid-19-unacast\\data\\interim\\interim.gdb\\unacast'