In [1]:
import pandas as pd
import json, requests
import math
import numpy as np
from pathlib import Path
from collections import defaultdict
import datetime
import shutil
import logging
import datetime

## General Setup

### Read in static data

In [2]:
county_data_folder = Path('static_data/county')
with open(county_data_folder/"staticCounties.json", 'r') as f:
    county_json = json.load(f)

In [3]:
state_data_folder = Path('static_data/state')
with open(state_data_folder/"staticStates.json", 'r') as f:
    state_json = json.load(f)

We make one request that will be parsed once for State Data and once for county

In [4]:
def request_multiple_attempts(url):
    #print_and_log(f"Requesting data from {url}")
    num_requests = 0
    max_requests = 5
    req = requests.get(api_url)
    if(not req.status_code == 200):
        num_requests += 1 
        if(num_requests >= max_requests): req.raise_for_status()
        req = requests.get(api_url)
    #print_and_log("Data Received\n")
    return req

### Go through the county_json records and index by geo_id

In [5]:
geo_id_index_dict = {}
counties = county_json["features"]
for idx, county in enumerate(counties):
    geo_id = county["properties"]["geo_id"]
    geo_id_index_dict[geo_id] = idx

### Setup logging

In [8]:
now = datetime.datetime.now()
date_str = f"{now.month}-{now.day}-{now.year}-{now.hour}{now.minute}"

In [9]:
logging.basicConfig(filename=f'logs/message_logs/{date_str}.log',level=logging.DEBUG)

In [10]:
def print_and_log(message):
    logging.info(message)
    print(message)

## Add new data from APIs

### Add State Covid19 data from John's Hopkins

In [11]:
#data from John Hopkins CSSE
api_url = "https://covid19.mathdro.id/api/countries/USA/confirmed"
covid_jh = request_multiple_attempts(api_url).json()

#### Test that data appears to be valid

In [12]:
# there should be at least 1700 counties
num_counties = len(covid_jh)
if (num_counties < 1700): raise ValueError("Only", num_counties, " counties found in Johns Hopkins data")
mandatory_keys = ['fips', 'confirmed', 'recovered', 'deaths', 'active', 'combinedKey', 'provinceState']
for key in mandatory_keys:
    if not key in covid_jh[120]: raise ValueError("John's Hopkins Record missing key: ", key)

In [13]:
covid_jh[0].keys()

dict_keys(['provinceState', 'countryRegion', 'lastUpdate', 'lat', 'long', 'confirmed', 'recovered', 'deaths', 'active', 'admin2', 'fips', 'combinedKey', 'incidentRate', 'peopleTested'])

In [14]:
#includes ms which fromtimestamp doesnt accept so we cut it off
def get_str_from_timestamp(timestamp):
    timestamp = int(str(timestamp)[:-3])
    cur = datetime.datetime.fromtimestamp(timestamp)
    return cur.strftime('%#m/%d %#I:%M%p')

In [15]:
#initialize all state case data to 0
null_dict = {"cases":0, "deaths":0, "active":0, "recovered":0}
for state in state_json["features"]:
    state["properties"].update(null_dict)

In [16]:
keys = ["deaths", "recovered", "active"]
def add_record_to_state(record):
    for state in state_json["features"]:
        if(record["provinceState"] == state["properties"]["statename"]):
            for key in keys:
                state["properties"][key] += int(record[key])
            state["properties"]["cases"] += int(record["confirmed"])
            state["properties"]["time_cases_update"] = get_str_from_timestamp(record["lastUpdate"])
            return
    print_and_log(f"{record} unmatched")

In [17]:
def add_unassigned_to_state(statename, record):
    for state in state_json["features"]:
        props = state["properties"]
        if(props["statename"].lower() == statename.lower()):
            print_and_log(f'{record["confirmed"]} unassigned cases added to {statename}')
            print_and_log(f'{record["deaths"]} unassigned deaths added to {statename}')
            props["unassigned_cases"] = record["confirmed"]


In [18]:
skips = ["Diamond Princess, US", "Guam, US", "Grand Princess, US", "Puerto Rico, US", "Virgin Islands, US"]
for record in covid_jh:
    if(record == {}): continue
    if record["combinedKey"] in skips: continue
    county, state, _ = map(str.strip, record["combinedKey"].split(','))
    if county == "Unassigned":
        add_unassigned_to_state(state, record)
    add_record_to_state(record)

8858 unassigned cases added to New York
851 unassigned deaths added to New York
1391 unassigned cases added to New Jersey
4 unassigned deaths added to New Jersey
871 unassigned cases added to Georgia
7 unassigned deaths added to Georgia
838 unassigned cases added to Washington
0 unassigned deaths added to Washington
366 unassigned cases added to Connecticut
2 unassigned deaths added to Connecticut
364 unassigned cases added to Michigan
3 unassigned deaths added to Michigan
274 unassigned cases added to Rhode Island
32 unassigned deaths added to Rhode Island
238 unassigned cases added to Colorado
1 unassigned deaths added to Colorado
223 unassigned cases added to Massachusetts
13 unassigned deaths added to Massachusetts
76 unassigned cases added to Kentucky
5 unassigned deaths added to Kentucky
73 unassigned cases added to Illinois
1 unassigned deaths added to Illinois
57 unassigned cases added to Nevada
6 unassigned deaths added to Nevada
49 unassigned cases added to Louisiana
2 unassi

### Add County Covid Data19 from John's Hopkins


In [19]:
#initialize all county case data to 0
for county in counties:
    county["properties"].update(null_dict)

In [20]:
skips = ["Diamond Princess, US", "Guam, US", "Grand Princess, US", "Puerto Rico, US", "Virgin Islands, US"]
missing_fips = {
    "Dona Ana,New Mexico,US":"35013", 
    "Kansas City,Missouri,US":"29095", 
    "Dukes and Nantucket,Massachusetts,US":"25007"
}

In [24]:
def add_record_to_county(record):
    if("Unassigned" in record["combinedKey"]): return
    #sub in countyid for counties that have missing county id
    if(record["combinedKey"] in missing_fips): 
        record["fips"] = missing_fips[record["combinedKey"]]
    #skip anything without a countyID
    if(record['fips'] is None or record['fips'] in ['00078', '80015', '80040', '46102']):
        print_and_log(f'No geo_id, skipping {record["combinedKey"]}')
        return
    #skip anything in the skip list
    if(record["combinedKey"] in skips): return
    geo_id = '0500000US' + record["fips"]
    
    county = counties[geo_id_index_dict[geo_id]]
    #Add the contents of record to the county
    for key in keys:
        county["properties"][key] += int(record[key])
    county["properties"]["cases"] += int(record["confirmed"])
    county["properties"]["time_cases_update"] = get_str_from_timestamp(record["lastUpdate"])

In [25]:
for record in covid_jh:
    add_record_to_county(record)

No geo_id, skipping Out of TN,Tennessee,US
No geo_id, skipping Out of MI,Michigan,US
No geo_id, skipping Weber,Utah,US
No geo_id, skipping ,Virgin Islands,US
No geo_id, skipping Manassas,Virginia,US
No geo_id, skipping McDuffie,Georgia,US
No geo_id, skipping Fillmore,Minnesota,US
No geo_id, skipping LaSalle,Louisiana,US
No geo_id, skipping DeKalb,Tennessee,US
No geo_id, skipping ,Northern Mariana Islands,US
No geo_id, skipping McIntosh,Georgia,US
No geo_id, skipping Out of HI,Hawaii,US
No geo_id, skipping Out of OK,Oklahoma,US
No geo_id, skipping Lac qui Parle,Minnesota,US
No geo_id, skipping Oglala Lakota, South Dakota, US


#### Reassign NYC to the proper counties

In [26]:
new_york_county = counties[geo_id_index_dict['0500000US36061']]
queens_county = counties[geo_id_index_dict['0500000US36081']]
ny_cases = new_york_county["properties"]["cases"]
ny_deaths = new_york_county["properties"]["deaths"]
ny_time_updated = new_york_county["properties"]["time_cases_update"]
queens_cases = queens_county["properties"]["cases"]
nyc_reassignment_needed = ny_cases > 0 and queens_cases == 0
if(ny_cases <= 0 or not isinstance(ny_cases, int)): raise ValueError("Problem with NYCases, value:", ny_cases)
print_and_log(f"{ny_cases} cases initially in New York County")
print_and_log(f"{queens_cases} in queens.")
print_and_log(f"Reassign needed: {nyc_reassignment_needed}")

174 cases initially in New York County
0 in queens.
Reassign needed: True


In [27]:
# This is based on an estimate on 3/21/20 in which NYC had 5687 cases broken down as follows
# this has since been updated on 3/26/20 for data from here: 
# https://www1.nyc.gov/assets/doh/downloads/pdf/imm/covid-19-daily-data-summary.pdf
# https://www1.nyc.gov/assets/doh/downloads/pdf/imm/covid-19-daily-data-summary-deaths.pdf
# # Kings - 1750, Queens - 1514, New York 1402, Bronx 736, Richmond 285
cases_proportion_dict = {
    #code:proportion of NYC cases
    #queens
    '0500000US36081':.335515,
    #kings
    '0500000US36047':.269878,
    #Bronx
    '0500000US36005': .196217,
    #Richmond (Staten Island)
    '0500000US36085':.055885,
    #New York (Manhattan)
    '0500000US36061':.142502,
}
deaths_proportion_dict = {
    #code:proportion of NYC cases
    #queens
    '0500000US36081':.311893,
    #kings
    '0500000US36047':.270226,
    #Bronx
    '0500000US36005':.253640,
    #Richmond (Staten Island)
    '0500000US36085':.051779,
    #New York (Manhattan)
    '0500000US36061':.112055,
}
# #get cases that were all aggregated in NY county

In [28]:
ny_county_names = {
    '0500000US36081':'Queens County',
    '0500000US36047':'Kings County',
    '0500000US36005':'Bronx County',
    '0500000US36085':'Richmond County',
    '0500000US36061':'Manhattan',
}

In [29]:
print_and_log(f"\nTotal Cases listed for New York County {ny_cases}")
if(nyc_reassignment_needed):
    print_and_log("Reassigning NYC with proportional estimates")
    for county in counties:
        county_id = county["properties"]["geo_id"]
        if county_id in cases_proportion_dict.keys():
            county["properties"]["cases"] = int(ny_cases*cases_proportion_dict[county_id])
            county["properties"]["deaths"] = int(ny_deaths*deaths_proportion_dict[county_id])
            county["properties"]["time_cases_update"] = ny_time_updated
            county["properties"]["notes"] = "Data for this county is estimated"
else:
    print_and_log("\nQueens County found, not estimating NYC")


Total Cases listed for New York County 174
Reassigning NYC with proportional estimates


### Add Covid Test Data from Covid Tracking Project

In [30]:
def format_test_time(test_time):
    d, h = test_time.split(' ')
    h, m = h.split(":")
    h = int(h)
    am_pm = "AM" if h <= 12 else "PM"
    if(am_pm) == "pm": h-=12
    return f"{d} {h}:{m}{am_pm}"

In [31]:
#https://covidtracking.com/api/states/info <- this api has info about where the data comes from
api_url = 'https://covidtracking.com/api/states'
state_tests = request_multiple_attempts(api_url).json()

#### Test that data appears to be valid

In [32]:
# there should be at least 50 entries
num_states = len(state_tests)
if (num_states < 50): raise ValueError("Only", num_states, " states found in covidtracking testing data")
mandatory_keys = ['state', 'lastUpdateEt', 'positive', 'negative', 'total']
for key in mandatory_keys:
    if not key in state_tests[20]: raise ValueError("CovidTracking testing missing key: ", key)

In [33]:
state_tests[20]

{'state': 'MD',
 'positive': 5529,
 'positiveScore': 1,
 'negativeScore': 1,
 'negativeRegularScore': 1,
 'commercialScore': 1,
 'grade': 'A',
 'score': 4,
 'negative': 32933,
 'pending': None,
 'hospitalizedCurrently': None,
 'hospitalizedCumulative': 1210,
 'inIcuCurrently': None,
 'inIcuCumulative': None,
 'onVentilatorCurrently': None,
 'onVentilatorCumulative': None,
 'recovered': 365,
 'lastUpdateEt': '4/08 10:00',
 'checkTimeEt': '4/08 16:36',
 'death': 124,
 'hospitalized': 1210,
 'total': 38462,
 'totalTestResults': 38462,
 'posNeg': 38462,
 'fips': '24',
 'dateModified': '2020-04-08T14:00:00Z',
 'dateChecked': '2020-04-08T20:36:00Z',
 'notes': 'Please stop using the "total" field. Use "totalTestResults" instead.',
 'hash': '55f06adfe36d194e63e7825b5ff452fa72a98a2d'}

In [34]:
for state1 in state_tests:
    if(state1 == {}): continue
    for state2 in state_json["features"]:
        if(state1["state"] == state2["properties"]["abbr"]):
            state2["properties"]["time_tests_updated"] = format_test_time(state1["lastUpdateEt"])
            state2["properties"]["test_grade"] = state1["grade"]
            testing_keys = ["test_positive", "test_negative", "test_total"]
            for key in testing_keys:
                state2["properties"][key] = state1[key.split('_')[1]]

## Per Capita Calculations and Data

In [35]:
print_and_log("\nAdding per capita stats for county")
for county in counties:
    props = county["properties"]
    per_cap = props["population"]/100000
    props["pc_cases"] = props["cases"]/per_cap
    props["pc_deaths"] = props["deaths"]/per_cap


Adding per capita stats for county


In [36]:
print_and_log("Adding per capita stats for states")
for state in state_json["features"]:
    props = state["properties"]
    per_cap = props["population"]/100000
    props["pc_cases"] = props["cases"]/per_cap
    props["pc_active"] = props["active"]/per_cap
    props["pc_deaths"] = props["deaths"]/per_cap
    props["pc_tests"] = props["test_total"]/per_cap  

Adding per capita stats for states


## Time Series Data

### Add County time series

In [37]:
with open(county_data_folder/'countyTimeData.json', 'r') as f:
    county_time_data = json.load(f)

#### Add today's data to time series

Note we wont display this in time series until tomorrow becauseit makes it look like curve is flattening

In [38]:
today = datetime.datetime.today()
todays_date = f"{today.month}-{today.day}-{today.year}"
print_and_log(f"Adding time series for today: {todays_date}")

Adding time series for today: 4-9-2020


In [39]:
for county in counties:
    props = county["properties"]
    geo_id = props["geo_id"]
    cases = props["cases"]
    deaths = props["deaths"]
    county_time_data[geo_id][todays_date] = {"cases":cases, "deaths":deaths}

#### Add time series to county geojson

In [40]:
for county in counties:
    geo_id = county["properties"]["geo_id"]
    county["properties"]["time_series"] = county_time_data[geo_id]

In [41]:
counties[0]

{'type': 'Feature',
 'properties': {'geo_id': '0500000US01001',
  'county': '001',
  'name': 'Autauga',
  'lsad': 'County',
  'censusarea': 594.436,
  'state': '01',
  'statename': 'Alabama',
  'stateabbr': 'AL',
  'population': 55601,
  'land_area': '594.44',
  'housing_units': '22135',
  'population_density': 93.53509185115402,
  'comorbid_diabetes': 15.45,
  'comorbid_smoking': 18.41,
  'comorbid_cancer': 0.4956,
  'comorbid_obesity': 37.945,
  'comorbid_hypertension': 40.335,
  'lat': -86.64274644826507,
  'long': 32.53492681419619,
  'age0-19': 14424,
  'age20-44': 17583,
  'age45-54': 7777,
  'age55-64': 7164,
  'age65-74': 4900,
  'age75-84': 2882,
  'age85+': 871,
  'cases': 24,
  'deaths': 2,
  'active': 22,
  'recovered': 0,
  'time_cases_update': '4/09 1:05PM',
  'pc_cases': 43.16469128253089,
  'pc_deaths': 3.597057606877574,
  'time_series': {'3-15-2020': {'cases': 0, 'deaths': None},
   '3-17-2020': {'cases': 0, 'deaths': None},
   '3-18-2020': {'cases': 0, 'deaths': 0},


#### Save latest county time series data to file

In [42]:
with open(county_data_folder/"countyTimeData.json", 'w') as f:
    json.dump(county_time_data, f)

### Add State time series

In [43]:
with open(state_data_folder/'stateTimeData.json', 'r') as f:
    state_time_data = json.load(f)

In [44]:
state_time_data

{'Alabama': {'3-13-2020': {'cases': 5,
   'deaths': 0,
   'recovered': 0,
   'test_total': 12,
   'test_positive': 1,
   'test_negative': 11},
  '3-14-2020': {'cases': 6,
   'deaths': 0,
   'recovered': 0,
   'test_total': 74,
   'test_positive': 6,
   'test_negative': 22},
  '3-15-2020': {'cases': 12,
   'deaths': 0,
   'recovered': 0,
   'test_total': 86,
   'test_positive': 12,
   'test_negative': 28},
  '3-16-2020': {'cases': 29,
   'deaths': 0,
   'recovered': 0,
   'test_total': 96,
   'test_positive': 28,
   'test_negative': 28},
  '3-17-2020': {'cases': 39,
   'deaths': 0,
   'recovered': 0,
   'test_total': 64,
   'test_positive': 36,
   'test_negative': 28},
  '3-18-2020': {'cases': 46,
   'deaths': 0,
   'recovered': 0,
   'test_total': 74,
   'test_positive': 46,
   'test_negative': 28},
  '3-19-2020': {'cases': 78,
   'deaths': 0,
   'recovered': 0,
   'test_total': 96,
   'test_positive': 68,
   'test_negative': 28},
  '3-20-2020': {'cases': 83,
   'deaths': 0,
   'recove

#### Add today's data to state time series

This makes it look like curve is flattening, so we adding it now, but dont display it until the next day

In [45]:
for cur_state in state_json["features"]:
    props = cur_state["properties"]
    statename = props["statename"]
    state_time_data[statename][todays_date] = {
        "cases":props["cases"], 
        "deaths":props["deaths"],
        "recovered":props["recovered"],
        "test_total":props["test_total"],
        "test_negative":props["test_negative"],
        "test_positive":props["test_positive"],
    }

#### Add old state date to state time series

In [46]:
state_time_data["Alabama"]

{'3-13-2020': {'cases': 5,
  'deaths': 0,
  'recovered': 0,
  'test_total': 12,
  'test_positive': 1,
  'test_negative': 11},
 '3-14-2020': {'cases': 6,
  'deaths': 0,
  'recovered': 0,
  'test_total': 74,
  'test_positive': 6,
  'test_negative': 22},
 '3-15-2020': {'cases': 12,
  'deaths': 0,
  'recovered': 0,
  'test_total': 86,
  'test_positive': 12,
  'test_negative': 28},
 '3-16-2020': {'cases': 29,
  'deaths': 0,
  'recovered': 0,
  'test_total': 96,
  'test_positive': 28,
  'test_negative': 28},
 '3-17-2020': {'cases': 39,
  'deaths': 0,
  'recovered': 0,
  'test_total': 64,
  'test_positive': 36,
  'test_negative': 28},
 '3-18-2020': {'cases': 46,
  'deaths': 0,
  'recovered': 0,
  'test_total': 74,
  'test_positive': 46,
  'test_negative': 28},
 '3-19-2020': {'cases': 78,
  'deaths': 0,
  'recovered': 0,
  'test_total': 96,
  'test_positive': 68,
  'test_negative': 28},
 '3-20-2020': {'cases': 83,
  'deaths': 0,
  'recovered': 0,
  'test_total': 109,
  'test_positive': 81,
  '

In [47]:
for state in state_json["features"]:
    statename = state["properties"]["statename"]
    if statename in state_time_data.keys():
        state["properties"]["time_series"] = state_time_data[statename]

#### Save latest state time series data to file

In [48]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'population': 4903185,
  'beds': 19000,
  'statename': 'Alabama',
  'population_density': 94.65,
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'age0-19': 1217278,
  'age20-44': 1561062,
  'age45-54': 627458,
  'age55-64': 655179,
  'age65-74': 487721,
  'age75-84': 249308,
  'age85+': 89865,
  'comorbid_obesity': 36.2,
  'comorbid_diabetes': 16.19,
  'comorbid_cancer': 0.4578,
  'comorbid_smoking': 17.48,
  'comorbid_hypertension': 0.4229768229426384,
  'cases': 2499,
  'deaths': 67,
  'active': 2432,
  'recovered': 0,
  'time_cases_update': '4/09 1:05PM',
  'time_tests_updated': '4/09 0:00AM',
  'test_grade': 'B',
  'test_positive': 2547,
  'test_negative': 18058,
  'test_total': 20605,
  'pc_cases': 50.96687153350322,
  'pc_active': 49.6004127929091,
  'pc_deaths': 1.366458740594124,
  'pc_tests': 420.2370499991332,
  'time_series': {'3-13-2020': {'cases': 5,
    'deaths': 0,
    'recovered': 0,
    'test_

In [49]:
with open(state_data_folder/"stateTimeData.json", 'w') as f:
    json.dump(state_time_data, f)

## Calculate Risk

### Add in county risk

In [50]:
#111 is to convert degrees to kilometers
def get_distance(c0, c1):
    lat_dist = abs(c0[0])-abs(c1[0])
    lng_dist = abs(c0[1])-abs(c1[1])
    distance = 111 * math.sqrt(lat_dist**2 + lng_dist**2)
    return float(distance)

#### County Local Risk

In [51]:
def calc_county_local_risk(props):
    cases = props["cases"]
    try:
        population = props["population"]
    except KeyError:
        print_and_log(f"{props['name']}, {props['statename']}")
        raise KeyError
    if cases == 'NaN': 
        print_and_log('NaN cases found')
        cases = 0
    return cases/(population/100000) if population != -1 else -1

In [52]:
print_and_log("\nCalculating local county risk")
for county in counties:
    county["properties"]["risk_local"] = calc_county_local_risk(county["properties"])


Calculating local county risk


#### County Neighbor Risk

In [53]:
def calc_county_neighbor_risk(risks):
    total_neighbor_risk = 0
    num_risks = len(risks)
    if(num_risks == 0): return 0
    total_cases = 0
    total_pop = 0
    for risk in risks.values():
        distance, neighbor_cases, neighbor_pop = risk.values()
        #total_neighbor_risk += (float(neighbor_cases)*(2**((-distance-50)/50)))
        exp = 2**((-distance-50)/50)
        total_cases += float(neighbor_cases)*(2**((-distance-50)/50))
        total_pop += (float(neighbor_pop)*(2**((-distance-50)/50)))/100000
    total_neighbor_risk = total_cases/total_pop
    return total_neighbor_risk

In [54]:
def get_county_all_neighbor_risk(props):
    risk_details = {}
    for county in counties:
        neighbor_props = county["properties"]
        neighbor_risk = get_county_neighbor_risk(props, neighbor_props)
        if(neighbor_risk != {}): risk_details[neighbor_props["geo_id"]] = neighbor_risk
    props["risk_nearby"] = calc_county_neighbor_risk(risk_details)
    props["risk_total"] = props["risk_nearby"] + props["risk_local"]

In [55]:
MAX_DISTANCE = 100
def get_county_neighbor_risk(props, neighbor_props):
    geoID1 = props["geo_id"]
    geoID2 = neighbor_props["geo_id"]
    centroid = [props["lat"], props["long"]]
    centroid_neighbor = [neighbor_props["lat"], neighbor_props["long"]]
    distance = get_distance(centroid, centroid_neighbor)
    neighbor_cases = neighbor_props["cases"]
    neighbor_population = neighbor_props["population"]
    if(geoID1 == geoID2 or distance > MAX_DISTANCE or neighbor_cases == "NaN"): 
        return {}
    else:
        return {"distance":distance, "cases":neighbor_cases, "pop":neighbor_population}

In [56]:
print_and_log("\nCalculating local county risk")
for county in counties:
    get_county_all_neighbor_risk(county["properties"])


Calculating local county risk


### Add in state risk

####  Local State Risk

In [57]:
#111 is to convert degrees to kilometers
def get_distance(c0, c1):
    lat_dist = abs(c0[0])-abs(c1[0])
    lng_dist = abs(c0[1])-abs(c1[1])
    distance = 111 * math.sqrt(lat_dist**2 + lng_dist**2)
    return distance

In [58]:
def calc_state_local_risk(props):
    #changed from props["active"] change back when possible
    cases = props["cases"]
    population = props.get("population", -1)
    if cases == 'NaN': cases = 0
    return cases/(population/100000) if population != -1 else -1

In [59]:
print_and_log("Calculating local state risk")
for state in state_json["features"]:
    state["properties"]["risk_local"] = calc_state_local_risk(state["properties"])

Calculating local state risk


#### Neighbor State Risk

In [60]:
def get_state_all_neighbor_risk(props):
    risk_details = {}
    for state in state_json["features"]:
        neighbor_props = state["properties"]
        neighbor_risk = get_state_neighbor_risk(props, neighbor_props)
        if(neighbor_risk != {}): 
            risk_details[neighbor_props["abbr"]] = neighbor_risk
    props["risk_nearby"] = calc_state_neighbor_risk(risk_details)
    props["risk_total"] = props["risk_nearby"] + props["risk_local"]

In [61]:
def calc_state_neighbor_risk(risks):
    num_risks = len(risks)
    total_neighbor_risk = 0
    for risk in risks.values():
        distance, neighbor_cases, neighbor_pop = risk.values()
        total_neighbor_risk += (neighbor_cases*(2**((-distance-150)/50)))
    return total_neighbor_risk

In [62]:
def get_state_neighbor_risk(props, neighbor_props):
    neighbor_centroid = [neighbor_props["lat"], neighbor_props["long"]]
    #changed from props["active"] change back when possible
    neighbor_cases = neighbor_props["cases"]
    neighbor_pop = neighbor_props["population"]
    centroid = [props["lat"], props["long"]]
    distance = get_distance(centroid, neighbor_centroid)
    if(props["abbr"] == neighbor_props["abbr"] or neighbor_cases == "NaN"):
        return {}
    else:
        return {"DISTANCE":distance, "CASES":neighbor_cases, "POP":neighbor_pop}

In [63]:
print_and_log("Calculating neighbor state risk\n")
for state in state_json["features"]:
    get_state_all_neighbor_risk(state["properties"])

Calculating neighbor state risk



## Add daily change to states

In [64]:
def get_date_string(d):
    return f"{d.month}-{d.day}-{d.year}"

In [65]:
today = datetime.datetime.today()
yesterday = today - datetime.timedelta(days=1)
back_0 = get_date_string(yesterday)
back_1 = get_date_string(yesterday - datetime.timedelta(days=1))
back_3 = get_date_string(yesterday - datetime.timedelta(days=3))
back_7 = get_date_string(yesterday - datetime.timedelta(days=7))

In [66]:
def add_change(state_or_county, feature_name, save_name):
    try:
        latest = state_or_county["properties"]["time_series"][back_0][feature_name]
    except KeyError:
        latest = 0
    try:
        minus1d = state_or_county["properties"]["time_series"][back_1][feature_name]
    except KeyError:
        minus1d = 0
    try:
        minus3d = state_or_county["properties"]["time_series"][back_3][feature_name]
    except KeyError:
        minus3d = 0 
    try:
        minus7d = state_or_county["properties"]["time_series"][back_7][feature_name]
    except KeyError:
        minus7d = 0 
        
    if(latest == 0): percent_growth1d = percent_growth3d = percent_growth7d = "N/A"
    else:
        percent_growth1d = (latest)/minus1d if minus1d != 0 else "N/A"
        percent_growth3d = ((latest)/minus3d)**(1/3) if minus3d != 0 else "N/A"
        percent_growth7d = ((latest)/minus7d)**(1/7) if minus7d != 0 else "N/A"
    state_or_county["properties"][save_name + "24hr"] = percent_growth1d
    state_or_county["properties"][save_name + "72hr"] = percent_growth3d
    state_or_county["properties"][save_name + "1w"] = percent_growth7d

In [67]:
for state in state_json["features"]:
    add_change(state, "cases", "growth_cases")
    add_change(state, "deaths", "growth_deaths")
    add_change(state, "test_total", "growth_tests")

In [68]:
for county in county_json["features"]:
    add_change(county, "cases", "growth_cases")
    add_change(county, "deaths", "growth_deaths")

In [69]:
county_json["features"][1441]

{'type': 'Feature',
 'properties': {'geo_id': '0500000US27017',
  'county': '017',
  'name': 'Carlton',
  'lsad': 'County',
  'censusarea': 861.381,
  'state': '27',
  'statename': 'Minnesota',
  'stateabbr': 'MN',
  'population': 35837,
  'land_area': '861.38',
  'housing_units': '15656',
  'population_density': 41.604170052706124,
  'comorbid_diabetes': 12.16,
  'comorbid_smoking': 17.27,
  'comorbid_cancer': nan,
  'comorbid_obesity': 35.915,
  'comorbid_hypertension': 36.795,
  'lat': -92.67703838784512,
  'long': 46.592403871196595,
  'age0-19': 8785,
  'age20-44': 10863,
  'age45-54': 4810,
  'age55-64': 5243,
  'age65-74': 3430,
  'age75-84': 1900,
  'age85+': 806,
  'cases': 30,
  'deaths': 0,
  'active': 30,
  'recovered': 0,
  'time_cases_update': '4/09 1:05PM',
  'pc_cases': 83.7123643162095,
  'pc_deaths': 0.0,
  'time_series': {'3-15-2020': {'cases': 0, 'deaths': None},
   '3-17-2020': {'cases': 0, 'deaths': None},
   '3-18-2020': {'cases': 0, 'deaths': 0},
   '3-19-2020':

## Add state rank data

In [70]:
def add_state_rank(feature_name, rank_name):
    case_num = []
    for state in state_json["features"]:
        case_num.append(state["properties"][feature_name])
    ordered = sorted(case_num, reverse=True)
    for state in state_json["features"]:
        state["properties"][rank_name] = ordered.index(state["properties"][feature_name]) + 1

In [71]:
add_state_rank("pc_cases", "rank_cases")
add_state_rank("pc_deaths", "rank_deaths")
add_state_rank("pc_tests", "rank_tests")
add_state_rank("risk_total", "rank_risk_total")

## Add county rank data

In [72]:
def add_county_rank(feature_name, rank_name):
    case_num = []
    for county in county_json["features"]:
        case_num.append(county["properties"][feature_name])
    ordered = sorted(case_num, reverse=True)
    for county in county_json["features"]:
        county["properties"][rank_name] = ordered.index(county["properties"][feature_name]) + 1

In [73]:
def get_counties_in_state(statename):
    return list(filter(lambda x: is_in_state(x, statename), county_json["features"]))

def is_in_state(county, statename):
    return county["properties"]["statename"] == statename

In [74]:
def add_county_state_rank(feature_name, rank_name):
    for state in state_json["features"]:
        statename = state["properties"]["statename"]
        i = 0
        case_num = []
        counties_in_state = get_counties_in_state(statename)
        num_counties = len(counties_in_state)
        for county in counties_in_state:
            case_num.append(county["properties"][feature_name])
        ordered = sorted(case_num, reverse=True)
        for county in counties_in_state:
            county["properties"][rank_name] = ordered.index(county["properties"][feature_name]) + 1
            county["properties"]["num_counties_statewide"] = num_counties

In [75]:
add_county_rank("pc_cases", "rank_cases")
add_county_rank("pc_deaths", "rank_deaths")
add_county_rank("risk_total", "rank_risk_total")

In [76]:
add_county_state_rank("pc_cases", "rank_cases_state")
add_county_state_rank("pc_deaths", "rank_deaths_state")
add_county_state_rank("risk_total", "rank_risk_total_state")

In [77]:
state_json["features"][0]["properties"]

{'population': 4903185,
 'beds': 19000,
 'statename': 'Alabama',
 'population_density': 94.65,
 'lat': 32.78990682135675,
 'long': -86.82778271419518,
 'abbr': 'AL',
 'age0-19': 1217278,
 'age20-44': 1561062,
 'age45-54': 627458,
 'age55-64': 655179,
 'age65-74': 487721,
 'age75-84': 249308,
 'age85+': 89865,
 'comorbid_obesity': 36.2,
 'comorbid_diabetes': 16.19,
 'comorbid_cancer': 0.4578,
 'comorbid_smoking': 17.48,
 'comorbid_hypertension': 0.4229768229426384,
 'cases': 2499,
 'deaths': 67,
 'active': 2432,
 'recovered': 0,
 'time_cases_update': '4/09 1:05PM',
 'time_tests_updated': '4/09 0:00AM',
 'test_grade': 'B',
 'test_positive': 2547,
 'test_negative': 18058,
 'test_total': 20605,
 'pc_cases': 50.96687153350322,
 'pc_active': 49.6004127929091,
 'pc_deaths': 1.366458740594124,
 'pc_tests': 420.2370499991332,
 'time_series': {'3-13-2020': {'cases': 5,
   'deaths': 0,
   'recovered': 0,
   'test_total': 12,
   'test_positive': 1,
   'test_negative': 11},
  '3-14-2020': {'cases':

In [78]:
county_json["features"][0]["properties"]

{'geo_id': '0500000US01001',
 'county': '001',
 'name': 'Autauga',
 'lsad': 'County',
 'censusarea': 594.436,
 'state': '01',
 'statename': 'Alabama',
 'stateabbr': 'AL',
 'population': 55601,
 'land_area': '594.44',
 'housing_units': '22135',
 'population_density': 93.53509185115402,
 'comorbid_diabetes': 15.45,
 'comorbid_smoking': 18.41,
 'comorbid_cancer': 0.4956,
 'comorbid_obesity': 37.945,
 'comorbid_hypertension': 40.335,
 'lat': -86.64274644826507,
 'long': 32.53492681419619,
 'age0-19': 14424,
 'age20-44': 17583,
 'age45-54': 7777,
 'age55-64': 7164,
 'age65-74': 4900,
 'age75-84': 2882,
 'age85+': 871,
 'cases': 24,
 'deaths': 2,
 'active': 22,
 'recovered': 0,
 'time_cases_update': '4/09 1:05PM',
 'pc_cases': 43.16469128253089,
 'pc_deaths': 3.597057606877574,
 'time_series': {'3-15-2020': {'cases': 0, 'deaths': None},
  '3-17-2020': {'cases': 0, 'deaths': None},
  '3-18-2020': {'cases': 0, 'deaths': 0},
  '3-19-2020': {'cases': 0, 'deaths': 0},
  '3-20-2020': {'cases': 0, 

## View Output

In [79]:
county_json["features"][408]

{'type': 'Feature',
 'properties': {'geo_id': '0500000US39089',
  'county': '089',
  'name': 'Licking',
  'lsad': 'County',
  'censusarea': 682.5,
  'state': '39',
  'statename': 'Ohio',
  'stateabbr': 'OH',
  'population': 175769,
  'land_area': '682.50',
  'housing_units': '69291',
  'population_density': 257.5369963369963,
  'comorbid_diabetes': 14.87,
  'comorbid_smoking': 18.87,
  'comorbid_cancer': 0.4763,
  'comorbid_obesity': 38.625,
  'comorbid_hypertension': 37.88500000000001,
  'lat': -82.48315238695075,
  'long': 40.09160968135006,
  'age0-19': 45504,
  'age20-44': 52992,
  'age45-54': 23705,
  'age55-64': 24662,
  'age65-74': 17133,
  'age75-84': 8713,
  'age85+': 3060,
  'cases': 128,
  'deaths': 6,
  'active': 122,
  'recovered': 0,
  'time_cases_update': '4/09 1:05PM',
  'pc_cases': 72.82285272146966,
  'pc_deaths': 3.41357122131889,
  'time_series': {'3-15-2020': {'cases': 0, 'deaths': None},
   '3-17-2020': {'cases': 0, 'deaths': None},
   '3-18-2020': {'cases': 0, 'd

In [80]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'population': 4903185,
  'beds': 19000,
  'statename': 'Alabama',
  'population_density': 94.65,
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'age0-19': 1217278,
  'age20-44': 1561062,
  'age45-54': 627458,
  'age55-64': 655179,
  'age65-74': 487721,
  'age75-84': 249308,
  'age85+': 89865,
  'comorbid_obesity': 36.2,
  'comorbid_diabetes': 16.19,
  'comorbid_cancer': 0.4578,
  'comorbid_smoking': 17.48,
  'comorbid_hypertension': 0.4229768229426384,
  'cases': 2499,
  'deaths': 67,
  'active': 2432,
  'recovered': 0,
  'time_cases_update': '4/09 1:05PM',
  'time_tests_updated': '4/09 0:00AM',
  'test_grade': 'B',
  'test_positive': 2547,
  'test_negative': 18058,
  'test_total': 20605,
  'pc_cases': 50.96687153350322,
  'pc_active': 49.6004127929091,
  'pc_deaths': 1.366458740594124,
  'pc_tests': 420.2370499991332,
  'time_series': {'3-13-2020': {'cases': 5,
    'deaths': 0,
    'recovered': 0,
    'test_

In [108]:
for county in county_json["features"]:
    if county["properties"]["geo_id"] == "0500000US36061": 
        for k, v in county["properties"]["time_series"].items():
            print(k,v)

3-13-2020 {'cases': 38, 'deaths': 0}
3-14-2020 {'cases': 66, 'deaths': 1}
3-15-2020 {'cases': 81, 'deaths': 5}
3-17-2020 {'cases': 159, 'deaths': 10}
3-18-2020 {'cases': 330, 'deaths': 20}
3-19-2020 {'cases': 608, 'deaths': 22}
3-20-2020 {'cases': 1086, 'deaths': 26}
3-23-2020 {'cases': 2646, 'deaths': 17}
3-24-2020 {'cases': 3013, 'deaths': 19}
3-25-2020 {'cases': 3616, 'deaths': 30}
3-26-2020 {'cases': 4046, 'deaths': 43}
3-27-2020 {'cases': 4627, 'deaths': 55}
3-28-2020 {'cases': 5237, 'deaths': 65}
3-29-2020 {'cases': 5582, 'deaths': 93}
3-30-2020 {'cases': 6060, 'deaths': 103}
3-31-2020 {'cases': 6539, 'deaths': 119}
4-1-2020 {'cases': 7022, 'deaths': 132}
4-2-2020 {'cases': 7398, 'deaths': 165}
4-3-2020 {'cases': 8222, 'deaths': 179}
4-4-2020 {'cases': 8781, 'deaths': 217}
4-5-2020 {'cases': 9251, 'deaths': 264}
4-6-2020 {'cases': 9624, 'deaths': 277}
4-7-2020 {'cases': 10098, 'deaths': 348}
4-8-2020 {'cases': 10642, 'deaths': 389}
3-16-2020 {'cases': 114, 'deaths': 7}
3-21-2020 

## Calculate US data in total

In [94]:
us_json = {"properties":
                {
                    "population":0,
                    "beds":0,
                    "population_density":94,
                    "age0-19":0,
                    'age20-44': 0,
                     'age45-54': 0,
                     'age55-64': 0,
                     'age65-74': 0,
                     'age75-84': 0,
                     'age85+': 0,
                     'comorbid_obesity': 39.8,
                     'comorbid_hypertension': 33.2,
                     'comorbid_diabetes': 10.5,
                     'comorbid_cancer': .448,
                     'comorbid_smoking': 15.7,
                     'cases': 0,
                     'deaths': 0,
                     'active': 0,
                     'recovered': 0,
                     'test_grade': 'N/A',
                     'test_positive': 0,
                     'test_negative': 0,
                     'test_total': 0,
                     'risk_local':'N/A',
                     'risk_total':'N/A',
                     'risk_nearby':'N/A',
                     'rank_cases': 'N/A',
                     'rank_deaths': 'N/A',
                     'rank_tests': 'N/A',
                     'rank_risk_total': 'N/A'
                }
              
          }

In [95]:
summable_keys = ["population", "beds", "age0-19", "age20-44", "age45-54", "age55-64", "age65-74", "age75-84", "age85+",
                "cases", "deaths", "active", "recovered", "test_positive", "test_negative", "test_total"]
for state in state_json["features"]:
    props = state["properties"]
    if props["statename"] == "Puerto Rico": continue
    for key in summable_keys:
        us_json["properties"][key] += props[key]
    if "unassigned_cases" in props: us_json["properties"]["cases"] += props["unassigned_cases"]
    if "unassigned_deaths" in props: us_json["properties"]["deaths"] += props["unassigned_deaths"]

### Add Percapita data

In [96]:
us_pop = us_json["properties"]["population"]
us_json["properties"]["pc_cases"] = us_json["properties"]["cases"]/(us_pop/100000)
us_json["properties"]["pc_deaths"] = us_json["properties"]["deaths"]/(us_pop/100000)
us_json["properties"]["pc_active"] = us_json["properties"]["active"]/(us_pop/100000)
us_json["properties"]["pc_tests"] = us_json["properties"]["test_total"]/(us_pop/100000)

### Add time series data

In [97]:
us_time_series = {}
for state in state_json["features"]:
    for date, values in state["properties"]["time_series"].items():
        if not date in us_time_series:
            us_time_series[date] = defaultdict(int)
        for k, v in values.items():
            addend = v if v is not None else 0
            us_time_series[date][k] += addend

In [98]:
us_json["properties"]["time_series"] = us_time_series

In [99]:
state["properties"]["time_series"]

{'3-14-2020': {'cases': 3, 'deaths': 0, 'recovered': 0},
 '3-15-2020': {'cases': 5, 'deaths': 0, 'recovered': 0},
 '3-16-2020': {'cases': 5,
  'deaths': 0,
  'recovered': 0,
  'test_total': 14,
  'test_positive': 5,
  'test_negative': 9},
 '3-17-2020': {'cases': 5,
  'deaths': 0,
  'recovered': 0,
  'test_total': 22,
  'test_positive': 5,
  'test_negative': 13},
 '3-18-2020': {'cases': 5,
  'deaths': 0,
  'recovered': 0,
  'test_total': 57,
  'test_positive': 5,
  'test_negative': 31},
 '3-19-2020': {'cases': 5,
  'deaths': 0,
  'recovered': 0,
  'test_total': 90,
  'test_positive': 5,
  'test_negative': 56},
 '3-20-2020': {'cases': 14,
  'deaths': 0,
  'recovered': 0,
  'test_total': 180,
  'test_positive': 14,
  'test_negative': 114},
 '3-21-2020': {'cases': 21,
  'deaths': 1,
  'recovered': 0,
  'test_total': 235,
  'test_positive': 21,
  'test_negative': 143},
 '3-22-2020': {'cases': 23,
  'deaths': 1,
  'recovered': 0,
  'test_total': 275,
  'test_positive': 23,
  'test_negative':

In [100]:
us_json

{'properties': {'population': 328300544,
  'beds': 1007169,
  'population_density': 94,
  'age0-19': 81982665,
  'age20-44': 108849241,
  'age45-54': 41631699,
  'age55-64': 42272636,
  'age65-74': 30492316,
  'age75-84': 15394374,
  'age85+': 6544503,
  'comorbid_obesity': 39.8,
  'comorbid_hypertension': 33.2,
  'comorbid_diabetes': 10.5,
  'comorbid_cancer': 0.448,
  'comorbid_smoking': 15.7,
  'cases': 376600,
  'deaths': 15645,
  'active': 347187,
  'recovered': 0,
  'test_grade': 'N/A',
  'test_positive': 425889,
  'test_negative': 1801574,
  'test_total': 2243030,
  'risk_local': 'N/A',
  'risk_total': 'N/A',
  'risk_nearby': 'N/A',
  'rank_cases': 'N/A',
  'rank_deaths': 'N/A',
  'rank_tests': 'N/A',
  'rank_risk_total': 'N/A',
  'pc_cases': 114.71196343798931,
  'pc_deaths': 4.765450525723162,
  'pc_active': 105.75279461005097,
  'pc_tests': 683.2245760762431,
  'time_series': {'3-13-2020': defaultdict(int,
               {'cases': 2112,
                'deaths': 47,
         

### Add in daily changes in cases/deaths

In [101]:
add_change(us_json, "cases", "growth_cases")
add_change(us_json, "deaths", "growth_deaths")

## Export

In [102]:
print_and_log("Exporting Files")
# this just makes sure we dont have any encoding problems, taken from stackoverflow
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NpEncoder, self).default(obj)

with open("states.json", 'w') as f:
    json.dump(state_json, f, cls=NpEncoder)

with open("stateData.js", 'w') as f:
    f.write("let stateData = ")
    json.dump(state_json, f, cls=NpEncoder)
    
with open("USData.js", 'w') as f:
    f.write("let USData = ")
    json.dump(us_json, f, cls=NpEncoder)    
    
with open("counties.json", 'w') as f:
    json.dump(county_json, f, cls=NpEncoder)

with open("countyData.js", 'w') as f:
    f.write("let countyData = ")
    json.dump(county_json, f, cls=NpEncoder)

Exporting Files


In [103]:
live_path = Path("/var/www/html/data")
test_path = Path("../data")
if(live_path.exists()):
    print_and_log("Copying data to the /var/www/html")
    shutil.copy("stateData.js", live_path/"stateData.js")
    shutil.copy("countyData.js", live_path/"countyData.js")
    shutil.copy("USData.js", live_path/"USData.js")
    shutil.copy("counties.json", live_path/"counties.json")
    shutil.copy("states.json", live_path/"states.json")
    print_and_log("Data successfully copied to live path")
else:
    print_and_log(f"{live_path} not found, NOT COPYING DATA")
    if(test_path.exists()):
        print_and_log("Copying data to the test path")
        shutil.copy("stateData.js", test_path/"stateData.js")
        shutil.copy("countyData.js", test_path/"countyData.js")
        shutil.copy("USData.js", test_path/"USData.js")
        print_and_log("Data successfully copied to test path")
    else:
        print_and_log(f"{test_path} test path not found, no data exported")

\var\www\html\data not found, NOT COPYING DATA
Copying data to the test path
Data successfully copied to test path


In [104]:
us_json

{'properties': {'population': 328300544,
  'beds': 1007169,
  'population_density': 94,
  'age0-19': 81982665,
  'age20-44': 108849241,
  'age45-54': 41631699,
  'age55-64': 42272636,
  'age65-74': 30492316,
  'age75-84': 15394374,
  'age85+': 6544503,
  'comorbid_obesity': 39.8,
  'comorbid_hypertension': 33.2,
  'comorbid_diabetes': 10.5,
  'comorbid_cancer': 0.448,
  'comorbid_smoking': 15.7,
  'cases': 376600,
  'deaths': 15645,
  'active': 347187,
  'recovered': 0,
  'test_grade': 'N/A',
  'test_positive': 425889,
  'test_negative': 1801574,
  'test_total': 2243030,
  'risk_local': 'N/A',
  'risk_total': 'N/A',
  'risk_nearby': 'N/A',
  'rank_cases': 'N/A',
  'rank_deaths': 'N/A',
  'rank_tests': 'N/A',
  'rank_risk_total': 'N/A',
  'pc_cases': 114.71196343798931,
  'pc_deaths': 4.765450525723162,
  'pc_active': 105.75279461005097,
  'pc_tests': 683.2245760762431,
  'time_series': {'3-13-2020': defaultdict(int,
               {'cases': 2112,
                'deaths': 47,
         

### Keep a record of each update for future time series use

In [105]:
with open(f"logs/data_logs/state-{date_str}.json", 'w') as f:
    json.dump(state_json, f, cls=NpEncoder)

In [106]:
with open(f"logs/data_logs/county-{date_str}.json", 'w') as f:
    json.dump(county_json, f, cls=NpEncoder)