In [1]:
import pandas as pd
import numpy as np
import json
from collections import defaultdict
import requests
import math
from pathlib import Path
import datetime

## Setup

In [2]:
data_folder = Path('../../data/state')

In [3]:
with open(data_folder/"usStates.json", 'r') as f:
    state_json = json.load(f)

In [4]:
state_json

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'id': '01',
   'properties': {'name': 'Alabama',
    'population': 4903185,
    'beds': 19000,
    'density': 94.65},
   'geometry': {'type': 'Polygon',
    'coordinates': [[[-87.359296, 35.00118],
      [-85.606675, 34.984749],
      [-85.431413, 34.124869],
      [-85.184951, 32.859696],
      [-85.069935, 32.580372],
      [-84.960397, 32.421541],
      [-85.004212, 32.322956],
      [-84.889196, 32.262709],
      [-85.058981, 32.13674],
      [-85.053504, 32.01077],
      [-85.141136, 31.840985],
      [-85.042551, 31.539753],
      [-85.113751, 31.27686],
      [-85.004212, 31.003013],
      [-85.497137, 30.997536],
      [-87.600282, 30.997536],
      [-87.633143, 30.86609],
      [-87.408589, 30.674397],
      [-87.446927, 30.510088],
      [-87.37025, 30.427934],
      [-87.518128, 30.280057],
      [-87.655051, 30.247195],
      [-87.90699, 30.411504],
      [-87.934375, 30.657966],
      [-88.011052, 30.685351

### Change 'name' to 'statename'

In [5]:
for state in state_json["features"]:
    state["properties"]["statename"] = state["properties"]["name"]
    del state["properties"]["name"]

## Add state abbreviations

In [6]:
state_to_abbr = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Palau': 'PW',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

## Add state centroids

This is pretty awful but we actually do two runs here and grab two different sets of centroids. The first we have all 52, but they are slightly inaccurate. The 2nd set is accurate but some were corrupted and we only have 45 or so. It works so it's just not priority to fix right now 

In [7]:
with open(data_folder/"dataCovidState.json", 'r') as f:
    centroids_json = json.load(f)

In [8]:
for state in state_json["features"]:
    state_id = state["id"]
    state["properties"]["lat"] = centroids_json[state_id]["LAT"]
    state["properties"]["long"] = centroids_json[state_id]["LONG"]

In [9]:
with open(data_folder/"stateCentroids.json", 'r') as f:
    centroids_json = json.load(f)

In [10]:
for state in centroids_json['features']:
    name = state["properties"]["name"]
    long,lat = state["geometry"]["coordinates"]
    for state2 in state_json["features"]:
        if state2["properties"]["statename"] == name:
            state2["properties"]["lat"] = lat
            state2["properties"]["long"] = long
            break

## Add state abbreviation


In [11]:
for state in state_json["features"]:
    state["properties"]["abbr"] = state_to_abbr[state["properties"]["statename"]]

## Get and add covid data

In [12]:
api_url = "https://covid19.mathdro.id/api/countries/USA/confirmed"
state_covid = requests.get(api_url).json()

In [13]:
#includes ms which fromtimestamp doesnt accept so we cut it off
def get_str_from_timestamp(timestamp):
    timestamp = int(str(timestamp)[:-3])
    cur = datetime.datetime.fromtimestamp(timestamp)
    return cur.strftime('%#m/%d %#I:%M%p')

In [14]:
keys = ["deaths", "recovered", "active"]
for state1 in state_covid:
    if(state1 == {}): continue
    for state2 in state_json["features"]:
        if(state1["provinceState"] == state2["properties"]["statename"]):
            for key in keys:
                state2["properties"][key] = state1[key]
            state2["properties"]["cases"] = state1["confirmed"]
            state2["properties"]["time_cases_update"] = get_str_from_timestamp(state1["lastUpdate"])    


In [15]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'population': 4903185,
  'beds': 19000,
  'density': 94.65,
  'statename': 'Alabama',
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'deaths': 0,
  'recovered': 0,
  'active': 68,
  'cases': 68,
  'time_cases_update': '3/19 12:13PM'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.359296, 35.00118],
    [-85.606675, 34.984749],
    [-85.431413, 34.124869],
    [-85.184951, 32.859696],
    [-85.069935, 32.580372],
    [-84.960397, 32.421541],
    [-85.004212, 32.322956],
    [-84.889196, 32.262709],
    [-85.058981, 32.13674],
    [-85.053504, 32.01077],
    [-85.141136, 31.840985],
    [-85.042551, 31.539753],
    [-85.113751, 31.27686],
    [-85.004212, 31.003013],
    [-85.497137, 30.997536],
    [-87.600282, 30.997536],
    [-87.633143, 30.86609],
    [-87.408589, 30.674397],
    [-87.446927, 30.510088],
    [-87.37025, 30.427934],
    [-87.518128, 30.280057],
    [-87.655051, 30.247195],
    [-8

## Get testing data

In [16]:
#https://covidtracking.com/api/states/info <- this api has info about where the data comes from
api_url = 'https://covidtracking.com/api/states'
state_covid = requests.get(api_url).json()

In [17]:
state_covid

[{'state': 'AK',
  'positive': 6,
  'negative': 400,
  'pending': None,
  'death': None,
  'total': 406,
  'lastUpdateEt': '3/18 16:30',
  'checkTimeEt': '3/19 12:03'},
 {'state': 'AL',
  'positive': 68,
  'negative': 28,
  'pending': None,
  'death': 0,
  'total': 96,
  'lastUpdateEt': '3/19 10:45',
  'checkTimeEt': '3/19 13:55'},
 {'state': 'AR',
  'positive': 46,
  'negative': 310,
  'pending': 113,
  'death': None,
  'total': 469,
  'lastUpdateEt': '3/19 11:23',
  'checkTimeEt': '3/19 13:55'},
 {'state': 'AS',
  'positive': 0,
  'negative': None,
  'pending': None,
  'death': 0,
  'total': 0,
  'lastUpdateEt': '3/14 00:00',
  'checkTimeEt': '3/19 14:02'},
 {'state': 'AZ',
  'positive': 44,
  'negative': 175,
  'pending': 130,
  'death': 0,
  'total': 349,
  'lastUpdateEt': '3/19 00:00',
  'checkTimeEt': '3/19 13:57'},
 {'state': 'CA',
  'positive': 892,
  'negative': 8787,
  'pending': None,
  'death': 17,
  'total': 9679,
  'lastUpdateEt': '3/19 13:30',
  'checkTimeEt': '3/19 12:3

In [18]:
def format_test_time(test_time):
    d, h = test_time.split(' ')
    h, m = h.split(":")
    h = int(h)
    am_pm = "AM" if h <= 12 else "PM"
    if(am_pm) == "pm": h-=12
    return f"{d} {h}:{m}{am_pm}"

In [19]:
for state1 in state_covid:
    if(state1 == {}): continue
    for state2 in state_json["features"]:
        if(state1["state"] == state2["properties"]["abbr"]):
            state2["properties"]["time_tests_updated"] = format_test_time(state1["lastUpdateEt"])
            testing_keys = ["test_positive", "test_negative", "test_pending", "test_total"]
            for key in testing_keys:
                state2["properties"][key] = state1[key.split('_')[1]]

In [20]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'population': 4903185,
  'beds': 19000,
  'density': 94.65,
  'statename': 'Alabama',
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'deaths': 0,
  'recovered': 0,
  'active': 68,
  'cases': 68,
  'time_cases_update': '3/19 12:13PM',
  'time_tests_updated': '3/19 10:45AM',
  'test_positive': 68,
  'test_negative': 28,
  'test_pending': None,
  'test_total': 96},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.359296, 35.00118],
    [-85.606675, 34.984749],
    [-85.431413, 34.124869],
    [-85.184951, 32.859696],
    [-85.069935, 32.580372],
    [-84.960397, 32.421541],
    [-85.004212, 32.322956],
    [-84.889196, 32.262709],
    [-85.058981, 32.13674],
    [-85.053504, 32.01077],
    [-85.141136, 31.840985],
    [-85.042551, 31.539753],
    [-85.113751, 31.27686],
    [-85.004212, 31.003013],
    [-85.497137, 30.997536],
    [-87.600282, 30.997536],
    [-87.633143, 30.86609],
    [-87.408589, 30.6

## Risks

### Local Risk

In [34]:
#111 is to convert degrees to kilometers
def get_distance(c0, c1):
    lat_dist = abs(c0[0])-abs(c1[0])
    lng_dist = abs(c0[1])-abs(c1[1])
    distance = 111 * math.sqrt(lat_dist**2 + lng_dist**2)
    return distance

In [35]:
def calc_state_local_risk(props):
    cases = props["active"]
    population = props.get("population", -1)
    if cases == 'NaN': cases = 0
    return cases/population if population != -1 else -1

In [36]:
for state in state_json["features"]:
    state["properties"]["risk_local"] = calc_state_local_risk(state["properties"])

In [37]:
def get_state_all_neighbor_risk(props):
    risk_details = {}
    for state in state_json["features"]:
        neighbor_props = state["properties"]
        neighbor_risk = get_state_neighbor_risk(props, neighbor_props)
        if(neighbor_risk != {}): 
            risk_details[neighbor_props["abbr"]] = neighbor_risk
    props["risk_nearby"] = calc_state_neighbor_risk(risk_details)/props["population"]
    props["risk_total"] = props["risk_nearby"] + props["risk_local"]

In [38]:
def calc_state_neighbor_risk(risks):
    num_risks = len(risks)
    total_neighbor_risk = 0
    for risk in risks.values():
        distance, neighbor_cases = risk.values()
        total_neighbor_risk += float(neighbor_cases)*(2**(-1*math.log2(float(distance))))
        print("RISK: ", total_neighbor_risk)
    return total_neighbor_risk

In [41]:
def get_state_neighbor_risk(props, neighbor_props):
    neighbor_centroid = [neighbor_props["lat"], neighbor_props["long"]]
    neighbor_cases = neighbor_props["active"]
    centroid = [props["lat"], props["long"]]
    distance = get_distance(centroid, neighbor_centroid)
    if(props["abbr"] == neighbor_props["abbr"] or neighbor_cases == "NaN"):
        return {}
    else:
        return {"DISTANCE":distance, "CASES":neighbor_cases}

In [42]:
for state in state_json["features"]:
    get_state_all_neighbor_risk(state["properties"])

RISK:  0.0010906076851244046
RISK:  0.017022311177085008
RISK:  0.08613706763233632
RISK:  0.32790850670118615
RISK:  0.4279380259770259
RISK:  0.4793805258073847
RISK:  0.49752108875312695
RISK:  0.5287131647944923
RISK:  1.102696752565917
RISK:  1.616465049530428
RISK:  1.618508723299715
RISK:  1.6217953709506119
RISK:  1.9680894668675133
RISK:  2.0361869497844123
RISK:  2.066914457155108
RISK:  2.0836988672097165
RISK:  2.1488277817378054
RISK:  2.709963755893182
RISK:  2.7316170945509026
RISK:  2.8122558257190464
RISK:  2.9423362403294777
RISK:  3.0724162296629465
RISK:  3.1242821222969774
RISK:  3.283115022086246
RISK:  3.3114829949527214
RISK:  3.315167031373727
RISK:  3.3318617324034707
RISK:  3.356345533938536
RISK:  3.3750951159121554
RISK:  3.8413214089906482
RISK:  3.854358390287811
RISK:  6.304515614701284
RISK:  6.441170904147833
RISK:  6.447920082408865
RISK:  6.541982729909506
RISK:  6.578711595249036
RISK:  6.596972075108317
RISK:  6.735991125063743
RISK:  6.75282195046

RISK:  14.624088081912483
RISK:  14.628733286641038
RISK:  14.700832239970879
RISK:  14.716120113320045
RISK:  14.72942830689784
RISK:  15.004824066354717
RISK:  15.47166982022308
RISK:  15.514799111313703
RISK:  15.51796324460272
RISK:  15.57816069359855
RISK:  15.645237584753168
RISK:  15.660123695047476
RISK:  15.746440590193423
RISK:  15.847799162548421
RISK:  16.05406897221271
RISK:  16.0559667933221
RISK:  16.113488471222475
RISK:  16.118028789887138
RISK:  16.119857678818697
RISK:  0.05265142425402303
RISK:  0.053782270179141736
RISK:  0.0678911448898073
RISK:  0.10312144906600668
RISK:  0.33224601827216227
RISK:  0.4270097096465667
RISK:  0.49291740557700053
RISK:  0.5136802531658439
RISK:  0.5496733835597082
RISK:  0.7648552998614857
RISK:  0.9114769835749863
RISK:  0.9134037515858179
RISK:  0.9167935774684234
RISK:  1.3813066499833506
RISK:  1.4892192321402455
RISK:  1.5311144582298164
RISK:  1.5463205676515805
RISK:  1.5939122060748896
RISK:  1.8000933525355514
RISK:  1.8289

RISK:  0.0718076274057445
RISK:  0.07284744998794959
RISK:  0.08549199966625326
RISK:  0.11890798891664622
RISK:  0.3239190829688481
RISK:  0.4030446646357476
RISK:  0.5045242111279254
RISK:  0.5501778476719248
RISK:  0.6501597067226763
RISK:  0.988116094690832
RISK:  1.2505597908653439
RISK:  1.252411851031346
RISK:  1.2552821841089736
RISK:  1.5598478877986335
RISK:  1.6437874277611564
RISK:  1.670136870788015
RISK:  1.6823065755450353
RISK:  1.749793494675163
RISK:  1.973286614185982
RISK:  2.008696458341019
RISK:  2.249915312669395
RISK:  2.49283641467129
RISK:  2.695746131413055
RISK:  2.746815122913416
RISK:  2.7885447856531007
RISK:  2.8075289560588748
RISK:  2.810808249632229
RISK:  2.8242683219670677
RISK:  2.845013279498581
RISK:  2.878961673098007
RISK:  3.9578160078209814
RISK:  3.9675790004614595
RISK:  9.542030358964046
RISK:  9.867076214689334
RISK:  9.873298140590384
RISK:  10.166794226090888
RISK:  10.190415189524844
RISK:  10.206515232291393
RISK:  10.6749928026537
RI

In [43]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'population': 4903185,
  'beds': 19000,
  'density': 94.65,
  'statename': 'Alabama',
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'deaths': 0,
  'recovered': 0,
  'active': 68,
  'cases': 68,
  'time_cases_update': '3/19 12:13PM',
  'time_tests_updated': '3/19 10:45AM',
  'test_positive': 68,
  'test_negative': 28,
  'test_pending': None,
  'test_total': 96,
  'risk_local': 1.3868536471701557e-05,
  'risk_nearby': 1.5922255227594402e-06,
  'risk_total': 1.5460761994460996e-05},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.359296, 35.00118],
    [-85.606675, 34.984749],
    [-85.431413, 34.124869],
    [-85.184951, 32.859696],
    [-85.069935, 32.580372],
    [-84.960397, 32.421541],
    [-85.004212, 32.322956],
    [-84.889196, 32.262709],
    [-85.058981, 32.13674],
    [-85.053504, 32.01077],
    [-85.141136, 31.840985],
    [-85.042551, 31.539753],
    [-85.113751, 31.27686],
    [-85.004212

## Export

In [44]:
state_json["features"][0]["properties"].keys()

dict_keys(['population', 'beds', 'density', 'statename', 'lat', 'long', 'abbr', 'deaths', 'recovered', 'active', 'cases', 'time_cases_update', 'time_tests_updated', 'test_positive', 'test_negative', 'test_pending', 'test_total', 'risk_local', 'risk_nearby', 'risk_total'])

In [45]:
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NpEncoder, self).default(obj)

with open("../../data/states.json", 'w') as f:
    json.dump(state_json, f, cls=NpEncoder)

with open("../../data/stateData.js", 'w') as f:
    f.write("let stateData = ")
    json.dump(state_json, f, cls=NpEncoder)