In [1]:
import pandas as pd
import numpy as np
import json
from collections import defaultdict
import requests
import math
from pathlib import Path
import datetime

## Setup

In [2]:
data_folder = Path('../../data/state')

In [3]:
with open(data_folder/"usStates.json", 'r') as f:
    state_json = json.load(f)

In [4]:
state_json

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'id': '01',
   'properties': {'name': 'Alabama',
    'population': 4903185,
    'beds': 19000,
    'density': 94.65},
   'geometry': {'type': 'Polygon',
    'coordinates': [[[-87.359296, 35.00118],
      [-85.606675, 34.984749],
      [-85.431413, 34.124869],
      [-85.184951, 32.859696],
      [-85.069935, 32.580372],
      [-84.960397, 32.421541],
      [-85.004212, 32.322956],
      [-84.889196, 32.262709],
      [-85.058981, 32.13674],
      [-85.053504, 32.01077],
      [-85.141136, 31.840985],
      [-85.042551, 31.539753],
      [-85.113751, 31.27686],
      [-85.004212, 31.003013],
      [-85.497137, 30.997536],
      [-87.600282, 30.997536],
      [-87.633143, 30.86609],
      [-87.408589, 30.674397],
      [-87.446927, 30.510088],
      [-87.37025, 30.427934],
      [-87.518128, 30.280057],
      [-87.655051, 30.247195],
      [-87.90699, 30.411504],
      [-87.934375, 30.657966],
      [-88.011052, 30.685351

### Change 'name' to 'statename'

In [5]:
for state in state_json["features"]:
    state["properties"]["statename"] = state["properties"]["name"]
    del state["properties"]["name"]

## Add state abbreviations

In [6]:
state_to_abbr = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Palau': 'PW',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

## Add state centroids

This is pretty awful but we actually do two runs here and grab two different sets of centroids. The first we have all 52, but they are slightly inaccurate. The 2nd set is accurate but some were corrupted and we only have 45 or so. It works so it's just not priority to fix right now 

In [7]:
with open(data_folder/"dataCovidState.json", 'r') as f:
    centroids_json = json.load(f)

In [8]:
for state in state_json["features"]:
    state_id = state["id"]
    state["properties"]["lat"] = centroids_json[state_id]["LAT"]
    state["properties"]["long"] = centroids_json[state_id]["LONG"]

In [9]:
with open(data_folder/"stateCentroids.json", 'r') as f:
    centroids_json = json.load(f)

In [10]:
for state in centroids_json['features']:
    name = state["properties"]["name"]
    long,lat = state["geometry"]["coordinates"]
    for state2 in state_json["features"]:
        if state2["properties"]["statename"] == name:
            state2["properties"]["lat"] = lat
            state2["properties"]["long"] = long
            break

## Add state abbreviation


In [11]:
for state in state_json["features"]:
    state["properties"]["abbr"] = state_to_abbr[state["properties"]["statename"]]

## Get and add covid data

In [12]:
api_url = "https://covid19.mathdro.id/api/countries/USA/confirmed"
state_covid = requests.get(api_url).json()

In [13]:
#includes ms which fromtimestamp doesnt accept so we cut it off
def get_str_from_timestamp(timestamp):
    timestamp = int(str(timestamp)[:-3])
    cur = datetime.datetime.fromtimestamp(timestamp)
    return cur.strftime('%#m/%d %#I:%M%p')

In [14]:
keys = ["deaths", "recovered", "active"]
for state1 in state_covid:
    if(state1 == {}): continue
    for state2 in state_json["features"]:
        if(state1["provinceState"] == state2["properties"]["statename"]):
            for key in keys:
                state2["properties"][key] = state1[key]
            state2["properties"]["cases"] = state1["confirmed"]
            state2["properties"]["time_cases_update"] = get_str_from_timestamp(state1["lastUpdate"])    


In [15]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'population': 4903185,
  'beds': 19000,
  'density': 94.65,
  'statename': 'Alabama',
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'deaths': 0,
  'recovered': 0,
  'active': 78,
  'cases': 78,
  'time_cases_update': '3/19 7:43PM'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.359296, 35.00118],
    [-85.606675, 34.984749],
    [-85.431413, 34.124869],
    [-85.184951, 32.859696],
    [-85.069935, 32.580372],
    [-84.960397, 32.421541],
    [-85.004212, 32.322956],
    [-84.889196, 32.262709],
    [-85.058981, 32.13674],
    [-85.053504, 32.01077],
    [-85.141136, 31.840985],
    [-85.042551, 31.539753],
    [-85.113751, 31.27686],
    [-85.004212, 31.003013],
    [-85.497137, 30.997536],
    [-87.600282, 30.997536],
    [-87.633143, 30.86609],
    [-87.408589, 30.674397],
    [-87.446927, 30.510088],
    [-87.37025, 30.427934],
    [-87.518128, 30.280057],
    [-87.655051, 30.247195],
    [-87

## Get testing data

In [16]:
#https://covidtracking.com/api/states/info <- this api has info about where the data comes from
api_url = 'https://covidtracking.com/api/states'
state_covid = requests.get(api_url).json()

In [17]:
state_covid

[{'state': 'AK',
  'positive': 9,
  'negative': 504,
  'pending': None,
  'death': None,
  'total': 513,
  'lastUpdateEt': '3/19 16:30',
  'checkTimeEt': '3/20 01:09'},
 {'state': 'AL',
  'positive': 78,
  'negative': 28,
  'pending': None,
  'death': 0,
  'total': 106,
  'lastUpdateEt': '3/19 17:30',
  'checkTimeEt': '3/20 01:14'},
 {'state': 'AR',
  'positive': 62,
  'negative': 351,
  'pending': 146,
  'death': None,
  'total': 559,
  'lastUpdateEt': '3/19 15:31',
  'checkTimeEt': '3/20 01:16'},
 {'state': 'AS',
  'positive': 0,
  'negative': None,
  'pending': None,
  'death': 0,
  'total': 0,
  'lastUpdateEt': '3/19 00:00',
  'checkTimeEt': '3/20 03:07'},
 {'state': 'AZ',
  'positive': 44,
  'negative': 177,
  'pending': 131,
  'death': 0,
  'total': 352,
  'lastUpdateEt': '3/19 00:00',
  'checkTimeEt': '3/20 01:22'},
 {'state': 'CA',
  'positive': 1058,
  'negative': 8787,
  'pending': None,
  'death': 19,
  'total': 9845,
  'lastUpdateEt': '3/20 01:25',
  'checkTimeEt': '3/20 01

In [18]:
def format_test_time(test_time):
    d, h = test_time.split(' ')
    h, m = h.split(":")
    h = int(h)
    am_pm = "AM" if h <= 12 else "PM"
    if(am_pm) == "pm": h-=12
    return f"{d} {h}:{m}{am_pm}"

In [19]:
for state1 in state_covid:
    if(state1 == {}): continue
    for state2 in state_json["features"]:
        if(state1["state"] == state2["properties"]["abbr"]):
            state2["properties"]["time_tests_updated"] = format_test_time(state1["lastUpdateEt"])
            testing_keys = ["test_positive", "test_negative", "test_pending", "test_total"]
            for key in testing_keys:
                state2["properties"][key] = state1[key.split('_')[1]]

In [20]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'population': 4903185,
  'beds': 19000,
  'density': 94.65,
  'statename': 'Alabama',
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'deaths': 0,
  'recovered': 0,
  'active': 78,
  'cases': 78,
  'time_cases_update': '3/19 7:43PM',
  'time_tests_updated': '3/19 17:30PM',
  'test_positive': 78,
  'test_negative': 28,
  'test_pending': None,
  'test_total': 106},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.359296, 35.00118],
    [-85.606675, 34.984749],
    [-85.431413, 34.124869],
    [-85.184951, 32.859696],
    [-85.069935, 32.580372],
    [-84.960397, 32.421541],
    [-85.004212, 32.322956],
    [-84.889196, 32.262709],
    [-85.058981, 32.13674],
    [-85.053504, 32.01077],
    [-85.141136, 31.840985],
    [-85.042551, 31.539753],
    [-85.113751, 31.27686],
    [-85.004212, 31.003013],
    [-85.497137, 30.997536],
    [-87.600282, 30.997536],
    [-87.633143, 30.86609],
    [-87.408589, 30.6

## Risks

### Local Risk

In [21]:
#111 is to convert degrees to kilometers
def get_distance(c0, c1):
    lat_dist = abs(c0[0])-abs(c1[0])
    lng_dist = abs(c0[1])-abs(c1[1])
    distance = 111 * math.sqrt(lat_dist**2 + lng_dist**2)
    return distance

In [22]:
def calc_state_local_risk(props):
    cases = props["active"]
    population = props.get("population", -1)
    if cases == 'NaN': cases = 0
    return cases/population if population != -1 else -1

In [23]:
for state in state_json["features"]:
    state["properties"]["risk_local"] = calc_state_local_risk(state["properties"])

In [24]:
def get_state_all_neighbor_risk(props):
    risk_details = {}
    for state in state_json["features"]:
        neighbor_props = state["properties"]
        neighbor_risk = get_state_neighbor_risk(props, neighbor_props)
        if(neighbor_risk != {}): 
            risk_details[neighbor_props["abbr"]] = neighbor_risk
    props["risk_nearby"] = calc_state_neighbor_risk(risk_details)/props["population"]
    props["risk_total"] = props["risk_nearby"] + props["risk_local"]

In [25]:
def calc_state_neighbor_risk(risks):
    num_risks = len(risks)
    total_neighbor_risk = 0
    for risk in risks.values():
        distance, neighbor_cases = risk.values()
        total_neighbor_risk += float(neighbor_cases)*(2**(-1*math.log2(float(distance))))
        print("RISK: ", total_neighbor_risk)
    return total_neighbor_risk

In [26]:
def get_state_neighbor_risk(props, neighbor_props):
    neighbor_centroid = [neighbor_props["lat"], neighbor_props["long"]]
    neighbor_cases = neighbor_props["active"]
    centroid = [props["lat"], props["long"]]
    distance = get_distance(centroid, neighbor_centroid)
    if(props["abbr"] == neighbor_props["abbr"] or neighbor_cases == "NaN"):
        return {}
    else:
        return {"DISTANCE":distance, "CASES":neighbor_cases}

In [27]:
for state in state_json["features"]:
    get_state_all_neighbor_risk(state["properties"])

RISK:  0.0010906076851244046
RISK:  0.01738439534735684
RISK:  0.11053906709139122
RISK:  0.38607138278697234
RISK:  0.510765715034937
RISK:  0.5952397147563683
RISK:  0.616171133539917
RISK:  0.6715370685133405
RISK:  1.3087944376138987
RISK:  2.0461717342935337
RISK:  2.049492704168625
RISK:  2.0563647856205
RISK:  2.5487148947812646
RISK:  2.621856635692008
RISK:  2.656523567084588
RISK:  2.6803014813286166
RISK:  2.767140034032735
RISK:  3.399452547682746
RISK:  3.4211058863404666
RISK:  3.500990984507039
RISK:  3.667656515726654
RISK:  3.923945066139619
RISK:  3.97581095877365
RISK:  4.131467200567133
RISK:  4.174586519324176
RISK:  4.178270555745182
RISK:  4.194965256774926
RISK:  4.222693899477289
RISK:  4.243847274011628
RISK:  4.707543166584946
RISK:  4.7238393932064
RISK:  8.083699662507254
RISK:  8.22261371706862
RISK:  8.231162676199261
RISK:  8.356931834093375
RISK:  8.392028305417815
RISK:  8.4135858163628
RISK:  8.56598637914816
RISK:  8.588427479681712
RISK:  8.70783070

RISK:  5.620854490198526
RISK:  5.660708029327453
RISK:  5.755645074694532
RISK:  6.026556570849418
RISK:  6.086268340406621
RISK:  6.093536279425999
RISK:  6.1366024285120115
RISK:  6.683420458027269
RISK:  6.684348738877948
RISK:  6.824604057545594
RISK:  6.845182336562905
RISK:  6.846289928989318
RISK:  0.023008873731748234
RISK:  0.024813328109348436
RISK:  0.0818455122855891
RISK:  0.10453256593593552
RISK:  2.616248256069685
RISK:  2.8375964151076865
RISK:  2.869548403755843
RISK:  2.8761149798291803
RISK:  2.892253160818314
RISK:  2.998741450100805
RISK:  3.0724018517697824
RISK:  3.077727943644156
RISK:  3.115998680916628
RISK:  3.2530964578115347
RISK:  3.27029103599616
RISK:  3.287294083003568
RISK:  3.30403917387384
RISK:  3.317804126758324
RISK:  3.4500512981199423
RISK:  3.459851287187746
RISK:  3.4837166419545977
RISK:  3.549520524683807
RISK:  3.6438147446210825
RISK:  3.678075804530911
RISK:  3.6939622653312614
RISK:  3.708110372038538
RISK:  3.7176432683236578
RISK:  3

In [28]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'population': 4903185,
  'beds': 19000,
  'density': 94.65,
  'statename': 'Alabama',
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'deaths': 0,
  'recovered': 0,
  'active': 78,
  'cases': 78,
  'time_cases_update': '3/19 7:43PM',
  'time_tests_updated': '3/19 17:30PM',
  'test_positive': 78,
  'test_negative': 28,
  'test_pending': None,
  'test_total': 106,
  'risk_local': 1.5908027129304727e-05,
  'risk_nearby': 2.0324808127650722e-06,
  'risk_total': 1.79405079420698e-05},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.359296, 35.00118],
    [-85.606675, 34.984749],
    [-85.431413, 34.124869],
    [-85.184951, 32.859696],
    [-85.069935, 32.580372],
    [-84.960397, 32.421541],
    [-85.004212, 32.322956],
    [-84.889196, 32.262709],
    [-85.058981, 32.13674],
    [-85.053504, 32.01077],
    [-85.141136, 31.840985],
    [-85.042551, 31.539753],
    [-85.113751, 31.27686],
    [-85.004212, 

## Export

In [29]:
state_json["features"][0]["properties"].keys()

dict_keys(['population', 'beds', 'density', 'statename', 'lat', 'long', 'abbr', 'deaths', 'recovered', 'active', 'cases', 'time_cases_update', 'time_tests_updated', 'test_positive', 'test_negative', 'test_pending', 'test_total', 'risk_local', 'risk_nearby', 'risk_total'])

In [30]:
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NpEncoder, self).default(obj)

with open("../../data/states.json", 'w') as f:
    json.dump(state_json, f, cls=NpEncoder)

with open("../../data/stateData.js", 'w') as f:
    f.write("let stateData = ")
    json.dump(state_json, f, cls=NpEncoder)