In [29]:
import pandas as pd
import numpy as np
import json
from collections import defaultdict
import requests
import math
from pathlib import Path
import datetime

## Setup

In [30]:
data_folder = Path('../../data/state')

In [31]:
with open(data_folder/"usStates.json", 'r') as f:
    state_json = json.load(f)

In [32]:
state_json

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'id': '01',
   'properties': {'name': 'Alabama',
    'population': 4903185,
    'beds': 19000,
    'density': 94.65},
   'geometry': {'type': 'Polygon',
    'coordinates': [[[-87.359296, 35.00118],
      [-85.606675, 34.984749],
      [-85.431413, 34.124869],
      [-85.184951, 32.859696],
      [-85.069935, 32.580372],
      [-84.960397, 32.421541],
      [-85.004212, 32.322956],
      [-84.889196, 32.262709],
      [-85.058981, 32.13674],
      [-85.053504, 32.01077],
      [-85.141136, 31.840985],
      [-85.042551, 31.539753],
      [-85.113751, 31.27686],
      [-85.004212, 31.003013],
      [-85.497137, 30.997536],
      [-87.600282, 30.997536],
      [-87.633143, 30.86609],
      [-87.408589, 30.674397],
      [-87.446927, 30.510088],
      [-87.37025, 30.427934],
      [-87.518128, 30.280057],
      [-87.655051, 30.247195],
      [-87.90699, 30.411504],
      [-87.934375, 30.657966],
      [-88.011052, 30.685351

## Add state abbreviations

In [33]:
state_to_abbr = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Palau': 'PW',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

## Add state centroids

This is pretty awful but we actually do two runs here and grab two different sets of centroids. The first we have all 52, but they are slightly inaccurate. The 2nd set is accurate but some were corrupted and we only have 45 or so. It works so it's just not priority to fix right now 

In [34]:
with open(data_folder/"dataCovidState.json", 'r') as f:
    centroids_json = json.load(f)

In [35]:
for state in state_json["features"]:
    state_id = state["id"]
    state["properties"]["lat"] = centroids_json[state_id]["LAT"]
    state["properties"]["long"] = centroids_json[state_id]["LONG"]

In [36]:
with open(data_folder/"stateCentroids.json", 'r') as f:
    centroids_json = json.load(f)

In [37]:
for state in centroids_json['features']:
    name = state["properties"]["name"]
    long,lat = state["geometry"]["coordinates"]
    for state2 in state_json["features"]:
        if state2["properties"]["name"] == name:
            state2["properties"]["lat"] = lat
            state2["properties"]["long"] = long
            break

## Add state abbreviation


In [38]:
for state in state_json["features"]:
    state["properties"]["abbr"] = state_to_abbr[state["properties"]["name"]]

## Get and add covid data

In [39]:
api_url = "https://covid19.mathdro.id/api/countries/USA/confirmed"
state_covid = requests.get(api_url).json()

In [40]:
#includes ms which fromtimestamp doesnt accept so we cut it off
def get_str_from_timestamp(timestamp):
    timestamp = int(str(timestamp)[:-3])
    cur = datetime.datetime.fromtimestamp(timestamp)
    return cur.strftime('%#m/%d %#I:%M%p')

In [41]:
keys = ["confirmed", "deaths", "recovered", "active"]
for state1 in state_covid:
    if(state1 == {}): continue
    for state2 in state_json["features"]:
        if(state1["provinceState"] == state2["properties"]["name"]):
            for key in keys:
                state2["properties"][key] = state1[key]
            state2["properties"]["time_cases_update"] = get_str_from_timestamp(state1["lastUpdate"])    


In [42]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'name': 'Alabama',
  'population': 4903185,
  'beds': 19000,
  'density': 94.65,
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'confirmed': 46,
  'deaths': 0,
  'recovered': 0,
  'active': 46,
  'time_cases_update': '3/18 3:33PM'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.359296, 35.00118],
    [-85.606675, 34.984749],
    [-85.431413, 34.124869],
    [-85.184951, 32.859696],
    [-85.069935, 32.580372],
    [-84.960397, 32.421541],
    [-85.004212, 32.322956],
    [-84.889196, 32.262709],
    [-85.058981, 32.13674],
    [-85.053504, 32.01077],
    [-85.141136, 31.840985],
    [-85.042551, 31.539753],
    [-85.113751, 31.27686],
    [-85.004212, 31.003013],
    [-85.497137, 30.997536],
    [-87.600282, 30.997536],
    [-87.633143, 30.86609],
    [-87.408589, 30.674397],
    [-87.446927, 30.510088],
    [-87.37025, 30.427934],
    [-87.518128, 30.280057],
    [-87.655051, 30.247195],
    [-87.

## Get testing data

In [43]:
#https://covidtracking.com/api/states/info <- this api has info about where the data comes from
api_url = 'https://covidtracking.com/api/states'
state_covid = requests.get(api_url).json()

In [44]:
state_covid

[{'state': 'AK',
  'positive': 6,
  'negative': 406,
  'pending': None,
  'death': None,
  'total': 412,
  'lastUpdateEt': '3/17 23:00',
  'checkTimeEt': '3/18 16:11'},
 {'state': 'AL',
  'positive': 46,
  'negative': 28,
  'pending': None,
  'death': 0,
  'total': 74,
  'lastUpdateEt': '3/18 11:05',
  'checkTimeEt': '3/18 15:12'},
 {'state': 'AR',
  'positive': 33,
  'negative': 236,
  'pending': 50,
  'death': None,
  'total': 319,
  'lastUpdateEt': '3/18 00:00',
  'checkTimeEt': '3/18 15:14'},
 {'state': 'AS',
  'positive': 0,
  'negative': None,
  'pending': None,
  'death': 0,
  'total': 0,
  'lastUpdateEt': '3/14 00:00',
  'checkTimeEt': '3/18 16:25'},
 {'state': 'AZ',
  'positive': 28,
  'negative': 148,
  'pending': 102,
  'death': 0,
  'total': 278,
  'lastUpdateEt': '3/18 00:00',
  'checkTimeEt': '3/18 16:05'},
 {'state': 'CA',
  'positive': 611,
  'negative': 7981,
  'pending': None,
  'death': 13,
  'total': 8592,
  'lastUpdateEt': '3/17 21:00',
  'checkTimeEt': '3/18 16:26

In [45]:
def format_test_time(test_time):
    d, h = test_time.split(' ')
    h, m = h.split(":")
    h = int(h)
    am_pm = "AM" if h <= 12 else "PM"
    if(am_pm) == "pm": h-=12
    return f"{d} {h}:{m}{am_pm}"

In [46]:
for state1 in state_covid:
    if(state1 == {}): continue
    for state2 in state_json["features"]:
        if(state1["state"] == state2["properties"]["abbr"]):
            state2["properties"]["time_tests_updated"] = format_test_time(state1["lastUpdateEt"])
            testing_keys = ["test_positive", "test_negative", "test_pending", "test_total"]
            for key in testing_keys:
                state2["properties"][key] = state1[key.split('_')[1]]

In [47]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'name': 'Alabama',
  'population': 4903185,
  'beds': 19000,
  'density': 94.65,
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'confirmed': 46,
  'deaths': 0,
  'recovered': 0,
  'active': 46,
  'time_cases_update': '3/18 3:33PM',
  'time_tests_updated': '3/18 11:05AM',
  'test_positive': 46,
  'test_negative': 28,
  'test_pending': None,
  'test_total': 74},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.359296, 35.00118],
    [-85.606675, 34.984749],
    [-85.431413, 34.124869],
    [-85.184951, 32.859696],
    [-85.069935, 32.580372],
    [-84.960397, 32.421541],
    [-85.004212, 32.322956],
    [-84.889196, 32.262709],
    [-85.058981, 32.13674],
    [-85.053504, 32.01077],
    [-85.141136, 31.840985],
    [-85.042551, 31.539753],
    [-85.113751, 31.27686],
    [-85.004212, 31.003013],
    [-85.497137, 30.997536],
    [-87.600282, 30.997536],
    [-87.633143, 30.86609],
    [-87.408589, 30.674

## Risks

### Local Risk

In [48]:
#111 is to convert degrees to kilometers
def get_distance(c0, c1):
    lat_dist = abs(c0[0])-abs(c1[0])
    lng_dist = abs(c0[1])-abs(c1[1])
    distance = 111 * math.sqrt(lat_dist**2 + lng_dist**2)
    return distance

In [49]:
def calc_state_local_risk(props):
    cases = props["active"]
    population = props.get("population", -1)
    if cases == 'NaN': cases = 0
    return cases/population if population != -1 else -1

In [50]:
for state in state_json["features"]:
    state["properties"]["risk_local"] = calc_state_local_risk(state["properties"])

In [51]:
def get_state_all_neighbor_risk(props):
    risk_details = {}
    for state in state_json["features"]:
        neighbor_props = state["properties"]
        neighbor_risk = get_state_neighbor_risk(props, neighbor_props)
        if(neighbor_risk != {}): 
            risk_details[neighbor_props["abbr"]] = neighbor_risk
    props["risk_nearby"] = calc_state_neighbor_risk(risk_details)/props["population"]
    props["risk_total"] = props["risk_nearby"] + props["risk_local"]

In [52]:
def calc_state_neighbor_risk(risks):
    num_risks = len(risks)
    total_neighbor_risk = 0
    for risk in risks.values():
        distance, neighbor_cases = risk.values()
        total_neighbor_risk += float(neighbor_cases)*(2**(-1*math.log2(float(distance))))
    return total_neighbor_risk

In [53]:
def get_state_neighbor_risk(props, neighbor_props):
    print(neighbor_props)
    neighbor_centroid = [neighbor_props["lat"], neighbor_props["long"]]
    neighbor_cases = neighbor_props["active"]
    centroid = [props["lat"], props["long"]]
    distance = get_distance(centroid, neighbor_centroid)
    if(props["abbr"] == neighbor_props["abbr"] or neighbor_cases == "NaN"):
        return {}
    else:
        return {"DISTANCE":distance, "CASES":neighbor_cases}

In [54]:
for state in state_json["features"]:
    get_state_all_neighbor_risk(state["properties"])

{'name': 'Alabama', 'population': 4903185, 'beds': 19000, 'density': 94.65, 'lat': 32.78990682135675, 'long': -86.82778271419518, 'abbr': 'AL', 'confirmed': 46, 'deaths': 0, 'recovered': 0, 'active': 46, 'time_cases_update': '3/18 3:33PM', 'time_tests_updated': '3/18 11:05AM', 'test_positive': 46, 'test_negative': 28, 'test_pending': None, 'test_total': 74, 'risk_local': 9.381657024974583e-06}
{'name': 'Alaska', 'population': 731545, 'beds': 1813, 'density': 1.264, 'lat': 63.588753, 'long': -154.493062, 'abbr': 'AK', 'confirmed': 6, 'deaths': 0, 'recovered': 0, 'active': 6, 'time_cases_update': '3/18 1:53AM', 'time_tests_updated': '3/17 23:00PM', 'test_positive': 6, 'test_negative': 406, 'test_pending': None, 'test_total': 412, 'risk_local': 8.201819436945095e-06}
{'name': 'Arizona', 'population': 7278717, 'beds': 15452, 'density': 57.05, 'lat': 34.29339257053982, 'long': -111.6632955488151, 'abbr': 'AZ', 'confirmed': 27, 'deaths': 0, 'recovered': 0, 'active': 27, 'time_cases_update': 

{'name': 'Missouri', 'population': 6137428, 'beds': 24026, 'density': 87.26, 'lat': 38.36805363515973, 'long': -92.47744231074401, 'abbr': 'MO', 'confirmed': 18, 'deaths': 0, 'recovered': 0, 'active': 18, 'time_cases_update': '3/18 3:33PM', 'time_tests_updated': '3/17 19:15PM', 'test_positive': 13, 'test_negative': 253, 'test_pending': None, 'test_total': 266, 'risk_local': 2.932824629470195e-06, 'risk_nearby': 7.777837806079116e-07, 'risk_total': 3.710608410078107e-06}
{'name': 'Montana', 'population': 1068778, 'beds': 4126, 'density': 6.858, 'lat': 47.033440693500935, 'long': -109.64685632824923, 'abbr': 'MT', 'confirmed': 11, 'deaths': 0, 'recovered': 0, 'active': 11, 'time_cases_update': '3/18 3:33PM', 'time_tests_updated': '3/18 10:15AM', 'test_positive': 10, 'test_negative': 499, 'test_pending': 0, 'test_total': 509, 'risk_local': 1.029212801910219e-05, 'risk_nearby': 3.125154800398655e-06, 'risk_total': 1.3417282819500845e-05}
{'name': 'Nebraska', 'population': 1934408, 'beds': 

In [55]:
state_json["features"][0]

{'type': 'Feature',
 'id': '01',
 'properties': {'name': 'Alabama',
  'population': 4903185,
  'beds': 19000,
  'density': 94.65,
  'lat': 32.78990682135675,
  'long': -86.82778271419518,
  'abbr': 'AL',
  'confirmed': 46,
  'deaths': 0,
  'recovered': 0,
  'active': 46,
  'time_cases_update': '3/18 3:33PM',
  'time_tests_updated': '3/18 11:05AM',
  'test_positive': 46,
  'test_negative': 28,
  'test_pending': None,
  'test_total': 74,
  'risk_local': 9.381657024974583e-06,
  'risk_nearby': 1.105860173648026e-06,
  'risk_total': 1.048751719862261e-05},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.359296, 35.00118],
    [-85.606675, 34.984749],
    [-85.431413, 34.124869],
    [-85.184951, 32.859696],
    [-85.069935, 32.580372],
    [-84.960397, 32.421541],
    [-85.004212, 32.322956],
    [-84.889196, 32.262709],
    [-85.058981, 32.13674],
    [-85.053504, 32.01077],
    [-85.141136, 31.840985],
    [-85.042551, 31.539753],
    [-85.113751, 31.27686],
    [-85.004212, 31.

## Export

In [56]:
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NpEncoder, self).default(obj)

with open("../../data/states.json", 'w') as f:
    json.dump(state_json, f, cls=NpEncoder)

with open("../../data/stateData.js", 'w') as f:
    f.write("let stateData = ")
    json.dump(state_json, f, cls=NpEncoder)