In [41]:
import pandas as pd
import json
import math
import numpy as np
from pathlib import Path
from collections import defaultdict

## Setup

In [42]:
data_folder = Path('../../data/county')

In [43]:
code_to_state = {'01': 'Alabama',
 '02': 'Alaska',
 '04': 'Arizona',
 '05': 'Arkansas',
 '06': 'California',
 '08': 'Colorado',
 '09': 'Connecticut',
 '10': 'Delaware',
 '11': 'District of Columbia',
 '12': 'Florida',
 '13': 'Georgia',
 '15': 'Hawaii',
 '16': 'Idaho',
 '17': 'Illinois',
 '18': 'Indiana',
 '19': 'Iowa',
 '20': 'Kansas',
 '21': 'Kentucky',
 '22': 'Louisiana',
 '23': 'Maine',
 '24': 'Maryland',
 '25': 'Massachusetts',
 '26': 'Michigan',
 '27': 'Minnesota',
 '28': 'Mississippi',
 '29': 'Missouri',
 '30': 'Montana',
 '31': 'Nebraska',
 '32': 'Nevada',
 '33': 'New Hampshire',
 '34': 'New Jersey',
 '35': 'New Mexico',
 '36': 'New York',
 '37': 'North Carolina',
 '38': 'North Dakota',
 '39': 'Ohio',
 '40': 'Oklahoma',
 '41': 'Oregon',
 '42': 'Pennsylvania',
 '44': 'Rhode Island',
 '45': 'South Carolina',
 '46': 'South Dakota',
 '47': 'Tennessee',
 '48': 'Texas',
 '49': 'Utah',
 '50': 'Vermont',
 '51': 'Virginia',
 '53': 'Washington',
 '54': 'West Virginia',
 '55': 'Wisconsin',
 '56': 'Wyoming',
 '72': 'Puerto Rico'}

In [44]:
state_to_abbr = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Palau': 'PW',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

In [45]:
with open(data_folder/"counties5m.json", 'r') as f:
    county_json = json.load(f)

## Alter Key Names 

This section is to make key names lowercases and consistent

In [46]:
keys = ['GEO_ID', 'COUNTY', 'NAME', 'LSAD', 'CENSUSAREA', 'STATE']

In [47]:
for county in county_json["features"]:
    for key in keys:
        county["properties"][key.lower()] = county["properties"][key]
        del county["properties"][key]

In [48]:
county_json["features"][0]

{'type': 'Feature',
 'properties': {'geo_id': '0500000US02261',
  'county': '261',
  'name': 'Valdez-Cordova',
  'lsad': 'CA',
  'censusarea': 34239.88,
  'state': '02'},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[-147.483828, 60.618636],
     [-147.500009, 60.653852],
     [-147.483828, 60.683358],
     [-147.487635, 60.728092],
     [-147.395312, 60.74332],
     [-147.383891, 60.741417],
     [-147.362, 60.714767],
     [-147.3087, 60.665274],
     [-147.348675, 60.627202],
     [-147.454323, 60.619588],
     [-147.483828, 60.618636]]],
   [[[-147.341061, 60.305499],
     [-147.340109, 60.275042],
     [-147.483828, 60.224598],
     [-147.499057, 60.235067],
     [-147.505719, 60.253151],
     [-147.496201, 60.265524],
     [-147.421962, 60.279801],
     [-147.341061, 60.305499]]],
   [[[-147.217704, 60.293504],
     [-147.19494, 60.304563],
     [-147.183277, 60.32068],
     [-147.185243, 60.323083],
     [-147.195608, 60.326224],
     [-147.211625, 60.324936],
     

## Add State Name

In [49]:
for cur_county in county_json["features"]:
    cur_county["properties"]["statename"] = code_to_state[cur_county["properties"]["state"]]
    cur_county["properties"]["stateabbr"] = state_to_abbr[cur_county["properties"]["statename"]]

## Add population data

In [50]:
alt_names = {"Wade Hampton":"Kusilvak Census Area"}

In [51]:
df_pop = pd.read_csv(data_folder/"population.csv")

In [52]:
d = {}
for row in df_pop.iterrows():
    d[row[1].STATE] = row[1].STNAME

In [53]:
df_pop = df_pop[["STNAME", "CTYNAME", "POPESTIMATE2018", "STATE", "COUNTY"]]

In [54]:
for cur_county in county_json["features"]:
    props = cur_county["properties"]
    state = props["state"]
    county = props["county"]
    pop = df_pop[(df_pop["STATE"] == int(state))&(df_pop["COUNTY"] == int(county))]["POPESTIMATE2018"]
    if(len(pop.values) > 0):
        props["population"] = pop.values[0]
    else:
        if(props["statename"] == "Virginia" and props["name"] == "Bedford"): props["population"] = 78742
        elif(props["statename"] == "South Dakota" and props["name"] == "Shannon"): props["population"] = 14309
        else: print(props["statename"], props["name"], "not found")

Alaska Wade Hampton not found
Puerto Rico Aguas Buenas not found
Puerto Rico Aibonito not found
Puerto Rico Arroyo not found
Puerto Rico Barceloneta not found
Puerto Rico Barranquitas not found
Puerto Rico Bayamï¿½n not found
Puerto Rico Cabo Rojo not found
Puerto Rico Camuy not found
Puerto Rico Ciales not found
Puerto Rico Coamo not found
Puerto Rico Dorado not found
Puerto Rico Florida not found
Puerto Rico Gurabo not found
Puerto Rico Humacao not found
Puerto Rico Juncos not found
Puerto Rico Las Piedras not found
Puerto Rico Luquillo not found
Puerto Rico Maunabo not found
Puerto Rico Naranjito not found
Puerto Rico Patillas not found
Puerto Rico Rincï¿½n not found
Puerto Rico Sabana Grande not found
Puerto Rico San Lorenzo not found
Puerto Rico Santa Isabel not found
Puerto Rico Vega Baja not found
Puerto Rico Villalba not found
Puerto Rico Caguas not found
Puerto Rico Canï¿½vanas not found
Puerto Rico Carolina not found
Puerto Rico Cataï¿½o not found
Puerto Rico Cayey not found


### Add Missing Alaska County and Puerto Rico Data

In [55]:
df_pop_add = pd.read_csv(data_folder/"population2.csv")

In [56]:
pop_dict = {row[1]: row[2] for row in df_pop_add.values}

In [57]:
for cur_county in county_json["features"]:
    props = cur_county["properties"]
    geo_id = props["geo_id"]
    if(geo_id in pop_dict):
        props["population"] = pop_dict[geo_id]

### Alert to any missing populations

In [58]:
for cur_county in county_json["features"]:
    try:
        pop = int(cur_county["properties"]["population"])
    except KeyError:
        print("No population found for ", cur_county["properties"])
        raise KeyError
    except ValueError:
        print(cur_county["properties"]["population"], "is not a valid population")
        raise ValueError

## Add Covid Data

In [59]:
df = pd.read_csv(data_folder/"dataCovidCounty.csv")
df["State"] = df['State'].str.strip()
df["County"] = df['County'].str.strip()

In [60]:
df

Unnamed: 0,State,County,Cases,Deaths
0,﻿New York,New York,3615,22
1,New York,Westchester,798,0
2,New York,Nassau,372,3
3,New York,Suffolk,239,3
4,New York,Rockland,53,1
...,...,...,...,...
728,Alaska,Ketchikan Gateway,1,0
729,Alaska,Seward,1,0
730,Puerto Rico,Puerto Rico,5,0
731,West Virginia,Jefferson,1,0


In [61]:
df[df["State"]=="Louisiana"]

Unnamed: 0,State,County,Cases,Deaths
171,Louisiana,Orleans,231,8
172,Louisiana,Jefferson,62,1
173,Louisiana,St. Tammany,10,0
174,Louisiana,Caddo,7,0
175,Louisiana,East Baton Rouge,5,0
176,Louisiana,Lafourche,4,0
177,Louisiana,St. Charles,4,0
178,Louisiana,Terrebonne,4,0
179,Louisiana,St. Bernard,4,0
180,Louisiana,Bossier,2,0


In [62]:
codes = defaultdict(dict)
for cur_county in county_json["features"]:
    countyID = cur_county["properties"]["geo_id"]
    code = cur_county["properties"]["state"]
    countyName = cur_county["properties"]["name"]
    state = code_to_state[code]
    codes[state][countyName] = countyID

In [63]:
codes["New York"]

{'Cayuga': '0500000US36011',
 'Columbia': '0500000US36021',
 'Cortland': '0500000US36023',
 'Franklin': '0500000US36033',
 'Hamilton': '0500000US36041',
 'Lewis': '0500000US36049',
 'Montgomery': '0500000US36057',
 'Oneida': '0500000US36065',
 'Orleans': '0500000US36073',
 'Putnam': '0500000US36079',
 'Rockland': '0500000US36087',
 'Schoharie': '0500000US36095',
 'Tioga': '0500000US36107',
 'Wayne': '0500000US36117',
 'Albany': '0500000US36001',
 'Allegany': '0500000US36003',
 'Bronx': '0500000US36005',
 'Broome': '0500000US36007',
 'Cattaraugus': '0500000US36009',
 'Chautauqua': '0500000US36013',
 'Chemung': '0500000US36015',
 'Chenango': '0500000US36017',
 'Clinton': '0500000US36019',
 'Delaware': '0500000US36025',
 'Dutchess': '0500000US36027',
 'Erie': '0500000US36029',
 'Essex': '0500000US36031',
 'Fulton': '0500000US36035',
 'Genesee': '0500000US36037',
 'Greene': '0500000US36039',
 'Herkimer': '0500000US36043',
 'Jefferson': '0500000US36045',
 'Kings': '0500000US36047',
 'Living

In [64]:
def get_county_cases():
    dict_cases = {}
    used_states = []
    for _,(state, county, cases, deaths, *_) in df.iterrows():
        if not state in used_states:
            for county_temp, geo in codes[state].items():
                dict_cases[geo] = (state, county_temp, 0, 0)
            used_states.append(state)
        try:
            geo_id = codes[state][county]
            dict_cases[geo_id] = (state, county, cases, deaths)
        except KeyError:
            print(f"'{state}', '{county}', '{cases}'")
            print("County not found")
    return dict_cases

In [65]:
dict_cases = get_county_cases()

'﻿New York', 'New York', '3615'
County not found
'Illinois', 'Lasalle', '1'
County not found
'Michigan', 'Detroit City', '75'
County not found
'Texas', 'Mclennan', '5'
County not found
'Texas', 'Amarillo', '2'
County not found
'Texas', 'Dewitt', '1'
County not found
'Wisconsin', 'St. Croix County', '1'
County not found
'Virginia', 'Richmond City', '3'
County not found
'Oregon', '​Benton', '3'
County not found
'Diamond Princess', 'San Diego', '1'
County not found
'New Hampshire', 'Berknap', '2'
County not found
'Oklahoma', 'Mcclain', '1'
County not found
'Missouri', 'St. Louis City', '3'
County not found
'Missouri', 'Kansas City', '2'
County not found
'Grand Princess', 'Sonoma', '1'
County not found
'Grand Princess', 'Ventura', '1'
County not found
'Grand Princess', 'San Diego', '1'
County not found
'Grand Princess', 'Grand Princess', '0'
County not found
'Guam', 'Guam', '12'
County not found
'Alaska', 'Seward', '1'
County not found
'Puerto Rico', 'Puerto Rico', '5'
County not found


In [81]:
dict_cases['0500000US36061'] = ("New York", "New York", 3954, 26)

In [82]:
for cur_county in county_json["features"]:
    countyID = cur_county["properties"]["geo_id"]
    if countyID in dict_cases:
        cur_county["properties"]["cases"] = dict_cases[countyID][2]
        cur_county["properties"]["deaths"] = dict_cases[countyID][3]
    else:
        cur_county["properties"]["cases"] = 0
        cur_county["properties"]["deaths"] = 0
        print(cur_county["properties"]["statename"], cur_county["properties"]["name"])
        print(countyID, "not found")

Virginia Richmond
0500000US51159 not found
Virginia Bedford
0500000US51515 not found
Virginia Roanoke
0500000US51770 not found
District of Columbia District of Columbia
0500000US11001 not found
Maryland Baltimore
0500000US24005 not found
Missouri St. Louis
0500000US29189 not found
Virginia Fairfax
0500000US51059 not found
Virginia Franklin
0500000US51067 not found


## Add Centroids

In [83]:
with open(data_folder/"countyCentroids.json", 'r') as f:
    county_centroids = json.load(f)

In [84]:
county_centroids["features"][0]

{'type': 'Feature',
 'properties': {'GEO_ID': '0500000US01029',
  'STATE': '01',
  'COUNTY': '029',
  'NAME': 'Cleburne',
  'LSAD': 'County',
  'CENSUSAREA': 560.1},
 'geometry': {'type': 'Point',
  'coordinates': [-85.5187931476936, 33.674542689931684]}}

In [85]:
for county in county_json["features"]:
    for county2 in county_centroids["features"]:
        if county["properties"]['geo_id'] == county2["properties"]['GEO_ID']:
            county["properties"]["lat"] = county2["geometry"]["coordinates"][0]
            county["properties"]["long"] = county2["geometry"]["coordinates"][1]

## Add Risk Data

In [86]:
#111 is to convert degrees to kilometers
def get_distance(c0, c1):
    lat_dist = abs(c0[0])-abs(c1[0])
    lng_dist = abs(c0[1])-abs(c1[1])
    distance = 111 * math.sqrt(lat_dist**2 + lng_dist**2)
    return distance

### Local Risk

In [87]:
def calc_county_local_risk(props):
    cases = props["cases"]
    try:
        population = props["population"]
    except KeyError:
        print(props['name'], props['statename'])
        raise KeyError
    if cases == 'NaN': 
        print('NaN cases found')
        cases = 0
    return cases/population if population != -1 else -1

In [88]:
for cur_county in county_json["features"]:
    cur_county["properties"]["risk_local"] = calc_county_local_risk(cur_county["properties"])

### Neighbor Risk

In [89]:
def calc_county_neighbor_risk(risks):
    total_neighbor_risk = 0
    num_risks = len(risks)
    for risk in risks.values():
        distance, neighbor_cases, neighbor_pop = risk.values()
        neighbor_density = float(neighbor_cases)/neighbor_pop
        total_neighbor_risk += float(neighbor_density)*(2**(-1*math.log2(float(distance))))
    return total_neighbor_risk

In [90]:
def get_county_all_neighbor_risk(props):
    risk_details = {}
    for cur_county in county_json["features"]:
        neighbor_props = cur_county["properties"]
        neighbor_risk = get_county_neighbor_risk(props, neighbor_props)
        if(neighbor_risk != {}): risk_details[neighbor_props["geo_id"]] = neighbor_risk
    population = props.get("population", 1000000)
    props["risk_nearby"] = calc_county_neighbor_risk(risk_details)
    props["risk_total"] = props["risk_nearby"] + props["risk_local"]

In [91]:
MAX_DISTANCE = 250
def get_county_neighbor_risk(props, neighbor_props):
    geoID1 = props["geo_id"]
    geoID2 = neighbor_props["geo_id"]
    centroid = [props["lat"], props["long"]]
    centroid_neighbor = [neighbor_props["lat"], neighbor_props["long"]]
    distance = get_distance(centroid, centroid_neighbor)
    neighbor_cases = neighbor_props["cases"]
    neighbor_population = neighbor_props["population"]
    if(geoID1 == geoID2 or distance > MAX_DISTANCE or neighbor_cases == "NaN"): 
        return {}
    else:
        return {"distance":distance, "cases":neighbor_cases, "pop":neighbor_population}

In [92]:
for cur_county in county_json["features"]:
    get_county_all_neighbor_risk(cur_county["properties"])

In [93]:
county_json["features"]

[{'type': 'Feature',
  'properties': {'geo_id': '0500000US02261',
   'county': '261',
   'name': 'Valdez-Cordova',
   'lsad': 'CA',
   'censusarea': 34239.88,
   'state': '02',
   'statename': 'Alaska',
   'stateabbr': 'AK',
   'population': 9164,
   'cases': 0,
   'deaths': 0,
   'lat': -144.48437223659363,
   'long': 61.5605492044838,
   'risk_local': 0.0,
   'risk_nearby': 0,
   'risk_total': 0.0},
  'geometry': {'type': 'MultiPolygon',
   'coordinates': [[[[-147.483828, 60.618636],
      [-147.500009, 60.653852],
      [-147.483828, 60.683358],
      [-147.487635, 60.728092],
      [-147.395312, 60.74332],
      [-147.383891, 60.741417],
      [-147.362, 60.714767],
      [-147.3087, 60.665274],
      [-147.348675, 60.627202],
      [-147.454323, 60.619588],
      [-147.483828, 60.618636]]],
    [[[-147.341061, 60.305499],
      [-147.340109, 60.275042],
      [-147.483828, 60.224598],
      [-147.499057, 60.235067],
      [-147.505719, 60.253151],
      [-147.496201, 60.265524],
 

In [94]:
county_json["features"][0]["properties"].keys()

dict_keys(['geo_id', 'county', 'name', 'lsad', 'censusarea', 'state', 'statename', 'stateabbr', 'population', 'cases', 'deaths', 'lat', 'long', 'risk_local', 'risk_nearby', 'risk_total'])

## Export

In [95]:
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NpEncoder, self).default(obj)

with open("../../data/counties.json", 'w') as f:
    json.dump(county_json, f, cls=NpEncoder)
    
with open("../../data/countyData.js", 'w') as f:
    f.write("let countyData = ")
    json.dump(county_json, f, cls=NpEncoder)