In [1]:
import pandas as pd
import sklearn as skl
import tensorflow as tf
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from imageio import mimwrite

from sklearn.preprocessing import (
    StandardScaler, QuantileTransformer)

import cartopy
import cartopy.crs as ccrs

from shapely.geometry import Polygon
from shapely.ops import cascaded_union

from datetime import date

import tensorflow as tf

from scipy.interpolate import UnivariateSpline

In [2]:
# Load the New York Times county-level COVID-19 table.
nytimes_data = pd.read_csv('~/nytimes/covid-19-data/us-counties.csv')

# Keep only the rows whose 'fips' value is finite (rows without a county
# FIPS code are dropped). The explicit copy detaches the result from the
# raw table, so the column assignment below writes to an independent frame
# instead of triggering pandas' chained-assignment warning.
nytimes_data = nytimes_data[np.isfinite(nytimes_data['fips'].values)].copy()

# Ordinal (date.toordinal) of the day *before* each row's ISO-formatted date.
nytimes_data['predecessor_date'] = (
    [date.fromisoformat(x).toordinal() - 1 for x in nytimes_data['date']])

In [3]:
# 'fips' rounded down to a multiple of 1000, stored as a float.
nytimes_data['state_id'] = (nytimes_data['fips'] // 1000) * 1000.

In [5]:
## From US Census Bureau records
# Read every record of the county shapefile into a list, then build a lookup
# table that maps each record's GEOID (as a float) to its position in that list.
county_reader = cartopy.io.shapereader.Reader('./cb_2018_us_county_500k.shp')
counties = list(county_reader.records())
county_indices = pd.DataFrame({
    'GEOID' : [float(record.attributes['GEOID']) for record in counties],
    'index' : list(range(len(counties)))})

In [6]:
# Population estimates table. read_csv opens the path itself, so no separate
# file handle is needed.
a = pd.read_csv('./co-est2019-alldata.csv', encoding='latin1')

# One row per input record: the 2019 population estimate and a numeric code
# built as STATE * 1000 + COUNTY (float).
all_pops = pd.DataFrame({
    'POP' : a['POPESTIMATE2019'].values,
    'fips' : a['STATE'].values * 1000. + a['COUNTY'].values
})

# Rows with COUNTY == 0 go to state_pops; every other row goes to county_pops.
is_state_row = a['COUNTY'].values == 0
state_pops = all_pops[is_state_row]
county_pops = all_pops[~is_state_row]

In [7]:
# Attach the county population to every case row, matching on the shared
# 'fips' column (merge's default join type is used).
extended_data = nytimes_data.merge(county_pops, on='fips')

In [8]:
# Per-capita counts: cases and deaths divided element-wise by the 'POP' column.
extended_data['normalized_cases'] = (
    extended_data['cases'].values / extended_data['POP'].values)
extended_data['normalized_deaths'] = (
    extended_data['deaths'].values / extended_data['POP'].values)

In [9]:
# Sum the columns within each (state, county, predecessor_date) group; the
# three grouping columns become the levels of the result's index.
county_data = (
    extended_data
    .groupby(by=['state', 'county', 'predecessor_date'])
    .sum())

In [10]:
def build_county_stats_dict():
    county_stats_dict = {}
    for state_name in np.unique(county_data.index.get_level_values(0)):
        restricted_county_data = county_data[county_data.index.get_level_values(0) == state_name]
        restricted_county_data = restricted_county_data[restricted_county_data[['cases']].values >= 20]
    
        for county_name in np.unique(restricted_county_data.index.get_level_values(1)):
            county_cases = restricted_county_data[restricted_county_data.index.get_level_values(1) == county_name]
        
            if county_cases.shape[0] < 20:
                continue
            
            x = county_cases.index.get_level_values(2).values.reshape(-1)[1:]
            y_base = county_cases[['normalized_cases']].cummax().values.reshape(-1)
            y = y_base[1:] - y_base[:-1]
            y_max = np.max(y)
            y = y / y_max
            
            spl = UnivariateSpline(x, y)
            spl.set_smoothing_factor(10)
                
            xs = np.arange(np.min(x) + 1, np.max(x) + 1)
            ys = spl(xs)
            dydxs = spl.derivative(1)(xs)
            
            base_fips = np.empty([x.shape[0]])
            base_fips[:] = county_cases[['fips']].values.reshape(-1)[0]
            smoothed_fips = np.empty([xs.shape[0]])
            smoothed_fips[:] = county_cases[['fips']].values.reshape(-1)[0]
                
            county_stats_dict[int(county_cases[['fips']].values.reshape(-1)[0])] = (            
                {
                    'base_diffs' : np.vstack((base_fips, x, y)).T, 
                    'smoothed_diffs' : np.vstack((smoothed_fips, xs, ys)).T, 
                    'smoothed_diff_derivs': np.vstack((smoothed_fips, xs, dydxs)).T,
                })
            
    return county_stats_dict

# Build the per-county statistics once; with no arguments the function reads
# the module-level `county_data` frame.
county_stats_dict = build_county_stats_dict()



In [11]:
def _stack_county_stat(stat_name, column_names):
    """Stack one per-county array from county_stats_dict into a single frame."""
    stacked = np.vstack(
        [county_stats[stat_name] for county_stats in county_stats_dict.values()])
    return pd.DataFrame(stacked, columns=column_names)


# Long-format tables: one row per (county, day) for each statistic.
base_diffs_array = _stack_county_stat('base_diffs', ['fips', 'x', 'y'])
smoothed_diffs_array = _stack_county_stat('smoothed_diffs', ['fips', 'xs', 'ys'])
smoothed_diff_derivs_array = _stack_county_stat(
    'smoothed_diff_derivs', ['fips', 'xs', 'dysdxs'])

In [58]:
class plot_normalizer:
    """Map signed values onto [0, 1] for use with a diverging colormap.

    Two quantile transformers are fitted, one on the strictly positive and
    one on the strictly negative finite values of the fitting data.
    ``transform`` sends positive inputs to the upper half of the range via
    the positive transformer, every negative input to one fixed value just
    below the midpoint, and everything else (zero, NaN) to the midpoint.
    """

    # Output for inputs that are neither > 0 nor < 0 (zero or NaN).
    NEUTRAL_VALUE = 0.5
    # Constant output for every negative input.
    NEGATIVE_VALUE = 0.45

    def __init__(self, base_data):
        """Fit the transformers on the finite values of ``base_data``.

        ``base_data`` is a pandas Series or DataFrame; all of its finite
        values are pooled into one sample, so pass only the values the
        normalizer will later be applied to.
        """
        valid_data = (
            base_data.values[np.isfinite(base_data).values])

        # A fixed random_state makes the transformers' internal subsampling
        # of large inputs reproducible from run to run.
        self.positive_normalizer = (
            QuantileTransformer(n_quantiles=100, random_state=0))
        # Fitted and kept as an attribute, but transform() does not use it:
        # negative inputs are mapped to NEGATIVE_VALUE.
        self.negative_normalizer = (
            QuantileTransformer(n_quantiles=100, random_state=0))

        self.positive_normalizer.fit(
            valid_data[valid_data > 0].reshape(-1, 1))
        self.negative_normalizer.fit(
            valid_data[valid_data < 0].reshape(-1, 1))

    def transform(self, base_data):
        """Return an ``(n, 1)`` float array of colormap positions.

        ``base_data`` is any array-like of numbers; it is flattened into a
        single column. Empty input yields an empty ``(0, 1)`` array.
        """
        data = np.asarray(base_data, dtype=float).reshape(-1, 1)

        retval = np.full(data.shape, self.NEUTRAL_VALUE)
        if data.size == 0:
            # The quantile transformer rejects zero-sample input.
            return retval

        # The transformer output is shifted and halved, so a positive input's
        # quantile q becomes (q + 1) / 2.
        positive = (self.positive_normalizer.transform(data) + 1) / 2

        is_positive = data > 0
        retval[is_positive] = positive[is_positive]
        retval[data < 0] = self.NEGATIVE_VALUE

        return retval

In [59]:
# Quantile normalizer for the raw daily differences (finite values only).
base_diffs_normalizer = QuantileTransformer(n_quantiles=100)
base_diffs_normalizer.fit(
    base_diffs_array['y'].values[
        np.isfinite(base_diffs_array['y'].values)].reshape(-1, 1))

# Quantile normalizer for the smoothed daily differences (finite values only).
smoothed_diffs_normalizer = QuantileTransformer(n_quantiles=100)
smoothed_diffs_normalizer.fit(
    smoothed_diffs_array['ys'].values[
        np.isfinite(smoothed_diffs_array['ys'].values)].reshape(-1, 1))

# Fit the colour normalizer on the derivative column only. Passing the whole
# frame would pool the 'fips' and 'xs' columns into the fitted quantiles,
# while the normalizer is only ever applied to derivative values.
smoothed_diff_derivs_normalizer = (
    plot_normalizer(smoothed_diff_derivs_array['dysdxs']))

In [60]:
def plot_county_cases(result_array, date):
    """Render one map frame of county values for a single day.

    Parameters
    ----------
    result_array : pandas.DataFrame
        Frame with a ``'fips'`` column, an ``'xs'`` column of day values and
        the value to plot in its third column.
    date : number
        Day to draw; rows whose ``'xs'`` equals this value are selected.

    Returns
    -------
    numpy.ndarray
        ``(height, width, 3)`` uint8 RGB image of the rendered figure.

    Notes
    -----
    Reads the module-level ``county_indices``, ``counties`` and
    ``smoothed_diff_derivs_normalizer``. Rows containing any non-finite
    value are not drawn; if no row remains, only the base map is rendered.
    """
    working_cmap = plt.get_cmap('coolwarm')

    # Rows for the requested day, joined to the shapefile record index.
    rows_on_date = result_array[result_array['xs'].values.reshape(-1) == date]
    merged_values = rows_on_date.merge(
        county_indices, left_on='fips', right_on='GEOID').values

    # Keep only rows in which every column is finite.
    finite_rows = np.isfinite(merged_values).all(axis=1)
    restricted_result_array = merged_values[finite_rows, :]

    # Map window [lon_min, lon_max, lat_min, lat_max]; the projection is
    # centred on the middle of that window.
    extent = [-120, -70, 23, 50.5]
    central_lon = np.mean(extent[:2])
    central_lat = np.mean(extent[2:])

    # Create the map axes directly so the figure holds exactly one axes.
    fig = plt.figure(figsize=(24, 12))
    ax = fig.add_subplot(
        1, 1, 1, projection=ccrs.AlbersEqualArea(central_lon, central_lat))
    ax.set_extent(extent)

    ax.add_feature(cartopy.feature.OCEAN)
    ax.add_feature(cartopy.feature.LAND, edgecolor='black')
    ax.add_feature(cartopy.feature.LAKES, edgecolor='black')
    ax.add_feature(cartopy.feature.BORDERS)

    if restricted_result_array.shape[0] > 0:
        # Column 2 holds the plotted value; the last column is the position
        # of the county's record in `counties`.
        transformed_scores = (
            smoothed_diff_derivs_normalizer.transform(
                restricted_result_array[:, 2].reshape(-1, 1)))
        colors = [working_cmap(score) for score in transformed_scores[:, 0]]
        geometries = [
            counties[int(record_index)].geometry
            for record_index in restricted_result_array[:, -1]]

        ax.add_feature(
            cartopy.feature.ShapelyFeature(
                geometries,
                cartopy.crs.PlateCarree(),
                facecolor=colors))
    ax.add_feature(cartopy.feature.STATES, edgecolor='lightgrey')

    # Rasterise, then copy the RGB channels out of the canvas buffer so the
    # image stays valid after the figure is closed.
    fig.canvas.draw()
    image = np.ascontiguousarray(
        np.asarray(fig.canvas.buffer_rgba())[..., :3])

    plt.close(fig)

    return image

In [61]:
# One rendered frame per distinct day in the derivative table, in ascending
# day order (np.unique returns sorted values).
frame_dates = np.unique(smoothed_diff_derivs_array['xs'].values)
county_cases = [
    plot_county_cases(smoothed_diff_derivs_array, frame_date)
    for frame_date in frame_dates]

In [62]:
# Write the rendered frames as an animated GIF.
mimwrite(
    './county_cases.gif',
    county_cases,
    fps=2,
    subrectangles=True,
    loop=1,
)