In [61]:
import json
from datetime import datetime, timedelta
from urllib.request import urlopen

import altair as alt
from altair import datum
import matplotlib.pyplot as plt
import numpy as np
import numpy.polynomial.polynomial as poly
import pandas as pd
from pytz import timezone
import seaborn as sns

from scipy.signal import savgol_filter

In [2]:
# Switch Altair's active data transformer to 'data_server'; used here because
# the datasets charted below are large.
alt.data_transformers.enable('data_server')

DataTransformerRegistry.enable('data_server')

In [3]:
# Read the county JSON document into memory.
with open('data/county_json2.json') as fh:
    county_json2 = json.load(fh)

# Read the county table; FIPS codes stay strings and 'date' is parsed
# into datetimes at load time.
df_ac = pd.read_csv(
    'data/df_ac.csv',
    dtype={'fips': 'str'},
    parse_dates=['date'],
)

In [4]:
# Integer number of days between each row's date and the earliest date in df_ac.
first_date = df_ac['date'].min()
elapsed = df_ac['date'] - first_date
df_ac['days'] = (elapsed / np.timedelta64(1, 'D')).astype('int')

In [5]:
def round_big(data, col, n):
    """Return ``[lo, hi]`` bounds for ``data[col]`` rounded outward to 10**n.

    The minimum is rounded down and the maximum is rounded up to the nearest
    multiple of ``10 ** n``, so the returned interval always encloses the
    data (useful as an axis / scale domain).

    Parameters
    ----------
    data : mapping of column name -> object with ``.min()`` / ``.max()``
        Typically a pandas DataFrame.
    col : str
        Name of the column to inspect.
    n : int
        Power of ten to round to; negative values round to decimals
        (e.g. ``n=-1`` rounds to multiples of 0.1).

    Returns
    -------
    list
        ``[rounded_min, rounded_max]``.
    """
    step = 10 ** n

    lo = data[col].min()
    hi = data[col].max()

    # Floor the minimum: subtract its remainder above the previous multiple.
    lo = lo - lo % step
    # Ceil the maximum: add the distance up to the next multiple.
    # (The previous `hi - (-hi) % step` moved the bound *below* the data.)
    hi = hi + (-hi) % step

    return [lo, hi]

In [6]:
# Partition df_ac by its 'gop' flag: data0 holds the gop == 0 rows,
# data1 holds the gop == 1 rows.
data0, data1 = (df_ac[df_ac['gop'].eq(flag)] for flag in (0, 1))

In [89]:
# ---------------------------------------------------------------------------
# Chart configuration: column names, scale types and display titles.
# ---------------------------------------------------------------------------
# NOTE(review): chart_title, w_col and split are defined here but not used
# anywhere in this cell.
chart_title = 'Change in New Cases per 100k vs. New Cases per 100k'
x_col = 'new_cases_per_100k_15sg'
x_scale = 'linear'
x_title = 'new cases per 100k'
y_col = 'delta_new_cases_per_100k_15sg'
y_scale = 'linear'
y_title = u'Δ new cases per 100k'
s_col = 'cases'
s_title = 'total cases'
c_col = 'per_votes_gop'
c_title = '% votes GOP'
c_scheme = 'redblue'
w_col = 'total_pop'
split = False
# NOTE(review): this flag is rebound to an Altair chart further down in this
# cell, so its boolean value does not survive the cell.
line = True

# Work on a copy so the source frame is left untouched.
# (Swap in data0 / data1 here to restrict the chart to one group.)
data = df_ac.sort_values(by='county').copy()

# Slider geometry: step through the days in increments of `p`. Starting at
# `dmax % p` guarantees that stepping by `p` lands exactly on `dmax`.
p = 3
dmax = data['days'].max()
dmin = dmax % p

# Rounded scale domains for the x / y / size / colour encodings.
xmin, xmax = round_big(data, x_col, 1)
ymin, ymax = round_big(data, y_col, 1)
_, smax = round_big(data, s_col, 5)
cmin, cmax = round_big(data, c_col, -1)

# Single-value selection on 'days', bound to a range input and initialised
# to the last day.
bind = alt.binding_range(min=dmin, max=dmax, step=p)
selector = alt.selection_single(
    empty='all', name='select', fields=['days'], init={'days': dmax}, bind=bind
)

# Shared base: every chart derived from it is 480x480 and carries the selector.
base = alt.Chart(data).properties(
    width=480,
    height=480,
).add_selection(selector)

# Scatter plot for the selected day. The axis fields come from x_col / y_col
# so they stay in sync with the domains, titles and tooltips configured above.
# `base` already carries the selector, so it is not added a second time here.
circles = base.mark_circle(
    stroke='black',
    strokeWidth=0.25,
).encode(
    x=alt.X(
        x_col + ':Q',
        scale=alt.Scale(type=x_scale, domain=[xmin, xmax]),
        title=x_title
    ),
    y=alt.Y(
        y_col + ':Q',
        scale=alt.Scale(type=y_scale, domain=[ymin, ymax]),
        title=y_title
    ),
    size=alt.Size(
        s_col + ':Q',
        scale=alt.Scale(
            domain=[1, smax],
            range=[10, 2000]
        ),
        title=s_title
    ),
    color=alt.condition(
        selector,
        c_col + ':Q',
        alt.value('lightgray'),
        scale=alt.Scale(
            scheme=c_scheme,
            domainMid=0.5,
            domain=[cmin, cmax]
        ),
        sort='descending',
        title=c_title
    ),
    tooltip=[
        'state:N', 'county:N',
        alt.Tooltip(
            'cases:Q',
            title='total cases',
        ),
        alt.Tooltip(
            'cases_per_100k:Q',
            format='.1f',
            title='cases per 100k',
        ),
        alt.Tooltip(
            'pop_density:Q',
            format='.1f',
            title='pop per sq mi',
        ),
        alt.Tooltip(
            y_col + ':Q',
            format='.1f',
            title=y_title
        ),
        alt.Tooltip(
            x_col + ':Q',
            format='.1f',
            title=x_title
        )
    ]
).transform_filter(selector)

# New cases per day as bars (not filtered by the selector).
bars = base.mark_bar().encode(
    x='days:Q',
    y='new_cases:Q'
)

# Smoothed new cases as a line, filtered by the selector.
# NOTE(review): this chart is built but not included in the displayed output.
line = base.mark_line().encode(
    x='days:Q',
    y='new_cases_15sg:Q'
).transform_filter(selector)

# Display the scatter plot and the bar chart side by side.
circles | bars

In [71]:
# Linear regression of delta-new-cases on new-cases for the gop == 0 rows,
# drawn as a dark blue line.
# NOTE(review): `chart` is assigned in a cell that appears further down in
# this notebook, so this cell only works after that one has been run.
line0 = (
    chart
    .transform_filter(datum.gop == 0)
    .transform_regression(
        'new_cases_per_100k_15sg',
        'delta_new_cases_per_100k_15sg',
        method='linear',
    )
    .mark_line(color='darkblue')
)

line0

In [65]:
# Point chart of the rows where days == 184.
chart = alt.Chart(df_ac[df_ac['days'] == 184]).mark_point().encode(
    x=alt.X('new_cases_per_100k_15sg:Q'),
    y=alt.Y('delta_new_cases_per_100k_15sg:Q'),
)


def _gop_regression_line(points, gop, color):
    """Linear regression line over the rows of `points` whose `gop` equals `gop`."""
    return points.transform_filter(
        datum.gop == gop
    ).transform_regression(
        'new_cases_per_100k_15sg',
        'delta_new_cases_per_100k_15sg',
        method='linear'
    ).mark_line(color=color)


# Layer the points with one regression line per gop value.
chart + _gop_regression_line(chart, 0, 'darkblue') + _gop_regression_line(chart, 1, 'darkred')

In [27]:
# Show the column labels of df_fit.
# NOTE(review): df_fit is not created in any cell visible here — confirm
# where it is defined before relying on this cell.
df_fit.columns

Index(['state', 'cluster', 'date', 'cases', 'deaths', 'county', 'total_pop',
       'fips', 'white', 'black', 'asian', 'hispanic', 'area', 'lon', 'lat',
       'votes_gop', 'votes_dem', 'total_votes', 'median_income', 'pop_density',
       'per_white', 'per_black', 'per_asian', 'per_hispanic',
       'per_total_votes', 'per_votes_gop', 'per_votes_dem', 'gop',
       'cases_per_100k', 'deaths_per_100k', 'new_cases', 'new_deaths',
       'new_cases_per_100k', 'new_deaths_per_100k', 'new_cases_7sg',
       'new_deaths_7sg', 'new_cases_per_100k_7sg', 'new_deaths_per_100k_7sg',
       'new_cases_15sg', 'new_deaths_15sg', 'new_cases_per_100k_15sg',
       'new_deaths_per_100k_15sg', 'delta_new_cases', 'delta_new_deaths',
       'delta_new_cases_per_100k', 'delta_new_deaths_per_100k',
       'delta_new_cases_7sg', 'delta_new_deaths_7sg',
       'delta_new_cases_per_100k_7sg', 'delta_new_deaths_per_100k_7sg',
       'delta_new_cases_15sg', 'delta_new_deaths_15sg',
       'delta_new_cases_per_1

In [40]:
# Wide view of the two smoothed series: rows are indexed by (days, gop) and
# columns by fips.
df_fit.reset_index().pivot_table(
    index=['days', 'gop'],
    columns=['fips'],
    values=[
        'new_cases_per_100k_15sg',
        'delta_new_cases_per_100k_15sg',
    ],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,delta_new_cases_per_100k_15sg,delta_new_cases_per_100k_15sg,delta_new_cases_per_100k_15sg,delta_new_cases_per_100k_15sg,delta_new_cases_per_100k_15sg,delta_new_cases_per_100k_15sg,delta_new_cases_per_100k_15sg,delta_new_cases_per_100k_15sg,delta_new_cases_per_100k_15sg,delta_new_cases_per_100k_15sg,...,new_cases_per_100k_15sg,new_cases_per_100k_15sg,new_cases_per_100k_15sg,new_cases_per_100k_15sg,new_cases_per_100k_15sg,new_cases_per_100k_15sg,new_cases_per_100k_15sg,new_cases_per_100k_15sg,new_cases_per_100k_15sg,new_cases_per_100k_15sg
Unnamed: 0_level_1,fips,01001-01007-01013-01021-01031-01037-01039-01041-01045-01051-01061-01067-01069-01109-01121-01123,01003-01097,01005-01011-01017-01087-01113,01009-01015-01019-01027-01029-01043-01049-01055-01071-01095-01111-01115,01023-01025-01035-01047-01053-01063-01065-01085-01091-01099-01105-01107-01119-01129-01131,01033-01057-01059-01075-01077-01079-01093-01127-01133,01073-01089,01081-01101,01083-01103-01117-01125,02013-02016,...,55021-55023-55043-55045-55049-55065-55103-55111-55123,55029-55037-55041-55061-55067-55075-55083,55035-55063-55073,55051-55069-55085-55099-55119-55125,55078,55079,56001-56003-56005-56007-56013-56017-56019-56023-56025-56029-56033-56035-56037-56039-56041-56043,56009-56011-56015-56027-56031-56045,56021,nyc
days,gop,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
0,0,,,,,,,,,,,...,,,,,,,,,,
1,0,,,,,,,,,,,...,,,,,,,,,,
2,0,,,,,,,,,,,...,,,,,,,,,,
3,0,,,,,,,,,,,...,,,,,,,,,,
4,0,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,1,1.959762,10.936136,,1.866152,,-0.686338,-0.625545,,-1.645995,,...,7.118187,12.077859,8.908124,9.916818,,,9.866962,1.368206,5.920077,
183,0,,,-0.345401,,1.722801,,,0.406742,,-18.180274,...,,,,,21.478741,32.212844,,,,4.372344
183,1,2.013139,12.147105,,2.020732,,-0.894128,-0.951013,,-2.084644,,...,7.102310,12.244213,8.941013,10.371963,,,10.285195,1.361377,5.790859,
184,0,,,-0.595775,,1.560692,,,0.187540,,-21.086464,...,,,,,23.595259,32.455665,,,,4.424950


In [None]:
def add_savgol_regression_cols(df, cols, window=15):
    """Add Savitzky-Golay (polyorder 1) smoothed copies of `cols`, per fips.

    Rows are sorted by ``['date', 'fips']`` and each column in `cols` is
    filtered independently within every ``fips`` group. The smoothed values
    are stored in new columns named ``<col>_<window>sg``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'date', 'fips' and every column listed in `cols`.
    cols : list of str
        Columns to smooth.
    window : int, default 15
        Filter window length. The default matches the ``_15sg`` columns used
        elsewhere in this notebook. Groups with fewer than `window` rows are
        smoothed with the largest odd window that fits; groups too short for
        any window larger than 1 are left unchanged.

    Returns
    -------
    tuple
        ``(new_df, new_column_names)``. The input frame is not modified.
    """
    def my_savgol(x, w):
        # Enough points: use the requested window as-is.
        if len(x) >= w:
            return savgol_filter(x, w, 1)
        # Otherwise shrink to the largest odd window not exceeding len(x).
        new_window = int(np.ceil(len(x) / 2) * 2 - 1)
        if new_window <= 1:
            # A window of 1 cannot support a degree-1 fit; return data as-is.
            return x
        return savgol_filter(x, new_window, 1)

    df = df.sort_values(by=['date', 'fips'])
    cols_d = [c + '_' + str(window) + 'sg' for c in cols]

    # Smooth one column at a time so the filter always receives a 1-D series
    # for a single fips group.
    grouped = df.groupby(by='fips')
    smoothed = {
        dst: grouped[src].transform(lambda x: my_savgol(x, window))
        for src, dst in zip(cols, cols_d)
    }
    df = df.assign(**smoothed)
    return (df, cols_d)

In [None]:
# NOTE(review): this statement is unfinished — the subscript after
# groupby(...) is empty (`[]`), which is not valid Python, so the cell cannot
# run as written. df_fit is also not defined in any cell visible here.
df_fit['regression'] = df_fit.sort_values(by=['date', 'gop']).groupby(by='date')[]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


In [None]:


def _linear_fit_layer(gop, color):
    """Fit y_col ~ x_col (degree 1) on the rows of `data` with this `gop` value.

    Returns the fitted coefficients, the fitted y values at the unique x
    values, the x/y frame used for plotting, and the Altair line layer.
    """
    subset = data[data['gop'] == gop]
    coefs = poly.polyfit(subset[x_col], subset[y_col], 1)
    xs = np.unique(subset[x_col])
    fitted = poly.polyval(xs, coefs)
    frame = pd.DataFrame({
        'x': xs,
        'y': fitted
    })
    # NOTE(review): select_week is not defined in any cell visible here.
    layer = alt.Chart(frame).mark_line(color=color).encode(
        x='x:Q',
        y='y:Q'
    ).add_selection(select_week).transform_filter(select_week)
    return coefs, fitted, frame, layer


# One fitted line per gop value; all of the original names stay bound.
pfit0, fit0, line_df0, line0 = _linear_fit_layer(0, 'darkblue')
pfit1, fit1, line_df1, line1 = _linear_fit_layer(1, 'firebrick')