In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm
import scipy.stats as sps

# Apply seaborn's default plot styling to every matplotlib figure created below.
sns.set()

# Row filters used when aligning countries: rows are kept only where the
# 'confirmed' column is >= confirmed_min AND the 'deceased' column is >= dead_min.
confirmed_min = 500
dead_min = 10

In [None]:
# NOTE(review): read_pickle executes arbitrary code embedded in the file;
# only load pickles that were produced locally / come from a trusted source.
us_state_dict = pd.read_pickle('us_state_dict.pkl')
# Pull out the New York entry and make sure its index is a DatetimeIndex.
ny = us_state_dict['New York']
ny.index = pd.DatetimeIndex(ny.index)
# Bare expression so the notebook displays the frame.
ny

In [None]:
# Mapping of country name -> DataFrame, loaded from a local pickle.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
dick = pd.read_pickle('country_data.pkl')


def strip(x):
    """Return ``x`` with every tab character removed."""
    return x.replace('\t','')


# Population table indexed by the (tab-stripped) first CSV column.
country_populations = pd.read_csv(
    'world_pop.csv',
    sep=';',
    header=None,
    names=['population'],
    index_col=0,
    thousands=',',
    converters={0 : strip},
)

# Treat New York as one more entry next to the countries.
dick['New York'] = ny

swe, us, india = dick['Sweden'], dick['US'], dick['India']

# Per-million columns are written straight into the frames stored in `dick`.
swe_population = country_populations.loc['Sweden','population']
for source_col, per_million_col in (('confirmed', 'conf_per_M'), ('deceased', 'dead_per_M')):
    swe[per_million_col] = swe[source_col] / (swe_population / 1e6)

us['dead_per_M'] = us['deceased'] / (country_populations.loc['US','population'] / 1e6)


In [None]:

# Countries (plus New York) drawn in the consolidated figures.
countries = [
    'India',
    'Italy',
    'Spain',
    'Sweden',
    'US',
    'Denmark',
    'Netherlands',
    'Canada',
    'Germany',
    'New York',
]

#countries = ['United Kingdom','Finland','Norway','Denmark','Sweden','Iceland']
#countries = ['Sweden']

# One matplotlib colour per entry in `countries`, matched by position.
colors = [
    'b',
    'lime',
    'orange',
    'yellow',
    'navy',
    'k',
    'crimson',
    'maroon',
    'cyan',
    'deeppink',
]

# Open the first figure (confirmed cases per million) on a log y-axis.
plt.figure(figsize=(18,12))
plt.title('Corona - Consolidated data CONFIRMED per million, after each country hit {} dead'.format(dead_min))
plt.yscale('log')

# Length of Italy's series, used as the longest available history.
max_x = len(dick['Italy'])

def process_country(country,param,color_idx):
    """Plot one country's ``param`` series on the current matplotlib figure.

    The country's frame is taken from the module-level ``dick`` mapping, the
    per-million and day-over-day growth-factor columns are (re)computed, and
    only rows with at least ``confirmed_min`` confirmed and ``dead_min``
    deceased are kept. The index is then reset, so the x axis is the row
    number after those thresholds were passed.

    input:
        country: key into ``dick`` and row label in ``country_populations``
        param: name of the column to plot (e.g. 'conf_per_M', 'factor_per_M')
        color_idx: position in the module-level ``colors`` list; it wraps
            around when there are more countries than colours

    output:
        None - draws on the current figure and sets legend, labels and a
        logarithmic y scale.
    """
    df = dick[country]

    try:
        df = df.drop(['dead_conf_ratio','conf_over_dead'],axis=1)
    except KeyError:
        # drop() raises KeyError when the columns are absent. In that case
        # `df` still refers to the frame stored in `dick`, so the derived
        # columns below are written into that frame.
        print ('no cols dead_conf_ratio,conf_over_dead  for {} - skipping delete'.format(country))

    population = country_populations.loc[country,'population']
    millions = population / 1e6
    df['conf_per_M'] = df['confirmed'] / millions
    df['dead_per_M'] = df['deceased'] / millions
    # Growth factor = today's value divided by the previous row's value.
    df['factor_per_M'] = df['conf_per_M'] / df['conf_per_M'].shift()
    df['factor_dead_per_M'] = df['dead_per_M'] / df['dead_per_M'].shift()

    mask = ( ( df['confirmed'] >= confirmed_min ) & ( df['deceased'] >= dead_min))
    # Chained instead of inplace: avoids mutating a filtered slice.
    df = df[mask].reset_index()

    #slope,intercept,_,_,_ = sps.linregress(df.index,df[param])
    #slope = np.log10(slope)
    #intercept = np.log10(intercept)

    #X = np.linspace(min(df.index) ,max(df.index),40)

    #plt.plot([X[i] * slope + intercept for i in range(len(X))],'--',color=colors[color_idx],
             #label=country +'_regression, slope:{:.2f} intercept:{:.2f}'.format(slope,intercept))

    # Wrap the colour index so a country list longer than `colors` still plots.
    plt.plot(df[param],'x-',label=country,color=colors[color_idx % len(colors)])

    plt.legend(loc='upper left')

    if 'factor' in param:
        plt.ylabel('Factor')
    else:
        plt.ylabel('{}'.format(param))

    plt.xlabel('day number after {} dead was passed '.format(dead_min))
    plt.yscale('log')

    ### CHANGE LIMIT WHEN NEEDED ###
    #plt.xlim([0,max_x])
    
    
    
# One entry per consolidated figure:
#   (column to plot, title template or None, force linear y-axis, output file)
# The first figure has no title template because it was already opened and
# titled before process_country was defined.
figure_specs = [
    ('conf_per_M',
     None,
     False,
     'Corona_consolidated_min_confirmed.jpg'),
    ('dead_per_M',
     'Corona - Consolidated data DEATHS per million, after each country hit {} dead',
     False,
     'Corona_consolidated_min_dead.jpg'),
    ('factor_per_M',
     'Corona - Consolidated data GROWTH FACTOR per million, after each country hit {} dead',
     True,
     'Corona_consolidated_min_factor.jpg'),
    ('factor_dead_per_M',
     'Corona - Consolidated data DEATH GROWTH FACTOR per million, after each country hit {} dead',
     True,
     'Corona_consolidated_dead_min_factor.jpg'),
]

for param, title_template, force_linear, out_file in figure_specs:
    if title_template is not None:
        plt.figure(figsize=(18,12))
        plt.title(title_template.format(dead_min))

    for i,c in enumerate(countries):
        process_country(c,param,i)
        if force_linear:
            # process_country always switches to log; undo it for factor plots
            plt.yscale('linear')

    plt.savefig(out_file,format='jpg')


In [None]:
def smooth(x,window_len=10,window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with point-reflected copies of itself
    (``window_len - 1`` samples per side: ``2*x[0] - x[window_len:1:-1]`` on
    the left and ``2*x[-1] - x[-1:-window_len:-1]`` on the right) so that
    transient parts are minimised at the beginning and end of the output.
    The padding is trimmed again, so the result has the same length as ``x``.

    input:
        x: the input signal (1-D array-like, e.g. a DataFrame column)
        window_len: the dimension of the smoothing window
        window: the type of window from 'flat', 'hanning', 'hamming',
            'bartlett', 'blackman'; a flat window produces a moving average.

    output:
        the smoothed signal as a float numpy array. For ``window_len < 3``
        the input is returned unsmoothed (as a float array).

    raises:
        ValueError: if ``x`` is not one-dimensional, if ``window`` is not one
            of the supported names, or if ``x`` does not have more than
            ``window_len`` samples (the reflected padding needs them).

    example:

    t=np.arange(-2,2,0.1)
    x=np.sin(t)+np.random.randn(len(t))*0.1
    y=smooth(x)

    see also:

    numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    scipy.signal.lfilter

    TODO: the window parameter could be the window itself if an array instead of a string
    """
    supported_windows = ('flat', 'hanning', 'hamming', 'bartlett', 'blackman')

    x = np.array(x,dtype=float) #conv from dataframe col to array

    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if window not in supported_windows:
        raise ValueError(
            "Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Windows shorter than 3 cannot smooth anything (hanning(2) sums to zero).
    if window_len < 3:
        return x

    # x[window_len:1:-1] only yields window_len - 1 samples when index
    # window_len exists, so the signal must be strictly longer than the window.
    if x.size <= window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    s=np.r_[2*x[0]-x[window_len:1:-1],x,2*x[-1]-x[-1:-window_len:-1]]

    if window == 'flat': #moving average
        w=np.ones(window_len,'d')
    else:
        # Name was validated above, so this is a plain attribute lookup (no eval).
        w=getattr(np,window)(window_len)

    y=np.convolve(w/w.sum(),s,mode='same')
    # Drop the reflected padding again.
    return y[window_len-1:-window_len+1]


In [None]:
# One figure per country: daily confirmed increments on top, daily deceased
# increments below, each with a smoothed curve drawn over the raw bars.
for c in countries:
    print (c)
    window_len = 15
    fig,axes = plt.subplots(2,1,sharex=True,figsize=(18,12))

    df = dick[c]
    df = df[df['deceased'] > 1]
    # Plain dates on the x axis instead of timestamps.
    df.index = df.index.date

    smooth_label = 'smoothed, window_len={}'.format(window_len)

    # (axes, column, title template, bar colour, bar label)
    panels = (
        (axes[0], 'inc', '{} Daily increment confirmed', 'orange', 'daily increment confirmed'),
        (axes[1], 'dead_inc', '{} Daily increment deceased', 'crimson', 'daily increment deceased'),
    )

    for panel, column, title_template, bar_color, bar_label in panels:
        panel.set_title(title_template.format(c))
        panel.plot(df.index,smooth(df[column],window_len=window_len),
                   color='k',ls='dashed',label=smooth_label)
        panel.bar(df.index,df[column],color=bar_color,label=bar_label)
        panel.legend(loc='upper left')
        panel.set_ylabel('Daily Increment')

    plt.savefig('daily_incs_{}.jpg'.format(c))

In [None]:


# pandas weekly resample rules, indexed by weekday number (0 = Monday).
resample_period = ['W-MON','W-TUE','W-WED','W-THU','W-FRI','W-SAT','W-SUN']

# pd.datetime is deprecated (and gone from newer pandas releases);
# pd.Timestamp.today() gives the same weekday number on every version.
today = pd.Timestamp.today().weekday()


def yesterday(i):
    """Return the weekly resample rule for the weekday before weekday ``i``.

    ``i`` is a weekday number with 0 = Monday; Monday wraps around to Sunday.
    """
    return resample_period[(i - 1) % 7]


window_len = 6

# One figure per country: weekly sums of confirmed / deceased increments as
# bars, with a smoothed curve on top.
for c in countries:
    fig,axes=plt.subplots(2,1,figsize=(18,12),sharex=True)

    df = dick[c]
    df = df[df['deceased'] > 0]
    # Weekly bins use the rule for the weekday before today.
    # String aggregators avoid the pandas warning for passing np.sum.
    df_weekly = df.resample(yesterday(today)).agg({'inc' : 'sum','dead_inc' : 'sum'})

    print ('++++++++++++++++ {}'.format(c))
    print (df_weekly)
    axes[0].set_title('Weekly increment CONFIRMED {}'.format(c))
    axes[1].set_title('Weekly increment DEAD {}'.format(c))
    axes[0].set_ylabel('Weekly increment')
    axes[1].set_ylabel('Weekly increment')
    axes[0].bar(df_weekly.index,df_weekly['inc'],color='orange')
    axes[1].bar(df_weekly.index,df_weekly['dead_inc'],color='crimson')

    axes[0].plot(df_weekly.index,smooth(df_weekly['inc'],window_len=window_len),
                 color='k',ls='dashed',label='smoothed, window_len={}'.format(window_len))

    axes[1].plot(df_weekly.index,smooth(df_weekly['dead_inc'],window_len=window_len),
                 color='k',ls='dashed',label='smoothed, window_len={}'.format(window_len))

    axes[0].legend(loc='upper left')
    axes[1].legend(loc='upper left')

    plt.savefig('weekly_increments_{}.jpg'.format(c))

    

In [None]:
param = 'conf_per_M'

# Latest value of `param` for every country that has a known population.
latest_values = {}

for country,data in dick.items():
    try:
        population = country_populations.loc[country,'population']
    except KeyError:
        # Really skip the country: falling through would reuse the previous
        # country's population (or fail with NameError on the first one).
        print ('cant find population for {}, skippin..'.format(country))
        continue

    millions = population / 1e6
    # These columns are written into the frames stored in `dick`, so they
    # remain available on dick[country] after this loop.
    data['conf_per_M'] = data['confirmed'] / millions
    data['dead_per_M'] = data['deceased'] / millions
    data['factor_per_M'] = data['conf_per_M'] / data['conf_per_M'].shift()
    data['factor_dead_per_M'] = data['dead_per_M'] / data['dead_per_M'].shift()

    latest_values[country] = data[param].iloc[-1]

# Built once instead of concatenating inside the loop. The single column is
# labelled 0 so that `stats_df[0]` keeps working; rows are indexed by country.
stats_df = pd.DataFrame({0 : pd.Series(latest_values,dtype=float)})


# Order of magnitude (log10) of the latest per-country value held in stats_df.
magnitude = np.log10(stats_df[0])

def std(x,mean,std):
    """Standardise ``x``: subtract ``mean`` and divide by ``std``."""
    return (x -  mean) / std

magnitude_mean = magnitude.mean()
magnitude_sigma = magnitude.std()

magnitude_std = std(magnitude,magnitude_mean,magnitude_sigma)

plt.figure(figsize=(18,12))

ax = plt.gca()
ax2 = plt.twinx()

# Left axis: relative-frequency histogram of the standardised magnitudes.
ax.set_title('Distribution of Magnitude of {} '.format(param))
ax.set_ylabel('Rel. Freq.')
ax.set_xlabel('Magnitude of ' + param + ' [STD]')
ax.hist(magnitude_std,weights= np.ones_like(magnitude_std) / len(magnitude_std),color='c',
        label='true exp mean: {:.2f} true exp std: {:.2f}'.format(magnitude_mean,magnitude_sigma))
ax.legend(loc='upper left')

# NOTE: this rebinds the module-level `countries` list used by other cells.
countries = ['Sweden','US','Germany','Spain','Italy','New York',
            'France','Austria','United Kingdom','Belgium','Finland',
            'Denmark','Norway','Australia','Korea, South',
            'New Zealand','Singapore','Taiwan*','Ethiopia','Japan']


# Right axis: selected countries placed at (standardised magnitude of
# conf_per_M, magnitude of dead_per_M), using each frame's last row.
for i,c in enumerate(countries):
    label = 'some select countries' if i == 0 else ''

    # .iloc[-1] is explicitly positional; Series[-1] on a date-indexed
    # Series relies on a deprecated positional fallback.
    x_pos = std(np.log10(dick[c]['conf_per_M'].iloc[-1]),magnitude_mean,magnitude_sigma)
    y_pos = np.log10(dick[c]['dead_per_M'].iloc[-1])

    ax2.scatter(x_pos,y_pos,color='orange',label=label)
    ax2.text(x_pos,y_pos,c)

ax2.set_ylabel('Magnitude of deaths_per_M')
ax2.legend(loc='upper center')

In [None]:
# (column to plot, index into `colors`) for the two growth-factor curves
growth_series = (
    ('factor_per_M', 1),
    ('factor_dead_per_M', 6),
)

# One figure per country showing both growth factors on a linear y-axis.
for i,c in enumerate(countries):
    plt.figure(figsize=(18,12))
    plt.title('Growth Factors for Confirmed (green) and Deaths (red) for {}'.format(c))

    for series_name, series_color in growth_series:
        process_country(c,series_name,series_color)

    # process_country leaves the axis on log scale; switch back.
    plt.yscale('linear')
    plt.savefig('Growth_factors_for_{}.jpg'.format(c),format='jpg')

In [None]:
# Column names used for the regression; regression() also reads these
# module-level names when it builds its axis labels.
x_param = 'factor_dead_per_M'
y_param = 'factor_per_M'

def regression(x,y,country):
    """Regress standardised ``y`` on standardised ``x`` and plot the result.

    Draws a scatter plot of the standardised data, a least-squares line from
    ``sps.linregress``, and a line built from the posterior means of a
    Bayesian linear model sampled with ``pm.MCMC``, then saves the figure as
    'growth_factor_regression_<country>.jpg'. A summary of the posterior
    samples is printed.

    input:
        x, y: data to regress; both are standardised with their own
            ``.mean()`` / ``.std()`` before fitting
        country: label used in the plot title, messages and file name

    output:
        None

    NOTE(review): pm.MCMC, pm.MAP and pm.deterministic look like the legacy
    PyMC 2 API - confirm against the installed pymc version.
    """
    
    def standardize(x):
        """Return ``x`` centred on its mean and scaled by its std."""
        return (x - x.mean()) / x.std()
    
    # Keep the raw inputs: their mean/std are shown in the axis labels.
    x_org = x
    y_org = y
    
    x = standardize(x)
    y = standardize(y)
    
    plt.figure(figsize=(18,12))
    plt.ylabel(y_param + r' [ STD units $\mu$: {:.2f} $\sigma$: {:.2f} ]'.format(y_org.mean(),y_org.std()))
    plt.xlabel(x_param + r'[ STD units $\mu$: {:.2f} $\sigma$: {:.2f} ]'.format(x_org.mean(),x_org.std()))
    plt.scatter(x,y,color='r')
    
    # Ordinary least-squares fit, drawn as a dashed black line.
    slope,intercept,_,_,_ = sps.linregress(x,y)
    
    X = np.linspace(min(x),max(x),1000)
    
    plt.plot(X,[X[i] * slope + intercept for i in range(len(X))], color='k',ls='dashed',
            label='LSQ, slope {:.2f} intercept {:.2f}'.format(slope,intercept))
    
    
    # Uniform hyper-priors for the location and spread of slope and intercept.
    beta_mu = pm.Uniform('beta_mu',0,0.5)
    beta_sigma = pm.Uniform('beta_sigma',0,0.1)
    alpha_mu = pm.Uniform('alpha_mu', 0, 0.5)
    alpha_sigma = pm.Uniform('alpha_sigma',0,0.1)
    
    # NOTE(review): `** ` binds tighter than `/`, so `beta_sigma / 1 ** 2` is
    # just `beta_sigma` (same for alpha). The observation node below passes
    # `1 / obs_sigma ** 2`, which suggests the third argument is a precision
    # and that `1 / beta_sigma ** 2` may have been intended - confirm.
    beta = pm.Normal('beta',beta_mu,beta_sigma / 1 ** 2)
    alpha = pm.Normal('alpha',alpha_mu,alpha_sigma / 1 ** 2)
    
    obs_sigma = pm.Uniform('obs_sigma',0,1)
    
    # Linear predictor: x * beta + alpha.
    @pm.deterministic()
    def linreq(alpha=alpha,beta=beta,x=x):
        return x * beta + alpha
    
    # Observed node tied to the standardised y values.
    obs = pm.Normal('linreq',linreq,1 / obs_sigma ** 2,observed=True,value=y)
    
    model = pm.Model([beta_mu,beta_sigma,alpha_mu,alpha_sigma,beta,alpha,linreq,obs,obs_sigma])
    
    # Best effort: a failed MAP fit is only reported, sampling still runs.
    # NOTE(review): the bare except also hides unrelated errors.
    try:
        map_ = pm.MAP(model)
        map_.fit()
    except:
        print ('cant fit {}'.format(country))
    
    mcmc = pm.MCMC(model)
    # presumably (iterations, burn-in, thinning) - TODO confirm against pymc docs
    mcmc.sample(100000,50000,2)
    
    beta_post = mcmc.trace('beta')[:]
    alpha_post = mcmc.trace('alpha')[:]
    
    result = pd.DataFrame({'beta_post': beta_post,
                          'alpha_post' : alpha_post})
    
    print()
    print (result.describe())
    
    # 5.5th and 94.5th percentiles of the slope samples (an 89% interval).
    CI_89 = np.percentile(result.beta_post,[5.5,94.5])
    
    plt.title(r'Regression for {} $\alpha$ {:.2f} $\beta$ {:.2f} $\beta$-CI [ {:.2f} {:.2f} ]  [STD]'.format(
        country,result.alpha_post.mean(),result.beta_post.mean(),CI_89[0],CI_89[1]))

    
    # NOTE(review): slope and intercept are resampled independently of each
    # other, so a (beta, alpha) pair need not come from the same posterior draw.
    beta_samples = np.random.choice(result.beta_post,replace=True,size=len(X))
    alpha_samples = np.random.choice(result.alpha_post,replace=True,size=len(X))
    
    # Shape (len(X), len(X)): row i holds every sampled line evaluated at X[i],
    # so plt.plot draws one faint line per sampled (beta, alpha) pair.
    lines = np.array([X[i] * beta_samples + alpha_samples for i in range(len(X))])
    plt.plot(X,lines,alpha=0.03,color='r')
    
    # Line built from the posterior means of slope and intercept.
    plt.plot(X,[X[i] * result.beta_post.mean() + result.alpha_post.mean() for i in range(len(X))],
             color='orange',ls='dashed',label='Bayesian mean')
 
    plt.legend(loc='upper left')
    
    plt.savefig('growth_factor_regression_{}.jpg'.format(country),format='jpg')
    
# Build the cleaned per-country frame that regression() expects. `df` is
# rebound on every iteration, so after the loop it holds the frame of the
# last entry in `countries`.
for country in countries:

    df = dick[country]

    try:
        df = df.drop(['dead_conf_ratio','conf_over_dead'],axis=1)
    except KeyError:
        # drop() raises KeyError when the columns are absent; `df` then still
        # refers to the frame stored in `dick`.
        print ('Skipping delete col for {}'.format(country))

    population = country_populations.loc[country,'population']
    millions = population / 1e6
    df['conf_per_M'] = df['confirmed'] / millions
    df['dead_per_M'] = df['deceased'] / millions
    df['factor_per_M'] = df['conf_per_M'] / df['conf_per_M'].shift()
    df['factor_dead_per_M'] = df['dead_per_M'] / df['dead_per_M'].shift()

    mask = ( ( df['confirmed'] >= confirmed_min ) & ( df['deceased'] >= dead_min))
    # Keep rows past both thresholds, turn +inf growth factors (division by
    # zero) into NaN, drop incomplete rows and renumber from 0. Chained
    # rather than inplace so no filtered slice is mutated.
    df = df[mask].replace(np.inf,np.nan).dropna().reset_index()

    #regression(df[x_param],df[y_param],country)


In [None]:
# Copy of the last prepared frame with the current x_param column moved
# 7 rows down; rows left incomplete by the shift are dropped.
# NOTE(review): x_param is only reassigned to 'dead_inc' AFTER the shift, so
# the column that gets shifted is whatever x_param named before this cell -
# confirm that this ordering is intended.
lagged_column = df[x_param].shift(7)
shifted_df = df.copy()
shifted_df[x_param] = lagged_column
shifted_df = shifted_df.dropna()

x_param = 'dead_inc'
y_param = 'inc'
#regression(shifted_df[x_param],shifted_df[y_param],'Italy')

