In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm
import scipy.stats as sps

# Switch matplotlib over to seaborn's default theme for every figure below.
sns.set()

# Reminder for the person running the notebook: the input data is refreshed
# by a separate step named in the message.
print ('!!!!! RUN new_york_data to update the info before running this one !!!!!')

In [None]:
# Mapping of state name -> per-state table; the cells below take the last row
# of each value via .iloc[-1].
# NOTE(review): unpickling can execute arbitrary code — only load a file that
# this project produced itself.
dick = pd.read_pickle('us_state_dict.pkl')


In [None]:
# Empty accumulator; the loop further down adds one row per state to it.
df = pd.DataFrame()

def get_latest_numbers(df,statename):
    """Append the most recent row of one state's table to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows collected so far (may be empty).
    statename : str
        Key into the module-level ``dick`` mapping.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``df`` followed by the state's last row, with a
        ``state`` column set to ``statename``.
    """
    state = dick[statename]
    # iloc[[-1]] (list indexer) returns a one-row DataFrame, so each column
    # keeps its own dtype. Taking the row as a Series and transposing it
    # forces all columns to a single common dtype (object as soon as one
    # column is non-numeric), which degrades numeric summaries downstream.
    last_row = state.iloc[[-1]].assign(state=statename)
    return pd.concat([df,last_row],axis=0)

    

In [None]:
# Start from an empty frame so that re-running this cell does not append a
# second copy of every state to rows collected by an earlier run.
df = pd.DataFrame()
for key in dick:
    df = get_latest_numbers(df,key)

# Rank states by deaths per million, highest first.
df = df.sort_values('dead_per_M',ascending=False)
df.to_pickle('US_states_latest_day.pkl')

# Kept as the last expression of the cell so the preview is actually rendered.
df.head(40)

In [None]:
# Summary statistics of the latest-day table built above.
df.describe()

In [None]:
# Ratio of deceased to confirmed cases, stored per row as 'mortality'.
df['mortality'] = df['deceased'].div(df['confirmed'])
df.head(n=40)

In [None]:
def standardize(n):
    """Return ``n`` centred on its ``mean()`` and divided by its ``std()``.

    The spread is whatever ``n.std()`` returns for the given container type.
    """
    centred = n - n.mean()
    spread = n.std()
    return centred / spread


def regression(x,y,iterations=10000,burn=5000,thin=2):
    """Fit the Bayesian linear model ``y ~ Normal(alpha + beta * x, obs_sigma)``.

    Written against the PyMC 2 style API (``pm.MAP`` / ``pm.MCMC``). A MAP
    fit is run first, then the chain is sampled.

    Parameters
    ----------
    x, y : array-like
        Predictor and response. The caller in this notebook passes
        standardized columns.
    iterations, burn, thin : int, optional
        Passed positionally to ``MCMC.sample``. The defaults reproduce the
        previously hard-coded ``sample(10000, 5000, 2)``.

    Returns
    -------
    pandas.DataFrame
        One row per retained sample with columns ``alpha_post``,
        ``beta_post``, ``obs_sigma_post`` and ``linreq_post``. The last one
        is only the first column of the ``linreq`` trace.
    """
    # Prior hyper-parameters: intercept centred on 0, slope centred on 1,
    # both with unit standard deviation.
    alpha_mu = 0
    alpha_sigma = 1

    beta_mu = 1
    beta_sigma = 1

    # pm.Normal is parameterised by precision tau = 1 / sigma**2.
    alpha = pm.Normal('alpha',mu=alpha_mu,tau=1 / alpha_sigma ** 2)
    beta = pm.Normal('beta',mu=beta_mu,tau=1 / beta_sigma ** 2)
    obs_sigma = pm.Uniform('obs_sigma',0.1,1) # determines how much spread on the lines there is

    # The earlier prior-simulation step (2 x 100000 prior draws) only fed a
    # commented-out plot, so it is no longer executed.

    @pm.deterministic()
    def linreq(x=x,alpha=alpha,beta=beta):
        # Regression mean for every element of x.
        return x * beta + alpha

    obs = pm.Normal('obs',mu=linreq,tau=1 / obs_sigma ** 2,observed=True,value=y)

    # The observed node is handed to the model explicitly so the likelihood
    # is part of it. NOTE(review): presumably MAP only scores nodes the model
    # contains — confirm against the PyMC 2 documentation.
    model = pm.Model([alpha,beta,obs_sigma,linreq,obs])

    map_ = pm.MAP(model)
    map_.fit()

    mcmc = pm.MCMC(model)
    mcmc.sample(iterations,burn,thin)

    linreq_post = mcmc.trace('linreq')[:,0]

    result = pd.DataFrame({'alpha_post': mcmc.trace('alpha')[:],
                           'beta_post': mcmc.trace('beta')[:],
                           'obs_sigma_post': mcmc.trace('obs_sigma')[:],
                           'linreq_post': linreq_post})

    print(linreq_post.shape)

    return result
    
    
    


In [None]:
y_param = 'conf_per_M'
x_param = 'density'

#### Scatter of the raw data (original units, before outlier removal) ####
plt.figure(figsize=(18,12))
plt.title('US states Corona Regression')
plt.xlabel(x_param + ' ')
plt.ylabel(y_param + ' ')
plt.scatter(df[x_param],df[y_param],color='crimson')

plt.savefig('US_states_scatter_{}_{}.jpg'.format(x_param,y_param))

#### EXTREME OUTLIERS ####
df = df[df.loc[:,'state'] != 'District of Columbia']
#df = df[df.loc[:,'state'] != 'New York']
####

# Make sure both columns are numeric before doing arithmetic on them.
x_raw = df[x_param].astype(float)
y_raw = df[y_param].astype(float)

# Mean / std are taken AFTER the outlier filter so that the values printed in
# the axis labels are exactly the ones used by standardize() below.
x_orig_mean = x_raw.mean()
x_orig_std = x_raw.std()
y_orig_mean = y_raw.mean()
y_orig_std = y_raw.std()

x = standardize(x_raw)
y = standardize(y_raw)

# Ordinary least-squares fit, drawn as a reference line.
slope,intercept,_,_,_ = sps.linregress(x,y)

result = regression (x,y)
print()
print (result.describe())

X = np.linspace(x.min(),x.max(),1000)

# Pick whole posterior rows: alpha and beta share one index per draw, so each
# plotted line is a joint sample rather than a mix of unrelated draws.
draw_idx = np.random.choice(len(result),replace=True,size=len(X))
alpha_samples = result.alpha_post.values[draw_idx]
beta_samples = result.beta_post.values[draw_idx]

# Summaries use the full posterior, not the plotting sub-sample.
alpha_mean = result.alpha_post.mean()
alpha_std = result.alpha_post.std()
beta_mean = result.beta_post.mean()
beta_std  = result.beta_post.std()

plt.figure(figsize=(18,12))
plt.title(r'US states Corona Regression $\alpha$: {:.2f} $\beta$: {:.2f}'.format(alpha_mean,beta_mean))
plt.xlabel(x_param + r' $\mu$: {:.2f} $\sigma$: {:.2f} [STD]'.format(x_orig_mean,x_orig_std))
plt.ylabel(y_param + r' $\mu$: {:.2f} $\sigma$: {:.2f} [STD]'.format(y_orig_mean,y_orig_std))
plt.scatter(x,y,color='crimson')
plt.plot(X,X * slope + intercept,color='k',ls='dashed')
plt.plot(X,X * beta_mean + alpha_mean,color='orange',ls='dashed')

# Row i holds every sampled line evaluated at X[i]; plt.plot draws one line
# per column.
lines = np.outer(X,beta_samples) + alpha_samples

_ = plt.plot(X,lines,color='r',alpha=0.01)

plt.savefig('US_states_regression_{}_{}.jpg'.format(x_param,y_param))

In [None]:
colors = ['orange','red']

temp = df.copy()

# After this subtraction the two stacked segments (confirmed-minus-dead plus
# dead) add up to the confirmed count per million.
temp['conf_per_M'] -= temp['dead_per_M']

# No explicit `bottom` is passed: stacked=True already puts each segment on
# top of the previous one. An extra `bottom` array would lift every bar by a
# further dead_per_M value, and one taken in a different row order than the
# sorted frame plotted here.
temp.sort_values('conf_per_M',ascending=False).plot(
    kind='bar',x='state',y=['conf_per_M','dead_per_M'],figsize=(18,12),color=colors,
    title='US States confirmed & dead per million',stacked=True)

plt.savefig('US_states_conf_per_M.jpg',format='jpg')