In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm
from pymc.Matplot import plot as pmplot

# Switch on seaborn's default styling for the matplotlib figures drawn below.
sns.set()

In [None]:
# Per-region case table: semicolon-separated, ISO-8859-1 encoded.
df = pd.read_csv('Folkhalsomyndigheten.csv',sep=';',encoding='ISO-8859-1')

# Rename two regions (presumably so the names match the index of the tables
# joined onto df later -- TODO confirm against sveriges_regioner.csv).
# The result is assigned back to the column: calling replace(inplace=True) on
# df['Region'] acts on a temporary selection, which emits a chained-assignment
# warning in recent pandas and leaves df untouched under Copy-on-Write.
df['Region'] = df['Region'].replace({'Jämtland Härjedalen': 'Jämtland',
                                     'Sörmland': 'Södermanland'})
df = df.set_index('Region')
df

In [None]:
# Population table: no header row, ' ' as thousands separator, only the first
# three columns are read; column 0 becomes the index.
pop_df = pd.read_csv(
    'sverige_befolkning.csv',
    sep=';',
    thousands=' ',
    header=None,
    usecols=[0, 1, 2],
).set_index(0)
pop_df

In [None]:
def regioner(x):
    """Label a key by its length: 'region' when len(x) == 2, otherwise 'kommun'."""
    return 'region' if len(x) == 2 else 'kommun'

# Bucket the rows of pop_df by applying regioner to each index value.
grouped = pop_df.groupby(regioner)

# Keep the 'region' bucket, index it by column 1, call column 2 'befolkning'
# and name the index 'region'.
region_df = (
    grouped.get_group('region')
    .set_index(1)
    .rename(columns={2: 'befolkning'})
    .rename_axis('region')
)

# Extra per-region columns, joined on the shared 'region' index.
region_data = pd.read_csv('sveriges_regioner.csv',sep=';',encoding='utf-8',index_col='region')
region_df = region_df.join(region_data)
region_df

In [None]:
# Attach the region table to the case table and discard the precomputed rate column.
df = df.join(region_df).drop('Fall_per_100000_inv',axis=1)

# Column names are assigned by position, so this relies on the joined frame
# having exactly these five columns in this order.
df.columns=['confirmed','icu','dead','population','density']

# Rates per million inhabitants.
population_in_millions = df['population'] / 1e6
df['conf_per_M'] = df['confirmed'] / population_in_millions
df['dead_per_M'] = df['dead'] / population_in_millions

# Highest death rate first.
df = df.sort_values('dead_per_M',ascending=False)
df

In [None]:
# Descriptive statistics are printed as text; the column totals are the cell's displayed value.
summary_stats = df.describe()
print (summary_stats)
df.sum()

In [None]:
# Age-group table: semicolon-separated, ISO-8859-1 encoded.
df_age = pd.read_csv(
    'Folkhalsomyndigheten_age.csv',
    sep=';',
    encoding='ISO-8859-1',
)
df_age

In [None]:
# Bar chart of the two count columns per age group, saved as a JPEG.
age_ax = df_age.plot(
    kind='bar',
    x='Åldersgrupp',
    y=['Totalt_antal_fall','Totalt_antal_avlidna'],
    figsize=(18,12),
)
age_ax.set_title('Corona Sverige åldersprofil')
age_ax.set_ylabel('Antal')
age_ax.figure.savefig('Corona_sverige_age.jpg',format='jpg')

In [None]:
# Each region's share (in percent) of the column total, for confirmed cases and deaths.
for count_col, share_col in (('confirmed','conf_of_total'),('dead','dead_of_total')):
    df[share_col] = 100 * df[count_col] / df[count_col].sum()

# Bar chart of both shares per region, saved as a JPEG.
share_ax = df.plot(kind='bar',y=['conf_of_total','dead_of_total'],figsize=(18,12))
share_ax.set_title('Corona Sweden - percentages confirmed & dead per Region')
share_ax.set_ylabel('percent')
share_ax.figure.savefig('corona_sweden_regions_pct.jpg',format='jpg')

In [None]:
# Names of the df columns used as predictor (x) and response (y) in the
# regression cells below; also used as axis-label prefixes by plot().
x_param = 'density'
y_param = 'dead_per_M'

def regression(x_std,y_std,iterations=100000,burn=50000,thin=2):
    """Fit the linear model ``y_std ~ Normal(alpha + beta * x_std, obs_sigma)`` with PyMC 2.

    The intercept ``alpha`` and slope ``beta`` each get a Normal prior whose
    mean and spread are themselves random (``*_mu`` / ``*_sigma``).  The model
    is first fitted with MAP and then sampled with MCMC.

    Parameters
    ----------
    x_std, y_std : array-like
        Predictor and observed response; the caller in this notebook passes
        standardised columns.
    iterations, burn, thin : int, optional
        Passed positionally to ``MCMC.sample``.  The defaults reproduce the
        previously hard-coded values (100000, 50000, 2).

    Returns
    -------
    pandas.DataFrame
        Columns ``alpha_post``, ``beta_post`` and ``obs_sigma_post`` holding
        the traces of the three parameters.

    Side effects
    ------------
    Draws one diagnostic plot per parameter via ``pmplot``.
    """
    # Normal's third positional argument is written throughout as
    # 1 / sigma ** 2, i.e. a precision rather than a standard deviation.
    alpha_mu = pm.Normal('alpha_mu',0,1 / 1 ** 2)
    alpha_sigma = pm.Uniform('alpha_sigma', 0.01, 0.5)

    beta_mu = pm.Normal('beta_mu', 0, 1 / 1 ** 2)
    beta_sigma = pm.Uniform('beta_sigma',0.01, 0.5 )

    alpha = pm.Normal('alpha',alpha_mu, 1 / alpha_sigma ** 2)
    beta = pm.Normal('beta',beta_mu, 1 / beta_sigma ** 2)

    # NOTE(review): the lower bound here is 0 while the other sigma priors use
    # 0.01; obs_sigma == 0 would make the precision below infinite -- confirm
    # this is intended.
    obs_sigma = pm.Uniform('obs_sigma',0,0.5)

    @pm.deterministic()
    def linreq(x_std=x_std,alpha=alpha,beta=beta):
        # Expected response for every observation.
        return x_std * beta + alpha

    obs = pm.Normal('obs',linreq, 1 / obs_sigma ** 2,observed=True,value=y_std)

    model = pm.Model([alpha_mu,alpha_sigma,beta_mu,beta_sigma,alpha,beta,obs_sigma,linreq,obs])

    # MAP fit before sampling, so the chain starts from the fitted values.
    map_ = pm.MAP (model)
    map_.fit()

    mcmc = pm.MCMC(model)
    mcmc.sample(iterations,burn,thin)

    alpha_post = mcmc.trace('alpha')[:]
    beta_post = mcmc.trace('beta')[:]
    obs_sigma_post = mcmc.trace('obs_sigma')[:]

    pmplot(alpha_post,'alpha')
    pmplot(beta_post,'beta')
    pmplot(obs_sigma_post,'obs_sigma')

    return pd.DataFrame({'alpha_post' : alpha_post,
                         'beta_post' : beta_post,
                         'obs_sigma_post' : obs_sigma_post})
    

In [None]:

def plot(x,y,result,x_orig_mean,x_orig_std,y_orig_mean,y_orig_std,region_names,
         x_label=None,y_label=None):
    """Plot the data, sampled regression lines and an 89 % predictive band.

    Parameters
    ----------
    x, y : array-like
        Predictor and response values (the caller passes standardised data).
    result : pandas.DataFrame
        Posterior samples with columns ``alpha_post``, ``beta_post`` and
        ``obs_sigma_post``, as returned by ``regression``.
    x_orig_mean, x_orig_std, y_orig_mean, y_orig_std : float
        Values shown in the axis labels.
    region_names : sequence of str
        One text label per data point, in the same order as ``x`` / ``y``.
    x_label, y_label : str, optional
        Axis-label prefixes.  When omitted, the notebook-level ``x_param`` /
        ``y_param`` are used, as before.

    Draws onto a new matplotlib figure; returns None.
    """
    if x_label is None:
        x_label = x_param
    if y_label is None:
        y_label = y_param

    # Plain arrays: x / y may arrive as Series with a label index, where
    # integer indexing such as x[i] is label-based (positional fallback is
    # deprecated in pandas).
    x_vals = np.asarray(x,dtype=float)
    y_vals = np.asarray(y,dtype=float)

    alpha_post = np.asarray(result['alpha_post'])
    beta_post = np.asarray(result['beta_post'])
    sigma_post = np.asarray(result['obs_sigma_post'])

    alpha_mean = alpha_post.mean()
    beta_mean = beta_post.mean()
    alpha_89 = np.percentile(alpha_post,[5.5,94.5])
    beta_89 = np.percentile(beta_post,[5.5,94.5])

    title_suffix = r'$\alpha$: {:.2f} alpha_89: [ {:.2f} , {:.2f} ] $\beta$: {:.2f} beta_89: [ {:.2f} , {:.2f} ]'.format(
    alpha_mean,alpha_89[0],alpha_89[1],beta_mean,beta_89[0],beta_89[1])

    plt.figure(figsize=(18,12))
    plt.title('Corona Sverige Regioner/Län: Linear Regression ' + title_suffix)

    plt.xlabel(x_label + r' [STD] $\mu$: {:.2f} $\sigma$: {:.2f}'.format(x_orig_mean,x_orig_std))
    plt.ylabel(y_label + r' [STD] $\mu$: {:.2f} $\sigma$: {:.2f}'.format(y_orig_mean,y_orig_std))

    X = np.linspace(x_vals.min(),x_vals.max(),1000)

    # Draw whole posterior rows so that alpha, beta and sigma stay paired;
    # resampling them independently would discard their joint distribution.
    rows = np.random.choice(len(alpha_post),replace=True,size=len(X))
    alpha_draws = alpha_post[rows]
    beta_draws = beta_post[rows]
    sigma_draws = sigma_post[rows]

    # mu[i, j] = expected response at X[i] under posterior draw j; each column
    # is one regression line.
    mu = np.outer(X,beta_draws) + alpha_draws
    plt.plot(X,mu,alpha=0.01,color='red')

    # Posterior-predictive draws.  The model's likelihood uses precision
    # 1 / obs_sigma ** 2, so obs_sigma is a standard deviation and is passed
    # as the scale directly (column j uses sigma_draws[j]).
    samples = np.random.normal(mu,sigma_draws)

    # Show every 100th predictive draw as a hollow scatter.
    step = 100
    for j in range(0,samples.shape[1],step):
        plt.scatter(X,samples[:,j],edgecolors='orange',facecolors='none',alpha=0.5)

    # 89 % predictive interval at each X.
    low,high = np.percentile(samples,[5.5,94.5],axis=1)
    plt.fill_between(X,low,high,color='c',alpha=0.2)

    plt.scatter(x_vals,y_vals,color='lime',s=100)

    # Alternate the vertical offset and cycle the rotation so neighbouring
    # labels are less likely to overlap.
    offset = np.array([-0.05,0.05])
    rotation = np.array([0,45,90])

    for i,(x_i,y_i,name) in enumerate(zip(x_vals,y_vals,region_names)):
        plt.text(x_i,y_i + offset[i % 2],name,rotation=rotation[i % 3],rotation_mode='anchor')
        

In [None]:
def standardize(x):
    """Return x shifted by its mean and divided by its std (both taken via x.mean() / x.std())."""
    centered = x - x.mean()
    return centered / x.std()

x_std = standardize(df[x_param])
y_std = standardize(df[y_param])

# Record the mean / std that the standardisation above actually used, before
# any rows are filtered out of df below; these are shown in the axis labels.
x_orig_mean, x_orig_std = df[x_param].mean(), df[x_param].std()
y_orig_mean, y_orig_std = df[y_param].mean(), df[y_param].std()

# Drop points lying max_std or more standard deviations from the mean on either axis.
max_std = 5

mask_x = (x_std > -max_std) & (x_std < max_std)
mask_y = (y_std > -max_std) & (y_std < max_std)

mask = mask_x & mask_y

x_std = x_std[mask]
y_std = y_std[mask]
df = df[mask]

region_names = list(df.index.values)
print (region_names)


# The fit must run in this cell: `result` is not defined anywhere else, so
# leaving the call commented out raises NameError on a fresh kernel.
# (Slow with the default sampler settings.)
result = regression(x_std,y_std)
plot(x_std,y_std,result,x_orig_mean,x_orig_std,
     y_orig_mean,y_orig_std,
    region_names)