In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc3 as pm
import requests

# Apply seaborn's default styling to the matplotlib figures drawn below.
sns.set()

In [None]:
# Endpoint of the SCB table queried for the long-run yearly series, and the JSON
# body that is POSTed to it by the request cell that follows.
pop_url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101G/BefUtvKon1749'
pop_json_query = {
  "query": [
    {
      # "Kon" selection; "1+2" is presumably the combined figure for both sexes
      # -- TODO confirm against the table metadata.
      "code": "Kon",
      "selection": {
        "filter": "item",
        "values": [
          "1+2"
        ]
      }
    },
    {
      # Two content codes are requested; the parsing cell unpacks the two values
      # returned per record as ('pop', 'dead'), in this order.
      "code": "ContentsCode",
      "selection": {
        "filter": "item",
        "values": [
          "000000LV",
          "0000001F"
        ]
      }
    }
  ],
  # Ask for the result as JSON.
  "response": {
    "format": "json"
  }
}

In [None]:
# POST the query to SCB. The timeout keeps a stalled connection from hanging the
# notebook indefinitely, and raise_for_status() stops right here on an HTTP error
# instead of letting the parsing cell fail on an unexpected response body.
r = requests.post(pop_url,json=pop_json_query,timeout=30)
r.raise_for_status()
r.status_code

In [None]:
# Keep only the 'data' member of the decoded response and peek at its first record.
json_data = r.json()['data']
json_data[0]

In [None]:
def unpack_values(x):
    """Return the first two entries of ``x`` as a tuple of ints."""
    return int(x[0]),int(x[1])


# Tabulate the records: each 'key' is split into (gender, year) and each 'values'
# entry into integer (pop, dead). The raw columns and 'gender' are then discarded
# and the frame is indexed by year.
records = pd.DataFrame.from_dict(json_data)

key_cols = pd.DataFrame(
    records['key'].to_list(),
    columns=['gender', 'year'],
    index=records.index,
)
value_cols = pd.DataFrame(
    [unpack_values(entry) for entry in records['values']],
    columns=['pop', 'dead'],
    index=records.index,
)

pop_df = (
    records
    .drop(columns=['key', 'values'])
    .assign(year=key_cols['year'], pop=value_cols['pop'], dead=value_cols['dead'])
    .set_index('year')
)
pop_df

In [None]:
# Open the SCB preliminary-deaths resource as an Excel workbook and read sheet
# 'Tabell 1', skipping its first 6 rows and keeping the first 10 columns.
# NOTE(review): the URL looks like a page path -- confirm it still serves the workbook.
scb_prel_url = 'https://www.scb.se/hitta-statistik/statistik-efter-amne/befolkning/befolkningens-sammansattning/befolkningsstatistik/pong/tabell-och-diagram/preliminar-statistik-over-doda/'

scb_file = pd.ExcelFile(scb_prel_url)
scb_prel = scb_file.parse(
    sheet_name='Tabell 1',
    skiprows=6,
    usecols=range(10),
)
scb_prel

In [None]:
# Total of the '2020' column of the preliminary sheet, wrapped in a one-element
# Series named 'dead'.
# NOTE(review): this sums every row of the column -- confirm the sheet has no
# total/summary row that would be counted twice.
abs_dead_2020 = pd.Series(scb_prel['2020'].sum(),name='dead')
abs_dead_2020

In [None]:
# Endpoint and JSON body for the 2020 population query. The parsing cell reads each
# returned key as (area, tot, gender, year), matching the four selections below.
pop_2020_url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101A/FolkmangdNov'
pop_2020_json_query = {
  "query": [
    {
      # Region "00" -- presumably the whole country; TODO confirm.
      "code": "Region",
      "selection": {
        "filter": "vs:RegionRiket99",
        "values": [
          "00"
        ]
      }
    },
    {
      # Age selection "tot" -- presumably all ages combined; TODO confirm.
      "code": "Alder",
      "selection": {
        "filter": "vs:ÅlderTotA",
        "values": [
          "tot"
        ]
      }
    },
    {
      # Values "1" and "2" are requested as separate items; the notebook later sums
      # the returned 'pop' figures into a single total.
      "code": "Kon",
      "selection": {
        "filter": "item",
        "values": [
          "1",
          "2"
        ]
      }
    },
    {
      # Only the year 2020 is requested.
      "code": "Tid",
      "selection": {
        "filter": "item",
        "values": [
          "2020"
        ]
      }
    }
  ],
  # Ask for the result as JSON.
  "response": {
    "format": "json"
  }
}

In [None]:
# POST the 2020 population query. The timeout prevents an indefinite hang on a
# stalled connection, and raise_for_status() fails fast on an HTTP error rather
# than deferring the failure to the JSON parsing cell.
r = requests.post(pop_2020_url,json=pop_2020_json_query,timeout=30)
r.raise_for_status()
r.status_code

In [None]:
# Keep only the 'data' member of the decoded response and peek at its first record.
pop_2020_json = r.json()['data']
pop_2020_json[0]

In [None]:
# Tabulate the response: each record's 'key' is split into (area, tot, gender, year)
# and the first entry of 'values' becomes the integer 'pop' column. Only gender,
# year and pop are kept.
pop_2020_records = pd.DataFrame.from_dict(pop_2020_json)

pop_2020_keys = pd.DataFrame(
    pop_2020_records['key'].to_list(),
    columns=['area', 'tot', 'gender', 'year'],
    index=pop_2020_records.index,
)

pop_2020_df = (
    pop_2020_records
    .drop(columns=['key', 'values'])
    .assign(
        gender=pop_2020_keys['gender'],
        year=pop_2020_keys['year'],
        pop=pop_2020_records['values'].map(lambda entry: entry[0]).astype(int),
    )
)
pop_2020_df

In [None]:
# Add up the 'pop' values of all returned rows into a single total, wrapped in a
# one-element Series named 'pop'.
pop_2020 = pd.Series(pop_2020_df['pop'].sum(),name='pop')
pop_2020

In [None]:
# Add (or overwrite) the 2020 row of the yearly table. pop_2020 and abs_dead_2020
# are one-element Series, so their scalar payload is extracted first: .at addresses
# a single cell, and handing it a whole Series would leave the stored value to
# pandas' alignment/broadcast rules instead of being an explicit scalar write.
pop_df.at['2020','pop'] = pop_2020.iloc[0]
pop_df.at['2020','dead'] = abs_dead_2020.iloc[0]
pop_df

In [None]:
# Yearly mortality: deaths as a fraction of the population.
pop_df['mortality'] = pop_df['dead'].div(pop_df['pop'])
pop_df

In [None]:
# Line plot of the yearly mortality column against the year index.
pop_df.plot(y='mortality',figsize=(18,12))

In [None]:
# Labels such as '1749/1750': every index entry joined to the one that follows it.
pair_idx = [
    first + '/' + second
    for first, second in zip(pop_df.index[:-1], pop_df.index[1:])
]

In [None]:
#### pairwise years ####

# Sum population and deaths over every pair of consecutive years. The columns are
# selected by name (not by position) so the result does not depend on column
# order, and each sum is one vectorised addition of the series shifted by a year.
yearly_counts = pop_df[['pop','dead']].astype(float)

pairwise_pop = yearly_counts['pop'].to_numpy()[:-1] + yearly_counts['pop'].to_numpy()[1:]
pairwise_dead = yearly_counts['dead'].to_numpy()[:-1] + yearly_counts['dead'].to_numpy()[1:]

# One row per consecutive pair, labelled with the 'year/year' strings in pair_idx.
pairwise_df = pd.DataFrame({'pop' : pairwise_pop,
                           'dead' : pairwise_dead},
                           index=pair_idx)

# Pooled mortality of the two years: combined deaths over combined population.
pairwise_df['mortality'] = pairwise_df['dead'] / pairwise_df['pop']
pairwise_df


In [None]:

# Line chart of pairwise mortality over the whole series. A tick is placed on every
# pair, but only every 10th label is left visible so the axis stays readable.
title = 'SWEDEN mortality : consecutive pairwise years 1749/1750 - 2019/2020\nDataSource : scb.se'
pairwise_df.plot(y='mortality',figsize=(18,12),title=title)

ax = plt.gca()

tick_positions = range(len(pair_idx))
_ = plt.xticks(tick_positions, pair_idx, rotation=90)

plt.xlabel('pairwise consecutive years')
plt.ylabel('mortality')

for position, tick_label in enumerate(ax.xaxis.get_ticklabels()):
    tick_label.set_visible(position % 10 == 0)

plt.savefig('mortality_pairwise_years_1749-2020.jpg',format='jpg')


In [None]:
# Bar chart of pairwise mortality for the pairs from 2015/2016 onwards.
title = 'SWEDEN mortality : consecutive pairwise years 2015/2016 - 2019/2020\nDataSource : scb.se'
pairwise_df.loc['2015/2016':].plot(y='mortality',kind='bar',figsize=(18,12),title=title)
# Axis label spelled correctly so the saved figure reads 'mortality'.
plt.ylabel('mortality')
plt.xlabel('pair of years')
# Fixed y ticks from 0 up to (not including) 0.01 in steps of 0.0005.
_= plt.yticks(np.arange(0,0.01,0.0005))
plt.savefig('pairwise_years_mortality_2015-2020.jpg',format='jpg')

In [None]:
# Line plot of the absolute number of deaths summed over each pair of years.
pairwise_df.plot(y='dead',figsize=(18,12))

In [None]:
# Pairs from 2000/2001 onwards, ordered from the most to the fewest deaths.
pairs_since_2000 = pairwise_df.loc['2000/2001':]
top_dead_2k = pairs_since_2000.sort_values(by='dead', ascending=False)
top_dead_2k

In [None]:
# One colour per bar, in the same order as the ranked index: blue everywhere,
# orange for the 2019/2020 pair.
colormap  = {y:'b' for y in top_dead_2k.index}
colormap['2019/2020'] = 'orange'

title = 'SWEDEN ranking abs deaths pairwise consecutive years 2000/2001 - 2019/2020\nDataSource : scb.se'

# The title is passed to the plot so that the saved figure carries it, and the
# colours are handed over as a plain list rather than a dict view.
top_dead_2k.plot(y='dead',figsize=(18,12),kind='bar',color=list(colormap.values()),title=title)
plt.ylabel('total dead in 2 years')
plt.xlabel('pairwise consecutive years')

plt.savefig('pairwise_years_2K_abs_dead_ranking.jpg',format='jpg')

In [None]:
# The same rows as top_dead_2k, re-sorted by mortality with the highest value first.
top_mort_2k = top_dead_2k.sort_values('mortality',ascending=False)
top_mort_2k

In [None]:
# Ranking of pairwise mortality as a bar chart: every bar blue, 2019/2020 orange.
colormap = dict.fromkeys(top_mort_2k.index, 'b')
colormap['2019/2020'] = 'orange'

title = 'SWEDEN ranking mortality pairwise consecutive years 2000/2001 - 2019/2020\nDataSource : scb.se'

top_mort_2k.plot(
    kind='bar',
    y='mortality',
    figsize=(18,12),
    color=colormap.values(),
    title=title,
)
plt.xlabel('pairwise consecutive years')
plt.ylabel('mortality')

plt.savefig('pairwise_years_2K_mortality_ranking.jpg',format='jpg')

In [None]:
#### multi-year averages #####

def compute_set_sums(set,compute_over=2):
    """Sum each run of ``compute_over`` consecutive rows of ``set``.

    ``set`` is indexed by row and exposes ``shape``; the result has one row per
    window start, i.e. ``len(set) - (compute_over - 1)`` rows, and
    ``set.shape[-1]`` columns. Row ``k`` is ``set[k] + ... + set[k+compute_over-1]``.
    """
    window_count = len(set) - (compute_over - 1)
    totals = np.zeros((window_count, set.shape[-1]))

    # Iterating over the array yields row views, so the in-place adds accumulate
    # straight into ``totals``.
    for start, running in enumerate(totals):
        for offset in range(compute_over):
            running += set[start + offset]

    return totals


def create_set_labels(set,subset_length=2):
    """Build one label per window of ``subset_length`` consecutive index entries.

    Each label joins the last two characters of the window's index values with
    '/', e.g. index ['1998', '1999', '2000'] with ``subset_length=2`` gives
    ['98/99', '99/00'].

    Parameters
    ----------
    set : object exposing ``.index.values`` (e.g. a pandas DataFrame or Series)
        Its index entries are the year labels; non-string entries are converted
        with ``str`` before being shortened.
    subset_length : int, default 2
        Number of consecutive entries per label. The default is a literal (the
        same window length ``compute_set_sums`` defaults to) so that defining the
        function does not depend on any notebook-level variable.

    Returns
    -------
    list of str
        ``len(index) - (subset_length - 1)`` labels, in index order; empty when
        the index is shorter than ``subset_length``.
    """
    years = set.index.values
    window_count = len(years) - (subset_length - 1)

    return [
        '/'.join(str(years[start + offset])[-2:] for offset in range(subset_length))
        for start in range(window_count)
    ]

#create_set_labels(pop_df.loc['2015':],4)

In [None]:
# Display the yearly table again before slicing it for the multi-year sums.
pop_df

In [None]:
# Label-based slice of the yearly table from start_year to the end, keeping only
# the two count columns that the multi-year sums are computed from.
start_year = '1990'
set_data = pop_df.loc[start_year:,['pop','dead']]
set_data

In [None]:
# Pool population and deaths over windows of compute_over consecutive years, label
# each window with its years, and derive the mortality of the pooled counts.
compute_over = 4

window_totals = compute_set_sums(set_data.values, compute_over)
window_labels = create_set_labels(set_data, compute_over)

set_sums = pd.DataFrame(window_totals, index=window_labels, columns=['pop','dead'])
set_sums['mortality'] = set_sums['dead'].div(set_sums['pop'])
set_sums

In [None]:
# One panel per window length (1..6 years) on a 3x2 grid with a shared y-axis.
# axes.flat walks the grid row by row, so panels fill left-to-right, top-to-bottom.
fig,axes = plt.subplots(3,2,figsize=(18,12),sharey=True)

for panel, subset in zip(axes.flat, range(1,7)):

    labels = create_set_labels(set_data,subset)
    window_totals = compute_set_sums(set_data.values,subset)

    set_sums = pd.DataFrame(window_totals,columns=['pop','dead'],index=labels)
    set_sums['mortality'] = set_sums['dead'] / set_sums['pop']

    panel.plot(set_sums['mortality'],'o--')
    panel.set_xticklabels(labels,rotation=90)
    panel.set_title('SWE mortality averaged over {} year(s)'.format(subset))
    panel.set_xlabel('years')
    panel.set_ylabel('mortality')


plt.suptitle('SWEDEN mortality 1990-2020, averaged over 1..6 years\n\nDataSource : scb.se')
plt.tight_layout()
plt.savefig('mortality_averaged_over_different_years.jpg',format='jpg')

In [None]:
# Load the watermark IPython extension and print its environment report with the
# selected flags, so the run is stamped with the versions it was executed under.
%load_ext watermark
%watermark -n -u -v -iv -w