In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import requests

# Apply seaborn's default theme so every matplotlib figure below shares one look.
sns.set()

In [None]:
# SCB table endpoint that the population query below is posted to.
pop_url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101A/BefolkningR1860'

# One-year age classes "0" .. "99", followed by the open-ended "100+" class.
age_codes = [str(age) for age in range(100)] + ["100+"]

# Selection on the "Alder" (age) dimension: every one-year class.
age_selection = {
    "code": "Alder",
    "selection": {"filter": "vs:Ålder1årA", "values": age_codes},
}

# Selection on the "Kon" dimension: both item codes.
sex_selection = {
    "code": "Kon",
    "selection": {"filter": "item", "values": ["1", "2"]},
}

# Full request body: the two dimension selections, answered as JSON.
pop_json_query = {
    "query": [age_selection, sex_selection],
    "response": {"format": "json"},
}

In [None]:
# POST the query to the SCB endpoint. The timeout keeps a stalled connection
# from hanging the kernel indefinitely.
r = requests.post(pop_url, json=pop_json_query, timeout=30)

# Stop here on an HTTP error instead of failing later while decoding an error
# body as JSON.
r.raise_for_status()

# Displayed as the cell output.
r.status_code

In [None]:
# Decode the JSON body of the response fetched above.
pop_json = r.json()
# The records are stored under the 'data' key.
pop_data = pop_json['data']
# Peek at the first record to see its layout.
pop_data[0]

In [None]:
# Load the raw records into a frame; the next cell reads its 'key' and 'values' columns.
tmp = pd.DataFrame.from_dict(pop_data)
# Show the first rows to check the layout.
tmp.head()

In [None]:
# Each record's 'key' list is unpacked into the three dimension columns.
pop_df = pd.DataFrame(tmp['key'].tolist(), columns=['age', 'gender', 'year'])

# The count column is called 'population' rather than 'pop' because
# DataFrame.pop is a method, so attribute access would not reach the column.
pop_df['population'] = tmp['values'].map(lambda values: values[0]).astype(int)

# Strip the literal '+' of the open-ended top class ("100+") so age is an int.
# regex=False matters: a bare '+' is not a valid regular expression.
pop_df['age'] = pop_df['age'].str.replace('+', '', regex=False).astype(int)

# Collapse the gender dimension. Only 'population' is summed: summing the
# string 'gender' codes would produce a meaningless concatenated column.
pop_df = pop_df.groupby(['year', 'age'])[['population']].sum()

# Representative age of each one-year bin: its midpoint (age + 0.5).
pop_df['age_num'] = pop_df.index.get_level_values(1) + 0.5

pop_df

In [None]:
# The top age group (index age 100, which came from the "100+" class) is
# open-ended, so its representative age is set to 105 instead of 100.5.
is_top_age_bin = pop_df.index.get_level_values(1) == 100
pop_df.loc[is_top_age_bin, 'age_num'] = 105

pop_df


In [None]:
# Print the index, column dtypes and non-null counts of the aggregated frame.
pop_df.info()

In [None]:
def _weighted_age_stats(ages, counts):
    """Mean, median and count of a sample given as (value, frequency) pairs.

    Equivalent to expanding every age ``counts`` times and aggregating, but
    without materialising one array element per inhabitant.

    Parameters
    ----------
    ages : array-like of float
        Representative age of each bin.
    counts : array-like of int
        Number of people in each bin (same length as ``ages``).

    Returns
    -------
    pandas.Series
        Float values indexed by 'mean', 'median' and 'count'. Mean and median
        are NaN when the total count is zero.
    """
    ages = np.asarray(ages, dtype=float)
    counts = np.asarray(counts, dtype=np.int64)

    # Sort by age so the cumulative counts describe the ordered sample.
    order = np.argsort(ages, kind='stable')
    ages, counts = ages[order], counts[order]

    total = int(counts.sum())
    if total == 0:
        return pd.Series({'mean': np.nan, 'median': np.nan, 'count': 0.0})

    # cumulative[i] = number of people in bins 0..i, so the person at 0-based
    # rank k sits in the first bin whose cumulative count exceeds k.
    cumulative = np.cumsum(counts)
    lower_rank, upper_rank = (total - 1) // 2, total // 2
    lower = ages[np.searchsorted(cumulative, lower_rank, side='right')]
    upper = ages[np.searchsorted(cumulative, upper_rank, side='right')]

    return pd.Series({
        'mean': np.dot(ages, counts) / total,
        # For an even total this averages the two middle values, as the
        # median of the expanded sample would.
        'median': (lower + upper) / 2,
        'count': float(total),
    })


years = pop_df.index.get_level_values(0).unique()

# One Series per year, assembled with a single concat instead of growing the
# frame inside the loop.
per_year_stats = [
    _weighted_age_stats(pop_df.loc[y, 'age_num'], pop_df.loc[y, 'population']).rename(y)
    for y in years
]

# Rows: mean / median / count; columns: years (later cells use stats_df.T).
stats_df = pd.concat(per_year_stats, axis=1) if per_year_stats else pd.DataFrame()

stats_df.T

In [None]:
# Negative value: start this many rows before the end of the table.
limit = -5

# Positional slice of the per-year table, i.e. its last rows.
stats_df.T.iloc[limit:]

In [None]:


# First year shown; year labels are strings in this table.
first_year = '1958'
age_stats = stats_df.T.loc[first_year:]

# The title is built from the years actually plotted, so it cannot drift
# from the slice above.
title = 'SWEDEN population mean and median age, {} - {}\nDataSource : scb.se'.format(
    age_stats.index[0], age_stats.index[-1])

ax = age_stats.plot(y=['mean','median'],figsize=(18,12),style='o--',title=title)
ax.set_ylabel('age')
ax.set_xlabel('year')

# Vertical year labels so neighbouring ticks do not overlap.
plt.xticks(rotation=90)

# One y tick per year of age from 24 to 44; assigning to _ hides the return value.
_ = ax.set_yticks(np.arange(24,45,1))

In [None]:
# Coarse age groups. The bins are left-closed (right=False) so each label
# matches its content: [0,70) -> '-70', [70,80) -> '70-79', [80,90) -> '80-89',
# [90,101) -> '90+'. With pd.cut's default right-closed bins, ages 70, 80 and
# 90 would fall into the group below the one their label names.
pop_df['coarse_bins'] = pd.cut(pop_df.index.get_level_values(1),
                               [0,70,80,90,101],
                               right=False,
                               labels=['-70','70-79','80-89','90+'])
pop_df

In [None]:
# Population per year and coarse age group. 'coarse_bins' is categorical, so
# `observed` is stated explicitly: this keeps the current result (all
# categories kept) and does not depend on pandas' changing default.
coarse_pop = pop_df.groupby(['year','coarse_bins'], observed=False)['population'].sum()
coarse_pop

In [None]:
# Reference year (year labels are strings).
base_year = '2000'

# Wide layout: one row per year, one column per coarse age group.
coarse_by_year = coarse_pop.unstack()

# Divide every year's row by the base-year row, matched on age-group column.
coarse_pop_growth_vs_base = coarse_by_year / coarse_by_year.loc[base_year]
coarse_pop_growth_vs_base

In [None]:
title = 'SWEDEN Population Growth per Age Group vs base year {} \nDataSource : scb.se'.format(base_year)

# Only the years from the base year onwards are plotted.
growth_since_base = coarse_pop_growth_vs_base.loc[base_year:]
growth_ax = growth_since_base.plot(figsize=(18,12),style='o--',title=title)
growth_ax.set_ylabel('growth factor vs base year of {}'.format(base_year))

# Write the figure next to the notebook as a JPEG.
growth_ax.get_figure().savefig('swe_pop_age_grp_growth_vs_base_{}.jpg'.format(base_year),format='jpg')

In [None]:

# Year-over-year growth factor per age group: each year's row divided by the
# previous row. The first row has no predecessor and is NaN.
coarse_wide = coarse_pop.unstack()
coarse_y2y_growth = coarse_wide.div(coarse_wide.shift())
coarse_y2y_growth

In [None]:
# First year shown (year labels are strings in this table).
start_year = '2015'

# Title and y label added so the figure can be read on its own, matching the
# other figures in this notebook.
y2y_title = 'SWEDEN Population year-over-year Growth per Age Group from {}\nDataSource : scb.se'.format(start_year)
y2y_ax = coarse_y2y_growth.loc[start_year:].plot(figsize=(18,12),style='o--',title=y2y_title)
y2y_ax.set_ylabel('growth factor vs previous year');

In [None]:
# Workbook read straight from scb.se; the cells below parse its 'Befutv' sheet.
scb_bef_utv_url = 'https://www.scb.se/contentassets/fbc2871e93e64b9aa4d20d1fe8b7de2b/be0101_tab9utv1749-2019.xlsx'
scb_bef_utv = pd.ExcelFile(scb_bef_utv_url)


In [None]:
# List the sheets in the workbook; the next cell parses the one named 'Befutv'.
scb_bef_utv.sheet_names

In [None]:
# Last data year to keep; rows after it in the sheet are discarded.
last_year = 2019

# The first two sheet rows are skipped, so the third row supplies the headers.
befutv_raw = scb_bef_utv.parse('Befutv',skiprows=2)

# Row label of the last data year, with a clear error if it is absent.
year_matches = befutv_raw.index[befutv_raw['År'] == last_year]
if year_matches.empty:
    raise ValueError("year {} not found in the 'År' column of sheet 'Befutv'".format(last_year))
last_year_idx = year_matches[0]

# Built as one non-mutating chain: the original applied inplace operations to
# a .loc slice, which is a chained-assignment hazard.
scb_bef_utv_df = (
    befutv_raw.loc[:last_year_idx]   # label slice, last_year row included
    .set_index('År')
    .replace('..',np.nan)            # '..' cells become NaN
    .astype(float)
)
scb_bef_utv_df

In [None]:
# Yearly events divided by that year's population ('Folkmängd').
# Despite the '_%' suffix, the values are fractions: nothing is multiplied by 100.
population_size = scb_bef_utv_df['Folkmängd']

scb_bef_changes = pd.DataFrame({
    'born_%': scb_bef_utv_df['Födda'] / population_size,
    'dead_%': scb_bef_utv_df['Döda'] / population_size,
    'migration_in_%': scb_bef_utv_df['Invandringar'] / population_size,
    'migration_out_%': scb_bef_utv_df['Utvandringar'] / population_size,
})

scb_bef_changes

In [None]:
start_year = 2000 #first avail year : 1749

# The end year comes from `last_year` (the year the table was cut at above)
# rather than a hard-coded 2019, so title and file name follow the data.
title = 'SWEDEN demographics evolution {} - {}\nDataSource : scb.se'.format(start_year, last_year)

rate_columns = ['born_%','dead_%','migration_in_%','migration_out_%']
ax = scb_bef_changes.loc[start_year:].plot(figsize=(18,12),y=rate_columns,
                                           style='o--',title=title)

# Second y axis sharing the same x axis, for the absolute population size.
ax2 = ax.twinx()
scb_bef_utv_df.loc[start_year:].plot(y='Folkmängd',ax=ax2,color='lightgrey')

ax.set_ylabel('proportion of population')
ax2.set_ylabel('population size')

ax.legend(loc='upper center')
ax2.legend(['population'],loc='upper right')

# Mortality-peak annotations: (year, x offset, y offset, label, extra text options).
mortality_events = [
    (1773, 2, 0.0, '1773 Crops Failed => Dysentery', {}),
    (1809, 2, 0.0, '1808/09 Finnish War => "Lantvärnssjukan" (Dysentery/Tyfus)', {}),
    (1918, 0, 0.0001, '1918 Spanish Flu', {'rotation': 45}),
]

# Draw every annotation whose year is inside the plotted range, not only
# when start_year is exactly 1749.
for event_year, dx, dy, label, text_options in mortality_events:
    if event_year >= start_year and event_year in scb_bef_changes.index:
        ax.text(event_year + dx, scb_bef_changes.loc[event_year,'dead_%'] + dy, label,
                color='crimson', **text_options)

plt.savefig('swe_demographics_evolution_{}_{}.jpg'.format(start_year, last_year),format='jpg')

In [None]:
# Rank the years by the 'dead_%' column, highest first.
scb_bef_changes.sort_values('dead_%',ascending=False)