In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

sns.set()

In [None]:
##### PARAM #####
# Month-end date (as an ISO string) of the last month treated as complete.
# Later cells use it to cut the 2020 series and to derive the last month
# number / month name used in masks and plot titles.
last_complete_month = '2020-05-31'

In [None]:
def date_parser(m):
    """Translate a Swedish month name into its English equivalent.

    Parameters
    ----------
    m : str
        Lower-case Swedish month name, e.g. ``'januari'``.

    Returns
    -------
    str
        The matching English month name, e.g. ``'January'``.

    Raises
    ------
    KeyError
        If ``m`` is not one of the twelve month names in the lookup table.
    """
    eng_months = {'januari' : 'January',
                 'februari' : 'February',
                 'mars' : 'March',
                 'april' : 'April',
                 'maj' : 'May',
                 'juni' : 'June',
                 'juli' : 'July',
                 'augusti' : 'August',
                 'september' : 'September',
                 'oktober' : 'October',
                 'november' : 'November',
                 'december' : 'December'}

    # The looked-up value already is the string callers need. The former
    # round trip through ``pd.datetime.strptime(...).strftime('%B')`` added
    # nothing, depended on the process locale, and breaks on pandas >= 2.0
    # where the ``pd.datetime`` alias no longer exists.
    return eng_months[m]
    
    
# Load the monthly death counts; the CSV is read as ISO-8859-1.
# The frame is expected to carry a 'månad' column, which the following cell
# translates with date_parser.
death_df = pd.read_csv('scb_dead_per_month.csv',encoding='ISO-8859-1')

In [None]:
# Translate the Swedish month names to English and store them as a
# categorical whose categories are listed in calendar order, so that later
# grouping/sorting follows January..December instead of the alphabet.
english_month_order = ['January', 'February', 'March', 'April', 'May', 'June',
                       'July', 'August', 'September', 'October', 'November', 'December']
death_df['månad'] = pd.Categorical(death_df['månad'].map(date_parser),
                                   english_month_order)

In [None]:
# Display the frame after 'månad' was converted to English month categories.
death_df

In [None]:
# Collapse rows that share the same month by summing the remaining columns;
# 'månad' becomes the index of the result. Note that this rebinds death_df.
death_df = death_df.groupby('månad').sum()

death_df

In [None]:
# Load the population table (ISO-8859-1 encoded CSV). Later cells read its
# first row (scb_pop.loc[0, :]) and its '2015'..'2019' column means.
scb_pop = pd.read_csv('scb_population.csv',encoding='ISO-8859-1')

scb_pop

In [None]:
# Show floats with two decimals from here on.
pd.set_option("display.precision", 2)

# First row of the population table, expressed in millions. Dividing the
# monthly death counts by it aligns on the column labels of death_df.
population_millions = scb_pop.loc[0, :] / 1e6

# Transpose so that each row holds the twelve monthly values of one column
# of death_df.
deaths_per_m = death_df.div(population_millions).T
deaths_per_m

In [None]:
# Flatten deaths_per_m row by row into one long list: all values of the
# first row, then all values of the second row, and so on.
monthly_deaths = [value
                  for _, row_values in deaths_per_m.iterrows()
                  for value in row_values]
    


In [None]:
# Month-end timestamps from January 1990 through December 2019; used as the
# index for the flattened monthly_deaths list in the next cell.
dates = pd.date_range('1990-01-01','2019-12-31',freq='M')
dates

In [None]:
# Single-column frame of deaths per million, indexed by month-end date.
monthly_timeline = pd.DataFrame({'dead_per_million': monthly_deaths}, index=dates)

# Keep an untouched copy of the 1990-2019 data; monthly_timeline itself is
# rebound by later cells.
scb_1990_2019 = monthly_timeline.copy()

In [None]:
# Column-wise mean of scb_pop, restricted to the labels '2015'..'2019', then
# averaged into one number. The same expression is used further down as the
# population denominator for the 2015-2019 reference columns.
scb_pop.mean()['2015':'2019'].mean()

In [None]:
curr_pop = 10327589 #from scb.se for 2019-12-31

reference_years = ['2015', '2016', '2017', '2018', '2019']
reference_cols = ['2015-2019', 'min_2015_2019', 'max_2015_2019']
swedish_month_order = ['januari', 'februari', 'mars', 'april', 'maj', 'juni',
                       'juli', 'augusti', 'september', 'oktober', 'november', 'december']

# Population denominator (in millions) for the 2015-2019 reference columns.
reference_pop_millions = scb_pop.mean()['2015':'2019'].mean() / 1e6

# Daily figures summed up to one row per (Swedish) month name.
scb_2020 = pd.read_csv('scb-dead_per_day.csv', sep=';')
scb_2020 = scb_2020.groupby('Månad').sum()
scb_2020['min_2015_2019'] = scb_2020[reference_years].min(axis=1)
scb_2020['max_2015_2019'] = scb_2020[reference_years].max(axis=1)

# .copy() gives an independent frame, so the column assignments below do not
# raise SettingWithCopyWarning on a column selection of the grouped frame.
scb_2020 = scb_2020[['2020'] + reference_cols].copy()
scb_2020[reference_cols] = scb_2020[reference_cols] / reference_pop_millions

# Put the rows into calendar order before attaching the month-end index;
# the index assignment relies on there being exactly twelve rows.
scb_2020 = scb_2020.reset_index()
scb_2020['Månad'] = pd.Categorical(scb_2020['Månad'], swedish_month_order)
scb_2020 = scb_2020.sort_values('Månad')
scb_2020.index = pd.date_range('2020-01-01','2020-12-31',freq='M')

# All twelve rows are kept on purpose: the 2015-2019 reference columns are
# plotted for the whole year, while users of the 2020 column cut it at
# last_complete_month themselves.
scb_2020['2020'] = scb_2020['2020'] / (curr_pop/1e6)
scb_2020 = scb_2020.rename(columns={'2020' : 'dead_per_million'})
scb_2020

In [None]:
# Display the 1990-2019 monthly deaths-per-million frame.
monthly_timeline

In [None]:
# Draw the 1990-2019 series, then overlay the 2020 values in red on the
# same axes.
timeline_ax = monthly_timeline.plot(figsize=(18,12))
scb_2020['dead_per_million'].plot(ax=timeline_ax, color='r')

In [None]:
# scb_2020 holds a row for every month of 2020, but only the months up to and
# including last_complete_month are complete. Appending just those keeps
# unfinished months out of the ranking and statistics computed below.
complete_2020 = scb_2020.loc[:last_complete_month, 'dead_per_million']

# Start from the untouched 1990-2019 copy rather than from monthly_timeline
# itself, so the cell gives the same result when it is run again.
monthly_timeline = pd.concat([scb_1990_2019['dead_per_million'], complete_2020], axis=0)
monthly_timeline

In [None]:
# Rank every month (1 = highest deaths per million). The result is kept in
# chronological order: label slicing with a date string that is not itself
# an index entry needs a monotonic DatetimeIndex, and sorting by rank first
# would break that (it raises on pandas >= 2.0).
timeline_rank = monthly_timeline.rank(ascending=False)
two_thousands_rank = timeline_rank.loc['2000-01-01':]
two_thousands_rank.plot(figsize=(18,12),style=['o--'])
plt.ylabel('rank')

In [None]:
# Order the months from highest to lowest deaths per million and number them
# 1..N in that order.
ranked_timeline = monthly_timeline.sort_values(ascending=False).to_frame()
ranked_timeline['rank'] = range(1, len(ranked_timeline) + 1)
monthly_timeline = ranked_timeline
monthly_timeline.head(50)

In [None]:
# This value is drawn as the 'average 1990-2019' reference line, so it must
# only be computed from rows up to the end of 2019; monthly_timeline also
# contains 2020 rows at this point.
baseline_rows = monthly_timeline.index <= pd.Timestamp('2019-12-31')
mean = monthly_timeline[baseline_rows].mean()
mean

In [None]:
# Look up the row (deaths per million and rank) of the last complete month.
monthly_timeline.loc[last_complete_month]

In [None]:
# monthly_timeline is sorted from highest to lowest, so the first twenty
# rows are the twenty highest months.
top_20 = monthly_timeline.iloc[:20].copy()
top_20

In [None]:
# Colour the bars from the data instead of from a hand-counted position:
# months in the year of last_complete_month are red, all others blue.
cutoff = pd.Timestamp(last_complete_month)
colors = ['r' if stamp.year == cutoff.year else 'b' for stamp in top_20.index]
print (colors)

# Show plain dates (no time part) on the x axis. Going through DatetimeIndex
# keeps this line valid when the cell is executed a second time.
top_20.index = pd.DatetimeIndex(top_20.index).date
top_20['dead_per_million'].plot(kind='bar',
                                figsize=(18,12),
                                color=colors,
                                title='SWEDEN - Top-20 deadliest months January 1990 - {} {}\nDataSource: SCB.se'.format(
                                    cutoff.month_name(), cutoff.year))
plt.ylabel('deaths per million')
plt.axhline(mean['dead_per_million'],color='orange',ls='dashed',label='average 1990-2019')
plt.legend(loc='upper right')
plt.savefig('scb_population_deaths_1990-2020.jpg',format='jpg')

In [None]:

# Display the untouched 1990-2019 copy made right after the timeline was built.
scb_1990_2019

In [None]:
# Chronological 1990-2020 series: the 1990-2019 copy followed by every row of
# the 2020 frame, wrapped in a one-column DataFrame.
combined_series = pd.concat(
    [scb_1990_2019['dead_per_million'], scb_2020['dead_per_million']],
    axis=0,
)
scb_1990_2020 = combined_series.to_frame()
scb_1990_2020

In [None]:
# Add calendar year and month number columns taken from the date index.
scb_1990_2020 = scb_1990_2020.assign(
    year=scb_1990_2020.index.year,
    month=scb_1990_2020.index.month,
)

In [None]:
# Display the combined frame, now including the 'year' and 'month' columns.
scb_1990_2020

In [None]:
# Per calendar month: lowest, average and highest deaths per million over
# the rows up to 2019-12-31 (2020 is left out of this baseline).
baseline_1990_2019 = scb_1990_2020.loc[:'2019-12-31']
month_grp = (baseline_1990_2019
             .groupby('month')['dead_per_million']
             .agg(['min', 'mean', 'max']))
month_grp

In [None]:
# Month number column; also used by the following cell as the new index.
scb_2020['month'] = scb_2020.index.month

fig, ax = plt.subplots(figsize=(18,12))
ax.set_title('Sweden Monthly Deaths per million, adjusted for population - All Causes\nDataSource:SCB.se')

# Reference averages: long-run 1990-2019 and recent 2015-2019.
ax.plot(month_grp.index, month_grp['mean'], 'o--', label='Monthly Average 1990-2019')
ax.plot(scb_2020['month'], scb_2020['2015-2019'], 'o--', color='orange', label='Monthly Average 2015-2019')

# Shaded min..max envelopes for the same two reference periods.
ax.fill_between(scb_2020['month'], scb_2020['min_2015_2019'], scb_2020['max_2015_2019'],
                color='m', alpha=0.3, label='2015-2019 range')
ax.fill_between(month_grp.index, month_grp['min'], month_grp['max'],
                color='c', alpha=0.3, label='1990-2019 range')

# 2020 itself, drawn only up to the last complete month.
complete_2020 = scb_2020[:last_complete_month]
ax.plot(complete_2020['month'], complete_2020['dead_per_million'], 'o--', color='crimson', label='2020')

ax.legend(loc='upper left')
ax.set_xlabel('month')
ax.set_ylabel('dead per million')
xticks = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
ax.set_xticks(month_grp.index)
ax.set_xticklabels(xticks)
fig.savefig('scb_monthly_deaths_all.jpg',format='jpg')

In [None]:
last_month_number = pd.Timestamp(last_complete_month).month

# 2020 frame re-indexed by month number so it lines up with month_grp.
scb_2020_monthly = scb_2020.set_index('month')
print (scb_2020_monthly)

# 1990-2019 statistics for the months up to the last complete one.
mean_1990_2019 = month_grp.loc[:last_month_number]
print (mean_1990_2019)

# 2020 minus the long-run monthly mean. Months without a baseline row come
# out as NaN after alignment.
monthly_excess = scb_2020_monthly['dead_per_million'].sub(mean_1990_2019['mean'])
print (monthly_excess)
print (monthly_excess.cumsum())

In [None]:
# Month number (1-12) of the last complete month.
pd.Timestamp(last_complete_month).month

In [None]:
# Keep, for every year, only the months from January up to the month of
# last_complete_month, then total them per year.
last_month_number = pd.Timestamp(last_complete_month).month
mask = scb_1990_2020.index.month <= last_month_number
first_months = scb_1990_2020.loc[mask, 'dead_per_million']
first_cum = first_months.groupby(first_months.index.year).sum()
first_cum

In [None]:
# Per-year totals for January..<last complete month>, in chronological order.
cutoff_month_name = pd.Timestamp(last_complete_month).month_name()
first_cum.plot(kind='bar',figsize=(18,12),color='orange',
               title='SWEDEN cumulative deaths per million Jan 1:st - {} last,1990-2020\nDataSource:SCB.se'.format(
                   cutoff_month_name))
plt.ylabel('deaths per million')
# 'format' is a savefig keyword argument. It used to be written as a
# str.format() call on the file name, which did nothing.
plt.savefig('scb_deaths_jan_may_unsorted.jpg',format='jpg')

In [None]:
# Same totals, ordered from highest to lowest.
first_cum_sorted = first_cum.sort_values(ascending=False)

# Pick the highlighted bar from the data rather than from a hand-counted
# position: the year of last_complete_month is crimson, the rest orange.
cutoff = pd.Timestamp(last_complete_month)
colors = ['crimson' if year == cutoff.year else 'orange'
          for year in first_cum_sorted.index]

print (colors)
first_cum_sorted.plot(kind='bar',figsize=(18,12),
                      title='SWEDEN cumulative deaths per million Jan 1:st - {} last,1990-2020\nDataSource:SCB.se'.format(
                          cutoff.month_name()),
                      color=colors)
plt.ylabel('deaths per million')
plt.savefig('scb_deaths_jan_may_sorted.jpg',format='jpg')

In [None]:
# Min / mean / max of deaths per million for each of the months January up
# to the last complete month, across all years present in first_months.
month_numbers = range(1, pd.Timestamp(last_complete_month).month + 1)
by_month = first_months.groupby(first_months.index.month)

stat_rows = [by_month.get_group(number).agg(['min', 'mean', 'max']).values
             for number in month_numbers]

month_stats = pd.DataFrame(data=np.array(stat_rows),
                           index=month_numbers,
                           columns=['mini', 'mean', 'maxi'])
month_stats.index.name = 'month'
month_stats

In [None]:
from matplotlib import cm

# Running total within each year over the selected first months.
first_cumsum = first_months.groupby(first_months.index.year).cumsum()
bar = first_cumsum.groupby(first_cumsum.index.year)

cutoff_month_name = pd.Timestamp(last_complete_month).month_name()

plt.figure(figsize=(18,12))
plt.title('SWEDEN cumulative monthly deaths per million, 1990-2020, Jan 1st to {} last'.format(
    cutoff_month_name))

# One short red dashed line per year.
for _, yearly_running_total in bar:
    plt.plot(yearly_running_total, 'ro--')

plt.ylabel('cumulative deaths per million')

In [None]:
# Display the deaths-per-million values of the selected first months of every year.
first_months

In [None]:
first_sum = first_months.groupby(first_months.index.year)

month_names = {1:'Jan',2:'Feb',3:'Mar',4:'Apr',5:'May',6:'Jun',
              7:'Jul',8:'Aug',9:'Sep',10:'Oct',11:'Nov',12:'Dec'}

# Build a year x month table: each year contributes its monthly values as
# one row (the dict gives one column per year, hence the transpose).
years = pd.DataFrame({year: values.values for year, values in first_sum}).T

# Label the columns with month numbers first, then swap in the short names.
num_months = range(1, pd.Timestamp(last_complete_month).month + 1)
years.columns = num_months
years = years.rename(columns=month_names)

years.columns.name = 'month'
years.index.name = 'year'
years



In [None]:
# One stacked bar per year, with one segment per month column of `years`.
cutoff_month_name = pd.Timestamp(last_complete_month).month_name()
stacked_ax = years.plot.bar(
    stacked=True,
    figsize=(18,12),
    title='Sweden1990-2020 : Deaths per million : All Causes : January to {}\nDataSource:SCB.se'.format(
        cutoff_month_name),
)

stacked_ax.set_ylabel('Nr dead per million')
stacked_ax.figure.savefig('scb_deaths_per_M_per_month_1990_2020.jpg',format='jpg')