In [19]:
# import libraries
import numpy as np
import pandas as pd
import seaborn as sns
import functools
from util import cleaner_climate

In [20]:
# Load the raw Belgian inputs: the quarterly cause-of-death table and the
# reference table that is later handed to the climate cleaner.
MORTALITY_CSV = '../dataset/cause_of_death_quarterly.csv'
REFERENCE_CSV = '../dataset/reference_belgium_1996_to_2021.csv'
mortality = pd.read_csv(MORTALITY_CSV)
reference_dist = pd.read_csv(REFERENCE_CSV)

In [21]:
# Load the raw daily temperature files, one per Belgian city.
BRUSSELS_CSV = '../dataset/brusselsdailytemperature.csv'
ANTWERP_CSV = '../dataset/antwerpdailytemperature.csv'
climate_brussels = pd.read_csv(BRUSSELS_CSV)
climate_antwerp = pd.read_csv(ANTWERP_CSV)

In [22]:
# Map a month number to its calendar-quarter label.
def get_quarter(row):
    """Return the quarter label for the month stored under ``row['MO']``.

    Parameters
    ----------
    row : mapping-like
        Anything indexable with ``'MO'`` (for example a DataFrame row passed
        by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    str or None
        ``'Q1'`` for months in (0, 3], ``'Q2'`` for (3, 6], ``'Q3'`` for
        (6, 9], ``'Q4'`` for (9, 12]; ``None`` when the month falls in none
        of these ranges (including NaN, for which every comparison is false).
    """
    month = row['MO']
    lower = 0
    # Walk the quarters in order; each one covers (previous upper, upper].
    for label, upper in (('Q1', 3), ('Q2', 6), ('Q3', 9), ('Q4', 12)):
        if lower < month <= upper:
            return label
        lower = upper
    return None

In [23]:
# Stack the two city frames (Brussels rows first, then Antwerp), tag every row
# with the country, and derive the quarter label row by row via get_quarter.
df_concat = (
    pd.concat([climate_brussels, climate_antwerp])
    .assign(country='belgium')
    .assign(quarters=lambda frame: frame.apply(get_quarter, axis=1))
)

In [24]:
# Rename the date columns in place: MO -> MONTH, DY -> DAY (YEAR maps to itself).
date_dict = dict(YEAR='YEAR', MO='MONTH', DY='DAY')
df_concat.rename(columns=date_dict, inplace=True)

In [25]:
# create features for multi-countries case
class cleaner_climate_countries(cleaner_climate):
    """Subclass of ``cleaner_climate`` that replaces ``monthly_stat``.

    Only ``monthly_stat`` is overridden; all other behaviour comes from the
    base class, which is imported from ``util``.
    """

    def monthly_stat(self):
        """Return the per-group mean of the ``T2M_MAX`` column.

        ``self.climate_city`` is grouped by ``self.key1`` (both are expected
        to be provided by the base class) and the mean of ``T2M_MAX`` within
        each group is stored in a single ``TEMP_MEAN`` column.

        Returns
        -------
        pandas.DataFrame
            One ``TEMP_MEAN`` column holding the group means.
        """
        grouped = self.climate_city.groupby(self.key1)
        stats = pd.DataFrame()
        # TEMP_MEAN is the average of T2M_MAX (presumably the daily maximum
        # temperature -- confirm against the data source).
        stats['TEMP_MEAN'] = grouped['T2M_MAX'].mean()
        return stats

In [26]:
# Build the climate feature table from the concatenated city data and the
# reference table, passing country / YEAR / quarters as the cleaner's `key`.
# NOTE(review): this instantiates the base `cleaner_climate`, not the
# `cleaner_climate_countries` subclass defined in this notebook -- confirm
# that is intended.
feature_keys = ['country', 'YEAR', 'quarters']
Cleaner = cleaner_climate(df_concat, reference_dist, key=feature_keys)
climate = Cleaner.featurize()

In [27]:
# Show both merge inputs (mortality first, then the climate features).
for frame in (mortality, climate):
    print(frame)

     REGION  YEAR quarters                                           COD  \
0      2000  2009       Q1            Diseases of the circulatory system   
1      2000  2009       Q1                Diseases of the nervous system   
2      2000  2009       Q1            Diseases of the respiratory system   
3      2000  2009       Q1  Diseases of the skin and subcutaneous tissue   
4      2000  2009       Q1              Mental and behavioural disorders   
..      ...   ...      ...                                           ...   
643    4000  2017       Q4                Diseases of the nervous system   
644    4000  2017       Q4            Diseases of the respiratory system   
645    4000  2017       Q4  Diseases of the skin and subcutaneous tissue   
646    4000  2017       Q4              Mental and behavioural disorders   
647    4000  2017       Q4                                     Neoplasms   

     deathcount  country  
0          5498  belgium  
1           622  belgium  
2     

In [28]:
# Left-join the climate features onto every mortality row, matching on
# country / YEAR / quarters. Mortality rows without a climate match are kept
# and get missing values in the climate columns.
mortality_merged = mortality.merge(
    climate,
    how='left',
    on=['country', 'YEAR', 'quarters'],
)

# A left join must not change the number of mortality rows. If `climate` ever
# held more than one row per key, rows would be duplicated here and death
# counts would be double-counted downstream, so fail loudly instead.
assert len(mortality_merged) == len(mortality), (
    'climate has duplicate (country, YEAR, quarters) keys: '
    f'{len(mortality)} mortality rows became {len(mortality_merged)} after the merge'
)

In [29]:
# Print the text representation of the merged table to check the join result.
print(mortality_merged)

     REGION  YEAR quarters                                           COD  \
0      2000  2009       Q1            Diseases of the circulatory system   
1      2000  2009       Q1                Diseases of the nervous system   
2      2000  2009       Q1            Diseases of the respiratory system   
3      2000  2009       Q1  Diseases of the skin and subcutaneous tissue   
4      2000  2009       Q1              Mental and behavioural disorders   
..      ...   ...      ...                                           ...   
643    4000  2017       Q4                Diseases of the nervous system   
644    4000  2017       Q4            Diseases of the respiratory system   
645    4000  2017       Q4  Diseases of the skin and subcutaneous tissue   
646    4000  2017       Q4              Mental and behavioural disorders   
647    4000  2017       Q4                                     Neoplasms   

     deathcount  country  TEMP_MEAN  TEMP_RNG  HEAT_DAYS  
0          5498  belgium   5

In [30]:
# Write the merged mortality + climate table to disk, without the row index.
MERGED_CSV = '../dataset/mortality_merged_belgium.csv'
mortality_merged.to_csv(MERGED_CSV, index=False)