In [1]:
# import libraries
import numpy as np
import pandas as pd

In [2]:
# load the raw daily temperature tables for Brussels and Antwerp
# (files are read in the order listed: Brussels first, then Antwerp)
climate_brussels, climate_antwerp = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../dataset/brusselsdailytemperature.csv',
        '../dataset/antwerpdailytemperature.csv',
    )
)

In [3]:
# define quarter
def get_quarter(row):
    """Map the month stored under ``row['MO']`` to a quarter label.

    Parameters
    ----------
    row : mapping-like
        Any object supporting ``row['MO']`` (e.g. a DataFrame row passed by
        ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    str or None
        ``'Q1'`` for months in (0, 3], ``'Q2'`` for (3, 6], ``'Q3'`` for
        (6, 9], ``'Q4'`` for (9, 12]; ``None`` when the month matches none
        of these ranges (including NaN, which fails every comparison).
    """
    month = row['MO']
    # (exclusive lower bound, inclusive upper bound, label)
    quarter_bounds = (
        (0, 3, 'Q1'),
        (3, 6, 'Q2'),
        (6, 9, 'Q3'),
        (9, 12, 'Q4'),
    )
    for lower, upper, label in quarter_bounds:
        if lower < month <= upper:
            return label
    return None

In [4]:
# stack the per-city tables, then tag every row with its country and quarter
df_concat = (
    pd.concat([climate_brussels, climate_antwerp])
    .assign(
        country='belgium',
        # row-wise lookup of the quarter label from the 'MO' column
        quarters=lambda frame: frame.apply(get_quarter, axis=1),
    )
)

In [5]:
# give the date columns descriptive names ('YEAR' is kept as-is)
date_dict = {
    'YEAR': 'YEAR',
    'MO': 'MONTH',
    'DY': 'DAY',
}
df_concat.rename(date_dict, axis='columns', inplace=True)

In [6]:
# quarterly distribution for each region
class cleaner_climate():
    """Compute grouped temperature thresholds from a table of daily records.

    Parameters
    ----------
    df_concat : pandas.DataFrame
        Daily records. ``skip_year`` reads its ``'YEAR'`` column and
        ``quarter_stat`` reads ``'T2M_MAX'`` plus every column named in
        ``group_list``.
    group_list : list
        Column names handed to ``DataFrame.groupby`` in ``quarter_stat``.
    """

    def __init__(self, df_concat, group_list):
        self.climate_city = df_concat
        self.group_list = group_list

    def skip_year(self, year=None):
        """Return the records with one year filtered out.

        Parameters
        ----------
        year : optional
            Value compared against the ``'YEAR'`` column; matching rows are
            dropped. With ``None`` (default) the stored frame is returned
            unchanged (the same object, not a copy).
        """
        if year is None:
            return self.climate_city
        return self.climate_city[self.climate_city['YEAR'] != year]

    def quarter_stat(self, year=None):
        """Return the 90th-percentile daily maximum temperature per group.

        Parameters
        ----------
        year : optional
            Forwarded to ``skip_year`` so a year can be excluded before the
            statistic is computed. Defaults to ``None`` (use every record),
            which is what a call without arguments has always done.

        Returns
        -------
        pandas.DataFrame
            Indexed by the ``group_list`` keys, with a single column
            ``'TEMPMAX_90th'``.
        """
        grouped_max = self.skip_year(year).groupby(self.group_list)['T2M_MAX']
        # Definition of threshold: 90th quantile of daily max temperature
        return grouped_max.quantile(.90).to_frame('TEMPMAX_90th')

In [7]:
# build the threshold table, grouped by country and quarter
Cleaner = cleaner_climate(
    df_concat=df_concat,
    group_list=['country', 'quarters'],
)
climate = Cleaner.quarter_stat()

In [8]:
# write the threshold table to CSV; reset_index turns the group keys back
# into ordinary columns so they are saved alongside the values
climate.reset_index().to_csv(
    path_or_buf='../dataset/reference_belgium_1996_to_2021.csv',
    index=False,
)