In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import functools

from subset_helper import cleaner_climate

In [2]:
# Reference distribution for the Netherlands, loaded from CSV.
# NOTE(review): this comment originally gave the reference period as
# "1983 Jan 01 - 2008 Dec 31", but the file name says 1996 to 2021 —
# confirm which period the file actually covers.
reference_dist = pd.read_csv(
    '../dataset/reference_netherland_1996_to_2021.csv'
)

In [3]:
# Daily temperature records, one CSV per city (Rotterdam first, then Amsterdam).
climate_rotterdam, climate_amsterdam = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../dataset/rotterdamdailytemperature.csv',
        '../dataset/amsterdamdailytemperature.csv',
    )
]

In [4]:
def get_quarter(row):
    """Return the quarter label ('Q1'..'Q4') for the month stored in row['MO'].

    Each quarter covers a half-open interval of three months:
    (0, 3] -> 'Q1', (3, 6] -> 'Q2', (6, 9] -> 'Q3', (9, 12] -> 'Q4'.
    Any value outside (0, 12] (including NaN) matches no interval and the
    function returns None.
    """
    month = row['MO']
    # Walk the quarters by their inclusive upper month bound.
    for upper, label in ((3, 'Q1'), (6, 'Q2'), (9, 'Q3'), (12, 'Q4')):
        if upper - 3 < month <= upper:
            return label
    return None

In [5]:
# Stack the per-city frames, tag every row with the country, then derive the
# quarter label of each row from its 'MO' column via get_quarter.
df_concat = (
    pd.concat([climate_rotterdam, climate_amsterdam])
    .assign(country='netherland')
    .assign(quarters=lambda frame: frame.apply(get_quarter, axis=1))
)

In [6]:
# Rename the date columns: 'MO' -> 'MONTH', 'DY' -> 'DAY' ('YEAR' maps to itself).
date_dict = {'YEAR': 'YEAR', 'MO': 'MONTH', 'DY': 'DAY'}
# Rebind the name to the renamed frame instead of mutating with inplace=True;
# the resulting columns are the same and the cell stays safe to re-run.
df_concat = df_concat.rename(columns=date_dict)

In [7]:
# Create features for multi-countries case
class cleaner_climate_countries(cleaner_climate):
    """Variant of cleaner_climate that overrides monthly_stat."""

    def monthly_stat(self):
        """Return a frame of per-group statistics.

        Groups self.climate_city by self.key1 and stores the mean of the
        'T2M_MAX' column of each group in a 'TEMP_MEAN' column.
        """
        grouped = self.climate_city.groupby(self.key1)
        stats = pd.DataFrame()
        # Extension point: add further feature columns to `stats` here if needed.
        # NOTE(review): the column is named TEMP_MEAN but is computed from
        # T2M_MAX — confirm the intended source column / name.
        stats['TEMP_MEAN'] = grouped['T2M_MAX'].mean()
        return stats

In [8]:
# Build the feature table from the combined frame and the reference
# distribution, then write it to CSV without the index.
# NOTE(review): this instantiates the base cleaner_climate; the
# cleaner_climate_countries subclass defined in this notebook is not used
# here — confirm that is intended.
Cleaner = cleaner_climate(
    df_concat,
    reference_dist,
    key=['country', 'YEAR', 'quarters'],
)
climate = Cleaner.featurize()
climate.to_csv('../dataset/netherland_heat_days.csv', index=False)