In [77]:
# SPDX-FileCopyrightText: 2022 Aleksander Grochowicz
#
# SPDX-License-Identifier: GPL-3.0-or-later

from matplotlib.ticker import AutoMinorLocator
import pandas as pd
from datetime import date
import holidays
from utilities import compute_cdd_hdd_artificial, create_daily_data, create_hourly_data

# Load temperature data

In [327]:
# Hourly temperatures exported as .txt by the ERA5 notebook: a single column of
# values with no header row.
year = 2021
temperatures = pd.read_csv('original_data/temp_moy_South Korea_2021_2021.txt', header = None)
temperatures.columns = ['KR']
# Build the hourly timestamps for the whole of `year`. The lower-case 'h' alias
# replaces the deprecated 'H', which triggers a FutureWarning in recent pandas.
# Assigning the index fails if the file's row count differs from the number of
# hours in the year.
index = pd.date_range(start=f'{year}-01-01', end=f'{year}-12-31 23:00:00', freq='h')
temperatures.index = index

  index =  pd.date_range(start='2021-01-01', end='2021-12-31 23:00:00', freq='H')


In [328]:
# Display the hourly temperature frame (single 'KR' column) as a sanity check.
temperatures

Unnamed: 0,KR
2021-01-01 00:00:00,-7.208960
2021-01-01 01:00:00,-6.802008
2021-01-01 02:00:00,-2.131903
2021-01-01 03:00:00,-1.620947
2021-01-01 04:00:00,-1.367712
...,...
2021-12-31 19:00:00,-8.567328
2021-12-31 20:00:00,-9.063788
2021-12-31 21:00:00,-9.432013
2021-12-31 22:00:00,-9.613074


In [329]:
# temperatures = pd.read_csv('original_data/europe_temperatures_1980-2020.csv', index_col = [0], infer_datetime_format=True)
# temperatures.drop('IS', axis = 1) # Remove Iceland
# temperatures.index = pd.to_datetime(temperatures.index)

# Load regression parameters


In [330]:
# Regression parameters (one CSV per parameter family), each read with its
# first column as the row index.
(
    reg_parameters_days,
    reg_parameters_hours,
    reg_parameters_temp,
    reg_parameters_trend,
) = (
    pd.read_csv(csv_path, index_col = [0])
    for csv_path in (
        'processing/reg_parameters_days_of_week_2010-2014.csv',
        'processing/reg_parameters_hours_of_week_2010-2014.csv',
        'processing/reg_parameters_temp_2010-2014.csv',
        'processing/reg_parameters_trend_2010-2014.csv',
    )
)

In [331]:
# List the column labels of the day-of-week parameters (country codes such as 'FR').
reg_parameters_days.columns

Index(['AT', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'UK',
       'EL', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME', 'MK', 'NL', 'NO',
       'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK'],
      dtype='object')

In [332]:
# As a first step, only the regression coefficients for France are used.
# Each 'FR' column is extracted as a one-column DataFrame and relabelled 'KR',
# presumably so the helper functions can look it up under the same key as the
# temperature data. The *_FR variable names are kept even though the column
# label is now 'KR'.
reg_days_FR = reg_parameters_days['FR'].to_frame(name='KR')
reg_hours_FR = reg_parameters_hours['FR'].to_frame(name='KR')
reg_temp_FR = reg_parameters_temp['FR'].to_frame(name='KR')
reg_trend_FR = reg_parameters_trend['FR'].to_frame(name='KR')



Since our validation year is 2015, compared to the training data from 2010 to 2014, all weather years that we use will be processed on the premise that they happened in 2015.

In [333]:
# Country labels are the temperature columns; years are the distinct calendar
# years found in the hourly index.
countries = list(temperatures.columns)
hourly_timestamps = temperatures.index.to_series()
years = list(hourly_timestamps.dt.year.unique())

Add holidays, as before, using the `holidays` package. Christmas week and, for some countries, Orthodox Christmas can be added as well (that code is currently commented out below).

In [334]:
# Build one holiday calendar per country, covering every year in the data.
# Country labels that the `holidays` package knows under a different code.
holiday_code_overrides = {'EL': 'GR'}

list_holidays = {}
yrs = years.copy()
for c in countries:
    try:
        list_holidays[c] = holidays.CountryHoliday(holiday_code_overrides.get(c, c), years = yrs)
    except (KeyError, NotImplementedError):
        # Unknown country codes raise KeyError in older `holidays` releases and
        # NotImplementedError in newer ones; fall back to an empty calendar in
        # both cases.
        list_holidays[c] = holidays.HolidayBase() #if no holidays are available, e.g. ME, MK

# julian = ['ME', 'MK', 'RS']
# gregorian = countries.copy()
# for i in countries:
#     if i in julian:
#         gregorian.remove(i)

# Add last week of the year as holidays.
# for y in list(yrs):
#     for i in gregorian:
#         list_holidays[i].append(date(y,1,2))
#         list_holidays[i].append(list(pd.date_range(start = date(y, 12, 24), end = date(y, 12, 31), freq = '1D')))
#     for i in julian:
#         list_holidays[i].append(list(pd.date_range(start = date(y, 1, 6), end = date(y, 1, 8), freq = '1D')))
        

In [335]:
# #Add German holidays before reunification (so include reunification as everything is assumed to be in 2015)
# overlapping_holidays = ['Neujahrestag', 'Karfreitag', 'Ostermontag', 'Auffahrt', 'Pfingstmontag', 'Weihnachten']
# for y in range(1980, 1991):
#     for date, name in sorted(holidays.CH(years = y).items()):
#         if name in overlapping_holidays:
#             list_holidays['DE'].append({date: name})
#     list_holidays['DE'].append(str(y)+'-05-01')
#     list_holidays['DE'].append(str(y)+'-10-03')
#     list_holidays['DE'].append(str(y)+'-12-26')

In [336]:
# #Add Slovenian holidays before 1992
# AT_SI = ['Neujahr', 'Allerheiligen', 'Stefanitag'] #New Year's, All Saints Day, Independence Day on Boxing Day
# for y in range(1980, 1992):
#     for date, name in sorted(holidays.AT(years = y).items()):
#         if name in AT_SI:
#             list_holidays['SI'].append({date: name})
#     list_holidays['SI'].append(str(y)+'-02-08') #Preseren Day
#     list_holidays['SI'].append(str(y)+'-04-27') #Day of uprising against occupation
#     list_holidays['SI'].append(str(y)+'-05-01') #May 1
#     list_holidays['SI'].append(str(y)+'-05-02') #May 1 over two days
#     list_holidays['SI'].append(str(y)+'-06-25') #Statehood Day

In [337]:
# #Add Bulgarian holidays before 1990
# RO_BG = ['Paștele', 'Anul Nou', 'Ziua Muncii', 'Crăciunul'] #Orthodox Easter, New Year, First of May, Christmas
# for y in range(1980, 1990):
#     for date, name in sorted(holidays.RO(years = y).items()):
#         if name in RO_BG:
#             list_holidays['BG'].append({date: name})
#     list_holidays['BG'].append(str(y)+'-03-03') #Liberation Day
#     list_holidays['BG'].append(str(y)+'-05-06') #Saint George's Day
#     list_holidays['BG'].append(str(y)+'-05-24') #Bulgarian Education and Culture and Slavonic Literature Day
#     list_holidays['BG'].append(str(y)+'-09-06') #Unification Day
#     list_holidays['BG'].append(str(y)+'-09-22') #independence Day

Compute HDD and CDD for all years (with the threshold at 15.5 degrees Celsius).

In [338]:
# Daily mean temperatures, plus the day of week (Monday = 0) and a holiday flag.
temperatures_daily = temperatures.resample('1D').mean()
temperatures_daily['weekday'] = temperatures_daily.index.dayofweek
temperatures_daily['holiday'] = False

# One frame per country with columns ['temp', 'weekday', 'holiday'].
temp_daily = {}
for country in countries:
    frame = temperatures_daily[[country, 'weekday', 'holiday']].copy()
    frame.columns = ['temp', 'weekday', 'holiday']
    # Flag every day found in the country's holiday calendar, in a single pass
    # instead of a row-by-row `.at` assignment.
    frame['holiday'] = [day in list_holidays[country] for day in frame.index]
    temp_daily[country] = frame

# NOTE: this is a shallow copy, so `temp_with_holidays` and `temp_daily` hold
# the very same DataFrame objects. The holiday flags are therefore visible
# through `temp_daily` as well, which is what the HDD/CDD computation is given.
temp_with_holidays = dict(temp_daily)

In [339]:
# Heating and cooling degree days, both using the same 15.5 degree threshold.
degree_day_threshold = 15.5
daily_hc = compute_cdd_hdd_artificial(
    temp_daily,
    countries,
    threshold_hdd=degree_day_threshold,
    threshold_cdd=degree_day_threshold,
)

Create the artificial demand

In [341]:
# Re-derive the country labels from the temperature columns (same expression as
# used earlier in the notebook to define `countries`).
countries = list(temperatures.columns)

In [344]:
# artificial_demand_daily, days, first_day = create_daily_data(reg_parameters_days, reg_parameters_trend.loc["par_trend"], daily_hc, reg_parameters_temp.T, countries, start = 1980, end= 2021, validation_days=1826)
# Daily artificial demand from the French regression parameters relabelled 'KR'.
# `reg_trend_FR` is already a DataFrame, so its "par_trend" row is taken directly.
# NOTE(review): validation_days=1826 presumably is the number of days in the
# 2010-2014 training window (5 * 365 + 1) - confirm against `create_daily_data`.
trend_parameter = reg_trend_FR.loc["par_trend"]
artificial_demand_daily, days, first_day = create_daily_data(
    reg_days_FR,
    trend_parameter,
    daily_hc,
    reg_temp_FR.T,
    countries,
    start=2021,
    end=2022,
    validation_days=1826,
)

In [348]:
# Turn the daily artificial demand into hourly values using the hour-of-week
# parameters; same start/end arguments as the daily step.
artificial_demand_hourly = create_hourly_data(
    artificial_demand_daily,
    reg_hours_FR,
    temperatures,
    first_day,
    countries,
    start=2021,
    end=2022,
)

  0 + artificial[i][j // 24] * weekly_profile[i].iloc[k]


In [349]:
# Put the per-country hourly results side by side and round to one decimal.
demand_side_by_side = pd.concat(artificial_demand_hourly, axis='columns')
artificial_demand_ts = demand_side_by_side.round(decimals=1)

In [350]:
# Display the rounded hourly artificial demand time series.
artificial_demand_ts

Unnamed: 0,KR
2021-01-01 00:00:00,62841.4
2021-01-01 01:00:00,61805.2
2021-01-01 02:00:00,59116.6
2021-01-01 03:00:00,56816.9
2021-01-01 04:00:00,57012.9
...,...
2021-12-31 19:00:00,84422.6
2021-12-31 20:00:00,80967.1
2021-12-31 21:00:00,78590.5
2021-12-31 22:00:00,82907.6


In [351]:
# Write the hourly artificial demand to CSV in the working directory.
# The file name is a plain string literal; the former f-prefix had no
# placeholders to fill, so the path is unchanged.
artificial_demand_ts.to_csv('KR_demand_artificial_2021-2022.csv')