In [41]:
import pandas as pd # type: ignore
from datetime import datetime # type: ignore
import glob
# https://github.com/greysonchung/Battery-Optimisation/blob/main/algorithms/battery_optimise.ipynb

#### Podatki

- df_eles_koledar
	- df_eles
	- df_prazniki

- df_entsoe

- df_all_prices
	- ida
		- ida1
		- ida2
		- sipx
	- df_prices_volumes
		- df_prices
		- df_volumes

### df_eles_koledar

##### df_eles

In [42]:
df_eles = pd.read_csv('data/ELES.csv', sep=',', index_col=0)
df_eles.rename(columns={'Unnamed: 0.1': 'datum', 'PREDVIDEN PREVZEM': 'predviden_prevzem', 'DEJANSKI PREVZEM': 'dejanski_prevzem'}, inplace=True)
df_eles['datum'] = pd.to_datetime(df_eles.index)
df_eles.drop(columns=['hour'], inplace=True)
df_eles.tail()

Unnamed: 0,PREDVIDENA PROIZVODNJA,DEJANSKA PROIZVODNJA,predviden_prevzem,dejanski_prevzem,datum
2024-11-11 19:00:00,2196.0,,1873.0,,2024-11-11 19:00:00
2024-11-11 20:00:00,1811.0,,1800.0,,2024-11-11 20:00:00
2024-11-11 21:00:00,1630.0,,1654.0,,2024-11-11 21:00:00
2024-11-11 22:00:00,1494.0,,1510.0,,2024-11-11 22:00:00
2024-11-11 23:00:00,1399.0,,1400.0,,2024-11-11 23:00:00


##### df_prazniki

In [43]:
from prophet.make_holidays import make_holidays_df # type: ignore

df_prazniki = make_holidays_df(
    year_list=[2022 + i for i in range(3)], country='SI'
)
df_prazniki = df_prazniki.rename(columns={'ds': 'datum'})
df_prazniki.sort_values('datum', inplace=True)
df_prazniki['datum'] = pd.to_datetime(df_prazniki['datum'])

# Generate a new DataFrame with all dates between 2022-01-01 and 2024-10-23
df_all_dates = pd.DataFrame({'datum': pd.date_range('2022-01-01', '2024-12-31')})

# Merge the new DataFrame with df_prazniki, filling in missing values with 0
df_prazniki = pd.merge(df_all_dates, df_prazniki, on='datum', how='outer').fillna(0)

df_prazniki.tail()

Unnamed: 0,datum,holiday
1091,2024-12-27,0
1092,2024-12-28,0
1093,2024-12-29,0
1094,2024-12-30,0
1095,2024-12-31,0


### df_entsoe

##### df_total_load

In [44]:
folder_path = 'data/total_load_day_ahead'
df_total_load = pd.concat([pd.read_csv(file) for file in glob.glob(folder_path + '/*.csv')], ignore_index=True)

# Concatenate all the DataFrames together
df_total_load['datum'] = pd.to_datetime(df_total_load['Time (CET/CEST)'].str.split(pat=' - ', expand=True)[0],
										format='%d.%m.%Y %H:%M')
df_total_load = df_total_load.drop(columns=['Time (CET/CEST)'])
df_total_load.head()

Unnamed: 0,Day-ahead Total Load Forecast [MW] - BZN|SI,Actual Total Load [MW] - BZN|SI,datum
0,1089.0,1150.0,2022-01-01 00:00:00
1,1056.0,1127.0,2022-01-01 01:00:00
2,1008.0,1083.0,2022-01-01 02:00:00
3,976.0,1053.0,2022-01-01 03:00:00
4,973.0,1047.0,2022-01-01 04:00:00


### df_all_prices

##### df_prices_volumes

In [45]:
# ni več aktualno so SIPX cene
df_prices = pd.read_csv('data/day_ahead_prices.csv', index_col=0)

# drop if there is a missing value in index
df_prices = df_prices[df_prices.index.notna()]

df_prices['datum'] = df_prices.index
df_prices['datum'] = pd.to_datetime(df_prices['datum']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))

df_prices.sort_values(by='datum', inplace=True)
df_prices.reset_index(drop=True, inplace=True)
df_prices.drop(columns='Hour_q', inplace=True)

df_prices.rename(columns={'Price': 'price'}, inplace=True)

df_prices.tail()

  df_prices['datum'] = pd.to_datetime(df_prices['datum']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))


Unnamed: 0,price,datum
25091,193.74,2024-11-11 19:00:00
25092,144.27,2024-11-11 20:00:00
25093,136.2,2024-11-11 21:00:00
25094,123.54,2024-11-11 22:00:00
25095,112.95,2024-11-11 23:00:00


In [46]:
df_volumes = pd.read_csv('data/day_ahead_volumes.csv', index_col=0)

# drop if there is a missing value in index
df_volumes = df_volumes[df_volumes.index.notna()]

df_volumes['datum'] = df_volumes.index
df_volumes['datum'] = pd.to_datetime(df_volumes['datum']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))

df_volumes.sort_values(by='datum', inplace=True)
df_volumes.reset_index(drop=True, inplace=True)
df_volumes.drop(columns='Hour_q', inplace=True)
df_volumes.rename(columns={'Price': 'volumes'}, inplace=True)

df_volumes.tail()

  df_volumes['datum'] = pd.to_datetime(df_volumes['datum']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))


Unnamed: 0,volumes,datum
25043,1737.5,2024-11-11 19:00:00
25044,1572.7,2024-11-11 20:00:00
25045,1543.4,2024-11-11 21:00:00
25046,1462.6,2024-11-11 22:00:00
25047,1324.1,2024-11-11 23:00:00


In [47]:
df_prices_volumes = pd.merge(df_prices, df_volumes, on='datum', how='left')
df_prices_volumes.tail()

Unnamed: 0,price,datum,volumes
25091,193.74,2024-11-11 19:00:00,1737.5
25092,144.27,2024-11-11 20:00:00,1572.7
25093,136.2,2024-11-11 21:00:00,1543.4
25094,123.54,2024-11-11 22:00:00,1462.6
25095,112.95,2024-11-11 23:00:00,1324.1


##### df_ida

In [48]:
# add IDA1, IDA2 and Sipx
ida1 = pd.read_csv('data/ida1_prices.csv')
ida1.rename(columns={'Price': 'IDA1price'}, inplace=True)

ida2 = pd.read_csv('data/ida2_prices.csv')
ida2.rename(columns={'Price': 'IDA2price'}, inplace=True)

sipx = pd.read_csv('data/sipx_prices.csv')
sipx = sipx[sipx['DeliveryDateTime'] > '2022-01-01']
sipx.rename(columns={'Price': 'SIPXprice'}, inplace=True)
sipx['DeliveryDateTime'] = pd.to_datetime(sipx['DeliveryDateTime']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))

# merge all three
ida1 = pd.merge(ida1, ida2, on='DeliveryDateTime', how='left').fillna(0)
ida1['DeliveryDateTime'] = pd.to_datetime(ida1['DeliveryDateTime']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))
df_ida = pd.merge(ida1, sipx, on='DeliveryDateTime', how='right')

# clean a bit
df_ida['datum'] = pd.to_datetime(df_ida['DeliveryDateTime']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))
df_ida.drop(columns=['Unnamed: 0_x', 'Unnamed: 0_y', 'Unnamed: 0', 'DeliveryDateTime'], inplace=True)
df_ida.sort_values(by='datum', inplace=True)
df_ida

  sipx['DeliveryDateTime'] = pd.to_datetime(sipx['DeliveryDateTime']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))
  ida1['DeliveryDateTime'] = pd.to_datetime(ida1['DeliveryDateTime']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))


Unnamed: 0,IDA1price,IDA2price,SIPXprice,datum
4180,,,61.83783,2022-01-01 00:00:00
4181,,,61.83783,2022-01-01 00:15:00
4182,,,61.83783,2022-01-01 00:30:00
4183,,,61.83783,2022-01-01 00:45:00
8364,,,51.44327,2022-01-01 01:00:00
...,...,...,...,...
92027,108.00,0.0,123.54000,2024-11-11 22:45:00
96208,137.70,0.0,112.95000,2024-11-11 23:00:00
96209,117.20,0.0,112.95000,2024-11-11 23:15:00
96210,110.38,0.0,112.95000,2024-11-11 23:30:00


In [49]:
df_all_prices = pd.merge(df_ida, df_prices_volumes, on='datum', how='left')
df_all_prices['price'].ffill(inplace=True)
df_all_prices['volumes'].ffill(inplace=True)
df_all_prices.sort_values(by='datum', inplace=True)
df_all_prices.tail()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_all_prices['price'].ffill(inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_all_prices['volumes'].ffill(inplace=True)


Unnamed: 0,IDA1price,IDA2price,SIPXprice,datum,price,volumes
100375,108.0,0.0,123.54,2024-11-11 22:45:00,123.54,1462.6
100376,137.7,0.0,112.95,2024-11-11 23:00:00,112.95,1324.1
100377,117.2,0.0,112.95,2024-11-11 23:15:00,112.95,1324.1
100378,110.38,0.0,112.95,2024-11-11 23:30:00,112.95,1324.1
100379,89.5,0.0,112.95,2024-11-11 23:45:00,112.95,1324.1


### Merge

In [50]:
df1 = pd.merge(df_eles, df_prazniki, on='datum', how='left')

df1['datum'] = pd.to_datetime(df1['datum']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))
df_prices_volumes['datum'] = pd.to_datetime(df_prices_volumes['datum']).apply(lambda d: d.strftime('%Y-%m-%d %H:%M:%S'))

df = pd.merge(df1, df_all_prices, on='datum', how='right')
df.tail()

Unnamed: 0,PREDVIDENA PROIZVODNJA,DEJANSKA PROIZVODNJA,predviden_prevzem,dejanski_prevzem,datum,holiday,IDA1price,IDA2price,SIPXprice,price,volumes
100399,,,,,2024-11-11 22:45:00,,108.0,0.0,123.54,123.54,1462.6
100400,1399.0,,1400.0,,2024-11-11 23:00:00,,137.7,0.0,112.95,112.95,1324.1
100401,,,,,2024-11-11 23:15:00,,117.2,0.0,112.95,112.95,1324.1
100402,,,,,2024-11-11 23:30:00,,110.38,0.0,112.95,112.95,1324.1
100403,,,,,2024-11-11 23:45:00,,89.5,0.0,112.95,112.95,1324.1


In [51]:
"""
Cell generated by Data Wrangler.
"""
def clean_data(df):
	# Drop duplicate rows in column: 'datum'
	df = df.drop_duplicates(subset=['datum'])
	# Renames
	df = df.rename(columns={'datum': 'date', 
							'PREDVIDENA PROIZVODNJA': 'forecast_production',
							'DEJANSKA PROIZVODNJA': 'production',
							'predviden_prevzem': 'forecast_consumption',
							'dejanski_prevzem': 'consumption',
						})
	df.ffill(inplace=True)
	
	df['date'] = df['date'].astype('datetime64[ns]')
	df = df[df['date'] < '2024-11-12 00:00:00']

	df = df[(df['date'].dt.day != 29) | (df['date'].dt.month != 2)]
	df = df.drop(df.index[0])

	return df


df = clean_data(df.copy())
df

Unnamed: 0,forecast_production,production,forecast_consumption,consumption,date,holiday,IDA1price,IDA2price,SIPXprice,price,volumes
1,965.0,968.0,989.0,1044.0,2022-01-01 00:15:00,New Year's Day,,,61.83783,61.83783,942.2
2,965.0,968.0,989.0,1044.0,2022-01-01 00:30:00,New Year's Day,,,61.83783,61.83783,942.2
3,965.0,968.0,989.0,1044.0,2022-01-01 00:45:00,New Year's Day,,,61.83783,61.83783,942.2
4,959.0,949.0,953.0,1018.0,2022-01-01 01:00:00,New Year's Day,,,51.44327,51.44327,1084.7
5,959.0,949.0,953.0,1018.0,2022-01-01 01:15:00,New Year's Day,,,51.44327,51.44327,1084.7
...,...,...,...,...,...,...,...,...,...,...,...
100399,1494.0,2370.0,1510.0,1647.0,2024-11-11 22:45:00,0,108.00,0.0,123.54000,123.54000,1462.6
100400,1399.0,2370.0,1400.0,1647.0,2024-11-11 23:00:00,0,137.70,0.0,112.95000,112.95000,1324.1
100401,1399.0,2370.0,1400.0,1647.0,2024-11-11 23:15:00,0,117.20,0.0,112.95000,112.95000,1324.1
100402,1399.0,2370.0,1400.0,1647.0,2024-11-11 23:30:00,0,110.38,0.0,112.95000,112.95000,1324.1


In [52]:
df.to_csv('data/dataset_task2.csv', index=False)