## TenneT - automatic data loading

In [5]:
import os
import datetime
import pytz
import xarray as xr
from pathlib import Path
from tennet import TenneTClient, DataType, OutputType
import zarr
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone

## Get minute data

In [17]:
# Download TenneT balance-delta prices (DataType.balansdeltaprices) one calendar
# year at a time, convert the local timestamps to UTC and cache each year as CSV.
client = TenneTClient(default_output=OutputType.CSV)

# Walk backwards from 2024 to 2012. A bounded range replaces the earlier
# flag-driven `while` loop, which could only stop when `year` hit exactly 2012.
for year in range(2024, 2011, -1):
    start_date = pd.Timestamp(f'{year}-01-01')
    end_date = pd.Timestamp(f'{year}-12-31')
    if year == 2024:
        # 2024 is only requested up to 14 November.
        end_date = pd.Timestamp(f'{year}-11-14')
    if year == 2012:
        # The 2012 request starts on 21 September.
        start_date = pd.Timestamp(f'{year}-09-21')

    csv_name = f'development/data/minute_imbalance_data-{year}.csv'
    if Path(csv_name).exists():
        # Existing files are never overwritten, so skip the (slow) download and
        # conversion for years that are already cached.
        continue

    tennet_minute_imbalance_data = client.query_df(DataType.balansdeltaprices, d_from=start_date, d_to=end_date)

    # NOTE(review): assumes 'timestamp' holds naive Europe/Amsterdam wall-clock
    # times - confirm against the tennet client.
    local_times = pd.to_datetime(tennet_minute_imbalance_data['timestamp'], errors='coerce')
    missing_count = int(local_times.isna().sum())
    if missing_count:
        print(f"{missing_count} timestamps for {year} are missing or unparseable and are stored as NaT")

    # During the autumn clock change the same wall-clock hour occurs twice. The
    # first occurrence of a wall-clock time is treated as summer time (DST) and a
    # repeated occurrence as winter time, so the two passes through that hour map
    # to two different UTC instants instead of collapsing onto the same one.
    # The flag is only consulted for ambiguous times; all others ignore it.
    is_dst = ~local_times.duplicated().to_numpy()

    # Wall-clock times skipped by the spring clock change are shifted forward to
    # the first valid instant, then everything is expressed in UTC.
    tennet_minute_imbalance_data['timestamp'] = (
        local_times
        .dt.tz_localize('Europe/Amsterdam', ambiguous=is_dst, nonexistent='shift_forward')
        .dt.tz_convert('UTC')
    )

    print(f'saving to development/data/minute_imbalance_data-{year}.csv')
    tennet_minute_imbalance_data.to_csv(csv_name)
    

saving to development/data/minute_imbalance_data-2024.csv
saving to development/data/minute_imbalance_data-2023.csv
saving to development/data/minute_imbalance_data-2022.csv
saving to development/data/minute_imbalance_data-2021.csv
saving to development/data/minute_imbalance_data-2020.csv
saving to development/data/minute_imbalance_data-2019.csv
saving to development/data/minute_imbalance_data-2018.csv
saving to development/data/minute_imbalance_data-2017.csv
saving to development/data/minute_imbalance_data-2016.csv
saving to development/data/minute_imbalance_data-2015.csv
saving to development/data/minute_imbalance_data-2014.csv
saving to development/data/minute_imbalance_data-2013.csv
saving to development/data/minute_imbalance_data-2012.csv


## Get types of data

In [None]:

# One client serves every request below; CSV is configured as its default output type.
client = TenneTClient(default_output=OutputType.CSV)

# All four requests cover the same date window. `start_date` and `end_date` are
# not defined in this cell, so they must already exist in the kernel when it runs.
date_window = {"d_from": start_date, "d_to": end_date}

# One DataFrame request per TenneT data type.
tennet_imbalance_data = client.query_df(DataType.settlementprices, **date_window)
tennet_measurementdata = client.query_df(DataType.measurementdata, **date_window)
tennet_minute_imbalance_data = client.query_df(DataType.balansdeltaprices, **date_window)
tennet_imbalance_igcc_data = client.query_df(DataType.BalansdeltaIGCC, **date_window)

## Get imbalance IGCC data for mid price

In [7]:
# Download the TenneT IGCC balance-delta series (DataType.BalansdeltaIGCC) one
# calendar year at a time and write a 'timestamp' / 'mid-price' CSV per year.
client = TenneTClient(default_output=OutputType.CSV)

# Walk backwards from 2024 to 2012. A bounded range replaces the earlier
# flag-driven `while` loop, which could only stop when `year` hit exactly 2012.
for year in range(2024, 2011, -1):
    start_date = pd.Timestamp(f'{year}-01-01')
    end_date = pd.Timestamp(f'{year}-12-31')
    if year == 2024:
        # 2024 is only requested up to 14 November.
        end_date = pd.Timestamp(f'{year}-11-14')
    if year == 2012:
        # The 2012 request starts on 21 September.
        start_date = pd.Timestamp(f'{year}-09-21')

    tennet_imbalance_igcc_data = client.query_df(DataType.BalansdeltaIGCC, d_from=start_date, d_to=end_date)

    # Join the 'Date' and 'Time' text columns into naive timestamps; values that
    # cannot be parsed become NaT.
    local_times = pd.to_datetime(
        tennet_imbalance_igcc_data['Date'] + ' ' + tennet_imbalance_igcc_data['Time'],
        errors='coerce',
    )

    # Only the first timestamp has to be converted: the time axis is rebuilt as a
    # regular 1-minute UTC range from it, which sidesteps the repeated / skipped
    # hour of the clock changes. (Previously every row was localized in a Python
    # loop and all but the first result were then overwritten by this range.)
    # NOTE(review): this assumes exactly one row per minute with no gaps - confirm.
    first_minute_utc = (
        local_times.iloc[0]
        .tz_localize('Europe/Amsterdam', ambiguous=True, nonexistent='shift_forward')
        .tz_convert('UTC')
    )
    utc_minutes = pd.date_range(start=first_minute_utc, periods=len(tennet_imbalance_igcc_data), freq="1min")

    # Keep only the time of day (as the index), the UTC timestamp and the mid price.
    tennet_imbalance_igcc_data_formatted = (
        tennet_imbalance_igcc_data
        .assign(timestamp=utc_minutes)
        .rename(columns={'Mid_price_upward': 'mid-price', 'Time': 'index'})
        [['index', 'timestamp', 'mid-price']]
        .set_index("index")
    )

    output_file = f'development/data/imbalance_igcc-{year}.csv'
    # index=False leaves the time-of-day index out of the file, so the CSV holds
    # only the 'timestamp' and 'mid-price' columns.
    tennet_imbalance_igcc_data_formatted.to_csv(output_file, index=False)
    print(f"Saved combined data to {output_file}")

Saved combined data to development/data/imbalance_igcc-2024.csv
Saved combined data to development/data/imbalance_igcc-2023.csv
Saved combined data to development/data/imbalance_igcc-2022.csv
Saved combined data to development/data/imbalance_igcc-2021.csv
Saved combined data to development/data/imbalance_igcc-2020.csv
Saved combined data to development/data/imbalance_igcc-2019.csv
Saved combined data to development/data/imbalance_igcc-2018.csv
Saved combined data to development/data/imbalance_igcc-2017.csv
Saved combined data to development/data/imbalance_igcc-2016.csv
Saved combined data to development/data/imbalance_igcc-2015.csv
Saved combined data to development/data/imbalance_igcc-2014.csv
Saved combined data to development/data/imbalance_igcc-2013.csv
Saved combined data to development/data/imbalance_igcc-2012.csv


In [None]:
import xarray as xr

# Turn the IGCC frame left behind by the yearly download loop into an xarray
# Dataset with 'times' as its dimension and store it as Zarr.
#
# That loop leaves the frame indexed by time of day (index name 'index') with the
# UTC instants in a 'timestamp' column, so reading a 'times' column directly would
# raise KeyError. Work on a copy with a plain positional index and a 'times'
# column instead; the shared frame itself is left untouched so the cell can be
# re-run safely.
igcc_frame = tennet_imbalance_igcc_data_formatted.reset_index(drop=True)
if 'times' not in igcc_frame.columns:
    igcc_frame = igcc_frame.rename(columns={'timestamp': 'times'})

# Store naive datetimes that carry the UTC clock time.
igcc_frame['times'] = pd.to_datetime(igcc_frame['times'], utc=True).dt.tz_localize(None)
# Name the positional index explicitly so the Dataset dimension is called "index".
igcc_frame.index.name = 'index'

tennet_imbalance_igcc_data_formatted_xr: xr.Dataset = xr.Dataset.from_dataframe(igcc_frame)
# Promote 'times' to a coordinate and make it the dimension instead of the row number.
tennet_imbalance_igcc_data_formatted_xr = tennet_imbalance_igcc_data_formatted_xr.assign_coords({"times": tennet_imbalance_igcc_data_formatted_xr["times"]})
tennet_imbalance_igcc_data_formatted_xr = tennet_imbalance_igcc_data_formatted_xr.swap_dims({"index": "times"})

# An existing store is never overwritten.
if not Path("development/data/imbalance_prices.zarr").exists():
    tennet_imbalance_igcc_data_formatted_xr.to_zarr("development/data/imbalance_prices.zarr")
tennet_imbalance_igcc_data_formatted_xr

## Don't touch this part!!!

In [None]:
import datetime
import pytz
# Build `tennet_imbalance_data_formatted`: a copy of `tennet_imbalance_data` placed on a
# regular 15-minute UTC time axis. `tennet_imbalance_data` is not defined in this cell and
# must already exist in the kernel when it runs.
tennet_imbalance_data_formatted = tennet_imbalance_data.copy()
# Parse 'period_until' into datetimes; values that cannot be parsed become NaT.
tennet_imbalance_data_formatted['period_until'] = pd.to_datetime(tennet_imbalance_data_formatted['period_until'], errors='coerce')

# Collects one UTC timestamp (or NaT) per row, in row order
processed_times = []

# Convert each 'period_until' value from Europe/Amsterdam local time to UTC, one row at a time
for idx, row in tennet_imbalance_data_formatted.iterrows():
    timestamp = row['period_until']
    
    try:
        # Attach the Europe/Amsterdam zone: ambiguous wall-clock times come back as NaT,
        # times that do not exist locally are shifted forward to the next valid instant
        localized_ts = timestamp.tz_localize('Europe/Amsterdam', ambiguous='NaT', nonexistent='shift_forward')
        
        # NaT here means the time was ambiguous (or `timestamp` itself was already NaT)
        if pd.isna(localized_ts):
            # Resolve the ambiguity by picking the DST reading first
            try:
                # DST (summer time) interpretation
                localized_ts = timestamp.tz_localize('Europe/Amsterdam', ambiguous=True)
            # NOTE(review): bare `except` catches everything, including KeyboardInterrupt;
            # the DST call above is presumably not expected to fail for an ambiguous time,
            # so this fallback looks unreachable - confirm
            except:
                # Non-DST (winter time) interpretation
                localized_ts = timestamp.tz_localize('Europe/Amsterdam', ambiguous=False)
        
        # Express the localized instant in UTC
        localized_ts_utc = localized_ts.tz_convert('UTC')
        
    except Exception as e:
        # Best effort: report the failing row and continue with a missing value
        print(f"Error processing timestamp {timestamp} at index {idx}: {e}")
        localized_ts_utc = pd.NaT  # Assign NaT if localization fails
    
    # Keep the result for this row
    processed_times.append(localized_ts_utc)

# Store the converted values in a new column
tennet_imbalance_data_formatted['period_until_utc'] = processed_times

tennet_imbalance_data_formatted = tennet_imbalance_data_formatted.set_index("period_until_utc")
# The index is replaced by a regular 15-minute range that starts at the first converted
# timestamp, so only that first value from the loop above ends up in the result.
# NOTE(review): this assumes one row per 15 minutes with no gaps - confirm
tennet_imbalance_data_formatted.index = pd.date_range(start=tennet_imbalance_data_formatted.index[0], periods=len(tennet_imbalance_data_formatted), freq="15min")

# Drop the original local-time period columns (ignored if they are absent)
tennet_imbalance_data_formatted = tennet_imbalance_data_formatted.drop(columns=["period_from","period_until"], errors="ignore")



In [None]:


# Convert `tennet_imbalance_data_formatted` (built by the previous cell) into an xarray
# Dataset with 'times' as its dimension and store it as Zarr.
# The datetime index becomes a regular column named 'times'.
tennet_imbalance_data_xr = tennet_imbalance_data_formatted.reset_index(names="times")
# Make the timestamps naive while keeping their UTC clock time
tennet_imbalance_data_xr['times'] = pd.to_datetime(tennet_imbalance_data_xr['times'], utc=True).dt.tz_localize(None)
# Debug output: prints the type of the column object
print(type(tennet_imbalance_data_xr["upward_incident_reserve"]))
# Clean both incident-reserve columns: every '*' character is replaced by '0', the text is
# cast to float, and missing values are then filled with 0.
# NOTE(review): `str.replace` is called without `regex=`; '*' is a regex metacharacter, so
# this relies on the pandas default for that argument - consider passing regex=False
tennet_imbalance_data_xr["upward_incident_reserve"] = tennet_imbalance_data_xr["upward_incident_reserve"].astype(str).str.replace('*', '0').astype(float)
tennet_imbalance_data_xr["upward_incident_reserve"] = tennet_imbalance_data_xr["upward_incident_reserve"].fillna(0)
tennet_imbalance_data_xr["downward_incident_reserve"] = tennet_imbalance_data_xr["downward_incident_reserve"].astype(str).str.replace('*', '0').astype(float)
tennet_imbalance_data_xr["downward_incident_reserve"] = tennet_imbalance_data_xr["downward_incident_reserve"].fillna(0)


# From here on the same name refers to an xarray Dataset instead of a DataFrame
tennet_imbalance_data_xr: xr.Dataset = xr.Dataset.from_dataframe(tennet_imbalance_data_xr)
# Promote 'times' to a coordinate and make it the dimension instead of "index"
tennet_imbalance_data_xr = tennet_imbalance_data_xr.assign_coords({"times": tennet_imbalance_data_xr["times"]})
tennet_imbalance_data_xr = tennet_imbalance_data_xr.swap_dims({"index": "times"})
# An existing store is never overwritten
if not Path("development/data/settlement_prices.zarr").exists():
    print("imbalance saved to development/data/settlement_prices.zarr")
    tennet_imbalance_data_xr.to_zarr("development/data/settlement_prices.zarr")
# Last expression of the cell: display the Dataset
tennet_imbalance_data_xr

## Get settlement prices

In [18]:
# Download TenneT settlement prices (DataType.settlementprices) one calendar year
# at a time, put them on a regular 15-minute UTC time axis and cache each year as CSV.
client = TenneTClient(default_output=OutputType.CSV)

# Walk backwards from 2024 to 2012. A bounded range replaces the earlier
# flag-driven `while` loop, which could only stop when `year` hit exactly 2012.
for year in range(2024, 2011, -1):
    start_date = pd.Timestamp(f'{year}-01-01')
    end_date = pd.Timestamp(f'{year}-12-31')
    if year == 2024:
        # 2024 is only requested up to 14 November.
        end_date = pd.Timestamp(f'{year}-11-14')
    if year == 2012:
        # The 2012 request starts on 21 September.
        start_date = pd.Timestamp(f'{year}-09-21')

    csv_name = f'development/data/settlement_prices-{year}.csv'
    if Path(csv_name).exists():
        # Existing files are never overwritten, so skip the download and
        # processing for years that are already cached.
        continue

    settlement_raw = client.query_df(DataType.settlementprices, d_from=start_date, d_to=end_date)

    # Parse the period end times; values that cannot be parsed become NaT.
    period_until = pd.to_datetime(settlement_raw['period_until'], errors='coerce')

    # Only the first period end has to be converted from Europe/Amsterdam local
    # time to UTC: the time axis is rebuilt below as a regular 15-minute range
    # from it. (Previously every row was localized in a Python loop and all but
    # the first result were then overwritten by that range.)
    first_period_end_utc = (
        period_until.iloc[0]
        .tz_localize('Europe/Amsterdam', ambiguous=True, nonexistent='shift_forward')
        .tz_convert('UTC')
    )

    # Drop the local-time period columns (ignored if absent) and index the rows
    # by the regular UTC range.
    # NOTE(review): this assumes one row per 15 minutes with no gaps - confirm.
    tennet_settlement_data_formatted = settlement_raw.drop(columns=["period_from", "period_until"], errors="ignore")
    tennet_settlement_data_formatted.index = pd.date_range(
        start=first_period_end_utc,
        periods=len(tennet_settlement_data_formatted),
        freq="15min",
    )

    # Move the time axis into a 'times' column holding naive UTC datetimes.
    tennet_settlement_data = tennet_settlement_data_formatted.reset_index(names="times")
    tennet_settlement_data['times'] = pd.to_datetime(tennet_settlement_data['times'], utc=True).dt.tz_localize(None)

    # Clean both incident-reserve columns: every '*' character is replaced by '0',
    # the text is cast to float and missing values are filled with 0.
    # regex=False makes the literal match explicit, because '*' is a regex
    # metacharacter and the default for `regex` differs between pandas versions.
    for reserve_col in ("upward_incident_reserve", "downward_incident_reserve"):
        tennet_settlement_data[reserve_col] = (
            tennet_settlement_data[reserve_col]
            .astype(str)
            .str.replace('*', '0', regex=False)
            .astype(float)
            .fillna(0)
        )

    print(f'settlement saved to development/data/settlement_prices-{year}.csv')
    tennet_settlement_data.to_csv(csv_name)


settlement saved to development/data/settlement_prices-2024.csv
settlement saved to development/data/settlement_prices-2023.csv
settlement saved to development/data/settlement_prices-2022.csv
settlement saved to development/data/settlement_prices-2021.csv
settlement saved to development/data/settlement_prices-2020.csv
settlement saved to development/data/settlement_prices-2019.csv
settlement saved to development/data/settlement_prices-2018.csv
settlement saved to development/data/settlement_prices-2017.csv
settlement saved to development/data/settlement_prices-2016.csv
settlement saved to development/data/settlement_prices-2015.csv
settlement saved to development/data/settlement_prices-2014.csv
settlement saved to development/data/settlement_prices-2013.csv
settlement saved to development/data/settlement_prices-2012.csv
