In [1]:
import logging
import pandas as pd
import os
import random
import time, datetime
from entsoe import EntsoePandasClient
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS
from influxdb_client.client.query_api import QueryApi

In [2]:
# Set up logging
logging.basicConfig(filename='entsoe.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info('Script started at %s', pd.Timestamp.now(tz='UTC'))

In [3]:
# Set API key
API_KEY = '6276342c-e10c-4d88-8688-cb0a1cf163ca'

In [4]:
# Define InfluxDB connection details
influxdb_url = 'http://159.89.103.242:8086'
influxdb_token = 'dfRMxqDtwyHK7vDJHelAm0WKISLvKFUrmhclvaaAoMFOHRRTGNnYkV8bXd0jR9r4arvkg3l_lWNSHyKMG0WxSg=='
influxdb_org = 'entra'
influxdb_bucket = 'entra'

In [5]:
# Initialize the Entsoe client
client = EntsoePandasClient(api_key=API_KEY)

In [6]:
# Create InfluxDB client
influx_client = InfluxDBClient(url=influxdb_url, token=influxdb_token, org=influxdb_org)

In [7]:
# Create the write API
write_api = influx_client.write_api(write_options=SYNCHRONOUS)
# Instantiate the query API
query_api = QueryApi(influx_client)

In [16]:
# Define the time period
start = pd.Timestamp('2023-07-18', tz='UTC')
end = pd.Timestamp('2023-08-03', tz='UTC')

In [9]:
# Define the categories and their corresponding measurements
categories = {
    '6.1.A': 'actual_total_load',
    '6.1.B': 'day_ahead_total_load_forecast',
    '14.1.A': 'installed_generation_capacity',
    '16.1.B&C': 'actual_generation'
}

# Country codes
country_codes = sorted(["DE_50HZ", "IT_NORD_SI", "AL", "IT_PRGP", "DE_AMPRION", "IT_ROSN", "AT", "IT_SARD", "BY", "IT_SICI",
                 "BE", "IT_SUD", "BA", "RU_KGD", "BG", "LV", "CZ_DE_SK", "LT", "HR", "LU", "CWE", "MT", "CY", "ME",
                 "CZ", "GB", "DE_AT_LU", "NL", "DE_LU", "NO_1", "DK", "NO_2", "DK_1", "NO_3", "DK_2", "NO_4", "DK_CA",
                 "NO_5", "EE", "NO", "FI", "PL_CZ", "MK", "PL", "FR", "PT", "DE", "MD", "GR", "RO", "HU", "RU", "IS",
                 "SE_1", "IE_SEM", "SE_2", "IE", "SE_3", "IT", "SE_4", "IT_SACO_AC", "RS", "IT_SACO_DC", "SK", "IT_BRNN",
                 "SI", "IT_CNOR", "GB_NIR", "IT_CSUD", "ES", "IT_FOGN", "SE", "IT_GR", "CH", "IT_MACRO_NORTH",
                 "DE_TENNET", "IT_MACRO_SOUTH", "DE_TRANSNET", "IT_MALTA", "TR", "IT_NORD", "UA", "IT_NORD_AT",
                 "UA_DOBTPP", "IT_NORD_CH", "UA_BEI", "IT_NORD_FR", "UA_IPS"])
country_codes

['AL',
 'AT',
 'BA',
 'BE',
 'BG',
 'BY',
 'CH',
 'CWE',
 'CY',
 'CZ',
 'CZ_DE_SK',
 'DE',
 'DE_50HZ',
 'DE_AMPRION',
 'DE_AT_LU',
 'DE_LU',
 'DE_TENNET',
 'DE_TRANSNET',
 'DK',
 'DK_1',
 'DK_2',
 'DK_CA',
 'EE',
 'ES',
 'FI',
 'FR',
 'GB',
 'GB_NIR',
 'GR',
 'HR',
 'HU',
 'IE',
 'IE_SEM',
 'IS',
 'IT',
 'IT_BRNN',
 'IT_CNOR',
 'IT_CSUD',
 'IT_FOGN',
 'IT_GR',
 'IT_MACRO_NORTH',
 'IT_MACRO_SOUTH',
 'IT_MALTA',
 'IT_NORD',
 'IT_NORD_AT',
 'IT_NORD_CH',
 'IT_NORD_FR',
 'IT_NORD_SI',
 'IT_PRGP',
 'IT_ROSN',
 'IT_SACO_AC',
 'IT_SACO_DC',
 'IT_SARD',
 'IT_SICI',
 'IT_SUD',
 'LT',
 'LU',
 'LV',
 'MD',
 'ME',
 'MK',
 'MT',
 'NL',
 'NO',
 'NO_1',
 'NO_2',
 'NO_3',
 'NO_4',
 'NO_5',
 'PL',
 'PL_CZ',
 'PT',
 'RO',
 'RS',
 'RU',
 'RU_KGD',
 'SE',
 'SE_1',
 'SE_2',
 'SE_3',
 'SE_4',
 'SI',
 'SK',
 'TR',
 'UA',
 'UA_BEI',
 'UA_DOBTPP',
 'UA_IPS']

In [10]:
list(categories.values())

['actual_total_load',
 'day_ahead_total_load_forecast',
 'installed_generation_capacity',
 'actual_generation']

In [11]:
country_codes

['AL',
 'AT',
 'BA',
 'BE',
 'BG',
 'BY',
 'CH',
 'CWE',
 'CY',
 'CZ',
 'CZ_DE_SK',
 'DE',
 'DE_50HZ',
 'DE_AMPRION',
 'DE_AT_LU',
 'DE_LU',
 'DE_TENNET',
 'DE_TRANSNET',
 'DK',
 'DK_1',
 'DK_2',
 'DK_CA',
 'EE',
 'ES',
 'FI',
 'FR',
 'GB',
 'GB_NIR',
 'GR',
 'HR',
 'HU',
 'IE',
 'IE_SEM',
 'IS',
 'IT',
 'IT_BRNN',
 'IT_CNOR',
 'IT_CSUD',
 'IT_FOGN',
 'IT_GR',
 'IT_MACRO_NORTH',
 'IT_MACRO_SOUTH',
 'IT_MALTA',
 'IT_NORD',
 'IT_NORD_AT',
 'IT_NORD_CH',
 'IT_NORD_FR',
 'IT_NORD_SI',
 'IT_PRGP',
 'IT_ROSN',
 'IT_SACO_AC',
 'IT_SACO_DC',
 'IT_SARD',
 'IT_SICI',
 'IT_SUD',
 'LT',
 'LU',
 'LV',
 'MD',
 'ME',
 'MK',
 'MT',
 'NL',
 'NO',
 'NO_1',
 'NO_2',
 'NO_3',
 'NO_4',
 'NO_5',
 'PL',
 'PL_CZ',
 'PT',
 'RO',
 'RS',
 'RU',
 'RU_KGD',
 'SE',
 'SE_1',
 'SE_2',
 'SE_3',
 'SE_4',
 'SI',
 'SK',
 'TR',
 'UA',
 'UA_BEI',
 'UA_DOBTPP',
 'UA_IPS']

In [12]:
country_code = "EXAMPLE"
measurement = "actual_total_load"

In [13]:
countries_actual_total_load = ["BA", "BE", "BG", "CY", "CZ", "DE", "DE_50HZ", "DE_AMPRION", "DE_TENNET", "DE_TRANSNET", "IT_CNOR", "NO"]
done_actual_total_load = ["BA", "BE", "BG", "CY", "CZ", "DE", "DE_50HZ", "DE_AMPRION", "DE_TRANSNET", "IT_CNOR", "NO", "DE_TENNET"]
to_do_actual_total_load = [x for x in countries_actual_total_load if x not in done_actual_total_load]
to_do_actual_total_load

[]

In [None]:
#Actual Total Load - 1
country_code = "COUNTRY_CODE"  # Replace "COUNTRY_CODE" with the specific country code you want to use:
# Download the actual total load data
try:
    actual_total_load = client.query_load(country_code=country_code, start=start, end=end)
    print(f"Actual Total Load for {country_code}:")
    print(actual_total_load)  # Print the data

    # Rename the columns
    actual_total_load.columns = ['value']

    # Convert the index to UTC and format it as string
    actual_total_load.index = actual_total_load.index.tz_convert('UTC').strftime('%Y-%m-%dT%H:%M:%SZ')

    # Convert the data to InfluxDB Line Protocol format
    data_points = []
    for timestamp, value in actual_total_load.iterrows():
        data_point = Point("actual_total_load") \
            .tag("country", country_code) \
            .field("value", value['value']) \
            .time(timestamp, WritePrecision.NS)
        data_points.append(data_point)
            
    write_api.write(bucket=influxdb_bucket, record=data_points)
    logging.info('Data extraction (actual_total_load) completed for country: %s', country_code)
    print(f"Data extraction (actual_total_load) completed for country: {country_code}")
except Exception as e:
    print(f"Data extraction (actual_total_load) failed for country: {country_code}: {str(e)}")
    logging.error(f'Data extraction (actual_total_load) failed for country: {country_code}: {str(e)}')

In [15]:
countries_day_ahead_total_load_forecast = country_codes
done_day_ahead_total_load_forecast = []
to_do_day_ahead_total_load_forecast = [x for x in countries_day_ahead_total_load_forecast if x not in done_day_ahead_total_load_forecast]
to_do_day_ahead_total_load_forecast

['AL',
 'AT',
 'BA',
 'BE',
 'BG',
 'BY',
 'CH',
 'CWE',
 'CY',
 'CZ',
 'CZ_DE_SK',
 'DE',
 'DE_50HZ',
 'DE_AMPRION',
 'DE_AT_LU',
 'DE_LU',
 'DE_TENNET',
 'DE_TRANSNET',
 'DK',
 'DK_1',
 'DK_2',
 'DK_CA',
 'EE',
 'ES',
 'FI',
 'FR',
 'GB',
 'GB_NIR',
 'GR',
 'HR',
 'HU',
 'IE',
 'IE_SEM',
 'IS',
 'IT',
 'IT_BRNN',
 'IT_CNOR',
 'IT_CSUD',
 'IT_FOGN',
 'IT_GR',
 'IT_MACRO_NORTH',
 'IT_MACRO_SOUTH',
 'IT_MALTA',
 'IT_NORD',
 'IT_NORD_AT',
 'IT_NORD_CH',
 'IT_NORD_FR',
 'IT_NORD_SI',
 'IT_PRGP',
 'IT_ROSN',
 'IT_SACO_AC',
 'IT_SACO_DC',
 'IT_SARD',
 'IT_SICI',
 'IT_SUD',
 'LT',
 'LU',
 'LV',
 'MD',
 'ME',
 'MK',
 'MT',
 'NL',
 'NO',
 'NO_1',
 'NO_2',
 'NO_3',
 'NO_4',
 'NO_5',
 'PL',
 'PL_CZ',
 'PT',
 'RO',
 'RS',
 'RU',
 'RU_KGD',
 'SE',
 'SE_1',
 'SE_2',
 'SE_3',
 'SE_4',
 'SI',
 'SK',
 'TR',
 'UA',
 'UA_BEI',
 'UA_DOBTPP',
 'UA_IPS']

In [17]:
# Download day-ahead total load forecast - 2
country_code = "COUNTRY_CODE"  # Replace "COUNTRY_CODE" with the specific country code you want to use
for country_code in countries_day_ahead_total_load_forecast:
    try:
        day_ahead_total_load_forecast = client.query_load_forecast(country_code=country_code, start=start, end=end)
        print(f"Day-Ahead Total Load Forecast for {country_code}:")
        print(day_ahead_total_load_forecast)  # Print the data

        # Rename the columns
        day_ahead_total_load_forecast.columns = ['value']

        # Convert the index to UTC and format it as string
        day_ahead_total_load_forecast.index = day_ahead_total_load_forecast.index.tz_convert('UTC').strftime('%Y-%m-%dT%H:%M:%SZ')

        # Convert the data to InfluxDB Line Protocol format
        data_points = []
        for timestamp, value in day_ahead_total_load_forecast.iterrows():
            data_point = Point("day_ahead_total_load_forecast") \
                .tag("country", country_code) \
                .field("value", value['value']) \
                .time(timestamp, WritePrecision.NS)
            data_points.append(data_point)

        # Write data points to InfluxDB
        write_api.write(bucket=influxdb_bucket, record=data_points)
        logging.info('Data extraction (day_ahead_total_load_forecast) completed for country: %s', country_code)

    except Exception as e:
        logging.error(f'Data extraction (day_ahead_total_load_forecast) failed for {country_code}: {str(e)}')
        print(f'Data extraction (day_ahead_total_load_forecast) failed for country: {country_code} - {str(e)}')

Day-Ahead Total Load Forecast for AL:
                           Forecasted Load
2023-07-18 02:00:00+02:00            629.0
2023-07-18 02:15:00+02:00            629.0
2023-07-18 02:30:00+02:00            629.0
2023-07-18 02:45:00+02:00            629.0
2023-07-18 03:00:00+02:00            597.0
...                                    ...
2023-08-03 01:00:00+02:00            664.0
2023-08-03 01:15:00+02:00            664.0
2023-08-03 01:30:00+02:00            664.0
2023-08-03 01:45:00+02:00            664.0
2023-08-03 02:00:00+02:00            624.0

[1537 rows x 1 columns]
Day-Ahead Total Load Forecast for AT:
                           Forecasted Load
2023-07-18 02:00:00+02:00           5116.0
2023-07-18 02:15:00+02:00           5064.0
2023-07-18 02:30:00+02:00           5016.0
2023-07-18 02:45:00+02:00           4980.0
2023-07-18 03:00:00+02:00           4976.0
...                                    ...
2023-08-03 01:00:00+02:00           5044.0
2023-08-03 01:15:00+02:00           498

In [18]:
#NOTHING TO DO
countries_installed_generation_capacity = country_codes
done_installed_generation_capacity = []
to_do_installed_generation_capacity = [x for x in countries_installed_generation_capacity if x not in done_installed_generation_capacity]
to_do_installed_generation_capacity

['AL',
 'AT',
 'BA',
 'BE',
 'BG',
 'BY',
 'CH',
 'CWE',
 'CY',
 'CZ',
 'CZ_DE_SK',
 'DE',
 'DE_50HZ',
 'DE_AMPRION',
 'DE_AT_LU',
 'DE_LU',
 'DE_TENNET',
 'DE_TRANSNET',
 'DK',
 'DK_1',
 'DK_2',
 'DK_CA',
 'EE',
 'ES',
 'FI',
 'FR',
 'GB',
 'GB_NIR',
 'GR',
 'HR',
 'HU',
 'IE',
 'IE_SEM',
 'IS',
 'IT',
 'IT_BRNN',
 'IT_CNOR',
 'IT_CSUD',
 'IT_FOGN',
 'IT_GR',
 'IT_MACRO_NORTH',
 'IT_MACRO_SOUTH',
 'IT_MALTA',
 'IT_NORD',
 'IT_NORD_AT',
 'IT_NORD_CH',
 'IT_NORD_FR',
 'IT_NORD_SI',
 'IT_PRGP',
 'IT_ROSN',
 'IT_SACO_AC',
 'IT_SACO_DC',
 'IT_SARD',
 'IT_SICI',
 'IT_SUD',
 'LT',
 'LU',
 'LV',
 'MD',
 'ME',
 'MK',
 'MT',
 'NL',
 'NO',
 'NO_1',
 'NO_2',
 'NO_3',
 'NO_4',
 'NO_5',
 'PL',
 'PL_CZ',
 'PT',
 'RO',
 'RS',
 'RU',
 'RU_KGD',
 'SE',
 'SE_1',
 'SE_2',
 'SE_3',
 'SE_4',
 'SI',
 'SK',
 'TR',
 'UA',
 'UA_BEI',
 'UA_DOBTPP',
 'UA_IPS']

In [19]:
# Download installed generation capacity - 3
country_code = "COUNTRY_CODE"  # Replace "COUNTRY_CODE" with the specific country code you want to use
for country_code in country_codes:
    try:
        # Download the installed generation capacity data
        installed_generation_capacity = client.query_installed_generation_capacity(country_code, start=start, end=end, psr_type=None)
        print(f"Installed Generation Capacity for {country_code}:")
        print(installed_generation_capacity)

        # Convert the index to UTC and format it as a string
        installed_generation_capacity.index = installed_generation_capacity.index.tz_convert('UTC').strftime('%Y-%m-%dT%H:%M:%SZ')

        # Create data points for each row
        data_points = []
        for _, row in installed_generation_capacity.iterrows():
            for column in installed_generation_capacity.columns[1:]:
                production_type = column
                value = row[column]
                if pd.notnull(value):
                    data_point = Point("installed_generation_capacity") \
                        .tag("country", country_code) \
                        .tag("production_type", production_type) \
                        .field("value", value) \
                        .time(row.name)
                    data_points.append(data_point)

        # Write data points to InfluxDB
        write_api.write(bucket=influxdb_bucket, record=data_points)
        logging.info('Data extraction (installed_generation_capacity) completed for country: %s', country_code)
        print(f'Data extraction (installed_generation_capacity) completed for country: {country_code}')
    except Exception as e:
        logging.error(f'Data extraction (installed_generation_capacity) failed for {country_code}: {str(e)}')
        print(f'Data extraction (installed_generation_capacity) failed for country: {country_code} : {str(e)}')


Installed Generation Capacity for AL:
Empty DataFrame
Columns: [Biomass, Hydro Run-of-river and poundage]
Index: []
Data extraction (installed_generation_capacity) completed for country: AL
Installed Generation Capacity for AT:
Empty DataFrame
Columns: [Biomass, Fossil Brown coal/Lignite, Fossil Coal-derived gas, Fossil Gas, Fossil Hard coal, Fossil Oil, Fossil Oil shale, Fossil Peat, Geothermal, Hydro Pumped Storage, Hydro Run-of-river and poundage, Hydro Water Reservoir, Marine, Nuclear, Other, Other renewable, Solar, Waste, Wind Offshore, Wind Onshore]
Index: []
Data extraction (installed_generation_capacity) completed for country: AT
Installed Generation Capacity for BA:
Empty DataFrame
Columns: [Fossil Brown coal/Lignite, Hydro Pumped Storage, Hydro Run-of-river and poundage, Hydro Water Reservoir, Wind Onshore]
Index: []
Data extraction (installed_generation_capacity) completed for country: BA
Installed Generation Capacity for BE:
Empty DataFrame
Columns: [Biomass, Fossil Gas, Fo

In [None]:
countries_actual_generation = country_codes
done_actual_generation = []
failed = ["DE_AT_LU", "GB", "IT_BRNN", "IT_FOGN", 'IT_MACRO_NORTH', 'IT_MACRO_SOUTH', 'IT_PRGP', 'IT_ROSN']
to_do_actual_generation = [x for x in countries_actual_generation if x not in done_actual_generation and x not in failed]
to_do_actual_generation

In [20]:
# Define the time period
start = pd.Timestamp('2023-07-18', tz='UTC')
end = pd.Timestamp('2023-08-03', tz='UTC')

In [22]:
# Download and save the data for each country - Actual Generation per Production Type - 4
country_code = "EX"  # Replace "COUNTRY_CODE" with the specific country code you want to use
for country_code in to_do_actual_generation:
    try:
        actual_generation = client.query_generation(country_code=country_code, start=start, end=end)
        print(f"Actual Generation for {country_code}:")
        print(actual_generation)  # Print the data

        # Convert the index to UTC and format it as string
        actual_generation.index = actual_generation.index.tz_convert('UTC').strftime('%Y-%m-%dT%H:%M:%SZ')

        # Convert the data to InfluxDB Line Protocol format
        data_points = []
        for timestamp, row in actual_generation.iterrows():
            for column, value in row.items():
                if pd.notnull(value):
                    production_type = str(column).replace("/", "_").replace(" ", "_")
                    data_point = Point("actual_generation") \
                        .tag("country", country_code) \
                        .tag("production_type", production_type) \
                        .field("value", value) \
                        .time(timestamp, WritePrecision.NS)
                    data_points.append(data_point)

        # Write the data points to InfluxDB
        write_api.write(bucket=influxdb_bucket, record=data_points)
        logging.info('Data extraction (actual_generation) completed for country: %s', country_code)
        print(f'Data extraction (actual_generation) completed for country: {country_code}')

    except Exception as e:
        logging.error(f'Data extraction (actual_generation) failed for {country_code}: {str(e)}')
        print(f'Data extraction (actual_generation) failed for country: {country_code}')



Data extraction (actual_generation) failed for country: AL
Actual Generation for AT:
                                    Biomass                     \
                          Actual Aggregated Actual Consumption   
2023-07-18 02:00:00+02:00              84.0                0.0   
2023-07-18 02:15:00+02:00              84.0                0.0   
2023-07-18 02:30:00+02:00              84.0                0.0   
2023-07-18 02:45:00+02:00              84.0                0.0   
2023-07-18 03:00:00+02:00              84.0                0.0   
...                                     ...                ...   
2023-08-03 00:45:00+02:00             100.0                0.0   
2023-08-03 01:00:00+02:00             100.0                0.0   
2023-08-03 01:15:00+02:00             100.0                0.0   
2023-08-03 01:30:00+02:00             100.0                0.0   
2023-08-03 01:45:00+02:00             100.0                0.0   

                                 Fossil Gas             