In [21]:
# Import Libraries
import logging
import os

import influxdb_client
import pandas as pd
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.exceptions import InfluxDBError
from influxdb_client.client.query_api import QueryApi

In [22]:
# Set up logging: records at INFO level and above are written to
# influxdb_query.log as "<time> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    filename='influxdb_query.log',
    format='%(asctime)s - %(levelname)s - %(message)s',
)
# Mark the start of the run with an explicit UTC timestamp.
logging.info(
    'Script started at %s',
    pd.Timestamp.now(tz='UTC'),
)

In [23]:
# Define InfluxDB connection details
# Each setting can be overridden through an environment variable. The URL,
# organisation and bucket fall back to the values previously hard-coded here.
# NOTE(review): the default URL is plain http, so the token travels unencrypted
# unless INFLUXDB_URL points at an https endpoint.
influxdb_url = os.environ.get('INFLUXDB_URL', 'http://159.89.103.242:8086')
influxdb_org = os.environ.get('INFLUXDB_ORG', 'entra')
influxdb_bucket = os.environ.get('INFLUXDB_BUCKET', 'entra')

# The API token is a secret and must never be stored in the notebook, so it has
# no default. A token that was ever committed in plain text should be treated
# as compromised and rotated on the server.
influxdb_token = os.environ.get('INFLUXDB_TOKEN')
if not influxdb_token:
    # Warn instead of raising so the cell stays runnable; the problem surfaces
    # in the notebook output as well as in the log file.
    print('WARNING: INFLUXDB_TOKEN is not set; InfluxDB queries that require authentication will fail.')
    logging.warning('INFLUXDB_TOKEN is not set; InfluxDB queries that require authentication will fail.')

In [24]:
# Create InfluxDB client
influx_client = InfluxDBClient(
    url=influxdb_url,
    token=influxdb_token,
    org=influxdb_org,
)
# Obtain the query API through the client's own factory method rather than
# constructing QueryApi by hand; the returned object is bound to influx_client.
query_api = influx_client.query_api()

In [25]:
# Measurement names to inspect. Order matters: the processing loop further
# down selects entries from this list by position.
measurements = [
    'actual_total_load',
    'day_ahead_total_load_forecast',
    'installed_generation_capacity',
    'actual_generation',
]

In [40]:
# Country codes
# Area codes used as the `country` tag value in the queries below. The literal
# is kept in its original (unsorted) order and sorted exactly once, so that
# iteration order and printed output are alphabetical.
country_codes = sorted([
    "DE_50HZ", "IT_NORD_SI", "AL", "IT_PRGP", "DE_AMPRION", "IT_ROSN", "AT", "IT_SARD", "BY", "IT_SICI",
    "BE", "IT_SUD", "BA", "RU_KGD", "BG", "LV", "CZ_DE_SK", "LT", "HR", "LU", "CWE", "MT", "CY", "ME",
    "CZ", "GB", "DE_AT_LU", "NL", "DE_LU", "NO_1", "DK", "NO_2", "DK_1", "NO_3", "DK_2", "NO_4", "DK_CA",
    "NO_5", "EE", "NO", "FI", "PL_CZ", "MK", "PL", "FR", "PT", "DE", "MD", "GR", "RO", "HU", "RU", "IS",
    "SE_1", "IE_SEM", "SE_2", "IE", "SE_3", "IT", "SE_4", "IT_SACO_AC", "RS", "IT_SACO_DC", "SK", "IT_BRNN",
    "SI", "IT_CNOR", "GB_NIR", "IT_CSUD", "ES", "IT_FOGN", "SE", "IT_GR", "CH", "IT_MACRO_NORTH",
    "DE_TENNET", "IT_MACRO_SOUTH", "DE_TRANSNET", "IT_MALTA", "TR", "IT_NORD", "UA", "IT_NORD_AT",
    "UA_DOBTPP", "IT_NORD_CH", "UA_BEI", "IT_NORD_FR", "UA_IPS",
])
# Bare expression: displays the sorted list as the cell's output.
country_codes

['AL',
 'AT',
 'BA',
 'BE',
 'BG',
 'BY',
 'CH',
 'CWE',
 'CY',
 'CZ',
 'CZ_DE_SK',
 'DE',
 'DE_50HZ',
 'DE_AMPRION',
 'DE_AT_LU',
 'DE_LU',
 'DE_TENNET',
 'DE_TRANSNET',
 'DK',
 'DK_1',
 'DK_2',
 'DK_CA',
 'EE',
 'ES',
 'FI',
 'FR',
 'GB',
 'GB_NIR',
 'GR',
 'HR',
 'HU',
 'IE',
 'IE_SEM',
 'IS',
 'IT',
 'IT_BRNN',
 'IT_CNOR',
 'IT_CSUD',
 'IT_FOGN',
 'IT_GR',
 'IT_MACRO_NORTH',
 'IT_MACRO_SOUTH',
 'IT_MALTA',
 'IT_NORD',
 'IT_NORD_AT',
 'IT_NORD_CH',
 'IT_NORD_FR',
 'IT_NORD_SI',
 'IT_PRGP',
 'IT_ROSN',
 'IT_SACO_AC',
 'IT_SACO_DC',
 'IT_SARD',
 'IT_SICI',
 'IT_SUD',
 'LT',
 'LU',
 'LV',
 'MD',
 'ME',
 'MK',
 'MT',
 'NL',
 'NO',
 'NO_1',
 'NO_2',
 'NO_3',
 'NO_4',
 'NO_5',
 'PL',
 'PL_CZ',
 'PT',
 'RO',
 'RS',
 'RU',
 'RU_KGD',
 'SE',
 'SE_1',
 'SE_2',
 'SE_3',
 'SE_4',
 'SI',
 'SK',
 'TR',
 'UA',
 'UA_BEI',
 'UA_DOBTPP',
 'UA_IPS']

In [32]:
# One empty dictionary per measurement, intended to hold missing-data counts.
# NOTE(review): none of the cells visible here writes to these dictionaries;
# confirm whether they are filled elsewhere or are still needed.
missing_actual_total_load = dict()
missing_day_ahead_total_load_forecast = dict()
missing_installed_generation_capacity = dict()
missing_actual_generation = dict()

In [42]:
def _total_count(tables):
    """Add up the values of all records in all tables of a query result.

    Flux applies ``count()`` to each input table separately, so a query that
    matches several series returns several tables, each carrying its own
    count. Reading only ``tables[0].records[0]`` would report the first series
    alone and would raise ``IndexError`` for a table without records.

    Args:
        tables: The iterable of tables returned by ``query_api.query``; each
            table exposes ``records`` and each record exposes ``get_value()``.

    Returns:
        The sum of all record values; 0 when there are no tables or records.
    """
    return sum(
        record.get_value() or 0
        for table in tables
        for record in table.records
    )


# NOTE(review): the [3:] slice limits this run to the last entry of
# `measurements` (actual_generation); widen it to process the others.
for measurement in measurements[3:]:
    measurement_exists = False
    success_countries = []
    failed_countries = []

    for country_code in country_codes:
        print(f"Measurement: {measurement}; Country: {country_code}")

        # Pipeline prefix shared by both queries: every point of this
        # measurement for this country since 2015-01-01.
        series_filter = (
            f'from(bucket: "{influxdb_bucket}") '
            '|> range(start: 2015-01-01T00:00:00Z) '
            f'|> filter(fn: (r) => r._measurement == "{measurement}" and r.country == "{country_code}")'
        )

        # Check if measurement exists (at least one point across all series)
        query_exists = series_filter + ' |> count()'
        result_exists = query_api.query(query=query_exists, org=influxdb_org)

        if _total_count(result_exists) == 0:
            failed_countries.append(country_code)
            print(f'NO : The measurement "{measurement}" does not exist for country {country_code}.')
            continue

        measurement_exists = True
        success_countries.append(country_code)
        print(f'The measurement "{measurement}" exists for country {country_code}.')

        # Count rows whose value is exactly zero. The filter compares against
        # 0.0 only, so NaN values are not counted by this query.
        query_check = (
            series_filter
            + ' |> drop(columns: ["_start", "_stop", "_time", "country", "_measurement"])'
            + ' |> map(fn: (r) => ({ r with _value: float(v: r._value) }))'
            + ' |> filter(fn: (r) => r._value == 0.0)'
            + ' |> count()'
        )
        result_check = query_api.query(query=query_check, org=influxdb_org)
        # Summed over every returned table, so all series contribute.
        missing_rows = _total_count(result_check)

        print(f'Number of missing rows for {measurement} in country {country_code}: {missing_rows}')

    if measurement_exists:
        print(f'The measurement "{measurement}" exists.')
        print(f'Successful countries for {measurement}: {success_countries}')
        print(f'Failed countries for {measurement}: {failed_countries}')
    else:
        print(f'The measurement "{measurement}" does not exist.')


Measurement: actual_generation; Country: AL
NO : The measurement "actual_generation" does not exist for country AL.
Measurement: actual_generation; Country: AT
NO : The measurement "actual_generation" does not exist for country AT.
Measurement: actual_generation; Country: BA
The measurement "actual_generation" exists for country BA.
Number of missing rows for actual_generation in country BA: 154
Measurement: actual_generation; Country: BE
The measurement "actual_generation" exists for country BE.
Number of missing rows for actual_generation in country BE: 13298
Measurement: actual_generation; Country: BG
The measurement "actual_generation" exists for country BG.
Number of missing rows for actual_generation in country BG: 13403
Measurement: actual_generation; Country: BY
NO : The measurement "actual_generation" does not exist for country BY.
Measurement: actual_generation; Country: CH
The measurement "actual_generation" exists for country CH.
Number of missing rows for actual_generation

In [None]:
# Mark the end of the run with an explicit UTC timestamp.
logging.info(
    'Script ended at %s',
    pd.Timestamp.now(tz='UTC'),
)