In [1]:
import pandas as pd
import os
import numpy as np
import requests
from datetime import datetime, timedelta
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from datetime import datetime, timedelta
import time
import json
from geopy.geocoders import Nominatim
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.ticker import MultipleLocator
import openmeteo_requests
import requests_cache
from retry_requests import retry
import hsfs
from pathlib import Path
from dotenv import load_dotenv
import hopsworks
import sys

root_dir = Path().resolve().parent
sys.path.append(str(root_dir))

from format_data import format_weather_data, format_price_data, process_weather_data
from get_electricity_prices import get_data
from get_weather_data import get_historical_weather, get_weather_forecast
from entsoe_data import fetch_historical_data, ensure_valid_series



In [2]:
load_dotenv()
hopsworks_api = os.getenv("HOPSWORKS_API_KEY")
entose_api = os.getenv("ENTSOE_API")

os.environ["HOPSWORKS_API_KEY"] = hopsworks_api

project = hopsworks.login()
fs = project.get_feature_store() 
print(f"Connected to project: {project.name}")

2025-01-05 16:22:36,954 INFO: Initializing external client
2025-01-05 16:22:36,954 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-05 16:22:38,415 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1164446
Connected to project: oskaralf


In [3]:
start_date = datetime.now().strftime('%Y-%m-%d')
end_date = (datetime.now() + timedelta(days=1)).strftime('%Y-%m-%d')

In [4]:
forecast = get_weather_forecast("Stockholm", "2022-11-01", "2025-01-03", 59.3294, 18.0687)

formatted_forecast_df = process_weather_data(forecast)
print(formatted_forecast_df)

Coordinates 59.32889938354492°N 18.072357177734375°E
Elevation 24.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
                   date  temperature_2m  precipitation  snow_depth  \
0   2025-01-05 00:00:00         -5.9325            0.0        0.07   
1   2025-01-05 01:00:00         -6.1325            0.0        0.07   
2   2025-01-05 02:00:00         -6.2325            0.0        0.07   
3   2025-01-05 03:00:00         -6.3825            0.0        0.07   
4   2025-01-05 04:00:00         -6.5825            0.0        0.07   
..                  ...             ...            ...         ...   
163 2025-01-11 19:00:00         -5.7500            0.0        0.08   
164 2025-01-11 20:00:00         -6.3500            0.0        0.08   
165 2025-01-11 21:00:00         -7.0000            0.0        0.08   
166 2025-01-11 22:00:00         -7.6000            0.0        0.08   
167 2025-01-11 23:00:00         -8.2500            0.0        0.08   

     pressure_msl  cloud_cover  w

In [5]:
# price_fg = fs.get_feature_group(
#     name='electricity_price_data_3',
#     version=1,
# )
weather_fg = fs.get_feature_group(
    name='weather_data_3',
    version=1,
)
entsoe_fg = fs.get_feature_group(
    name='entsoe_data_3',
    version=1,
)

In [6]:
weather_fg.insert(formatted_forecast_df)

2025-01-05 16:22:51,989 INFO: 	5 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1164446/fs/1155149/fg/1394585


Uploading Dataframe: 100.00% |██████████| Rows 168/168 | Elapsed Time: 00:01 | Remaining Time: 00:00


Use fg.materialization_job.run(args=-op offline_fg_materialization -path hdfs:///Projects/oskaralf/Resources/jobs/weather_data_3_1_offline_fg_materialization/config_1736090485793) to trigger the materialization job again.



(Job('weather_data_3_1_offline_fg_materialization', 'SPARK'),
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
         "kwargs": {
           "column": "precipitation",
           "min_value": -5.0,
           "max_value": 500.0
         },
         "meta": {
           "expectationId": 694386
         }
       },
       "result": {
         "observed_value": 0.0,
         "element_count": 168,
         "missing_count": null,
         "missing_percent": null
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2025-01-05T03:22:51.000989Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
     },
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
       

In [13]:
# price = get_todays_data("SE3")
# price['time_start'] = price['time_start'].str.replace(r'\+\d{2}:\d{2}$', '', regex=True)
# print(price)
# print(price.dtypes)

In [8]:
entsoe_df = fetch_historical_data(entose_api, start_date, end_date)
entsoe_df.columns = entsoe_df.columns.str.lower()
entsoe_df.columns = entsoe_df.columns.str.lower().str.replace(' ', '_').str.replace('-', '_')
print(entsoe_df.columns)
print(entsoe_df.head())

[DEBUG] Successfully fetched load data for finland.
[DEBUG] Successfully fetched load data for norway.
[DEBUG] Successfully fetched load data for denmark.
[DEBUG] Successfully fetched cross-border flows SE3 to finland.
[DEBUG] Successfully fetched cross-border flows finland to SE3.
[DEBUG] Successfully fetched cross-border flows SE3 to norway.
[DEBUG] Successfully fetched cross-border flows norway to SE3.
[DEBUG] Successfully fetched cross-border flows SE3 to denmark.
[DEBUG] Successfully fetched cross-border flows denmark to SE3.
Index(['load_se3', 'load_finland', 'load_norway', 'load_denmark', 'prices',
       'total_generation_biomass', 'total_generation_fossil_gas',
       'total_generation_fossil_hard_coal', 'total_generation_fossil_oil',
       'total_generation_hydro_run_of_river_and_poundage',
       'total_generation_other_renewable', 'total_generation_solar',
       'total_generation_waste', 'total_generation_wind_offshore',
       'total_generation_wind_onshore', 'flows_se3_

In [12]:
# print(entsoe_df.tail(20))

In [16]:
# print(entose_api)
# entsoe_df = fetch_energy_data(entose_api, "2025-01-05", "2025-01-06")

# print(entsoe_df)

In [10]:
entsoe_fg.insert(entsoe_df)

2025-01-05 16:23:24,736 INFO: 	21 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1164446/fs/1155149/fg/1394586


Uploading Dataframe: 100.00% |██████████| Rows 15/15 | Elapsed Time: 00:01 | Remaining Time: 00:00


Use fg.materialization_job.run(args=-op offline_fg_materialization -path hdfs:///Projects/oskaralf/Resources/jobs/entsoe_data_3_1_offline_fg_materialization/config_1736090504477) to trigger the materialization job again.



(Job('entsoe_data_3_1_offline_fg_materialization', 'SPARK'),
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
         "kwargs": {
           "column": "total_generation_hydro_run_of_river_and_poundage",
           "min_value": -0.1,
           "max_value": 10000,
           "strict_min": true
         },
         "meta": {
           "expectationId": 694406
         }
       },
       "result": {
         "observed_value": 3.0,
         "element_count": 15,
         "missing_count": null,
         "missing_percent": null
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2025-01-05T03:23:24.000735Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
     },
     {
       "success": true,
       "expectation_config": {
       

In [11]:
# price_fg.insert(price)