In [1]:
import pandas as pd
import os
import numpy as np
import requests
from datetime import datetime, timedelta
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import datetime
import time
import json
from geopy.geocoders import Nominatim
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.ticker import MultipleLocator
import openmeteo_requests
import requests_cache
from retry_requests import retry
import hsfs
from pathlib import Path
from dotenv import load_dotenv
import hopsworks
import sys

# Make the parent of the current working directory importable so the
# project-local modules imported right below can be resolved.
root_dir = Path().resolve().parent
# Guard the append: re-running this cell in a live kernel would otherwise add
# the same entry to sys.path again on every execution.
if str(root_dir) not in sys.path:
    sys.path.append(str(root_dir))

from format_data import format_weather_data, format_price_data, process_weather_data
from get_electricity_prices import get_data
from get_weather_data import get_historical_weather, get_weather_forecast
from entsoe_data import fetch_energy_data



In [2]:
# Load credentials from a local .env file (if present) into the environment.
load_dotenv()
hopsworks_api = os.getenv("HOPSWORKS_API_KEY")
entose_api = os.getenv("ENTSOE_API")

# Fail early with an actionable message: assigning None into os.environ below
# would otherwise raise an opaque "str expected, not NoneType" TypeError.
if not hopsworks_api:
    raise RuntimeError(
        "HOPSWORKS_API_KEY is not set; define it in the environment or in a .env file."
    )

os.environ["HOPSWORKS_API_KEY"] = hopsworks_api

# Log in and grab the feature store handle used by the cells below.
project = hopsworks.login()
fs = project.get_feature_store()
print(f"Connected to project: {project.name}")

2025-01-05 10:56:48,657 INFO: Initializing external client
2025-01-05 10:56:48,658 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-05 10:56:50,179 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1164446
Connected to project: oskaralf


In [3]:
# Re-imported here on purpose: the first cell runs `import datetime` after
# `from datetime import datetime, timedelta`, which rebinds the name
# `datetime` to the module and would break `datetime.now()` below.
from datetime import datetime, timedelta

# Read the clock once so that `today` and `tomorrow` are always exactly one
# day apart, even if the cell happens to run across midnight.
_now = datetime.now()

# Today's date as YYYY-MM-DD
today = _now.strftime('%Y-%m-%d')

# Tomorrow's date as YYYY-MM-DD
tomorrow = (_now + timedelta(days=1)).strftime('%Y-%m-%d')

In [3]:
# Request the weather forecast for the given place label, date strings and
# latitude/longitude, then reshape it with the project helper.
# NOTE(review): "Stockhom" looks like a misspelling of "Stockholm"; how the
# helper uses this label is not visible here — confirm before changing it.
forecast = get_weather_forecast(
    "Stockhom",
    "2022-11-01",
    "2025-01-03",
    59.3294,
    18.0687,
)

formatted_forecast_df = process_weather_data(forecast)
print(formatted_forecast_df)

Coordinates 59.32889938354492°N 18.072357177734375°E
Elevation 24.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
                   date  temperature_2m  precipitation  snow_depth  \
0   2025-01-05 00:00:00         -5.9325            0.0        0.07   
1   2025-01-05 01:00:00         -6.1325            0.0        0.07   
2   2025-01-05 02:00:00         -6.2325            0.0        0.07   
3   2025-01-05 03:00:00         -6.3825            0.0        0.07   
4   2025-01-05 04:00:00         -6.5825            0.0        0.07   
..                  ...             ...            ...         ...   
163 2025-01-11 19:00:00         -5.7500            0.0        0.08   
164 2025-01-11 20:00:00         -6.3500            0.0        0.08   
165 2025-01-11 21:00:00         -7.0000            0.0        0.08   
166 2025-01-11 22:00:00         -7.6000            0.0        0.08   
167 2025-01-11 23:00:00         -8.2500            0.0        0.08   

     pressure_msl  cloud_cover  w

In [4]:
# Look up the feature groups that later cells insert into.
# The price feature group lookup is commented out, so `price_fg` is NOT
# defined by this cell:
# price_fg = fs.get_feature_group(name='electricity_price_data_3', version=1)
weather_fg = fs.get_feature_group(name='weather_data_2', version=1)
entsoe_fg = fs.get_feature_group(name='entsoe_data', version=1)

In [5]:
# Write the formatted forecast rows to the weather feature group. Being the
# last expression of the cell, the value returned by insert() is what the
# notebook displays as the cell output.
weather_fg.insert(formatted_forecast_df)

2025-01-04 20:53:45,458 INFO: 	5 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1164446/fs/1155149/fg/1394547


Uploading Dataframe: 100.00% |██████████| Rows 168/168 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: weather_data_2_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1164446/jobs/named/weather_data_2_1_offline_fg_materialization/executions


(Job('weather_data_2_1_offline_fg_materialization', 'SPARK'),
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
         "kwargs": {
           "column": "wind_speed_10m",
           "min_value": -0.1,
           "max_value": 1000,
           "strict_min": true
         },
         "meta": {
           "expectationId": 694344
         }
       },
       "result": {
         "observed_value": 4.679999828338623,
         "element_count": 168,
         "missing_count": null,
         "missing_percent": null
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2025-01-04T07:53:45.000458Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
     },
     {
       "success": true,
       "expectation_config": {
         "expectation_type

In [None]:
# Fetch price rows for the "SE3" argument.
# NOTE(review): `get_todays_data` is not among the names imported in the
# first cell (only `get_data` is) — confirm where it is defined.
price = get_todays_data("SE3")

# Strip a trailing "+HH:MM" offset from the time_start strings.
_utc_offset_suffix = r'\+\d{2}:\d{2}$'
_time_start_clean = price['time_start'].str.replace(_utc_offset_suffix, '', regex=True)
price['time_start'] = _time_start_clean

# Show the frame and the dtype of every column.
print(price)
print(price.dtypes)

ConnectTimeout: HTTPSConnectionPool(host='www.elprisetjustnu.se', port=443): Max retries exceeded with url: /api/v1/prices/2025/01-03_SE4.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x1349f74d0>, 'Connection to www.elprisetjustnu.se timed out. (connect timeout=None)'))

In [None]:
# Never echo the API key itself: cell outputs are saved with the notebook and
# would leak the secret to anyone who can read the file. Only report whether
# a key was loaded.
print(f"ENTSOE API key loaded: {bool(entose_api)}")

# Fetch the ENTSO-E data for the given start/end date strings.
# NOTE(review): the dates are hard-coded although `today`/`tomorrow` are
# computed in an earlier cell and never used — confirm which is intended.
entsoe_df = fetch_energy_data(entose_api, "2025-01-05", "2025-01-06")

print(entsoe_df)

[REDACTED: API key removed from saved output]
Hydro storage data unavailable for 2025-01-03 00:00:00+01:00 to 2025-01-04 00:00:00+01:00: 
Attempting to fetch the latest available data...
Successfully fetched the latest available hydro storage data.
    load_se  price_se  flows_se_finland  flows_se_norway  flows_se_denmark  \
0   11798.0     43.01             806.0             12.0             647.0   
1   11668.0     38.15             944.0            255.0             609.0   
2   11492.0     35.22             946.0            569.0             110.0   
3   11295.0     33.43            1023.0            738.0               0.0   
4   11282.0     29.73            1032.0            941.0               0.0   
5   11617.0     30.58            1058.0            657.0               0.0   
6   12240.0     38.36            1018.0            171.0               1.0   
7   13003.0     52.32            1124.0              0.0             268.0   
8   13555.0     80.90            1090.0              0.0  

In [None]:
# Write the fetched ENTSO-E frame to the `entsoe_data` feature group; the
# value returned by insert() is displayed as the cell output.
entsoe_fg.insert(entsoe_df)

In [7]:
# Write the price frame to the price feature group.
# NOTE(review): the only visible definition of `price_fg` is commented out in
# the feature-group lookup cell, so on a fresh kernel this line would raise
# NameError — confirm whether this cell is still meant to run.
price_fg.insert(price)

2025-01-03 12:39:28,223 INFO: 	1 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1164446/fs/1155149/fg/1393146


Uploading Dataframe: 100.00% |██████████| Rows 24/24 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: electricity_price_data_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1164446/jobs/named/electricity_price_data_1_offline_fg_materialization/executions


(Job('electricity_price_data_1_offline_fg_materialization', 'SPARK'),
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
         "kwargs": {
           "column": "price",
           "min_value": -0.1,
           "max_value": 5000,
           "strict_min": true
         },
         "meta": {
           "expectationId": 695305
         }
       },
       "result": {
         "observed_value": 0.34317,
         "element_count": 24,
         "missing_count": null,
         "missing_percent": null
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2025-01-03T11:39:28.000223Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
     }
   ],
   "evaluation_parameters": {},
   "statistics": {
     "evaluated_expectations": 1,
     "succ