In [1]:
import pandas as pd
import os
import numpy as np
import requests
import datetime
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import datetime
import time
import json
from geopy.geocoders import Nominatim
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.ticker import MultipleLocator
import openmeteo_requests
import requests_cache
from retry_requests import retry
import hsfs
from pathlib import Path
from dotenv import load_dotenv
import hopsworks
import sys

# Put the parent of the current working directory on the import path so the
# project-local modules imported right after this resolve (presumably they
# live in that parent directory — verify against the repository layout).
root_dir = Path().resolve().parent
# Guard the append: re-running this cell must not stack duplicate entries.
if str(root_dir) not in sys.path:
    sys.path.append(str(root_dir))

from format_data import format_weather_data, format_price_data
from get_electricity_prices import get_data, get_todays_data
from get_weather_data import get_historical_weather, get_weather_forecast



In [2]:
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()
hopsworks_api = os.getenv("HOPSWORKS_API_KEY")

# Fail early with an actionable message: assigning None into os.environ
# would otherwise raise an opaque "str expected, not NoneType" TypeError.
if not hopsworks_api:
    raise RuntimeError(
        "HOPSWORKS_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )

# Keep the key exported so the login call below can read it from the environment.
os.environ["HOPSWORKS_API_KEY"] = hopsworks_api

# Connect to Hopsworks and grab the project's feature store handle.
project = hopsworks.login()
fs = project.get_feature_store()
print(f"Connected to project: {project.name}")

2024-12-23 14:46:13,054 INFO: Initializing external client
2024-12-23 14:46:13,055 INFO: Base URL: https://c.app.hopsworks.ai:443
2024-12-23 14:46:14,653 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1164446
Connected to project: oskaralf


In [3]:
# Fetch the weather forecast for Luleå and print the resulting frame.
# NOTE(review): the two "X" arguments are opaque placeholders here; their
# meaning is defined by get_weather_forecast — confirm against that helper.
lulea_coords = (65.5841, 22.1547)  # latitude °N, longitude °E
forecast = get_weather_forecast("Luleå", "X", "X", *lulea_coords)
print(forecast)

Coordinates 65.58380889892578°N 22.155181884765625°E
Elevation 15.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
              time_start  temperature_2m  precipitation  snow_depth  \
0    2024-12-23T00:00:00         -11.276            0.0        0.24   
1    2024-12-23T01:00:00         -11.076            0.0        0.24   
2    2024-12-23T02:00:00         -10.876            0.0        0.24   
3    2024-12-23T03:00:00         -10.926            0.0        0.24   
4    2024-12-23T04:00:00         -11.226            0.0        0.24   
..                   ...             ...            ...         ...   
163  2024-12-29T19:00:00          -1.150            0.0        0.10   
164  2024-12-29T20:00:00          -1.600            0.0        0.10   
165  2024-12-29T21:00:00          -2.000            0.0        0.10   
166  2024-12-29T22:00:00          -2.450            0.0        0.10   
167  2024-12-29T23:00:00          -2.850            0.0        0.10   

     pressure_msl  cl

In [4]:
# Look up the existing version-1 feature groups that the insert cells write to.
price_fg = fs.get_feature_group(name='electricity_price_data', version=1)
weather_fg = fs.get_feature_group(name='weather_data', version=1)

In [5]:
# Write the forecast rows to the weather feature group; the call's return
# value (materialization job, validation report) is shown as the cell output.
# NOTE(review): the recorded run reports "Validation failed" for the
# precipitation minimum expectation (strict_min with an observed minimum of
# 0.0) while the rows are still marked INGESTED — confirm the expectation
# bound on the feature group is what was intended.
weather_fg.insert(forecast)

2024-12-23 14:46:27,140 INFO: 	5 expectation(s) included in expectation_suite.
Validation failed.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1164446/fs/1155149/fg/1393145


%5|1734961602.548|REQTMOUT|rdkafka#consumer-2| [thrd:GroupCoordinator]: GroupCoordinator/2: Timed out ApiVersionRequest in flight (after 10006ms, timeout #0)
%4|1734961602.549|FAIL|rdkafka#consumer-2| [thrd:GroupCoordinator]: GroupCoordinator: 51.161.81.208:9093: ApiVersionRequest failed: Local: Timed out: probably due to broker version < 0.10 (see api.version.request configuration) (after 10006ms in state APIVERSION_QUERY)
%4|1734961602.549|REQTMOUT|rdkafka#consumer-2| [thrd:GroupCoordinator]: GroupCoordinator/2: Timed out 1 in-flight, 0 retry-queued, 0 out-queue, 0 partially-sent requests
Uploading Dataframe: 100.00% |██████████| Rows 168/168 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: weather_data_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1164446/jobs/named/weather_data_1_offline_fg_materialization/executions


(Job('weather_data_1_offline_fg_materialization', 'SPARK'),
 {
   "success": false,
   "results": [
     {
       "success": false,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
         "kwargs": {
           "column": "precipitation",
           "min_value": -0.0,
           "max_value": 500.0,
           "strict_min": true
         },
         "meta": {
           "expectationId": 695303
         }
       },
       "result": {
         "observed_value": 0.0,
         "element_count": 168,
         "missing_count": null,
         "missing_percent": null
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2024-12-23T01:46:27.000140Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
     },
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_col

In [7]:
# Fetch today's prices for "SE4" and strip a trailing "+HH:MM" UTC offset from
# time_start, leaving naive ISO-formatted timestamp strings.
# NOTE(review): the offset is dropped, not converted — confirm these wall-clock
# times line up with the weather timestamps they are later combined with.
price = get_todays_data("SE4")
offset_suffix = r'\+\d{2}:\d{2}$'
naive_time_start = price['time_start'].str.replace(offset_suffix, '', regex=True)
price['time_start'] = naive_time_start
print(price)
print(price.dtypes)

         date           time_start    price
0  2024-12-23  2024-12-23T00:00:00  0.32057
1  2024-12-23  2024-12-23T01:00:00  0.21905
2  2024-12-23  2024-12-23T02:00:00  0.16069
3  2024-12-23  2024-12-23T03:00:00  0.10636
4  2024-12-23  2024-12-23T04:00:00  0.11234
5  2024-12-23  2024-12-23T05:00:00  0.16898
6  2024-12-23  2024-12-23T06:00:00  0.23666
7  2024-12-23  2024-12-23T07:00:00  0.51361
8  2024-12-23  2024-12-23T08:00:00  0.66716
9  2024-12-23  2024-12-23T09:00:00  0.67027
10 2024-12-23  2024-12-23T10:00:00  0.61006
11 2024-12-23  2024-12-23T11:00:00  0.60166
12 2024-12-23  2024-12-23T12:00:00  0.62664
13 2024-12-23  2024-12-23T13:00:00  0.63412
14 2024-12-23  2024-12-23T14:00:00  0.71239
15 2024-12-23  2024-12-23T15:00:00  0.85282
16 2024-12-23  2024-12-23T16:00:00  0.95550
17 2024-12-23  2024-12-23T17:00:00  1.13748
18 2024-12-23  2024-12-23T18:00:00  1.17765
19 2024-12-23  2024-12-23T19:00:00  1.12735
20 2024-12-23  2024-12-23T20:00:00  1.04068
21 2024-12-23  2024-12-23T21:00:

In [8]:
# Write the fetched price rows to the electricity price feature group; the
# call's return value (materialization job, validation report) is shown as
# the cell output.
price_fg.insert(price)

2024-12-23 14:47:27,576 INFO: 	1 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1164446/fs/1155149/fg/1393146


Uploading Dataframe: 100.00% |██████████| Rows 24/24 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: electricity_price_data_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1164446/jobs/named/electricity_price_data_1_offline_fg_materialization/executions


(Job('electricity_price_data_1_offline_fg_materialization', 'SPARK'),
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
         "kwargs": {
           "column": "price",
           "min_value": -0.1,
           "max_value": 5000,
           "strict_min": true
         },
         "meta": {
           "expectationId": 695305
         }
       },
       "result": {
         "observed_value": 0.10636,
         "element_count": 24,
         "missing_count": null,
         "missing_percent": null
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2024-12-23T01:47:27.000575Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
     }
   ],
   "evaluation_parameters": {},
   "statistics": {
     "evaluated_expectations": 1,
     "succ