In [11]:
import requests
import datetime
import pandas as pd
import requests
import hopsworks
from pathlib import Path
import json
import re
import os
import warnings
from dotenv import load_dotenv

In [12]:
# Pull settings from a local .env file into the process environment so the
# Hopsworks credentials never have to be written into the notebook itself.
load_dotenv()

HW_API_KEY, HW_PROJECT = (
    os.getenv(variable) for variable in ("HOPSWORKS_API_KEY", "HOPSWORKS_PROJECT")
)

# Both values are required; stop before attempting a login that cannot succeed.
if not (HW_API_KEY and HW_PROJECT):
    raise ValueError("Missing credentials in .env file")

# Log in to the Hopsworks project, then grab a handle to its feature store.
login_settings = {
    "project": HW_PROJECT,
    "api_key_value": HW_API_KEY,
    "host": "eu-west.cloud.hopsworks.ai",
}
project = hopsworks.login(**login_settings)
fs = project.get_feature_store()

2026-01-11 20:16:25,274 INFO: Closing external client and cleaning up certificates.
2026-01-11 20:16:25,277 INFO: Connection closed.
2026-01-11 20:16:25,278 INFO: Initializing external client
2026-01-11 20:16:25,278 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2026-01-11 20:16:26,618 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/2186


In [13]:
def get_prices(date: datetime.date, price_class: str, timeout: float = 30):
    """Download one day of prices from the elprisetjustnu.se API.

    Parameters
    ----------
    date
        Day to fetch. Only its ``year``, ``month`` and ``day`` attributes are
        read to build the URL.
    price_class
        Price-area code inserted into the file name (this notebook passes
        ``"SE3"``).
    timeout
        Seconds handed to ``requests.get`` as its ``timeout`` argument.
        Without it a stalled connection would block the notebook forever,
        which matters because the backfill issues one request per day.

    Returns
    -------
    The decoded JSON body of the response. The rest of this notebook treats
    it as a list of records (it is passed to ``list.extend``).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status (``raise_for_status``).
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    url = (
        "https://www.elprisetjustnu.se/api/v1/prices/"
        f"{date.year}/{date.month:02d}-{date.day:02d}_{price_class}.json"
    )
    response = requests.get(url, timeout=timeout)
    # Turn HTTP error statuses into exceptions so callers decide how to react.
    response.raise_for_status()
    return response.json()


In [14]:
# Backfill window: one request per calendar day from `start` through today.
today = datetime.date.today()
start = datetime.date(2021, 11, 1)
end   = today

current = start
all_data = []
missing_dates = []  # days for which the API has no file (HTTP 404)
failed_dates = []   # (day, status) pairs for any other HTTP error

while current <= end:
    try:
        daily_data = get_prices(current, "SE3")
        all_data.extend(daily_data)
    except requests.HTTPError as err:
        # Keep the best-effort behaviour (never abort the backfill), but
        # remember what was skipped instead of discarding it silently.
        response = getattr(err, "response", None)
        status = response.status_code if response is not None else None
        if status == 404:
            missing_dates.append(current)
        else:
            failed_dates.append((current, status))
    current += datetime.timedelta(days=1)

# A 404 is an expected "no data for this day"; anything else (rate limiting,
# server errors) leaves a gap that a re-run could fill, so make it visible.
if failed_dates:
    warnings.warn(
        f"{len(failed_dates)} day(s) failed with a non-404 HTTP error and were "
        f"skipped, e.g. {failed_dates[:5]}"
    )
print(
    f"Fetched {len(all_data)} rows; "
    f"{len(missing_dates)} day(s) missing (404), {len(failed_dates)} day(s) failed."
)

In [15]:
# Fail here with a clear message if the download produced nothing; otherwise
# the next cell would break on a missing "time_start" column instead.
if not all_data:
    raise RuntimeError("No price data was downloaded; cannot build electricity_df")

# One row per price interval returned by the API.
electricity_df = pd.DataFrame(all_data)

In [16]:
# --- Interval boundaries, converted to UTC so offset changes cannot collide ---
time_start_utc = pd.to_datetime(electricity_df["time_start"], utc=True)
time_end_utc = pd.to_datetime(electricity_df["time_end"], utc=True)

# --- Enrich the raw rows: interval length, event time, hourly bucket ---
interval_seconds = (time_end_utc - time_start_utc).dt.total_seconds().astype(int)
electricity_df = electricity_df.assign(
    resolution_minutes=interval_seconds // 60,
    date=time_start_utc,  # interval start (UTC) doubles as the event time
    date_hour=lambda frame: frame["date"].dt.floor("h"),
)

# --- Collapse to one row per hour ---
# A single 60-minute row passes through mean() unchanged; four 15-minute rows
# are averaged. n_intervals and the summed minutes are kept as diagnostics.
hourly_aggregations = {
    "SEK_per_kWh": ("SEK_per_kWh", "mean"),
    "EUR_per_kWh": ("EUR_per_kWh", "mean"),
    "EXR": ("EXR", "last"),
    "n_intervals": ("resolution_minutes", "size"),
    "resolution_minutes": ("resolution_minutes", "sum"),
}
electricity_hourly = (
    electricity_df.groupby(["date_hour"], as_index=False)
    .agg(**hourly_aggregations)
    .rename(columns={"date_hour": "date"})
    .assign(price_area="SE3")
)

# Sanity checks: how many source rows fed each hour, and how many minutes they cover.
# NOTE(review): the recorded output lists four hours whose minutes sum to 120
# rather than 60 — worth confirming where those come from.
interval_count_distribution = electricity_hourly["n_intervals"].value_counts().sort_index()
minutes_distribution = electricity_hourly["resolution_minutes"].value_counts().sort_index()
print("n_intervals distribution:\n", interval_count_distribution)
print("resolution_minutes distribution:\n", minutes_distribution)

# Final column layout, followed by the constant key column.
final_columns = [
    "date",
    "price_area",
    "SEK_per_kWh",
    "EUR_per_kWh",
    "EXR",
    "resolution_minutes",
    "n_intervals",
]
electricity_hourly = electricity_hourly[final_columns]
electricity_hourly["weather_key"] = "se3_set_v1"

print(electricity_hourly.dtypes)
print(electricity_hourly.head())


n_intervals distribution:
 n_intervals
1    34318
4     2473
Name: count, dtype: int64
resolution_minutes distribution:
 resolution_minutes
60     36787
120        4
Name: count, dtype: int64
date                  datetime64[ns, UTC]
price_area                         object
SEK_per_kWh                       float64
EUR_per_kWh                       float64
EXR                               float64
resolution_minutes                  int64
n_intervals                         int64
weather_key                        object
dtype: object
                       date price_area  SEK_per_kWh  EUR_per_kWh      EXR  \
0 2021-10-31 23:00:00+00:00        SE3      0.13777      0.01394  9.88308   
1 2021-11-01 00:00:00+00:00        SE3      0.12976      0.01313  9.88308   
2 2021-11-01 01:00:00+00:00        SE3      0.13026      0.01318  9.88308   
3 2021-11-01 02:00:00+00:00        SE3      0.13836      0.01400  9.88308   
4 2021-11-01 03:00:00+00:00        SE3      0.14281      0.01445  9.88308

In [21]:
# Feature group definition gathered in one mapping so the schema-level choices
# (name, version, key, event time) can be read at a glance.
# NOTE(review): weather_key holds the same value on every row in this notebook,
# so row identity depends on how Hopsworks combines primary_key with
# event_time — confirm this is the intended key.
feature_group_spec = dict(
    name='electricity_hourly',
    description='Electricity prices for each hour, 15-min prices aggregated to hourly',
    version=2,
    primary_key=['weather_key'],
    event_time="date",
)

# Reuse the feature group when it already exists, otherwise declare it.
electricity_fg = fs.get_or_create_feature_group(**feature_group_spec)

In [22]:
# Write the hourly frame to the feature group; wait=True is forwarded to insert().
insert_result = electricity_fg.insert(electricity_hourly, wait=True)

# Leave the returned value as the cell's output, as before.
insert_result



Feature Group created successfully, explore it at 
https://eu-west.cloud.hopsworks.ai:443/p/2186/fs/2137/fg/3437




2026-01-11 20:20:02,105 INFO: Computing insert statistics


(None, None)