In [19]:
import requests
import datetime
import pandas as pd
import requests
import hopsworks
from pathlib import Path
import json
import re
import os
import warnings
from dotenv import load_dotenv
import openmeteo_requests
import requests_cache
from retry_requests import retry

In [20]:
# Pull variables from a local .env file into the process environment so the
# credentials never have to be written into the notebook itself.
load_dotenv()

HW_API_KEY = os.getenv("HOPSWORKS_API_KEY")
HW_PROJECT = os.getenv("HOPSWORKS_PROJECT")

# Fail fast: without both values the login below cannot succeed.
if not HW_API_KEY or not HW_PROJECT:
    raise ValueError("Missing credentials in .env file")

# Cities to fetch weather data for.
# Decode explicitly as UTF-8: open() otherwise uses the platform's locale
# encoding, which can fail or mis-decode non-ASCII place names on some systems.
with open("locations.json", encoding="utf-8") as f:
    cities = json.load(f)

# Connect to Hopsworks Feature Store
print("--- CONNECTING TO HOPSWORKS ---")
project = hopsworks.login(
    project=HW_PROJECT,
    api_key_value=HW_API_KEY,
    host="eu-west.cloud.hopsworks.ai",
)
fs = project.get_feature_store()

--- CONNECTING TO HOPSWORKS ---
2026-01-11 19:09:57,787 INFO: Closing external client and cleaning up certificates.
2026-01-11 19:09:57,789 INFO: Connection closed.
2026-01-11 19:09:57,790 INFO: Initializing external client
2026-01-11 19:09:57,790 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2026-01-11 19:09:58,487 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/2186


In [21]:
def get_historical_weather_wide(
    cities,
    start_date,
    end_date,
    weather_key="se3_set_v1",
    hourly_variables=("temperature_2m", "precipitation", "cloud_cover", "wind_speed_10m"),
):
    """
    Fetch hourly weather for multiple cities and return ONE wide dataframe.

    The result has one row per timestamp and these columns:
      - date: timezone-aware UTC timestamp (a regular column, not the index)
      - <variable>_<city> for every requested variable and every city, e.g.
        temperature_2m_<city>, precipitation_<city>, cloud_cover_<city>,
        wind_speed_10m_<city>
      - weather_key: the same constant value on every row, usable as a
        primary key in Hopsworks

    Parameters
    ----------
    cities : iterable of dict
        Entries like {"city": "...", "latitude": .., "longitude": .., "price_area": "..."}.
        Only "city", "latitude" and "longitude" are read here.
    start_date, end_date : str
        Passed unchanged to the API as "start_date" / "end_date".
    weather_key : str, optional
        Constant written to the "weather_key" column.
    hourly_variables : sequence of str (or a single str), optional
        Hourly variable names to request; the order determines column order.

    Returns
    -------
    pandas.DataFrame
        Sorted by "date" with a fresh 0..n-1 index. Rows are kept when at least
        one city has data for the timestamp, so cells may be NaN for cities
        that lack that hour.

    Raises
    ------
    ValueError
        If `cities` or `hourly_variables` is empty.
    """
    # Validate before building any HTTP client so bad input fails immediately
    # with a clear message instead of an IndexError after the fetch loop.
    cities = list(cities)
    if not cities:
        raise ValueError("cities must contain at least one location")

    # A bare string would otherwise be iterated character by character.
    if isinstance(hourly_variables, str):
        hourly_variables = [hourly_variables]
    hourly_variables = list(hourly_variables)
    if not hourly_variables:
        raise ValueError("hourly_variables must contain at least one variable name")

    # Setup the Open-Meteo API client with cache and retry on error.
    # expire_after=-1 keeps cached responses indefinitely, so repeating a call
    # with identical parameters replays the stored response from ".cache".
    cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://archive-api.open-meteo.com/v1/archive"

    wide_parts = []

    for c in cities:
        city = c["city"]

        params = {
            "latitude": c["latitude"],
            "longitude": c["longitude"],
            "start_date": start_date,
            "end_date": end_date,
            "hourly": hourly_variables,
        }

        # One location per request, so only the first response is relevant.
        response = openmeteo.weather_api(url, params=params)[0]
        hourly = response.Hourly()

        # Rebuild the timestamp axis from the response's start, end and step;
        # inclusive="left" excludes the end bound itself.
        dates = pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left",
        )

        # Variables are read back by position, in the same order they were
        # requested, so a single list drives both the request and the columns.
        columns = {"date": dates}
        for position, variable in enumerate(hourly_variables):
            columns[f"{variable}_{city}"] = hourly.Variables(position).ValuesAsNumpy()

        # Drop hours where this city is missing any of its variables.
        wide_parts.append(pd.DataFrame(columns).dropna())

    # Outer merge on date keeps every timestamp seen for any city; timestamps a
    # city lacks show up as NaN in that city's columns.
    wide = wide_parts[0]
    for part in wide_parts[1:]:
        wide = wide.merge(part, on="date", how="outer")

    # Chronological order with a clean positional index. Rows with missing
    # cities are deliberately kept; callers can apply .dropna() for a strict set.
    wide = wide.sort_values("date").reset_index(drop=True)

    # Add a constant PK for Hopsworks (one row per timestamp for this fixed city set)
    wide["weather_key"] = weather_key

    return wide


In [22]:
# Fetch weather for every configured city over the fixed backfill window
# (both dates are passed straight through to the archive API).
weather_wide_df = get_historical_weather_wide(
    cities, start_date="2021-11-01", end_date="2026-01-11"
)

In [23]:
# Preview the merged wide frame as the cell's rich output.
weather_wide_df

Unnamed: 0,date,temperature_2m_stockholm,precipitation_stockholm,cloud_cover_stockholm,wind_speed_10m_stockholm,temperature_2m_uppsala,precipitation_uppsala,cloud_cover_uppsala,wind_speed_10m_uppsala,temperature_2m_vasteras,...,wind_speed_10m_karlstad,temperature_2m_sundsvall,precipitation_sundsvall,cloud_cover_sundsvall,wind_speed_10m_sundsvall,temperature_2m_malmo,precipitation_malmo,cloud_cover_malmo,wind_speed_10m_malmo,weather_key
0,2021-11-01 00:00:00+00:00,6.65,0.0,100.0,11.874544,5.75,0.0,100.0,8.587338,5.50,...,21.578989,2.500000,0.0,100.0,6.763786,13.841499,0.0,100.0,32.217484,se3_set_v1
1,2021-11-01 01:00:00+00:00,7.55,0.0,100.0,10.805998,5.80,0.0,100.0,10.514218,6.15,...,24.363251,1.800000,0.0,100.0,6.830519,14.291500,0.0,100.0,34.388950,se3_set_v1
2,2021-11-01 02:00:00+00:00,8.50,0.0,100.0,11.384198,5.95,0.0,100.0,11.090103,6.60,...,27.698952,1.700000,0.0,100.0,5.351785,13.591499,0.0,100.0,29.555099,se3_set_v1
3,2021-11-01 03:00:00+00:00,8.65,0.0,100.0,13.392774,6.95,0.0,100.0,11.753876,7.35,...,33.291729,0.950000,0.0,100.0,2.902413,13.041500,0.0,100.0,23.039999,se3_set_v1
4,2021-11-01 04:00:00+00:00,9.05,0.0,100.0,15.778516,7.75,0.0,100.0,14.843180,8.25,...,31.353085,1.950000,0.0,100.0,1.835647,12.041500,0.0,100.0,21.575987,se3_set_v1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36787,2026-01-11 19:00:00+00:00,-3.15,0.0,100.0,14.441566,-4.35,0.1,100.0,13.803782,-10.35,...,9.195868,-17.450001,0.0,6.0,9.000000,-6.558500,0.0,10.0,5.559640,se3_set_v1
36788,2026-01-11 20:00:00+00:00,-3.20,0.0,100.0,14.450537,-3.80,0.0,100.0,13.021106,-9.15,...,6.720535,-17.250000,0.0,23.0,9.109138,-7.358500,0.0,17.0,5.400000,se3_set_v1
36789,2026-01-11 21:00:00+00:00,-3.40,0.0,100.0,13.499999,-3.50,0.0,100.0,13.450708,-8.10,...,6.323100,-16.400000,0.0,61.0,9.636307,-8.208500,0.0,0.0,4.108576,se3_set_v1
36790,2026-01-11 22:00:00+00:00,-3.60,0.0,100.0,11.792404,-3.35,0.0,100.0,14.118243,-7.20,...,8.131936,-15.250000,0.0,97.0,10.062305,-8.858500,0.0,9.0,4.060985,se3_set_v1


In [24]:
# Get or create the feature group that stores the wide weather frame.
# The rows are hourly, so the description says so (it previously claimed daily).
# NOTE(review): the description presumably only applies when the group is first
# created; an already existing version 1 keeps its stored text - confirm.
weather_fg = fs.get_or_create_feature_group(
    name='weather',
    description='Hourly weather characteristics per city (wide format, one row per UTC timestamp)',
    version=1,
    primary_key=['weather_key'],
    event_time="date",
)

In [25]:
# Insert data: upload the wide weather frame into the feature group.
# wait=True is passed so the call blocks until the insert has finished
# (NOTE(review): exact waiting semantics are defined by the Hopsworks client - confirm).
weather_fg.insert(weather_wide_df, wait=True)

Feature Group created successfully, explore it at 
https://eu-west.cloud.hopsworks.ai:443/p/2186/fs/2137/fg/3434




2026-01-11 19:10:12,779 INFO: Computing insert statistics


(None, None)