In [119]:
import requests
import datetime
import pandas as pd
import requests
import hopsworks
from pathlib import Path
import json
import re
import os
import warnings
from dotenv import load_dotenv
import openmeteo_requests
import requests_cache
from retry_requests import retry

In [120]:
# Populate the process environment from a local .env file (if one exists)
# before reading the Hopsworks credentials from it.
load_dotenv()

HW_API_KEY = os.environ.get("HOPSWORKS_API_KEY")
HW_PROJECT = os.environ.get("HOPSWORKS_PROJECT")

# Fail fast when either value is unset or empty, instead of letting the
# login call below fail with a less obvious error.
if not (HW_API_KEY and HW_PROJECT):
    raise ValueError("Missing credentials in .env file")

# Connect to Hopsworks Feature Store
print("--- CONNECTING TO HOPSWORKS ---")
project = hopsworks.login(
    host="eu-west.cloud.hopsworks.ai",
    project=HW_PROJECT,
    api_key_value=HW_API_KEY,
)
# Handle to the project's feature store, used by the cells below.
fs = project.get_feature_store()

--- CONNECTING TO HOPSWORKS ---
2026-01-11 15:43:32,528 INFO: Closing external client and cleaning up certificates.
2026-01-11 15:43:32,529 INFO: Connection closed.
2026-01-11 15:43:32,529 INFO: Initializing external client
2026-01-11 15:43:32,530 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2026-01-11 15:43:33,301 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/2186


In [121]:
def get_historical_weather(city, start_date, end_date, latitude, longitude, price_area):
    """Fetch hourly historical weather for one location from the Open-Meteo archive API.

    Args:
        city: Label written to the ``city`` column of every returned row.
        start_date: Forwarded unchanged as the ``start_date`` request parameter.
        end_date: Forwarded unchanged as the ``end_date`` request parameter.
        latitude: Forwarded unchanged as the ``latitude`` request parameter.
        longitude: Forwarded unchanged as the ``longitude`` request parameter.
        price_area: Label written to the ``price_area`` column of every returned row.

    Returns:
        pandas.DataFrame with the columns ``date`` (timezone-aware, UTC),
        ``temperature_2m``, ``precipitation``, ``cloud_cover``,
        ``wind_speed_10m``, ``city`` and ``price_area``. Rows containing any
        NaN are dropped; the index is not reset afterwards.

    Side effects:
        Prints the coordinates, elevation and UTC offset reported by the API.
        HTTP responses go through a ``requests_cache`` session named
        ``.cache`` created with ``expire_after=-1``.
    """
    # Single source of truth for the requested variables: the API returns them
    # in request order, so the same list drives the positional lookup below.
    hourly_variables = ["temperature_2m", "precipitation", "cloud_cover", "wind_speed_10m"]

    # Open-Meteo client on top of a cached session that retries failed requests.
    openmeteo = openmeteo_requests.Client(
        session=retry(
            requests_cache.CachedSession('.cache', expire_after=-1),
            retries=5,
            backoff_factor=0.2,
        )
    )

    responses = openmeteo.weather_api(
        "https://archive-api.open-meteo.com/v1/archive",
        params={
            "latitude": latitude,
            "longitude": longitude,
            "start_date": start_date,
            "end_date": end_date,
            "hourly": hourly_variables,
        },
    )

    # Only one location is requested, so only the first response is used.
    # Loop over `responses` to support several locations or weather models.
    location = responses[0]
    print(f"Coordinates: {location.Latitude()}°N {location.Longitude()}°E")
    print(f"Elevation: {location.Elevation()} m asl")
    print(f"Timezone difference to GMT+0: {location.UtcOffsetSeconds()}s")

    hourly = location.Hourly()

    # Pull each variable by its position in the request.
    series_by_name = {
        variable: hourly.Variables(position).ValuesAsNumpy()
        for position, variable in enumerate(hourly_variables)
    }

    # Rebuild the time axis from the start, end and step (in seconds) reported
    # by the response; the end bound is excluded.
    timestamps = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )

    # Drop rows with missing values, then tag every row with its location labels.
    weather = pd.DataFrame(data={"date": timestamps, **series_by_name}).dropna()
    weather['city'] = city
    weather['price_area'] = price_area

    return weather

In [122]:
# Load the list of candidate locations. The encoding is pinned to UTF-8
# instead of relying on the platform default, so that non-ASCII city names
# (e.g. "Malmö") decode identically on every machine.
locations_file = Path("locations.json")
with locations_file.open("r", encoding="utf-8") as f:
    locations = json.load(f)

# Set idx for location to backfill, locations.json
idx = 6
location = locations[idx]
city = location["city"]
latitude = location["latitude"]
longitude = location["longitude"]
price_area = location["price_area"]

# Date range to backfill, passed to the archive API as YYYY-MM-DD strings.
startdate = "2021-11-01"
enddate = "2026-01-10"

# Fetch the hourly history for the selected location.
weather_df = get_historical_weather(
    city = city,
    start_date = startdate,
    end_date = enddate,
    latitude = latitude,
    longitude = longitude,
    price_area = price_area
)

Coordinates: 55.57117462158203°N 12.935222625732422°E
Elevation: 15.0 m asl
Timezone difference to GMT+0: 0s


In [127]:
# Display the fetched weather frame as a visual sanity check.
weather_df

Unnamed: 0,date,temperature_2m,precipitation,cloud_cover,wind_speed_10m,city,price_area
0,2021-11-01 00:00:00+00:00,13.841499,0.0,100.0,32.217484,Malmö,SE4
1,2021-11-01 01:00:00+00:00,14.291500,0.0,100.0,34.388950,Malmö,SE4
2,2021-11-01 02:00:00+00:00,13.591499,0.0,100.0,29.555099,Malmö,SE4
3,2021-11-01 03:00:00+00:00,13.041500,0.0,100.0,23.039999,Malmö,SE4
4,2021-11-01 04:00:00+00:00,12.041500,0.0,100.0,21.575987,Malmö,SE4
...,...,...,...,...,...,...,...
36763,2026-01-10 19:00:00+00:00,-4.708500,0.0,0.0,23.220695,Malmö,SE4
36764,2026-01-10 20:00:00+00:00,-5.058500,0.0,1.0,24.122683,Malmö,SE4
36765,2026-01-10 21:00:00+00:00,-5.258500,0.0,0.0,24.856298,Malmö,SE4
36766,2026-01-10 22:00:00+00:00,-5.358500,0.0,0.0,24.503811,Malmö,SE4


In [124]:
# Get or create the feature group that receives the weather backfill.
# 'city' is declared as the primary key and 'date' as the event-time column.
# The description reflects that the rows are hourly observations.
weather_fg = fs.get_or_create_feature_group(
    name='weather',
    description='Hourly weather characteristics for each city',
    version=1,
    primary_key=['city'],
    event_time="date",
)

In [125]:
# Insert the backfilled weather frame into the 'weather' feature group.
# NOTE(review): wait=True presumably makes the call block until the ingestion
# finishes — confirm against the Hopsworks feature group documentation.
weather_fg.insert(weather_df, wait=True)



2026-01-11 15:44:06,291 INFO: Computing insert statistics


(None, None)

In [126]:
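# Optional, currently disabled: attach a human-readable description to each feature of the feature group.
#weather_fg.update_feature_description("date", "Timestamp (UTC) of measurement of weather")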
#weather_fg.update_feature_description("temperature_2m", "Temperature in Celsius")
#weather_fg.update_feature_description("precipitation", "Precipitation (rain/snow) in mm")
#weather_fg.update_feature_description("wind_speed_10m", "Wind speed at 10m above ground")
#weather_fg.update_feature_description("city", "City where weather is measured/forecast for")
#weather_fg.update_feature_description("cloud_cover", "Cloud cover %")
#weather_fg.update_feature_description("price_area", "Price area where weather is measured/forecast for")