In [1]:
from pathlib import Path
from dotenv import load_dotenv
import warnings
import pandas as pd
import hopsworks
import datetime
import os

import openmeteo_requests

import requests_cache
from retry_requests import retry

# NOTE(review): with no category or module argument this hides every warning
# for the rest of the session — confirm that blanket suppression is intended.
warnings.filterwarnings("ignore")


In [2]:
# Load settings from the .env file one directory up; override=True lets values
# from the file replace variables that are already set in the environment.
load_dotenv("../.env", override=True)
HOPSWORKS_API_KEY = os.getenv("HOPSWORKS_API_KEY")
# Both an unset variable (None) and an empty value ("") are unusable, so fail
# early with an actionable message instead of a less clear login error later.
if not HOPSWORKS_API_KEY:
    raise RuntimeError(
        "HOPSWORKS_API_KEY not found. "
        "Set it in ../.env or export it as an environment variable."
    )


# Log in to the Hopsworks project and grab its feature store handle.
project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY, project="project_scalable")
fs = project.get_feature_store()
# Date on which this notebook run was executed.
today = datetime.date.today()

2026-01-15 11:53:27,695 INFO: Initializing external client
2026-01-15 11:53:27,696 INFO: Base URL: https://c.app.hopsworks.ai:443






2026-01-15 11:53:29,968 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1348756


In [3]:


# Weather data
# Source: Open-Meteo

def hourly_weather_forecast(latitude, longitude, forecast_days):
    """Fetch an hourly weather forecast from Open-Meteo as a DataFrame.

    Args:
        latitude: Latitude of the location, sent as the "latitude" parameter.
        longitude: Longitude of the location, sent as the "longitude" parameter.
        forecast_days: Number of forecast days, sent as "forecast_days".

    Returns:
        pandas.DataFrame with a timezone-naive ``datetime`` column (built in
        UTC, then stripped of its timezone) followed by one column per
        requested variable: ``temperature_2m``, ``shortwave_radiation``,
        ``cloud_cover``, ``wind_speed_10m`` and ``precipitation``. Rows in
        which every weather variable is missing are dropped.
    """
    # Single source of truth for the requested variables. The API returns the
    # series in the order they were requested, so the position in this tuple
    # is also the index passed to hourly.Variables(i) below.
    hourly_variables = (
        "temperature_2m",
        "shortwave_radiation",
        "cloud_cover",
        "wind_speed_10m",
        "precipitation",
    )

    # Setup the Open-Meteo API client with cache and retry on error.
    # Responses are cached for one hour (3600 s) so repeated runs do not hit
    # the API again, while forecasts still refresh regularly.
    cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "hourly": list(hourly_variables),
        "forecast_days": forecast_days,
    }
    responses = openmeteo.weather_api(url, params=params)

    # Process first location. Add a for-loop for multiple locations or weather models
    response = responses[0]
    print(f"Coordinates: {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation: {response.Elevation()} m asl")
    print(f"Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s")

    hourly = response.Hourly()

    # Rebuild the time axis from the start, end and step reported by the API;
    # inclusive="left" excludes the end timestamp itself.
    hourly_data = {"datetime": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )}
    for index, name in enumerate(hourly_variables):
        hourly_data[name] = hourly.Variables(index).ValuesAsNumpy()

    hourly_data_df = pd.DataFrame(data=hourly_data)
    # Only look at the weather columns: "datetime" is always populated, so a
    # check across all columns would never drop a row.
    hourly_data_df = hourly_data_df.dropna(how="all", subset=list(hourly_variables))

    # Drop the timezone so the column holds naive timestamps.
    hourly_data_df["datetime"] = hourly_data_df["datetime"].dt.tz_convert(None)
    print(f"Fetched {len(hourly_data_df)} hourly rows")
    return hourly_data_df


# Areas to fetch, keyed by area code, each with the coordinates to query.
# Entries that are currently switched off are kept as comments.
AREAS = {
    # "SE1": {"lat": 65.58, "lon": 22.15},
    # "SE2": {"lat": 62.39, "lon": 17.30},
    "SE3": {"lat": 59.33, "lon": 18.06},
    # "SE4": {"lat": 55.60, "lon": 13.00},
}

# One 7-day hourly forecast frame per area, each tagged with its area code.
weather_dfs = []
for area, coords in AREAS.items():
    df_weather = hourly_weather_forecast(
        coords["lat"], coords["lon"], forecast_days=7
    ).assign(area=area)
    weather_dfs.append(df_weather)

# Stack the per-area frames into one frame with a fresh 0..n-1 index.
weather_all = pd.concat(weather_dfs, ignore_index=True)


Coordinates: 59.329322814941406°N 18.054779052734375°E
Elevation: 15.0 m asl
Timezone difference to GMT+0: 0s
Fetched 168 hourly rows


In [4]:
# Get (or create) version 1 of the weather feature group and upload the
# combined forecast frame to it.
# NOTE(review): overwrite=True presumably replaces the rows already stored in
# the feature group — confirm this is the intended behavior.
weather_fg = fs.get_or_create_feature_group(name="weather_data_new", version=1)
weather_fg.insert(weather_all, overwrite=True)


Uploading Dataframe: 100.00% |█████████████████████████████| Rows 168/168 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: weather_data_new_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1348756/jobs/named/weather_data_new_1_offline_fg_materialization/executions


(Job('weather_data_new_1_offline_fg_materialization', 'SPARK'), None)

In [5]:
# Preview the first 15 rows of the combined forecast frame.
weather_all.head(15)

Unnamed: 0,datetime,temperature_2m,shortwave_radiation,cloud_cover,wind_speed_10m,precipitation,area
0,2026-01-15 00:00:00,1.4435,0.0,100.0,11.879999,0.0,SE3
1,2026-01-15 01:00:00,1.4435,0.0,100.0,11.159999,0.0,SE3
2,2026-01-15 02:00:00,1.5435,0.0,100.0,13.679999,0.0,SE3
3,2026-01-15 03:00:00,1.4435,0.0,100.0,12.599999,0.0,SE3
4,2026-01-15 04:00:00,1.5435,0.0,100.0,11.159999,0.0,SE3
5,2026-01-15 05:00:00,1.6435,0.0,99.0,12.24,0.0,SE3
6,2026-01-15 06:00:00,1.6435,0.0,100.0,13.32,0.0,SE3
7,2026-01-15 07:00:00,1.5935,0.0,100.0,12.24,0.0,SE3
8,2026-01-15 08:00:00,1.5435,2.0,100.0,10.799999,0.0,SE3
9,2026-01-15 09:00:00,1.8935,20.0,100.0,10.799999,0.0,SE3
