# Batch Daily Inference Pipeline
* Retrieve best model from Hopsworks
* Fetch weather forecasts for the next 14 days from the Open-Meteo API
* Predict weather code and store predictions in Hopsworks

In [1]:
import hopsworks
import joblib
import numpy as np
import pandas as pd
# Star import: presumably the source of the helper functions called in later cells
# (get_openmeteo_connection, process_forecast_request, add_weather_code_labels) — TODO confirm
from weather_utils import *

import sys
sys.path.append('..')  # Add the parent directory (project root) to the Python path
# Star import: presumably the source of the UPPER_CASE settings used in later cells
# (MODEL_*, LATITUDE/LONGITUDE/TIMEZONE, BASELINE_URL_OPEN_METEO, FG_FORECAST_*) — TODO confirm
from config import *

# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation notices raised by the libraries above.
import warnings
warnings.filterwarnings("ignore")

# 1) Retrieve best model from Hopsworks

In [2]:
# Log in to Hopsworks and grab handles to the two services this notebook uses:
# the Feature Store (written to in section 4) and the Model Registry (read here)
project = hopsworks.login()
fs = project.get_feature_store()
mr = project.get_model_registry()

# Ask the registry for the best registered model according to the configured metric
weather_code_model = mr.get_best_model(
    MODEL_NAME,
    MODEL_METRIC,
    OPTIMIZE_DIRECTION,
)

# Download the model artifacts; download() returns the local directory holding them
model_dir = weather_code_model.download()

# Load the pickled model from '<model_dir>/<MODEL_NAME>.pkl'
# NOTE(review): joblib.load unpickles the file, so the registry content must be trusted
model_file = model_dir + '/' + MODEL_NAME + '.pkl'
model = joblib.load(model_file)

print("Model:", model_dir)

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/178324
Connected. Call `.close()` to terminate connection gracefully.
Connected. Call `.close()` to terminate connection gracefully.
Downloading file ... Model: /var/folders/kf/md7wd1294hv6n5qvkc2149qr0000gn/T/8681a4bf-aa64-4473-bfc9-400a351d49f5/weather_code_xgboost_model/1


# 2) Fetch weather forecasts for the next 14 days from Open-Meteo

In [3]:
# Define query parameters: three daily variables for the configured location,
# no past days, and a 14-day forecast horizon
params = {
    "latitude": LATITUDE,
    "longitude": LONGITUDE,
    "daily": ["temperature_2m_min", "precipitation_sum", "wind_gusts_10m_max"],
    "timezone": TIMEZONE,
    "past_days": 0,
    "forecast_days": 14
}

# Setup connection with Open-Meteo
openmeteo = get_openmeteo_connection()

# Execute the query
responses = openmeteo.weather_api(BASELINE_URL_OPEN_METEO, params=params)

# Only one location is requested, so take the first (and only) response;
# the API can return several responses when multiple locations are queried
response = responses[0]

# Process the forecast response into a Pandas dataframe
df_forecasts = process_forecast_request(response)

display(df_forecasts)

Unnamed: 0,date,temperature_min,precipitation_sum,wind_gusts_max
0,2023-11-15,-1.3825,0.0,25.919998
1,2023-11-16,-0.7825,0.0,25.199999
2,2023-11-17,-3.7325,0.0,19.799999
3,2023-11-18,-4.7305,0.0,13.32
4,2023-11-19,-0.3305,0.0,16.199999
5,2023-11-20,-2.1455,0.0,18.359999
6,2023-11-21,-2.9455,7.5,58.32
7,2023-11-22,0.9045,5.399998,66.239998
8,2023-11-23,-6.3915,4.8,32.039997
9,2023-11-24,-5.8415,0.9,23.039999


# 3) Predict Weather Code

In [4]:
# Add a new column with the month as an integer
df_forecasts['month'] = pd.to_datetime(df_forecasts['date']).dt.month

# Select the model features explicitly instead of dropping "date":
# later cells add prediction/label columns to df_forecasts, so a drop-based
# selection would leak those columns into X if this cell were re-run.
# The order matches the feature order this notebook has always passed to the model.
feature_columns = ['temperature_min', 'precipitation_sum', 'wind_gusts_max', 'month']
X = df_forecasts[feature_columns]
print(X.columns)

Index(['temperature_min', 'precipitation_sum', 'wind_gusts_max', 'month'], dtype='object')


In [5]:
# Predict a weather code for every forecast day with the downloaded model
# (inference only, no training happens here) and round the raw output to an
# integer weather code
y = np.round(model.predict(X)).astype(int)

# Store the predicted codes next to the forecast features
df_forecasts['weather_code'] = y

In [6]:
# Add weather code descriptions: the helper returns the dataframe extended with
# the weather_code_desc and weather_code_desc_short columns shown in the output below
df_forecasts = add_weather_code_labels(df_forecasts)

display(df_forecasts)

Unnamed: 0,date,temperature_min,precipitation_sum,wind_gusts_max,month,weather_code,weather_code_desc,weather_code_desc_short
0,2023-11-15,-1.3825,0.0,25.919998,11,2,"Mainly clear, partly cloudy, and overcast",Clear
1,2023-11-16,-0.7825,0.0,25.199999,11,2,"Mainly clear, partly cloudy, and overcast",Clear
2,2023-11-17,-3.7325,0.0,19.799999,11,2,"Mainly clear, partly cloudy, and overcast",Clear
3,2023-11-18,-4.7305,0.0,13.32,11,2,"Mainly clear, partly cloudy, and overcast",Clear
4,2023-11-19,-0.3305,0.0,16.199999,11,2,"Mainly clear, partly cloudy, and overcast",Clear
5,2023-11-20,-2.1455,0.0,18.359999,11,2,"Mainly clear, partly cloudy, and overcast",Clear
6,2023-11-21,-2.9455,7.5,58.32,11,6,"Rain: Slight, moderate and heavy intensity",Rain
7,2023-11-22,0.9045,5.399998,66.239998,11,5,Freezing Drizzle: Light and dense intensity,Drizzle
8,2023-11-23,-6.3915,4.8,32.039997,11,6,"Rain: Slight, moderate and heavy intensity",Rain
9,2023-11-24,-5.8415,0.9,23.039999,11,4,"Drizzle: Light, moderate, and dense intensity",Drizzle


# 4) Update forecasts in Hopsworks Feature Group

In [7]:
# Get the forecast Feature Group, creating it on first use.
# Name, version, primary key and description come from the project config.
forecast_weather_fg = fs.get_or_create_feature_group(
    name=FG_FORECAST_NAME,
    version=FG_FORECAST_V,
    primary_key=FG_FORECAST_PK,
    # event_time is documented as a single feature name (string), not a list
    event_time="date",
    description=FG_FORECAST_DESC,
    statistics_config={"enabled": True,
                       "histograms": True,
                       "correlations": True}
)

# Upload the forecasts (features, predicted code and labels) and block until
# the materialization job has finished
forecast_weather_fg.insert(df_forecasts,
                           write_options={"wait_for_job" : True})

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/178324/fs/178243/fg/226673


Uploading Dataframe: 0.00% |          | Rows 0/14 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: weather_forecast_fg_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/178324/jobs/named/weather_forecast_fg_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x1457f0210>, None)

In [8]:
# Save the forecast locally; this file is uploaded to Hopsworks in the next cell.
# The dataframe index is written as an unnamed first column (pandas default).
df_forecasts.to_csv('../resources/forecast.csv')

In [9]:
# Upload the local forecast CSV to the Hopsworks cluster so the Hugging Face app can use it.
# overwrite=True replaces the previous upload, so forecasts for past days are no longer displayed.
dataset_api = project.get_dataset_api()
dataset_api.upload(
    "../resources/forecast.csv",
    "Resources/weather_forecast",
    overwrite=True,
)

Uploading: 0.000%|          | 0/1390 elapsed<00:00 remaining<?

'Resources/weather_forecast/forecast.csv'