# Batch Daily Inference Pipeline
* Retrieve best model from Hopsworks
* Scrape weather forecasts for the next 14 days
* Predict weather code and store predictions in Hopsworks

In [21]:
# Third-party libraries used throughout the notebook
import hopsworks
import joblib
import numpy as np
import pandas as pd
from datetime import datetime
# NOTE(review): star import — presumably supplies the helper functions called
# in later cells (get_openmeteo_connection, process_forecast_request,
# add_weather_code_labels); confirm and consider importing them explicitly.
from weather_utils import *

import sys
sys.path.append('..')  # Add the parent directory (project root) to the Python path
# NOTE(review): star import — presumably supplies the upper-case constants used
# in later cells (MODEL_NAME, LATITUDE, FG_FORECAST_NAME, ...); confirm and
# consider importing them explicitly.
from config import *

# Disable annoying warnings
# NOTE(review): this silences every warning category for the whole kernel.
import warnings
warnings.filterwarnings("ignore")

# 1) Retrieve best model from Hopsworks

In [22]:
# Log in to Hopsworks; the project handle is reused by later cells
project = hopsworks.login()

# Handles to the project's Feature Store and Model Registry
fs = project.get_feature_store()
mr = project.get_model_registry()

# Pick the registered model version that is best according to the configured
# metric and optimisation direction
weather_code_model = mr.get_best_model(MODEL_NAME, MODEL_METRIC, OPTIMIZE_DIRECTION)

# Download the model artifacts; the call returns the directory they were saved to
model_dir = weather_code_model.download()

# Load the serialized model stored as <MODEL_NAME>.pkl inside that directory
model_file = MODEL_NAME + '.pkl'
model = joblib.load(model_dir + '/' + model_file)

print("Model:", model_dir)

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/178324
Connected. Call `.close()` to terminate connection gracefully.
Connected. Call `.close()` to terminate connection gracefully.
Downloading file ... Model: /var/folders/kf/md7wd1294hv6n5qvkc2149qr0000gn/T/82cabf65-ff5f-424f-8338-5720fea7d2da/weather_code_xgboost_model/2


# 2) Scrape weather forecasts for the next 14 days

In [28]:
# Daily variables requested from Open-Meteo
daily_variables = ["temperature_2m_min", "precipitation_sum", "wind_gusts_10m_max"]

# Query: forecast only (no past days), 14 forecast days, for the configured location
params = dict(
    latitude=LATITUDE,
    longitude=LONGITUDE,
    daily=daily_variables,
    timezone=TIMEZONE,
    past_days=0,
    forecast_days=14,
)

# Open a client for Open-Meteo and run the query
openmeteo = get_openmeteo_connection()
responses = openmeteo.weather_api(BASELINE_URL_OPEN_METEO, params=params)

# The API answers with one entry per requested location; only one was requested
response = responses[0]

# Turn the forecast response into a DataFrame, then stamp each row with the
# date the prediction was made (today) and the forecast month as an integer
df_forecasts = process_forecast_request(response).assign(
    prediction_date=datetime.today().date(),
    month=lambda frame: pd.to_datetime(frame['forecast_date']).dt.month,
)

display(df_forecasts)

Unnamed: 0,forecast_date,temperature_min,precipitation_sum,wind_gusts_max,prediction_date,month
0,2023-11-19,-0.8325,0.1,15.119999,2023-11-20,11
1,2023-11-20,-2.5325,0.0,28.799999,2023-11-20,11
2,2023-11-21,-4.4825,5.3,69.839996,2023-11-20,11
3,2023-11-22,1.5195,0.7,33.119999,2023-11-20,11
4,2023-11-23,-2.4805,0.2,45.0,2023-11-20,11
5,2023-11-24,-4.2305,0.0,20.519999,2023-11-20,11
6,2023-11-25,-4.8455,0.0,29.519999,2023-11-20,11
7,2023-11-26,-5.6415,0.0,30.239998,2023-11-20,11
8,2023-11-27,-6.7415,0.0,25.919998,2023-11-20,11
9,2023-11-28,-7.3415,0.0,19.440001,2023-11-20,11


# 3) Predict Weather Code

In [29]:
# Columns handed to the model, in this order
feature_columns = ['temperature_min', 'precipitation_sum', 'wind_gusts_max', 'month']
X = df_forecasts[feature_columns]

# Show the selected column names and the feature matrix
print(X.columns)
display(X)

Index(['temperature_min', 'precipitation_sum', 'wind_gusts_max', 'month'], dtype='object')


Unnamed: 0,temperature_min,precipitation_sum,wind_gusts_max,month
0,-0.8325,0.1,15.119999,11
1,-2.5325,0.0,28.799999,11
2,-4.4825,5.3,69.839996,11
3,1.5195,0.7,33.119999,11
4,-2.4805,0.2,45.0,11
5,-4.2305,0.0,20.519999,11
6,-4.8455,0.0,29.519999,11
7,-5.6415,0.0,30.239998,11
8,-6.7415,0.0,25.919998,11
9,-7.3415,0.0,19.440001,11


In [30]:
# Run the downloaded model on the forecast features (inference, no training)
# and round each raw prediction to the closest integer weather code
y = np.round(model.predict(X)).astype(int)

# Attach the predicted codes to the forecast rows
df_forecasts['weather_code'] = y

In [31]:
# Enrich the forecasts with human-readable weather code descriptions
# (the cell output shows weather_code_desc and weather_code_desc_short columns)
df_forecasts = df_forecasts.pipe(add_weather_code_labels)

display(df_forecasts)

Unnamed: 0,forecast_date,temperature_min,precipitation_sum,wind_gusts_max,prediction_date,month,weather_code,weather_code_desc,weather_code_desc_short
0,2023-11-19,-0.8325,0.1,15.119999,2023-11-20,11,4,"Drizzle: Light, moderate, and dense intensity",Drizzle
1,2023-11-20,-2.5325,0.0,28.799999,2023-11-20,11,2,"Mainly clear, partly cloudy, and overcast",Clear
2,2023-11-21,-4.4825,5.3,69.839996,2023-11-20,11,5,Freezing Drizzle: Light and dense intensity,Drizzle
3,2023-11-22,1.5195,0.7,33.119999,2023-11-20,11,4,"Drizzle: Light, moderate, and dense intensity",Drizzle
4,2023-11-23,-2.4805,0.2,45.0,2023-11-20,11,4,"Drizzle: Light, moderate, and dense intensity",Drizzle
5,2023-11-24,-4.2305,0.0,20.519999,2023-11-20,11,2,"Mainly clear, partly cloudy, and overcast",Clear
6,2023-11-25,-4.8455,0.0,29.519999,2023-11-20,11,2,"Mainly clear, partly cloudy, and overcast",Clear
7,2023-11-26,-5.6415,0.0,30.239998,2023-11-20,11,2,"Mainly clear, partly cloudy, and overcast",Clear
8,2023-11-27,-6.7415,0.0,25.919998,2023-11-20,11,2,"Mainly clear, partly cloudy, and overcast",Clear
9,2023-11-28,-7.3415,0.0,19.440001,2023-11-20,11,2,"Mainly clear, partly cloudy, and overcast",Clear


# 4) Update forecasts in Hopsworks Feature Group

In [None]:
# Statistics Hopsworks should compute for the Feature Group
fg_statistics = {"enabled": True, "histograms": True, "correlations": True}

# Fetch the forecast Feature Group, creating it on first use
forecast_weather_fg = fs.get_or_create_feature_group(
    name=FG_FORECAST_NAME,
    version=FG_FORECAST_V,
    primary_key=FG_FORECAST_PK,
    description=FG_FORECAST_DESC,
    statistics_config=fg_statistics,
)

# Insert the forecasts with the wait_for_job write option enabled
forecast_weather_fg.insert(df_forecasts, write_options={"wait_for_job": True})

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/178324/fs/178243/fg/238964


Uploading Dataframe: 0.00% |          | Rows 0/14 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: weather_forecast_fg_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/178324/jobs/named/weather_forecast_fg_1_offline_fg_materialization/executions


In [None]:
# Write the forecasts (row index included, pandas default) to a local CSV file
df_forecasts.to_csv(path_or_buf='../resources/forecast.csv')

In [None]:
# Publish the local forecast CSV to the Hopsworks cluster, where the
# Hugging Face app reads it
local_forecast_csv = "../resources/forecast.csv"
remote_forecast_dir = "Resources/weather_forecast"

dataset_api = project.get_dataset_api()
dataset_api.upload(
    local_forecast_csv,
    remote_forecast_dir,
    overwrite=True,  # replace the previous file so forecasts of past days are not displayed
)