# Batch Daily Inference Pipeline
* Retrieve best model from Hopsworks
* Fetch weather forecasts for the next 14 days from the Open-Meteo API
* Predict weather code and store predictions in Hopsworks

In [None]:
import hopsworks
import joblib
import numpy as np
import pandas as pd
from datetime import datetime
from weather_utils import *

import sys
sys.path.append('..')  # Add the parent directory (project root) to the Python path
from config import *

# Suppress all Python warnings to keep the notebook output readable
import warnings
warnings.filterwarnings("ignore")

# 1) Retrieve best model from Hopsworks

In [None]:
# Log in to Hopsworks and obtain the two services used in this notebook:
# the Feature Store (for the forecast Feature Group written further down)
# and the Model Registry (for fetching the trained model).
project = hopsworks.login()
fs = project.get_feature_store()
mr = project.get_model_registry()

# Ask the registry for the best model registered under MODEL_NAME, ranked by
# MODEL_METRIC in OPTIMIZE_DIRECTION (constants presumably come from config).
weather_code_model = mr.get_best_model(
    MODEL_NAME, MODEL_METRIC, OPTIMIZE_DIRECTION
)

# Fetch the model files; the returned value is used as the local directory
model_dir = weather_code_model.download()

# Load (deserialize) the estimator stored as "<MODEL_NAME>.pkl" in that directory
model = joblib.load(f"{model_dir}/{MODEL_NAME}.pkl")

print("Model:", model_dir)

# 2) Fetch weather forecasts for the next 14 days

In [None]:
# Open the Open-Meteo client
openmeteo = get_openmeteo_connection()

# Query: daily minimum temperature, precipitation sum and maximum wind gusts
# for the configured coordinates, forecast only (no past days).
params = dict(
    latitude=LATITUDE,
    longitude=LONGITUDE,
    daily=["temperature_2m_min", "precipitation_sum", "wind_gusts_10m_max"],
    timezone=TIMEZONE,
    past_days=0,
    forecast_days=15,
)

responses = openmeteo.weather_api(BASELINE_URL_OPEN_METEO, params=params)

# The client returns a sequence of responses (it can serve several
# locations); only the first one is used here.
response = responses[0]

# Turn the forecast response into a pandas DataFrame
# (this is forecast data, not yesterday's observations).
df_forecasts = process_forecast_request(response)

# Stamp every row with today's date, i.e. the day the forecast was retrieved
df_forecasts["prediction_date"] = datetime.today().date()

# Derive the month (as an integer) from each row's forecast date
forecast_dates = pd.to_datetime(df_forecasts["forecast_date"])
df_forecasts["month"] = forecast_dates.dt.month

display(df_forecasts)

# 3) Predict Weather Code

In [None]:
# Columns passed to the model, in this order.
# NOTE(review): these names differ from the Open-Meteo variable names requested
# above; presumably process_forecast_request renames them — confirm.
feature_columns = ["temperature_min", "precipitation_sum", "wind_gusts_max", "month"]
X = df_forecasts[feature_columns]

# Show the selected columns and the resulting feature matrix
print(X.columns)
display(X)

In [None]:
# Run inference on the forecast features (nothing is trained in this notebook)
raw_predictions = model.predict(X)

# Round each predicted value to the nearest integer weather code
y = np.round(raw_predictions).astype(int)

# Attach the predicted codes to the forecast frame
df_forecasts["weather_code"] = y

In [None]:
# Add weather code descriptions using the add_weather_code_labels helper
# (presumably provided by weather_utils; the columns it adds are not visible
# here — TODO confirm). The returned frame replaces df_forecasts.
df_forecasts = add_weather_code_labels(df_forecasts)

# Show the forecast frame after labelling for visual inspection
display(df_forecasts)

# 4) Update forecasts in Hopsworks Feature Group

In [None]:
# Statistics settings passed to Hopsworks for this Feature Group
fg_statistics_config = {
    "enabled": True,
    "histograms": True,
    "correlations": True,
}

# Fetch the forecast Feature Group, creating it if it does not exist yet.
# Name, version, primary key and description come from the FG_FORECAST_* constants.
forecast_weather_fg = fs.get_or_create_feature_group(
    name=FG_FORECAST_NAME,
    version=FG_FORECAST_V,
    primary_key=FG_FORECAST_PK,
    description=FG_FORECAST_DESC,
    statistics_config=fg_statistics_config,
)

# Upload the forecast rows; the "wait_for_job" write option is set so the
# call waits for the ingestion job instead of returning immediately.
forecast_weather_fg.insert(
    df_forecasts,
    write_options={"wait_for_job": True},
)

In [None]:
# Save the forecast locally as CSV. The path is relative to the current
# working directory. to_csv is called with its defaults, so the DataFrame
# index is written as the first column.
df_forecasts.to_csv('../resources/forecast.csv')

In [None]:
# Upload the locally saved forecast CSV to the Hopsworks project so it can be
# used by Hugging Face.
dataset_api = project.get_dataset_api()

local_forecast_csv = "../resources/forecast.csv"
remote_forecast_dir = "Resources/weather_forecast"

# overwrite=True replaces the previously uploaded file, so forecasts of past
# days are not displayed.
dataset_api.upload(local_forecast_csv, remote_forecast_dir, overwrite=True)