# Feature pipeline
* Query new weather data
* Feature Group Insertion

In [None]:
# Third-party libraries used by this pipeline
import pandas as pd
import hopsworks
import matplotlib.pyplot as plt
import great_expectations as ge

# Wildcard import: every public name of weather_utils lands in the notebook namespace
from weather_utils import *

import sys
sys.path.append('..')  # Add the parent directory (project root) to the Python path
# config is expected to live in the project root, so this import relies on the
# sys.path entry added just above. Presumably it provides the upper-case
# constants used in later cells (LATITUDE, TIMEZONE, FG_HISTORY_*, ...) — verify in config.
from config import *

# Disable annoying warnings
# NOTE: "ignore" without a category or module filter silences every warning
# raised from this point on, not just the noisy ones.
import warnings
warnings.filterwarnings("ignore")

# 1) Query Weather Data

In [None]:
# Set up the Open-Meteo client
openmeteo = get_openmeteo_connection()

# Query parameters: the configured location and timezone, four daily
# variables, and a time window limited to yesterday (one past day, no forecast days)
params = dict(
    latitude=LATITUDE,
    longitude=LONGITUDE,
    daily=[
        "weather_code",
        "temperature_2m_min",
        "precipitation_sum",
        "wind_gusts_10m_max",
    ],
    timezone=TIMEZONE,
    past_days=1,
    forecast_days=0,
)

# Send the request to the Open-Meteo endpoint
responses = openmeteo.weather_api(BASELINE_URL_OPEN_METEO, params=params)

# A request may cover several locations, so the result is a sequence;
# only one location is queried here, hence the first entry
response = responses[0]

# 3) Feature Engineering

### 3a) Convert request to dataframe

In [None]:
# Process yesterday's data into a Pandas dataframe.
# `response` is the single-location Open-Meteo response extracted in the query cell;
# later cells expect the result to contain at least the 'weather_code_wmo' and 'date' columns.
df_weather_yesterday = process_weather_request(response)

In [None]:
# Show the freshly converted dataframe in the notebook output for a quick visual check
display(df_weather_yesterday)

### 3b) Inspect data

In [None]:
# Cast the WMO weather code column to integers
wmo_codes = df_weather_yesterday['weather_code_wmo']
df_weather_yesterday['weather_code_wmo'] = wmo_codes.astype(int)

# Print the dataframe summary (dtypes and non-null counts) to spot any missing data
df_weather_yesterday.info()

### 3c) Format values

In [None]:
# Derive the month number from the date column and store it as a new 'month' column
dates = pd.to_datetime(df_weather_yesterday['date'])
df_weather_yesterday['month'] = dates.dt.month

In [None]:
# Group WMO codes into labels and new group code label.
# The helper comes from the wildcard imports above; its returned dataframe replaces the current one.
df_weather_yesterday = group_wmo_weather_codes(df_weather_yesterday)

In [None]:
# Add weather code descriptions.
# The helper comes from the wildcard imports above; its returned dataframe replaces the current one.
df_weather_yesterday = add_weather_code_labels(df_weather_yesterday)

In [None]:
# Show the fully engineered dataframe in the notebook output before uploading it
display(df_weather_yesterday)

# 4) Upload to Hopsworks Feature Store

In [None]:
# Connect to Hopsworks.
# No credentials are passed here, so login() has to obtain them on its own
# (presumably from the environment or an interactive prompt — verify for unattended runs).
project = hopsworks.login()

# Retrieve the project's feature store handle, used below to get or create the feature group
fs = project.get_feature_store()

In [None]:
# Retrieve the historical weather feature group, or create it if it does not
# exist yet (no backfill was run and no earlier days were inserted).
historical_weather_fg = fs.get_or_create_feature_group(
    name=FG_HISTORY_NAME,
    description=FG_HISTORY_DESC,
    version=FG_HISTORY_V,
    primary_key=FG_HISTORY_PK,
    # event_time takes the feature name as a plain string; wrapping it in a
    # single-element list is a deprecated form.
    event_time="date",
    statistics_config={
        "enabled": True,
        "histograms": True,
        "correlations": True,
    },
)

# Insert yesterday's data into the feature group.
# wait_for_job=True blocks until the ingestion job has finished, so the new
# data is processed in Hopsworks before the next pipeline tries to use it.
# (The flag was previously False, which contradicted this stated intent.)
historical_weather_fg.insert(
    df_weather_yesterday,
    write_options={"wait_for_job": True},
)