# Feature pipeline
* Query new weather data
* Feature Group Insertion

In [1]:
# Third-party libraries
import pandas as pd
import hopsworks
import matplotlib.pyplot as plt
# NOTE(review): star import. Helpers called later in this notebook
# (get_openmeteo_connection, process_weather_request, group_wmo_weather_codes)
# are not defined here, so they presumably come from this module; confirm
# and consider importing them by name.
from weather_utils import *

import sys
sys.path.append('..')  # Add the parent directory (project root) to the Python path
# NOTE(review): star import. Constants used later (LATITUDE, LONGITUDE,
# TIMEZONE, BASELINE_URL_OPEN_METEO, FEATURE_GROUP_HISTORY) presumably come
# from config, which appears to rely on the sys.path tweak above; confirm.
from config import *

# Suppress all Python warnings from here on (this also hides deprecation
# notices from pandas and hopsworks)
import warnings
warnings.filterwarnings("ignore")

# 1) Query Weather Data

In [5]:
# Open a client for the Open-Meteo API
openmeteo = get_openmeteo_connection()

# Daily variables to request
daily_variables = [
    "weather_code",
    "temperature_2m_min",
    "precipitation_sum",
    "wind_gusts_10m_max",
]

# Request parameters: configured location and timezone, one past day,
# no forecast days
params = dict(
    latitude=LATITUDE,
    longitude=LONGITUDE,
    daily=daily_variables,
    timezone=TIMEZONE,
    past_days=1,
    forecast_days=0,
)

# Send the request
responses = openmeteo.weather_api(BASELINE_URL_OPEN_METEO, params=params)

# The API can answer for several locations at once; a single location is
# queried here, so keep the first entry
response = responses[0]

# 3) Feature Engineering

### 3a) Convert request to dataframe

In [6]:
# Convert the Open-Meteo response into a pandas DataFrame.
# process_weather_request is not defined in this notebook (presumably it comes
# from the weather_utils star import). Judging by the displayed output the
# frame has the columns date, weather_code_wmo, temperature_min,
# precipitation_sum and wind_gusts_max.
df_weather_yesterday = process_weather_request(response)

In [7]:
# Show the freshly built frame as the cell output for a visual sanity check
df_weather_yesterday

Unnamed: 0,date,weather_code_wmo,temperature_min,precipitation_sum,wind_gusts_max
0,2023-11-12,3.0,0.6675,0.0,31.319998
1,2023-11-13,51.0,1.4175,0.6,27.0


### 3b) Inspect data

In [8]:
# Fail early with a clear message if any WMO code is missing: NaN cannot be
# stored in an integer column, and the cast below would otherwise abort with a
# less specific error
missing_codes = df_weather_yesterday['weather_code_wmo'].isna().sum()
if missing_codes:
    raise ValueError(
        f"{missing_codes} row(s) have no WMO weather code; cannot cast the column to integer"
    )

# Cast the WMO weather codes to a fixed 64-bit integer. Plain `int` maps to a
# platform-dependent width (32-bit on Windows with NumPy < 2), which would
# make the column dtype differ between machines.
df_weather_yesterday['weather_code_wmo'] = df_weather_yesterday['weather_code_wmo'].astype('int64')

# Summarise dtypes and non-null counts to spot missing data in the other columns
df_weather_yesterday.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   date               1 non-null      object 
 1   weather_code_wmo   1 non-null      int64  
 2   temperature_min    1 non-null      float32
 3   precipitation_sum  1 non-null      float32
 4   wind_gusts_max     1 non-null      float32
dtypes: float32(3), int64(1), object(1)
memory usage: 160.0+ bytes


### 3c) Format values

In [9]:
# Parse the date column, then store its calendar month as an integer column
parsed_dates = pd.to_datetime(df_weather_yesterday['date'])
df_weather_yesterday['month'] = parsed_dates.dt.month

In [10]:
# Group the raw WMO codes into coarser weather categories.
# group_wmo_weather_codes is not defined in this notebook (presumably it comes
# from the weather_utils star import). Judging by the displayed output it adds
# a text column weather_code_label and a numeric group column weather_code.
df_weather_yesterday = group_wmo_weather_codes(df_weather_yesterday)

In [11]:
# Print the engineered features as plain text (written to stdout, so it is
# also visible in logs when the notebook is executed non-interactively)
print(df_weather_yesterday)

Unnamed: 0,date,weather_code_wmo,temperature_min,precipitation_sum,wind_gusts_max,month,weather_code_label,weather_code
0,2023-11-05,80,8.487,1.8,60.839996,11,"Rain showers: Slight, moderate, and violent",10


# 4) Hopsworks Feature Store

In [12]:
# Log in to Hopsworks and keep a handle on the project.
# NOTE(review): login() is called without arguments, so credentials must come
# from the environment or an interactive prompt; confirm how the scheduled
# run provides them.
project = hopsworks.login()

# Get the project's feature store, used below to look up the feature group
fs = project.get_feature_store() 

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/178324
Connected. Call `.close()` to terminate connection gracefully.


In [None]:
# TODO: check that the new data has not already been inserted into the feature group (not implemented yet)

In [13]:
# Get a handle on version 1 of the historical weather feature group
# (get_or_create: the group is created if it does not exist yet)
weather_fg = fs.get_or_create_feature_group(name=FEATURE_GROUP_HISTORY, version=1)

# Upload the engineered features; the call's return value is left as the
# last expression so that it is shown as the cell output
weather_fg.insert(df_weather_yesterday)



Uploading Dataframe: 0.00% |          | Rows 0/1 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: weather_fg_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/178324/jobs/named/weather_fg_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x12c6b7550>, None)