# Daily Pipeline

In [18]:
import hopsworks
import sys
from pathlib import Path
import warnings
from dotenv import load_dotenv
import os
# NOTE(review): star import. Later cells use `time`, `pd`, `get_warning_data`
# and `get_hourly_weather_forecast` without importing them in this cell, so
# they presumably arrive through this line -- verify against util and
# consider importing them explicitly.
from util import *
# Placed after the star import, so this binding of `datetime` (the module)
# wins if util also exports a name called `datetime`.
import datetime
from locations import resort_locations
from dateutil.relativedelta import relativedelta
# Silence warnings whose originating module name matches "IPython".
warnings.filterwarnings("ignore", module="IPython")

In [19]:
# Log in to the Hopsworks project and obtain its feature store handle.
project = hopsworks.login(
    project="ID2223_Project",
    host="eu-west.cloud.hopsworks.ai",  # DNS name of the Hopsworks instance
)
fs = project.get_feature_store()

# Current local date as a YYYY-MM-DD string.
today = datetime.date.today().strftime("%Y-%m-%d")

# Handles to the two feature groups used by the cells below; the versions
# are pinned explicitly.
warning_fg = fs.get_feature_group(name="avalanche_warning", version=3)
weather_fg = fs.get_feature_group(name="weather_sensor", version=2)

2025-12-28 18:38:02,944 INFO: Closing external client and cleaning up certificates.
2025-12-28 18:38:02,953 INFO: Connection closed.
2025-12-28 18:38:02,957 INFO: Initializing external client
2025-12-28 18:38:02,958 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-28 18:38:03,877 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/2173


## Warnings

In [20]:
# Load the current contents of the avalanche-warning feature group so the
# existing data can be inspected before today's rows are appended.
warning_data_df = warning_fg.read()
# Bare last expression: rendered as the cell's output.
warning_data_df

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.39s) 


Unnamed: 0,location,latitude,longitude,date,warning_level
0,Rauland Skisenter,59.819122,8.161512,2025-05-01 00:00:00+00:00,1
1,Hafjell,61.298054,10.403024,2023-06-22 00:00:00+00:00,0
2,SkiGeilo,60.568687,8.144531,2024-02-22 00:00:00+00:00,2
3,Vrådal Panorama,59.365932,8.430677,2023-01-11 00:00:00+00:00,0
4,Galdhøpiggen Summer Ski Centre,61.748779,8.373905,2023-05-29 00:00:00+00:00,1
...,...,...,...,...,...
36695,Sauda Ski Centre,59.653477,6.223121,2025-01-23 00:00:00+00:00,1
36696,Kvitfjell ski resort,61.510162,10.087527,2025-09-16 00:00:00+00:00,0
36697,Skimore Oslo,60.163288,10.743011,2021-09-09 00:00:00+00:00,0
36698,Skimore Oslo,60.163288,10.743011,2020-12-21 00:00:00+00:00,0


In [21]:
# Fetch today's avalanche warnings for every resort and flatten them into
# one record per (resort, warning) pair. `rows` is consumed by the next cell.
rows = []
for location, (lat, lon) in resort_locations.items():
    # Deliberately NOT named `warnings`: the notebook imports the stdlib
    # `warnings` module in its first cell, and rebinding that name here would
    # shadow the module for every cell executed afterwards.
    # `today` is passed twice -- presumably as the start and end of the
    # requested date range; confirm against util.get_warning_data.
    location_warnings = get_warning_data(today, today, lat, lon)

    for w in location_warnings:
        rows.append({
            "location": location,
            "latitude": lat,
            "longitude": lon,
            "date": w.get("ValidFrom"),
            "warning_level": w.get("DangerLevel"),
        })
    # Brief pause between requests (presumably to stay polite towards the
    # warning API -- confirm whether a rate limit applies).
    time.sleep(0.2)


In [22]:
# Turn the fetched warning records into a typed DataFrame and append it to
# the avalanche-warning feature group.
if not rows:
    # An empty fetch would otherwise surface as an opaque KeyError on the
    # first column access below; fail with an explicit message instead.
    raise ValueError("No avalanche warnings were fetched; nothing to insert.")

# Cast the numeric columns in one pass.
warning_data_today_df = pd.DataFrame(rows).astype(
    {"latitude": "float32", "longitude": "float32", "warning_level": "int32"}
)
# Parse the timestamp strings with an explicit format; the result is a
# timezone-naive datetime64 column.
warning_data_today_df["date"] = pd.to_datetime(
    warning_data_today_df["date"], format="%Y-%m-%dT%H:%M:%S"
)
# Print the schema so the dtypes can be checked before uploading.
warning_data_today_df.info()

warning_fg.insert(warning_data_today_df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   location       20 non-null     object        
 1   latitude       20 non-null     float32       
 2   longitude      20 non-null     float32       
 3   date           20 non-null     datetime64[ns]
dtypes: datetime64[ns](1), float32(2), int32(1), object(1)
memory usage: 688.0+ bytes


Uploading Dataframe: 100.00% |██████████| Rows 20/20 | Elapsed Time: 00:00 | Remaining Time: 00:00


Job started successfully, you can follow the progress at 




## Weather

In [23]:
# Collect one forecast frame per resort, keeping only the rows around noon,
# then upload the combined frame to the weather feature group.
dfs = []
for loc, (lat, lon) in resort_locations.items():
    # NOTE(review): coordinates are passed as (lon, lat) here, whereas the
    # avalanche-warning fetch passes (lat, lon) -- confirm the parameter
    # order expected by get_hourly_weather_forecast.
    hourly_df = get_hourly_weather_forecast(loc, lon, lat).set_index('date')
    # Keep only rows whose time of day lies between 11:59 and 12:01
    # (presumably the single noon reading of each forecast day).
    daily_df = hourly_df.between_time('11:59', '12:01').reset_index()
    dfs.append(daily_df)

weather_daily_df = pd.concat(dfs, ignore_index=True)
print(len(weather_daily_df))
# wait=True is passed so the call waits for the ingestion job before returning.
weather_fg.insert(weather_daily_df, wait=True)


140
2025-12-28 18:38:29,000 INFO: 	4 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://eu-west.cloud.hopsworks.ai:443/p/2173/fs/2122/fg/2202


Uploading Dataframe: 100.00% |██████████| Rows 140/140 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: weather_sensor_2_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/2173/jobs/named/weather_sensor_2_offline_fg_materialization/executions
2025-12-28 18:38:40,374 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-12-28 18:38:43,488 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-12-28 18:41:56,447 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-12-28 18:41:56,526 INFO: Waiting for log aggregation to finish.
2025-12-28 18:42:11,257 INFO: Execution finished successfully.


(Job('weather_sensor_2_offline_fg_materialization', 'SPARK'),
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
         "kwargs": {
           "column": "wind_speed_10m_max",
           "min_value": -0.1,
           "max_value": 1000.0,
           "strict_min": true
         },
         "meta": {
           "expectationId": 2109
         }
       },
       "result": {
         "observed_value": 0.8049845099449158,
         "element_count": 140,
         "missing_count": null,
         "missing_percent": null
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2025-12-28T05:38:28.000999Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
     },
     {
       "success": true,
       "expectation_config": {
         "expectation