# Feature Backfill

1. Get historical avalanche warnings for the resort locations
2. Get historical weather data + terrain data
3. Store data in feature groups - avalanche_warnings, weather_data, terrain_data

## Imports

In [1]:
import hopsworks
import sys
from pathlib import Path
import warnings
from dotenv import load_dotenv
import os
from util import *
import datetime
from locations import resort_locations
from dateutil.relativedelta import relativedelta
warnings.filterwarnings("ignore", module="IPython")

## Connect to Hopsworks

In [2]:
# Load HOPSWORKS_API_KEY from a local .env file into the process environment.
# Keep the .env file out of version control; in CI or on another machine, set
# HOPSWORKS_API_KEY as a regular environment variable instead (load_dotenv()
# simply does nothing when no .env file is present).
load_dotenv()

api_key_value = os.getenv("HOPSWORKS_API_KEY")

# Pass the key explicitly so it is obvious which credential is used.
# NOTE(review): when the variable is unset this passes None, which should let
# the Hopsworks client fall back to its own key lookup — confirm against the
# hopsworks.login documentation.
project = hopsworks.login(
    host="eu-west.cloud.hopsworks.ai",             # DNS of your Hopsworks instance
    project="ID2223_Project",
    api_key_value=api_key_value,
)

2025-12-20 11:30:03,227 INFO: Initializing external client
2025-12-20 11:30:03,228 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-20 11:30:04,410 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/2173


## Historical warnings data

In [3]:
# Local cache directory for the raw backfill downloads.
os.makedirs("historical data", exist_ok=True)
warnings_cache_path = "historical data/warnings.pkl"

# Backfill window: the five years up to today, as 'YYYY-MM-DD' strings.
# A single timestamp is sampled so both ends derive from the same instant.
# For a quick smoke test, replace relativedelta(years=5) with
# datetime.timedelta(days=1).
backfill_now = datetime.datetime.now()
start_date = (backfill_now - relativedelta(years=5)).strftime('%Y-%m-%d')
end_date = backfill_now.strftime('%Y-%m-%d')

if not os.path.exists(warnings_cache_path):
    rows = []

    for location, (lat, lon) in resort_locations.items():
        print(f"Fetching {location}")

        # The window is requested in 60-day chunks rather than in one call.
        for chunk_start, chunk_end in date_chunks(start_date, end_date, chunk_days=60):
            # Deliberately not named `warnings`: that would shadow the
            # `warnings` module imported at the top of the notebook.
            chunk_warnings = get_warning_data(chunk_start, chunk_end, lat, lon)

            print(chunk_start, chunk_end)

            rows.extend(
                {
                    "location": location,
                    "latitude": lat,
                    "longitude": lon,
                    "date": w.get("ValidFrom"),
                    "warning_level": w.get("DangerLevel"),
                }
                for w in chunk_warnings
            )

            # Short pause between consecutive requests.
            time.sleep(0.2)

    # Explicit columns keep the schema stable even when nothing was returned;
    # without them an empty `rows` would make the "date" lookup below fail.
    warning_data_df = pd.DataFrame(
        rows,
        columns=["location", "latitude", "longitude", "date", "warning_level"],
    )
    # Keep only the calendar date of each warning's ValidFrom timestamp.
    warning_data_df["date"] = pd.to_datetime(warning_data_df["date"]).dt.date

    warning_data_df.to_pickle(warnings_cache_path)
else:
    # The cached frame is reused as-is, whatever window it was fetched for.
    # Delete the pickle to force a fresh download.
    warning_data_df = pd.read_pickle(warnings_cache_path)

In [None]:
# Sanity check: show the warning history of a single resort.
is_strandafjellet = warning_data_df["location"].eq("Strandafjellet Skisenter")
warning_data_df.loc[is_strandafjellet]


Unnamed: 0,location,latitude,longitude,date,warning_level
1827,Strandafjellet Skisenter,62.399663,6.899585,2020-12-20,1
1828,Strandafjellet Skisenter,62.399663,6.899585,2020-12-21,1
1829,Strandafjellet Skisenter,62.399663,6.899585,2020-12-22,2
1830,Strandafjellet Skisenter,62.399663,6.899585,2020-12-23,2
1831,Strandafjellet Skisenter,62.399663,6.899585,2020-12-24,2
...,...,...,...,...,...
3649,Strandafjellet Skisenter,62.399663,6.899585,2025-12-16,1
3650,Strandafjellet Skisenter,62.399663,6.899585,2025-12-17,1
3651,Strandafjellet Skisenter,62.399663,6.899585,2025-12-18,2
3652,Strandafjellet Skisenter,62.399663,6.899585,2025-12-19,2


In [7]:
# Total number of warning rows across all resorts.
warning_data_df.shape[0]

36540

## Historical weather data

In [5]:
# Fetch the weather history for every resort over the same backfill window
# (start_date / end_date) and stack the per-resort frames into one table.
#
# The loop pauses for 60 s after every 5th request — presumably to stay under
# the weather API's rate limit (TODO confirm). The pause is skipped after the
# final resort, where there is no following request to protect.
dfs = []
n_locations = len(resort_locations)
for i, (loc, (lat, lon)) in enumerate(resort_locations.items(), start=1):
    # Note the argument order: longitude is passed before latitude.
    dfs.append(get_historical_weather(loc, start_date, end_date, lon, lat))
    if i % 5 == 0 and i < n_locations:
        time.sleep(60)


weather_df = pd.concat(dfs, ignore_index=True)

Coordinates 68.50614929199219°N 17.129032135009766°E
Elevation 0.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 62.39015579223633°N 6.574306964874268°E
Elevation 161.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 60.140594482421875°N 10.699300765991211°E
Elevation 504.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 60.21089553833008°N 9.252336502075195°E
Elevation 885.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 61.265377044677734°N 10.46004867553711°E
Elevation 691.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 61.47627258300781°N 9.87804889678955°E
Elevation 880.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 59.789100646972656°N 10.161290168762207°E
Elevation 378.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 60.632686614990234°N 6.611374378204346°E
Elevation 442.0 m asl
Timezone None None
Timezone difference t

In [6]:
weather_df

Unnamed: 0,date,temperature_2m_mean,precipitation_sum,rain_sum,snowfall_sum,wind_speed_10m_max,wind_direction_10m_dominant,location
0,2020-12-20,2.729083,6.1,6.1,0.00,11.808878,77.291313,Narvik Ski Resort
1,2020-12-21,4.462417,4.4,4.0,0.28,11.457958,172.698822,Narvik Ski Resort
2,2020-12-22,-0.098000,0.0,0.0,0.00,15.277749,56.430901,Narvik Ski Resort
3,2020-12-23,-0.210500,0.2,0.0,0.14,14.113652,53.711338,Narvik Ski Resort
4,2020-12-24,-2.233417,1.1,0.0,0.77,10.086427,72.506844,Narvik Ski Resort
...,...,...,...,...,...,...,...,...
36535,2025-12-16,-5.006583,0.5,0.0,0.35,5.840993,120.320610,Bjorli Ski
36536,2025-12-17,-7.133667,0.0,0.0,0.00,9.290511,125.703499,Bjorli Ski
36537,2025-12-18,-3.406583,6.0,0.0,4.20,8.891343,120.196571,Bjorli Ski
36538,2025-12-19,-2.587833,4.7,0.0,3.29,11.666721,133.641022,Bjorli Ski


## Terrain data

## Add to Hopsworks

### Warnings

### Weather

### Terrain