### Install package requirements and import dependencies

In [21]:
!pip install -r requirements.txt --quiet


from dotenv import load_dotenv
import pandas as pd
import requests_cache
import subprocess
from retry_requests import retry
from io import StringIO
import hopsworks
import great_expectations as ge
from datetime import date

### Load Environment variables from the .env file

In [5]:
load_dotenv()

True

### Fetch former ski resorts data

In [31]:
# create supabase credential to authenticate towards endpoint
result = subprocess.run(
    ["curl", "https://abandonedskitowns.com/get_key.php"],    
    capture_output=True,
    text=True,
    check=True
)
api_key = result.stdout

# query the supabase instance for all ski resorts
command = [
    "curl",
    "https://uffrhqrrlipovcnrmgcz.supabase.co/rest/v1/main?select=*",
    "-H",
    f"apikey:{api_key}"
]

result = subprocess.run(
    command,
    capture_output=True,
    text=True,
    check=True
)
closed_resorts_json = result.stdout

# convert closed resorts JSON to pandas object
df_cr = pd.read_json(StringIO(closed_resorts_json))

# filter out all resorts that don't have a closing date
df_cr = df_cr[~df_cr["year_closed"].isna()]
df_cr = df_cr[~df_cr["year_closed"].str.contains("Unknown")]

# filter out all resorts that specify decade instead of exact year
df_cr = df_cr[~df_cr["year_closed"].str.contains("s")]

# convert closed year to int
df_cr['year_closed'] = pd.to_numeric(df_cr['year_closed'], downcast='integer', errors='coerce')

# filter out all resorts which are not in Europe or North America
df_cr = df_cr[(df_cr["area"] == "Europe") | (df_cr["area"] == "North America")]

# filter out all columns except id, name, closing year, latitude, longitude
df_cr = df_cr.filter(items=['id', 'name', 'year_closed', 'latitude', 'longitude'])

print(df_cr)

      id                                name  year_closed  latitude  longitude
0      2                       Splügen-Tambo         1998   46.5528     9.3217
1      5            Oberammergau Lift System         1993   47.5988    11.0674
4      7                        Monte Amiata         2015   42.8965    11.6111
14    10                       Lomnicky Štít         2005   49.1954    20.2135
18    17                 Sölkpass Ski Resort         1999   47.2956    13.7253
..   ...                                 ...          ...       ...        ...
353  492  Luchon-Superbagnères (Old Section)         2003   42.7903     0.5942
354  503          Sperrin Mountains Ski Area         1995   54.7500    -7.0500
358  564            Karawankenblick Ski Area         1993   46.6100    14.0200
386  563              Le Chazelet Ski Resort         2001   45.0400     6.2900
387  567                             La Tuca         1988   42.6356     0.7808

[241 rows x 5 columns]


### Define data validation rule for year
Should not be lower than 1900 and not larger than the current year

In [24]:
closed_resort_expectation_suite = ge.core.ExpectationSuite(
    expectation_suite_name="closed_resort_expectation_suite"
)

closed_resort_expectation_suite.add_expectation(
    ge.core.ExpectationConfiguration(
        expectation_type="expect_column_min_to_be_between",
        kwargs={
            "column":"year_closed",
            "min_value":1900,
            "max_value":date.today().year
        }
    )
)

{"expectation_type": "expect_column_min_to_be_between", "kwargs": {"column": "year_closed", "min_value": 1900, "max_value": 2025}, "meta": {}}

### Log in to hopsworks

In [38]:
project = hopsworks.login()

2025-12-23 17:00:11,153 INFO: Closing external client and cleaning up certificates.
2025-12-23 17:00:11,154 INFO: Connection closed.
2025-12-23 17:00:11,155 INFO: Initializing external client
2025-12-23 17:00:11,155 INFO: Base URL: https://c.app.hopsworks.ai:443
To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'







2025-12-23 17:00:13,208 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1271967


In [39]:
fs = project.get_feature_store()

### Send data to hopsworks feature store

In [35]:
# create/get feature store
closed_resorts_fg = fs.get_or_create_feature_group(
    name='former_resorts',
    description='Ski resorts which have closed down for buisness',
    version=1,
    primary_key=['latitude', 'longitude'],
    expectation_suite=closed_resort_expectation_suite
)

In [40]:
# Insert Dataframe into feature group
closed_resorts_fg.insert(df_cr)

2025-12-23 17:00:22,259 INFO: 	1 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1271967/fs/1258570/fg/1876342


FeatureStoreException: Failed to write to delta table in external cluster. Make sure datanode load balancer has been setup on the cluster.

In [44]:

##### CODE FROM OTHER PROJECT, WHOSE SOLE PURPOSE IS TO PROVIDE INSPIRATION
##### PLEASE REMOVE ONCE NOTEBOOK IS DONE

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
	"longitude": 15.382664,
  "latitude": 65.389107,
	"hourly": ["snow_depth", "snowfall", "temperature_2m", "surface_pressure", "cloud_cover"],
	"past_days": 0,
	"forecast_days": 1,
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates: {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation: {response.Elevation()} m asl")
print(f"Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_snow_depth = hourly.Variables(0).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(1).ValuesAsNumpy()
hourly_temperature_2m = hourly.Variables(2).ValuesAsNumpy()
hourly_surface_pressure = hourly.Variables(3).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(4).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end =  pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["snow_depth"] = hourly_snow_depth
hourly_data["snowfall"] = hourly_snowfall
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["surface_pressure"] = hourly_surface_pressure
hourly_data["cloud_cover"] = hourly_cloud_cover

hourly_dataframe = pd.DataFrame(data = hourly_data)
print("\nHourly data\n", hourly_dataframe)


NameError: name 'openmeteo_requests' is not defined