### Install package requirements and import dependencies

In [1]:
!pip install -r requirements.txt --quiet


from dotenv import load_dotenv
import pandas as pd
import requests_cache
import subprocess
from retry_requests import retry
from io import StringIO
import hopsworks
import great_expectations as ge
from datetime import date


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


### Load Environment variables from the .env file

In [2]:
load_dotenv()

True

### Fetch former ski resorts data

In [3]:
# create supabase credential to authenticate towards endpoint
result = subprocess.run(
    ["curl", "https://abandonedskitowns.com/get_key.php"],    
    capture_output=True,
    text=True,
    check=True
)
api_key = result.stdout

# query the supabase instance for all ski resorts
command = [
    "curl",
    "https://uffrhqrrlipovcnrmgcz.supabase.co/rest/v1/main?select=*",
    "-H",
    f"apikey:{api_key}"
]

result = subprocess.run(
    command,
    capture_output=True,
    text=True,
    check=True
)
closed_resorts_json = result.stdout

### Format former ski resorts data

In [4]:


# convert closed resorts JSON to pandas object
df_cr = pd.read_json(StringIO(closed_resorts_json))

# filter out all resorts that don't have a closing date
df_cr = df_cr[~df_cr["year_closed"].isna()]
df_cr = df_cr[~df_cr["year_closed"].str.contains("Unknown")]

# filter out all resorts that specify decade instead of exact year
df_cr = df_cr[~df_cr["year_closed"].str.contains("s")]

# convert closed year to int
df_cr['year_closed'] = pd.to_numeric(df_cr['year_closed'], downcast='integer', errors='coerce')

# filter out all resorts which are not in Europe or North America
df_cr = df_cr[(df_cr["area"] == "Europe") | (df_cr["area"] == "North America")]

# filter out all columns except id, name, closing year, latitude, longitude
df_cr = df_cr.filter(items=['id', 'name', 'year_closed', 'latitude', 'longitude'])

In [5]:
df_cr.info()

<class 'pandas.core.frame.DataFrame'>
Index: 241 entries, 0 to 387
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           241 non-null    int64  
 1   name         241 non-null    object 
 2   year_closed  241 non-null    int16  
 3   latitude     241 non-null    float64
 4   longitude    241 non-null    float64
dtypes: float64(2), int16(1), int64(1), object(1)
memory usage: 9.9+ KB


### Define data validation rule for year
Should not be lower than 1900 and not larger than the current year

In [6]:
closed_resort_expectation_suite = ge.core.ExpectationSuite(
    expectation_suite_name="closed_resort_expectation_suite"
)

closed_resort_expectation_suite.add_expectation(
    ge.core.ExpectationConfiguration(
        expectation_type="expect_column_min_to_be_between",
        kwargs={
            "column":"year_closed",
            "min_value":1900,
            "max_value":date.today().year
        }
    )
)

{"expectation_type": "expect_column_min_to_be_between", "kwargs": {"column": "year_closed", "min_value": 1900, "max_value": 2025}, "meta": {}}

### Log in to hopsworks

In [7]:
project = hopsworks.login()

2025-12-26 10:18:14,474 INFO: Initializing external client
2025-12-26 10:18:14,475 INFO: Base URL: https://c.app.hopsworks.ai:443
To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'







2025-12-26 10:18:16,427 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1271967


In [8]:
fs = project.get_feature_store()

### Send data to hopsworks feature store

In [20]:
# create/get feature store
closed_resorts_fg = fs.create_feature_group(
    name='former_resorts',
    description='Ski resorts which have closed down for buisness',
    version=1,
    primary_key=['id'],
    expectation_suite=closed_resort_expectation_suite
)

In [21]:
# Insert Dataframe into feature group
closed_resorts_fg.insert(df_cr)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1271967/fs/1258570/fg/1878437
2025-12-26 10:47:26,410 INFO: 	1 expectation(s) included in expectation_suite.
Validation succeeded.
Validation Report saved successfully, explore a summary at https://c.app.hopsworks.ai:443/p/1271967/fs/1258570/fg/1878437


Uploading Dataframe: 100.00% |████| Rows 241/241 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: former_resorts_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1271967/jobs/named/former_resorts_1_offline_fg_materialization/executions


(Job('former_resorts_1_offline_fg_materialization', 'SPARK'),
 {
   "success": true,
   "results": [
     {
       "success": true,
       "expectation_config": {
         "expectation_type": "expect_column_min_to_be_between",
         "kwargs": {
           "column": "year_closed",
           "min_value": 1900,
           "max_value": 2025
         },
         "meta": {
           "expectationId": 800776
         }
       },
       "result": {
         "observed_value": 1956,
         "element_count": 241,
         "missing_count": null,
         "missing_percent": null
       },
       "meta": {
         "ingestionResult": "INGESTED",
         "validationTime": "2025-12-26T09:47:26.000409Z"
       },
       "exception_info": {
         "raised_exception": false,
         "exception_message": null,
         "exception_traceback": null
       }
     }
   ],
   "evaluation_parameters": {},
   "statistics": {
     "evaluated_expectations": 1,
     "successful_expectations": 1,
     "unsu