# Set up environment

In [1]:
import sys
from pathlib import Path


def is_google_colab() -> bool:
    """Report whether this notebook is being executed by Google Colab.

    Returns:
        True when the string form of the active IPython shell object contains
        "google.colab", False otherwise.
    """
    shell_description = str(get_ipython())
    return "google.colab" in shell_description


def clone_repository() -> None:
    """Clone the course repository and switch the working directory into it.

    The body uses IPython shell-escape (`!`) and line-magic (`%`) syntax, so it
    can only run from an IPython/Jupyter cell, not as plain Python.
    """
    # NOTE(review): nothing here checks whether the repository was already cloned,
    # so re-running this cell presumably attempts the clone again — confirm.
    !git clone https://github.com/decodingml/hands-on-recommender-system.git
    # Move into the freshly cloned repository so later relative paths resolve against it.
    %cd hands-on-recommender-system/


def install_dependencies() -> None:
    """Install the project's dependencies (all extras) with `uv`.

    The body uses IPython shell-escape (`!`) syntax, so it can only run from an
    IPython/Jupyter cell.
    """
    # Make sure a recent `uv` is available before using it as the installer.
    !pip install --upgrade uv
    # `pyproject.toml` is a relative path, so this expects the working directory to be
    # the repository root (the Colab branch calls `clone_repository()` first).
    # `--system` targets the system Python environment rather than a virtualenv.
    !uv pip install --all-extras --system --requirement pyproject.toml


# Resolve the project root. On Colab the repository must be cloned and its
# dependencies installed first; locally the root is taken to be the parent of
# the current working directory.
if not is_google_colab():
    root_dir = str(Path.cwd().parent)
    print("⛳️ Local environment")
else:
    clone_repository()
    install_dependencies()

    # `clone_repository()` has already changed into the repository, so the
    # current working directory is the project root here.
    root_dir = str(Path.cwd())
    print("⛳️ Google Colab environment")

# Put the root directory on `sys.path` (once) so the `recsys` package can be imported from this notebook.
if root_dir not in sys.path:
    print(f"Adding the following directory to the PYTHONPATH: {root_dir}")
    sys.path.append(root_dir)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[2mpandas    [0m [32m---------------------------[2m---[0m[0m 10.34 MiB/11.67 MiB
[2mbotocore  [0m [32m----------------------[2m--------[0m[0m 9.09 MiB/12.74 MiB
[2mnvidia-nvjitlink-cu12[0m [32m-----------------------[2m-------[0m[0m 14.84 MiB/20.09 MiB
[2mnvidia-cuda-nvrtc-cu12[0m [32m-------------------[2m-----------[0m[0m 14.79 MiB/23.50 MiB
[2mnvidia-curand-cu12[0m [32m---------[2m---------------------[0m[0m 14.90 MiB/53.70 MiB
[2mcatboost  [0m [32m-----[2m-------------------------[0m[0m 14.56 MiB/93.98 MiB
[2mnvidia-cusolver-cu12[0m [32m----[2m--------------------------[0m[0m 14.83 MiB/122.01 MiB
[2mnvidia-cusparse-cu12[0m [32m---[2m---------------------------[0m[0m 15.02 MiB/197.84 MiB
[2mnvidia-cufft-cu12[0m [32m---[2m---------------------------[0m[0m 15.06 MiB/201.66 MiB
[2mnvidia-cublas-cu12[0m [32m--[2m----------------------------[0m[0m 14.72 MiB/346.60 

# <span style="color:#ff5f27"> Scheduling Hopsworks materialization jobs </span>


## 📝 Imports

In [2]:
from datetime import datetime, timezone

from recsys import hopsworks_integration

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [3]:
# Log in through the project's `recsys.hopsworks_integration` helper, which returns
# two values unpacked here as the project handle and the feature store handle.
# NOTE(review): `fs` is not referenced by any other cell visible in this notebook.
project, fs = hopsworks_integration.get_feature_store()

# The jobs API handle is what the cells below use to look up jobs by name.
jobs_api = project.get_jobs_api()

[32m2025-02-19 05:05:48.210[0m | [1mINFO    [0m | [36mrecsys.hopsworks_integration.feature_store[0m:[36mget_feature_store[0m:[36m18[0m - [1mLogin to Hopsworks using cached API key.[0m


Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated

Paste it here: ··········

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1213603


## Retrieving materialization jobs


In [4]:
# Look up the already-existing job by its name; the bare variable on the last
# line makes the notebook display the returned job object.
interactions_job = jobs_api.get_job("interactions_1_offline_fg_materialization")
interactions_job

Job('interactions_1_offline_fg_materialization', 'SPARK')

In [5]:
# Look up the already-existing job by its name; the bare variable on the last
# line makes the notebook display the returned job object.
transactions_job = jobs_api.get_job("transactions_1_offline_fg_materialization")
transactions_job

Job('transactions_1_offline_fg_materialization', 'SPARK')

## Running materialization jobs


In [6]:
# Trigger one execution of the job now and keep the returned execution object.
# NOTE(review): the saved output shows the execution already in a finished state,
# so `run()` presumably blocks until the job completes — confirm.
interactions_job_execution = interactions_job.run()
interactions_job_execution

Launching job: interactions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1213603/jobs/named/interactions_1_offline_fg_materialization/executions


Execution('SUCCEEDED', 'FINISHED', '2025-02-19T05:06:08.000Z', '-op offline_fg_materialization -path hdfs:///Projects/recsys_h_m/Resources/jobs/interactions_1_offline_fg_materialization/config_1739275646900')

In [8]:
# Trigger one execution of the job now and keep the returned execution object.
# NOTE(review): the saved output shows the execution already in a finished state,
# so `run()` presumably blocks until the job completes — confirm.
transactions_job_execution = transactions_job.run()
transactions_job_execution

Launching job: transactions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1213603/jobs/named/transactions_1_offline_fg_materialization/executions


Execution('SUCCEEDED', 'FINISHED', '2025-02-19T05:11:38.000Z', '-op offline_fg_materialization -path hdfs:///Projects/recsys_h_m/Resources/jobs/transactions_1_offline_fg_materialization/config_1739275464635')

## <span style="color:#ff5f27">⏰ Scheduling materialization jobs </span>


In [None]:
# Attach a recurring schedule to the job. The cron expression has six fields,
# presumably Quartz-style (second minute hour day-of-month month day-of-week) — confirm.
interactions_job.schedule(
    cron_expression="0 0 0 * * ?",  # Runs at midnight (00:00:00) every day
    start_time=datetime.now(tz=timezone.utc),  # timezone-aware "now" in UTC
)
# Display the next time the schedule is due to fire.
interactions_job.job_schedule.next_execution_date_time

datetime.datetime(2024, 12, 26, 0, 0, tzinfo=datetime.timezone.utc)

In [None]:
# Attach a recurring schedule to the job. The cron expression has six fields,
# presumably Quartz-style (second minute hour day-of-month month day-of-week) — confirm.
transactions_job.schedule(
    cron_expression="0 0 0 * * ?",  # Runs at midnight (00:00:00) every day
    start_time=datetime.now(tz=timezone.utc),  # timezone-aware "now" in UTC
)
# Display the next time the schedule is due to fire.
transactions_job.job_schedule.next_execution_date_time

datetime.datetime(2024, 12, 26, 0, 0, tzinfo=datetime.timezone.utc)

## <span style="color:#ff5f27"> Inspecting the materialization jobs in Hopsworks UI </span>

View results in [Hopsworks Serverless](https://rebrand.ly/serverless-github): **Compute → Ingestions**