## <span style='color:#ff5f27'> 📝 Imports</span>

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timezone

## <span style='color:#ff5f27'> 🔗 Fetch historical interactions dataset</span>

In [None]:
# Location of the historical interactions parquet file. The resulting frame is
# what gets backfilled into the interactions feature group.
interactions_parquet_url = 'https://repo.hops.works/dev/davit/tiktok_recsys/interactions.parquet'
data_interactions_df = pd.read_parquet(interactions_parquet_url)

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks and keep the returned handle as `project`.
# NOTE(review): login() is called with no arguments, so the host/credentials
# presumably come from the environment or an interactive prompt — confirm.
project = hopsworks.login()

# Feature store handle obtained from the project; used to create the
# feature group.
fs = project.get_feature_store()

## <span style="color:#ff5f27">🪄 Feature Group Creation </span>

In [None]:
# Statistics settings passed to the feature group: statistics are enabled,
# with histograms and correlations switched on as well.
interactions_statistics = {
    "enabled": True,
    "histograms": True,
    "correlations": True,
}

# Get (or create) version 1 of the "interactions" feature group. Rows are keyed
# by interaction/user/video id, partitioned by interaction month, and use
# interaction_date as the event time.
interactions_fg = fs.get_or_create_feature_group(
    name="interactions",
    version=1,
    description="Interactions data.",
    primary_key=["interaction_id", "user_id", "video_id"],
    partition_key=["interaction_month"],
    online_enabled=True,
    event_time="interaction_date",
    statistics_config=interactions_statistics,
)

# Backfill the feature group with the historical interactions frame.
interactions_fg.insert(data_interactions_df)
print('Done ✅')

In [None]:
# Human-readable descriptions for the features of the interactions feature
# group. Each "name" must match a feature name in the feature group; the first
# entry uses "interaction_id" so that it agrees with the primary key declared
# when the feature group was created (there is no feature called "id").
feature_descriptions = [
    {"name": "interaction_id", "description": "Unique id for the interaction"},
    {"name": "user_id", "description": "Unique identifier for each user."},
    {"name": "video_id", "description": "Identifier for the video."},
    {"name": "category_id", "description": "Id of the video category."},
    {"name": "interaction_type", "description": "Type of interaction"},
    {"name": "watch_time", "description": "Time in seconds how long user watched the video."},
    {"name": "interaction_date", "description": "Date of interaction."},
    {"name": "interaction_month", "description": "Month of interaction, derived from interaction_date."},
]

# Attach each description to its feature on the feature group.
for desc in feature_descriptions:
    interactions_fg.update_feature_description(desc["name"], desc["description"])

In [None]:
# Schedule the feature group's materialization job with the cron expression
# below, starting from the current UTC time.
# NOTE(review): the six-field expression looks like Quartz syntax
# (sec min hour day-of-month month day-of-week), i.e. every 15 minutes — confirm.
materialization_cron = "0 */15 * ? * *"
schedule_start = datetime.now(tz=timezone.utc)

interactions_fg.materialization_job.schedule(
    cron_expression=materialization_cron,
    start_time=schedule_start,
)


---