## <span style='color:#ff5f27'> 📝 Imports</span>

In [None]:
import pandas as pd
import numpy as np
import great_expectations as ge
from great_expectations.core import ExpectationSuite, ExpectationConfiguration

## <span style='color:#ff5f27'> 🎥 Fetch Content Data</span>


In [None]:
# Remote Parquet file that holds the videos data
videos_parquet_url = "https://repo.hops.works/dev/davit/tiktok_recsys/videos.parquet"

# Read the Parquet file into a pandas DataFrame
data_video_df = pd.read_parquet(videos_parquet_url)

## <span style="color:#ff5f27">👮🏻‍♂️ Great Expectations </span>

In [None]:
# Wrap the pandas DataFrame so Great Expectations can operate on it
ge_video_df = ge.from_pandas(data_video_df)

# Take the suite attached to the wrapped DataFrame and give it a name
expectation_suite_videos = ge_video_df.get_expectation_suite()
expectation_suite_videos.expectation_suite_name = "video_data_suite"

# Expectations are declared up front and registered in this order:
#   1. video_length has a lower bound of 0 and no upper bound
#   2. upload_date values must be parseable by dateutil
video_expectations = [
    ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_between",
        kwargs={"column": "video_length", "min_value": 0, "max_value": None},
    ),
    ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_dateutil_parseable",
        kwargs={"column": "upload_date"},
    ),
]

for expectation in video_expectations:
    expectation_suite_videos.add_expectation(expectation)

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks; the returned object is kept as the project handle
project = hopsworks.login()

# Get the project's feature store, used below to create the feature group
fs = project.get_feature_store()

## <span style="color:#ff5f27">🪄 Feature Group Creation </span>


In [None]:
# Statistics settings for the feature group: statistics, histograms and
# correlations are all switched on
videos_statistics_config = {
    "enabled": True,
    "histograms": True,
    "correlations": True,
}

# Fetch the "videos" feature group (version 1) or create it if it is missing,
# with the expectation suite attached
videos_fg = fs.get_or_create_feature_group(
    name="videos",
    version=1,
    description="Videos data.",
    primary_key=["video_id"],
    partition_key=["upload_month"],
    event_time="upload_date",
    online_enabled=True,
    expectation_suite=expectation_suite_videos,
    statistics_config=videos_statistics_config,
)

# Write the videos DataFrame into the feature group
videos_fg.insert(data_video_df)
print("Done ✅")

In [None]:
# Human-readable description for each feature of the videos feature group
feature_descriptions = [
    {"name": "video_id", "description": "Identifier for the video."},
    {"name": "category_id", "description": "Id of the video category."},
    {"name": "category", "description": "Name of the video category."},
    {"name": "video_length", "description": "Video length in seconds."},
    {"name": "upload_date", "description": "Date of upload for the video."},
    {"name": "upload_month", "description": "Month of upload for the video, derived from upload_date."},
]

# Attach each description to its feature in the feature group
for desc in feature_descriptions:
    videos_fg.update_feature_description(desc["name"], desc["description"])

---