# Setup

In [None]:
import hopsworks
from hsfs.feature import Feature
import json

In [None]:
# Log in to Hopsworks and keep handles to the project, its feature store and
# its Kafka API; the cells below use all three.
project = hopsworks.login()
fs = project.get_feature_store()
kafka_api = project.get_kafka_api()

In [None]:
# Kafka topic that carries the raw clickstream events, and the name under
# which its Avro schema is registered.
EVENTS_TOPIC = "clickstream_events"
EVENTS_SCHEMA_NAME = "clickstream_events_schema"

# Avro record describing a single event. Every field is declared as a
# ["null", <type>] union, i.e. it is nullable.
events_schema = {
    "type": "record",
    "name": EVENTS_SCHEMA_NAME,
    "fields": [
        {"name": "user_id", "type": ["null", "string"]},
        {"name": "event_type", "type": ["null", "string"]},
        {"name": "timestamp", "type": ["null", "long"]},
    ],
}

# Only register the schema and create the topic when the topic does not exist
# yet, so that re-running this cell does not attempt to create them twice.
if EVENTS_TOPIC not in [topic.name for topic in kafka_api.get_topics()]:
    kafka_api.create_schema(EVENTS_SCHEMA_NAME, events_schema)
    # The positional 1 is presumably the schema version to bind the topic to
    # -- confirm against the KafkaApi.create_topic documentation.
    kafka_api.create_topic(EVENTS_TOPIC, EVENTS_SCHEMA_NAME, 1, replicas=1, partitions=1)

## Feature Group for CTR

In [None]:
# The CTR topic name is suffixed with the project id.
CTR_TOPIC = f"ctr_5min_{project.id}"

# Columns of the CTR feature group.
ctr_features = [
    Feature("user_id", type="string"),
    Feature("impressions", type="bigint"),
    Feature("clicks", type="bigint"),
    Feature("ctr", type="double"),
    Feature("window_end", type="timestamp"),
]

# Online-enabled stream feature group keyed by user_id, using window_end as
# its event time and CTR_TOPIC as its Kafka topic.
ctr_fg = fs.get_or_create_feature_group(
    name="ctr_5min_pyspark",
    version=1,
    features=ctr_features,
    primary_key=["user_id"],
    event_time="window_end",
    topic_name=CTR_TOPIC,
    stream=True,
    online_enabled=True,
)

ctr_fg.save()

# Create the schema and the topic from the feature group's Avro schema, but
# only when no topic with that name exists yet.
existing_topic_names = {topic.name for topic in kafka_api.get_topics()}
if CTR_TOPIC not in existing_topic_names:
    ctr_avro_schema = json.loads(ctr_fg.avro_schema)
    kafka_api.create_schema(CTR_TOPIC, ctr_avro_schema)
    kafka_api.create_topic(CTR_TOPIC, CTR_TOPIC, 1, replicas=1, partitions=1)