# Setup

In [None]:
import hopsworks
from hsfs.feature import Feature
import json

In [None]:
# Log in to Hopsworks; the returned project handle is reused by later cells
# (for example, `project.id` is embedded in the Kafka topic name).
project = hopsworks.login()
# Feature store handle, used below to create the CTR feature group.
fs = project.get_feature_store()
# Kafka API handle, used below to manage topics/schemas and to obtain the
# producer configuration.
kafka_api = project.get_kafka_api()

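## Create Kafka Topic for Events

**TODO:** this section has no code cell. The producer cell further down writes to `EVENTS_TOPIC`, which must be defined (and the topic created) here for the notebook to run top to bottom.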

## Feature Group for CTR

With `stream=True`, Hopsworks automatically ingests records from the feature group's Kafka topic into the online store.

In [None]:
# Kafka topic dedicated to this feature group; suffixed with the project id
# so the name does not clash across projects.
CTR_TOPIC = f"ctr_5min_{project.id}"

# Per-user click-through-rate aggregates, keyed by `user_id` with
# `window_end` as the event time.
ctr_fg = fs.get_or_create_feature_group(
    name="ctr_5min",
    version=1,
    description="Click-through rate calculated over 5-minute tumbling windows",
    primary_key=["user_id"],
    event_time="window_end",
    online_enabled=True,
    stream=True,              # ← Enables streaming ingestion from Kafka
    topic_name=CTR_TOPIC,     # ← Kafka topic Hopsworks listens to
    features=[
        Feature("user_id", type="string"),
        Feature("impressions", type="bigint"),
        Feature("clicks", type="bigint"),
        Feature("ctr", type="double"),
        Feature("window_end", type="timestamp"),
    ],
)

# No dataframe is passed: the schema comes from the explicit `features` list
# above, so this registers the feature group without inserting any rows.
# NOTE(review): confirm save() is safe to repeat when the group already exists.
ctr_fg.save()

# Create Kafka topic for feature group (skipped when it already exists, so the
# cell can be re-run). The topic's schema is registered under the same name as
# the topic and is built from the feature group's Avro schema.
existing_topics = {topic.name for topic in kafka_api.get_topics()}
if CTR_TOPIC not in existing_topics:
    kafka_api.create_schema(CTR_TOPIC, json.loads(ctr_fg.avro_schema))
    kafka_api.create_topic(CTR_TOPIC, CTR_TOPIC, 1, replicas=1, partitions=1)

print("✅ Created feature group: ctr_5min")
print(f"   - Online enabled: {ctr_fg.online_enabled}")
print(f"   - Stream enabled: {ctr_fg.stream}")
print(f"   - Kafka topic: {CTR_TOPIC}")
print(f"\n📝 Hopsworks will now auto-ingest features written to: {CTR_TOPIC}")

## Send Sample Events to Kafka

In [None]:
from confluent_kafka import Producer
import time
import random
from datetime import datetime

N_EVENTS = 100          # total number of synthetic events to send
FLUSH_EVERY = 10        # flush and report progress after this many events
SEND_INTERVAL_S = 0.1   # pause between consecutive events, in seconds

# Fail fast, before any producer is created, if the raw-events topic name was
# never defined. Nothing in this cell defines EVENTS_TOPIC; it has to come
# from an earlier cell.
try:
    events_topic = EVENTS_TOPIC
except NameError:
    raise NameError(
        "EVENTS_TOPIC is not defined. Define it (and create the topic) in the "
        "'Create Kafka Topic for Events' section before running this cell."
    ) from None

kafka_config = kafka_api.get_default_config()
producer = Producer(kafka_config)

users = [f"user_{i:03d}" for i in range(10)]
# Ten impressions for every click, i.e. clicks / impressions = 10% CTR.
event_types = ["impression"] * 10 + ["click"]


def _report_delivery_error(err, msg):
    """Delivery callback: print messages that could not be delivered."""
    if err is not None:
        print(f"Delivery failed for event on {msg.topic()}: {err}")


print("Sending events... (Ctrl+C to stop)")

sent = 0
interrupted = False
try:
    for _ in range(N_EVENTS):
        event = {
            "user_id": random.choice(users),
            "event_type": random.choice(event_types),
            "timestamp": int(datetime.now().timestamp() * 1000),  # epoch millis
        }

        producer.produce(
            events_topic,
            json.dumps(event).encode("utf-8"),
            on_delivery=_report_delivery_error,
        )
        # Serve pending delivery callbacks without blocking.
        producer.poll(0)
        sent += 1

        # Count events actually sent, so progress reads 10, 20, ... not 0, 10, ...
        if sent % FLUSH_EVERY == 0:
            producer.flush()
            print(f"Sent {sent} events")

        time.sleep(SEND_INTERVAL_S)
except KeyboardInterrupt:
    interrupted = True
    print("\nStopped")
finally:
    # Always drain the queue, whether the loop finished, was interrupted, or
    # raised, so already-produced events are not left in the local buffer.
    producer.flush()

if not interrupted:
    print(f"\n✅ Sent {sent} events")