In [None]:
# Simple consumer subscribed to the "cpu-batch" topic

In [None]:
def simple_print_consumer(
    kafka_bootstrap: str = os.getenv("KAFKA_BOOTSTRAP_SERVERS", "kafka.apache-kafka.svc.cluster.local:9092"),
    topic: str = os.getenv("KAFKA_TOPIC", "cpu-batch"),
    group_id: str = "cpu-batch-simple-printer",
    max_batches: int = None,    # None = infinite
):
    """Consume JSON batches from a Kafka topic and print every sample.

    Each message value is decoded as UTF-8 JSON and read as a mapping with
    two parallel lists, ``batch_ts`` and ``batch_cpu_pct``; a missing key is
    treated as an empty list. Samples are printed one per line as
    ``[NN] <timestamp>, <cpu with 8 decimals>``, and a 40-dash separator is
    printed after each batch.

    Args:
        kafka_bootstrap: Comma-separated broker addresses. Surrounding
            whitespace and empty entries are dropped. The default is read
            from ``KAFKA_BOOTSTRAP_SERVERS`` once, when the function is
            defined.
        topic: Topic to subscribe to. The default is read from
            ``KAFKA_TOPIC`` once, when the function is defined.
        group_id: Kafka consumer group id.
        max_batches: Stop after this many batches. ``None`` keeps consuming
            until interrupted; ``0`` or a negative number returns
            immediately without connecting to Kafka.

    Returns:
        None. All results are written to standard output.
    """
    # The limit is only re-checked after a batch has been printed, so a
    # non-positive limit has to be handled up front; otherwise the call would
    # block for, and print, one batch that was never requested.
    if max_batches is not None and max_batches <= 0:
        return

    consumer = KafkaConsumer(
        topic,
        bootstrap_servers=[s.strip() for s in kafka_bootstrap.split(",") if s.strip()],
        value_deserializer=lambda v: json.loads(v.decode("utf-8")),
        auto_offset_reset="latest",
        group_id=group_id,
        enable_auto_commit=True,
        # consumer_timeout_ms is deliberately left unset so that iteration
        # keeps waiting for new messages instead of ending after an idle
        # period.
    )

    batches = 0
    try:
        for msg in consumer:
            batch = msg.value
            ts_list = batch.get("batch_ts", [])
            cpu_list = batch.get("batch_cpu_pct", [])
            # zip() pairs samples positionally and stops at the shorter list.
            for i, (ts, v) in enumerate(zip(ts_list, cpu_list), 1):
                print(f"[{i:02d}] {ts}, {v:.8f}")
            print("-" * 40)
            batches += 1
            if max_batches is not None and batches >= max_batches:
                break
    finally:
        # Release the consumer on every exit path, including an interrupt.
        consumer.close()


In [None]:
# Start the simple printer with every setting spelled out by keyword.
# max_batches=None keeps it running until the kernel is interrupted; pass an
# integer instead to stop after that many batches.
simple_print_consumer(
    topic="cpu-batch",
    group_id="cpu-batch-simple-printer",
    kafka_bootstrap="kafka.apache-kafka.svc.cluster.local:9092",
    max_batches=None,
)


# V2

1. Receive and print the batch data exactly as in V1

2. Convert the batch to a pandas DataFrame (with timestamps as datetime)

3. Convert the DataFrame to a Darts TimeSeries

4. Print info/validation for both DataFrame and TimeSeries, so you can visually confirm the transformation is correct


In [3]:
# %pip install kafka-python pandas darts

import os
import json
import pandas as pd
from kafka import KafkaConsumer

from darts import TimeSeries

def batch_to_df_and_darts(batch):
    """Convert one decoded batch into a pandas DataFrame and a Darts TimeSeries.

    The batch is read as a mapping holding two parallel lists, ``batch_ts``
    and ``batch_cpu_pct`` (a missing key is treated as an empty list). Along
    the way the function prints the first three rows and the dtypes of the
    DataFrame, then the TimeSeries itself and its first three rows, so the
    conversion can be checked by eye.

    Args:
        batch: Mapping with the ``batch_ts`` and ``batch_cpu_pct`` lists.

    Returns:
        tuple: ``(frame, series)`` where ``frame`` has a datetime
        ``timestamp`` column and a ``cpu`` column, and ``series`` is the
        TimeSeries built from that frame.
    """
    # Step 1: tabular view of the batch with timestamps parsed to datetimes.
    frame = pd.DataFrame(
        {
            "timestamp": batch.get("batch_ts", []),
            "cpu": batch.get("batch_cpu_pct", []),
        }
    ).assign(timestamp=lambda raw: pd.to_datetime(raw["timestamp"]))
    print("\n[INFO] DataFrame sample (head):", frame.head(3), sep="\n")
    print("[INFO] DataFrame dtypes:\n", frame.dtypes)

    # Step 2: time-indexed series keyed on "timestamp" with "cpu" as value.
    # NOTE(review): no explicit frequency is passed; presumably Darts infers
    # it from the timestamps - confirm for very short or irregular batches.
    series = TimeSeries.from_dataframe(frame, time_col="timestamp", value_cols="cpu")
    print(f"[INFO] Darts TimeSeries: {series}")
    series_preview = series.to_dataframe().head(3)
    print("[INFO] Darts TimeSeries values (first 3):", series_preview, sep="\n")
    print("===" * 10)
    return frame, series

def simple_print_consumer(
    kafka_bootstrap: str = os.getenv("KAFKA_BOOTSTRAP_SERVERS", "kafka.apache-kafka.svc.cluster.local:9092"),
    topic: str = os.getenv("KAFKA_TOPIC", "cpu-batch"),
    group_id: str = "cpu-batch-simple-printer",
    max_batches: int = None,    # None = infinite
):
    """Consume JSON batches from Kafka, print them, and convert each one.

    Each message value is decoded as UTF-8 JSON and read as a mapping with
    two parallel lists, ``batch_ts`` and ``batch_cpu_pct``; a missing key is
    treated as an empty list. For every batch the samples are printed as
    ``[NN] <timestamp>, <cpu with 8 decimals>`` followed by a 40-dash
    separator, and the batch is then handed to ``batch_to_df_and_darts``,
    which prints its own conversion report.

    Args:
        kafka_bootstrap: Comma-separated broker addresses. Surrounding
            whitespace and empty entries are dropped. The default is read
            from ``KAFKA_BOOTSTRAP_SERVERS`` once, when the function is
            defined.
        topic: Topic to subscribe to. The default is read from
            ``KAFKA_TOPIC`` once, when the function is defined.
        group_id: Kafka consumer group id.
        max_batches: Stop after this many batches. ``None`` keeps consuming
            until interrupted; ``0`` or a negative number returns
            immediately without connecting to Kafka.

    Returns:
        None. All results are written to standard output.
    """
    # The limit is only re-checked after a batch has been processed, so a
    # non-positive limit has to be handled up front; otherwise the call would
    # block for, and process, one batch that was never requested.
    if max_batches is not None and max_batches <= 0:
        return

    consumer = KafkaConsumer(
        topic,
        bootstrap_servers=[s.strip() for s in kafka_bootstrap.split(",") if s.strip()],
        value_deserializer=lambda v: json.loads(v.decode("utf-8")),
        auto_offset_reset="latest",
        group_id=group_id,
        enable_auto_commit=True,
    )

    batches = 0
    try:
        for msg in consumer:
            batch = msg.value
            ts_list = batch.get("batch_ts", [])
            cpu_list = batch.get("batch_cpu_pct", [])
            # Raw listing; zip() pairs samples positionally and stops at the
            # shorter list.
            for i, (ts, v) in enumerate(zip(ts_list, cpu_list), 1):
                print(f"[{i:02d}] {ts}, {v:.8f}")
            print("-" * 40)
            # Conversion step: only the printed report is needed here, so the
            # returned DataFrame/TimeSeries pair is not kept.
            batch_to_df_and_darts(batch)
            print("[INFO] Batch converted to pandas DataFrame and Darts TimeSeries!\n")
            batches += 1
            if max_batches is not None and batches >= max_batches:
                break
    finally:
        # Release the consumer on every exit path, including an interrupt.
        consumer.close()

# Demo run: stop after two batches so the cell finishes on its own.
# Pass max_batches=None instead to keep consuming until the kernel is
# interrupted.
simple_print_consumer(
    max_batches=2,
    topic="cpu-batch",
    group_id="cpu-batch-simple-printer",
    kafka_bootstrap="kafka.apache-kafka.svc.cluster.local:9092",
)


[01] 2025-09-18 21:07:50, 29.96500000
[02] 2025-09-18 21:08:00, 29.96500000
[03] 2025-09-18 21:08:10, 29.96500000
[04] 2025-09-18 21:08:20, 29.89300000
[05] 2025-09-18 21:08:30, 29.89300000
[06] 2025-09-18 21:08:40, 29.89300000
[07] 2025-09-18 21:08:50, 29.32700000
[08] 2025-09-18 21:09:00, 29.32700000
[09] 2025-09-18 21:09:10, 29.32700000
[10] 2025-09-18 21:09:20, 29.08300000
[11] 2025-09-18 21:09:30, 29.08300000
[12] 2025-09-18 21:09:40, 29.08300000
[13] 2025-09-18 21:09:50, 29.49033333
[14] 2025-09-18 21:10:00, 29.49033333
[15] 2025-09-18 21:10:10, 29.49033333
[16] 2025-09-18 21:10:20, 29.28966667
[17] 2025-09-18 21:10:30, 29.28966667
[18] 2025-09-18 21:10:40, 29.28966667
[19] 2025-09-18 21:10:50, 30.02833333
[20] 2025-09-18 21:11:00, 30.02833333
[21] 2025-09-18 21:11:10, 30.02833333
[22] 2025-09-18 21:11:20, 29.43033333
[23] 2025-09-18 21:11:30, 29.43033333
[24] 2025-09-18 21:11:40, 29.43033333
----------------------------------------

[INFO] DataFrame sample (head):
            ti