In [None]:
from confluent_kafka import Producer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka.serialization import SerializationContext, MessageField
import json
from datetime import datetime, timezone
import uuid

# =============================
# Configuration
# =============================
# Broker and schema-registry endpoints used by the clients below.
BOOTSTRAP_SERVERS = "redpanda.kafka.svc:9092"
SCHEMA_REGISTRY_URL = "http://redpanda.kafka.svc:8081"

# Destination topics: one for block payloads, one for ingestion state.
BLOCKS_TOPIC = "blockchain.blocks.eth.mainnet"
STATE_TOPIC = "blockchain.ingestion-state.eth.mainnet"

JOB_NAME = "eth_backfill_job"

# Dates (YYYY-MM-DD) that are embedded in the transactional id below.
start_date = "2026-01-02"
end_date = "2026-01-02"

# Job name plus both dates, joined by "_" with every "-" stripped,
# e.g. "eth_backfill_job_20260102_20260102".
TRANSACTIONAL_ID = "_".join((JOB_NAME, start_date, end_date)).replace("-", "")

# =============================
# Schema Registry
# =============================
# Single shared client; the schema lookups and Avro serializers below use it.
schema_registry = SchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})


def current_utctime():
    """Return the current UTC time as an ISO 8601 string.

    The result has millisecond precision and ends with "Z" to mark UTC,
    for example '2026-01-06T12:35:01.123Z'.
    """
    stamp = datetime.now(timezone.utc).isoformat(timespec="milliseconds")
    # isoformat() renders the UTC offset as "+00:00"; swap it for "Z".
    return stamp[: -len("+00:00")] + "Z"

In [64]:
# --- Avro schemas (pulled directly from the registry)
# Subjects are derived from the topic constants ("<topic>-key" /
# "<topic>-value") instead of repeating the topic names as literals, so a
# change to BLOCKS_TOPIC / STATE_TOPIC cannot leave these lookups pointing at
# the old subjects.
def _latest_schema_str(subject):
    """Return the schema string of the latest version registered under `subject`."""
    return schema_registry.get_latest_version(subject).schema.schema_str


blocks_key_schema = _latest_schema_str(f"{BLOCKS_TOPIC}-key")
blocks_value_schema = _latest_schema_str(f"{BLOCKS_TOPIC}-value")

state_key_schema = _latest_schema_str(f"{STATE_TOPIC}-key")
state_value_schema = _latest_schema_str(f"{STATE_TOPIC}-value")

In [65]:
# =============================
# Serializers
# =============================
# One AvroSerializer per fetched schema, all bound to the shared registry
# client. Targets and schemas are listed in the same order.
(
    blocks_key_serializer,
    blocks_value_serializer,
    state_key_serializer,
    state_value_serializer,
) = (
    AvroSerializer(schema_registry, schema_str)
    for schema_str in (
        blocks_key_schema,
        blocks_value_schema,
        state_key_schema,
        state_value_schema,
    )
)

# Name of the block identifier on each chain:
#   Ethereum: block "number"
#   Solana:   block "slot"
#   Sui:      block "checkpoint" (e.g. 18723498)

def delivery_report(err, msg):
    """Print the outcome of a single message delivery.

    Passed as the `on_delivery` callback to `producer.produce`.

    Args:
        err: The delivery error, or None when the message was delivered.
        msg: The message; its topic, partition and offset are printed on
            success. It is not touched when `err` is set.
    """
    if err is None:
        location = f"{msg.topic()} [{msg.partition()}] @ {msg.offset()}"
        print(f"✅ Delivered to {location}")
        return
    print(f"❌ Delivery failed: {err}")

# =============================
# Producer
# =============================
# Settings for an idempotent, transactional producer. Keys contain dots, so
# they are kept in a plain dict rather than passed as keyword arguments.
producer_config = {
    "bootstrap.servers": BOOTSTRAP_SERVERS,
    "enable.idempotence": True,
    "acks": "all",
    "retries": 3,
    "linger.ms": 5,
    "transactional.id": TRANSACTIONAL_ID,
}
producer = Producer(producer_config)

# Initialize transactions for this transactional.id before the first
# begin_transaction() call in the transaction cell.
producer.init_transactions()

In [66]:
# =============================
# Example data
# =============================
# Identifiers and timestamp shared by the block record and the state record.
run_id = str(uuid.uuid4())
current_ts = current_utctime()
mode = "backfill"
status = "running"

# Heights: the block being written and the bounds recorded in the state record.
block_number = 24170103
start_block = 24170000
end_block = 24180000

# Key/value pair for the blocks topic.
block_key = dict(block_height=block_number)

block_value = dict(
    block_height=block_number,
    job_name=TRANSACTIONAL_ID,
    run_id=run_id,
    mode=mode,
    inserted_at=current_ts,
    raw=dict(example="raw block payload"),
)

# Key/value pair for the ingestion-state topic.
state_key = dict(job_name=TRANSACTIONAL_ID)

state_value = dict(
    job_name=TRANSACTIONAL_ID,
    mode=mode,
    run_id=run_id,
    block_begin_height=start_block,
    block_current_height=block_number,
    block_end_height=end_block,
    status=status,
    updated_at=current_ts,
)

In [None]:
# =============================
# Transaction
# Inside the transaction: produce → poll → commit
# =============================
# The state topic is keyed by the raw UTF-8 transactional id (a plain string
# key, not Avro). Encode it once and reuse it for the produce call below.
# NOTE(review): state_key / state_key_serializer are not used by this cell;
# confirm the raw string key is the intended format for the state topic.
key_bytes = TRANSACTIONAL_ID.encode("utf-8")
print("PRODUCE KEY:", key_bytes)

# Tracks whether a transaction is currently open, so the error path only
# aborts when there is something to abort.
transaction_open = False

try:
    # Serialize everything before opening the transaction: a schema or
    # registry error then fails without leaving a transaction to clean up.
    block_key_bytes = blocks_key_serializer(
        block_key,
        SerializationContext(BLOCKS_TOPIC, MessageField.KEY)
    )
    block_value_bytes = blocks_value_serializer(
        block_value,
        SerializationContext(BLOCKS_TOPIC, MessageField.VALUE)
    )
    state_value_bytes = state_value_serializer(
        state_value,
        SerializationContext(STATE_TOPIC, MessageField.VALUE)
    )

    producer.begin_transaction()
    transaction_open = True

    # Enqueue the block record.
    producer.produce(
        topic=BLOCKS_TOPIC,
        key=block_key_bytes,
        value=block_value_bytes,
        on_delivery=delivery_report
    )

    # Serve any delivery callbacks that are already queued, so a failure is
    # reported here rather than only surfacing at commit time.
    producer.poll(0)

    # Enqueue the state record.
    producer.produce(
        topic=STATE_TOPIC,
        key=key_bytes,
        value=state_value_bytes,
        on_delivery=delivery_report
    )

    # Serve delivery callbacks once more before committing.
    producer.poll(0)

    producer.commit_transaction()
    transaction_open = False
    print("✅ Transaction committed")

except Exception as e:
    print("❌ Transaction failed:", e)
    if transaction_open:
        # Abort only an open transaction. A failing abort is reported but
        # must not replace the original error raised below.
        try:
            producer.abort_transaction()
        except Exception as abort_error:
            print("❌ Abort failed:", abort_error)
    # Re-raise so the notebook cell fails visibly instead of looking successful.
    raise

finally:
    producer.flush()

PRODUCE KEY: b'eth_backfill_job_20260102_20260102'
✅ Delivered to blockchain.blocks.eth.mainnet [0] @ 40
✅ Delivered to blockchain.ingestion-state.eth.mainnet [0] @ 40
✅ Transaction committed
