# Send Smart Meters To Kafka

This notebook will simulate smart meter events, and will send them to Apache Kafka. Data will be then processed by Kafka Connect and will eventually end up on a QuestDB table.

We first create the QuestDB table. It would automatically be created if it didn't exist in any case, but this way we can see the schema.

In [1]:
#ignore deprecation warnings in this demo
import warnings
warnings.simplefilter("ignore", category=DeprecationWarning)

In [2]:
import psycopg as pg
import os

# Fetch environment variables with defaults
host = os.getenv('QDB_CLIENT_HOST', 'questdb')
port = os.getenv('QDB_CLIENT_PORT', '8812')
user = os.getenv('QDB_CLIENT_USER', 'admin')
password = os.getenv('QDB_CLIENT_PASSWORD', 'quest')

# Create the connection string using the environment variables or defaults
conn_str = f'user={user} password={password} host={host} port={port} dbname=qdb'
with pg.connect(conn_str, autocommit=True) as connection:
    with connection.cursor() as cur:
        cur.execute(
        """
        CREATE TABLE IF NOT EXISTS  'smart_meters' (
  device_id SYMBOL capacity 256 CACHE,
  mark_model SYMBOL capacity 256 CACHE,
  status SYMBOL capacity 256 CACHE,
  frequency LONG,
  energy_consumption DOUBLE,
  voltage DOUBLE,
  current DOUBLE,
  power_factor DOUBLE,
  timestamp TIMESTAMP
) timestamp (timestamp) PARTITION BY HOUR WAL DEDUP UPSERT KEYS(timestamp, device_id);
""")
                    


## Sending the data to Kafka

The script will send data in Avro binary format into a topic named `smart-meters`.

By default, the script will simulate 1000 devices sending each a data point every 300 seconds, and will stop after 5000 messages have been delivered. You can change that by modifying the `args` object at the top of the script.

This script will keep sending data until you click stop or exit the notebook, or until the max_messages number has ben reached.

While the script is running, you can check the data in the table directly at QuestDB's web console at http://localhost:9000 or a live Grafana Dashboard powered by QuestDB at http://localhost:3000/d/fdhyqi3bax8n4c/smart-meters?orgId=1&refresh=1s (user `admin` and password `quest`).


In [3]:
import time
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer
import random
from types import SimpleNamespace

args = SimpleNamespace(num_devices=10000, interval=300, topic='smart-meters',
                       broker='broker:29092,broker-2:29092', schema_registry='http://schema_registry:8081', 
                       max_messages=5000)

# Define the Avro schema for device data
value_schema_str = """
{
   "namespace": "com.example.avro",
   "type": "record",
   "name": "DeviceData",
   "fields": [
       {"name": "device_id", "type": "string"},
       {"name": "timestamp", "type": "long"},
       {"name": "mark_model", "type": "string"},
       {"name": "status", "type": "string"},
       {"name": "energy_consumption", "type": "float"},
       {"name": "voltage", "type": "float"},
       {"name": "current", "type": "float"},
       {"name": "power_factor", "type": "float"},
       {"name": "frequency", "type": "int"}
   ]
}
"""
value_schema = avro.loads(value_schema_str)
key_schema = avro.loads('{"type": "string"}')

def generate_device_id(index):
    letters = index // (16**4) % (26**3)
    letter_part = ''.join(chr(65 + (letters // (26**i) % 26)) for i in range(3)[::-1])
    hex_part = format(index % (16**4), '04x').upper()
    return f"{letter_part}{hex_part}"

def generate_device_data(device_index, timestamp_micros):
    device_id = generate_device_id(device_index)
    mark_model = f"ACME-{1 + device_index % 2000}"
    status = random.choices(["Active", "Inactive", "Faulty"], weights=[95, 4, 1], k=1)[0]
    energy_consumption = random.uniform(-5.0, 15.0) if status != "Faulty" else 0
    voltage = random.uniform(110, 240)
    current = random.uniform(0, 30) if status != "Faulty" else 0
    power_factor = random.uniform(0.5, 1.0)
    frequency = random.choice([50, 60])

    return {
        "device_id": device_id,
        "timestamp": timestamp_micros,
        "mark_model": mark_model,
        "status": status,
        "energy_consumption": energy_consumption,
        "voltage": voltage,
        "current": current,
        "power_factor": power_factor,
        "frequency": frequency,
    }

avro_producer = AvroProducer({
    'bootstrap.servers': args.broker,
    'schema.registry.url': args.schema_registry
}, default_key_schema=key_schema, default_value_schema=value_schema)

total_messages_sent = 0
start_time = time.time()

print(f"Start sending {args.max_messages} messages from {args.num_devices} devices at {args.interval} seconds intervals.")

while total_messages_sent < args.max_messages:
    for device_index in range(args.num_devices):
        current_time = time.time()
        elapsed_time = current_time - start_time

        # Calculate the target time for this message
        target_time = total_messages_sent * (args.interval / args.num_devices)

        # Calculate the delay needed to align with the target time
        delay = target_time - elapsed_time
        if delay > 0:
            time.sleep(delay)

        timestamp_micros = int(time.time() * 1e6)
        data = generate_device_data(device_index, timestamp_micros)
        avro_producer.produce(topic=args.topic, value=data, key=str(device_index))
        avro_producer.poll(0)
        total_messages_sent += 1

        if total_messages_sent >= args.max_messages:
            break

avro_producer.flush()
print(f"Finished sending {total_messages_sent} messages.")


Start sending 5000 messages from 10000 devices at 300 seconds intervals.


KeyboardInterrupt: 

## Verify we have ingested some data

The data you send to Kafka will be processed by Kafka Connect and passed to QuestDB, where it will be stored into a table named `smart_meters`. Let's check we can actually see some data

In [5]:
import requests
import os

HTTP_ENDPOINT = os.getenv('QUESTDB_HTTP_ENDPOINT', 'questdb:9000')
REST_TOKEN = os.getenv('QUESTDB_REST_TOKEN')

if REST_TOKEN is not None:
  host = f'https://admin:quest@{HTTP_ENDPOINT}'
else:
  host = f'http://admin:quest@questdb:9000'


sql_query = 'SELECT * FROM smart_meters LIMIT -5;'

try:
    response = requests.get(
        host + '/exec',
        params={'query': sql_query}, verify=False).json()
    for row in response['dataset']:
        print(row)
except requests.exceptions.RequestException as e:
    print(f'Error: {e}')

['AAA0024', 'ACME-37', 'Active', 50, 14.524668693542, 232.402313232421, 19.677511215209, 0.549238502979, '2024-10-28T12:19:59.280741Z']
['AAA0025', 'ACME-38', 'Active', 50, 4.368036746978, 179.752502441406, 2.532448291778, 0.761913359165, '2024-10-28T12:19:59.580739Z']
['AAA0026', 'ACME-39', 'Active', 50, 11.194232940673, 199.518417358398, 10.793740272521, 0.591314613819, '2024-10-28T12:19:59.880756Z']
['AAA0027', 'ACME-40', 'Active', 60, 14.164017677307, 162.715301513671, 20.322994232177, 0.642223656177, '2024-10-28T12:20:00.180737Z']
['AAA0028', 'ACME-41', 'Active', 50, 7.164101123809, 193.357879638671, 5.990210056304, 0.83853071928, '2024-10-28T12:20:00.480738Z']


