In [40]:
!pip install confluent-kafka

Collecting confluent-kafka
  Downloading confluent_kafka-2.4.0-cp311-cp311-manylinux_2_28_aarch64.whl.metadata (2.3 kB)
Downloading confluent_kafka-2.4.0-cp311-cp311-manylinux_2_28_aarch64.whl (14.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.2/14.2 MB[0m [31m84.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: confluent-kafka
Successfully installed confluent-kafka-2.4.0


In [1]:
import sys
import json
import logging
import confluent_kafka
from confluent_kafka import KafkaError, KafkaException


class KafkaObject(object):
    def __init__(
            self,
            bootstrap_servers,
            buffering_max_messages=2000000,
            session_timeout=1740000,
            max_pol_interval_ms=1750000,
            heartbeat_interval_ms=30000,
            connections_max_handle_ms=54000000,
            off_set_reset='earliest'
    ):
        self.bootstrap_servers = bootstrap_servers
        self.producer_conf = {
            'bootstrap.servers': self.bootstrap_servers,
            'queue.buffering.max.messages': buffering_max_messages
        }
        self.consumer_conf = {
            'bootstrap.servers': self.bootstrap_servers,
            'session.timeout.ms': session_timeout,
            'heartbeat.interval.ms': heartbeat_interval_ms,
            'connections.max.idle.ms': connections_max_handle_ms,
            'max.poll.interval.ms': max_pol_interval_ms,
            'fetch.wait.max.ms': 1000,
            'socket.keepalive.enable': 'true',
            'default.topic.config': {
                'auto.offset.reset': off_set_reset
            }
        }
        logging.basicConfig(level=logging.DEBUG)
        self.logger = logging.getLogger('kafka-object')


    def handler(self, msg):
        json_result = json.loads(msg.value().decode())
        # dict_keys(['labels', 'name', 'timestamp', 'value'])
        result = json_result["labels"]
        metric_name = result["__name__"]
        if metric_name == "aces_pod_memory_utilization" and 'pod' in result.keys():
            self.logger.info(f"pod: {result['pod']}")
            self.logger.info(f"pod: {json_result['timestamp']}")
            self.logger.info(f"pod: {json_result['value']}")

    def producer(
            self,
            msg,
            topic
    ):
        messages_overflow = 0
        producer = confluent_kafka.Producer(**self.producer_conf)
        try:
            producer.produce(topic, value=json.dumps(msg))
        except BufferError as e:
            messages_overflow += 1

        # checking for overflow
        self.logger.error(f'BufferErrors: {messages_overflow}')
        producer.flush()

    def consumer(
            self,
            list_of_topics,
            group_id
    ):
        consumer_config = self.consumer_conf
        consumer_config['group.id'] = group_id
        consumer = confluent_kafka.Consumer(**consumer_config)
        consumer.subscribe(list_of_topics)

        while True:
            msg = consumer.poll()
            if msg is None:
                continue

            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' % (msg.topic(), msg.partition(), msg.offset()))
                elif msg.error():
                    # Error
                    raise KafkaException(msg.error())
            else:
                self.handler(msg)

In [2]:
import os
KAFKA_HOST = os.environ.get("KAFKA_HOST", "broker")
KAFKA_PORT = os.environ.get("KAFKA_PORT", 29092)
GROUP_ID = os.environ.get("GROUP_ID", "temp8087")
TARGET_TOPICS = ["metrics"]

# TSCALE SETTINGS
TSCALE_HOST = os.environ.get("TSCALE_HOST", "timescaledb")
TSCALE_USER = os.environ.get("TSCALE_NAME", "aces")
TSCALE_DB = os.environ.get("TSCALE_DB", "aces")
TSCALE_PASS = os.environ.get("TSCALE_PASS", "aces")

# NEO4J SETTINGS
NEO4J_HOST = os.environ.get("NEO4J_HOST", "neo4j")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASS = os.environ.get("NEO4J_PASS", "neo4j290292")


kafka_obj = KafkaObject(
    bootstrap_servers=f'{KAFKA_HOST}:{KAFKA_PORT}'
)

In [3]:
kafka_obj.consumer(
    TARGET_TOPICS,
    GROUP_ID
)

INFO:kafka-object:pod: notebook-76d88568cf-6jwk8
INFO:kafka-object:pod: 2024-06-17T13:51:03Z
INFO:kafka-object:pod: 0
INFO:kafka-object:pod: metrics-consumer-85457d847d-qzx5d
INFO:kafka-object:pod: 2024-06-17T13:51:03Z
INFO:kafka-object:pod: 0
INFO:kafka-object:pod: prometheus-alertmanager-0
INFO:kafka-object:pod: 2024-06-17T13:51:03Z
INFO:kafka-object:pod: 15907.433425038062
INFO:kafka-object:pod: metrics-catalogue-6cbf9fb8bb-2fb9t
INFO:kafka-object:pod: 2024-06-17T13:51:03Z
INFO:kafka-object:pod: 2152082.334318392
INFO:kafka-object:pod: kube-proxy-ftgrh
INFO:kafka-object:pod: 2024-06-17T13:51:03Z
INFO:kafka-object:pod: 2239.1466666666665
INFO:kafka-object:pod: broker-64c5757fcc-rnlwm
INFO:kafka-object:pod: 2024-06-17T13:51:03Z
INFO:kafka-object:pod: 5559.739992861701
INFO:kafka-object:pod: timescale-sts-0
INFO:kafka-object:pod: 2024-06-17T13:51:03Z
INFO:kafka-object:pod: 0
INFO:kafka-object:pod: prometheus-kube-state-metrics-7b74466fbb-6mt6z
INFO:kafka-object:pod: 2024-06-17T13:51:03

KeyboardInterrupt: 

In [None]:
{'labels': {'__name__': 'aces_pod_cpu_utilization', 'pod': 'notebook-76d88568cf-6jwk8'}, 'name': 'aces_pod_cpu_utilization', 'timestamp': '2024-06-17T13:51:03Z', 'value': '0.0006843431393815961'}

In [1]:
! pip install psycopg2-binary

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (4.4 kB)
Downloading psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m29.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.9


In [2]:
import datetime
import os
import psycopg2


class TimeScaleDB(object):

    @staticmethod
    def construct_uri(
            host,
            username,
            password,
            database,
            port=5432
    ):
        this_uri = f"postgres://{username}:{password}@{host}:{port}/{database}"
        return this_uri

    def __init__(
            self,
            host,
            username,
            password,
            database
    ):
        self.conn = psycopg2.connect(
            self.construct_uri(host=host, username=username, password=password, database=database)
        )
        self.cursor = self.conn.cursor()

        def close_client(
            self
        ):
            self.conn.commit()

    def create_temp_table(self, table_name="temp_table"):
        table_creation_query = f"""
            CREATE TABLE {table_name} ( 
                time TIMESTAMPTZ NOT NULL,
                pod TEXT,
                num_of_restarts INTEGER
            )"""
        create_hyper_table = f"""SELECT create_hypertable('{table_name}', by_range('time'))"""
        self.cursor.execute(table_creation_query)
        self.cursor.execute(create_hyper_table)
        self.close_client()

In [3]:
# TSCALE SETTINGS
TSCALE_HOST = os.environ.get("TSCALE_HOST", "timescaledb")
TSCALE_USER = os.environ.get("TSCALE_NAME", "aces")
TSCALE_DB = os.environ.get("TSCALE_DB", "aces")
TSCALE_PASS = os.environ.get("TSCALE_PASS", "aces")

In [8]:
tdb = TimeScaleDB(
    host=TSCALE_HOST,
    username=TSCALE_USER,
    database=TSCALE_DB,
    password=TSCALE_PASS
)

In [34]:
tdb.cursor.execute(
    f"""
    SELECT time, resource, unit, value  FROM container_resource_limits WHERE pod='neo4j-0'
    ORDER BY time ASC
    """
)

In [35]:
records = tdb.cursor.fetchall()

In [36]:
records[2:4]

[(datetime.datetime(2024, 6, 17, 8, 56, 6, tzinfo=datetime.timezone.utc),
  'memory',
  'byte',
  2147483648.0),
 (datetime.datetime(2024, 6, 17, 8, 56, 6, tzinfo=datetime.timezone.utc),
  'cpu',
  'core',
  0.5)]

In [37]:
results = []
num_of_resources = 2
for i in range(0, len(records), num_of_resources):
    results.append({
        records[i][0]: {
            records[i][1]: {"unit": records[i][2], "value": records[i][3]},
            records[i+1][1]: {"unit": records[i+1][2], "value": records[i+1][3]}
        }
    })

In [38]:
results = [
    {
        records[i][0]: {
            records[i][1]: {"unit": records[i][2], "value": records[i][3]},
            records[i+1][1]: {"unit": records[i+1][2], "value": records[i+1][3]}
        }
    } for i in range(0, len(records), num_of_resources)
]

In [39]:
results

[{datetime.datetime(2024, 6, 17, 8, 56, 6, tzinfo=datetime.timezone.utc): {'memory': {'unit': 'byte',
    'value': 2147483648.0},
   'cpu': {'unit': 'core', 'value': 0.5}}},
 {datetime.datetime(2024, 6, 17, 8, 56, 6, tzinfo=datetime.timezone.utc): {'memory': {'unit': 'byte',
    'value': 2147483648.0},
   'cpu': {'unit': 'core', 'value': 0.5}}},
 {datetime.datetime(2024, 6, 17, 8, 56, 6, tzinfo=datetime.timezone.utc): {'memory': {'unit': 'byte',
    'value': 2147483648.0},
   'cpu': {'unit': 'core', 'value': 0.5}}},
 {datetime.datetime(2024, 6, 17, 8, 56, 6, tzinfo=datetime.timezone.utc): {'memory': {'unit': 'byte',
    'value': 2147483648.0},
   'cpu': {'unit': 'core', 'value': 0.5}}},
 {datetime.datetime(2024, 6, 17, 8, 57, 6, tzinfo=datetime.timezone.utc): {'memory': {'unit': 'byte',
    'value': 2147483648.0},
   'cpu': {'unit': 'core', 'value': 0.5}}},
 {datetime.datetime(2024, 6, 17, 8, 57, 6, tzinfo=datetime.timezone.utc): {'memory': {'unit': 'byte',
    'value': 2147483648.0},
