In [1]:
import json
import time
import random
import pandas as pd
import psycopg2

from confluent_kafka.admin import AdminClient, NewTopic
from confluent_kafka.admin import AdminClient
from confluent_kafka import Producer, Consumer, KafkaError, TopicPartition

# Kafka

## Create Topic

In [2]:
admin_client = AdminClient({'bootstrap.servers': 'broker:9092'})

# Retrieve the list of topics
topic_metadata = admin_client.list_topics(timeout=10)
[topic for topic in topic_metadata.topics if not topic.startswith('_')]

[]

In [3]:
# create new topic
new_topics = [NewTopic('trades', num_partitions=3, replication_factor=1)]
fs = admin_client.create_topics(new_topics)
for topic, f in fs.items():
    try:
        f.result()
        print("Topic {} created".format(topic))
    except Exception as e:
        print("Failed to create topic {}: {}".format(topic, e))

Topic trades created


In [4]:
topics = admin_client.list_topics().topics
for topic, topic_info in topics.items():
    if not topic.startswith('_'):
        print(f"Topic: {topic}, Partitions: {len(topic_info.partitions)}")
        for p_id, p_info in topic_info.partitions.items():
            print(f"  Partition: {p_id}, Leader: {p_info.leader}, Replicas: {p_info.replicas}")

Topic: trades, Partitions: 3
  Partition: 0, Leader: 1, Replicas: [1]
  Partition: 1, Leader: 1, Replicas: [1]
  Partition: 2, Leader: 1, Replicas: [1]


## Producer

In [5]:
trades = [
    {
        'e': 'trade',
        'E': 1713991342890,
        's': 'BTCUSDT',
        't': 97236,
        'p': '0.006390',
        'q': '2551',
        'b': 69,
        'a': 607,
        'T': 1713991342890,
        'm': True,
        'M': True
    },
    {
        'e': 'trade',
        'E': 1713991378161,
        's': 'BNBUSDT',
        't': 77422,
        'p': '0.008396',
        'q': '2831',
        'b': 307,
        'a': 539,
        'T': 1713991378161,
        'm': False,
        'M': True
    },
    {
        'e': 'trade',
        'E': 1713991429451,
        's': 'BNBBTC',
        't': 43014,
        'p': '0.004122',
        'q': '3493',
        'b': 123,
        'a': 664,
        'T': 1713991429451,
        'm': True,
        'M': True
    },
]

In [6]:
def acked(err, msg):
    if err is not None:
        print("Failed to deliver message: %s: %s" % (str(msg), str(err)))
    else:
        print(f"Message produced: {msg.topic()} {msg.partition()} {msg.key()}")

producer = Producer({'bootstrap.servers': 'broker:9092'})

for trade in trades:
    producer.produce('trades', key=trade['s'], value=json.dumps(trade), callback=acked)

# Wait for any outstanding messages to be delivered
producer.flush()

Message produced: trades 0 b'BTCUSDT'
Message produced: trades 1 b'BNBBTC'
Message produced: trades 2 b'BNBUSDT'


0

## Consumer

In [7]:
# Configure the consumer
consumer_config = {
    'bootstrap.servers': 'broker:9092',
    'group.id': 'group1',
    'auto.offset.reset': 'earliest'
}

# Initialize the consumer
consumer = Consumer(consumer_config)

# Subscribe to the topic
topic_name = 'trades'
consumer.subscribe([topic_name])


try:
    start_time = time.time()
    while True:
        if time.time() - start_time > 10:  # Break the loop after 10 seconds
            break
            
        msg = consumer.poll(timeout=1.0)  # Poll for a message (timeout in seconds)
        
        if msg is None:
            continue  # No message available within the timeout period
            
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                # End of partition event
                print(f'Reached end of {msg.partition()} at offset {msg.offset()}')
            else:
                print(f'Error: {msg.error()}')
            continue

        # Access and print message details
        print(f'Message: {msg.value().decode("utf-8")}')
        print(f'Partition: {msg.partition()}')
        print(f'Offset: {msg.offset()}')
finally:
    consumer.close()

Message: {"e": "trade", "E": 1713991378161, "s": "BNBUSDT", "t": 77422, "p": "0.008396", "q": "2831", "b": 307, "a": 539, "T": 1713991378161, "m": false, "M": true}
Partition: 2
Offset: 0
Message: {"e": "trade", "E": 1713991429451, "s": "BNBBTC", "t": 43014, "p": "0.004122", "q": "3493", "b": 123, "a": 664, "T": 1713991429451, "m": true, "M": true}
Partition: 1
Offset: 0
Message: {"e": "trade", "E": 1713991342890, "s": "BTCUSDT", "t": 97236, "p": "0.006390", "q": "2551", "b": 69, "a": 607, "T": 1713991342890, "m": true, "M": true}
Partition: 0
Offset: 0


# PostgreSQL database

## Create database

In [8]:
# Connect to the default database (postgres) to issue commands like creating a new database
conn = psycopg2.connect(
    dbname="postgres",
    user="postgres",
    password="postgres",
    host="pgdatabase",
    port="5432"
)

# Set the connection to autocommit mode
conn.autocommit = True
cur = conn.cursor()

# Execute the command to create a new database
try:
    cur.execute("CREATE DATABASE trades;")
    print("Database created successfully.")
except psycopg2.Error as e:
    print(f"An error occurred: {e}")

cur.close()
conn.close()

Database created successfully.


## Create table

In [9]:
conn_params = {
    "dbname": "trades",
    "user": "postgres",
    "password": "postgres",
    "host": "pgdatabase",
    "port": "5432"
}

In [10]:
try:
    with psycopg2.connect(**conn_params) as conn:
        with conn.cursor() as cur:
            create_table_command = """
                CREATE TABLE IF NOT EXISTS binance (
                    event_type TEXT,
                    event_time BIGINT,
                    symbol TEXT,
                    trade_id BIGINT,
                    price NUMERIC,
                    quantity NUMERIC,
                    buyer_order_id BIGINT,
                    seller_order_id BIGINT,
                    trade_time BIGINT,
                    is_buyer_maker BOOLEAN,
                    ignore_in_price BOOLEAN
                );
            """
            cur.execute(create_table_command)
            print("Table created successfully within a transaction.")

except psycopg2.Error as e:    
    print(f"Failed to create table: {e}")


Table created successfully within a transaction.


## Insert sample data into the table

In [11]:
# Data to be inserted
data = {
    'e': 'trade',
    'E': 1714015066671,
    's': 'BNBBTC',
    't': 74686,
    'p': '0.006215',
    'q': '2112',
    'b': 88,
    'a': 544,
    'T': 1714015066671,
    'm': False,
    'M': True
}

# SQL query to insert data
insert_query = """
    INSERT INTO binance 
        (
            event_type, event_time, symbol, trade_id, price, quantity, buyer_order_id, 
            seller_order_id, trade_time, is_buyer_maker, ignore_in_price
        )
    VALUES (%(e)s, %(E)s, %(s)s, %(t)s, %(p)s, %(q)s, %(b)s, %(a)s, %(T)s, %(m)s, %(M)s);
"""

with psycopg2.connect(**conn_params) as conn:
    with conn.cursor() as cur:
        cur.execute(insert_query, data)
        print('Record inserted')

Record inserted


## Kafka consumer reads data from the Kafka stream and insert it into the table

In [12]:
consumer_config = {
    'bootstrap.servers': 'broker:9092',
    'group.id': 'group_db',
    'auto.offset.reset': 'earliest'
}

# Initialize the consumer
consumer = Consumer(consumer_config)

# Subscribe to the topic
topic_name = 'trades'
consumer.subscribe([topic_name])

try:
    start_time = time.time()
    
    with psycopg2.connect(**conn_params) as conn:
        with conn.cursor() as cursor:
            while True:
                
                if time.time() - start_time > 10:  # Break the loop after 10 seconds
                    break
                    
                msg = consumer.poll(timeout=1.0)  # Poll for a message (timeout in seconds)
                
                if msg is None:
                    continue  # No message available within the timeout period
                    
                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        # End of partition event
                        print(f'Reached end of {msg.partition()} at offset {msg.offset()}')
                    else:
                        print(f'Error: {msg.error()}')
                    continue
        
                # Access and print message details
                message_data = json.loads(msg.value().decode('utf-8'))
                print(f'Received message: {message_data} from partition {msg.partition()}')
                cursor.execute(insert_query, message_data)
                print('Record inserted')
                conn.commit()

finally:
    consumer.close()

Received message: {'e': 'trade', 'E': 1713991378161, 's': 'BNBUSDT', 't': 77422, 'p': '0.008396', 'q': '2831', 'b': 307, 'a': 539, 'T': 1713991378161, 'm': False, 'M': True} from partition 2
Record inserted
Received message: {'e': 'trade', 'E': 1713991429451, 's': 'BNBBTC', 't': 43014, 'p': '0.004122', 'q': '3493', 'b': 123, 'a': 664, 'T': 1713991429451, 'm': True, 'M': True} from partition 1
Record inserted
Received message: {'e': 'trade', 'E': 1713991342890, 's': 'BTCUSDT', 't': 97236, 'p': '0.006390', 'q': '2551', 'b': 69, 'a': 607, 'T': 1713991342890, 'm': True, 'M': True} from partition 0
Record inserted


## Explore data from PostgresSQL database

In [13]:
%load_ext sql
%sql postgresql://postgres:postgres@pgdatabase

In [14]:
%%sql
SELECT datname FROM pg_database;

 * postgresql://postgres:***@pgdatabase
4 rows affected.


datname
postgres
trades
template1
template0


In [15]:
%sql postgresql://postgres:postgres@pgdatabase/trades

In [16]:
%%sql
SELECT table_schema, table_name
    FROM information_schema.tables
    WHERE table_type = 'BASE TABLE' AND
    table_schema NOT IN ('pg_catalog', 'information_schema', 'priv');

   postgresql://postgres:***@pgdatabase
 * postgresql://postgres:***@pgdatabase/trades
1 rows affected.


table_schema,table_name
public,binance


In [18]:
%%sql
-- Print the schema of this table.
SELECT column_name, data_type FROM Information_schema.Columns
    WHERE table_name = 'binance';

   postgresql://postgres:***@pgdatabase
 * postgresql://postgres:***@pgdatabase/trades
11 rows affected.


column_name,data_type
ignore_in_price,boolean
event_time,bigint
trade_time,bigint
is_buyer_maker,boolean
trade_id,bigint
price,numeric
quantity,numeric
buyer_order_id,bigint
seller_order_id,bigint
symbol,text


In [20]:
%%sql
SELECT count(1) FROM binance;

   postgresql://postgres:***@pgdatabase
 * postgresql://postgres:***@pgdatabase/trades
1 rows affected.


count
4


In [21]:
%%sql 
    SELECT 
        event_type,
        event_time,
        symbol,
        trade_id,
        price,
        quantity,
        buyer_order_id,
        seller_order_id,
        trade_time,
        is_buyer_maker,
        ignore_in_price 
    FROM binance;

   postgresql://postgres:***@pgdatabase
 * postgresql://postgres:***@pgdatabase/trades
4 rows affected.


event_type,event_time,symbol,trade_id,price,quantity,buyer_order_id,seller_order_id,trade_time,is_buyer_maker,ignore_in_price
trade,1714015066671,BNBBTC,74686,0.006215,2112,88,544,1714015066671,False,True
trade,1713991378161,BNBUSDT,77422,0.008396,2831,307,539,1713991378161,False,True
trade,1713991429451,BNBBTC,43014,0.004122,3493,123,664,1713991429451,True,True
trade,1713991342890,BTCUSDT,97236,0.00639,2551,69,607,1713991342890,True,True


## Read data from PostgresSQL databaseusing Python

In [24]:
def read_data_from_db():
    try:
        # Connect to the database
        with psycopg2.connect(**conn_params) as conn:
            with conn.cursor() as cur:
                query = """
                    SELECT 
                        event_type,
                        event_time,
                        symbol,
                        trade_id,
                        price,
                        quantity,
                        buyer_order_id,
                        seller_order_id,
                        trade_time,
                        is_buyer_maker,
                        ignore_in_price 
                    FROM binance;
                """
                cur.execute(query)
                rows = cur.fetchall()
                column_names = [desc[0] for desc in cur.description]
                return [dict(zip(column_names, row)) for row in rows]
    except psycopg2.Error as e:
        print(f"Database error: {e}")
        return []

# Call the function to read data
data = read_data_from_db()
pd.DataFrame(data)

Unnamed: 0,event_type,event_time,symbol,trade_id,price,quantity,buyer_order_id,seller_order_id,trade_time,is_buyer_maker,ignore_in_price
0,trade,1714015066671,BNBBTC,74686,0.006215,2112,88,544,1714015066671,False,True
1,trade,1713991378161,BNBUSDT,77422,0.008396,2831,307,539,1713991378161,False,True
2,trade,1713991429451,BNBBTC,43014,0.004122,3493,123,664,1713991429451,True,True
3,trade,1713991342890,BTCUSDT,97236,0.00639,2551,69,607,1713991342890,True,True
