<a href="https://colab.research.google.com/github/martin-fabbri/colab-notebooks/blob/master/kafka/kafka_python_produce_consume_pynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title ## Install/Upgrade packages
#@markdown pip installs ``confluent-kafka`` and upgrades ``ipython`` to suport async/await

#@markdown Ignore jupyter-console 5.2.0 has requirement ...

#@markdown Ignore ERROR: google-colab 1.0.0 has requirement ipython ...

#@markdown **Follow instructions to restart kernel after setup is complete**

!pip install -q --upgrade ipython
!pip install -q --upgrade ipykernel
!pip install -q confluent-kafka
!pip install -q pykafka

from IPython.core.display import HTML
HTML("""
  <div style="border: 5px solid green; padding: 20px; margin: 20px;"> 
    <h2 style="color:red;">Restart the runtime</h2>
    <p>We have upgraded IPython and you must restart the 
       runtime (Runtime > Restart runtime ...) before continuing.</p>
  </div>  
""")

[K     |████████████████████████████████| 788kB 2.7MB/s 
[K     |████████████████████████████████| 358kB 7.6MB/s 
[31mERROR: jupyter-console 5.2.0 has requirement prompt-toolkit<2.0.0,>=1.0.0, but you'll have prompt-toolkit 3.0.5 which is incompatible.[0m
[31mERROR: google-colab 1.0.0 has requirement ipython~=5.5.0, but you'll have ipython 7.13.0 which is incompatible.[0m
[K     |████████████████████████████████| 122kB 2.7MB/s 
[31mERROR: jupyter-console 5.2.0 has requirement prompt-toolkit<2.0.0,>=1.0.0, but you'll have prompt-toolkit 3.0.5 which is incompatible.[0m
[31mERROR: google-colab 1.0.0 has requirement ipykernel~=4.10, but you'll have ipykernel 5.2.1 which is incompatible.[0m
[31mERROR: google-colab 1.0.0 has requirement ipython~=5.5.0, but you'll have ipython 7.13.0 which is incompatible.[0m
[K     |████████████████████████████████| 8.0MB 2.8MB/s 
[K     |████████████████████████████████| 143kB 2.7MB/s 
[K     |████████████████████████████████| 133kB 37.1MB/s

In [0]:
#@title ## Setup Kafka
#@markdown This cell will install Kafka 2.12
%%bash
sudo apt-get update -qq
sudo wget https://www-us.apache.org/dist/kafka/2.3.0/kafka_2.12-2.3.0.tgz -q
sudo tar -xzf kafka_2.12-2.3.0.tgz
sudo mv kafka_2.12-2.3.0 /opt/kafka

In [0]:
#@title ## Start services
#@markdown Start ``zookeeper on port 2181`` and  ``kafka on port 9092``(default ports).

%%bash
sudo nohup /opt/kafka/bin/zookeeper-server-start.sh -daemon /opt/kafka/config/zookeeper.properties > /dev/null 2>&1 &
sleep 5
sudo nohup /opt/kafka/bin/kafka-server-start.sh -daemon /opt/kafka/config/server.properties > /dev/null 2>&1 &

## List all the Kafka topics available on the server

The `--zookeeper` parameter is required everytime you want to call the `kafka-topics` command

In [0]:
%%bash
/opt/kafka/bin/kafka-topics.sh --list --zookeeper localhost:2181

## Imports

In [0]:
import asyncio

from confluent_kafka import Consumer
from confluent_kafka import Producer
from confluent_kafka.admin import AdminClient, NewTopic

from pykafka import KafkaClient

## Config

In [0]:
BROKER_URL = 'localhost:9092'
TOPIC_NAME = 'python-test-topic'

## Instantiate Kafka admin client

In [6]:
client = AdminClient({'bootstrap.servers': BROKER_URL})
client

<confluent_kafka.admin.AdminClient at 0x7f5b65d1e340>

## Create topic

No need for a ``zookeeper`` reference ???

In [7]:
topic = NewTopic(TOPIC_NAME, num_partitions=1, replication_factor=1)
client.create_topics([topic])

{'python-test-topic': <Future at 0x7f5b6640bcc0 state=running>}

x

In [0]:
async def produce(topic_name):
  """Produces data into a kafka topic"""
  p = Producer({'bootstrap.servers': BROKER_URL})
  for curr_iteration in range(20):
    print(f'Producer: New message: {curr_iteration}')
    p.produce(topic_name, f'Message: {curr_iteration}')
    await asyncio.sleep(1)

In [0]:
async def consume(topic_name):
  """Consumes data from a Kafka topic"""
  c = Consumer({'bootstrap.servers': BROKER_URL, 'group.id': 'python-test-consumer'})
  c.subscribe([topic_name])

  for _ in range(30):
    message = c.poll(1.0)
    if message is None:
      print('Consumer: No message reveived')
    elif message.error() is not None:
      print(f'Consumer: Message had an error {message.error()}')
    else:
      print(f'Consumer: Key: {message.key()}, {message.value()}')
    await asyncio.sleep(1)

In [10]:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(asyncio.gather(
    produce(TOPIC_NAME),
    consume(TOPIC_NAME)
))
loop.close()

Producer: New message: 0
Consumer: No message reveived
Producer: New message: 1
Consumer: No message reveived
Producer: New message: 2
Consumer: No message reveived
Producer: New message: 3
Consumer: No message reveived
Producer: New message: 4
Consumer: Key: None, b'Message: 4'
Producer: New message: 5
Consumer: Key: None, b'Message: 5'
Producer: New message: 6
Consumer: Key: None, b'Message: 6'
Producer: New message: 7
Consumer: Key: None, b'Message: 7'
Producer: New message: 8
Consumer: Key: None, b'Message: 8'
Producer: New message: 9
Consumer: Key: None, b'Message: 9'
Producer: New message: 10
Consumer: Key: None, b'Message: 10'
Producer: New message: 11
Consumer: Key: None, b'Message: 11'
Producer: New message: 12
Consumer: Key: None, b'Message: 12'
Producer: New message: 13
Consumer: Key: None, b'Message: 13'
Producer: New message: 14
Consumer: Key: None, b'Message: 14'
Producer: New message: 15
Consumer: Key: None, b'Message: 15'
Producer: New message: 16
Consumer: Key: None, b

## Produce/Consume messages

In [15]:
client = KafkaClient(hosts=BROKER_URL)
client.topics

{b'python-test-topic': <pykafka.topic.Topic at 0x7f5b62aeaa20 (name=b'python-test-topic')>}

In [0]:
async def produce(topic_name):
  """Produces data into a kafka topic"""
  topic = client.topics[b'python-test-topic']
  message_key = 0
  with topic.get_producer() as producer:
    for curr_iteration in range(20):
      msg = f'test message {message_key}'
      print(f'Producer: New message: {msg}')
      producer.produce(msg.encode())
      message_key += 1
      await asyncio.sleep(1)


In [0]:
async def consume(topic_name):
  """Consumes data from a Kafka topic"""
  topic = client.topics[b'python-test-topic']
  consumer = topic.get_simple_consumer()
  for message in consumer:
    if message is None:
      print('Consumer: No message reveived')
    else:
      print(f'Consumer: Key: {message.offset}, {message.value}')
    await asyncio.sleep(1)

In [0]:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(asyncio.gather(
    produce(TOPIC_NAME),
    consume(TOPIC_NAME)
))
loop.close()

Producer: New message: test message 0
Consumer: Key: 0, b'Message: 0'
Producer: New message: test message 1
Consumer: Key: 1, b'Message: 1'
Producer: New message: test message 2
Consumer: Key: 2, b'Message: 2'
Producer: New message: test message 3
Consumer: Key: 3, b'Message: 3'
Producer: New message: test message 4
Consumer: Key: 4, b'Message: 4'
Producer: New message: test message 5
Consumer: Key: 5, b'Message: 5'
Producer: New message: test message 6
Consumer: Key: 6, b'Message: 6'
Producer: New message: test message 7
Consumer: Key: 7, b'Message: 7'
Producer: New message: test message 8
Consumer: Key: 8, b'Message: 8'
Producer: New message: test message 9
Consumer: Key: 9, b'Message: 9'
Producer: New message: test message 10
Consumer: Key: 10, b'Message: 10'
Producer: New message: test message 11
Consumer: Key: 11, b'Message: 11'
Producer: New message: test message 12
Consumer: Key: 12, b'Message: 12'
Producer: New message: test message 13
Consumer: Key: 13, b'Message: 13'
Producer