# Resources

https://github.com/BogdanCojocar/medium-articles/blob/master/realtime_kafka/realtime_kafka.ipynb

In [1]:
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = "--packages=org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.4 pyspark-shell"

In [2]:
import findspark
findspark.init()
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *

from confluent_kafka import Producer

import pandas as pd
import uuid
import random
import json

import requests

In [3]:
simple_messages = [
'I love this pony',
'This restaurant is great',
'The weather is bad today',
'I will go to the beach this weekend',
'She likes to swim',
'Apple is a great company'
]

bootstrap_servers = 'kafka:9092'
topic = 'test'
msg_count = 5

In [6]:
def delivery_report(err, msg):    
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {}'.format(msg.topic()))

def confluent_kafka_producer():
    
    p = Producer({'bootstrap.servers': bootstrap_servers})
    
    print('Trying plain localhost connect')
    p1 = Producer({'bootstrap.servers': 'localhost', 'broker.address.family': 'v4'})

    print('Trying port 9092 on localhost')
    p2 = Producer({'bootstrap.servers': 'localhost:9092', 'broker.address.family': 'v4'})

    print('Trying port 29092 on localhost')
    p3 = Producer({'bootstrap.servers': 'localhost:29092', 'broker.address.family': 'v4'})

    print('Trying to connect to broker (container name)')
    p4 = Producer({'bootstrap.servers': 'broker', 'broker.address.family': 'v4'})
    
    for data in simple_messages:
        
        record_key = str(uuid.uuid4())
        print('Sending message with:'
              f'record_key: {record_key}'
              f'data: {data}')
        record_value = json.dumps({'data': data})
         
        print(f'Producer {p}'
              f'producd to topic {topic}')
        p.produce(topic, key=record_key, value=record_value, on_delivery=delivery_report)
       
        p.poll(0)

    p.flush()
    print('we\'ve sent {count} messages to {brokers}'.format(count=len(simple_messages), brokers=bootstrap_servers))

In [7]:
confluent_kafka_producer()

Trying plain localhost connect
Trying port 9092 on localhost
Trying port 29092 on localhost
Trying to connect to broker (container name)
Sending message with:record_key: 83734b56-078f-42dd-a0ae-25d40bd0555bdata: I love this pony
Producer <cimpl.Producer object at 0x10ce53370>producd to topic test
Sending message with:record_key: 8f30df0d-c4a6-47a9-a229-c4ccc6cdd4addata: This restaurant is great
Producer <cimpl.Producer object at 0x10ce53370>producd to topic test
Sending message with:record_key: 425fce74-23ad-4195-9a1c-a6bcd2f8b7acdata: The weather is bad today
Producer <cimpl.Producer object at 0x10ce53370>producd to topic test
Sending message with:record_key: 386bdaf8-3edd-444d-b248-c3a785dbb7e5data: I will go to the beach this weekend
Producer <cimpl.Producer object at 0x10ce53370>producd to topic test
Sending message with:record_key: 219d4f45-f993-4766-a380-6338ad805d4adata: She likes to swim
Producer <cimpl.Producer object at 0x10ce53370>producd to topic test
Sending message with:r