In [0]:
def error_cb(err):
    """Report a generic Kafka client error and abort on unrecoverable ones.

    Client-level errors are normally informational only: the client retries
    on its own and the application does not have to react. This notebook
    nevertheless stops when no broker can be reached (_ALL_BROKERS_DOWN) or
    when authentication fails (_AUTHENTICATION).

    Parameters:
        err: Error object handed to the callback; it must expose ``code()``.

    Raises:
        KafkaException: When the error code is one of the two codes above.
    """
    print("Client error: {}".format(err))

    fatal_codes = (KafkaError._ALL_BROKERS_DOWN, KafkaError._AUTHENTICATION)
    if err.code() in fatal_codes:
        # An exception raised here propagates out of the flush() or poll()
        # call that triggered the callback.
        raise KafkaException(err)


def acked(err, msg):
    """Print the outcome of a produce attempt.

    Parameters:
        err: ``None`` when the message was produced; otherwise the error
            describing why delivery failed.
        msg: The message the report refers to. It is rendered with ``str()``
            for printing.
    """
    if err is None:
        print("Message produced: %s" % (str(msg)))
        return

    print("Failed to deliver message: %s: %s" % (str(msg), str(err)))

In [0]:
import json
import os
import uuid
from time import sleep

from confluent_kafka import Consumer
from confluent_kafka import Producer, Consumer, KafkaError, KafkaException
from confluent_kafka.admin import AdminClient, NewTopic


# Kafka / Schema Registry connection settings.
#
# Credentials are read from environment variables so that no secret is stored
# in the notebook. On Databricks, set them in the cluster configuration, or
# replace the lookups below with dbutils.secrets.get(scope, key).
# A missing CONFLUENT_API_KEY / CONFLUENT_API_SECRET raises KeyError here,
# before any connection is attempted.
#
# NOTE(review): the API key and secret that used to be hard-coded in this cell
# have been exposed and should be rotated.
confluentClusterName = os.environ.get("CONFLUENT_CLUSTER_NAME", "stage3talent")
confluentBootstrapServers = os.environ.get(
    "CONFLUENT_BOOTSTRAP_SERVERS",
    "pkc-ldvmy.centralus.azure.confluent.cloud:9092",
)
confluentTopicName = os.environ.get("CONFLUENT_TOPIC_NAME", "arms-data")
schemaRegistryUrl = os.environ.get(
    "CONFLUENT_SCHEMA_REGISTRY_URL",
    "https://psrc-gq7pv.westus2.azure.confluent.cloud",
)
confluentApiKey = os.environ["CONFLUENT_API_KEY"]
confluentSecret = os.environ["CONFLUENT_API_SECRET"]
# The registry credentials were identical to the cluster credentials, so they
# fall back to those unless dedicated values are provided.
confluentRegistryApiKey = os.environ.get("CONFLUENT_REGISTRY_API_KEY", confluentApiKey)
confluentRegistrySecret = os.environ.get("CONFLUENT_REGISTRY_API_SECRET", confluentSecret)

In [0]:
# Settings for the consumer that reads the 'farm-production' topic.
farm_consumer_settings = {
    'bootstrap.servers': confluentBootstrapServers,
    'sasl.mechanism': 'PLAIN',
    'security.protocol': 'SASL_SSL',
    'sasl.username': confluentApiKey,
    'sasl.password': confluentSecret,
    # A fresh UUID means every run of this cell uses a brand-new consumer group.
    'group.id': str(uuid.uuid1()),
    'auto.offset.reset': 'earliest',
    'error_cb': error_cb,
}
c = Consumer(farm_consumer_settings)

farm_topics = ['farm-production']
c.subscribe(farm_topics)

In [0]:
# Holder for the most recently decoded record.
aString = dict()

# Collects one dictionary per consumed message.
kafkaListDictionaries = list()

In [0]:

# Drain up to 1000 messages from the subscribed topic into
# kafkaListDictionaries, then release the consumer.
#
# The consumer is closed when this cell finishes, because `c` is rebound to a
# new Consumer further down and the old one would otherwise be leaked. To run
# this cell again, first re-run the cell that creates `c`.
try:
    for _ in range(1000):
        msg = c.poll(timeout=1.0)
        if msg is None:
            # Nothing arrived within the timeout: stop reading.
            break
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            break

        payload = msg.value()
        if payload is None:
            # A record without a value has nothing to decode; skip it rather
            # than letting .decode() fail and end the whole loop.
            continue

        aString = json.loads(payload.decode('utf-8'))
        # Second element of msg.timestamp(); per the confluent-kafka docs this
        # is the timestamp value (the first element is its type).
        aString['timestamp'] = msg.timestamp()[1]
        kafkaListDictionaries.append(aString)
        print(msg.offset())
        c.commit()

except Exception as e:
    # Best-effort: report the problem and keep whatever was collected so far.
    print(e)
finally:
    # Always release the consumer's connections and group membership.
    c.close()

for message in kafkaListDictionaries:
    print(message)
        

In [0]:
# Azure SQL connection settings, reused by the JDBC cells further down.
database = "gold-standard-DB"
table = "dbo.FarmOutputs"
user = "goldstandard"
# The password is read from the environment so it is not stored in the
# notebook. On Databricks, set it in the cluster configuration or replace this
# lookup with dbutils.secrets.get(scope, key). A missing variable raises
# KeyError here, before any connection is attempted.
# NOTE(review): the password that used to be hard-coded in this cell has been
# exposed and should be rotated.
password = os.environ["AZURE_SQL_PASSWORD"]
server = "gen10-data-fundamentals-21-11-sql-server.database.windows.net"

# Read the current contents of `table` into a Spark DataFrame.
jdbcDF = spark.read.format("jdbc") \
    .option("url", f"jdbc:sqlserver://{server}:1433;databaseName={database};") \
    .option("dbtable", table) \
    .option("user", user) \
    .option("password", password) \
    .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver") \
    .load()

# Show the data loaded into the DataFrame.
jdbcDF.show()

In [0]:
# Build a DataFrame from the consumed records, keep the four columns of
# interest, and rename them as listed in the mapping below.
farm_column_renames = {
    'state': 'StateName',
    'specialty': 'Specialty',
    'timestamp': 'TimeStamp',
    'estimate': 'OutputValue',
}

farm_df = spark.createDataFrame(kafkaListDictionaries).select(
    'state', 'Specialty', 'estimate', 'timestamp'
)
for source_name, target_name in farm_column_renames.items():
    farm_df = farm_df.withColumnRenamed(source_name, target_name)

display(farm_df)

StateName,Specialty,OutputValue,TimeStamp
Wisconsin,General Cash Grains,549229.0,1644335660792
Wisconsin,Corn,1797769.0,1644335662523
Wisconsin,Poultry,197009.0,1644335663575
Wisconsin,Dairy,6227512.0,1644335664626
Wisconsin,All other livestock,92359.0,1644335665674
Texas,Corn,823006.0,1644335666724
Texas,"Tobacco, Cotton, Peanuts",1803342.0,1644335667775
Texas,Other Field Crops,2135418.0,1644335668824
Texas,"Specialty Crops (F,V,N)",564914.0,1644335669894
Texas,Hogs,13863.0,1644335670943


In [0]:
# Append the rows of farm_df to the SQL table named by `table` over JDBC.
farm_jdbc_options = {
    "url": f"jdbc:sqlserver://{server}:1433;databaseName={database};",
    "dbtable": table,
    "user": user,
    "password": password,
    "driver": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
}
farm_writer = farm_df.write.format('jdbc').mode("append")
farm_writer.options(**farm_jdbc_options).save()

In [0]:
# Settings for the consumer that reads the 'mn-farm-count' topic.
mn_consumer_settings = {
    'bootstrap.servers': confluentBootstrapServers,
    'sasl.mechanism': 'PLAIN',
    'security.protocol': 'SASL_SSL',
    'sasl.username': confluentApiKey,
    'sasl.password': confluentSecret,
    # A fresh UUID means every run of this cell uses a brand-new consumer group.
    'group.id': str(uuid.uuid1()),
    'auto.offset.reset': 'earliest',
    'error_cb': error_cb,
}
c = Consumer(mn_consumer_settings)

mn_topics = ['mn-farm-count']
c.subscribe(mn_topics)

In [0]:
# Start from empty accumulators so this cell does not carry over records
# collected from the previous topic.
aString = {}
kafkaListDictionaries = []

# Drain up to 1000 messages from the subscribed topic, then release the
# consumer. To run this cell again, first re-run the cell that creates `c`.
try:
    for _ in range(1000):
        msg = c.poll(timeout=1.0)
        if msg is None:
            # Nothing arrived within the timeout: stop reading.
            break
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            break

        payload = msg.value()
        if payload is None:
            # A record without a value has nothing to decode; skip it rather
            # than letting .decode() fail and end the whole loop.
            continue

        aString = json.loads(payload.decode('utf-8'))
        # Second element of msg.timestamp(); per the confluent-kafka docs this
        # is the timestamp value (the first element is its type).
        aString['timestamp'] = msg.timestamp()[1]
        kafkaListDictionaries.append(aString)
        print(msg.offset())
        c.commit()

except Exception as e:
    # Best-effort: report the problem and keep whatever was collected so far.
    print(e)
finally:
    # Always release the consumer's connections and group membership.
    c.close()

for message in kafkaListDictionaries:
    print(message)

In [0]:
# Build a DataFrame from the consumed records, keep the three columns of
# interest, and rename two of them as listed in the mapping below.
mn_column_renames = {
    'Income Class': 'IncomeClass',
    'estimate': 'Count',
}

mn_df = spark.createDataFrame(kafkaListDictionaries).select(
    'Income Class', 'Specialty', 'estimate'
)
for source_name, target_name in mn_column_renames.items():
    mn_df = mn_df.withColumnRenamed(source_name, target_name)

display(mn_df)

IncomeClass,Specialty,Count
"$500,000 to $999,999",General Cash Grains,563.0
"$250,000 to $499,999",General Cash Grains,1035.0
"$100,000 to $249,999",General Cash Grains,942.0
"$100,000 to $249,999",Wheat,0.0
"Less than $100,000",Wheat,0.0
"$100,000 to $249,999",Corn,2489.0
"Less than $100,000",Corn,3532.0
"$500,000 to $999,999",Soybean,601.0
"$250,000 to $499,999",Soybean,290.0
"$100,000 to $249,999",Soybean,2997.0


In [0]:
# Point `table` at the farm-count table and append the rows of mn_df to it.
table = 'FarmCount'

mn_jdbc_options = {
    "url": f"jdbc:sqlserver://{server}:1433;databaseName={database};",
    "dbtable": table,
    "user": user,
    "password": password,
    "driver": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
}
mn_writer = mn_df.write.format('jdbc').mode("append")
mn_writer.options(**mn_jdbc_options).save()

In [0]:
# Read `table` back over JDBC and show it; any failure is printed rather than
# raised.
try:
    readback_options = {
        "url": f"jdbc:sqlserver://{server}:1433;databaseName={database};",
        "dbtable": table,
        "user": user,
        "password": password,
        "driver": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
    }
    jdbcDF = spark.read.format("jdbc").options(**readback_options).load()

    # Show the data loaded into the DataFrame.
    jdbcDF.show()
except Exception as exc:
    print(f'Error occured: {exc}')
    