In [62]:
import hopsworks
import json
import pandas as pd

from confluent_kafka import Producer

In [63]:
# Log in to Hopsworks; the returned project handle is used below to obtain the Kafka API.
project = hopsworks.login()

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://pocs.cloud.hopsworks.ai/p/125


In [None]:
# Create the Kafka topic (and the schema it is registered with) that the
# transactions are published to. The creation step is guarded so the cell can
# be re-run (e.g. Restart Kernel -> Run All) after the topic already exists.
KAFKA_TOPIC_NAME = "real_time_live_transactions"
SCHEMA_NAME = "live_transactions_schema"

kafka_api = project.get_kafka_api()

# Avro-style record schema; every field is a union with "null", i.e. optional.
# NOTE(review): the rows published later in this notebook carry a `datetime`
# string and a string `cc_num`, whereas this schema declares `timestamp`
# (long, timestamp-micros) and `cc_num` (long) -- confirm which side the
# downstream consumer actually relies on.
schema = {
    "type": "record",
    "name": SCHEMA_NAME,
    "namespace": "ai.hopsworks.examples.bytewax.fraud",
    "fields": [
        {
            "name": "tid",
            "type": [
                "null",
                "string"
            ]
        },
        {
            "name": "timestamp",
            "type": [
                "null",
                {
                    "type": "long",
                    "logicalType": "timestamp-micros"
                }
            ]
        },
        {
            "name": "cc_num",
            "type": [
                "null",
                "long"
            ]
        },
        {
            "name": "amount",
            "type": [
                "null",
                "double"
            ]
        },
    ]
}

# Only register the schema and create the topic when the topic is missing;
# calling the create_* methods unconditionally makes a second run of this
# cell fail once the topic exists.
existing_topic_names = {topic.name for topic in kafka_api.get_topics()}
if KAFKA_TOPIC_NAME not in existing_topic_names:
    kafka_api.create_schema(SCHEMA_NAME, schema)
    # Third positional argument is presumably the schema version -- TODO confirm.
    kafka_api.create_topic(KAFKA_TOPIC_NAME, SCHEMA_NAME, 1, replicas=1, partitions=1)

In [66]:
# Set up the Kafka producer from the project's default client configuration.
kafka_config = kafka_api.get_default_config()

# Printed for inspection; the recorded output shows SSL certificate paths,
# client/group ids and the bootstrap server address.
print(kafka_config)
producer = Producer(kafka_config)

{'security.protocol': 'SSL', 'ssl.ca.location': '/tmp/ca_chain.pem', 'ssl.certificate.location': '/tmp/client_cert.pem', 'ssl.key.location': '/tmp/client_key.pem', 'client.id': 'jupyter-real-time-fraud-python--fabiopoc-5dd447c85f-jpmw4', 'group.id': 'my-group-id', 'ssl.endpoint.identification.algorithm': 'none', 'bootstrap.servers': '172.16.4.223:9091'}


In [64]:
# Load the historical transactions that are published to Kafka further down.
# NOTE(review): the path climbs five directories above the notebook -- confirm
# it resolves in your checkout.
# NOTE(review): the loaded frame contains an "Unnamed: 0" column (likely a
# saved index); consider `index_col=0` if it should not end up in the messages.
transactions_pdf = pd.read_csv("../../../../../RawData/historical_transactions.csv")

In [65]:
# Display the loaded frame for a visual sanity check (pandas truncates the middle rows).
transactions_pdf

Unnamed: 0.1,Unnamed: 0,tid,datetime,cc_num,category,amount,latitude,longitude,city,country,fraud_label
0,0,889245b951dbe4af5e813fd3a796f690,2023-10-12 18:13:57,1fec5056a7aa1d952fde354e31fa3095,Grocery,42.51,42.425100,-71.066160,Malden,US,0
1,1,6ba02a6dbd3491939a42db9029d2efcd,2023-10-12 18:14:51,943571826a9ef8c079815b80f4870c5c,Domestic Transport,52.39,39.435340,-84.202990,Lebanon,US,0
2,2,18181617fc23876176c13e38bd35f711,2023-10-12 18:17:42,bf3296da48b23a6fba5315f90151ceab,Grocery,96.16,35.747880,-95.369690,Muskogee,US,0
3,3,d4335ac3dae3b103023e25287f250723,2023-10-12 18:44:07,a883ea3831e0a0bd58672655c586f990,Clothing,98.97,27.099780,-82.454260,Venice,US,0
4,4,35d25c232389667ff39b64d461f4ba63,2023-10-12 18:51:32,68a5869412e56139ebce00f393ba6fb5,Restaurant/Cafeteria,65.24,42.739200,-84.620810,Waverly,US,0
...,...,...,...,...,...,...,...,...,...,...,...
74803,74803,330416731e4bfd843120a447478c13f7,2024-03-18 08:13:11,aca95724ff0b9ad2a73540234cae2d59,Cash Withdrawal,19.74,41.149146,-73.507012,New Canaan,US,0
74804,74804,8e7d56e902f631f4f66514219ba9be48,2024-03-16 09:13:11,aca95724ff0b9ad2a73540234cae2d59,Cash Withdrawal,2.88,41.140591,-73.499096,New Canaan,US,0
74805,74805,a8a4f239d25986aa2f99b3fa2635e41b,2024-03-14 10:13:11,aca95724ff0b9ad2a73540234cae2d59,Cash Withdrawal,440.57,41.143777,-73.506725,New Canaan,US,0
74806,74806,1209e928850d0cc60966aaae7741864f,2024-03-12 11:13:11,aca95724ff0b9ad2a73540234cae2d59,Cash Withdrawal,0.38,41.153046,-73.510644,New Canaan,US,0


In [67]:
# Convert the frame to a list of plain-Python dicts, one per row, by
# round-tripping through pandas' JSON serialisation (orient="records").
data = json.loads(transactions_pdf.to_json(orient="records"))

In [68]:
# Inspect one record to see the exact payload that will be sent to Kafka.
data[1]

{'Unnamed: 0': 1,
 'tid': '6ba02a6dbd3491939a42db9029d2efcd',
 'datetime': '2023-10-12 18:14:51',
 'cc_num': '943571826a9ef8c079815b80f4870c5c',
 'category': 'Domestic Transport',
 'amount': 52.39,
 'latitude': 39.43534,
 'longitude': -84.20299,
 'city': 'Lebanon',
 'country': 'US',
 'fraud_label': 0}

In [70]:
# Publish every transaction record to the Kafka topic as a JSON-encoded message.
# `produce()` only enqueues the message in the client; `flush()` blocks until
# the queued messages have been delivered, so we flush after every 10 messages.
batch_size = 0
for t in data:
    producer.produce(KAFKA_TOPIC_NAME, json.dumps(t))
    batch_size += 1

    if batch_size == 10:
        producer.flush()
        batch_size = 0

# Flush the final partial batch. Without this, when len(data) is not a multiple
# of 10 the trailing messages stay in the producer's queue and may never be sent.
producer.flush()