In [0]:
%pip install azure-eventhub

In [0]:
# Restart the Python process so that the packages installed by the %pip cell
# above are importable in the cells that follow.
dbutils.library.restartPython()

Subscrever o Event Hubs e armazenar os dados recebidos no Blob Storage

In [0]:
import ast
import io
import json
import os

import pandas as pd
from azure.eventhub import EventHubConsumerClient
from azure.storage.blob import BlobServiceClient

def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: If the variable is unset or empty. Failing here names
            the missing setting instead of letting ``None`` reach the SDK
            ``from_connection_string`` factories below.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; "
            "it must contain the corresponding Azure connection string."
        )
    return value


# Azure Event Hubs configuration. The connection string is a secret, so it is
# read from the environment rather than hard-coded in the notebook.
EVENT_HUB_CONNECTION_STRING = _require_env("EVENT_HUB_CONNECTION_STRING")
EVENT_HUB_NAME = "weather-data"
CONSUMER_GROUP = "$Default"

# Azure Blob Storage configuration: target container and the single Parquet
# blob that accumulates every received event.
BLOB_CONNECTION_STRING = _require_env("BLOB_CONNECTION_STRING")
BLOB_CONTAINER = "weather-data"
BLOB_NAME = "weather-data.parquet"

# Blob Storage clients (service -> container).
blob_service_client = BlobServiceClient.from_connection_string(BLOB_CONNECTION_STRING)
container_client = blob_service_client.get_container_client(BLOB_CONTAINER)

# Event Hubs consumer bound to the hub and consumer group configured above.
client = EventHubConsumerClient.from_connection_string(
    EVENT_HUB_CONNECTION_STRING,
    CONSUMER_GROUP,
    eventhub_name=EVENT_HUB_NAME,
)

def _parse_event_payload(payload):
    """Decode one event body into a Python object without executing code.

    JSON is tried first; bodies that are not valid JSON are then parsed as a
    Python literal (e.g. the ``str()`` of a dict), so payloads the previous
    ``eval``-based decoding accepted as plain data still decode. Arbitrary
    expressions are rejected instead of being evaluated.

    Raises:
        ValueError: If the body is neither valid JSON nor a Python literal.
    """
    try:
        return json.loads(payload)
    except ValueError:
        # Not JSON (json.JSONDecodeError is a ValueError); try a literal next.
        pass
    try:
        return ast.literal_eval(payload)
    except (ValueError, SyntaxError, TypeError) as exc:
        raise ValueError(f"Unsupported event payload: {payload!r}") from exc


def on_event_batch(partition_context, events):
    """Append a batch of received events to the Parquet blob.

    Used as the callback passed to ``client.receive_batch``. Each event body
    becomes one record; the records are appended to the current contents of
    the ``BLOB_NAME`` blob (when it already exists) and the combined table is
    uploaded back, overwriting the blob.

    Args:
        partition_context: Context object supplied by the consumer client.
            Not used by this callback.
        events: Iterable of received events, each providing ``body_as_str()``.
            An empty batch is a no-op.

    Raises:
        ValueError: If an event body cannot be decoded as data.
    """
    new_data = []

    for event in events:
        event_data = event.body_as_str()
        print(f"Recebido: {event_data}")
        new_data.append(event_data)

    if not new_data:
        return

    # Event bodies come from the network, so they are parsed as data only.
    new_df = pd.DataFrame([_parse_event_payload(payload) for payload in new_data])

    blob_client = container_client.get_blob_client(BLOB_NAME)

    # Only a missing blob means "start a new file". Any other failure
    # (network, auth, unreadable Parquet) propagates, so the existing data is
    # never overwritten with just the new rows.
    if blob_client.exists():
        existing_data = io.BytesIO()
        blob_client.download_blob().readinto(existing_data)
        existing_data.seek(0)  # rewind so the reader starts at the first byte
        df_existing = pd.read_parquet(existing_data)
        df_final = pd.concat([df_existing, new_df], ignore_index=True)
    else:
        print("New Parquet file created.")
        df_final = new_df

    # Serialize in memory and replace the blob with the combined table.
    output = io.BytesIO()
    df_final.to_parquet(output, engine="pyarrow", index=False)
    output.seek(0)
    blob_client.upload_blob(output, overwrite=True)
    print(f"Data stored in Blob Storage: {BLOB_NAME}")

# Receive events and hand each batch (at most one event, per max_batch_size=1)
# to on_event_batch. starting_position="-1" asks for events from the beginning
# of each partition. The client is used as a context manager so that it is
# closed even when receive_batch raises or the cell is interrupted.
with client:
    client.receive_batch(on_event_batch, max_batch_size=1, starting_position="-1")