In [None]:
import json
import logging
from io import BytesIO
from typing import Optional

import polars as pl
from azure.identity import ClientSecretCredential
from azure.storage.blob import BlobServiceClient, ContainerClient

# Set the "azure" logger to WARNING so that records below that level
# (DEBUG/INFO) from it, and from child loggers without a level of their own,
# are not emitted in the notebook output.
logger = logging.getLogger("azure")
logger.setLevel(logging.WARNING)


def connect_blob(conn_str_path: str) -> BlobServiceClient:
    """Connect to Azure Blob Storage using a connection string stored in a file.

    Args:
        conn_str_path: Path to a text file whose content is the storage
            account connection string.

    Returns:
        A BlobServiceClient built from that connection string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(conn_str_path) as f:
        # Text files usually end with a newline; left in place it would become
        # part of the last "key=value" pair of the connection string.
        conn_str = f.read().strip()
    return BlobServiceClient.from_connection_string(conn_str)


def json_to_blob_client(service_path: str, storage_url: str) -> BlobServiceClient:
    """Create a BlobServiceClient authenticated with a client-secret credential.

    The JSON file at ``service_path`` must contain the keys ``tenantId``,
    ``clientId`` and ``clientSecret``. They are used to build a
    ``ClientSecretCredential``, which is then passed together with
    ``storage_url`` to ``BlobServiceClient``.

    Raises:
        KeyError: If one of the three keys is missing from the JSON file.
    """
    with open(service_path) as handle:
        settings = json.load(handle)

    # The credential takes the Azure AD values positionally:
    # tenant id, client id, client secret.
    credential = ClientSecretCredential(
        settings["tenantId"],
        settings["clientId"],
        settings["clientSecret"],
    )
    return BlobServiceClient(storage_url, credential=credential)


def create_container(container_name: str, blob_service_client: BlobServiceClient) -> bool:
    """Create a container in Azure Blob Storage unless it already exists.

    Args:
        container_name: Name of the container to create.
        blob_service_client: Client for the target storage account.

    Returns:
        True if the container was created by this call, False if a container
        with that name already existed (a message is printed in that case).
    """
    if blob_service_client.get_container_client(container_name).exists():
        print(f"Container {container_name} already exists")
        return False
    blob_service_client.create_container(container_name)
    return True


def upload_blob(
    data: bytes, container_name: str, blob_name: str, blob_service_client: BlobServiceClient, overwrite: bool = False
) -> bool:
    """Upload a blob to Azure Blob Storage.

    Args:
        data: Content to store in the blob.
        container_name: Name of the container holding the blob.
        blob_name: Name of the blob to write.
        blob_service_client: Client for the target storage account.
        overwrite: When False (the default) an existing blob is left untouched.

    Returns:
        True if the data was uploaded, False if the upload was skipped because
        the blob already exists and ``overwrite`` is False (a message is
        printed in that case).
    """
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    if not overwrite and blob_client.exists():
        print(f"Blob {blob_name} already exists")
        return False
    # The SDK call always runs with overwrite=True; when ``overwrite`` is False
    # the existence check above is the only guard against replacing a blob.
    blob_client.upload_blob(data, overwrite=True)
    return True


def download_blob(container_name: str, blob_name: str, blob_service_client: BlobServiceClient) -> Optional[BytesIO]:
    """Download a blob from Azure Blob Storage into memory.

    Args:
        container_name: Name of the container holding the blob.
        blob_name: Name of the blob to read.
        blob_service_client: Client for the target storage account.

    Returns:
        A BytesIO holding the full blob content, or None if the blob does not
        exist (a message is printed in that case).
    """
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    if not blob_client.exists():
        print(f"Blob {blob_name} does not exist")
        return None
    # readall() pulls the whole blob into memory before it is wrapped.
    blob_data = blob_client.download_blob().readall()
    return BytesIO(blob_data)


def list_blobs(container_name: str, blob_service_client: BlobServiceClient) -> list:
    """Return the names of all blobs found in the given container."""
    names = []
    for entry in blob_service_client.get_container_client(container_name).list_blobs():
        names.append(entry.name)
    return names


def delete_blob(container_name: str, blob_name: str, blob_service_client: BlobServiceClient) -> bool:
    """Delete a blob from Azure Blob Storage.

    Args:
        container_name: Name of the container holding the blob.
        blob_name: Name of the blob to delete.
        blob_service_client: Client for the target storage account.

    Returns:
        True once the delete call has returned; a confirmation is printed.
        Exceptions raised by the SDK delete call are not caught here.
    """
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    blob_client.delete_blob()
    print(f"Blob {blob_name} deleted")
    return True


def delete_container(container_name: str, blob_service_client: BlobServiceClient) -> bool:
    """Delete a container from Azure Blob Storage.

    Args:
        container_name: Name of the container to delete.
        blob_service_client: Client for the target storage account.

    Returns:
        True once the delete call has returned; a confirmation is printed.
        Exceptions raised by the SDK delete call are not caught here.
    """
    container_client = blob_service_client.get_container_client(container_name)
    container_client.delete_container()
    print(f"Container {container_name} deleted")
    return True


def list_containers(blob_service_client: BlobServiceClient) -> list:
    """Return the name of every container the client lists for the account."""
    container_names = []
    for props in blob_service_client.list_containers():
        container_names.append(props.name)
    return container_names

In [None]:
def get_bs_client(creds_id: str = "central") -> BlobServiceClient:
    """Build a blob service client from one of the known JSON secret files.

    ``creds_id`` selects the secret file and must be "central", "external"
    or "admin"; any other value raises ``ValueError``. The selected file is
    handed to ``json_to_blob_client`` together with the storage account host.
    """
    secret_files = (
        ("central", "../.secrets/enterprise-application-plugin-blob-central-p.json"),
        ("external", "../.secrets/enterprise-application-plugin-blob-external-p.json"),
        ("admin", "../.secrets/acr-prod-pushpull.json"),
    )
    for known_id, candidate in secret_files:
        if creds_id == known_id:
            secret_path = candidate
            break
    else:
        # No entry matched the requested id.
        raise ValueError(f"Invalid creds_id: {creds_id}")

    # All credential sets are used against the same storage account host.
    return json_to_blob_client(secret_path, "stplugindhdp.blob.core.windows.net")

In [None]:
def write_polars_to_blob(
    df: pl.DataFrame, container_name: str, blob_name: str, blob_service_client: BlobServiceClient
) -> None:
    """Serialise a Polars DataFrame to parquet and store it as a blob.

    The upload is done through ``upload_blob`` with ``overwrite=True``. Any
    exception raised while serialising or uploading is caught and reported
    with ``print``; the function itself returns nothing.
    """
    try:
        # Serialise into memory first so the parquet bytes can be handed over in one piece.
        parquet_buffer = BytesIO()
        df.write_parquet(parquet_buffer)

        uploaded = upload_blob(
            parquet_buffer.getvalue(),
            container_name,
            blob_name,
            blob_service_client,
            overwrite=True,
        )
        if uploaded:
            print(f"Successfully uploaded {blob_name}")
    except Exception as e:
        print(f"Error uploading DataFrame: {e}")


def retrieve_polars_from_blob(
    container_name: str, blob_name: str, blob_service_client: BlobServiceClient
) -> Optional[pl.DataFrame]:
    """Retrieve a parquet file from Azure Blob Storage as a Polars DataFrame.

    Args:
        container_name: Name of the container holding the blob.
        blob_name: Name of the parquet blob to read.
        blob_service_client: Client for the target storage account.

    Returns:
        The DataFrame read from the blob, or None when ``download_blob``
        reports the blob as missing or when any exception occurs while
        downloading or parsing (the exception is printed, not re-raised).
    """
    try:
        # download_blob returns None (after printing a message) for a missing blob.
        buffer = download_blob(container_name, blob_name, blob_service_client)
        if buffer is None:
            return None

        return pl.read_parquet(buffer)

    except Exception as e:
        print(f"Error downloading DataFrame: {e}")
        return None

In [None]:
# Names of the two containers used by the write/read checks in the cells below.
central_container = "plugin-aioc-central"
external_container = "plugin-aioc-external"
# Small sample frame used as the payload for every write check.
df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

In [None]:
# Client built from the "admin" secret file (see get_bs_client).
# Both helpers used below catch exceptions and print them, so a failing call
# shows up as a printed error (and a printed None for reads), not a traceback.
admin_cli = get_bs_client(creds_id="admin")


# Write the sample frame to the central container.
write_polars_to_blob(df, central_container, "test-admin.pq", admin_cli)
# Read it back from the central container.
print(retrieve_polars_from_blob(central_container, "test-admin.pq", admin_cli))

# Write the sample frame to the external container.
write_polars_to_blob(df, external_container, "test-admin.pq", admin_cli)

# Read it back from the external container.
print(retrieve_polars_from_blob(external_container, "test-admin.pq", admin_cli))

In [None]:
# Client built from the "central" secret file (see get_bs_client).
# Both helpers used below catch exceptions and print them, so a failing call
# shows up as a printed error (and a printed None for reads), not a traceback.
central_cli = get_bs_client(creds_id="central")

# Write the sample frame to the central container.
print("writing to central container")
write_polars_to_blob(df, central_container, "test-central.pq", central_cli)

print("-------------------")

# Read the blob written above back from the central container.
print("reading from central container")
print(retrieve_polars_from_blob(central_container, "test-central.pq", central_cli))

print("-------------------")

# Write the sample frame to the external container.
print("writing to external container")
write_polars_to_blob(df, external_container, "test-central.pq", central_cli)

print("-------------------")

# Read the blob written above back from the external container.
print("reading from external container")
print(retrieve_polars_from_blob(external_container, "test-central.pq", central_cli))

In [None]:
# Client built from the "external" secret file (see get_bs_client).
# Both helpers used below catch exceptions and print them, so a failing call
# shows up as a printed error (and a printed None for reads), not a traceback.
external_cli = get_bs_client(creds_id="external")

# Write the sample frame to the central container.
print("test write to central container")
write_polars_to_blob(df, central_container, "test-external.pq", external_cli)

print("-----------------------------------")

# Read from the central container. Note this reads "test-admin.pq" (the blob
# written in the admin cell), not the "test-external.pq" blob written above.
# NOTE(review): presumably so the read check does not depend on the write
# above having succeeded - confirm.
print("test read to central container")
print(retrieve_polars_from_blob(central_container, "test-admin.pq", external_cli))

print("-----------------------------------")

# Write the sample frame to the external container.
print("test write to external container")
write_polars_to_blob(df, external_container, "test-external.pq", external_cli)

print("-----------------------------------")

# Read from the external container; again targets "test-admin.pq".
print("test read to external container")
print(retrieve_polars_from_blob(external_container, "test-admin.pq", external_cli))

enterprise-application-plugin-blob-central-p -> mist nog leesrechten op de external- en de central-container
- kan schrijven naar plugin-aioc-central
- kan schrijven naar plugin-aioc-external
- kan niet lezen van beide containers

enterprise-application-plugin-blob-external-p -> staat nu helemaal goed
- kan lezen van plugin-aioc-central
- kan niet schrijven naar plugin-aioc-central
- kan schrijven naar plugin-aioc-external
- kan niet lezen van plugin-aioc-external