In [1]:
import os
import logging
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient
from azure.core.exceptions import HttpResponseError

In [2]:
# Configure logging.
# basicConfig sets the root logger to INFO, so INFO records emitted by the
# Azure SDK loggers are shown alongside this notebook's own messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Storage Account and Container info
STORAGE_ACCOUNT_NAME = "amldatalakestore"
CONTAINER_NAME = "default-prediction-data"

# Local folder that holds the processed Parquet files to upload.
# The default is the original, machine-specific location; set the
# PARQUET_DIRECTORY environment variable to run the notebook on another
# machine without editing this cell. `or` (rather than a .get() default)
# makes an empty environment variable fall back to the default as well.
PARQUET_DIRECTORY = os.environ.get("PARQUET_DIRECTORY") or (
    r"C:/Users/ishii/Desktop/American-Express-Default-Prediction/data/processed"
)

# Authenticate using Azure Identity.
# The credential object is passed to BlobServiceClient in the upload cell.
credential = DefaultAzureCredential()

INFO:azure.identity._credentials.environment:No environment configuration found.
INFO:azure.identity._credentials.managed_identity:ManagedIdentityCredential will use IMDS


In [3]:
# Upload every '.parquet' file found directly inside PARQUET_DIRECTORY to the
# blob container, using the file name as the blob name.
#
# The cell is best-effort: every failure is logged instead of raised, a
# failure on one file does not stop the remaining uploads, and a summary of
# successes and failures is logged at the end.

# --- Step 1: build the storage clients ------------------------------------
# Reset first so that a failed setup can never leave a client from an earlier
# run of this cell in scope.
container_client = None
try:
    # Connect to the Data Lake Storage Account
    blob_service_client = BlobServiceClient(
        account_url=f"https://{STORAGE_ACCOUNT_NAME}.blob.core.windows.net",
        credential=credential
    )

    # Get the container client
    container_client = blob_service_client.get_container_client(CONTAINER_NAME)
except HttpResponseError as e:
    logger.error(f"An HTTP error occurred during connection setup: {e}")
except Exception as e:
    # logger.exception keeps the traceback, which is what makes an
    # "unexpected" error diagnosable.
    logger.exception(f"An unexpected error occurred during connection setup: {e}")

# --- Step 2: collect the files to upload ----------------------------------
# Local filesystem problems (missing folder, permissions) are reported as
# such rather than as connection errors.
parquet_files = []
if container_client is not None:
    try:
        # sorted(): os.listdir returns entries in arbitrary order; sorting
        # makes the upload order (and the log) reproducible.
        # isfile(): skip directories whose name ends in '.parquet'
        # (e.g. partitioned datasets), which cannot be opened as a file.
        parquet_files = sorted(
            entry for entry in os.listdir(PARQUET_DIRECTORY)
            if entry.endswith('.parquet')
            and os.path.isfile(os.path.join(PARQUET_DIRECTORY, entry))
        )
    except OSError as e:
        logger.error(f"Could not list Parquet files in '{PARQUET_DIRECTORY}': {e}")
    else:
        if not parquet_files:
            logger.warning(f"No '.parquet' files found in '{PARQUET_DIRECTORY}'.")

# --- Step 3: upload each file ---------------------------------------------
uploaded_files = []
failed_files = []
for parquet_file in parquet_files:
    parquet_file_path = os.path.join(PARQUET_DIRECTORY, parquet_file)
    try:
        logger.info(f"Uploading '{parquet_file}' to container '{CONTAINER_NAME}'...")
        # The file handle is passed as a stream and closed by the `with`
        # block even if the upload raises.
        with open(parquet_file_path, "rb") as data:
            container_client.upload_blob(
                name=parquet_file,
                data=data,
                overwrite=True  # replace an existing blob of the same name
            )
        logger.info(f"'{parquet_file}' uploaded successfully.")
        uploaded_files.append(parquet_file)

    except HttpResponseError as e:
        logger.error(f"An HTTP error occurred while uploading '{parquet_file}': {e}")
        failed_files.append(parquet_file)
    except Exception as e:
        logger.exception(f"An unexpected error occurred while uploading '{parquet_file}': {e}")
        failed_files.append(parquet_file)

# --- Step 4: summary ------------------------------------------------------
# Individual failures are only logged, so state the overall outcome
# explicitly instead of leaving the reader to scan the log.
if parquet_files:
    logger.info(
        f"Upload finished: {len(uploaded_files)} succeeded, {len(failed_files)} failed."
    )
    if failed_files:
        logger.warning(f"Files that were not uploaded: {failed_files}")

INFO:__main__:Uploading 'test.parquet' to container 'default-prediction-data'...
INFO:azure.core.pipeline.policies.http_logging_policy:Request URL: 'http://169.254.169.254/metadata/identity/oauth2/token?api-version=REDACTED&resource=REDACTED'
Request method: 'GET'
Request headers:
    'User-Agent': 'azsdk-python-identity/1.19.0 Python/3.9.20 (Windows-10-10.0.22631-SP0)'
No body was attached to the request
INFO:azure.identity._credentials.chained:DefaultAzureCredential acquired a token from AzureCliCredential
INFO:azure.core.pipeline.policies.http_logging_policy:Request URL: 'https://amldatalakestore.blob.core.windows.net/default-prediction-data/test.parquet?comp=REDACTED&blockid=REDACTED'
Request method: 'PUT'
Request headers:
    'Content-Length': '4194304'
    'x-ms-version': 'REDACTED'
    'Content-Type': 'application/octet-stream'
    'Accept': 'application/xml'
    'User-Agent': 'azsdk-python-storage-blob/12.23.1 Python/3.9.20 (Windows-10-10.0.22631-SP0)'
    'x-ms-date': 'REDACTE