In [None]:
import sys
import os
import yaml
import configparser
import json

import logging

# Add the src directory to the sys.path
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

# 1. Environment Configuration

## 1.1 Import Dependencies

In [None]:
import boto3
import json
import os
from uuid import uuid4
from datetime import datetime
from datetime import timedelta
import time
import random
import uuid
import logging
# import numpy as np

import pyspark.sql.types as t
import pyspark.sql.functions as f

In [None]:
# from spark_session import create_spark_session
from schemas import *
from functions import *

## 1.2 Extract AWS credentials

In [None]:
def load_aws_credentials(profile_name="default"):
    """
    Load an AWS access key pair from the project-local ``../.aws/credentials`` file.

    :param profile_name: Section of the credentials file to read (default: "default").
    :return: Tuple ``(aws_access_key_id, aws_secret_access_key)``.

    Logs an error and terminates with ``sys.exit(1)`` when the file cannot be
    read or parsed, when the profile is missing, or when either key is absent
    or empty.
    """
    credentials_path = os.path.join('..', '.aws', 'credentials')

    # Load credentials from the .aws/credentials file (local development)
    try:
        credentials = configparser.ConfigParser()
        # ConfigParser.read() silently skips files it cannot open and returns
        # the list of files it actually parsed; an empty list means nothing
        # was loaded, which must be treated as an error here.
        if not credentials.read(credentials_path):
            raise FileNotFoundError(f"{credentials_path} not found or not readable")

        logging.info("Successfully loaded credentials variables from .aws file.")
    except Exception as e:
        logging.error(f"Error loading .aws file: {e}")
        sys.exit(1)

    if profile_name not in credentials:
        logging.error(f"AWS profile '{profile_name}' not found in {credentials_path}.")
        sys.exit(1)

    # .get() returns None for a missing option, so a missing key and an empty
    # key both end up in the "not found" branch below instead of a KeyError.
    profile = credentials[profile_name]
    aws_access_key_id = profile.get("aws_access_key_id")
    aws_secret_access_key = profile.get("aws_secret_access_key")

    if not aws_access_key_id or not aws_secret_access_key:
        logging.error("AWS credentials not found.")
        sys.exit(1)

    return aws_access_key_id, aws_secret_access_key

aws_access_key_id, aws_secret_access_key = load_aws_credentials()

## 1.3 Extract AWS Config parameters

In [None]:
def load_aws_config():
    """
    Load the project-local AWS configuration from ``../.aws/config``.

    :return: The parsed ``configparser.ConfigParser``. Callers index it by
        section and option, e.g. ``config["default"]["REGION"]``.

    Logs an error and terminates with ``sys.exit(1)`` when the file is missing,
    unreadable or cannot be parsed.
    """
    config_path = os.path.join('..', '.aws', 'config')
    try:
        config = configparser.ConfigParser()
        # read() returns the list of files it managed to parse and does NOT
        # raise for a missing file, so check the result explicitly.
        if not config.read(config_path):
            raise FileNotFoundError(f"{config_path} not found or not readable")
        logging.info("Successfully loaded config variables from .aws file.")

        return config
    except Exception as e:
        logging.error(f"Error loading .aws file: {e}")
        sys.exit(1)

config = load_aws_config()

In [None]:
# --- Storage, stream and region identifiers ---
BUCKET_NAME = "vproptimiserplatform"
ORDERS = "orders"
STREAM_NAME = "orders_stream_3"
REGION = config["default"]["REGION"]

# --- Layer names ---
BRONZE = "bronze"
SILVER = "silver"
GOLD = "gold"
DELTA = "delta"

PROCESSING_TRIGGER = "5 seconds"

# --- Dataset names (used as path segments and as table-name stems) ---
EVENTS = "events"
ORDERS_ITEMS = "orders_items"
PRODUCTS = "products_table"

# Shared prefixes: every location below is derived from these.
_ORDERS_ROOT = f"s3a://{BUCKET_NAME}/{ORDERS}"
_DELTA_ROOT = f"{_ORDERS_ROOT}/{DELTA}"
_CHECKPOINTS_ROOT = f"{_DELTA_ROOT}/checkpoints"
_BRONZE_ROOT = f"{_DELTA_ROOT}/{BRONZE}"

# --- Checkpoints for the append-only streams ---
EVENTS_CHECKPOINT_LOCATION = f"{_CHECKPOINTS_ROOT}/events"
ORDERS_CHECKPOINT_LOCATION = f"{_CHECKPOINTS_ROOT}/orders"
ORDERS_ITEMS_CHECKPOINT_LOCATION = f"{_CHECKPOINTS_ROOT}/orders_items"

# --- Checkpoints for the update (upsert) streams ---
EVENTS_UPDATE_CHECKPOINT_LOCATION = f"{_CHECKPOINTS_ROOT}/events_update"
ORDERS_UPDATE_CHECKPOINT_LOCATION = f"{_CHECKPOINTS_ROOT}/orders_update"
ORDERS_ITEMS_UPDATE_CHECKPOINT_LOCATION = f"{_CHECKPOINTS_ROOT}/orders_items_update"
PRODUCTS_CHECKPOINT_LOCATION = f"{_CHECKPOINTS_ROOT}/inventory_update"

# --- Table locations ---
# TODO: to be confirmed whether these follow a medallion architecture
# (bronze tables live under the delta root, products directly under gold).
EVENTS_PATH = f"{_BRONZE_ROOT}/{EVENTS}"
ORDERS_PATH = f"{_BRONZE_ROOT}/{ORDERS}"
ORDERS_ITEMS_PATH = f"{_BRONZE_ROOT}/{ORDERS_ITEMS}"
PRODUCTS_PATH = f"{_ORDERS_ROOT}/{GOLD}/{PRODUCTS}"

ORDERS_STREAM_PATH = f"{_BRONZE_ROOT}/{STREAM_NAME}"
ORDERS_STREAM_CHECKPOINT_LOCATION = f"{_CHECKPOINTS_ROOT}/{STREAM_NAME}"

# --- Table names ---
EVENTS_TABLE = f"{EVENTS}_table"
ORDERS_TABLE = f"{ORDERS}_table"
ORDERS_ITEMS_TABLE = f"{ORDERS_ITEMS}_table"

# --- Kafka ---
TOPIC_NAME = "order_stream"
BOOTSTRAP_SERVER = "51.92.77.20:9092"

## 1.4 Initialise Spark Session

In [None]:
# Legacy local Kafka demo session built from jar files on a local Windows drive.
# NOTE(review): if this cell runs, the later create_spark_session() call will get
# this same session back from getOrCreate(), and startup-only settings such as
# spark.jars.packages and the master will not be re-applied. Skip or remove this
# cell when the Delta/S3 session is the one that is wanted.
from pyspark.sql import SparkSession  # this cell runs before the cell that imports SparkSession

_KAFKA_DEMO_JARS = (
    "file:///D://work//development//spark_structured_streaming_kafka//spark-sql-kafka-0-10_2.11-2.4.0.jar",
    "file:///D://work//development//spark_structured_streaming_kafka//kafka-clients-1.1.0.jar",
)
# spark.jars expects a comma-separated list. The original literal was broken
# across two physical lines, which is a SyntaxError for a single-quoted string.
_KAFKA_DEMO_JARS_CSV = ",".join(_KAFKA_DEMO_JARS)
# The class-path style settings keep the ':' separator used originally.
_KAFKA_DEMO_CLASSPATH = ":".join(_KAFKA_DEMO_JARS)

spark = (
    SparkSession
    .builder
    .appName("PySpark Structured Streaming with Kafka Demo")
    .master("local[*]")
    .config("spark.jars", _KAFKA_DEMO_JARS_CSV)
    .config("spark.executor.extraClassPath", _KAFKA_DEMO_CLASSPATH)
    # NOTE(review): "spark.executor.extraLibrary" is kept as written; confirm
    # whether "spark.executor.extraLibraryPath" was intended.
    .config("spark.executor.extraLibrary", _KAFKA_DEMO_CLASSPATH)
    .config("spark.driver.extraClassPath", _KAFKA_DEMO_CLASSPATH)
    .getOrCreate()
)

In [None]:
from delta import *
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf

def create_spark_session(aws_access_key_id, aws_secret_access_key, cores_number="2"):
    """
    Create and configure a local Spark session with S3A credentials plus the
    Kafka and Delta Lake packages.

    :param aws_access_key_id: AWS access key ID used for the S3A filesystem.
    :param aws_secret_access_key: AWS secret access key used for the S3A filesystem.
    :param cores_number: Number of local cores, interpolated into ``local[N]`` (default is 2).
    :return: SparkSession
    :raises Exception: Any error raised while building the session is printed and re-raised.
    """
    # Connectors to resolve alongside Delta. They are handed to
    # configure_spark_with_delta_pip() rather than set on the SparkConf,
    # because that helper sets "spark.jars.packages" itself (Delta artifact +
    # extra_packages) and would overwrite a value set on the conf, silently
    # dropping the S3A and Kafka connectors. The Delta artifact is therefore
    # not listed here: the helper adds the one matching the installed package.
    extra_packages = [
        "org.apache.hadoop:hadoop-aws:3.3.2",
        "org.apache.spark:spark-sql-kafka-0-10_2.12:3.4.2",
        "org.apache.kafka:kafka-clients:3.4.2",
    ]

    try:
        # Configure the Spark session with AWS and Delta Lake settings
        conf = (
            SparkConf()
            .setAppName("VPR-data_landing")
            .set("spark.hadoop.fs.s3a.endpoint", "s3.eu-south-2.amazonaws.com")
            .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
            .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
            .set("spark.hadoop.fs.s3a.access.key", aws_access_key_id)
            .set("spark.hadoop.fs.s3a.secret.key", aws_secret_access_key)
            .setMaster(f"local[{cores_number}]")  # Use the specified number of cores
        )

        # Build the Spark session; the helper adds the Delta package and our extras
        builder = SparkSession.builder.config(conf=conf)
        spark = configure_spark_with_delta_pip(builder, extra_packages=extra_packages).getOrCreate()

        return spark

    except Exception as e:
        print(f"An error occurred while creating the Spark session: {str(e)}")
        raise  # Re-raise the exception after logging or handling

spark = create_spark_session(aws_access_key_id, aws_secret_access_key)

In [None]:
# Streaming read of the orders topic from Kafka, starting at the earliest
# available offsets.
kafka_order_stream = (
    spark
    .readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", BOOTSTRAP_SERVER)
    # The Kafka source option is spelled "subscribe"; the previous "suscribe"
    # left the source without any topic selection.
    .option("subscribe", TOPIC_NAME)
    .option("startingOffsets", "earliest")
    .load()
)

In [None]:
# The only stream defined in this notebook is kafka_order_stream;
# kinesis_order_stream is never assigned anywhere.
kafka_order_stream.printSchema()

In [None]:
# Debug sink: print each record's payload as text to the console.
(
    kafka_order_stream  # kinesis_order_stream is never defined in this notebook
    # The Kafka source exposes the payload in the binary `value` column; it has
    # no `data` column.
    # NOTE(review): unbase64 is carried over from the Kinesis version — confirm
    # that the Kafka producer really base64-encodes the payload.
    .withColumn("json_data", f.expr("CAST(unbase64(CAST(value AS STRING)) AS STRING)"))
    .select("json_data")
    .writeStream
    .format("console")
    .outputMode("append")
    .start()
)

In [None]:
# df_stream_test = ( 
#     kinesis_order_stream.writeStream
#     .outputMode("append")
#     .queryName("kinesis_orders")
#     .option("checkpointLocation", "checkpoints/kineisis_orders_tests_13")
#     .option("kinesis.endpointUrl", "https://kinesis.eu-south-2.amazonaws.com")
#     .format("memory")
#     .start()
# )

# spark.table("kinesis_orders").show()

In [None]:
# Parse each Kafka record's payload into the orders schema and flatten it to
# top-level columns.
df_orders_stream = (
    kafka_order_stream  # kinesis_order_stream is never defined in this notebook
    # The Kafka source exposes the payload in the binary `value` column; it has
    # no `data` column.
    # NOTE(review): unbase64 is carried over from the Kinesis version — confirm
    # that the Kafka producer really base64-encodes the payload.
    .withColumn("json_data", f.expr("CAST(unbase64(CAST(value AS STRING)) AS STRING)"))
    .withColumn("orders", f.from_json("json_data", orders_schema))
    .select("orders.*")
)

In [None]:
# Debug sink: append every parsed order to the console. The call stays the
# cell's last expression so the StreamingQuery handle is displayed.
df_orders_stream.writeStream.format("console").outputMode("append").start()

In [None]:
df_orders_stream.printSchema()

In [None]:
# client = boto3.client(
#     'kinesis',
#     aws_access_key_id=aws_access_key_id,
#     aws_secret_access_key=aws_secret_access_key,
#     region_name=REGION
# )

# response = client.describe_stream(StreamName=STREAM_NAME)
# print(response)

In [None]:
# client = boto3.client(
#     'kinesis',
#     aws_access_key_id=aws_access_key_id,
#     aws_secret_access_key=aws_secret_access_key,
#     region_name=REGION
# )

# # Get the shard iterator for the stream
# response = client.describe_stream(StreamName=STREAM_NAME)
# shard_id = response['StreamDescription']['Shards'][0]['ShardId']

# shard_iterator_response = client.get_shard_iterator(
#     StreamName=STREAM_NAME,
#     ShardId=shard_id,
#     ShardIteratorType='TRIM_HORIZON'  # Use 'LATEST' for most recent records
# )
# shard_iterator = shard_iterator_response['ShardIterator']

# # Fetch records
# record_response = client.get_records(ShardIterator=shard_iterator, Limit=100)
# record_response

In [None]:
# response['ResponseMetadata']

# 2. Orders Stream Subscriber workaround / Connection to Kinesis failed

In [None]:
def get_kinesis_client(aws_access_key_id, aws_secret_access_key, region_name):
    """
    Build a Boto3 Kinesis client from explicit credentials.

    Returns the client, or None when client creation raises (the error is
    printed rather than propagated).
    """
    client_kwargs = {
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
        "region_name": region_name,
    }
    try:
        client = boto3.client('kinesis', **client_kwargs)
        print("Kinesis client initialized successfully.")
        return client
    except Exception as e:
        # Best effort: report the failure and let the caller deal with None.
        print(f"Failed to initialize Kinesis client: {e}")
        return None

def fetch_kinesis_data(kinesis_client, stream_name, orders_schema, shard_iterator_type = "TRIM_HORIZON"):
    """
    Fetch records from a Kinesis stream and convert them into a Spark DataFrame.

    One ``get_records`` call (at most 500 records) is issued per shard listed
    by ``describe_stream``, so records on every listed shard are included
    rather than only those on the first shard.

    Parameters:
    - kinesis_client: Boto3 Kinesis client instance.
    - stream_name: Name of the Kinesis stream to read from.
    - orders_schema: Spark DataFrame schema to apply to the incoming records.
    - shard_iterator_type: Use 'LATEST' for most recent records

    Returns:
    - Spark DataFrame containing the fetched records, or None when the stream
      is not found, the throughput limit is exceeded, or an argument is
      rejected (those three cases are printed, not raised).

    Raises:
    - Exception: Any other error is printed and re-raised.
    """
    try:
        description = kinesis_client.describe_stream(StreamName=stream_name)
        # NOTE(review): only the shards in this single describe_stream response
        # are read; paging through further shard listings is not handled here.
        shards = description['StreamDescription']['Shards']

        records_data = []
        for shard in shards:
            shard_iterator = kinesis_client.get_shard_iterator(
                StreamName=stream_name,
                ShardId=shard['ShardId'],
                ShardIteratorType=shard_iterator_type
            )['ShardIterator']

            response = kinesis_client.get_records(ShardIterator=shard_iterator, Limit=500)

            # Each record's Data payload is parsed as one JSON document
            records_data.extend(json.loads(record['Data']) for record in response['Records'])

        # Create a Spark DataFrame from the fetched records
        return spark.createDataFrame(records_data, schema=orders_schema)

    except kinesis_client.exceptions.ResourceNotFoundException:
        print(f"Stream {stream_name} not found.")
    except kinesis_client.exceptions.ProvisionedThroughputExceededException:
        print("Throughput limit exceeded, please try again later.")
    except kinesis_client.exceptions.InvalidArgumentException as e:
        print(f"Invalid argument: {e}")
    except Exception as e:
        print(f"An error occurred while fetching data from Kinesis: {e}")
        raise

def save_df_as_delta(df, table_path, mode='append'):
    """
    Write a PySpark DataFrame to a Delta table location.

    Parameters:
    - df: DataFrame to persist.
    - table_path: Destination path of the Delta table.
    - mode: Save mode handed to the writer. Default is 'append'.

    Raises:
    - Exception: Wraps any error raised while writing.
    """
    try:
        delta_writer = df.write.format("delta").mode(mode)
        delta_writer.save(table_path)
        print(f"DataFrame successfully saved as Delta table at: {table_path}")
    except Exception as e:
        raise Exception(f"Failed to save DataFrame as Delta table: {str(e)}")

def read_delta_table_as_stream(delta_table_path):
    """
    Open a Delta table as a streaming DataFrame using the notebook-level
    ``spark`` session.

    Parameters:
    - delta_table_path: Location of the Delta table.

    Returns:
    - Streaming DataFrame backed by the Delta table.

    Raises:
    - Exception: Wraps any error raised while setting up the streaming read.
    """
    try:
        return spark.readStream.format("delta").load(delta_table_path)
    except Exception as e:
        raise Exception(f"Failed to read Delta table as stream: {str(e)}")

# 3. Write Orders Stream

In [None]:
def process_events_stream(df_order_stream, events_path, checkpoint_location):
    """
    Select the event columns from the order stream, write them to a Delta
    table with a single trigger-once run, and wait for that run to finish.

    Parameters:
    - df_order_stream: The input streaming DataFrame containing order stream data.
    - events_path: The path where the Delta table for events should be written.
    - checkpoint_location: The location where checkpoint data will be stored for fault-tolerance.

    Returns:
    - The StreamingQuery for the write, after it has terminated.

    Raises:
    - Exception: If starting the query or the streaming run itself fails.
    """
    try:
        # Define the stream transformation and writing process
        events_stream = (
            df_order_stream
            .select(
                f.col("event_id"),
                f.col("event_type"),
                f.col("event_timestamp"),
                f.col("order_id")
            )
            .writeStream
            .format("delta")
            .outputMode("append")
            .trigger(once=True)  # Using trigger(once=True) as per requirement
            .option("path", events_path)
            .option("checkpointLocation", checkpoint_location)
            .start()
        )

        # start() only launches the query in the background. Wait for the
        # trigger-once run so that (a) the success message below is true,
        # (b) a failure in the streaming job is raised here and reaches the
        # except block, and (c) a cell that reads events_path right afterwards
        # does not race the write.
        events_stream.awaitTermination()

        # Log the status of the streaming process
        logging.info(f"Streaming process status: {events_stream.status}")

        print(f"Events stream successfully written to {events_path}")

        return events_stream

    except Exception as e:
        # Log the error and raise an exception
        logging.error(f"Failed to process events stream: {str(e)}")
        raise Exception(f"Failed to process events stream: {str(e)}") from e

def process_orders_stream(df_order_stream, orders_path, checkpoint_location):
    """
    Flatten the order-level fields of the order stream, write them to a Delta
    table with a single trigger-once run, and wait for that run to finish.

    Parameters:
    - df_order_stream: The input streaming DataFrame containing order stream data.
    - orders_path: The path where the Delta table for orders should be written.
    - checkpoint_location: The location where checkpoint data will be stored for fault-tolerance.

    Returns:
    - The StreamingQuery for the write, after it has terminated.

    Raises:
    - Exception: If starting the query or the streaming run itself fails.
    """
    try:
        # Define the stream transformation and writing process
        orders_stream = (
            df_order_stream
            .select(
                f.col("order_id"),
                f.col("order_details.customer_id").alias("customer_id"),
                f.col("order_details.total_weight").alias("total_weight"),
                f.col("order_details.total_volume").alias("total_volume"),
                # total_amount is stored under the name total_price
                f.col("order_details.total_amount").alias("total_price"),
                f.col("order_details.order_timestamp").alias("order_timestamp"),
                f.col("order_details.status").alias("status"),
                f.col("order_details.destination_address.lat").alias("lat"),
                f.col("order_details.destination_address.lon").alias("lon")
            )
            .writeStream
            .format("delta")
            .outputMode("append")
            .trigger(once=True)  # Using trigger(once=True) as per requirement
            .option("path", orders_path)
            .option("checkpointLocation", checkpoint_location)
            .start()
        )

        # start() only launches the query in the background. Wait for the
        # trigger-once run so that the success message below is true, a failure
        # in the streaming job reaches the except block, and a cell that reads
        # orders_path right afterwards does not race the write.
        orders_stream.awaitTermination()

        # Log the status of the streaming process
        logging.info(f"Streaming process status: {orders_stream.status}")

        print(f"Orders stream successfully written to {orders_path}")

        return orders_stream

    except Exception as e:
        # Log the error and raise an exception
        logging.error(f"Failed to process orders stream: {str(e)}")
        raise Exception(f"Failed to process orders stream: {str(e)}") from e

def process_orders_items_stream(df_order_stream: DataFrame) -> DataFrame:
    """
    Turn the order stream into one row per (order item, package), with a
    generated inventory id, derived quantity/weight/volume columns and a
    constant "PENDING" status.

    Parameters:
    - df_order_stream (DataFrame): Input DataFrame representing the order stream.

    Returns:
    - DataFrame: Transformed streaming DataFrame with the required columns.

    Raises:
    - Exception: Any error raised while building the transformation is logged
      and re-raised unchanged. No input validation is performed.
    """
    try:
        # One row per ordered item, then one row per package of that item.
        exploded = (
            df_order_stream
            .withColumn("order_exploded", f.explode(f.col("order_details.items")))
            .withColumn("package_exploded", f.explode(f.col("order_exploded.packages")))
        )

        # Derived columns; weight and volume build on items_quantity, so the
        # order of these withColumn calls matters.
        enriched = (
            exploded
            .withColumn("inventory_id", f.concat(f.lit("inv-"), f.expr("uuid()")))
            .withColumn("items_quantity", f.col("order_exploded.quantity") * f.col("package_exploded.quantity"))
            .withColumn("items_weight", f.col("items_quantity") * f.col("package_exploded.weight"))
            .withColumn("items_volume", f.col("items_quantity") * f.col("package_exploded.volume"))
            .withColumn("status", f.lit("PENDING"))
        )

        output_columns = [
            f.col("inventory_id"),
            f.col("order_id"),
            f.col("order_exploded.product_id").alias("product_id"),
            f.col("order_exploded.product_name").alias("product_name"),
            f.col("order_exploded.price").alias("order_price"),
            f.col("package_exploded.package_id").alias("package_id"),
            f.col("package_exploded.subpackage_id").alias("subpackage_id"),
            f.col("items_quantity"),
            f.col("items_weight"),
            f.col("items_volume"),
            f.col("order_details.order_timestamp").alias("order_timestamp"),
            f.col("status"),
        ]
        df_orders_items_stream = enriched.select(*output_columns)

        logging.info("Transformation applied successfully to the streaming DataFrame.")
        return df_orders_items_stream

    except Exception as e:
        logging.error(f"Error occurred during transformation: {str(e)}")
        raise  # Re-raise exception for further handling


def write_orders_items_stream(df_orders_items_stream: DataFrame, path: str, checkpoint_location: str):
    """
    Start a trigger-once streaming write of the order-items stream to a Delta
    table.

    Parameters:
    - df_orders_items_stream (DataFrame): Streaming DataFrame holding the transformed order items.
    - path (str): The destination path for the Delta table.
    - checkpoint_location (str): The location for checkpointing the stream.

    Returns:
    - StreamingQuery: Handle of the started query. The function returns right
      after start(), without waiting for the run to finish.

    Raises:
    - Exception: Any error raised while configuring or starting the query is
      logged and re-raised unchanged. No input validation is performed.
    """
    try:
        delta_writer = (
            df_orders_items_stream.writeStream
            .format("delta")
            .outputMode("append")
            .trigger(once=True)  # single run
            .option("path", path)
            .option("checkpointLocation", checkpoint_location)
        )
        orders_items_stream = delta_writer.start()

        logging.info("Stream started successfully and data is being written to the Delta table.")
        return orders_items_stream

    except Exception as e:
        logging.error(f"Error occurred during stream write operation: {str(e)}")
        raise  # Re-raise exception for further handling

## 3.1 Process Events Stream

In [None]:
events_stream = process_events_stream(df_orders_stream, EVENTS_PATH, EVENTS_CHECKPOINT_LOCATION)

In [None]:
events_stream.lastProgress

In [None]:
events_stream.status

In [None]:
# Read the events Delta table back as a batch and show the newest events first.
spark.read.format("delta").load(EVENTS_PATH).orderBy("event_timestamp", ascending=False).show()

## 3.2 Process Orders Stream

In [None]:
process_orders_stream(df_orders_stream, ORDERS_PATH, ORDERS_CHECKPOINT_LOCATION)

In [None]:
# Read the orders Delta table back as a batch and display a sample.
spark.read.format("delta").load(ORDERS_PATH).show()

## 3.3 Process Orders Inventory Stream

In [None]:
df_orders_items_stream = process_orders_items_stream(df_orders_stream)
write_orders_items_stream(df_orders_items_stream, ORDERS_ITEMS_PATH, ORDERS_ITEMS_CHECKPOINT_LOCATION)

In [None]:
# Read the order-items Delta table back as a batch and display a sample.
spark.read.format("delta").load(ORDERS_ITEMS_PATH).show()

# 4. Write Inventory Streams

## 4.1 Upsert Orders Items Stream

In [None]:
# Batch view of the gold package table (the table the upsert below updates).
df_packages = spark.read.format("delta").load(f"s3a://{BUCKET_NAME}/{ORDERS}/{GOLD}/package_table")
df_packages.show()

In [None]:
df_packages.dropDuplicates(["package_id", "subpackage_id"]).count()

In [None]:
def upsert_to_package(microBatchDF, batchId):
    """
    Decrease stock in the gold package Delta table by the quantities requested
    in one streaming micro-batch.

    Parameters:
    - microBatchDF: The micro-batch DataFrame from the streaming source; it
      must provide package_id, subpackage_id and items_quantity.
    - batchId: The unique identifier for the micro-batch (not used here).
    """
    package_table = DeltaTable.forPath(spark, f"s3a://{BUCKET_NAME}/{ORDERS}/{GOLD}/package_table")

    # Workaround: collapse the micro-batch to one row per (package, subpackage)
    # so duplicate keys inside a batch are summed before the merge.
    requested_per_package = microBatchDF.groupBy("package_id", "subpackage_id").agg(
        f.sum("items_quantity").alias("items_quantity"),
    )

    merge_builder = package_table.alias("t").merge(
        requested_per_package.alias("s"),
        "s.package_id = t.package_id AND s.subpackage_id = t.subpackage_id",
    )

    # Only decrement when enough stock is available to fulfil the request;
    # other matched rows and all unmatched rows are left untouched.
    merge_builder.whenMatchedUpdate(
        condition=f.col("t.stock_quantity") >= f.col("s.items_quantity"),
        set={
            "stock_quantity": f.col("t.stock_quantity") - f.col("s.items_quantity"),
        },
    ).execute()

def update_orders_items_stream(
    df_orders_items_stream: DataFrame, 
    packages_path: str, 
    checkpoint_location: str, 
    upsert_function
):
    """
    Start a trigger-once streaming query that passes each micro-batch of
    order items to ``upsert_function``.

    Parameters:
    - df_orders_items_stream (DataFrame): The input streaming DataFrame containing orders items data.
    - packages_path (str): Path of the packages Delta table; passed to the writer as the "path" option.
    - checkpoint_location (str): The checkpoint location for the stream query.
    - upsert_function (function): Callable ``(micro_batch_df, batch_id)`` invoked through foreachBatch.

    Returns:
    - StreamingQuery: The StreamingQuery object representing the started stream.

    Raises:
    - Exception: Any error raised while configuring or starting the query is
      logged and re-raised unchanged.
    """
    try:
        # Start the streaming write process with upsert logic using foreachBatch
        update_products_stream = (
            df_orders_items_stream
            .select(
                f.col("package_id"),
                f.col("subpackage_id"),
                f.col("items_quantity")
            )
            .writeStream
            .format("delta")
            .outputMode("update")
            # Use the callable the caller passed in. Previously
            # upsert_to_package was hard-coded here and the upsert_function
            # argument was silently ignored.
            .foreachBatch(upsert_function)
            .option("path", packages_path)
            .option("checkpointLocation", checkpoint_location)
            .trigger(once=True)  # Single-trigger mode for processing the micro-batch once
            .start()
        )

        # Log successful stream start
        logging.info("Streaming query for packages update started successfully.")
        return update_products_stream

    except Exception as e:
        logging.error(f"Error starting the streaming query for packagess update: {str(e)}")
        raise  # Re-raise exception for further handling


In [None]:
# Start the stock-update stream against the gold package table.
df_update_orders_items_stream = update_orders_items_stream(
    df_orders_items_stream=df_orders_items_stream,
    packages_path=f"s3a://{BUCKET_NAME}/{ORDERS}/{GOLD}/package_table",
    checkpoint_location=ORDERS_ITEMS_UPDATE_CHECKPOINT_LOCATION,
    upsert_function=upsert_to_package,
)

In [None]:
# Read the gold package table back and show the highest stock levels first.
spark.read.format("delta").load(
    f"s3a://{BUCKET_NAME}/{ORDERS}/{GOLD}/package_table"
).orderBy(["stock_quantity"], ascending=False).show()