In [1]:
import sys
import os
import yaml
import configparser

# Add the src directory to the sys.path
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

Consideraciones:
* generar esquema de validación de streams
* introducir credenciales a través del entorno de Docker - las variables ENV en .ipynb no funcionan
* optimizar filtrado de id: ¿incluir partición?
* estructura de microservicios: inventory consumer - producer
* diseño de sistemas: filtrar órdenes efectuadas para los siguientes envíos
* confirmar best practices para el manejo de streams: memory table vs readStream
* confirmar best practice para la partition key del stream
* Arquitectura Lambda
* billing de S3 por fichero registrado: definir criterios de partición
* emplear memory format para lectura de streams
* best practices para gestionar checkpoints
* implementar Firehose

# 1. Environment Configuration

## 1.1 Import dependencies

In [2]:
import boto3
import json
import os
from uuid import uuid4
from datetime import datetime
from datetime import timedelta
import time
import random
import uuid
import logging
import numpy as np

import pyspark.sql.types as t
import pyspark.sql.functions as f

In [3]:
from spark_session import create_spark_session
from schemas import *
from functions import *

## 1.2 Extract AWS credentials

In [4]:
def load_aws_credentials(profile_name="default"):
    """
    Load an AWS access key pair from the project-local ``../.aws/credentials`` file.

    :param profile_name: Section of the credentials file to read (default: "default").
    :return: Tuple ``(aws_access_key_id, aws_secret_access_key)``.
    :raises SystemExit: With exit code 1 when the file cannot be read or parsed, or when
        the profile or either key is missing or empty.
    """
    credentials_path = os.path.join('..', '.aws', 'credentials')
    credentials = configparser.ConfigParser()

    try:
        # ConfigParser.read() silently skips files it cannot open and returns the list of
        # files it actually parsed, so a missing file has to be detected from that list.
        loaded_files = credentials.read(credentials_path)
    except (configparser.Error, UnicodeDecodeError) as e:
        logging.error(f"Error loading .aws file: {e}")
        sys.exit(1)

    if not loaded_files:
        logging.error(f"Error loading .aws file: {credentials_path} not found or unreadable.")
        sys.exit(1)

    # fallback=None turns a missing section or option into None instead of an exception,
    # so every "credentials absent" case goes through the same check below.
    aws_access_key_id = credentials.get(profile_name, "aws_access_key_id", fallback=None)
    aws_secret_access_key = credentials.get(profile_name, "aws_secret_access_key", fallback=None)

    if not aws_access_key_id or not aws_secret_access_key:
        logging.error("AWS credentials not found.")
        sys.exit(1)

    logging.info("Successfully loaded credentials variables from .aws file.")
    return aws_access_key_id, aws_secret_access_key

# Resolve the key pair once; later cells pass both values to create_spark_session and boto3.client.
aws_access_key_id, aws_secret_access_key = load_aws_credentials()

## 1.3 Constant variables

In [5]:
# Initialize logger
# force=True is required: the credentials loader in the previous section already called
# logging.info(), which installs a default root handler at WARNING level. Without force,
# this basicConfig() call would be a no-op and INFO records (such as the put_record
# responses logged by the producer) would be silently dropped.
logging.basicConfig(level=logging.INFO, force=True)
logger = logging.getLogger()

In [6]:
def load_aws_config():
    """
    Load the settings stored in the project-local ``../.aws/config`` file.

    :return: ``configparser.ConfigParser`` holding every section parsed from the file.
    :raises SystemExit: With exit code 1 when the file is missing, unreadable, or malformed.
    """
    config_path = os.path.join('..', '.aws', 'config')
    config = configparser.ConfigParser()

    try:
        # ConfigParser.read() does not raise for a missing file; it returns the list of
        # files it actually parsed, which is empty when nothing could be opened.
        loaded_files = config.read(config_path)
    except (configparser.Error, UnicodeDecodeError) as e:
        logging.error(f"Error loading .aws file: {e}")
        sys.exit(1)

    if not loaded_files:
        logging.error(f"Error loading .aws file: {config_path} not found or unreadable.")
        sys.exit(1)

    logging.info("Successfully loaded config variables from .aws file.")
    return config

# Parse ../.aws/config once; the constants cell that follows reads its sections.
config = load_aws_config()

In [7]:
# Path segments read from the [paths] section of the config file
BUCKET_NAME = config["paths"]["BUCKET_NAME"]
RAW = config["paths"]["RAW"]
ORDERS = config["paths"]["ORDERS"]

BRONZE = config["paths"]["BRONZE"]
SILVER = config["paths"]["SILVER"]
GOLD = config["paths"]["GOLD"]

# Raw input names read from the [raw_data] section
ADDRESS_DATA = config["raw_data"]["ADDRESS_DATA"]
CLIENTS_DATA = config["raw_data"]["CLIENTS_DATA"]
PRODUCTS_DATA = config["raw_data"]["PRODUCTS_DATA"]

# Table names read from the [table_names] section
ADDRESS_TABLE = config["table_names"]["ADDRESS_TABLE"]
CLIENTS_TABLE = config["table_names"]["CLIENTS_TABLE"]
CLIENTS_ADDRESS_TABLE = config["table_names"]["CLIENTS_ADDRESS_TABLE"]
PRODUCTS_TABLE = config["table_names"]["PRODUCTS_TABLE"]
PACKAGE_TABLE = config["table_names"]["PACKAGE_TABLE"]

# Derived locations: <bucket>/<raw>/<file> and <bucket>/<orders>/<stage>/<table>.
# NOTE(review): os.path.join uses the host OS separator; if BUCKET_NAME is an object-store
# URI this only yields "/"-separated paths on POSIX hosts - confirm before running on Windows.
RAW_ADDRESS_PATH = os.path.join(BUCKET_NAME, RAW, ADDRESS_DATA)
RAW_CLIENTS_PATH = os.path.join(BUCKET_NAME, RAW, CLIENTS_DATA)
# Backward-compatible alias for the original, misspelled constant name.
RAW_CIENTS_PATH = RAW_CLIENTS_PATH
RAW_PRODUCTS_PATH = os.path.join(BUCKET_NAME, RAW, PRODUCTS_DATA)

BRONZE_ADDRESS_PATH = os.path.join(BUCKET_NAME, ORDERS, BRONZE, ADDRESS_TABLE)
BRONZE_CLIENTS_PATH = os.path.join(BUCKET_NAME, ORDERS, BRONZE, CLIENTS_TABLE)
BRONZE_PRODUCTS_PATH = os.path.join(BUCKET_NAME, ORDERS, BRONZE, PRODUCTS_TABLE)

SILVER_ADDRESS_PATH = os.path.join(BUCKET_NAME, ORDERS, SILVER, ADDRESS_TABLE)
SILVER_CLIENTS_PATH = os.path.join(BUCKET_NAME, ORDERS, SILVER, CLIENTS_TABLE)
SILVER_PRODUCTS_PATH = os.path.join(BUCKET_NAME, ORDERS, SILVER, PRODUCTS_TABLE)

GOLD_CLIENTS_ADDRESS_PATH = os.path.join(BUCKET_NAME, ORDERS, GOLD, CLIENTS_ADDRESS_TABLE)
GOLD_PRODUCTS_PATH = os.path.join(BUCKET_NAME, ORDERS, GOLD, PRODUCTS_TABLE)
GOLD_PACKAGE_PATH = os.path.join(BUCKET_NAME, ORDERS, GOLD, PACKAGE_TABLE)

In [8]:
# stream_name = config["default"]["STREAM_NAME"]
# The stream name is hard-coded for now; the config lookup above is kept commented out.
# It is the StreamName passed to describe_stream and put_record further down.
stream_name = "orders_stream_4"

# 2. Initialize Spark Session

In [9]:
# Build the Spark session with the AWS key pair loaded above (helper imported from spark_session).
spark = create_spark_session(aws_access_key_id, aws_secret_access_key)

24/09/30 23:41:40 WARN Utils: Your hostname, Miguels-MacBook-Air.local resolves to a loopback address: 127.0.0.1; using 192.168.0.16 instead (on interface en0)
24/09/30 23:41:40 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


:: loading settings :: url = jar:file:/Users/miguelgranica/Documents/MBIT-DE/vpr-data_publisher/.venv/lib/python3.10/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /Users/miguelgranica/.ivy2/cache
The jars for the packages stored in: /Users/miguelgranica/.ivy2/jars
io.delta#delta-spark_2.12 added as a dependency
org.apache.hadoop#hadoop-aws added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-05897ee3-5d1c-4275-83c0-3194c7fceedd;1.0
	confs: [default]
	found io.delta#delta-spark_2.12;3.2.0 in central
	found io.delta#delta-storage;3.2.0 in central
	found org.antlr#antlr4-runtime;4.9.3 in central
	found org.apache.hadoop#hadoop-aws;3.3.1 in central
	found com.amazonaws#aws-java-sdk-bundle;1.11.901 in central
	found org.wildfly.openssl#wildfly-openssl;1.0.7.Final in central
:: resolution report :: resolve 130ms :: artifacts dl 7ms
	:: modules in use:
	com.amazonaws#aws-java-sdk-bundle;1.11.901 from central in [default]
	io.delta#delta-spark_2.12;3.2.0 from central in [default]
	io.delta#delta-storage;3.2.0 from central in [default]
	org.antlr#antlr4-runtime;4.9.3 from central in [default]
	

# 2. Data generation

In [10]:
def generate_order_payload(order_details):
    """
    Wrap order details in an ORDER_CREATED event envelope.

    A fresh UUID is generated for both the event and the order, and the event is stamped
    with the current local time formatted as ``YYYY-MM-DD HH:MM:SS``.

    :param order_details: Dictionary containing order details; stored in the payload as-is.
    :return: Dictionary containing the payload for the order event.
    """
    event_payload = dict(
        event_id="ev-" + str(uuid.uuid4()),
        event_type="ORDER_CREATED",
        event_timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        order_id="ord-" + str(uuid.uuid4()),
        order_details=order_details,
    )
    return event_payload

In [11]:
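# NOTE: this cell is entirely commented-out reference code and defines nothing. The helpers called in
# later cells (generate_order_details, generate_item_list, ...) are not defined in this notebook, so
# they presumably come from the star imports (`from functions import *`). This copy looks outdated:
# the live outputs below include `subpackage_id` and `weight` fields that it never produces.
# def generate_order_details(df_clients, df_products, df_packages):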
#     """
#     Generate order details based on client information and item list.

#     :param df_clients: DataFrame containing client information.
#     :param df_products: DataFrame containing product information.
#     :param df_packages: DataFrame containing package information.
#     :return: Dictionary containing order details.
#     """
#     current_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
#     item_list = generate_item_list(df_products, df_packages)
#     client_details = select_client_order_details(df_clients)

#     return {
#         "customer_id": client_details["client_id"],
#         "order_timestamp": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
#         "order_date": datetime.now().strftime('%Y-%m-%d'),
#         "items": item_list,
#         "total_amount": generate_item_agg(item_list, "price"),
#         "total_volume": generate_item_measure_agg(item_list, "volume"),
#         "status": "RECEIVED",
#         "destination_address": generate_destination_address_dict(client_details),
#         "payment_details": {
#             "payment_method": "",
#             "payment_status": "",
#             "transaction_id": ""
#         }
#     }

# def select_client_order_details(df, primary_key_col="address_id"):
#     """
#     Select a random row from a DataFrame based on a unique primary key column.
    
#     :param df: DataFrame to select from.
#     :param primary_key_col: Name of the primary key column. default value: address_id
#     :return: DataFrame containing a single randomly selected row.
#     """
#     # Get a list of all primary key values
#     primary_keys = df.select(primary_key_col).rdd.flatMap(lambda x: x).collect()

#     # Randomly select one primary key value
#     random_primary_key = random.choice(primary_keys)

#     # Filter the DataFrame to get the row with the random primary key
#     random_row_df = df.filter(f.col(primary_key_col) == random_primary_key)

#     # Convert the DataFrame row to dictionary and return
#     return random_row_df.first().asDict() if random_row_df else None

# def select_product_order_details(df_product, df_package,  quantity=3, primary_key_col="product_id"):
#     """
#     Select a random row from a DataFrame based on a unique primary key column.
    
#     :param df: DataFrame to select from.
#     :param primary_key_col: Name of the primary key column. default value: product_id
#     :param quantity: Maximum quantity of each product (default: 3).
#     :return: DataFrame containing a single randomly selected row.
#     """
#     # Get a list of all primary key values
#     primary_keys = df_product.select(primary_key_col).rdd.flatMap(lambda x: x).collect()

#     # Randomly select one primary key value
#     random_primary_key = random.choice(primary_keys)

#     # Register random choice function as a UDF
#     weighted_random_choice_udf = f.udf(weighted_random_choice, t.IntegerType())

#     # Filter the DataFrame to get the row with the random primary key
#     random_product_df = (
#         df_product
#         .filter(f.col(primary_key_col) == random_primary_key)
#         # Set random product quantity
#         .withColumn("product_quantity", weighted_random_choice_udf(f.lit(quantity)))
#         # Explode package annidations
#         .withColumn("product_components_explode", f.explode(f.col("product_components")))
#         # Extract package information
#         .withColumn("package_id", f.explode(f.col("product_components_explode.package_id")))
#         .withColumn("package_quantity", f.explode(f.col("product_components_explode.package_quantity")))
#         # Rename column
#         .withColumnRenamed("name", "product_name")
#         # Join package measures
#         .join(
#             df_package, on="package_id", how="left"
#         )
#         # Select Columns
#         .select(
#             f.col("product_id"),
#             f.col("product_quantity"),
#             f.col("package_id"),
#             f.col("product_name"),
#             f.col("price"),
#             f.col("package_quantity"),
#             f.col("width"),
#             f.col("height"),
#             f.col("length"),
#             f.col("volume")
#         )
#     )

#     # Convert each DataFrame row to list of dictionary and return
#     return [row.asDict() for row in random_product_df.collect()]

# def generate_destination_address_dict(clients_dict):
#     """
#     Filter unnecessary keys from the client's address dictionary.

#     :param clients_dict: Dictionary containing client information.
#     :return: Dictionary containing filtered address information.
#     """
#     address_keys = [
#         'address_id', 'neighborhood', 'coordinates', 'road', 'house_number',
#         'suburb', 'city_district', 'state', 'postcode', 'country', 'lat', 'lon'
#     ]
#     return {k: v for k, v in clients_dict.items() if k in address_keys}

# def generate_item_list(df_products, df_packages, items=5, quantity=3):
#     """
#     Generate a list of items with details.
    
#     Parameters:
#     - items: Number of items to generate details for (default: 5).
#     - quantity: Maximum quantity of each item (default: 3).

#     Returns:
#     - List of dictionaries containing item details.
#     """
#     return [        
#         {
#             "product_id": packages[0]["product_id"], 
#             "product_name": packages[0]["product_name"], 
#             "price": packages[0]["price"],
#             "quantity": packages[0]["product_quantity"],
#             "packages": [
#                 {
#                     "package_id": package["package_id"],
#                     "quantity": package["package_quantity"],
#                     "volume": package["volume"]
#                 } for package in packages 
#             ],
#         }
#         for packages in [select_product_order_details(df_products, df_packages) for num in range(weighted_random_choice(5))]
#     ]
    
#     # return [ 
#     #     package 
#     #     for num in range(weighted_random_choice(5)) 
#     #     for package in select_product_order_details(df_products, df_packages, quantity)
#     # ]

#     # return [
#     #     {
#     #         "product_id": item["product_id"], 
#     #         "product_name": item["product_name"], 
#     #         "price": item["price"], 
#     #         "weight": item["weight"],
#     #         "quantity": weighted_random_choice(quantity)
#     #     }
#     #     for item in [ select_client_order_details(df_products, primary_key_col="product_id") for num in range(weighted_random_choice(items))]
#     # ]

# def weighted_random_choice(numbers_len):
#     """
#     Select a random number from a range starting from 1 with weights based on reciprocal values.

#     Parameters:
#     - numbers_len: Length of the range of numbers starting from 1.

#     Returns:
#     - A randomly selected number based on the reciprocal weights.
#     """
#     # Define numbers range starting from 1 to numbers_len
#     numbers = np.arange(1, numbers_len + 1)
    
#     # Calculate weights based on reciprocal values
#     weights = 1 / numbers
    
#     # Ensure the weights sum to 1
#     normalized_weights = weights / np.sum(weights)
    
#     # Select a random number with the specified weights
#     random_number = int(np.random.choice(numbers, p=normalized_weights))
    
#     return random_number

# def generate_item_agg(items, property_name):
#     """
#     Generate the aggregate value of a property for a list of items.

#     :param items: List of dictionaries containing item details.
#     :param property_name: Name of the property to aggregate.
#     :return: Aggregate value of the specified property.
#     """
#     return sum([(item['quantity'] * item[property_name]) for item in items])

# def generate_item_measure_agg(items, property_name="volume", quantity="quantity"):
#     """
#     Generate the aggregate value of a specified property for a list of items.

#     This function calculates the sum of the product of item quantities, package quantities, 
#     and a specified property (e.g., volume) for each package within each item. 
#     If the specified property is `None`, it is treated as `1` to ensure the multiplication proceeds.

#     :param items: List of dictionaries, where each dictionary contains details about an item and its packages.
#     :param quantity: The key name in the dictionaries for the quantity of the item and packages (default is "quantity").
#     :param property_name: The key name in the dictionaries for the property to aggregate (default is "volume").
#     :return: The aggregated value of the specified property.
#     """
#     return sum([
#         item[quantity]* package[quantity] * (package[property_name] if package[property_name] is not None else 1) 
#         for item in items 
#         for package in item['packages']
#     ])

In [12]:
# Load the three tables used to generate orders. Client addresses are read with format
# "parquet" and an explicit schema; products and packages are read with format "delta".
# (Author's note kept from the original: the direct equivalent for the delta reads would be
# spark.read.format("delta").load(<path>).)
df_clients_address = read_file(
    spark,
    GOLD_CLIENTS_ADDRESS_PATH,
    "parquet",
    gold_clients_address_schema,
)
df_products = read_file(spark, GOLD_PRODUCTS_PATH, "delta")
df_packages = read_file(spark, GOLD_PACKAGE_PATH, "delta")

24/09/30 23:42:19 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties


In [13]:
# Inspect the product schema; product_components is an array of structs carrying the package ids.
df_products.printSchema()

root
 |-- product_id: string (nullable = true)
 |-- name: string (nullable = true)
 |-- category: string (nullable = true)
 |-- url: string (nullable = true)
 |-- price: float (nullable = true)
 |-- currency: string (nullable = true)
 |-- product_components: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- package_id: string (nullable = true)
 |    |    |-- subpackage_id: integer (nullable = true)
 |    |    |-- package_quantity: integer (nullable = true)



In [14]:
# Preview the package table (dimensions, weight, volume and stock per package).
df_packages.show()

24/09/30 23:42:34 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

+----------+-------------+----------+-----+------+------+------+-------+--------------+
|package_id|subpackage_id|      name|width|height|length|weight| volume|stock_quantity|
+----------+-------------+----------+-----+------+------+------+-------+--------------+
|  00245842|            1|     BESTÅ| 41.0|   8.0| 196.0| 18.01|64288.0|          1000|
|  00263850|            1|     BILLY| 29.0|  13.0| 206.0|  37.9|77662.0|          1000|
|  00275848|            1|    KALLAX| 41.0|  12.0| 155.0|  15.2|76260.0|          1000|
|  00278578|            1|    HYLLIS| 29.0|   4.0| 140.0|  5.76|16240.0|          1000|
|  00286677|            1|     HEJNE|  7.0|   3.0| 171.0|  1.77| 3591.0|          1000|
|  00295554|            1|     BESTÅ| 36.0|   2.0|  59.0|  2.47| 4248.0|          1000|
|  00324518|            1|    KALLAX| 41.0|  16.0| 150.0| 21.14|98400.0|          1000|
|  00340047|            1|      EKET|  9.0|   3.0|  33.0|  0.32|  891.0|          1000|
|  00415603|            1|     B

                                                                                

In [None]:
# NOTE: scratch cell with no execution count; everything below is commented out.
# # Register the function as a UDF
# weighted_random_choice_udf = f.udf(weighted_random_choice, t.IntegerType())
# (
#     df_products
#     # Set random product quantity
#     .withColumn("product_quantity", weighted_random_choice_udf(f.lit(3)))
#     # Explode package annidations
#     .withColumn("product_components_explode", f.explode(f.col("product_components")))
#     # Extract package information
#     .withColumn("package_id", f.col("product_components_explode.package_id"))
#     .withColumn("subpackage_id", f.col("product_components_explode.subpackage_id"))
#     .withColumn("package_quantity", f.col("product_components_explode.package_quantity"))
#     .withColumnRenamed("name", "product_name")
#     .join(
#         df_packages, on=["package_id", "subpackage_id"], how="left"
#     )
#     # Select Columns
#     .select(
#         f.col("product_id"),
#         f.col("product_quantity"),
#         f.col("package_id"),
#         f.col("subpackage_id"),
#         # f.col("name"),
#         f.col("product_name"),
#         f.col("price"),
#         f.col("package_quantity"),
#         f.col("width"),
#         f.col("height"),
#         f.col("length"),
#         f.col("weight"),
#         f.col("volume")
#     )
# ).show()

In [15]:
# Smoke test: pick one product and show its package rows as a list of dicts.
select_product_order_details(df_products, df_packages)

                                                                                

[{'product_id': 'prod-a0380a40-4626-4fc7-ad86-5e15b73e8649',
  'product_quantity': 1,
  'package_id': '30559732',
  'subpackage_id': 1,
  'product_name': 'PLATSA',
  'price': 79.0,
  'package_quantity': 1,
  'width': 43.0,
  'height': 8.0,
  'length': 126.0,
  'weight': 14.529999732971191,
  'volume': 43344.0}]

In [16]:
# Generate a sample item list (one dict per product, each with a nested "packages" list) and display it.
item_list = generate_item_list(df_products, df_packages)
item_list

[{'product_id': 'prod-ebf36584-be78-4ac6-a212-5f2b112a6eb3',
  'product_name': 'FJÄLLBO',
  'price': 69.98999786376953,
  'quantity': 2,
  'packages': [{'package_id': '70342199',
    'subpackage_id': 1,
    'quantity': 1,
    'weight': 11.0,
    'volume': 44955.0}]},
 {'product_id': 'prod-01b0fd21-90e9-4e38-9ff6-2fa8166f8f1b',
  'product_name': 'BILLY',
  'price': 99.0,
  'quantity': 1,
  'packages': [{'package_id': '10401931',
    'subpackage_id': 1,
    'quantity': 1,
    'weight': 29.059999465942383,
    'volume': 65600.0},
   {'package_id': '10407565',
    'subpackage_id': 1,
    'quantity': 1,
    'weight': 11.0,
    'volume': 25320.0}]}]

In [17]:
# Aggregate volume for the sample above; the result equals the sum of
# item quantity * package quantity * package volume over every package in item_list.
generate_item_measure_agg(item_list)

180830.0

In [18]:
# Build one complete order (customer, items, totals) and pretty-print it as JSON for inspection.
order_details = generate_order_details(df_clients_address, df_products, df_packages)
print(json.dumps(order_details, indent=4))

{
    "customer_id": "cus-d1adb46b-2542-4037-88e0-1edecb3d14e0",
    "order_timestamp": "2024-09-30 23:43:11",
    "order_date": "2024-09-30",
    "items": [
        {
            "product_id": "prod-5693e293-45b3-4aa7-9805-6116b4fb6b94",
            "product_name": "BILLY",
            "price": 34.9900016784668,
            "quantity": 1,
            "packages": [
                {
                    "package_id": "80263832",
                    "subpackage_id": 1,
                    "quantity": 1,
                    "weight": 11.760000228881836,
                    "volume": 25506.0
                }
            ]
        },
        {
            "product_id": "prod-df7617ef-f810-429c-836b-0a565c45bf98",
            "product_name": "KALLAX",
            "price": 25.0,
            "quantity": 2,
            "packages": [
                {
                    "package_id": "90508508",
                    "subpackage_id": 1,
                    "quantity": 1,
                    "wei

                                                                                

In [19]:
# Wrap the order in its event envelope. Despite the name, order_stream is a single payload dict.
order_stream = generate_order_payload(order_details)
print(json.dumps(order_stream, indent=4))

{
    "event_id": "ev-751d5158-32ac-4cb2-9c7f-7bee43cf5a39",
    "event_type": "ORDER_CREATED",
    "event_timestamp": "2024-09-30 23:43:11",
    "order_id": "ord-c715d632-cff4-4bae-90a4-7aaaf91f408c",
    "order_details": {
        "customer_id": "cus-d1adb46b-2542-4037-88e0-1edecb3d14e0",
        "order_timestamp": "2024-09-30 23:43:11",
        "order_date": "2024-09-30",
        "items": [
            {
                "product_id": "prod-5693e293-45b3-4aa7-9805-6116b4fb6b94",
                "product_name": "BILLY",
                "price": 34.9900016784668,
                "quantity": 1,
                "packages": [
                    {
                        "package_id": "80263832",
                        "subpackage_id": 1,
                        "quantity": 1,
                        "weight": 11.760000228881836,
                        "volume": 25506.0
                    }
                ]
            },
            {
                "product_id": "prod-df7617ef-f810

In [20]:
# Quick look at a top-level payload field. The trailing snippet is stale: it uses camelCase keys,
# whereas generate_order_payload emits snake_case keys such as 'order_details'.
order_stream['order_id'] #['orderDetails']['destinationAddress']['neighborhood']

'ord-c715d632-cff4-4bae-90a4-7aaaf91f408c'

# 3. Stream producer

In [21]:
# Kinesis client used by the describe_stream and put_record calls below. It authenticates with
# the key pair loaded from the credentials file; no session token is passed.
# NOTE(review): the region is hard-coded here rather than read from the loaded config - confirm
# it matches the region where the stream named by stream_name was created.
kinesis_client = boto3.client(
    'kinesis', 
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    # aws_session_token=aws_session_token,
    region_name='eu-south-2'
)

In [22]:
# Check that the stream exists and is ready before producing; print its status.
response = kinesis_client.describe_stream(StreamName=stream_name)
print(response['StreamDescription']['StreamStatus'])

ACTIVE


In [23]:
# List the stream's shards from the describe_stream response fetched in the previous cell.
print(response['StreamDescription']['Shards'])

[{'ShardId': 'shardId-000000000000', 'HashKeyRange': {'StartingHashKey': '0', 'EndingHashKey': '340282366920938463463374607431768211455'}, 'SequenceNumberRange': {'StartingSequenceNumber': '49656326133938285393239909474654951511725388248247173122'}}]


In [24]:
def produce_order(payload, partition_key=None):
    """
    Serialize an event payload to JSON and put it on the Kinesis stream ``stream_name``.

    Failures are logged with a traceback and reported through the return value; they never
    propagate to the caller.

    :param payload: JSON-serializable dictionary; must contain 'event_type' as a string.
    :param partition_key: Optional partition key for the record. Defaults to
        ``payload['event_type']`` (the original behavior). Because generate_order_payload
        always sets the same event_type, that default gives every record the same key;
        pass a higher-cardinality value such as ``payload['order_id']`` to vary it.
    :return: The put_record response dictionary, or None when validation or the request failed.
    """
    try:
        # Ensure payload is correctly formatted and partition key is a string
        if 'event_type' not in payload or not isinstance(payload['event_type'], str):
            raise ValueError("Payload must include 'event_type' as a string")

        if partition_key is None:
            partition_key = payload['event_type']
        elif not isinstance(partition_key, str) or not partition_key:
            raise ValueError("partition_key must be a non-empty string")

        data = json.dumps(payload).encode()
        put_response = kinesis_client.put_record(
            StreamName=stream_name,
            Data=data,
            PartitionKey=partition_key
        )

        # Log response details
        logger.info(f"Put record response: {put_response}")
        return put_response
    except Exception as e:
        # Deliberate best-effort: a failed send must not abort the producer loop.
        logger.error(f"Failed to put record to stream: {e}", exc_info=True)
        return None

In [25]:
# Generate a fresh order event end to end and pretty-print it before sending anything.
order_payload = generate_order_payload(generate_order_details(df_clients_address, df_products, df_packages))
print(json.dumps(order_payload, indent=4))

{
    "event_id": "ev-8c46f837-2adb-40b0-829d-75e1b743995f",
    "event_type": "ORDER_CREATED",
    "event_timestamp": "2024-09-30 23:43:35",
    "order_id": "ord-a719c627-e658-4502-85d9-b31934cd2239",
    "order_details": {
        "customer_id": "cus-4851ac82-038f-4f72-ac88-f22579210d32",
        "order_timestamp": "2024-09-30 23:43:35",
        "order_date": "2024-09-30",
        "items": [
            {
                "product_id": "prod-3a27bedc-bc22-4474-ba9d-b24ad4922c9a",
                "product_name": "BILLY",
                "price": 159.99000549316406,
                "quantity": 2,
                "packages": [
                    {
                        "package_id": "00263850",
                        "subpackage_id": 1,
                        "quantity": 1,
                        "weight": 37.900001525878906,
                        "volume": 77662.0
                    },
                    {
                        "package_id": "90275617",
                     

In [26]:
# Send a single freshly generated order to the stream; the cell output is the put_record response.
order_payload = generate_order_payload(generate_order_details(df_clients_address, df_products, df_packages))
produce_order(order_payload)

{'ShardId': 'shardId-000000000000',
 'SequenceNumber': '49656326133938285393239909490760261280632782821668159490',
 'ResponseMetadata': {'RequestId': 'eadeb704-5658-254e-b5ab-aad5a4460350',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'eadeb704-5658-254e-b5ab-aad5a4460350',
   'x-amz-id-2': '4fAG6N6OTHBspASY1/23GgosZ4dRns9aAbxuTBikEJnO8wBDZ8Pb7kNteH6T7pT9SFkEmhqu6AO/EFQanDb721AWw/Hdm9mx',
   'date': 'Mon, 30 Sep 2024 21:43:45 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '110',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

In [27]:
# Produce orders at regular intervals (for example, every second)
for order in range(1, 10 + 1):
    order_payload = order_payload = generate_order_payload(generate_order_details(df_clients_address, df_products, df_packages))
    produce_order(order_payload)
    time.sleep(5)

                                                                                