# Setup Env

In [0]:
!pip install elasticsearch

Collecting elasticsearch
  Obtaining dependency information for elasticsearch from https://files.pythonhosted.org/packages/c0/50/16306f4722ca2fcb64a5875bc1fa9b4d0bcb08c05967f60c23acd4cbb019/elasticsearch-8.17.2-py3-none-any.whl.metadata
  Downloading elasticsearch-8.17.2-py3-none-any.whl.metadata (8.8 kB)
Collecting elastic-transport<9,>=8.15.1 (from elasticsearch)
  Obtaining dependency information for elastic-transport<9,>=8.15.1 from https://files.pythonhosted.org/packages/cf/cd/b71d5bc74cde7fc6fd9b2ff9389890f45d9762cbbbf81dc5e51fd7588c4a/elastic_transport-8.17.1-py3-none-any.whl.metadata
  Downloading elastic_transport-8.17.1-py3-none-any.whl.metadata (3.8 kB)
Downloading elasticsearch-8.17.2-py3-none-any.whl (717 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/718.0 kB[0m [31m?[0m eta [36m-:--:--[0m
[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m716.8/718.0 kB[0m [31m29.3 MB/s[0m eta [36m0:00:01[0m
[2K   [90m━━━━━━━━━━━━━

In [0]:
from elasticsearch import Elasticsearch

# Connection details are read from the "snocko" Databricks secret scope
secret_scope = "snocko"
es_cloud_url = dbutils.secrets.get(scope=secret_scope, key="es_cloud_url")
es_user = dbutils.secrets.get(scope=secret_scope, key="es_user")
es_pass = dbutils.secrets.get(scope=secret_scope, key="es_pass")

# Build the HTTPS endpoint on port 443 from the stored URL value
es_cloud_url_full = f"https://{es_cloud_url}:443"

# Client used by every later cell; authenticates with basic auth
es = Elasticsearch(hosts=[es_cloud_url_full], basic_auth=(es_user, es_pass))

# Load Sample Dataset in Elasticsearch
- Remove the sample dataset in Elasticsearch and the corresponding tables if the `Pyspark batch processing from Elasticsearch` notebook was executed previously

In [0]:
# Remove each sample index (AAPL, AMZN and TSLA) if it currently exists
indices_to_delete = [
    "trade_txn_aapl_202503",
    "trade_txn_amzn_202503",
    "trade_txn_tsla_202504",
]

for stale_index in indices_to_delete:
    # Nothing to delete when the index is absent
    if not es.indices.exists(index=stale_index):
        continue
    es.indices.delete(index=stale_index)

In [0]:
# Create Indices and load sample data

def _trade_txn_index_body():
    """Return a fresh index-creation body holding the trade transaction mapping.

    A new dict is built on every call so each create request gets its own object.
    """
    # Fields of each entry in the nested sub_transactions list
    sub_transaction_fields = {
        "matched_order_id": {"type": "keyword"},
        "price_executed": {"type": "float"},
        "quantity_fulfilled": {"type": "long"},
        "sub_transaction_id": {"type": "keyword"},
        "timestamp": {"type": "date"},
    }
    # Top-level fields of a trade transaction document
    order_fields = {
        "create_timestamp": {"type": "date"},
        "order_id": {"type": "keyword"},
        "price": {"type": "float"},
        "quantity": {"type": "long"},
        "status": {"type": "keyword"},
        "stock_symbol": {
            "type": "text",
            "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
        },
        "sub_transactions": {"type": "nested", "properties": sub_transaction_fields},
        "trader_id": {"type": "keyword"},
        "type": {"type": "keyword"},
        "update_timestamp": {"type": "date"},
    }
    return {"mappings": {"properties": order_fields}}


# Create trade_txn_aapl_202503 and trade_txn_amzn_202503 with the same mapping
for sample_index in ("trade_txn_aapl_202503", "trade_txn_amzn_202503"):
    es.indices.create(index=sample_index, body=_trade_txn_index_body())

# Initial AAPL order: partially filled, with four sub transactions recorded
aapl_order_id = "7c3ba3ec-2cca-467b-b32e-caf253746205"
aapl_initial_fills = [
    {
        "sub_transaction_id": "5c94aa8c-7f72-4454-83d0-1d921a680a6b",
        "matched_order_id": "899bee50-0349-4ba5-93f2-672f6a570d72",
        "quantity_fulfilled": 887,
        "price_executed": 104.61,
        "timestamp": "2025-03-23T18:17:14.828000Z",
    },
    {
        "sub_transaction_id": "0c49cfee-562e-4439-8398-d7eaeed3c1ca",
        "matched_order_id": "8675f9d8-3298-4701-bd37-be3b23c74c09",
        "quantity_fulfilled": 493,
        "price_executed": 102.87,
        "timestamp": "2025-03-26T10:29:22.262000Z",
    },
    {
        "sub_transaction_id": "c91574d2-01be-45b2-86ee-64c308b9d6f7",
        "matched_order_id": "47b727b5-002d-4d0c-9440-649d30628cb4",
        "quantity_fulfilled": 574,
        "price_executed": 104.59,
        "timestamp": "2025-03-28T22:29:07.009000Z",
    },
    {
        "sub_transaction_id": "96d9c2e2-0c9e-494c-9693-a2f6df8a2bfe",
        "matched_order_id": "c1ea7c62-4e9b-422c-a854-80a8c22012a6",
        "quantity_fulfilled": 357,
        "price_executed": 103.93,
        "timestamp": "2025-03-30T08:10:43.282000Z",
    },
]

# Write the sample document into trade_txn_aapl_202503 (document id equals order_id)
es.index(
    index="trade_txn_aapl_202503",
    id=aapl_order_id,
    document={
        "order_id": aapl_order_id,
        "trader_id": "5b2654f5-cba4-486f-8f2a-449c76b5d208",
        "type": "buy",
        "stock_symbol": "AAPL",
        "quantity": 4086,
        "price": 103.72,
        "status": "partially_filled",
        "create_timestamp": "2025-03-23T11:16:40.689000Z",
        "update_timestamp": "2025-03-30T08:10:43.282000Z",
        "sub_transactions": aapl_initial_fills,
    },
)

# Initial AMZN order: an open sell order with an empty sub_transactions list
amzn_open_order = {
    "order_id": "77d80c91-5cd7-46ed-aef4-2cfeac060d20",
    "trader_id": "ec390790-1521-491a-9636-ff4a32c8307a",
    "type": "sell",
    "stock_symbol": "AMZN",
    "quantity": 1673,
    "price": 274.77,
    "status": "open",
    "create_timestamp": "2025-03-30T10:10:43.282000Z",
    "update_timestamp": "2025-03-30T10:10:43.282000Z",
    "sub_transactions": [],
}

# Write the sample document into trade_txn_amzn_202503 (document id equals order_id)
es.index(
    index="trade_txn_amzn_202503",
    id=amzn_open_order["order_id"],
    document=amzn_open_order,
)

ObjectApiResponse({'_index': 'trade_txn_amzn_202503', '_id': '77d80c91-5cd7-46ed-aef4-2cfeac060d20', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [0]:
# Drop the three trade tables and the tracking table when they exist
target_schema = "hive_metastore.default"
spark.sql(f"DROP TABLE IF EXISTS {target_schema}.trade_txn_amzn_202503")
spark.sql(f"DROP TABLE IF EXISTS {target_schema}.trade_txn_aapl_202503")
spark.sql(f"DROP TABLE IF EXISTS {target_schema}.trade_txn_tsla_202504")
spark.sql(f"DROP TABLE IF EXISTS {target_schema}.tracking_table")

DataFrame[]

# Execute `Pyspark batch processing from Elasticsearch` Notebook
- Execute the notebook to load the initial sample dataset into the tables
- Verify count of records in elasticsearch indices is the same as what is written into tables

In [0]:
# Get count of records in respective Elasticsearch Indices
def count_nested_objects(index_name):
    """Run a nested value_count aggregation over sub_transactions for one index.

    Args:
        index_name: Name of the Elasticsearch index to search.

    Returns:
        The "count" aggregation entry of the search response, i.e. whatever
        Elasticsearch returned under aggregations.nested_count.count
        (printed below as a dict such as {'value': 4}).
    """
    # Count sub_transaction_id values inside the nested sub_transactions path
    id_value_count = {"value_count": {"field": "sub_transactions.sub_transaction_id"}}
    nested_agg = {
        "nested": {"path": "sub_transactions"},
        "aggs": {"count": id_value_count},
    }
    # size 0: only the aggregation result is needed, not the matching hits
    search_body = {"size": 0, "aggs": {"nested_count": nested_agg}}

    result = es.search(index=index_name, body=search_body)
    return result["aggregations"]["nested_count"]["count"]

# AAPL index: top-level document count and nested sub transaction count
aapl_index = "trade_txn_aapl_202503"
aapl_count = es.count(index=aapl_index)["count"]
aapl_nested_count = count_nested_objects(aapl_index)

print(f"Elasticsearch | Count of records in {aapl_index}: ", aapl_count)
print(f"Elasticsearch | Count of nested objects in {aapl_index}: ", aapl_nested_count)

# AMZN index: top-level document count and nested sub transaction count
amzn_index = "trade_txn_amzn_202503"
amzn_count = es.count(index=amzn_index)["count"]
amzn_nested_count = count_nested_objects(amzn_index)

print(f"Elasticsearch | Count of records in {amzn_index}: ", amzn_count)
print(f"Elasticsearch | Count of nested objects in {amzn_index}: ", amzn_nested_count)


Elasticsearch | Count of records in trade_txn_aapl_202503:  1
Elasticsearch | Count of nested objects in trade_txn_aapl_202503:  {'value': 4}
Elasticsearch | Count of records in trade_txn_amzn_202503:  1
Elasticsearch | Count of nested objects in trade_txn_amzn_202503:  {'value': 0}


In [0]:
# Get count of records in respective Tables
from pyspark.sql import functions as F  # namespaced: importing `max` directly would shadow the builtin max()

# Check last_execution_time in tracking_table
tracking_table = "hive_metastore.default.tracking_table"
tracking_df = spark.table(tracking_table)
last_execution_time = tracking_df.select(F.max("last_execution_time")).collect()[0][0]
print("last_execution_time: " + str(last_execution_time))

# NOTE(review): the trade tables below are read by unqualified name, while the cleanup cell
# drops hive_metastore.default.<table>. This presumably relies on the session's current
# catalog/schema being hive_metastore.default - confirm.

# Get count of trade records in trade_txn_aapl_202503
trade_txn_aapl_202503 = spark.table("trade_txn_aapl_202503")
# size() is the number of elements in sub_transactions per row; the sum is the table-wide total
aapl_sub_txn_count = (
    trade_txn_aapl_202503
    .selectExpr("size(sub_transactions) as sub_transaction_count")
    .agg({"sub_transaction_count": "sum"})
    .collect()[0][0]
)

print("Table | Count of records in trade_txn_aapl_202503: " + str(trade_txn_aapl_202503.count()))
print("Table | Count of objects in sub_transactions for trade_txn_aapl_202503: " + str(aapl_sub_txn_count))

# Get count of trade records in trade_txn_amzn_202503
trade_txn_amzn_202503 = spark.table("trade_txn_amzn_202503")
amzn_sub_txn_count = (
    trade_txn_amzn_202503
    .selectExpr("size(sub_transactions) as sub_transaction_count")
    .agg({"sub_transaction_count": "sum"})
    .collect()[0][0]
)

print("Table | Count of records in trade_txn_amzn_202503: " + str(trade_txn_amzn_202503.count()))
print("Table | Count of objects in sub_transactions for trade_txn_amzn_202503: " + str(amzn_sub_txn_count))

# Same max(last_execution_time) value as above, so it is reused instead of re-queried.
# str() keeps the concatenation valid when the value is None or not a string.
print("latest timestamp in tracking_table: " + str(last_execution_time))

last_execution_time: 2025-03-30T10:10:43.282000
Table | Count of records in trade_txn_aapl_202503: 1
Table | Count of objects in sub_transactions for trade_txn_aapl_202503: 4
Table | Count of records in trade_txn_amzn_202503: 1
Table | Count of objects in sub_transactions for trade_txn_amzn_202503: 0
latest timestamp in tracking_table: 2025-03-30T10:10:43.282000


# Simulate new data in Elasticsearch Indices
- new trade transactions
- existing trade transactions updated with new sub transactions in nested field

In [0]:
# Simulate adding of new data & updating transactions in Elasticsearch

# Updated AAPL order: the four original sub transactions plus four new ones
aapl_order_id = "7c3ba3ec-2cca-467b-b32e-caf253746205"
aapl_updated_fills = [
    {
        "sub_transaction_id": "5c94aa8c-7f72-4454-83d0-1d921a680a6b",
        "matched_order_id": "899bee50-0349-4ba5-93f2-672f6a570d72",
        "quantity_fulfilled": 887,
        "price_executed": 104.61,
        "timestamp": "2025-03-23T18:17:14.828000Z",
    },
    {
        "sub_transaction_id": "0c49cfee-562e-4439-8398-d7eaeed3c1ca",
        "matched_order_id": "8675f9d8-3298-4701-bd37-be3b23c74c09",
        "quantity_fulfilled": 493,
        "price_executed": 102.87,
        "timestamp": "2025-03-26T10:29:22.262000Z",
    },
    {
        "sub_transaction_id": "c91574d2-01be-45b2-86ee-64c308b9d6f7",
        "matched_order_id": "47b727b5-002d-4d0c-9440-649d30628cb4",
        "quantity_fulfilled": 574,
        "price_executed": 104.59,
        "timestamp": "2025-03-28T22:29:07.009000Z",
    },
    {
        "sub_transaction_id": "96d9c2e2-0c9e-494c-9693-a2f6df8a2bfe",
        "matched_order_id": "c1ea7c62-4e9b-422c-a854-80a8c22012a6",
        "quantity_fulfilled": 357,
        "price_executed": 103.93,
        "timestamp": "2025-03-30T08:10:43.282000Z",
    },
    {
        "sub_transaction_id": "3be60b36-ca8f-4d7a-a9c3-f5911f5ce764",
        "matched_order_id": "fa25297a-29ba-4e85-8cd9-6f1db0486d21",
        "quantity_fulfilled": 326,
        "price_executed": 103.39,
        "timestamp": "2025-04-02T00:14:06.623000Z",
    },
    {
        "sub_transaction_id": "efb07594-f427-4a45-94c2-703413a3e70f",
        "matched_order_id": "1aa72bea-fe89-4687-8dd7-93b64e28c56b",
        "quantity_fulfilled": 670,
        "price_executed": 103.36,
        "timestamp": "2025-04-02T01:26:14.285000Z",
    },
    {
        "sub_transaction_id": "6fcf333b-b966-4016-8403-c2e60b86eb20",
        "matched_order_id": "c600f79f-7bbb-4c62-9d42-2a379a55342e",
        "quantity_fulfilled": 293,
        "price_executed": 103.85,
        "timestamp": "2025-04-02T09:25:16.523000Z",
    },
    {
        "sub_transaction_id": "0ce52064-709b-46ba-ab31-128488dc99e6",
        "matched_order_id": "9182a249-c86a-455e-ae4c-eadec03bc96f",
        "quantity_fulfilled": 486,
        "price_executed": 103.58,
        "timestamp": "2025-04-02T09:50:12.959000Z",
    },
]

# Update existing document in trade_txn_aapl_202503 (same id, so it is overwritten)
es.index(
    index="trade_txn_aapl_202503",
    id=aapl_order_id,
    document={
        "order_id": aapl_order_id,
        "trader_id": "5b2654f5-cba4-486f-8f2a-449c76b5d208",
        "type": "buy",
        "stock_symbol": "AAPL",
        "quantity": 4086,
        "price": 103.72,
        "status": "fully_filled",
        "create_timestamp": "2025-03-23T11:16:40.689000Z",
        "update_timestamp": "2025-04-02T09:50:12.959000Z",
        "sub_transactions": aapl_updated_fills,
    },
)

# Updated AMZN sell order: now fully filled through three sub transactions
amzn_sell_order_id = "77d80c91-5cd7-46ed-aef4-2cfeac060d20"
amzn_sell_fills = [
    {
        "sub_transaction_id": "e096e1b8-9e19-49ba-af56-a7561fda5b6f",
        "matched_order_id": "98192ab8-cb0e-4f2a-a674-dc55e926e79d",
        "quantity_fulfilled": 790,
        "price_executed": 274.76,
        "timestamp": "2025-03-30T11:10:43.282000Z",
    },
    {
        "sub_transaction_id": "8b80c719-8731-417e-badb-6ac572613a41",
        "matched_order_id": "cc4a0ac9-5fd2-4e79-94cb-cda786797b29",
        "quantity_fulfilled": 852,
        "price_executed": 273.79,
        "timestamp": "2025-03-30T15:10:12.185000Z",
    },
    {
        "sub_transaction_id": "a236ad4e-6ea6-4e32-a021-0628bf70ed9e",
        "matched_order_id": "1c631818-b798-46f9-9681-81473ede1828",
        "quantity_fulfilled": 31,
        "price_executed": 274.27,
        "timestamp": "2025-03-31T13:15:33.481000Z",
    },
]

# Update existing document in trade_txn_amzn_202503 (same id, so it is overwritten)
es.index(
    index="trade_txn_amzn_202503",
    id=amzn_sell_order_id,
    document={
        "order_id": amzn_sell_order_id,
        "trader_id": "ec390790-1521-491a-9636-ff4a32c8307a",
        "type": "sell",
        "stock_symbol": "AMZN",
        "quantity": 1673,
        "price": 274.77,
        "status": "fully_filled",
        "create_timestamp": "2025-03-30T10:10:43.282000Z",
        "update_timestamp": "2025-03-31T13:15:33.481000Z",
        "sub_transactions": amzn_sell_fills,
    },
)

# Brand-new AMZN buy order with three sub transactions
amzn_buy_order_id = "168c2ee9-e4b7-44f0-bbad-60fab8c0cec8"
amzn_buy_fills = [
    {
        "sub_transaction_id": "f16f2dcb-fead-48e1-aa6a-3d176d149b10",
        "matched_order_id": "2809cda0-9b64-4e9a-94b4-ae44db2e2e56",
        "quantity_fulfilled": 1166,
        "price_executed": 244.6,
        "timestamp": "2025-03-30T21:32:14.087000Z",
    },
    {
        "sub_transaction_id": "ef9c3466-d708-4633-804a-a156fe5c9c3b",
        "matched_order_id": "53b54b9f-d064-4d78-a136-caaea2f6bf85",
        "quantity_fulfilled": 1000,
        "price_executed": 243.84,
        "timestamp": "2025-04-01T13:33:03.844000Z",
    },
    {
        "sub_transaction_id": "4043fb0d-b435-4255-ac3b-f422263273fa",
        "matched_order_id": "750861d2-3618-4b08-bd47-5ca21848110d",
        "quantity_fulfilled": 1100,
        "price_executed": 242.5,
        "timestamp": "2025-04-01T17:57:49.363000Z",
    },
]

# New document in trade_txn_amzn_202503
es.index(
    index="trade_txn_amzn_202503",
    id=amzn_buy_order_id,
    document={
        "order_id": amzn_buy_order_id,
        "trader_id": "14a81084-b8c6-4fdd-b94d-0bd88e49b7de",
        "type": "buy",
        "stock_symbol": "AMZN",
        "quantity": 3266,
        "price": 244.02,
        "status": "fully_filled",
        "create_timestamp": "2025-03-30T21:32:14.087000Z",
        "update_timestamp": "2025-04-01T17:57:49.363000Z",
        "sub_transactions": amzn_buy_fills,
    },
)

# Trade on a new stock symbol: TSLA is stored in its own index, trade_txn_tsla_202504

# Fields of each entry in the nested sub_transactions list
tsla_sub_transaction_fields = {
    "matched_order_id": {"type": "keyword"},
    "price_executed": {"type": "float"},
    "quantity_fulfilled": {"type": "long"},
    "sub_transaction_id": {"type": "keyword"},
    "timestamp": {"type": "date"},
}

# Index body with the top-level trade transaction fields
tsla_index_body = {
    "mappings": {
        "properties": {
            "create_timestamp": {"type": "date"},
            "order_id": {"type": "keyword"},
            "price": {"type": "float"},
            "quantity": {"type": "long"},
            "status": {"type": "keyword"},
            "stock_symbol": {
                "type": "text",
                "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
            },
            "sub_transactions": {"type": "nested", "properties": tsla_sub_transaction_fields},
            "trader_id": {"type": "keyword"},
            "type": {"type": "keyword"},
            "update_timestamp": {"type": "date"},
        }
    }
}

# Create trade_txn_tsla_202504 index
es.indices.create(index="trade_txn_tsla_202504", body=tsla_index_body)

# New TSLA sell order with three sub transactions
tsla_order_id = "afb6cf8a-47aa-4116-9615-53967d72085e"
tsla_fills = [
    {
        "sub_transaction_id": "132e9c5b-e387-4d6c-b719-1bfc33d6a6e9",
        "matched_order_id": "69aacf41-c7fa-4293-b871-a6d069dd1fc6",
        "quantity_fulfilled": 542,
        "price_executed": 299.38,
        "timestamp": "2025-04-01T01:49:13.466000Z",
    },
    {
        "sub_transaction_id": "eeffe6ec-0c79-4e28-8580-2ab2ca884014",
        "matched_order_id": "d8ad4eda-f7ef-460e-836e-1e18b8b889e4",
        "quantity_fulfilled": 311,
        "price_executed": 300.08,
        "timestamp": "2025-04-01T14:05:56.802000Z",
    },
    {
        "sub_transaction_id": "4b6486f4-1855-4f26-ad2d-adda010c0107",
        "matched_order_id": "2bc268c6-abd2-47dd-8722-277a5541f7d5",
        "quantity_fulfilled": 1550,
        "price_executed": 298.75,
        "timestamp": "2025-04-02T05:39:02.988000Z",
    },
]

# Write the document into trade_txn_tsla_202504 (document id equals order_id)
es.index(
    index="trade_txn_tsla_202504",
    id=tsla_order_id,
    document={
        "order_id": tsla_order_id,
        "trader_id": "c658f3f1-e69f-4538-993c-f6ce032afb82",
        "type": "sell",
        "stock_symbol": "TSLA",
        "quantity": 2403,
        "price": 299.43,
        "status": "fully_filled",
        "create_timestamp": "2025-04-01T01:49:13.466000Z",
        "update_timestamp": "2025-04-02T05:39:02.988000Z",
        "sub_transactions": tsla_fills,
    },
)

ObjectApiResponse({'_index': 'trade_txn_tsla_202504', '_id': 'afb6cf8a-47aa-4116-9615-53967d72085e', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

# 2nd Execution of `Pyspark batch processing from Elasticsearch` Notebook
- Execute the notebook again to load the new and updated data into the tables
- Verify count of records in elasticsearch indices is the same as what is written into tables

In [0]:
# Get count of records in respective Elasticsearch Indices
def count_nested_objects(index_name):
    """Run a nested value_count aggregation over sub_transactions for one index.

    Args:
        index_name: Name of the Elasticsearch index to search.

    Returns:
        The "count" aggregation entry of the search response, i.e. whatever
        Elasticsearch returned under aggregations.nested_count.count
        (printed below as a dict such as {'value': 8}).
    """
    # Count sub_transaction_id values inside the nested sub_transactions path
    id_value_count = {"value_count": {"field": "sub_transactions.sub_transaction_id"}}
    nested_agg = {
        "nested": {"path": "sub_transactions"},
        "aggs": {"count": id_value_count},
    }
    # size 0: only the aggregation result is needed, not the matching hits
    search_body = {"size": 0, "aggs": {"nested_count": nested_agg}}

    result = es.search(index=index_name, body=search_body)
    return result["aggregations"]["nested_count"]["count"]

# AAPL index: top-level document count and nested sub transaction count
aapl_index = "trade_txn_aapl_202503"
aapl_count = es.count(index=aapl_index)["count"]
aapl_nested_count = count_nested_objects(aapl_index)

print(f"Elasticsearch | Count of records in {aapl_index}: ", aapl_count)
print(f"Elasticsearch | Count of nested objects in {aapl_index}: ", aapl_nested_count)

# AMZN index: top-level document count and nested sub transaction count
amzn_index = "trade_txn_amzn_202503"
amzn_count = es.count(index=amzn_index)["count"]
amzn_nested_count = count_nested_objects(amzn_index)

print(f"Elasticsearch | Count of records in {amzn_index}: ", amzn_count)
print(f"Elasticsearch | Count of nested objects in {amzn_index}: ", amzn_nested_count)

# TSLA index: top-level document count and nested sub transaction count
tsla_index = "trade_txn_tsla_202504"
tsla_count = es.count(index=tsla_index)["count"]
tsla_nested_count = count_nested_objects(tsla_index)

print(f"Elasticsearch | Count of records in {tsla_index}: ", tsla_count)
print(f"Elasticsearch | Count of nested objects in {tsla_index}: ", tsla_nested_count)


Elasticsearch | Count of records in trade_txn_aapl_202503:  1
Elasticsearch | Count of nested objects in trade_txn_aapl_202503:  {'value': 8}
Elasticsearch | Count of records in trade_txn_amzn_202503:  2
Elasticsearch | Count of nested objects in trade_txn_amzn_202503:  {'value': 6}
Elasticsearch | Count of records in trade_txn_tsla_202504:  1
Elasticsearch | Count of nested objects in trade_txn_tsla_202504:  {'value': 3}


In [0]:
# Get count of records in respective Tables
from pyspark.sql import functions as F  # namespaced: importing `max` directly would shadow the builtin max()

# Check last_execution_time in tracking_table
tracking_table = "hive_metastore.default.tracking_table"
# Defined in this cell so it does not depend on a variable left over from an earlier cell
tracking_df = spark.table(tracking_table)
last_execution_time = tracking_df.select(F.max("last_execution_time")).collect()[0][0]
print("last_execution_time: " + str(last_execution_time))

# NOTE(review): the trade tables below are read by unqualified name, while the cleanup cell
# drops hive_metastore.default.<table>. This presumably relies on the session's current
# catalog/schema being hive_metastore.default - confirm.

# Get count of trade records in trade_txn_aapl_202503
trade_txn_aapl_202503 = spark.table("trade_txn_aapl_202503")
# size() is the number of elements in sub_transactions per row; the sum is the table-wide total
aapl_sub_txn_count = (
    trade_txn_aapl_202503
    .selectExpr("size(sub_transactions) as sub_transaction_count")
    .agg({"sub_transaction_count": "sum"})
    .collect()[0][0]
)

print("Table | Count of records in trade_txn_aapl_202503: " + str(trade_txn_aapl_202503.count()))
print("Table | Count of objects in sub_transactions for trade_txn_aapl_202503: " + str(aapl_sub_txn_count))

# Get count of trade records in trade_txn_amzn_202503
trade_txn_amzn_202503 = spark.table("trade_txn_amzn_202503")
amzn_sub_txn_count = (
    trade_txn_amzn_202503
    .selectExpr("size(sub_transactions) as sub_transaction_count")
    .agg({"sub_transaction_count": "sum"})
    .collect()[0][0]
)

print("Table | Count of records in trade_txn_amzn_202503: " + str(trade_txn_amzn_202503.count()))
print("Table | Count of objects in sub_transactions for trade_txn_amzn_202503: " + str(amzn_sub_txn_count))

# Get count of trade records in trade_txn_tsla_202504
trade_txn_tsla_202504 = spark.table("trade_txn_tsla_202504")
tsla_sub_txn_count = (
    trade_txn_tsla_202504
    .selectExpr("size(sub_transactions) as sub_transaction_count")
    .agg({"sub_transaction_count": "sum"})
    .collect()[0][0]
)

print("Table | Count of records in trade_txn_tsla_202504: " + str(trade_txn_tsla_202504.count()))
print("Table | Count of objects in sub_transactions for trade_txn_tsla_202504: " + str(tsla_sub_txn_count))

# Same max(last_execution_time) value as above, so it is reused instead of re-queried.
# str() keeps the concatenation valid when the value is None or not a string.
print("latest timestamp in tracking_table: " + str(last_execution_time))

last_execution_time: 2025-04-02T09:50:12.959000
Table | Count of records in trade_txn_aapl_202503: 1
Table | Count of objects in sub_transactions for trade_txn_aapl_202503: 8
Table | Count of records in trade_txn_amzn_202503: 2
Table | Count of objects in sub_transactions for trade_txn_amzn_202503: 6
Table | Count of records in trade_txn_tsla_202504: 1
Table | Count of objects in sub_transactions for trade_txn_tsla_202504: 3
latest timestamp in tracking_table: 2025-04-02T09:50:12.959000
