In [0]:
# ------------------------------
# Notebook Setup and Logging
# ------------------------------
# Run this cell first: the test cells below read `source_name` and use the
# `DeltaTableReader` import defined here.

# autoreload mode 2 re-imports modules before each cell executes, so edits to
# the `mdd` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import logging
import datetime
# NOTE(review): only `Logger` from this wildcard import is referenced in the
# visible cells; consider an explicit import once that is confirmed for the
# whole notebook.
from mdd.logger import *
from mdd.datareader import DeltaTableReader

# Initialize logging
# These four values are passed positionally to Logger.init below.
log_folder = "mdd_test"
log_file_name = "test_datareader_deltatable"
log_timestamp = datetime.datetime.now()  # naive local time captured at cell execution
debug = False  # presumably toggles verbose logging inside Logger -- TODO confirm

Logger.init(log_folder, log_file_name, log_timestamp, debug)

# Set the Delta table name to use across all tests
source_name = "bronze.paytronix_mid352_combinedcards"  # Replace with your actual Delta table name


In [0]:
%%sql
-- Row count per _record_timestamp value, most recent timestamp first.
SELECT _record_timestamp, count(*)
FROM lakehouse.bronze.paytronix_mid352_combinedcards
GROUP BY _record_timestamp
ORDER BY _record_timestamp DESC;

In [0]:
# Scenario: "full" mode with a timestamp watermark supplied.
# Expectation (per the test's intent): only rows newer than
# full_max_processed_timestamp are returned.
config = dict(
    source_name=source_name,
    mode="full",  # one of: full, backfill, incremental
    full_max_processed_timestamp="2025-05-19 00:00:00",
    backfill_days=None,
    incremental_max_processed_version=None,
)

reader = DeltaTableReader(
    spark=spark,
    config=config,
    debug=True,
)
df = reader.read()
display(df)  # rich table rendering; df.show() is the plain-text alternative


In [0]:
# Scenario: "full" mode with no timestamp watermark.
# Expectation (per the test's intent): the entire table is read.
config = dict(
    source_name=source_name,
    mode="full",  # one of: full, backfill, incremental
    full_max_processed_timestamp=None,
    backfill_days=None,
    incremental_max_processed_version=None,
)

reader = DeltaTableReader(
    spark=spark,
    config=config,
    debug=True,
)
df = reader.read()
display(df)  # rich table rendering; df.show() is the plain-text alternative


In [0]:
# Scenario: "backfill" mode with a 7-day window.
# Expectation (per the test's intent): rows from the last backfill_days days
# are returned.
config = dict(
    source_name=source_name,
    mode="backfill",  # one of: full, backfill, incremental
    full_max_processed_timestamp=None,
    backfill_days=7,
    incremental_max_processed_version=None,
)

reader = DeltaTableReader(
    spark=spark,
    config=config,
    debug=True,
)
df = reader.read()
display(df)  # rich table rendering; df.show() is the plain-text alternative



In [0]:
# Scenario: "incremental" mode with last processed version 0.
# Expectation (per the test's intent): a streaming read driven by the table's
# Change Data Feed.
config = dict(
    source_name=source_name,
    mode="incremental",  # one of: full, backfill, incremental
    full_max_processed_timestamp=None,
    backfill_days=None,
    incremental_max_processed_version=0,
)

reader = DeltaTableReader(
    spark=spark,
    config=config,
    debug=True,
)
df = reader.read()
display(df)


In [0]:
# Scenario: "incremental" mode with a version (-999) that cannot exist.
# Expectation (per the test's intent): the reader falls back to a backfill,
# here configured with a 5-day window.
config = dict(
    source_name=source_name,
    mode="incremental",  # one of: full, backfill, incremental
    full_max_processed_timestamp=None,
    backfill_days=5,
    incremental_max_processed_version=-999,
)

reader = DeltaTableReader(
    spark=spark,
    config=config,
    debug=True,
)
df = reader.read()
display(df)  # rich table rendering; df.show() is the plain-text alternative
