In [1]:
from datetime import date
from src.pipeline.ageing_report import AgeingReportPipeline
from src.utils.logger import setup_logging, get_logger
from src.utils.spark_utils import create_spark_session, load_config, read_input_file
from src.config.models import AppConfig
from pyspark.sql.types import (
    DateType,
    StringType,
    StructField,
    StructType,
    DoubleType,
)

In [2]:
# Run parameters for this notebook.
# The reporting date is written in ISO form (YYYY-MM-DD).
as_at_date = date.fromisoformat("2025-07-07")
# Environment name; it selects which config/config_<env>.yaml file is loaded.
env = "prod"

In [3]:
# Load configuration
# The YAML file is chosen by `env`. The path is relative, so it presumably
# resolves against the kernel's working directory -- TODO confirm how
# load_config opens it.
config_path = f"config/config_{env}.yaml"
# load_config must return a mapping, because it is unpacked with ** below.
config_dict = load_config(config_path)
# Wrap the raw mapping in the typed AppConfig model; later steps read
# config.logging.* and config.pipeline.outputs.* from it.
config = AppConfig(**config_dict)

In [4]:
# Apply the logging level and format from the loaded config. The call is made
# only for its effect (presumably configuring the logging system -- confirm in
# src.utils.logger). Its return value is deliberately not bound: the original
# code assigned it to `logger` and overwrote it on the very next line.
setup_logging(level=config.logging.level, format_str=config.logging.format)

# Logger used by the rest of the notebook, named after the current module
# (`__main__` when run as a notebook, as the captured log lines show).
logger = get_logger(__name__)

In [5]:
# Record the run parameters before any Spark work starts.
logger.info(f"Starting Ageing Report Pipeline for environment: {env}")
logger.info(f"As-at date: {as_at_date}")

# Spark session -> pipeline -> report generation for the chosen as-at date.
spark = create_spark_session(config)
pipeline = AgeingReportPipeline(spark, config)
pipeline.generate_ageing_report(as_at_date)

# Column layout of the report as (name, Spark type, nullable), in column order.
# Only document_type and as_at_date are declared non-nullable.
ageing_report_columns = [
    ("centre_id", StringType, True),
    ("class_id", StringType, True),
    ("document_id", StringType, True),
    ("document_date", DateType, True),
    ("student_id", StringType, True),
    ("day_30", DoubleType, True),
    ("day_60", DoubleType, True),
    ("day_90", DoubleType, True),
    ("day_120", DoubleType, True),
    ("day_150", DoubleType, True),
    ("day_180", DoubleType, True),
    ("day_180_and_above", DoubleType, True),
    ("document_type", StringType, False),
    ("as_at_date", DateType, False),
]
ageing_report_schema = StructType(
    [
        StructField(column_name, spark_type(), nullable)
        for column_name, spark_type, nullable in ageing_report_columns
    ]
)

# Read the report from the configured outputs location:
# "<outputs.path>_<YYYY-MM-DD>.<outputs.format>".
# Presumably this is the file generate_ageing_report just wrote -- confirm in
# AgeingReportPipeline.
report_format = config.pipeline.outputs.format
report_path = f"{config.pipeline.outputs.path}_{as_at_date.strftime('%Y-%m-%d')}.{config.pipeline.outputs.format}"
ageing_report = read_input_file(
    spark=spark,
    file_format=report_format,
    file_path=report_path,
    schema=ageing_report_schema,
)

2025-07-18 22:53:31,424 - __main__ - INFO - Starting Ageing Report Pipeline for environment: prod
2025-07-18 22:53:31,427 - __main__ - INFO - As-at date: 2025-07-07


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/07/18 22:53:34 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


2025-07-18 22:53:38,423 - src.pipeline.data_validator - INFO - Starting comprehensive data validation...
2025-07-18 22:53:38,425 - src.pipeline.data_validator - INFO - Validating invoices dataframe...
2025-07-18 22:53:41,596 - src.pipeline.data_validator - INFO - invoices row count: 8
2025-07-18 22:53:42,847 - src.pipeline.data_validator - INFO - invoices validation completed successfully
2025-07-18 22:53:42,849 - src.pipeline.data_validator - INFO - Validating credit_notes dataframe...
2025-07-18 22:53:43,018 - src.pipeline.data_validator - INFO - credit_notes row count: 8
2025-07-18 22:53:43,893 - src.pipeline.data_validator - INFO - credit_notes validation completed successfully
2025-07-18 22:53:43,895 - src.pipeline.data_validator - INFO - Validating payments dataframe...
2025-07-18 22:53:44,005 - src.pipeline.data_validator - INFO - payments row count: 8
2025-07-18 22:53:44,756 - src.pipeline.data_validator - INFO - payments validation completed successfully
2025-07-18 22:53:44,75

In [6]:
# Print the report rows to the cell output; truncate=False keeps full column
# values instead of shortening long strings.
ageing_report.show(truncate=False)

+---------+--------+-----------+-------------+----------+------+------+------+-------+-------+-------+-----------------+-------------+----------+
|centre_id|class_id|document_id|document_date|student_id|day_30|day_60|day_90|day_120|day_150|day_180|day_180_and_above|document_type|as_at_date|
+---------+--------+-----------+-------------+----------+------+------+------+-------+-------+-------+-----------------+-------------+----------+
|0_02     |cls_01  |cr_007     |2025-05-05   |stu_007   |0.0   |0.0   |110.0 |0.0    |0.0    |0.0    |0.0              |credit_note  |2025-07-07|
|0_02     |cls_03  |cr_003     |2024-12-01   |stu_003   |0.0   |0.0   |0.0   |0.0    |0.0    |0.0    |200.0            |credit_note  |2025-07-07|
|0_03     |c15_03  |cr_006     |2025-02-28   |stu_006   |0.0   |0.0   |0.0   |0.0    |40.0   |0.0    |0.0              |credit_note  |2025-07-07|
|c_01     |cls_01  |cr_001     |2025-05-15   |5tu_001   |0.0   |100.0 |0.0   |0.0    |0.0    |0.0    |0.0              |cred