In [3]:
# Install required packages
!pip install click pandas



In [None]:
import pandas as pd
from datetime import date
import os
from pathlib import Path
import functools
import time
import logging
from typing import Optional, Union

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Decorator for timing function execution
def timer(func):
    """Decorator to measure execution time of functions."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        execution_time = end_time - start_time
        logger.info(f"Function '{func.__name__}' executed in {execution_time:.2f} seconds")
        return result
    return wrapper

# Decorator for logging function calls
def log_calls(func):
    """Decorator to log function calls with arguments."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Log function call with parameters (excluding sensitive data)
        params = []
        if args:
            params.extend([f"arg{i}={arg}" for i, arg in enumerate(args[:2])])  # Only first 2 args
        if kwargs:
            params.extend([f"{k}={v}" for k, v in list(kwargs.items())[:3]])  # Only first 3 kwargs
        params_str = ", ".join(params)
        logger.info(f"Calling {func.__name__}({params_str})")
        
        try:
            result = func(*args, **kwargs)
            logger.info(f"Function '{func.__name__}' completed successfully")
            return result
        except Exception as e:
            logger.error(f"Function '{func.__name__}' failed with error: {str(e)}")
            raise
    return wrapper

# Decorator for input validation
def validate_inputs(func):
    """Decorator to validate input parameters."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Validate data_dir exists
        if 'data_dir' in kwargs:
            data_dir = kwargs['data_dir']
        elif len(args) > 0:
            data_dir = args[0]
        else:
            data_dir = '../data'  # default
            
        if not Path(data_dir).exists():
            logger.warning(f"Data directory '{data_dir}' does not exist")
            
        # Validate variance_threshold is positive
        threshold = kwargs.get('variance_threshold', 1.0 if len(args) <= 2 else args[2])
        if not isinstance(threshold, (int, float)) or threshold < 0:
            raise ValueError(f"variance_threshold must be a positive number, got {threshold}")
            
        return func(*args, **kwargs)
    return wrapper

# Decorator for retry logic with exponential backoff
def retry(max_attempts=3, delay=1, backoff=2):
    """Decorator to retry functions with exponential backoff."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            attempts = 0
            current_delay = delay
            
            while attempts < max_attempts:
                try:
                    return func(*args, **kwargs)
                except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
                    attempts += 1
                    if attempts == max_attempts:
                        logger.error(f"Function '{func.__name__}' failed after {max_attempts} attempts")
                        raise e
                    
                    logger.warning(f"Attempt {attempts} failed: {str(e)}. Retrying in {current_delay} seconds...")
                    time.sleep(current_delay)
                    current_delay *= backoff
                except Exception as e:
                    # Don't retry for non-recoverable errors
                    logger.error(f"Non-recoverable error in '{func.__name__}': {str(e)}")
                    raise e
            
            return None
        return wrapper
    return decorator

# Decorator for result caching (simple memoization)
def cache_result(func):
    """Decorator for simple result caching based on function arguments."""
    cache = {}
    
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Create a cache key from arguments
        key = str(args) + str(sorted(kwargs.items()))
        
        if key in cache:
            logger.info(f"Cache hit for function '{func.__name__}'")
            return cache[key]
        
        result = func(*args, **kwargs)
        cache[key] = result
        logger.info(f"Cache miss for function '{func.__name__}' - result cached")
        return result
    return wrapper

@timer
@log_calls
@validate_inputs
@retry(max_attempts=2, delay=0.5)
def load_data_files(data_path: Path) -> tuple:
    """Load all required data files with retry logic."""
    orders = pd.read_csv(data_path / "orders.csv")
    orders = orders[orders["order_id"] != "order_id"]  # Remove header duplicates
    
    refunds = pd.read_csv(data_path / "refunds.csv")
    psp = pd.read_csv(data_path / "psp_settlements.csv")
    gl = pd.read_csv(data_path / "gl_entries.csv")
    
    logger.info(f"Loaded {len(orders)} orders, {len(refunds)} refunds, {len(psp)} PSP settlements, {len(gl)} GL entries")
    return orders, refunds, psp, gl

@timer
@log_calls
def process_data(orders: pd.DataFrame, gl: pd.DataFrame, variance_threshold: float) -> pd.DataFrame:
    """Process and reconcile orders with GL data."""
    
    # Data processing
    orders = orders.rename(columns={"net_amount": "expected_net"})
    gl = gl.rename(columns={"amount": "gl_amount"})
    
    # Normalize column names
    orders.columns = orders.columns.str.strip().str.lower()
    gl.columns = gl.columns.str.strip().str.lower()

    # Rename for consistency
    orders = orders.rename(columns={"net_amount": "expected_net"})
    gl = gl.rename(columns={"reference": "order_id"})

    # Compute signed GL amount
    gl["debit"] = pd.to_numeric(gl["debit"], errors="coerce")
    gl["credit"] = pd.to_numeric(gl["credit"], errors="coerce")
    gl["signed_amount"] = gl["debit"].fillna(0) - gl["credit"].fillna(0)

    # Aggregate GL to order level
    gl_order = (
        gl.groupby("order_id", as_index=False)
          .agg(gl_amount=("signed_amount", "sum"))
    )

    # Merge Orders → GL
    merged = orders.merge(gl_order, on="order_id", how="left")

    # Reconciliation difference
    merged["variance"] = merged["expected_net"] - merged["gl_amount"]

    # Identify exceptions using parameterized threshold
    exceptions = merged[
        merged["gl_amount"].isna() | (merged["variance"].abs() > variance_threshold)
    ]

    return exceptions

@timer
@log_calls
def create_exception_report(exceptions: pd.DataFrame) -> pd.DataFrame:
    """Create formatted exception report."""
    report = pd.DataFrame({
        "report_date": date.today(),
        "exception_type": "GL_MISMATCH",
        "order_id": exceptions["order_id"],
        "variance": exceptions["variance"],
        "status": "OPEN",
        "priority": "HIGH"
    })
    
    return report

@timer
@log_calls
@validate_inputs
def generate_reconciliation_report(data_dir='../data', output_dir='../output', variance_threshold=1.0) -> Optional[pd.DataFrame]:
    """
    Generate daily reconciliation report with configurable directories.
    
    This function is decorated with multiple decorators for:
    - Timing execution
    - Logging function calls and parameters
    - Input validation
    - Comprehensive error handling
    """
    
    # Convert to Path objects for easier manipulation
    data_path = Path(data_dir)
    output_path = Path(output_dir)
    
    # Create output directory if it doesn't exist
    output_path.mkdir(parents=True, exist_ok=True)
    
    try:
        # Load data with retry logic
        orders, refunds, psp, gl = load_data_files(data_path)
        
        # Process data
        exceptions = process_data(orders, gl, variance_threshold)
        
        # Create report
        report = create_exception_report(exceptions)

        # Write output to parameterized directory
        output_file = output_path / "daily_exception_report.csv"
        report.to_csv(output_file, index=False)

        logger.info(f"Generated {len(report)} exceptions")
        logger.info(f"Report saved to: {output_file}")
        
        return report
        
    except Exception as e:
        logger.error(f"Failed to generate reconciliation report: {str(e)}")
        return None
    finally:
        logger.info("Reconciliation report generation process completed.")

# For Jupyter notebook usage - run with default parameters
print("Running reconciliation report with default parameters...")
try:
    report = generate_reconciliation_report()
    if report is not None:
        print(f"""
[SUCCESS] Report generated successfully!
Report shape: {report.shape}
Exception summary:
{report['exception_type'].value_counts()}
""")
    else:
        print("Report generation failed due to missing data files.")
except Exception as e:
    print(f"""
Note: {e}

This is expected if data files don't exist yet.
finally:    print("Reconciliation report execution completed.")
""")

2026-02-01 23:19:54,672 - INFO - Calling generate_reconciliation_report()
2026-02-01 23:19:54,674 - INFO - Calling load_data_files(arg0=..\data)
2026-02-01 23:19:54,679 - INFO - Loaded 12 orders, 4 refunds, 7 PSP settlements, 15 GL entries
2026-02-01 23:19:54,680 - INFO - Function 'load_data_files' completed successfully
2026-02-01 23:19:54,680 - INFO - Function 'load_data_files' executed in 0.01 seconds
2026-02-01 23:19:54,684 - INFO - Calling process_data(arg0=   order_id  order_date customer_id product_id quantity  unit_price  \
0   ORD-001  2024-01-15    CUST-101     PROD-A        2        50.0   
1   ORD-002  2024-01-15    CUST-102     PROD-B        1        75.0   
2   ORD-003  2024-01-16    CUST-103     PROD-A        3        50.0   
3   ORD-004  2024-01-16    CUST-104     PROD-C        1       200.0   
4   ORD-005  2024-01-17    CUST-105     PROD-B        2        75.0   
5   ORD-006  2024-01-17    CUST-106     PROD-D        1       300.0   
6   ORD-007  2024-01-18    CUST-107 

Running reconciliation report with default parameters...

[SUCCESS] Report generated successfully!
Report shape: (12, 6)
Exception summary:
exception_type
GL_MISMATCH    12
Name: count, dtype: int64



In [19]:
# Click command-line interface (for script usage)
import click

@click.command()
@click.option('--data-dir', '-d', 
              default='../data', 
              help='Directory containing input data files (orders.csv, refunds.csv, etc.)')
@click.option('--output-dir', '-o', 
              default='../output', 
              help='Directory for output files')
@click.option('--variance-threshold', '-t', 
              default=1.0, 
              type=float,
              help='Variance threshold for exception reporting')
@click.option('--verbose', '-v', 
              is_flag=True,
              help='Enable verbose logging')
def cli_generate_report(data_dir, output_dir, variance_threshold, verbose):
    """Command-line interface for the reconciliation report."""
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)
        
    report = generate_reconciliation_report(data_dir, output_dir, variance_threshold)
    return report

# Enhanced script content with decorators
script_content = '''#!/usr/bin/env python
"""
Finance Order-to-Cash Reconciliation Report Generator with Python Decorators

Usage:
    python generate_daily_report.py --data-dir ./data --output-dir ./output --variance-threshold 1.0
"""
import pandas as pd
import click
from datetime import date
import os
from pathlib import Path
import functools
import time
import logging
from typing import Optional

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Decorators
def timer(func):
    """Decorator to measure execution time of functions."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        execution_time = end_time - start_time
        logger.info(f"Function '{func.__name__}' executed in {execution_time:.2f} seconds")
        return result
    return wrapper

def log_calls(func):
    """Decorator to log function calls with arguments."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        params = []
        if args:
            params.extend([f"arg{i}={arg}" for i, arg in enumerate(args[:2])])
        if kwargs:
            params.extend([f"{k}={v}" for k, v in list(kwargs.items())[:3]])
        params_str = ", ".join(params)
        logger.info(f"Calling {func.__name__}({params_str})")
        
        try:
            result = func(*args, **kwargs)
            logger.info(f"Function '{func.__name__}' completed successfully")
            return result
        except Exception as e:
            logger.error(f"Function '{func.__name__}' failed with error: {str(e)}")
            raise
    return wrapper

def validate_inputs(func):
    """Decorator to validate input parameters."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        data_dir = kwargs.get('data_dir', args[0] if args else '../data')
        if not Path(data_dir).exists():
            logger.warning(f"Data directory '{data_dir}' does not exist")
            
        threshold = kwargs.get('variance_threshold', args[2] if len(args) > 2 else 1.0)
        if not isinstance(threshold, (int, float)) or threshold < 0:
            raise ValueError(f"variance_threshold must be a positive number, got {threshold}")
            
        return func(*args, **kwargs)
    return wrapper

def retry(max_attempts=3, delay=1, backoff=2):
    """Decorator to retry functions with exponential backoff."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            attempts = 0
            current_delay = delay
            
            while attempts < max_attempts:
                try:
                    return func(*args, **kwargs)
                except (FileNotFoundError, pd.errors.EmptyDataError) as e:
                    attempts += 1
                    if attempts == max_attempts:
                        logger.error(f"Function '{func.__name__}' failed after {max_attempts} attempts")
                        raise e
                    
                    logger.warning(f"Attempt {attempts} failed: {str(e)}. Retrying in {current_delay} seconds...")
                    time.sleep(current_delay)
                    current_delay *= backoff
                except Exception as e:
                    logger.error(f"Non-recoverable error in '{func.__name__}': {str(e)}")
                    raise e
            return None
        return wrapper
    return decorator

@timer
@log_calls
@validate_inputs
@retry(max_attempts=2, delay=0.5)
def load_data_files(data_path):
    """Load all required data files with retry logic."""
    orders = pd.read_csv(data_path / "orders.csv")
    orders = orders[orders["order_id"] != "order_id"]
    
    refunds = pd.read_csv(data_path / "refunds.csv")
    psp = pd.read_csv(data_path / "psp_settlements.csv")
    gl = pd.read_csv(data_path / "gl_entries.csv")
    
    logger.info(f"Loaded {len(orders)} orders, {len(refunds)} refunds, {len(psp)} PSP settlements, {len(gl)} GL entries")
    return orders, refunds, psp, gl

@timer
@log_calls
def process_data(orders, gl, variance_threshold):
    """Process and reconcile orders with GL data."""
    orders = orders.rename(columns={"net_amount": "expected_net"})
    gl = gl.rename(columns={"amount": "gl_amount"})
    
    orders.columns = orders.columns.str.strip().str.lower()
    gl.columns = gl.columns.str.strip().str.lower()

    orders = orders.rename(columns={"net_amount": "expected_net"})
    gl = gl.rename(columns={"reference": "order_id"})

    gl["debit"] = pd.to_numeric(gl["debit"], errors="coerce")
    gl["credit"] = pd.to_numeric(gl["credit"], errors="coerce")
    gl["signed_amount"] = gl["debit"].fillna(0) - gl["credit"].fillna(0)

    gl_order = (gl.groupby("order_id", as_index=False)
                  .agg(gl_amount=("signed_amount", "sum")))

    merged = orders.merge(gl_order, on="order_id", how="left")
    merged["variance"] = merged["expected_net"] - merged["gl_amount"]

    exceptions = merged[
        merged["gl_amount"].isna() | (merged["variance"].abs() > variance_threshold)
    ]
    return exceptions

@timer
@log_calls
def create_exception_report(exceptions):
    """Create formatted exception report."""
    report = pd.DataFrame({
        "report_date": date.today(),
        "exception_type": "GL_MISMATCH", 
        "order_id": exceptions["order_id"],
        "variance": exceptions["variance"],
        "status": "OPEN",
        "priority": "HIGH"
    })
    return report

@timer
@log_calls
@validate_inputs
def generate_reconciliation_report(data_dir='../data', output_dir='../output', variance_threshold=1.0):
    """Generate daily reconciliation report with configurable directories."""
    
    data_path = Path(data_dir)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    
    try:
        orders, refunds, psp, gl = load_data_files(data_path)
        exceptions = process_data(orders, gl, variance_threshold)
        report = create_exception_report(exceptions)

        output_file = output_path / "daily_exception_report.csv"
        report.to_csv(output_file, index=False)

        logger.info(f"Generated {len(report)} exceptions")
        logger.info(f"Report saved to: {output_file}")
        
        return report
        
    except Exception as e:
        logger.error(f"Failed to generate reconciliation report: {str(e)}")
        return None

@click.command()
@click.option('--data-dir', '-d', default='../data', 
              help='Directory containing input data files')
@click.option('--output-dir', '-o', default='../output', 
              help='Directory for output files')
@click.option('--variance-threshold', '-t', default=1.0, type=float,
              help='Variance threshold for exception reporting')
@click.option('--verbose', '-v', is_flag=True,
              help='Enable verbose logging')
def main(data_dir, output_dir, variance_threshold, verbose):
    """Generate daily reconciliation report with configurable directories."""
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    generate_reconciliation_report(data_dir, output_dir, variance_threshold)

if __name__ == "__main__":
    main()
'''

# Write the enhanced script with decorators
with open('generate_daily_report.py', 'w', encoding='utf-8') as f:
    f.write(script_content)
    
print("""
Created enhanced standalone script with decorators: generate_daily_report.py

Added decorators:
  • @timer - Measures execution time
  • @log_calls - Logs function calls and parameters
  • @validate_inputs - Validates input parameters
  • @retry - Retries failed operations with exponential backoff

Usage examples:
  python generate_daily_report.py
  python generate_daily_report.py --verbose
  python generate_daily_report.py --data-dir ./my_data --output-dir ./my_output --verbose
  python generate_daily_report.py --variance-threshold 0.5 -v
""")


Created enhanced standalone script with decorators: generate_daily_report.py

Added decorators:
  • @timer - Measures execution time
  • @log_calls - Logs function calls and parameters
  • @validate_inputs - Validates input parameters
  • @retry - Retries failed operations with exponential backoff

Usage examples:
  python generate_daily_report.py
  python generate_daily_report.py --verbose
  python generate_daily_report.py --data-dir ./my_data --output-dir ./my_output --verbose
  python generate_daily_report.py --variance-threshold 0.5 -v



In [None]:
# Example usage with custom parameters - showcasing decorator functionality
print("=== Demonstrating Decorated Functions ===")

# Example 1: Test decorator functionality with valid parameters
print("\n1. Using decorated function with valid parameters:")
try:
    report_default = generate_reconciliation_report(
        data_dir='../data', 
        output_dir='../output', 
        variance_threshold=1.0
    )
    if report_default is not None:
        print(f"   [SUCCESS] Generated report with {len(report_default)} exceptions")
    else:
        print("   [WARNING] Report generation returned None")
except Exception as e:
    print(f"   [ERROR] Error: {e}")
finally:
    print("   Example 1 execution completed.")

# Example 2: Test input validation decorator
print("\n2. Testing input validation decorator:")
try:
    report_invalid = generate_reconciliation_report(
        data_dir='../data', 
        output_dir='../test_output', 
        variance_threshold=-1.0  # Invalid negative threshold
    )
except ValueError as e:
    print(f"   [SUCCESS] Input validation caught invalid parameter: {e}")
except Exception as e:
    print(f"   [ERROR] Unexpected error: {e}")
finally:
    print("   Example 2 validation test completed.")

# Example 3: Test with custom threshold
print("\n3. Using custom variance threshold (0.5):")
try:
    report_custom = generate_reconciliation_report(
        data_dir='../data',
        output_dir='../custom_output', 
        variance_threshold=0.5
    )
    if report_custom is not None:
        print(f"   [SUCCESS] Generated report with stricter threshold: {len(report_custom)} exceptions")
    else:
        print("   [WARNING] Report generation returned None")
except Exception as e:
    print(f"   [ERROR] Error: {e}")
finally:
    print("   Example 3 custom threshold test completed.")

# Example 4: Demonstrate individual decorated helper functions
print("\n4. Testing individual decorated functions:")
try:
    from pathlib import Path
    data_path = Path('../data')
    
    if data_path.exists():
        print("   Testing load_data_files function (with @timer and @retry decorators):")
        orders, refunds, psp, gl = load_data_files(data_path)
        print(f"      [SUCCESS] Data loaded successfully: {len(orders)} orders, {len(gl)} GL entries")
        
        print("   Testing process_data function (with @timer and @log_calls decorators):")
        exceptions = process_data(orders, gl, 1.0)
        print(f"      [SUCCESS] Processing complete: {len(exceptions)} exceptions found")
    else:
        print("   [WARNING] Data directory doesn't exist - skipping individual function tests")
        
except Exception as e:
    print(f"   [ERROR] Error testing individual functions: {e}")
finally:
    print("   Example 4 individual function tests completed.")

print("""

=== Decorator Features Summary ===
• @timer - Measures and logs execution time for performance monitoring
• @log_calls - Comprehensive logging of function calls and outcomes
• @validate_inputs - Input validation with meaningful error messages
• @retry - Automatic retry with exponential backoff for resilient file operations
• @functools.wraps - Preserves function metadata and docstrings

These decorators provide:
   • Better debugging and monitoring capabilities
   • Improved error handling and resilience
   • Cleaner separation of concerns
   • Enhanced maintainability and observability

=== Command line options (with decorators) ===

--data-dir, -d         : Directory containing input CSV files""")

--output-dir, -o       : Directory for output filespython generate_daily_report.py --data-dir ./my_data --output-dir ./my_output --variance-threshold 0.5 --verbose

--variance-threshold, -t : Variance threshold for exceptions (validated)Example command line usage:

--verbose, -v          : Enable detailed logging output

2026-02-01 23:21:47,216 - INFO - Calling generate_reconciliation_report(data_dir=../data, output_dir=../output, variance_threshold=1.0)
2026-02-01 23:21:47,217 - INFO - Calling load_data_files(arg0=..\data)
2026-02-01 23:21:47,224 - INFO - Loaded 12 orders, 4 refunds, 7 PSP settlements, 15 GL entries
2026-02-01 23:21:47,225 - INFO - Function 'load_data_files' completed successfully
2026-02-01 23:21:47,226 - INFO - Function 'load_data_files' executed in 0.01 seconds
2026-02-01 23:21:47,234 - INFO - Calling process_data(arg0=   order_id  order_date customer_id product_id quantity  unit_price  \
0   ORD-001  2024-01-15    CUST-101     PROD-A        2        50.0   
1   ORD-002  2024-01-15    CUST-102     PROD-B        1        75.0   
2   ORD-003  2024-01-16    CUST-103     PROD-A        3        50.0   
3   ORD-004  2024-01-16    CUST-104     PROD-C        1       200.0   
4   ORD-005  2024-01-17    CUST-105     PROD-B        2        75.0   
5   ORD-006  2024-01-17    CUST-106     PROD-

=== Demonstrating Decorated Functions ===

1. Using decorated function with valid parameters:
   [SUCCESS] Generated report with 12 exceptions

2. Testing input validation decorator:
   [SUCCESS] Input validation caught invalid parameter: variance_threshold must be a positive number, got -1.0

3. Using custom variance threshold (0.5):
   [SUCCESS] Generated report with stricter threshold: 12 exceptions

4. Testing individual decorated functions:
   Testing load_data_files function (with @timer and @retry decorators):
      [SUCCESS] Data loaded successfully: 12 orders, 15 GL entries
   Testing process_data function (with @timer and @log_calls decorators):
      [SUCCESS] Processing complete: 12 exceptions found


=== Decorator Features Summary ===
• @timer - Measures and logs execution time for performance monitoring
• @log_calls - Comprehensive logging of function calls and outcomes
• @validate_inputs - Input validation with meaningful error messages
• @retry - Automatic retry with ex