# In [227]:
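# Explanation:
# - This function loads the CSV file and immediately validates its schema against REQUIRED_COLUMNS.
# - If a required column is missing, a fallback column is added with a placeholder value:
#   NaN for 'transaction_amount', today's date for 'transaction_date', and 'Unknown' for any other column.
# - If loading or validation raises, the error is logged and an empty DataFrame is returned.
# - This design ensures the pipeline doesn't break when data is incomplete or the file cannot be read.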

def load_transaction_data(filepath: str) -> "pd.DataFrame":
    """Load a transactions CSV and backfill any missing required columns.

    The file is read with ``pd.read_csv`` and checked with
    ``validate_schema(df, REQUIRED_COLUMNS)``, which is unpacked as a
    ``(valid, missing)`` pair. When the schema is not valid, each entry of
    ``missing`` is added to the frame as a constant column:

    * ``'transaction_amount'`` -> ``NaN``
    * ``'transaction_date'``   -> today's date at midnight
    * any other column         -> the string ``'Unknown'``

    Args:
        filepath: Path of the CSV file, passed to ``pd.read_csv``.

    Returns:
        The loaded (and possibly backfilled) DataFrame, or an empty
        DataFrame with no columns if anything in the load/validate/backfill
        sequence raises.

    Note:
        This function is deliberately best-effort and never raises for
        ``Exception`` subclasses. Failures are logged with their traceback so
        that a bad file and a programming error in the validation step can be
        told apart in the logs. Callers should check ``df.empty``.
    """
    try:
        df = pd.read_csv(filepath)

        # Step 1: Validate required columns
        valid, missing = validate_schema(df, REQUIRED_COLUMNS)

        # Step 2: If columns are missing, add them with fallback values
        if not valid:
            for col in missing:
                if col == 'transaction_amount':
                    logging.warning("Missing 'transaction_amount'. Adding NaN as placeholder.")
                    df[col] = np.nan
                elif col == 'transaction_date':
                    logging.warning("Missing 'transaction_date'. Adding today's date.")
                    # normalize() drops the time-of-day so every row gets the same midnight stamp.
                    df[col] = pd.Timestamp.today().normalize()
                else:
                    logging.warning("Missing '%s'. Filling with default value 'Unknown'.", col)
                    df[col] = 'Unknown'

        logging.info("Data loaded successfully with shape: %s", df.shape)
        return df

    except Exception as e:
        # Top-level boundary: keep the pipeline alive, but record the full
        # traceback (logging.exception) instead of only the message text.
        logging.exception("Failed to load data: %s", e)
        return pd.DataFrame()  # Return empty DataFrame to prevent pipeline crash
