# Ingestion Notebook (Modular + Clean)

This notebook demonstrates a clean use of the modular ingestion framework.  
All widget setup, configuration parsing, file handling, transformation, validation, and logging have been abstracted into reusable modules.

Run this notebook to execute ingestion based on parameters selected through widgets.


In [0]:
import sys
import os

# Put the parent of the current working directory (the ingestion/ folder in
# this repo layout) at the front of sys.path so `arkhamanalytics` is importable.
# The membership check keeps re-runs of this cell from adding duplicates.
ingestion_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if ingestion_path not in sys.path:
    sys.path[:0] = [ingestion_path]

# Now import from the arkhamanalytics package
from arkhamanalytics.file_utils import detect_and_read_file, resolve_file_path
from arkhamanalytics.schema_utils import validate_schema
from arkhamanalytics.audit_logger import log_ingestion_audit
from arkhamanalytics.transformations import apply_transformations
from arkhamanalytics.widget_manager import WidgetManager, get_config_from_widgets
from arkhamanalytics.widget_presets import create_base_widgets


In [0]:
# Widget setup: build the manager, clear whatever it tracks, register the base
# preset, then read the widget values into `config` for the cells below.
# NOTE(review): `dbutils` is not defined in this notebook; presumably the
# Databricks-provided global — confirm when running outside Databricks.
wm = WidgetManager(dbutils)
wm.remove_all()  # start from a clean slate before (re)creating widgets
create_base_widgets(wm)
# Later cells read container_name, file_pattern, encoding, sheet_name and
# excel_starting_cell from this object.
config = get_config_from_widgets(dbutils)

In [0]:
# Turn the widget-supplied container name and file pattern into a file path.
matched_file_path = resolve_file_path(
    config.container_name,
    config.file_pattern,
)

# Gather the reader arguments up front so the call itself stays short.
# sheet_name / start_cell come from the Excel-related widgets.
read_options = {
    "spark": spark,
    "file_path": matched_file_path,
    "encoding": config.encoding,
    "sheet_name": config.sheet_name,
    "start_cell": config.excel_starting_cell,
}
df_raw = detect_and_read_file(**read_options)

# Show the raw data in the notebook output for a quick visual check.
df_raw.display()

In [0]:
# Apply standard transformations to the raw data. `df_raw` is produced by the
# file-reading cell above, so that cell must have been run first.
df_transformed = apply_transformations(df_raw)
# Show the transformed result in the notebook output for inspection.
df_transformed.display()


In [0]:
# Check the transformed data against the expected structure; the file pattern
# is passed along as the second argument to validate_schema.
validation_result = validate_schema(df_transformed, config.file_pattern)

# Report the outcome, listing the individual errors only on failure.
schema_ok = validation_result["valid"]
print("Schema valid:", schema_ok)
if not schema_ok:
    print("Schema errors:", validation_result["errors"])


In [0]:
# Log the ingestion result to the audit table.
# The helper imported at the top of this notebook is `log_ingestion_audit`;
# the previous call to `log_ingestion` referenced a name that is never
# imported or defined here and failed with NameError.
# NOTE(review): `file_path` receives the widget file pattern, not the resolved
# `matched_file_path` — confirm which one the audit table should record.
log_ingestion_audit(
    file_path=config.file_pattern,
    success=validation_result["valid"],
    errors=validation_result["errors"],
)
