In [1]:
import os
import great_expectations as gx
from dotenv import load_dotenv
from datahub.integrations.great_expectations.action import DataHubValidationAction
import importlib
import pkgutil
from pathlib import Path
from great_expectations.checkpoint import Checkpoint

# Load environment variables
load_dotenv()

# Get secrets from environment variables
pg_connection_string = os.getenv("PG_CONNECTION_STRING")
datahub_token = os.getenv("DATAHUB_TOKEN")
datahub_server_url = os.getenv("DATAHUB_SERVER_URL")

def initialize_context():
    """Initialize a Great Expectations context."""
    context = gx.get_context()
    
    try:
        # Add PostgreSQL datasource using fluent API
        datasource = context.sources.add_postgres(
            name="prisons_demo",
            connection_string=pg_connection_string
        )
        print("Successfully added PostgreSQL datasource")
        return context

    except Exception as e:
        print(f"Error adding datasource: {str(e)}")
        raise

def load_validation_suites():
    """Dynamically load all validation suites from the validations directory."""
    validation_modules = []
    
    # Get the notebook's directory and construct path to validations
    notebook_dir = Path().absolute()
    validations_dir = notebook_dir / "validations"
    
    # Ensure the validations directory exists
    if not validations_dir.exists():
        print(f"Warning: validations directory not found at {validations_dir}")
        return validation_modules
    
    # Import all validation modules
    for _, name, _ in pkgutil.iter_modules([str(validations_dir)]):
        if name.endswith('_validation'):
            try:
                module = importlib.import_module(f"validations.{name}")
                if hasattr(module, 'run_validation'):
                    validation_modules.append(module)
                    print(f"Loaded validation suite: {name}")
            except ImportError as e:
                print(f"Error loading validation module {name}: {str(e)}")
    
    return validation_modules

def main():
    try:
        # Initialize context
        context = initialize_context()

        # Load all validation suites
        validation_modules = load_validation_suites()
        
        if not validation_modules:
            print("No validation suites found!")
            return

        # Process each validation suite
        for validation_module in validation_modules:
            try:
                # Get batch request and suite name from the validation module
                batch_request, suite_name = validation_module.run_validation(context)
                
                print(f"\nProcessing validation for {suite_name}")

                # Get the datasource
                datasource = context.get_datasource("prisons_demo")

                # Create asset (table asset)
                asset = datasource.add_table_asset(
                    name=batch_request["data_asset_name"],
                    table_name=batch_request["table_name"],
                    schema_name=batch_request["schema_name"]
                )

                # Get the validation batch
                batch = asset.build_batch_request()

                # Run validation with the current context
                validator = context.get_validator(
                    batch_request=batch,
                    expectation_suite_name=suite_name
                )
                
                # Run the validation
                validation_results = validator.validate()

                # Instantiate the DataHubValidationAction with data_context
                datahub_action = DataHubValidationAction(
                    server_url=datahub_server_url,
                    token=datahub_token,
                    data_context=context
                )

                # Create checkpoint configuration
                # Create checkpoint configuration
                checkpoint_config = {
                    "name": f"checkpoint_{suite_name}",
                    "config_version": 1.0,
                    "run_name_template": "%Y%m%d-%H%M%S-my-run-name-template",
                    "validations": [
                        {
                            "batch_request": batch,
                            "expectation_suite_name": suite_name,
                            "action_list": [
                                {
                                    "name": "store_validation_result",
                                    "action": {"class_name": "StoreValidationResultAction"}
                                },
                                {
                                    "name": "store_evaluation_params",
                                    "action": {"class_name": "StoreEvaluationParametersAction"}
                                },
                                {
                                    "name": "update_data_docs",
                                    "action": {"class_name": "UpdateDataDocsAction"}
                                },
                                {
                                    "name": "datahub_action",
                                    "action": {
                                        "class_name": "DataHubValidationAction",
                                        "module_name": "datahub.integrations.great_expectations.action",
                                        "server_url": datahub_server_url,
                                        "token": datahub_token,
                                    }
                                }
                            ]
                        }
                    ]
                }

                # Create and run checkpoint
                checkpoint = Checkpoint(**checkpoint_config, data_context=context)

                results = checkpoint.run()
                print(f"\nValidation Results for {suite_name}:")
                print(results)

                # Create and run checkpoint
                checkpoint = Checkpoint(**checkpoint_config, data_context=context)

                results = checkpoint.run()
                print(f"\nValidation Results for {suite_name}:")
                print(results)

            except Exception as e:
                print(f"Error processing suite {validation_module.__name__}: {str(e)}")
                continue

    except Exception as e:
        print(f"Error in main: {str(e)}")
        raise

if __name__ == "__main__":
    main()


Successfully added PostgreSQL datasource
Loaded validation suite: tbl_prisoner_validation

Processing validation for prisoner_validation_suite


Calculating Metrics:   0%|          | 0/25 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/31 [00:00<?, ?it/s]

Datasource prisons_demo is not present in platform_instance_map



Validation Results for prisoner_validation_suite:
{
  "run_id": {
    "run_name": "20241115-112651-my-run-name-template",
    "run_time": "2024-11-15T11:26:51.912076+00:00"
  },
  "run_results": {
    "ValidationResultIdentifier::prisoner_validation_suite/20241115-112651-my-run-name-template/20241115T112651.912076Z/prisons_demo-prisoner": {
      "validation_result": {
        "success": true,
        "results": [
          {
            "success": true,
            "expectation_config": {
              "expectation_type": "expect_column_values_to_be_between",
              "kwargs": {
                "column": "cell_num",
                "max_value": 600,
                "min_value": 1,
                "batch_id": "prisons_demo-prisoner"
              },
              "meta": {}
            },
            "result": {
              "element_count": 3536,
              "unexpected_count": 0,
              "unexpected_percent": 0.0,
              "partial_unexpected_list": [],
         

Calculating Metrics:   0%|          | 0/31 [00:00<?, ?it/s]

Datasource prisons_demo is not present in platform_instance_map



Validation Results for prisoner_validation_suite:
{
  "run_id": {
    "run_name": "20241115-112653-my-run-name-template",
    "run_time": "2024-11-15T11:26:53.340502+00:00"
  },
  "run_results": {
    "ValidationResultIdentifier::prisoner_validation_suite/20241115-112653-my-run-name-template/20241115T112653.340502Z/prisons_demo-prisoner": {
      "validation_result": {
        "success": true,
        "results": [
          {
            "success": true,
            "expectation_config": {
              "expectation_type": "expect_column_values_to_be_between",
              "kwargs": {
                "column": "cell_num",
                "max_value": 600,
                "min_value": 1,
                "batch_id": "prisons_demo-prisoner"
              },
              "meta": {}
            },
            "result": {
              "element_count": 3536,
              "unexpected_count": 0,
              "unexpected_percent": 0.0,
              "partial_unexpected_list": [],
         