In [13]:
import os
from dotenv import load_dotenv
import great_expectations as ge
from great_expectations.checkpoint import Checkpoint
from datahub.integrations.great_expectations.action import DataHubValidationAction
import importlib
import pkgutil

# Load environment variables from .env file
load_dotenv()

# Step 1: Get the secrets from environment variables
pg_connection_string = os.getenv("PG_CONNECTION_STRING")
# NOTE(review): the token is not validated below — presumably it is optional
# when the DataHub server does not enforce token auth; confirm.
datahub_token = os.getenv("DATAHUB_TOKEN")
datahub_server_url = os.getenv("DATAHUB_SERVER_URL")

# os.getenv returns None for unset variables. Fail here, naming the missing
# settings, instead of letting a None connection string or server URL surface
# later as a harder-to-diagnose error in the datasource or checkpoint cells.
missing_env_vars = [
    env_name
    for env_name, env_value in (
        ("PG_CONNECTION_STRING", pg_connection_string),
        ("DATAHUB_SERVER_URL", datahub_server_url),
    )
    if not env_value
]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in the environment or in the .env file."
    )


In [None]:

# Step 2: Register the PostgreSQL datasource; the connection string is the
# value read from the environment in Step 1.
context = ge.get_context()

# Execution engine section of the datasource configuration.
sql_execution_engine = dict(
    class_name="SqlAlchemyExecutionEngine",
    module_name="great_expectations.execution_engine",
    connection_string=pg_connection_string,
)

# Data connector section of the datasource configuration.
# NOTE(review): the connector is keyed as "default_inferred_data_connector_name"
# below while its inner "name" is "default_inferred_data_connector" — confirm
# which of the two the validation modules reference in their batch requests.
inferred_sql_connector = dict(
    class_name="InferredAssetSqlDataConnector",
    module_name="great_expectations.datasource.data_connector",
    include_schema_name=True,
    name="default_inferred_data_connector",
)

# Assemble the full configuration from the pieces above.
datasource_config = dict(
    name="pg_datasource",
    class_name="Datasource",
    execution_engine=sql_execution_engine,
    data_connectors={
        "default_inferred_data_connector_name": inferred_sql_connector,
    },
)

context.add_datasource(**datasource_config)


In [None]:

# Steps 3-6 live in a single cell on purpose: the body of a `for` loop cannot
# continue into another notebook cell, so the checkpoint definition and run
# must sit inside the same loop that discovers the validation modules.

# Step 3: Import all validation modules from the 'validations' folder
validations_folder = "validations"

# Step 4: Register the custom DataHub action with token from the environment variable.
# The action configuration is identical for every module, so build it once
# outside the loop.
custom_actions = {
    "send_to_datahub": {
        "class_name": "DataHubValidationAction",
        "module_name": "datahub.integrations.great_expectations.action",
        "server_url": datahub_server_url,
        "token": datahub_token,
    }
}

# Result of each checkpoint run, keyed by validation module name, so the
# outcome can be inspected after the loop instead of being discarded.
checkpoint_results = {}

for _, module_name, _ in pkgutil.iter_modules([validations_folder]):
    full_module_name = f"{validations_folder}.{module_name}"
    validation_module = importlib.import_module(full_module_name)

    # Run the validation; each module is expected to expose
    # run_validation(context) returning (batch_request, expectation_suite_name).
    batch_request, expectation_suite_name = validation_module.run_validation(context)

    # Step 5: Define a Checkpoint that will use the custom action
    checkpoint_name = f"my_pg_checkpoint_{module_name}"
    checkpoint_config = {
        "name": checkpoint_name,
        "config_version": 1.0,
        "class_name": "Checkpoint",
        "run_name_template": "%Y-%m-%d-%H-%M-%S-my-checkpoint",
        "expectation_suite_name": expectation_suite_name,
        "batch_request": batch_request,
        "action_list": [
            {
                "name": "store_validation_result",
                "action": {
                    "class_name": "StoreValidationResultAction",
                },
            },
            {
                "name": "store_evaluation_params",
                "action": {
                    "class_name": "StoreEvaluationParametersAction",
                },
            },
            {
                "name": "send_to_datahub",
                # Copy so every checkpoint owns its action config rather than
                # sharing one dict object across all checkpoints.
                "action": dict(custom_actions["send_to_datahub"]),
            },
        ],
    }

    context.add_checkpoint(**checkpoint_config)

    # Step 6: Run the Checkpoint
    checkpoint_results[module_name] = context.run_checkpoint(
        checkpoint_name=checkpoint_name
    )
