In [20]:
import os
from dotenv import load_dotenv
import great_expectations as ge

# load env variables
load_dotenv()

# get secrets from environment variables
pg_connection_string = os.getenv("PG_CONNECTION_STRING")
datahub_token = os.getenv("DATAHUB_TOKEN")
datahub_server_url = os.getenv("DATAHUB_SERVER_URL")


In [21]:

# configure the datasource - must exist in datahub for UI visibility
context = ge.get_context()

datasource_config = {
    "name": "pg_datasource",
    "class_name": "Datasource",
    "execution_engine": {
        "class_name": "SqlAlchemyExecutionEngine",
        "module_name": "great_expectations.execution_engine",
        "connection_string": pg_connection_string,
    },
    "data_connectors": {
        "default_inferred_data_connector_name": {
            "class_name": "InferredAssetSqlDataConnector",
            "module_name": "great_expectations.datasource.data_connector",
            "include_schema_name": True,
            "name": "default_inferred_data_connector",
        }
    },
}

context.add_datasource(**datasource_config)


<great_expectations.datasource.new_datasource.Datasource at 0x28913928160>

In [22]:
from validations.validation_suite import run_combined_validation

# send to datahub action
custom_actions = {
    "send_to_datahub": {
        "class_name": "DataHubValidationAction",
        "module_name": "datahub.integrations.great_expectations.action",
        "server_url": datahub_server_url,
        "token": datahub_token,
    }
}

# run tests from validation suite
batch_requests_and_suites = run_combined_validation(context)


Calculating Metrics:   0%|          | 0/11 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/11 [00:00<?, ?it/s]

In [23]:

# cycle through batch requests
for batch_request, expectation_suite_name in batch_requests_and_suites:
    checkpoint_config = {
        "name": f"my_pg_checkpoint_{expectation_suite_name}",
        "config_version": 1.0,
        "class_name": "Checkpoint",
        "run_name_template": "%Y-%m-%d-%H-%M-%S-my-checkpoint",
        "expectation_suite_name": expectation_suite_name,
        "batch_request": batch_request,
        "action_list": [
            {
                "name": "store_validation_result",
                "action": {
                    "class_name": "StoreValidationResultAction",
                },
            },
            {
                "name": "store_evaluation_params",
                "action": {
                    "class_name": "StoreEvaluationParametersAction",
                },
            },
            {
                "name": "send_to_datahub",
                "action": custom_actions["send_to_datahub"],
            },
        ],
    }
    
    # Add and run the checkpoint
    context.add_checkpoint(**checkpoint_config)
    context.run_checkpoint(checkpoint_name=f"my_pg_checkpoint_{expectation_suite_name}")


Calculating Metrics:   0%|          | 0/13 [00:00<?, ?it/s]

Datasource pg_datasource is not present in platform_instance_map


Calculating Metrics:   0%|          | 0/13 [00:00<?, ?it/s]

Datasource pg_datasource is not present in platform_instance_map
