In [1]:
import great_expectations as gx
from great_expectations.core.batch import BatchRequest, RuntimeBatchRequest
from ruamel import yaml

In [2]:
# Obtain the Great Expectations DataContext that every later cell works through.
context = gx.get_context()

In [3]:
# YAML definition of the `trino_datasource` Datasource: a SqlAlchemyExecutionEngine
# pointed at a Trino connection string, plus two data connectors — a
# RuntimeDataConnector (one batch identifier) and an InferredAssetSqlDataConnector
# that includes the schema name in asset names.
config = """
name: trino_datasource
class_name: Datasource
execution_engine:
  class_name: SqlAlchemyExecutionEngine
  connection_string: trino://trino@trino:8080/delta/my_schema
data_connectors:
   default_runtime_data_connector_name:
       class_name: RuntimeDataConnector
       batch_identifiers:
           - default_identifier_name
   default_inferred_data_connector_name:
       class_name: InferredAssetSqlDataConnector
       include_schema_name: true
"""

In [4]:
# Dry-run the datasource YAML: instantiate it and report the data assets
# each configured data connector can see.
context.test_yaml_config(yaml_config=config)

Attempting to instantiate class from config...
	Instantiating as a Datasource, since class_name is Datasource
	Successfully instantiated Datasource


ExecutionEngine class name: SqlAlchemyExecutionEngine
Data Connectors:
	default_inferred_data_connector_name : InferredAssetSqlDataConnector

	Available data_asset_names (2 of 2):
		my_schema.appl_stock_delta_table (1 of 1): [{}]
		my_schema.appl_stock_delta_table_version_2 (1 of 1): [{}]

	Unmatched data_references (0 of 0):[]

	default_runtime_data_connector_name:RuntimeDataConnector

	Available data_asset_names (0 of 0):
		Note : RuntimeDataConnector will not have data_asset_names until they are passed in through RuntimeBatchRequest

	Unmatched data_references (0 of 0): []



<great_expectations.datasource.new_datasource.Datasource at 0x7f09d7a8c2d0>

In [5]:
# Parse the datasource YAML and register it on the context.
# The module-level `yaml.load(...)` without an explicit Loader is deprecated in
# ruamel.yaml (and falls back to the unsafe loader), so parse with the
# `YAML(typ="safe")` API instead; it returns plain dicts/lists, which is all
# `add_datasource(**kwargs)` needs.
context.add_datasource(**yaml.YAML(typ="safe").load(config))

<great_expectations.datasource.new_datasource.Datasource at 0x7f09d6f52c50>

In [6]:
# Fetch the datasource that was just registered, by name.
trino = context.get_datasource(datasource_name="trino_datasource")

# Show, per data connector, the asset names that can be requested.
trino.get_available_data_asset_names()

{'default_runtime_data_connector_name': [],
 'default_inferred_data_connector_name': ['my_schema.appl_stock_delta_table',
  'my_schema.appl_stock_delta_table_version_2']}

In [7]:
# Describe the batch to validate: one table, resolved through the inferred SQL
# data connector of the Trino datasource.
batch_request = BatchRequest(
    datasource_name="trino_datasource",
    data_connector_name="default_inferred_data_connector_name",
    data_asset_name="my_schema.appl_stock_delta_table",  # this is the name of the table you want to retrieve
)
# `create_expectation_suite(..., overwrite_existing=True)` is deprecated in the
# 0.16 line; `add_or_update_expectation_suite` is its replacement and keeps the
# same "create, or replace if it already exists" behaviour, so the cell stays
# safe to re-run.
context.add_or_update_expectation_suite(expectation_suite_name="test_suite")
# Bind the batch and the (empty) suite together; expectations run on this
# validator are recorded into `test_suite`.
validator = context.get_validator(
    batch_request=batch_request, expectation_suite_name="test_suite"
)

In [8]:
# First expectation: the table must have a `close` column.
validator.expect_column_to_exist(column="close")

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "result": {},
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true
}

In [9]:
# Preview the first rows of the batch.
# NOTE(review): the captured output shows an "Error closing cursor" traceback
# raised inside the Trino DB-API driver before the rows are displayed — the
# preview still renders, but confirm whether that logged error is benign.
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Error closing cursor
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1900, in _execute_context
    self.dialect.do_execute(
  File "/usr/local/lib/python3.11/site-packages/trino/sqlalchemy/dialect.py", line 365, in do_execute
    cursor.execute(statement, parameters)
  File "/usr/local/lib/python3.11/site-packages/trino/dbapi.py", line 487, in execute
    self._query = self._execute_prepared_statement(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/trino/dbapi.py", line 386, in _execute_prepared_statement
    sql = 'EXECUTE ' + statement_name + ' USING ' + ','.join(map(self._format_prepared_param, params))
                                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/trino/dbapi.py", line 463, in _format_prepared_param
    raise trino.exceptions.NotSupportedError("Query parameter

Unnamed: 0,date,open,high,low,close,volume,adj close
0,2010-01-04,213.429998,214.499996,212.380001,214.009998,123432400,27.727039
1,2010-01-05,214.599998,215.589994,213.249994,214.379993,150476200,27.774976
2,2010-01-06,214.379993,215.23,210.750004,210.969995,138040000,27.333178
3,2010-01-07,211.75,212.000006,209.050005,210.58,119282800,27.28265
4,2010-01-08,210.299994,212.000006,209.060005,211.980005,111902700,27.464034


In [10]:
# Second expectation: the smallest `close` value must be at least 0
# (no upper bound is given).
validator.expect_column_min_to_be_between(column="close", min_value=0)

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "result": {
    "observed_value": 90.279999
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "success": true
}

## Save Expectations to Expectation Suite

In [11]:
# Persist the expectations accumulated on the validator into its expectation suite.
validator.save_expectation_suite()

In [12]:
# Display the suite held by the validator to confirm both expectations were recorded.
validator.get_expectation_suite()

{
  "expectations": [
    {
      "kwargs": {
        "column": "close"
      },
      "meta": {},
      "expectation_type": "expect_column_to_exist"
    },
    {
      "kwargs": {
        "column": "close",
        "min_value": 0
      },
      "meta": {},
      "expectation_type": "expect_column_min_to_be_between"
    }
  ],
  "expectation_suite_name": "test_suite",
  "data_asset_type": null,
  "ge_cloud_id": null,
  "meta": {
    "great_expectations_version": "0.16.4"
  }
}

In [13]:
# List the available asset names per datasource and data connector, as seen from the context.
context.get_available_data_asset_names()

{'trino_datasource': {'default_runtime_data_connector_name': [],
  'default_inferred_data_connector_name': ['my_schema.appl_stock_delta_table',
   'my_schema.appl_stock_delta_table_version_2']}}

## Create A Checkpoint from Our DataSource and Expectation Suite

In [14]:
# YAML definition of a SimpleCheckpoint named `test_checkpoint` with a single
# validation: the most recent batch (`index: -1`) of the stock table, checked
# against the `test_suite` expectation suite. Adjust the values marked
# "Update this value." for a different datasource, asset, or suite.
checkpoint_config = """
name: test_checkpoint 
config_version: 1
class_name: SimpleCheckpoint
validations:
  - batch_request:
      datasource_name: trino_datasource  # Update this value.
      data_connector_name: default_inferred_data_connector_name  # Update this value.
      data_asset_name: my_schema.appl_stock_delta_table  # Update this value.
      data_connector_query:
        index: -1
    expectation_suite_name: test_suite  # Update this value.
"""

In [15]:
## Add the DataHub integration if using DataHub.
## Note the indentation: `action_list` is a key inside the `validations` entry,
## not a top-level key of the checkpoint.
datahub_action_yaml = """
    action_list:
      - name: datahub_action
        action:
          module_name: datahub.integrations.great_expectations.action
          class_name: DataHubValidationAction
          server_url: http://datahub-gms:8080 #DataHub server url
"""
# Append only once: a bare `+=` would add a second `action_list` key every time
# this cell is re-run, producing invalid / duplicated YAML.
if datahub_action_yaml not in checkpoint_config:
    checkpoint_config += datahub_action_yaml

In [16]:
# Print the assembled YAML so the indentation of the appended `action_list` can be checked by eye.
print(checkpoint_config)


name: test_checkpoint 
config_version: 1
class_name: SimpleCheckpoint
validations:
  - batch_request:
      datasource_name: trino_datasource  # Update this value.
      data_connector_name: default_inferred_data_connector_name  # Update this value.
      data_asset_name: my_schema.appl_stock_delta_table  # Update this value.
      data_connector_query:
        index: -1
    expectation_suite_name: test_suite  # Update this value.

    action_list:
      - name: datahub_action
        action:
          module_name: datahub.integrations.great_expectations.action
          class_name: DataHubValidationAction
          server_url: http://datahub-gms:8080 #DataHub server url



In [17]:
# Dry-run the checkpoint YAML: instantiate it and show the resolved checkpoint configuration.
context.test_yaml_config(yaml_config=checkpoint_config)

Attempting to instantiate class from config...
	Instantiating as a SimpleCheckpoint, since class_name is SimpleCheckpoint
	Successfully instantiated SimpleCheckpoint


Checkpoint class name: SimpleCheckpoint


{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction",
        "site_names": []
      }
    }
  ],
  "batch_request": {},
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "module_name": "great_expectations.checkpoint",
  "name": "test_checkpoint",
  "profilers": [],
  "runtime_configuration": {},
  "validations": [
    {
      "expectation_suite_name": "test_suite",
      "action_list": [
        {
          "name": "datahub_action",
          "action": {
            "module_name": "datahub.integrations.great_expectations.action",
            "class_name": "DataHubValidationAction",
            "server_url": "http:/

In [18]:
# Parse the checkpoint YAML and register the checkpoint on the context.
# The module-level `yaml.load(...)` without an explicit Loader is deprecated in
# ruamel.yaml (and falls back to the unsafe loader), so parse with the
# `YAML(typ="safe")` API instead; it returns plain dicts/lists, which is all
# `add_checkpoint(**kwargs)` needs.
context.add_checkpoint(**yaml.YAML(typ="safe").load(checkpoint_config))

{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction",
        "site_names": []
      }
    }
  ],
  "batch_request": {},
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "module_name": "great_expectations.checkpoint",
  "name": "test_checkpoint",
  "profilers": [],
  "runtime_configuration": {},
  "validations": [
    {
      "batch_request": {
        "datasource_name": "trino_datasource",
        "data_connector_name": "default_inferred_data_connector_name",
        "data_asset_name": "my_schema.appl_stock_delta_table",
        "data_connector_query": {
          "index": -1
        }
      },
      "expectation_

In [19]:
# Execute the stored checkpoint: validates the batch against `test_suite`
# and runs the configured actions.
context.run_checkpoint(checkpoint_name="test_checkpoint")

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Datasource trino_datasource is not present in platform_instance_map


{
  "run_id": {
    "run_name": null,
    "run_time": "2023-05-06T01:59:57.093991+00:00"
  },
  "run_results": {
    "ValidationResultIdentifier::test_suite/__none__/20230506T015957.093991Z/35356f3bfe8c30524095725b1a8dcf60": {
      "validation_result": {
        "results": [
          {
            "result": {},
            "expectation_config": {
              "kwargs": {
                "column": "close",
                "batch_id": "35356f3bfe8c30524095725b1a8dcf60"
              },
              "meta": {},
              "expectation_type": "expect_column_to_exist"
            },
            "meta": {},
            "exception_info": {
              "raised_exception": false,
              "exception_traceback": null,
              "exception_message": null
            },
            "success": true
          },
          {
            "result": {
              "observed_value": 90.279999
            },
            "expectation_config": {
              "kwargs": {
                "