In [2]:
import great_expectations as gx
from great_expectations.core.batch import BatchRequest, RuntimeBatchRequest
from ruamel import yaml

In [3]:
# Obtain the Great Expectations data context; every later cell goes through it.
context = gx.get_context()

In [21]:
# Datasource definition, written as YAML text: a SqlAlchemyExecutionEngine
# pointed at Trino, plus two data connectors (a RuntimeDataConnector and an
# InferredAssetSqlDataConnector). Later cells pass this string to
# test_yaml_config and, once parsed, to add_datasource.
config = """
name: trino_datasource
class_name: Datasource
execution_engine:
  class_name: SqlAlchemyExecutionEngine
  connection_string: trino://trino@trino:8080/delta/my_schema
data_connectors:
   default_runtime_data_connector_name:
       class_name: RuntimeDataConnector
       batch_identifiers:
           - default_identifier_name
   default_inferred_data_connector_name:
       class_name: InferredAssetSqlDataConnector
       include_schema_name: true
"""

In [6]:
# Dry-run the datasource YAML: the context instantiates it and reports the
# data connectors and the assets they can see, without registering anything.
context.test_yaml_config(yaml_config=config)

Attempting to instantiate class from config...
	Instantiating as a Datasource, since class_name is Datasource
	Successfully instantiated Datasource


ExecutionEngine class name: SqlAlchemyExecutionEngine
Data Connectors:
	default_inferred_data_connector_name : InferredAssetSqlDataConnector

	Available data_asset_names (1 of 1):
		my_schema.appl_stock_delta_table (1 of 1): [{}]

	Unmatched data_references (0 of 0):[]

	default_runtime_data_connector_name:RuntimeDataConnector

	Available data_asset_names (0 of 0):
		Note : RuntimeDataConnector will not have data_asset_names until they are passed in through RuntimeBatchRequest

	Unmatched data_references (0 of 0): []



<great_expectations.datasource.new_datasource.Datasource at 0x7fc6070ae0d0>

In [7]:
# Parse the datasource YAML with an explicit safe loader, then register it.
# The module-level `yaml.load(text)` call without a Loader is deprecated in
# ruamel.yaml (and rejected outright by 0.18+). `YAML(typ="safe")` returns
# plain dicts/lists, which is exactly what the `**` unpacking needs.
datasource_config = yaml.YAML(typ="safe").load(config)
context.add_datasource(**datasource_config)

<great_expectations.datasource.new_datasource.Datasource at 0x7fc607a79bd0>

In [8]:
# Look the datasource up by the name it was registered under, then list the
# data assets visible through each of its data connectors.
trino = context.get_datasource(datasource_name="trino_datasource")
trino.get_available_data_asset_names()

{'default_runtime_data_connector_name': [],
 'default_inferred_data_connector_name': ['my_schema.appl_stock_delta_table']}

In [9]:
# Suite that will collect the expectations authored in the following cells.
suite_name = "test_suite"

# Point at the table to validate, via the inferred SQL data connector.
batch_request = BatchRequest(
    datasource_name="trino_datasource",
    data_connector_name="default_inferred_data_connector_name",
    data_asset_name="my_schema.appl_stock_delta_table",  # table to retrieve
)

# overwrite_existing=True is passed so a suite already stored under this name
# is replaced rather than kept.
context.create_expectation_suite(
    expectation_suite_name=suite_name,
    overwrite_existing=True,
)

# The validator binds the batch to the suite; expectations run against it.
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=suite_name,
)

In [10]:
# Schema check: the table must expose a `close` column.
validator.expect_column_to_exist(column="close")

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

{
  "success": true,
  "meta": {},
  "result": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [11]:
# Preview the first rows of the batch.
# NOTE(review): the recorded output shows an "Error closing cursor" traceback
# raised inside the trino DBAPI driver before the rows are displayed; the rows
# still come back. Looks like a driver/parameter-binding limitation rather
# than a problem with this call — confirm against the installed trino version.
validator.head()

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Error closing cursor
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1900, in _execute_context
    self.dialect.do_execute(
  File "/usr/local/lib/python3.11/site-packages/trino/sqlalchemy/dialect.py", line 365, in do_execute
    cursor.execute(statement, parameters)
  File "/usr/local/lib/python3.11/site-packages/trino/dbapi.py", line 487, in execute
    self._query = self._execute_prepared_statement(
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/trino/dbapi.py", line 386, in _execute_prepared_statement
    sql = 'EXECUTE ' + statement_name + ' USING ' + ','.join(map(self._format_prepared_param, params))
                                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/trino/dbapi.py", line 463, in _format_prepared_param
    raise trino.exceptions.NotSupportedError("Query parameter

Unnamed: 0,date,open,high,low,close,volume,adj_close,month,year
0,2012-01-03,409.399998,412.499989,408.999989,411.23,75555200,53.278774,1,2012
1,2016-11-01,113.459999,113.769997,110.529999,111.489998,43825800,110.441678,11,2016
2,2014-08-01,94.900002,96.620003,94.809998,96.129997,48511000,91.212006,8,2014
3,2014-08-04,96.370003,96.580002,95.169998,95.589996,39958000,90.699631,8,2014
4,2014-08-05,95.360001,95.68,94.360001,95.120003,55933000,90.253683,8,2014


In [12]:
# Value check: the minimum of `close` must be at least 0 (no upper bound).
validator.expect_column_min_to_be_between(column="close", min_value=0)

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

{
  "success": true,
  "meta": {},
  "result": {
    "observed_value": 90.279999
  },
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

## Save Expectations to Expectation Suite

In [15]:
# Persist the expectations accumulated on the validator into its suite
# ("test_suite", as bound when the validator was created).
validator.save_expectation_suite()

In [16]:
# Display the suite as it now stands, to confirm both expectations are in it.
validator.get_expectation_suite()

{
  "expectations": [
    {
      "expectation_type": "expect_column_to_exist",
      "kwargs": {
        "column": "close"
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_min_to_be_between",
      "kwargs": {
        "column": "close",
        "min_value": 0
      },
      "meta": {}
    }
  ],
  "ge_cloud_id": null,
  "expectation_suite_name": "test_suite",
  "data_asset_type": null,
  "meta": {
    "great_expectations_version": "0.15.50"
  }
}

In [17]:
# List data assets per datasource and data connector across the whole context
# (not just the Trino datasource added in this notebook).
context.get_available_data_asset_names()

{'trino_datasource': {'default_runtime_data_connector_name': [],
  'default_inferred_data_connector_name': ['my_schema.appl_stock_delta_table']},
 'delta_lake': {'default_runtime_data_connector_name': []}}

## Create a Checkpoint from Our Datasource and Expectation Suite

In [37]:
# Checkpoint definition, written as YAML text: a SimpleCheckpoint with a single
# validation that pairs the Trino table's batch request with "test_suite".
# NOTE(review): `index: -1` presumably selects the last batch returned by the
# data connector — confirm against the Great Expectations batch-request docs.
checkpoint_config = """
name: test_checkpoint 
config_version: 1
class_name: SimpleCheckpoint
validations:
  - batch_request:
      datasource_name: trino_datasource  # Update this value.
      data_connector_name: default_inferred_data_connector_name  # Update this value.
      data_asset_name: my_schema.appl_stock_delta_table  # Update this value.
      data_connector_query:
        index: -1
    expectation_suite_name: test_suite  # Update this value.
"""

In [38]:
## Add the DataHub integration if using DataHub.
## Note the indentation: action_list is a key inside the validations item.
datahub_action_yaml = """
    action_list:
      - name: datahub_action
        action:
          module_name: datahub.integrations.great_expectations.action
          class_name: DataHubValidationAction
          server_url: http://datahub-gms:8080 #DataHub server url
"""

# Guard the append so the cell is idempotent: re-running it without first
# re-running the cell that defines checkpoint_config would otherwise add a
# second `action_list` key to the same validation entry.
if datahub_action_yaml not in checkpoint_config:
    checkpoint_config += datahub_action_yaml

In [39]:
# Show the assembled YAML; print() is used so the newlines and indentation
# render as written, which makes the nesting of action_list easy to check.
print(checkpoint_config)


name: test_checkpoint 
config_version: 1
class_name: SimpleCheckpoint
validations:
  - batch_request:
      datasource_name: trino_datasource  # Update this value.
      data_connector_name: default_inferred_data_connector_name  # Update this value.
      data_asset_name: my_schema.appl_stock_delta_table  # Update this value.
      data_connector_query:
        index: -1
    expectation_suite_name: test_suite  # Update this value.

    action_list:
      - name: datahub_action
        action:
          module_name: datahub.integrations.great_expectations.action
          class_name: DataHubValidationAction
          server_url: http://datahub-gms:8080 #DataHub server url



In [40]:
# Dry-run the checkpoint YAML: the context instantiates it and echoes the
# resulting checkpoint configuration.
context.test_yaml_config(yaml_config=checkpoint_config)

Attempting to instantiate class from config...
	Instantiating as a SimpleCheckpoint, since class_name is SimpleCheckpoint
	Successfully instantiated SimpleCheckpoint


Checkpoint class name: SimpleCheckpoint


{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction",
        "site_names": []
      }
    }
  ],
  "batch_request": {},
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "module_name": "great_expectations.checkpoint",
  "name": "test_checkpoint",
  "profilers": [],
  "runtime_configuration": {},
  "validations": [
    {
      "action_list": [
        {
          "name": "datahub_action",
          "action": {
            "module_name": "datahub.integrations.great_expectations.action",
            "class_name": "DataHubValidationAction",
            "server_url": "http://datahub-gms:8080"
          }
        }
     

In [41]:
# Parse the checkpoint YAML with an explicit safe loader, then register it.
# The module-level `yaml.load(text)` call without a Loader is deprecated in
# ruamel.yaml (and rejected outright by 0.18+). `YAML(typ="safe")` returns
# plain dicts/lists, which is exactly what the `**` unpacking needs.
checkpoint_kwargs = yaml.YAML(typ="safe").load(checkpoint_config)
context.add_checkpoint(**checkpoint_kwargs)

{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction",
        "site_names": []
      }
    }
  ],
  "batch_request": {},
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "module_name": "great_expectations.checkpoint",
  "name": "test_checkpoint",
  "profilers": [],
  "runtime_configuration": {},
  "validations": [
    {
      "batch_request": {
        "datasource_name": "trino_datasource",
        "data_connector_name": "default_inferred_data_connector_name",
        "data_asset_name": "my_schema.appl_stock_delta_table",
        "data_connector_query": {
          "index": -1
        }
      },
      "expectation_

In [42]:
# Execute the stored checkpoint: validates the batch against the suite and
# runs the configured actions.
context.run_checkpoint(checkpoint_name="test_checkpoint")

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Datasource trino_datasource is not present in platform_instance_map


{
  "run_id": {
    "run_name": null,
    "run_time": "2023-03-04T23:46:06.419497+00:00"
  },
  "run_results": {
    "ValidationResultIdentifier::test_suite/__none__/20230304T234606.419497Z/35356f3bfe8c30524095725b1a8dcf60": {
      "validation_result": {
        "statistics": {
          "evaluated_expectations": 2,
          "successful_expectations": 2,
          "unsuccessful_expectations": 0,
          "success_percent": 100.0
        },
        "results": [
          {
            "expectation_config": {
              "expectation_type": "expect_column_to_exist",
              "kwargs": {
                "column": "close",
                "batch_id": "35356f3bfe8c30524095725b1a8dcf60"
              },
              "meta": {}
            },
            "success": true,
            "meta": {},
            "result": {},
            "exception_info": {
              "raised_exception": false,
              "exception_traceback": null,
              "exception_message": null
        