# Initialize using CLI

In [2]:
# DO NOT RUN
# Run the 
# great_expectations init 
# in terminal

'''  ___              _     ___                  _        _   _
 / __|_ _ ___ __ _| |_  | __|_ ___ __  ___ __| |_ __ _| |_(_)___ _ _  ___
| (_ | '_/ -_) _` |  _| | _|\ \ / '_ \/ -_) _|  _/ _` |  _| / _ \ ' \(_-<
 \___|_| \___\__,_|\__| |___/_\_\ .__/\___\__|\__\__,_|\__|_\___/_||_/__/
                                |_|
             ~ Always know what to expect from your data ~

Let's create a new Data Context to hold your project configuration.

Great Expectations will create a new directory with the following structure:

    great_expectations
    |-- great_expectations.yml
    |-- expectations
    |-- checkpoints
    |-- plugins
    |-- .gitignore
    |-- uncommitted
        |-- config_variables.yml
        |-- data_docs
        |-- validations

OK to proceed? [Y/n]: y

================================================================================

Congratulations! You are now ready to customize your Great Expectations configuration.

You can customize your configuration in many ways. Here are some examples:

  Use the CLI to:
    - Run `great_expectations datasource new` to connect to your data.
    - Run `great_expectations checkpoint new <checkpoint_name>` to bundle data with Expectation Suite(s) in a Checkpoint for later re-validation.
    - Run `great_expectations suite --help` to create, edit, list, profile Expectation Suites.
    - Run `great_expectations docs --help` to build and manage Data Docs sites.

  Edit your configuration in great_expectations.yml to:
    - Move Stores to the cloud
    - Add Slack notifications, PagerDuty alerts, etc.
    - Customize your Data Docs

Please see our documentation for more configuration options!'''



# Import all the libraries that you need

In [1]:
import os
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint

# Set up GX

In [3]:
context = gx.get_context()

In [4]:
print(context)

{
  "anonymous_usage_statistics": {
    "enabled": true,
    "usage_statistics_url": "https://stats.greatexpectations.io/great_expectations/v1/usage_statistics",
    "explicit_url": false,
    "explicit_id": true,
    "data_context_id": "24dd165d-d0a8-44be-b0a8-cdcb35b42b2f"
  },
  "checkpoint_store_name": "checkpoint_store",
  "config_version": 3,
  "data_docs_sites": {
    "local_site": {
      "class_name": "SiteBuilder",
      "show_how_to_buttons": true,
      "store_backend": {
        "class_name": "TupleFilesystemStoreBackend",
        "base_directory": "C:\\Users\\SYAUQI~1.HID\\AppData\\Local\\Temp\\tmpdipncjff"
      },
      "site_index_builder": {
        "class_name": "DefaultSiteIndexBuilder"
      }
    }
  },
  "datasources": {},
  "evaluation_parameter_store_name": "evaluation_parameter_store",
  "expectations_store_name": "expectations_store",
  "fluent_datasources": {},
  "include_rendered_content": {
    "expectation_suite": false,
    "globally": false,
    "expect

## Connect to your data

In [5]:
# Set environment variables using os.environ() (GAUSAH DIPAKE)
'''os.environ["MY_DB_PW"] = "KantorAHP123!"
os.environ[
    "POSTGRES_CONNECTION_STRING"
] = "postgresql://postgres:${MY_DB_PW}@localhost:5432/postgres"'''

# Use a connection_string to securely connect to your PostgreSQL instance (GAUSAH DIPAKE)
'''
MY_DB_PW = "KantorAHP123!"
PG_CONNECTION_STRING = "postgresql://postgres:${MY_DB_PW}@localhost:5432/postgres"
PG_CONNECTION_STRING = "postgresql+psycopg2://postgres:'KantorAHP123!':@localhost:5432/postgres"
'''

# First set values by adding the commands to your ~/.bashrc file (Type nano ~/. bashrc and named it bashrc): (GAUSAH DIPAKE)
'''export MY_DB_PW=KantorAHP123!
export POSTGRES_CONNECTION_STRING=postgresql://postgres:${MY_DB_PW}@localhost:5432/postgres
'''

'export MY_DB_PW=KantorAHP123!\nexport POSTGRES_CONNECTION_STRING=postgresql://postgres:${MY_DB_PW}@localhost:5432/postgres\n'

In [6]:
# Run the following command to create a Data Source to represent the data available in your PostgreSQL database
# (GAUSAH DI RUN LAGI)
PG_CONNECTION_STRING = "postgresql+psycopg2://postgres:KantorAHP123!@localhost:5432/postgres"
pg_datasource = context.sources.add_sql(
    name="pg_datasource", 
    connection_string=PG_CONNECTION_STRING
)

In [7]:
# This next snippet shows how to retrieve the Data Source from the Data Context.
datasource = context.datasources["pg_datasource"]
print(datasource)

connection_string: postgresql+psycopg2://postgres:KantorAHP123!@localhost:5432/postgres
name: pg_datasource
type: sql



In [8]:
# Check the Context
context

{
  "anonymous_usage_statistics": {
    "enabled": true,
    "usage_statistics_url": "https://stats.greatexpectations.io/great_expectations/v1/usage_statistics",
    "explicit_url": false,
    "explicit_id": true,
    "data_context_id": "24dd165d-d0a8-44be-b0a8-cdcb35b42b2f"
  },
  "checkpoint_store_name": "checkpoint_store",
  "config_version": 3,
  "data_docs_sites": {
    "local_site": {
      "class_name": "SiteBuilder",
      "show_how_to_buttons": true,
      "store_backend": {
        "class_name": "TupleFilesystemStoreBackend",
        "base_directory": "C:\\Users\\SYAUQI~1.HID\\AppData\\Local\\Temp\\tmpdipncjff"
      },
      "site_index_builder": {
        "class_name": "DefaultSiteIndexBuilder"
      }
    }
  },
  "datasources": {},
  "evaluation_parameter_store_name": "evaluation_parameter_store",
  "expectations_store_name": "expectations_store",
  "fluent_datasources": {},
  "include_rendered_content": {
    "expectation_suite": false,
    "globally": false,
    "expect

In [9]:
# Run the following command to create a Data Asset to represent a discrete set of data
table_asset = datasource.add_table_asset(
    name="payments", table_name="payments"
)

In [10]:
# Run the following command to build a Batch Request using the Data Asset you configured previously:
batch_request = datasource.get_asset("payments").build_batch_request()

# Create Expectations

You'll use a **Validator** to interact with your batch of data and generate an **Expectation Suite**.

Every time you evaluate an Expectation with `validator.expect_*`, it is immediately Validated against your data. This instant feedback helps you identify unexpected data and removes the guesswork from data exploration. The Expectation configuration is stored in the Validator. When you are finished running the Expectations on the dataset, you can use `validator.save_expectation_suite()` to save all of your Expectation configurations into an Expectation Suite for later use in a checkpoint.

In [11]:
# Run the following command to create the suite and get a Validator
expectation_suite_name = "payments_great_expectation_suite"
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

print(validator.head())

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

   id  orderid  paymentmethod   status  amount     created
0   1        1    credit_card  success    1000  2018-01-01
1   2        2    credit_card  success    2000  2018-01-02
2   3        3         coupon  success     100  2018-01-04
3   4        4         coupon  success    2500  2018-01-05
4   5        5  bank_transfer     fail    1700  2018-01-05


In [41]:
# Run the following command to use the Validator to add a few Expectations:

# ORDERID
validator_order_id = validator.expect_column_values_to_not_be_null(column="orderid")

print(validator_order_id)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "result": {
    "element_count": 120,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": []
  },
  "meta": {},
  "expectation_config": {
    "expectation_type": "expect_column_values_to_not_be_null",
    "meta": {},
    "kwargs": {
      "column": "orderid",
      "batch_id": "pg_datasource-payments"
    }
  }
}


In [39]:
# STATUS 
validator_status = validator.expect_column_values_to_be_in_set(
    column="status", value_set=["success", "fail"], 
    meta={
        "dimension": "Integrity"}
)

print(validator_status)

Calculating Metrics:   0%|          | 0/11 [00:00<?, ?it/s]

{
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "result": {
    "element_count": 120,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "expectation_config": {
    "expectation_type": "expect_column_values_to_be_in_set",
    "meta": {
      "dimension": "Integrity"
    },
    "kwargs": {
      "column": "status",
      "value_set": [
        "success",
        "fail"
      ],
      "batch_id": "pg_datasource-payments"
    }
  }
}


In [37]:
# AMOUNT (Not as expected)
validator_amount = validator.expect_column_values_to_be_between(
    column="amount", 
    min_value=3000, max_value=4000, 
    result_format={
        "result_format" : "COMPLETE",
        "unexpected_index_column_names" : ["id"], 
        "return_unexpected_index_query" : True,
    }, 
    meta={
        "dimension": "Consistency based on ID"
    }
)

print(validator_amount)

Calculating Metrics:   0%|          | 0/13 [00:00<?, ?it/s]

{
  "success": false,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  },
  "result": {
    "element_count": 120,
    "unexpected_count": 117,
    "unexpected_percent": 97.5,
    "partial_unexpected_list": [
      1000,
      2000,
      100,
      2500,
      1700,
      1700,
      600,
      1600,
      2300,
      2300,
      0,
      2600,
      2700,
      100,
      500,
      500,
      1400,
      300,
      2200,
      1000
    ],
    "unexpected_index_column_names": [
      "id"
    ],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 97.5,
    "unexpected_percent_nonmissing": 97.5,
    "partial_unexpected_index_list": [
      {
        "id": 1,
        "amount": 1000
      },
      {
        "id": 2,
        "amount": 2000
      },
      {
        "id": 3,
        "amount": 100
      },
      {
        "id": 4,
        "amount": 2500
      },
      {
        "id": 5,
     

In [38]:
unexpected_index_query_variable_amount = validator_amount.get_metric(
    "expect_column_values_to_be_between.result.unexpected_index_query",
    column="amount"
)

print(unexpected_index_query_variable_amount)

SELECT id, amount 
FROM payments 
WHERE amount IS NOT NULL AND NOT (amount >= 3000 AND amount <= 4000);


In [15]:
# Run the following command to get your Expectation Suite
print(validator.get_expectation_suite(discard_failed_expectations=False))

{
  "expectations": [
    {
      "expectation_type": "expect_column_values_to_not_be_null",
      "meta": {},
      "kwargs": {
        "column": "orderid"
      }
    },
    {
      "expectation_type": "expect_column_values_to_be_in_set",
      "meta": {
        "dimension": "Integrity"
      },
      "kwargs": {
        "column": "status",
        "value_set": [
          "success",
          "fail"
        ]
      }
    },
    {
      "expectation_type": "expect_column_values_to_be_between",
      "meta": {
        "dimension": "Consistency based on ID"
      },
      "kwargs": {
        "column": "amount",
        "min_value": 3000,
        "max_value": 4000
      }
    }
  ],
  "data_asset_type": null,
  "meta": {
    "great_expectations_version": "0.17.5"
  },
  "ge_cloud_id": null,
  "expectation_suite_name": "payments_great_expectation_suite"
}


In [16]:
# Run the following command to save your Expectation Suite (all the unique Expectation Configurations from each run of validator.expect_*) to your Expectation Store:
validator.save_expectation_suite("my_payments_postgres_great_expectations.json", discard_failed_expectations=False)

# Validate your data

You'll create and store a *Checkpoint* for your batch, which you can use to validate and run post-validation actions.

In [17]:
# Run the following command to create the Checkpoint configuration that uses your Data Context:
my_checkpoint_name = "my_payments_great_expectations_postgres_checkpoint"

checkpoint = Checkpoint(
    name=my_checkpoint_name,
    run_name_template="%Y%m%d-%H%M%S-my_payments_great_expectations_postgres_checkpoint",
    data_context=context,
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
    action_list=[
        {
            "name": "store_validation_result",
            "action": {"class_name": "StoreValidationResultAction"},
        },
        {
            "name": "store_evaluation_params",
            "action": {"class_name": "StoreEvaluationParametersAction"},
        },
        {
            "name": "update_data_docs", 
            "action": {"class_name": "UpdateDataDocsAction"}
        },
    ],
)

The `store_validation_result` action saves your validation results from the Checkpoint run and allows the results to be persisted for future use. The `store_evaluation_params` store evaluation parameters from a validation result. The `update_data_docs` action builds Data Docs files for the validations run in the Checkpoint.

In [18]:
# Run the following command to save the Checkpoint:
context.add_or_update_checkpoint(checkpoint=checkpoint)

{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction"
      }
    }
  ],
  "batch_request": {
    "datasource_name": "pg_datasource",
    "data_asset_name": "payments",
    "options": {}
  },
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "expectation_suite_name": "payments_great_expectation_suite",
  "module_name": "great_expectations.checkpoint",
  "name": "my_payments_great_expectations_postgres_checkpoint",
  "profilers": [],
  "run_name_template": "%Y%m%d-%H%M%S-my_payments_great_expectations_postgres_checkpoint",
  "runtime_configuration": {},
  "validations": []
}

In [20]:
# Run the following command to run the Checkpoint and pass in your Batch Request (your data) and your Expectation Suite (your tests):
checkpoint_result = checkpoint.run()

Calculating Metrics: 0it [00:00, ?it/s]

In [21]:
# View the full Checkpoint configuration
print(checkpoint.get_config().to_yaml_str())

name: my_payments_great_expectations_postgres_checkpoint
config_version: 1.0
template_name:
module_name: great_expectations.checkpoint
class_name: Checkpoint
run_name_template: '%Y%m%d-%H%M%S-my_payments_great_expectations_postgres_checkpoint'
expectation_suite_name: payments_great_expectation_suite
batch_request:
  datasource_name: pg_datasource
  data_asset_name: payments
  options: {}
  batch_slice:
action_list:
  - name: store_validation_result
    action:
      class_name: StoreValidationResultAction
  - name: store_evaluation_params
    action:
      class_name: StoreEvaluationParametersAction
  - name: update_data_docs
    action:
      class_name: UpdateDataDocsAction
evaluation_parameters: {}
runtime_configuration: {}
validations: []
profilers: []
ge_cloud_id:
expectation_suite_ge_cloud_id:



# Build and view Data Docs

Your Checkpoint contained an `UpdateDataDocsAction`, so your Data Docs have already been built from the validation you ran and your Data Docs store contains a new rendered validation result.

In [22]:
# Run the following command to open your Data Docs and review the results of your Checkpoint run:
context.open_data_docs()