# Tiny great_expectations sandbox example

In [17]:
import great_expectations as ge
import pandas as pd
from datetime import datetime

# Assumes the notebook runs from a location with an initialized
# great_expectations project.
context = ge.DataContext()

# Names are defined once and reused below, so the datasource that gets
# registered is always the one the batch request refers to.
dataset_name = "orders"
suite_name = "orders_expectations"

# Define some demo data.
orders = {"order_id": ["a123", "b123", "c123"], "price": [10, 100, 50]}
df = pd.DataFrame(orders)
batch_kwargs = {"dataset": df, "datasource": dataset_name}

# Before a datasource can be used it has to be added to the context (if it
# does not exist yet). This can also be done interactively with the CLI.
context.add_datasource(dataset_name, class_name="PandasDatasource", overwrite_existing=True)

# Create an empty suite, bind it to a batch of the demo data, and preview
# the first rows of that batch.
expectation_suite = context.create_expectation_suite(suite_name, overwrite_existing=True)
batch = context.get_batch(batch_kwargs, expectation_suite_name=suite_name)
batch.head()

Unnamed: 0,order_id,price
0,a123,10
1,b123,100
2,c123,50


In [18]:
# First expectation: the median price should lie between 20 and 30.
# The demo prices (10, 100, 50) have a median of 50, so this range is too narrow.
batch.expect_column_median_to_be_between(
    column="price",
    min_value=20,
    max_value=30,
)


{
  "success": false,
  "meta": {},
  "result": {
    "observed_value": 50.0,
    "element_count": 3,
    "missing_count": null,
    "missing_percent": null
  },
  "exception_info": null
}

In [19]:
# Let's adjust: widen the upper bound to 70 so the accepted range covers
# the median of the demo prices.
batch.expect_column_median_to_be_between(
    column="price",
    min_value=20,
    max_value=70,
)

{
  "success": true,
  "meta": {},
  "result": {
    "observed_value": 50.0,
    "element_count": 3,
    "missing_count": null,
    "missing_percent": null
  },
  "exception_info": null
}

### Observe unexpected data
If the validation is not successful, the result shows what was observed instead, or which values were unexpected. This is important for acting quickly when you investigate an issue.

In [20]:
# Check the format of order_id. This pattern asks for one digit followed by
# three lowercase letters, which is the reverse of ids such as "a123".
batch.expect_column_values_to_match_regex(
    column="order_id",
    regex="^[0-9]{1}[a-z]{3}$",
)

{
  "success": false,
  "meta": {},
  "result": {
    "element_count": 3,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 3,
    "unexpected_percent": 100.0,
    "unexpected_percent_nonmissing": 100.0,
    "partial_unexpected_list": [
      "a123",
      "b123",
      "c123"
    ]
  },
  "exception_info": null
}

In [21]:
# The previous pattern was wrong, but the result listed the offending values.
# Corrected pattern: one lowercase letter followed by three digits.
batch.expect_column_values_to_match_regex(
    column="order_id",
    regex="^[a-z]{1}[0-9]{3}$",
)

{
  "success": true,
  "meta": {},
  "result": {
    "element_count": 3,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "exception_info": null
}

There are plenty of options to run the validation, and the actions are fully configurable using the `great_expectations.yml` file. 

In [30]:
# save suite to json
# NOTE(review): discard_failed_expectations=True presumably leaves out
# expectations whose most recent run failed - confirm against the GE docs.
batch.save_expectation_suite(discard_failed_expectations=True)

In [32]:
# reload suite from json
# Fetching the suite by name through the context lets us inspect which
# expectations were stored.
context.get_expectation_suite("orders_expectations")

{
  "data_asset_type": "Dataset",
  "expectations": [
    {
      "meta": {},
      "expectation_type": "expect_column_median_to_be_between",
      "kwargs": {
        "column": "price",
        "max_value": 70,
        "min_value": 20
      }
    },
    {
      "meta": {},
      "expectation_type": "expect_column_values_to_match_regex",
      "kwargs": {
        "column": "order_id",
        "regex": "^[a-z]{1}[0-9]{3}$"
      }
    }
  ],
  "meta": {
    "great_expectations_version": "0.12.7"
  },
  "expectation_suite_name": "orders_expectations"
}

In [36]:
# Validate a batch: fetch a batch bound to the "orders_expectations" suite
# and hand it to the "action_list_operator" validation operator.
validation_batch = context.get_batch(
    batch_kwargs,
    expectation_suite_name="orders_expectations",
)
validation_result = context.run_validation_operator(
    "action_list_operator",
    [validation_batch],
)

In [37]:
# Display the result returned by the validation operator run.
validation_result

{
  "run_results": {
    "ValidationResultIdentifier::orders_expectations/20201102T214649.650882Z/20201102T214649.650882Z/fed6cf73bcf55959152cffca73a7a518": {
      "validation_result": {
        "evaluation_parameters": {},
        "success": true,
        "results": [
          {
            "success": true,
            "expectation_config": {
              "meta": {},
              "expectation_type": "expect_column_median_to_be_between",
              "kwargs": {
                "column": "price",
                "max_value": 70,
                "min_value": 20,
                "result_format": {
                  "result_format": "SUMMARY"
                }
              }
            },
            "meta": {},
            "result": {
              "observed_value": 50.0,
              "element_count": 3,
              "missing_count": null,
              "missing_percent": null
            },
            "exception_info": {
              "raised_exception": false,
              "

In [39]:
# uncomment and run locally - builds the data docs, then opens them in a browser
# context.build_data_docs()
# context.open_data_docs()