In [None]:
pip install great_expectations

In [None]:
import pandas as pd
import numpy as np
import great_expectations as ge
from great_expectations.data_context.types.base import DataContextConfig, DatasourceConfig, FilesystemStoreBackendDefaults
from great_expectations.data_context import BaseDataContext
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.core.batch import RuntimeBatchRequest
from sklearn import datasets
import warnings
# Silence every DeprecationWarning for the rest of the session, regardless of message or module.
warnings.simplefilter("ignore", DeprecationWarning)

In [None]:
# Load the iris dataset and arrange it as one DataFrame:
# four measurement columns followed by the target as `species`.
iris = datasets.load_iris()

# Column names, in the same order as the columns of iris.data.
feature_columns = ("sepal_length", "sepal_width", "petal_length", "petal_width")

# Transposing iris.data yields one 1-D array per measurement column.
measurements = dict(zip(feature_columns, iris.data.T))
data = pd.DataFrame({**measurements, "species": iris.target})

# Preview the first rows.
data.head()

In [None]:
# Number of rows per species value.
data["species"].value_counts()

In [None]:
# Preview the first rows whose species value is 1.
is_species_one = data["species"] == 1
data.loc[is_species_one].head()

In [None]:
# Add Expectations for Data
# Wrap the DataFrame so expectation methods can be called on it directly.
ge_df = ge.from_pandas(data)

# Each column the DataFrame was built with is expected to be present.
for column_name in ("sepal_length", "sepal_width", "petal_length", "petal_width"):
    ge_df.expect_column_to_exist(column_name)

# Kept as the final bare expression so the cell displays this call's result.
ge_df.expect_column_to_exist("species")

In [None]:
# Check for Data Types
# The four measurement columns share the same expected type name.
for measurement_column in ("sepal_length", "sepal_width", "petal_length", "petal_width"):
    ge_df.expect_column_values_to_be_of_type(measurement_column, "float64")

# species is checked against "int64"; kept as the final bare expression so the
# cell displays this call's result.
ge_df.expect_column_values_to_be_of_type("species", "int64")

In [None]:
# Check for Reasonable data values
# (lower, upper) bounds passed to the range expectation for each measurement column.
value_bounds = {
    "sepal_length": (1.0, 20.0),
    "sepal_width": (0.1, 5.0),
    "petal_length": (1.0, 15.0),
    "petal_width": (0.1, 5.0),
}
for column_name, (lower, upper) in value_bounds.items():
    ge_df.expect_column_values_to_be_between(column_name, lower, upper)

# species must be one of the three listed values; kept as the final bare
# expression so the cell displays this call's result.
ge_df.expect_column_values_to_be_in_set("species", [0, 1, 2])

In [None]:
# Collect Expectations Suite including Failures
# discard_failed_expectations=False is passed so that failed expectations are not dropped.
full_suite = ge_df.get_expectation_suite(discard_failed_expectations=False)
full_suite

In [None]:
# Output JSON file for suite
import json

# NOTE(review): get_expectation_suite() is called with its default arguments here,
# whereas the earlier "Collect Expectations Suite" cell passes
# discard_failed_expectations=False. Confirm the exported suite is the intended one.
with open("Iris_expectations_report.json", "w") as report_file:
    # Convert the suite to a plain dict, serialise it, and write it in one go.
    suite_payload = ge_df.get_expectation_suite().to_json_dict()
    report_file.write(json.dumps(suite_payload))