In [1]:
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint
import pandas as pd

In [2]:
# Obtain the Great Expectations data context, then read the input CSV into a
# pandas DataFrame that the later cells validate.
# NOTE(review): the df.info() output further down lists an 'Unnamed: 0' column,
# which usually means the CSV was written together with its index. Consider
# reading with index_col=0 — confirm against how the file was produced.
context = gx.get_context()
df = pd.read_csv("P2M3_gedi_data_clean.csv")

In [3]:
# Register the in-memory DataFrame with Great Expectations and create the
# expectation suite that the following cells populate.
#
# The add_or_update_* variants are used on purpose: plain add_pandas() raises
# when a datasource with the same name is already registered, which makes the
# cell fail as soon as it is executed a second time. create_expectation_suite()
# is also the deprecated spelling of this operation.
expectation_suite_name = 'M3-CREDIT'

# Pandas datasource holding a single DataFrame asset built from `df`.
datasource = context.sources.add_or_update_pandas("M3-CREDIT_checkpoint")
dataframe_asset = datasource.add_dataframe_asset(name="M3_8_", dataframe=df)

# Create the suite, or replace an existing one of the same name on re-run.
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)

# Batch request that the validator and the checkpoint both use to fetch the data.
batch_request = dataframe_asset.build_batch_request()

In [4]:
# Bind the batch of data to the expectation suite; expectations declared
# through this validator are evaluated against that batch.
validator = context.get_validator(batch_request=batch_request, expectation_suite_name=expectation_suite_name)

In [5]:
# Print the column names, non-null counts and dtypes of the loaded frame,
# used as a reference when choosing the expectations below.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522 entries, 0 to 521
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Unnamed: 0        522 non-null    int64 
 1   Age               522 non-null    int64 
 2   Sex               522 non-null    object
 3   Job               522 non-null    int64 
 4   Housing           522 non-null    object
 5   Saving accounts   522 non-null    object
 6   Checking account  522 non-null    object
 7   Credit amount     522 non-null    int64 
 8   Duration          522 non-null    int64 
 9   Purpose           522 non-null    object
dtypes: int64(5), object(5)
memory usage: 40.9+ KB


In [6]:
# List the distinct values in the 'Purpose' column; the value_set of the
# in-set expectation declared later mirrors this list.
df['Purpose'].unique()

array(['radio/TV', 'furniture/equipment', 'car', 'business',
       'domestic appliances', 'repairs', 'vacation/others', 'education'],
      dtype=object)

In [7]:
# Declare the expectations for the suite. The order of the calls matches the
# order in which they are recorded in the suite.

# 1) Columns that must not contain missing values.
for non_null_column in ("Credit amount", "Saving accounts"):
    validator.expect_column_values_to_not_be_null(column=non_null_column)

# 2) Age must lie within the accepted range (bounds as chosen by the author).
validator.expect_column_values_to_be_between(column="Age", min_value=18, max_value=100)

# 3) Purpose is restricted to a fixed list of categories.
allowed_purposes = [
    'radio/TV',
    'furniture/equipment',
    'car',
    'business',
    'domestic appliances',
    'repairs',
    'vacation/others',
    'education',
]
validator.expect_column_values_to_be_in_set(column='Purpose', value_set=allowed_purposes)

# 4) Columns that must be present in the table.
for expected_column in ("Job", "Housing", "Duration"):
    validator.expect_column_to_exist(column=expected_column)

# Store the suite, keeping expectations even if they failed on this batch.
validator.save_expectation_suite(discard_failed_expectations=False)

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
# Build a checkpoint that validates the batch against the suite, then stores
# the validation result and refreshes the data docs.
checkpoint_name = 'M3-Checkpoint'

# Actions executed after validation, in this order.
post_validation_actions = [
    {"name": "store_validation_result", "action": {"class_name": "StoreValidationResultAction"}},
    {"name": "update_data_docs", "action": {"class_name": "UpdateDataDocsAction"}},
]

checkpoint = Checkpoint(
    name=checkpoint_name,
    # Run names are a timestamp pattern followed by the checkpoint name.
    run_name_template=f"%Y%m%d-%H%M%S-{checkpoint_name}",
    data_context=context,
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
    action_list=post_validation_actions,
)

In [9]:
# Register the checkpoint in the context (create it, or overwrite an existing
# one with the same name) so the cell can be executed repeatedly.
context.add_or_update_checkpoint(checkpoint=checkpoint)

# Run the checkpoint
checkpoint_result = checkpoint.run()

# Report the overall outcome in the notebook itself; previously the result was
# captured but never inspected, so a failing validation went unnoticed unless
# the reader opened the data docs.
print(f"Validation success: {checkpoint_result.success}")

# Open data docs
context.open_data_docs()

Calculating Metrics:   0%|          | 0/27 [00:00<?, ?it/s]