# Monitoring: Examine data drift
* In this stage, new incoming data is examined for drift against the original data quality definitions, which are saved as GX Expectation Suites.

In [1]:
import great_expectations as gx
import demo_code as demo

In [2]:
import warnings

# Suppress known DeprecationWarnings and FutureWarnings for demo.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

## Examine new incoming patient data

In [3]:
# Load the new incoming patient data through the demo helper module.
df_new_patient_data = demo.data.get_new_patient_data()

# Preview the first ten rows (last expression, so the notebook renders it).
df_new_patient_data.head(10)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,65,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0
1,68,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0
2,72,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0
3,41,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0
4,45,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0
5,59,1,2,120,236,0,0,178,0,0.8,1,0.0,3.0
6,64,0,4,140,268,0,2,160,0,3.6,3,2.0,3.0
7,66,0,4,120,354,0,0,163,1,0.6,1,0.0,3.0
8,65,1,4,130,254,0,2,147,0,1.4,2,1.0,7.0
9,60,1,4,140,203,1,2,155,1,3.1,3,0.0,7.0


## Retrieve the Expectation Suites that defined quality for the original data

In [4]:
# Connect to GX Cloud and look up the two existing Expectation Suites by name.
cloud_context = gx.get_context(mode="cloud")
cloud_suites = cloud_context.suites

distribution_suite = cloud_suites.get(name="Heart disease data: distribution")
schema_and_validity_suite = cloud_suites.get(
    name="Heart disease data: schema and validity"
)

In [5]:
# Create a separate ephemeral-mode Data Context used to validate the new data.
local_context = gx.get_context(mode="ephemeral")

# Code to create a containerized Data Docs site for demo.
data_docs_site_config = {
    "class_name": "SiteBuilder",
    "show_how_to_buttons": False,
    "store_backend": {
        "class_name": "TupleFilesystemStoreBackend",
        "base_directory": "/gx/gx_volume/data_docs",
    },
    "site_index_builder": {"class_name": "DefaultSiteIndexBuilder"},
}
local_context.add_data_docs_site(
    site_name="GX in the ML pipeline demo",
    site_config=data_docs_site_config,
)

# Register the chain that hands a dataframe to GX:
# pandas Data Source -> dataframe Data Asset -> whole-dataframe Batch Definition.
pandas_data_source = local_context.data_sources.add_pandas("pandas")
pandas_data_asset = pandas_data_source.add_dataframe_asset(
    name="New heart disease data"
)
pandas_batch_definition = pandas_data_asset.add_batch_definition_whole_dataframe(
    "batch definition"
)

# Add the suites retrieved from GX Cloud to the local context. This is done
# before the Validation Definitions that reference them are added below.
for retrieved_suite in (schema_and_validity_suite, distribution_suite):
    local_context.suites.add(retrieved_suite)

# One Validation Definition per suite, both bound to the same Batch Definition.
schema_and_validity_validation_definition = gx.ValidationDefinition(
    suite=schema_and_validity_suite,
    data=pandas_batch_definition,
    name="schema and validity validation definition",
)
distribution_validation_definition = gx.ValidationDefinition(
    suite=distribution_suite,
    data=pandas_batch_definition,
    name="distribution validation definition",
)

for validation_definition in (
    schema_and_validity_validation_definition,
    distribution_validation_definition,
):
    local_context.validation_definitions.add(validation_definition)

# The Checkpoint runs both Validation Definitions and carries a Data Docs
# update action.
update_data_docs_action = gx.checkpoint.actions.UpdateDataDocsAction(
    name="update_data_docs"
)
checkpoint = local_context.checkpoints.add(
    gx.Checkpoint(
        name="checkpoint",
        validation_definitions=[
            schema_and_validity_validation_definition,
            distribution_validation_definition,
        ],
        actions=[update_data_docs_action],
    )
)

# The dataframe to validate is supplied at run time as a batch parameter.
results = checkpoint.run(batch_parameters={"dataframe": df_new_patient_data})

Calculating Metrics:  95%|█████████▍| 124/131 [00:00<00:00, 308.34it/s]
Calculating Metrics: 100%|██████████| 10/10 [00:00<00:00, 985.74it/s] 


### View Expectation Suite and Validation Results in [Data Docs](http://localhost:3000)