### Prepare environment

In [0]:
%run ../environment/prepare_environment


### Great Expectations

This notebook helps ensure that the Silver layer data meets quality standards before any downstream processing by creating a Great Expectations suite for validation.

In detail, this notebook:
* Loads the Great Expectations context for the Silver layer.
* Creates an expectation suite for telco_silver data.
* Adds expectations for required columns, boolean types, and non-negative numeric columns.
* Sets up a Spark dataframe as the data source.
* Defines a whole-dataframe batch definition and a validation definition that binds it to the suite.

In [0]:
import great_expectations as gx
from great_expectations.core.expectation_suite import ExpectationSuite
import json

# Load the Great Expectations context rooted at the Silver-layer project dir.
context_root_dir = "telco_ge"
context = gx.get_context(context_root_dir=context_root_dir)

# Register the suite with add_or_update instead of add: a plain add() raises
# when a suite with this name is already stored in the context, which would
# make the notebook fail on every run after the first. add_or_update replaces
# the stored suite with this empty one, so the expectations added below always
# describe the full, current rule set rather than accumulating across runs.
suite_name = "silver_suite"
suite = context.suites.add_or_update(gx.ExpectationSuite(name=suite_name))

# Required fields must never be null.
for col in ["customer_id", "churn"]:
    expectation = gx.expectations.ExpectColumnValuesToNotBeNull(column=col)
    suite.add_expectation(expectation)

# Flag columns must be stored with the Spark BooleanType.
for col in ["senior_citizen", "partner", "dependents",
            "phone_service", "internet_service",
            "paperless_billing", "churn"]:
    expectation = gx.expectations.ExpectColumnValuesToBeOfType(column=col, type_="BooleanType")
    suite.add_expectation(expectation)

# tenure and total_charges must be non-negative (lower bound only, no upper bound).
for col in ["tenure", "total_charges"]:
    expectation = gx.expectations.ExpectColumnValuesToBeBetween(column=col, min_value=0)
    suite.add_expectation(expectation)

In [0]:
# Register a Spark data source and a dataframe asset; the dataframe itself is
# not bound here, only the names under which it will be validated.
# NOTE(review): these add_* calls presumably raise if the same names already
# exist in a persisted context — confirm before relying on re-runs of this cell.
data_source = context.data_sources.add_spark(name="telco_silver", persist=False)
data_asset = data_source.add_dataframe_asset(name="telco_silver_df")

# One batch covering the entire dataframe passed in at validation time.
batch_definition = data_asset.add_batch_definition_whole_dataframe("telco_batch_definition")

# Tie the batch definition to the Silver expectation suite and store the
# resulting validation definition in the context.
validation_definition = context.validation_definitions.add(
    gx.ValidationDefinition(
        name="telco_silver_validation",
        data=batch_definition,
        suite=suite,
    )
)