## Automated Data Quality Monitoring
**Objective**: Use Great Expectations to perform data profiling and write validation rules.

1. Data Profiling with Great Expectations

### Profile a JSON dataset with product sales data to check for null values in the 'ProductID' and 'Price' fields.
- Create an expectation suite and connect it to the data context.
- Use the `expect_column_values_to_not_be_null` expectation to profile these fields.
- Review the summary to identify any unexpected null values.

In [None]:
# write your code from here
import great_expectations as ge
import os

# The data context is resolved relative to the current working directory.
project_root_dir = os.getcwd()
context = ge.get_context(context_root_dir=project_root_dir)

# Load JSON data batch for profiling
batch = context.get_batch({
    "datasource_name": "sales_data_source",
    "data_connector_name": "default_runtime_data_connector_name",
    "data_asset_name": "product_sales_json",
    "runtime_parameters": {"path": "data/product_sales.json"},
    "batch_identifiers": {"default_identifier_name": "profile_run"},
})

suite_name = "product_sales_profile_suite"
try:
    context.create_expectation_suite(expectation_suite_name=suite_name, overwrite_existing=True)
except Exception as e:
    # Best-effort: report the problem and keep going so the profiling below still runs.
    print(f"Suite creation or overwrite error: {e}")

# Declare the not-null rules for the two fields under inspection.
batch.expect_column_values_to_not_be_null(column="ProductID")
batch.expect_column_values_to_not_be_null(column="Price")

# Keep every declared expectation in the saved suite. By default
# get_expectation_suite() discards expectations that failed on this batch,
# which would drop the not-null rule for exactly the columns that contain
# nulls and leave the persisted suite incomplete.
context.save_expectation_suite(
    batch.get_expectation_suite(discard_failed_expectations=False),
    suite_name,
)

results = context.run_validation_operator(
    "action_list_operator",
    assets_to_validate=[batch],
    run_name="profile_validation_run"
)

# Only one batch was validated, so the first result is the one to summarize.
validation_result = results.list_validation_results()[0]
print(f"Validation Success: {validation_result.success}")
for result in validation_result.results:
    if result.expectation_config.expectation_type == "expect_column_values_to_not_be_null":
        print(f"Column: {result.expectation_config.kwargs['column']}")
        print(f"Success: {result.success}")
        print(f"Unexpected Nulls Count: {result.result.get('unexpected_count', 0)}")

context.build_data_docs()


2. Writing Validation Rules for Data Ingestion

### Define validation rules for an API data source to confirm that the 'Status' field contains only the predefined statuses ('Active', 'Inactive').

- Apply `expect_column_values_to_be_in_set` to check field values during data ingestion.
- Execute the validation and review any mismatches.

In [None]:
# write your code from here
import great_expectations as ge
import os

# The data context is resolved relative to the current working directory.
project_root_dir = os.getcwd()
context = ge.get_context(context_root_dir=project_root_dir)

# Load API data batch for validation (assume JSON file from API response saved locally for example)
batch = context.get_batch({
    "datasource_name": "api_data_source",
    "data_connector_name": "default_runtime_data_connector_name",
    "data_asset_name": "api_status_data",
    "runtime_parameters": {"path": "data/api_status.json"},
    "batch_identifiers": {"default_identifier_name": "validation_run"},
})

suite_name = "api_status_validation_suite"
try:
    context.create_expectation_suite(expectation_suite_name=suite_name, overwrite_existing=True)
except Exception as e:
    # Best-effort: report the problem and keep going so the validation below still runs.
    print(f"Suite creation or overwrite error: {e}")

# The only values the 'Status' field is allowed to take.
allowed_statuses = ["Active", "Inactive"]
batch.expect_column_values_to_be_in_set(column="Status", value_set=allowed_statuses)

# Keep the rule in the saved suite even when the current batch violates it.
# By default get_expectation_suite() discards expectations that failed on
# this batch, so a single bad status would cause an empty suite to be saved.
context.save_expectation_suite(
    batch.get_expectation_suite(discard_failed_expectations=False),
    suite_name,
)

results = context.run_validation_operator(
    "action_list_operator",
    assets_to_validate=[batch],
    run_name="api_status_validation_run"
)

# Only one batch was validated, so the first result is the one to summarize.
validation_result = results.list_validation_results()[0]
print(f"Validation Success: {validation_result.success}")
for result in validation_result.results:
    if result.expectation_config.expectation_type == "expect_column_values_to_be_in_set":
        print(f"Column: {result.expectation_config.kwargs['column']}")
        print(f"Success: {result.success}")
        print(f"Unexpected Count: {result.result.get('unexpected_count', 0)}")
        if not result.success:
            # Show a sample of the offending values to help review mismatches.
            print(f"Unexpected Values Sample: {result.result.get('partial_unexpected_list', [])}")

context.build_data_docs()
