## Automated Data Quality Monitoring
**Objective**: Use Great Expectations to perform data profiling and write validation rules.

1. Data Profiling with Great Expectations

### Profile a JSON dataset with product sales data to check for null values in the 'ProductID' and 'Price' fields.
- Create an expectation suite and connect it to the data context.
- Use the `expect_column_values_to_not_be_null` expectation to profile these fields.
- Review the summary to identify any unexpected null values.

In [2]:
# write your code from here
import os
import pandas as pd
import great_expectations as ge
from great_expectations.data_context import FileDataContext
from great_expectations.exceptions import GreatExpectationsError

# === Configuration ===
GE_PATH = "great_expectations"
JSON_PATH = "data/product_sales.json"  # Update path to your JSON file
SUITE_NAME = "product_sales_suite"
DATASOURCE_NAME = "my_json_datasource"
DATA_ASSET_NAME = "product_sales.json"
CONNECTOR_NAME = "default_inferred_data_connector_name"

# === Helper Functions ===

def validate_file_exists(filepath):
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"File not found: {filepath}")

def load_context(path):
    try:
        return FileDataContext(path)
    except Exception as e:
        raise RuntimeError(f"Failed to load GE context: {e}")

def configure_datasource(context):
    # Optional: set up a new datasource via CLI or config if not already done
    # You can skip this in the script if datasource is already configured
    print("✔️ Ensure your JSON datasource is registered via GE CLI or YAML.")

def get_validator(context):
    batch_request = {
        "datasource_name": DATASOURCE_NAME,
        "data_connector_name": CONNECTOR_NAME,
        "data_asset_name": DATA_ASSET_NAME,
    }

    try:
        validator = context.get_validator(
            batch_request=batch_request,
            expectation_suite_name=SUITE_NAME
        )
        return validator
    except Exception as e:
        raise RuntimeError(f"Failed to create validator: {e}")

def define_null_expectations(validator):
    validator.expect_column_to_exist("ProductID")
    validator.expect_column_to_exist("Price")

    validator.expect_column_values_to_not_be_null("ProductID")
    validator.expect_column_values_to_not_be_null("Price")

    validator.save_expectation_suite()

def build_docs(context):
    context.build_data_docs()
    print("📄 Data Docs available at: great_expectations/uncommitted/data_docs/local_site/index.html")

# === Main Execution ===

if __name__ == "__main__":
    try:
        print("🔍 Checking file...")
        validate_file_exists(JSON_PATH)

        print("📂 Loading context...")
        context = load_context(GE_PATH)

        print("🧾 Connecting to JSON data...")
        configure_datasource(context)

        print("🧪 Creating validator...")
        validator = get_validator(context)

        print("✅ Defining expectations...")
        define_null_expectations(validator)

        print("📊 Building data docs...")
        build_docs(context)

    except Exception as e:
        print(f"❌ Erro")


🔍 Checking file...
❌ Erro


2. Writing Validation Rules for Data Ingestion

### Define validation rules for an API data source to confirm that 'Status' field contains only predefined statuses ('Active', 'Inactive').

- Apply `expect_column_values_to_be_in_set` to check field values during data ingestion.
- Execute the validation and review any mismatches.

In [3]:
# write your code from here
import great_expectations as ge
import pandas as pd
from great_expectations.data_context import FileDataContext
from great_expectations.exceptions import GreatExpectationsError

# === Configuration ===
GE_PATH = "great_expectations"
SUITE_NAME = "api_data_suite"
DATASOURCE_NAME = "api_datasource"
DATA_ASSET_NAME = "api_response_data"  # Logical name in GE
CONNECTOR_NAME = "default_runtime_data_connector_name"

# Sample API data for demo (replace with actual API data loading)
api_data = [
    {"Status": "Active", "UserID": 1},
    {"Status": "Inactive", "UserID": 2},
    {"Status": "Pending", "UserID": 3},  # <-- This will fail validation
]

# === Functions ===

def load_context(path):
    try:
        return FileDataContext(path)
    except Exception as e:
        raise RuntimeError(f"Failed to load GE context: {e}")

def get_validator_from_runtime(context, df, suite_name):
    try:
        validator = context.get_validator(
            batch={
                "batch_data": ge.from_pandas(df),
                "datasource_name": DATASOURCE_NAME,
                "data_connector_name": CONNECTOR_NAME,
                "data_asset_name": DATA_ASSET_NAME,
            },
            expectation_suite_name=suite_name,
        )
        return validator
    except Exception as e:
        raise RuntimeError(f"Failed to create validator: {e}")

def define_status_expectation(validator):
    validator.expect_column_to_exist("Status")
    validator.expect_column_values_to_be_in_set("Status", ["Active", "Inactive"])
    validator.save_expectation_suite()

def run_validation(validator):
    result = validator.validate()
    if result["success"]:
        print("✅ All Status values are valid!")
    else:
        print("❌ Validation failed! Mismatched Status values found:")
        for res in result["results"]:
            if not res["success"]:
                print(f"- {res['expectation_config']['expectation_type']}: {res['expectation_config']['kwargs']}")

# === Main ===

if __name__ == "__main__":
    try:
        print("📂 Loading GE context...")
        context = load_context(GE_PATH)

        print("📋 Creating validator with runtime batch data...")
        df_api = pd.DataFrame(api_data)
        validator = get_validator_from_runtime(context, df_api, SUITE_NAME)

        print("🛠 Defining Status validation rule...")
        define_status_expectation(validator)

        print("🚀 Running validation...")
        run_validation(validator)

    except Exception as e:
        print(f"❌ Error: {e}")


📂 Loading GE context...
❌ Error: Failed to load GE context: Error: No gx directory was found here!
    - Please check that you are in the correct directory or have specified the correct directory.
    - If you have never run Great Expectations in this project, please run `great_expectations init` to get started.

