# GX Different methods demo



**steps**:
- step1 shows installation
- step2 setting context
- step3 calls step2 to set context
- step3 sql data source methods

In [0]:
# Run this in a separate cell before restarting
%pip install great_expectations sqlalchemy pyodbc pandas


[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
import os

try:
    import great_expectations as gx
    print(f"Great Expectations {gx.__version__} imported successfully")
    

    is_databricks = False
    try:
        dbutils.fs.ls('/')
        is_databricks = True

    except NameError:

        is_databricks = False
    

    if is_databricks:
        dbfs_gx_path = "/dbfs/FileStore/great_expectations"
        print(f"DBFS context path: {dbfs_gx_path}")
        
    
        if os.path.exists(dbfs_gx_path):
            context = gx.get_context(project_root_dir=dbfs_gx_path)
           
            print(f"Context type: {type(context).__name__}")
        else:
            print("DBFS context not found - creating new one")
            os.makedirs(dbfs_gx_path, exist_ok=True)
            context = gx.get_context(project_root_dir=dbfs_gx_path)
            print("New DBFS context created")
    else:
 
        local_gx_path = os.path.join(os.getcwd(), "great_expectations")
        context = gx.get_context(project_root_dir=local_gx_path)
        print("Local context connected")
    

    
except ImportError as e:
    print(f"Great Expectations import failed: {e}")

    raise ImportError("Great Expectations not available - run installation cell above")

Great Expectations 1.5.7 imported successfully
DBFS context path: /dbfs/FileStore/great_expectations
Context type: FileDataContext


In [0]:

import json
import great_expectations as gx
from datetime import datetime


demo_results = {
    "timestamp": datetime.now().isoformat(),
    "step": "step2_5_gx_methods_demo",
    "gx_version": gx.__version__,
    "methods_demonstrated": [],
    "examples_completed": 0,
    "status": "in_progress",
    "focus": "pure_gx_methods_only"
}


try:
    # Import json module explicitly for dbutils compatibility
    import json
    
    step2_results = dbutils.notebook.run("./step2_gx_context_setup", 0)
    step2_data = json.loads(step2_results)
    
    if step2_data.get("context_created"):
        print(f"Step 2 context available: {step2_data.get('context_type')}")
        discovered_methods = step2_data.get("context_methods", [])
        method_categories = step2_data.get("method_categories", {})
        print(f"Total methods discovered: {len(discovered_methods)}")
    else:
        print(f"Step 2 context not available, creating fresh context")
        discovered_methods = []
        method_categories = {}
        
except Exception as e:
    print(f"Could not load Step 2 results: {e}")
    discovered_methods = []
    method_categories = {}


try:
    context = gx.get_context(project_root_dir="/dbfs/FileStore/great_expectations")
    print(f"GX context ready for pure GX method demonstrations")
except Exception as e:
    try:
        context = gx.get_context(mode="ephemeral")
   
    except Exception as e2:
        print(f"Could not create GX context: {e2}")
        context = None

Step 2 context available: FileDataContext
Total methods discovered: 82
GX context ready for pure GX method demonstrations


In [0]:
# # =============================================================================
# # SQL DATASOURCE METHODS
# # =============================================================================

# if context is not None:

    
#     datasource_methods = method_categories.get("datasource", [])

    
#     try:
   
  
#         if hasattr(context, 'list_datasources'):
#             datasources = context.list_datasources()
#             print(f"context.list_datasources() - Found {len(datasources)} datasources")
#             for ds in datasources[:3]: 
#                 ds_name = ds.get('name', 'unnamed') if isinstance(ds, dict) else str(ds)
#                 print(f"      • {ds_name}")
#             demo_results["methods_demonstrated"].append("list_datasources")
#         else:
#             print(f"list_datasources method not available")
        
    
#         print(f"GX SQL DATA SOURCES")
#         if hasattr(context, 'data_sources'):
#             print(f"context.data_sources available")
            
      
#             try:
       
#                 data_sources_list = list(context.data_sources)
#                 print(f"context.data_sources (iteration) - Found {len(data_sources_list)} sources")
#                 demo_results["methods_demonstrated"].append("data_sources_iteration")
#             except Exception as e1:
#                 try:

#                     all_datasources = context.list_datasources()
#                     print(f"context.list_datasources() - Found {len(all_datasources)} sources (legacy method)")
#                     demo_results["methods_demonstrated"].append("list_datasources_fallback")
#                 except Exception as e2:
#                     print(f"data_sources listing failed: {e1}")
#                     print(f"legacy fallback failed: {e2}")
            

#             if hasattr(context.data_sources, 'add_sql'):
#                 print(f"Available data_sources methods detected:")
#                 methods_list = [
#                     'add_sql', 'add_postgres', 'add_sqlite', 'add_databricks_sql', 
#                     'add_snowflake', 'add_bigquery', 'add_redshift'
#                 ]
#                 for method in methods_list:
#                     if hasattr(context.data_sources, method):
#                         print(f"      • {method}() - SQL database support")
            


            

#             if hasattr(context.data_sources, 'add_sql'):
#                 print(f"data_sources.add_sql() - Available")
#                 demo_results["methods_demonstrated"].append("data_sources.add_sql")
#             else:
#                 print(f"data_sources.add_sql() - Not available")
        

  
#         legacy_methods = ['add_datasource', 'get_datasource', 'delete_datasource']
        
#         for method_name in legacy_methods:
#             if hasattr(context, method_name):
#                 print(f"context.{method_name}() - Available")
#                 demo_results["methods_demonstrated"].append(method_name)
#             else:
#                 print(f"context.{method_name}() - Not available")
        
       
        
#         demo_results["examples_completed"] += 1
        
#     except Exception as e:
#         print(f"Datasource methods demonstration failed: {e}")

# else:
#     print(f"No context available for datasource demonstrations")

In [0]:
# =============================================================================
# EXPECTATION SUITE METHODS DEMONSTRATION
# =============================================================================

if context is not None:
    print("\nEXPECTATION SUITE METHODS DEMONSTRATION")
    print("-" * 50)
    
    expectation_methods = method_categories.get("expectation", [])
    print(f"Found {len(expectation_methods)} expectation methods")
    
    try:
        # 1. Modern GX 1.5.5+ suites approach
        print("\n1. MODERN EXPECTATION SUITES (GX 1.5.5+)")
        if hasattr(context, 'suites'):
            print("   context.suites available")
            
            try:
                suites_list = list(context.suites)
                print(f"   context.suites (iteration) - Found {len(suites_list)} suites")
                for suite in suites_list[:3]:
                    suite_name = suite.name if hasattr(suite, 'name') else str(suite)
                    print(f"      - {suite_name}")
                demo_results["methods_demonstrated"].append("suites_iteration")
            except Exception as e1:
                try:
                    legacy_suites = context.list_expectation_suites() if hasattr(context, 'list_expectation_suites') else []
                    print(f"   Legacy expectation suites - Found {len(legacy_suites)} suites")
                    demo_results["methods_demonstrated"].append("legacy_suites_list")
                except Exception as e2:
                    print(f"   suites listing failed: {e1}")
                    print(f"   legacy fallback failed: {e2}")
                    print("   Suites exist but listing method differs in GX 1.5.5+")
            
            try:
                print("   Creating demo expectation suite...")
                from great_expectations.core import ExpectationSuite
                demo_suite = context.suites.add(ExpectationSuite(name="demo_suite"))
                print("   suites.add() - Created demo suite")
                
                if hasattr(demo_suite, 'add_expectation'):
                    print("   Suite has add_expectation capability")
                
                demo_results["methods_demonstrated"].append("suites.add")
                
            except Exception as e:
                print(f"   suite creation failed: {e}")
        


        
        expectation_config_available = False
        try:
            from great_expectations.expectations.expectation_configuration import ExpectationConfiguration
            expectation_config_available = True
            print("   ExpectationConfiguration import successful")
        except ImportError:
            try:
                from great_expectations.core.expectation_configuration import ExpectationConfiguration
                expectation_config_available = True
                print(f"   ExpectationConfiguration import (legacy path) successful")
            except ImportError:
                print(f"   ExpectationConfiguration import failed")
        
        if expectation_config_available:
            print(f"   Example expectation configurations:")
            
            example_expectations = [
                {
                    "name": "Table row count",
                    "type": "expect_table_row_count_to_be_between",
                    "kwargs": {"min_value": 1, "max_value": 10000}
                },
                {
                    "name": "Column existence",
                    "type": "expect_column_to_exist", 
                    "kwargs": {"column": "id"}
                },
                {
                    "name": "Non-null values",
                    "type": "expect_column_values_to_not_be_null",
                    "kwargs": {"column": "name"}
                },
                {
                    "name": "Unique values",
                    "type": "expect_column_values_to_be_unique",
                    "kwargs": {"column": "email"}
                }
            ]
            
            for exp in example_expectations:
                print(f"      • {exp['name']}: {exp['type']}")
                print(f"        Parameters: {exp['kwargs']}")
        
        demo_results["examples_completed"] += 1
        
    except Exception as e:
        print(f"Expectation methods demonstration failed: {e}")

else:
    print(f"No context available for expectation demonstrations")


EXPECTATION SUITE METHODS DEMONSTRATION
--------------------------------------------------
Found 3 expectation methods

1. MODERN EXPECTATION SUITES (GX 1.5.5+)
   context.suites available
   Legacy expectation suites - Found 0 suites
   Creating demo expectation suite...
   suite creation failed: Cannot add ExpectationSuite with name demo_suite because it already exists.
   ExpectationConfiguration import successful
   Example expectation configurations:
      • Table row count: expect_table_row_count_to_be_between
        Parameters: {'min_value': 1, 'max_value': 10000}
      • Column existence: expect_column_to_exist
        Parameters: {'column': 'id'}
      • Non-null values: expect_column_values_to_not_be_null
        Parameters: {'column': 'name'}
      • Unique values: expect_column_values_to_be_unique
        Parameters: {'column': 'email'}


In [0]:
# =============================================================================
# VALIDATION METHODS DEMONSTRATION (PURE GX)
# =============================================================================

if context is not None:
    print("\nVALIDATION METHODS DEMONSTRATION")
    print("-" * 50)
    
    validation_methods = method_categories.get("validation", [])
    print(f"Found {len(validation_methods)} validation methods")
    
    try:
        # 1. Validator creation methods
        print("\n1. VALIDATOR CREATION METHODS")
        validator_methods = ['get_validator', 'build_batch_request', 'get_batch_list']
        
        for method_name in validator_methods:
            if hasattr(context, method_name):
                print(f"   context.{method_name}() - Available")
                demo_results["methods_demonstrated"].append(method_name)
            else:
                print(f"   context.{method_name}() - Not available")
        
        # 2. Pure GX Expectation Methods (without pandas data)
        print("\n2. CORE EXPECTATION METHODS")
        
        core_expectations = [
            "expect_table_row_count_to_be_between",
            "expect_table_row_count_to_equal", 
            "expect_column_to_exist",
            "expect_column_values_to_not_be_null",
            "expect_column_values_to_be_unique",
            "expect_column_values_to_be_in_set",
            "expect_column_values_to_match_regex",
            "expect_column_mean_to_be_between",
            "expect_column_sum_to_be_between",
            "expect_column_min_to_be_between",
            "expect_column_max_to_be_between"
        ]
        
        print("   Core GX Expectations Available:")
        for exp in core_expectations:
            print(f"      - {exp}")
        
        demo_results["methods_demonstrated"].extend(core_expectations)
        
        # 3. SQL-specific validation patterns
        print("\n3. SQL VALIDATION PATTERNS")
        
        print("   Table-level Validations:")
        print("      - Row count validation: expect_table_row_count_to_be_between(min_value=1, max_value=10000)")
        print("      - Table existence: Built into datasource asset creation")
        print("      - Schema validation: expect_table_columns_to_match_ordered_list()")
        
        print("   Column-level Validations:")
        print("      - Null checks: expect_column_values_to_not_be_null(column='HIERARCHY_ID')")
        print(f"      • Uniqueness: expect_column_values_to_be_unique(column='HIERARCHY_ID')")
        print(f"      • Data types: expect_column_values_to_be_of_type(column='date_col', type_='datetime')")
        print(f"      • Value ranges: expect_column_values_to_be_between(column='age', min_value=0, max_value=120)")
        
        print(f"Business Rule Validations:")
        print(f"      • Custom SQL: expect_column_pair_values_A_to_be_greater_than_B()")
        print(f"      • Regex patterns: expect_column_values_to_match_regex(column='email', regex=r'^[^@]+@[^@]+\\.[^@]+$')")
        print(f"      • Set membership: expect_column_values_to_be_in_set(column='status', value_set=['active', 'inactive'])")
        
        # 4. Batch request patterns (without actual data)
        print(f"BATCH REQUEST PATTERNS")
        
        print(f"SQL Table Asset Batch Request:")
        print(f"      • asset.build_batch_request()")
        print(f"      • asset.build_batch_request(options={{'year': '2024'}})")
        print(f"      • asset.build_batch_request(batch_slice={{'start': 0, 'stop': 1000}})")
        
        print(f"Query Asset Batch Request:")
        print(f"      • query_asset.build_batch_request()")
        print(f"      • Dynamic parameter binding for filtered queries")
        
        # 5. Validation result handling
        print(f"VALIDATION RESULT HANDLING")
        
        print(f"Result Object Properties:")
        print(f"      • result.success - Boolean success/failure")
        print(f"      • result.result - Detailed validation metrics")
        print(f"      • result.exception_info - Error details if failed")
        print(f"      • result.meta - Metadata about the validation")
        
        print(f"Checkpoint Execution:")
        print(f"      • checkpoint.run() - Execute full validation suite")
        print(f"      • checkpoint_result.run_results - Individual validation results")
        print(f"      • checkpoint_result.success - Overall success status")
        
        demo_results["examples_completed"] += 1
        
    except Exception as e:
        print(f"Validation methods demonstration failed: {e}")

else:
    print(f"No context available for validation demonstrations")


VALIDATION METHODS DEMONSTRATION
--------------------------------------------------
Found 9 validation methods

1. VALIDATOR CREATION METHODS
   context.get_validator() - Available
   context.build_batch_request() - Not available
   context.get_batch_list() - Not available

2. CORE EXPECTATION METHODS
   Core GX Expectations Available:
      - expect_table_row_count_to_be_between
      - expect_table_row_count_to_equal
      - expect_column_to_exist
      - expect_column_values_to_not_be_null
      - expect_column_values_to_be_unique
      - expect_column_values_to_be_in_set
      - expect_column_values_to_match_regex
      - expect_column_mean_to_be_between
      - expect_column_sum_to_be_between
      - expect_column_min_to_be_between
      - expect_column_max_to_be_between

3. SQL VALIDATION PATTERNS
   Table-level Validations:
      - Row count validation: expect_table_row_count_to_be_between(min_value=1, max_value=10000)
      - Table existence: Built into datasource asset creati

In [0]:
if context is not None:
    try:
        # 1. Data Docs methods
        print("DATA DOCS METHODS")
        data_docs_methods = ['build_data_docs', 'get_docs_sites_urls']
        
        for method_name in data_docs_methods:
            if hasattr(context, method_name):
                print(f"context.{method_name}() - Available")
                demo_results["methods_demonstrated"].append(method_name)
            else:
                print(f"context.{method_name}() - Not available")
        
        if hasattr(context, 'build_data_docs'):
            try:
                print("Attempting to build data docs...")
                context.build_data_docs()
                print("Data docs built successfully")
            except Exception as e:
                print(f"Data docs build failed: {e}")
        
        # 2. Checkpoint methods
        print("CHECKPOINT METHODS")
        
        if hasattr(context, 'checkpoints'):
            print("context.checkpoints available")
            try:
                checkpoints_list = list(context.checkpoints)
                print(f"context.checkpoints (iteration) - Found {len(checkpoints_list)} checkpoints")
                demo_results["methods_demonstrated"].append("checkpoints_iteration")
            except Exception as e1:
                try:
                    legacy_checkpoints = context.list_checkpoints() if hasattr(context, 'list_checkpoints') else []
                    print(f"Legacy checkpoints - Found {len(legacy_checkpoints)} checkpoints")
                    demo_results["methods_demonstrated"].append("legacy_checkpoints_list")
                except Exception as e2:
                    print(f"checkpoints listing failed: {e1}")
                    print(f"legacy fallback failed: {e2}")
                    print("Checkpoints exist but listing method differs in GX 1.5.5+")
        
        checkpoint_methods = ['list_checkpoints', 'get_checkpoint', 'add_checkpoint']
        for method_name in checkpoint_methods:
            if hasattr(context, method_name):
                print(f"context.{method_name}() - Available")
                demo_results["methods_demonstrated"].append(method_name)
            else:
                print(f"context.{method_name}() - Not available")
        
        # 3. Store methods
        print("STORE METHODS")
        store_methods = method_categories.get("store", [])
        
        if store_methods:
            print(f"Found {len(store_methods)} store methods:")
            for method in store_methods[:100]:
                print(f"      - {method}")
        else:
            print("")
        
        store_attributes = ['expectations_store', 'validations_store', 'checkpoint_store']
        for attr in store_attributes:
            if hasattr(context, attr):
                print(f"context.{attr} - Available")
            else:
                print(f"context.{attr} - Not available")
        
        demo_results["examples_completed"] += 1

    except Exception as e:
        print(f"Data docs & checkpoint demonstration failed: {e}")

else:
    print("No context available for data docs demonstrations")


DATA DOCS METHODS
context.build_data_docs() - Available
context.get_docs_sites_urls() - Available
Attempting to build data docs...
Data docs built successfully
CHECKPOINT METHODS
context.checkpoints available
Legacy checkpoints - Found 0 checkpoints
context.list_checkpoints() - Not available
context.get_checkpoint() - Not available
context.add_checkpoint() - Not available
STORE METHODS
Found 7 store methods:
      - add_store
      - checkpoint_store
      - checkpoint_store_name
      - delete_store
      - list_active_stores
      - list_stores
      - stores
context.expectations_store - Available
context.validations_store - Not available
context.checkpoint_store - Available


In [0]:

# Final statistics
total_methods_demonstrated = len(set(demo_results["methods_demonstrated"]))
examples_completed = demo_results["examples_completed"]

print(f"GX methods demonstrated: {total_methods_demonstrated}")
print(f"Example sections completed: {examples_completed}")


# Categorize demonstrated methods
demonstrated_by_category = {}
for method in demo_results["methods_demonstrated"]:
    categorized = False
    for category, methods in method_categories.items():
        if method in methods:
            if category not in demonstrated_by_category:
                demonstrated_by_category[category] = []
            demonstrated_by_category[category].append(method)
            categorized = True
            break
    
    if not categorized:
        if "core_expectations" not in demonstrated_by_category:
            demonstrated_by_category["core_expectations"] = []
        demonstrated_by_category["core_expectations"].append(method)

print(f"GX METHODS BY CATEGORY:")
for category, methods in demonstrated_by_category.items():
    print(f"   {category.upper()}: {len(methods)} methods")
    for method in sorted(methods)[:5]:  # Show first 5
        print(f"      • {method}")
    if len(methods) > 5:
        print(f"      ... and {len(methods) - 5} more")

# Key learnings and recommendations for SQL-focused GX usage
print(f"KEY LEARNINGS & RECOMMENDATIONS (SQL FOCUS):")
print(f"   1. GX {gx.__version__} excels at SQL database validation")
print(f"   2. context.data_sources.add_sql() for database connections")
print(f"   3. SQL table/query assets for flexible data access")
print(f"   4. Rich expectation library for business rules")
print(f"   5. NO pandas dependency required for SQL workflows")

# SQL-specific usage patterns for GX
print(f"SQL-FOCUSED USAGE PATTERNS FOR GX {gx.__version__}:")
print(f"   • Data Sources: context.data_sources.add_sql(connection_string='...')")
print(f"   • Table Assets: sql_datasource.add_table_asset(table_name='dbo.TableName')")
print(f"   • Query Assets: sql_datasource.add_query_asset(query='SELECT ...')")
print(f"   • Suites: context.suites.add() / get()")
print(f"   • Validation: context.get_validator(batch_request=...)")
print(f"   • Documentation: context.build_data_docs()")

# AMS Data Quality Dashboard specific recommendations
print(f"AMS DATA QUALITY DASHBOARD RECOMMENDATIONS:")
print(f"   1. Use SQL Server datasource with ODBC connection")
print(f"   2. Create table assets for dbo.DQ_LOGIC and related tables")
print(f"   3. Build expectation suites for data quality rules")
print(f"   4. Implement hierarchy validation with expect_column_values_to_be_unique")
print(f"   5. Use expect_table_row_count_to_be_between for data volume checks")
print(f"   6. Apply expect_column_values_to_not_be_null for critical fields")

# Production deployment patterns
print(f"\n🚀 PRODUCTION DEPLOYMENT PATTERNS:")
print(f"   1. Store connection strings securely (Azure Key Vault)")
print(f"   2. Use checkpoints for automated validation execution")
print(f"   3. Generate data docs for stakeholder communication")
print(f"   4. Integrate with CI/CD pipelines for continuous validation")
print(f"   5. Set up alerting based on validation results")
print(f"   6. Version control expectation suites with Git")

# Final status
demo_results["status"] = "success"
demo_results["total_methods_demonstrated"] = total_methods_demonstrated
demo_results["demonstrated_by_category"] = demonstrated_by_category
demo_results["sql_focused"] = True
demo_results["pandas_dependency"] = False

print(f"STEP 2.5 COMPLETED SUCCESSFULLY")
print(f"Comprehensive PURE GX methods demonstration completed")
print(f"{total_methods_demonstrated} SQL-focused methods explored")
print(f"Zero pandas dependencies - Pure Great Expectations only")
print(f"Ready for AMS Data Quality Dashboard implementation")

print("=" * 80)

# Clean and return results
def clean_for_json(obj):
    """Convert non-serializable types to JSON-compatible types"""
    if isinstance(obj, dict):
        return {k: clean_for_json(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [clean_for_json(v) for v in obj]
    elif hasattr(obj, 'isoformat'):  # datetime objects
        return str(obj)
    else:
        return obj

clean_results = clean_for_json(demo_results)
dbutils.notebook.exit(json.dumps(clean_results))