## Step 2: GX Context Setup & API Discovery


- step1 shows installation
- step2 python restart
- step3 Context check
- total available Methods by GX





In [0]:
# Run this in a separate cell before restarting
%pip install great_expectations sqlalchemy pyodbc pandas


[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:

dbutils.library.restartPython()



In [0]:
import json
import great_expectations as gx


step2_results = {
    "status": "running",
    "gx_version": gx.__version__,
    "context_type": None,
    "context_created": False,
    "context_methods": [],
    "datasource_methods": [],
    "context_config": {},
    "ready_for_step3": False,
    "error_message": None
}

print(f"GX Version: {gx.__version__}")

try:

    print(f"GX CLASS STRUCTURE ")
    print("-" * 50)
    
    # Check available context classes
    context_classes = {}
    
    # FileDataContext
    try:
        from great_expectations.data_context import FileDataContext
        context_classes["FileDataContext"] = True
        print("FileDataContext available")
    except ImportError:
        context_classes["FileDataContext"] = False
        print("FileDataContext not available")
    
    # EphemeralDataContext  
    try:
        from great_expectations.data_context import EphemeralDataContext
        context_classes["EphemeralDataContext"] = True
        print("EphemeralDataContext available")
    except ImportError:
        context_classes["EphemeralDataContext"] = False
        print("EphemeralDataContext not available")
    
    # CloudDataContext
    try:
        from great_expectations.data_context import CloudDataContext
        context_classes["CloudDataContext"] = True
        print("CloudDataContext available")
    except ImportError:
        context_classes["CloudDataContext"] = False
        print("CloudDataContext not available")
    
    step2_results["available_classes"] = context_classes

except Exception as e:
    print(f"Error in class structure analysis: {e}")
    step2_results["error_message"] = f"Class analysis failed: {e}"

GX Version: 1.5.7
GX CLASS STRUCTURE 
--------------------------------------------------
FileDataContext available
EphemeralDataContext available
CloudDataContext available


In [0]:
# STEP 2: GX Available Context Classes

import json
import sys
import importlib

step2_results = {
    "status": "running",
    "gx_version": None,
    "context_type": None,
    "context_created": False,
    "context_methods": [],
    "datasource_methods": [],
    "context_config": {},
    "ready_for_step3": False,
    "error_message": None,
    "import_retry_attempted": False
}


gx = None
import_successful = False


try:
    import great_expectations as gx
    import_successful = True

    step2_results["gx_version"] = gx.__version__
except ImportError as e:
 
    step2_results["error_message"] = f"Import failed: {e}"


if not import_successful:

    
    try:
       
        import subprocess
        result = subprocess.run([
            sys.executable, "-m", "pip", "install", "--upgrade", "great-expectations[sql]"
        ], capture_output=True, text=True)
        
        if result.returncode == 0:
            
            

            if 'great_expectations' in sys.modules:
                importlib.reload(sys.modules['great_expectations'])
            
  
            import great_expectations as gx
            import_successful = True
            step2_results["import_retry_attempted"] = True

            step2_results["gx_version"] = gx.__version__
            
        else:

            step2_results["error_message"] = f"Installation failed: {result.stderr}"
            
    except Exception as retry_error:

        step2_results["error_message"] = f"Retry failed: {retry_error}"


if not import_successful or gx is None:

    
    step2_results["status"] = "error"
    raise RuntimeError("'great_expectations' is not available. Please restart the kernel after completing Step 1.")


if import_successful and gx:
    try:
       
       
        context_classes = {
            'get_context': hasattr(gx, 'get_context'),
            'DataContext': hasattr(gx, 'DataContext'),
            'EphemeralDataContext': hasattr(gx, 'EphemeralDataContext'),
            'FileDataContext': hasattr(gx, 'FileDataContext')
        }
        
        
        for class_name, available in context_classes.items():
            status = "" if available else ""
            print(f"  {status} gx.{class_name}")
        
        step2_results["context_classes"] = context_classes
        

        if context_classes.get('get_context', False):

            step2_results["modern_api"] = True
        else:

            step2_results["modern_api"] = False
            

        
    except Exception as analysis_error:
        
        step2_results["error_message"] = f"Analysis failed: {analysis_error}"

   gx.get_context
   gx.DataContext
   gx.EphemeralDataContext
   gx.FileDataContext


In [0]:



import os
import great_expectations as gx

# Check if running in Databricks
try:
    dbutils.fs.ls('/')
    is_databricks = True
except Exception:
    is_databricks = False

context = None

if is_databricks:
    dbfs_gx_path = "/dbfs/FileStore/great_expectations"
    os.makedirs(dbfs_gx_path, exist_ok=True)
    print(f"DBFS directory created/verified: {dbfs_gx_path}")

    context = gx.get_context(project_root_dir=dbfs_gx_path)
    print("DBFS-based context created successfully")
    print(f"Context location: {dbfs_gx_path}")

if context is not None:
    print(f"Context created: {type(context)}")
    print(f"Context type: {type(context).__name__}")
    
    # Update step2_results to indicate context was created successfully
    step2_results["context_created"] = True
    step2_results["context_type"] = type(context).__name__


DBFS directory created/verified: /dbfs/FileStore/great_expectations
DBFS-based context created successfully
Context location: /dbfs/FileStore/great_expectations
Context created: <class 'great_expectations.data_context.data_context.file_data_context.FileDataContext'>
Context type: FileDataContext


In [0]:
# # total available Methods by GX
if context is not None:
    print("\nCONTEXT API DISCOVERY")
    print("-" * 50)

    all_methods = [method for method in dir(context) if not method.startswith('_')]
    step2_results["context_methods"] = all_methods

    method_categories = {
        "datasource": [],
        "expectation": [], 
        "validation": [],
        "data_docs": [],
        "store": [],
        "checkpoint": [],
        "other": []
    }

    for method in all_methods:
        categorized = False
        if any(keyword in method.lower() for keyword in ['datasource', 'data_source']):
            method_categories["datasource"].append(method)
            categorized = True
        elif any(keyword in method.lower() for keyword in ['expectation', 'suite', 'expect']):
            method_categories["expectation"].append(method)
            categorized = True
        elif any(keyword in method.lower() for keyword in ['validation', 'validate', 'validator']):
            method_categories["validation"].append(method)
            categorized = True
        elif any(keyword in method.lower() for keyword in ['docs', 'documentation']):
            method_categories["data_docs"].append(method)
            categorized = True
        elif any(keyword in method.lower() for keyword in ['store']):
            method_categories["store"].append(method)
            categorized = True
        elif any(keyword in method.lower() for keyword in ['checkpoint']):
            method_categories["checkpoint"].append(method)
            categorized = True

        if not categorized:
            method_categories["other"].append(method)

    step2_results["method_categories"] = method_categories

    for category, methods in method_categories.items():
        if methods:
            print(f"\n{category.upper()} METHODS ({len(methods)}):")
            for method in sorted(methods)[:100]:
                print(f"  - {method}")
            if len(methods) > 100:
                print(f"  ... and {len(methods) - 100} more")

    print("\nTESTING KEY METHODS")
    print("-" * 30)

    key_methods = ['list_datasources', 'add_datasource', 'get_datasource']
    method_test_results = {}

    for method_name in key_methods:
        if hasattr(context, method_name):
            try:
                method = getattr(context, method_name)
                method_test_results[method_name] = "available"
                print(f"{method_name} - Available")
            except Exception as e:
                method_test_results[method_name] = f"error: {e}"
                print(f"{method_name} - Available but error: {e}")
        else:
            method_test_results[method_name] = "missing"
            print(f"{method_name} - Missing")

    step2_results["key_method_tests"] = method_test_results

else:
    print("\nNo context available for API discovery")



CONTEXT API DISCOVERY
--------------------------------------------------

DATASOURCE METHODS (9):
  - add_datasource
  - add_or_update_datasource
  - data_sources
  - datasource_store
  - delete_datasource
  - fluent_datasources
  - get_datasource
  - list_datasources
  - update_datasource

EXPECTATION METHODS (3):
  - expectations_store
  - expectations_store_name
  - suites

VALIDATION METHODS (9):
  - get_validation_result
  - get_validator
  - get_validator_using_batch_list
  - store_validation_result_metrics
  - validation_definition_store
  - validation_definitions
  - validation_results_store
  - validation_results_store_name
  - view_validation_result

DATA_DOCS METHODS (8):
  - add_data_docs_site
  - build_data_docs
  - clean_data_docs
  - delete_data_docs_site
  - get_docs_sites_urls
  - list_data_docs_sites
  - open_data_docs
  - update_data_docs_site

STORE METHODS (7):
  - add_store
  - checkpoint_store
  - checkpoint_store_name
  - delete_store
  - list_active_stores
  -

In [0]:
# =============================================================================
# CONTEXT CONFIGURATION & READINESS CHECK
# =============================================================================

if context is not None:
    print(f"CONTEXT CONFIGURATION")
    print("-" * 50)
    
    # Get context configuration details
    try:
        # Try to get root directory
        if hasattr(context, 'root_directory'):
            step2_results["context_config"]["root_directory"] = getattr(context, 'root_directory', 'Unknown')
            print(f"Root directory: {step2_results['context_config']['root_directory']}")
        
        # Try to list datasources
        try:
            datasources = context.list_datasources()
            step2_results["context_config"]["datasources"] = len(datasources)
            print(f"Available datasources: {len(datasources)}")
            
            for ds in datasources[:3]:  # Show first 3 datasources
                ds_name = ds.get('name', 'unnamed') if isinstance(ds, dict) else str(ds)
                print(f"  - {ds_name}")
                
        except Exception as e:
            print(f"Could not list datasources: {e}")
            step2_results["context_config"]["datasources"] = "error"
        
        # Check if context has store configurations
        store_attributes = ['expectations_store', 'validations_store', 'checkpoint_store']
        for store_attr in store_attributes:
            if hasattr(context, store_attr):
                step2_results["context_config"][store_attr] = "available"
                print(f"{store_attr}: Available")
            else:
                step2_results["context_config"][store_attr] = "missing"
        
    except Exception as e:
        print(f"Error getting context configuration: {e}")
        step2_results["context_config"]["error"] = str(e)

# Final readiness assessment
ready_for_step3 = (
    step2_results["context_created"] and
    step2_results.get("key_method_tests", {}).get("list_datasources") in ["available", "error"]  # Allow error as some methods might fail without data
)

step2_results["ready_for_step3"] = ready_for_step3
step2_results["status"] = "success" if ready_for_step3 else "error"

if ready_for_step3:
    print(f"\n🎉 STEP 2 COMPLETED SUCCESSFULLY")
    print(f"GX Context ready for Step 3: SQL Connection & Data Loading")
    print(f"Context type: {step2_results['context_type']}")
    print(f"Available methods: {len(step2_results['context_methods'])}")
else:
    print(f"STEP 2 FAILED")
    print(f"Issues:")
    if step2_results.get("error_message"):
        print(f"   • {step2_results['error_message']}")

print("=" * 80)

# Store context in a way that can be accessed by subsequent steps
# Note: In production, you might want to use dbutils.fs to store context configuration
step2_results["context_ready"] = context is not None

# Return results for orchestrator
dbutils.notebook.exit(json.dumps(step2_results))