# In [None]:
import os
import great_expectations as gx
from dotenv import load_dotenv
from datahub.integrations.great_expectations.action import DataHubValidationAction
import importlib
import pkgutil
from pathlib import Path
from great_expectations.checkpoint import Checkpoint
from great_expectations.core.batch import RuntimeBatchRequest
from great_expectations.data_context import DataContext
from urllib.parse import urlparse

class ValidationFramework:
    """Discover validation modules and run their Great Expectations suites.

    Every module in the ``validations`` directory whose name ends in
    ``_validation`` and that exposes ``run_validation(context)`` is treated as
    one suite. ``run_validation`` must return a 3-tuple
    ``(batch_request, suite_name, datasource_config)`` where:

    * ``batch_request`` is a mapping with the keys ``"data_asset_name"``,
      ``"table_name"`` and ``"schema_name"``;
    * ``suite_name`` names an expectation suite already present in the context;
    * ``datasource_config`` is a mapping with ``"name"`` and, optionally,
      ``"database_name"`` (defaults to ``"postgres"``).

    Configuration is read from the environment (after ``load_dotenv()``):
    ``PG_CONNECTION_STRING``, ``DATAHUB_SERVER_URL`` and ``DATAHUB_TOKEN``.
    """

    # Trailing database path removed from PG_CONNECTION_STRING so that a
    # per-suite database name can be appended by get_connection_string().
    _DEFAULT_DATABASE_SUFFIX = "/postgres"

    def __init__(self):
        """Load environment configuration and obtain the data context."""
        load_dotenv()
        self.context = gx.get_context()
        self.datahub_token = os.getenv("DATAHUB_TOKEN")
        self.datahub_server_url = os.getenv("DATAHUB_SERVER_URL")

        # Connection string without the database component.
        self.base_conn_string = self._strip_default_database(
            os.getenv("PG_CONNECTION_STRING")
        )
        # The connection string normally embeds the password; never echo it raw.
        print(
            "Base connection string initialized: "
            f"{self._redact_credentials(self.base_conn_string)}"
        )

    @classmethod
    def _strip_default_database(cls, conn_string):
        """Return *conn_string* without a trailing ``/postgres`` component.

        ``None``, empty strings, and strings that do not end with the suffix
        are returned unchanged.
        """
        suffix = cls._DEFAULT_DATABASE_SUFFIX
        if conn_string and conn_string.endswith(suffix):
            return conn_string[: -len(suffix)]
        return conn_string

    @staticmethod
    def _redact_credentials(conn_string):
        """Return *conn_string* with any password replaced by ``***`` for logging."""
        if not conn_string:
            return "<not set>"
        try:
            parsed = urlparse(conn_string)
            has_password = parsed.password is not None
        except ValueError:
            # Malformed URL: reveal nothing rather than risk echoing a secret.
            return "<unparseable connection string>"
        if not has_password:
            return conn_string
        # netloc is "user:password@host[:port]"; keep everything but the password.
        userinfo, _, hostinfo = parsed.netloc.rpartition("@")
        username = userinfo.split(":", 1)[0]
        return parsed._replace(netloc=f"{username}:***@{hostinfo}").geturl()

    def get_connection_string(self, database_name: str) -> str:
        """Create the full connection string for *database_name*.

        Raises:
            ValueError: if no base connection string was found in the
                environment.
        """
        if not self.base_conn_string:
            raise ValueError("Base connection string not found in environment variables")
        full_conn_string = f"{self.base_conn_string}/{database_name}"
        print(f"Created connection string for database {database_name}")
        return full_conn_string

    def initialize_datasource(self, datasource_config: dict) -> None:
        """Register a PostgreSQL datasource in the context if it is not there yet.

        Args:
            datasource_config: mapping with ``"name"`` (required) and
                ``"database_name"`` (optional, defaults to ``"postgres"``).

        Raises:
            ValueError: if the base connection string is not configured.
        """
        datasource_name = datasource_config["name"]
        database_name = datasource_config.get("database_name", "postgres")

        # Built before the existence check so a missing connection string is
        # reported even when the datasource is already registered.
        connection_string = self.get_connection_string(database_name)

        already_registered = any(
            ds["name"] == datasource_name for ds in self.context.list_datasources()
        )
        if not already_registered:
            self.context.sources.add_postgres(
                name=datasource_name,
                connection_string=connection_string,
            )
            print(f"Added datasource: {datasource_name} for database: {database_name}")

    def load_validation_modules(self) -> list:
        """Import every ``*_validation`` module from the ``validations`` directory.

        Only modules that define ``run_validation`` are returned. Modules that
        fail with ``ImportError`` are reported and skipped.

        Raises:
            FileNotFoundError: if ``validations`` is not an existing directory
                (resolved relative to the current working directory).
        """
        validation_modules = []
        validations_dir = Path("validations")

        if not validations_dir.is_dir():
            raise FileNotFoundError("Validations directory not found")

        for _, name, _ in pkgutil.iter_modules([str(validations_dir)]):
            if not name.endswith("_validation"):
                continue
            try:
                module = importlib.import_module(f"validations.{name}")
            except ImportError as e:
                print(f"Error loading validation module {name}: {str(e)}")
                continue
            if hasattr(module, "run_validation"):
                validation_modules.append(module)
                print(f"Loaded validation suite: {name}")
            else:
                # Make the skip visible instead of dropping the module silently.
                print(f"Skipping module {name}: no run_validation function defined")

        return validation_modules

    def create_checkpoint(self, batch_request, suite_name: str) -> "Checkpoint":
        """Build a checkpoint that validates *batch_request* against *suite_name*.

        Args:
            batch_request: the batch request handed to the checkpoint; callers
                in this class pass the object returned by
                ``asset.build_batch_request()``.
            suite_name: name of the expectation suite to validate with.

        Returns:
            A ``Checkpoint`` bound to ``self.context`` whose action list stores
            the validation result and evaluation parameters, updates data docs,
            and runs ``DataHubValidationAction`` with the configured server URL
            and token.
        """
        checkpoint_config = {
            "name": f"checkpoint_{suite_name}",
            "config_version": 1.0,
            "run_name_template": "%Y%m%d-%H%M%S-validation-run",
            "validations": [
                {
                    "batch_request": batch_request,
                    "expectation_suite_name": suite_name,
                    "action_list": [
                        {
                            "name": "store_validation_result",
                            "action": {"class_name": "StoreValidationResultAction"},
                        },
                        {
                            "name": "store_evaluation_params",
                            "action": {"class_name": "StoreEvaluationParametersAction"},
                        },
                        {
                            "name": "update_data_docs",
                            "action": {"class_name": "UpdateDataDocsAction"},
                        },
                        {
                            "name": "datahub_action",
                            "action": {
                                "class_name": "DataHubValidationAction",
                                "module_name": "datahub.integrations.great_expectations.action",
                                "server_url": self.datahub_server_url,
                                "token": self.datahub_token,
                            },
                        },
                    ],
                }
            ],
        }
        return Checkpoint(**checkpoint_config, data_context=self.context)

    def run_validation_suite(self, validation_module) -> None:
        """Run one validation module's suite through a checkpoint.

        Args:
            validation_module: imported module exposing
                ``run_validation(context)`` that returns
                ``(batch_request, suite_name, datasource_config)``.

        Raises:
            ValueError: if the module returns ``None`` or names a suite that is
                not present in the context.
            Exception: any other error is reported on stdout and re-raised
                unchanged.
        """
        try:
            print(f"Starting validation for module: {validation_module.__name__}")

            result = validation_module.run_validation(self.context)
            if result is None:
                # Fail with a clear message instead of an opaque unpacking error.
                raise ValueError(f"Module {validation_module.__name__} returned None")

            batch_request, suite_name, datasource_config = result
            print(
                f"Received from module: \n  Batch Request: {batch_request}"
                f"\n  Suite Name: {suite_name}"
                f"\n  Datasource Config: {datasource_config}"
            )
            print(f"\nProcessing validation for {suite_name}")

            self.initialize_datasource(datasource_config)

            # NOTE(review): the asset is added on every run; confirm the
            # datasource tolerates re-adding an asset with an existing name.
            datasource = self.context.datasources[datasource_config["name"]]
            asset = datasource.add_table_asset(
                name=batch_request["data_asset_name"],
                table_name=batch_request["table_name"],
                schema_name=batch_request["schema_name"],
            )
            batch = asset.build_batch_request()

            # The suite must already exist; this method does not create it.
            available_suites = self.context.list_expectation_suite_names()
            print(f"Available suites: {available_suites}")
            if suite_name not in available_suites:
                raise ValueError(
                    f"Suite {suite_name} not found in context. "
                    f"Available suites: {available_suites}"
                )

            checkpoint = self.create_checkpoint(batch, suite_name)
            results = checkpoint.run()

            print(f"Validation Results for {suite_name}:")
            print(results)

        except Exception as e:
            print(f"Error processing suite {validation_module.__name__}: {str(e)}")
            raise

    def run_all_validations(self) -> None:
        """Load every validation module and run each suite in turn.

        An error raised by one suite propagates and stops the remaining suites.
        """
        validation_modules = self.load_validation_modules()

        if not validation_modules:
            print("No validation suites found!")
            return

        for module in validation_modules:
            self.run_validation_suite(module)

def main():
    """Script entry point: build the framework and run every discovered suite."""
    ValidationFramework().run_all_validations()


# Run the validations only when executed as a script, not when imported.
if __name__ == "__main__":
    main()