In [1]:
cd ..

/Users/nitastha/Desktop/NitishFiles/Work/Optum/project


In [3]:
import json
import os
import google.generativeai as genai
from google.generativeai import types
import pandas as pd
import yaml
import re
import jsonschema
from pydantic import BaseModel, Field
from typing import Dict, List, Optional

# JSON Schema for the payload expected back from the LLM: an array of
# test-case objects in which every property listed below is required.
_TEST_CASE_PROPERTIES = {
    "test_case": {"type": "string"},
    "description": {"type": "string"},
    "expected_result": {"type": "string"},
    # The key must be present, but its value may be JSON null.
    "input": {"type": ["string", "null"]},
}

TEST_CASE_SCHEMA = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": _TEST_CASE_PROPERTIES,
        # Every declared property is mandatory, in declaration order.
        "required": list(_TEST_CASE_PROPERTIES),
    },
}

def load_config(config_path: str = "config/settings.yaml") -> Optional[dict]:
    """Load configuration from a YAML file.

    Args:
        config_path: Path of the YAML settings file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file, or ``None`` when
        the file is missing, cannot be read or decoded, or is not valid YAML.
        Every failure is reported on stdout instead of being raised.
    """
    try:
        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(config_path, "r", encoding="utf-8") as f:
            return yaml.safe_load(f)
    except FileNotFoundError:
        print(f"Error: Config file not found at {config_path}")
        return None
    except (OSError, UnicodeDecodeError) as e:
        # Directory given instead of a file, permission denied, bad bytes, ...
        print(f"Error reading config file {config_path}: {e}")
        return None
    except yaml.YAMLError as e:
        print(f"Error parsing config file: {e}")
        return None

class TestCase(BaseModel):
    """Pydantic model describing a single generated test case.

    Every field is declared with ``Field(...)``, i.e. without a default
    value. The field names match the keys listed in ``TEST_CASE_SCHEMA``.
    """
    # Short name or summary identifying the test case.
    test_case: str = Field(..., description="Name or brief summary of the test case")
    # Longer explanation of what the test case exercises.
    description: str = Field(..., description="Detailed description of the test case")
    # Expected outcome; the field description names "Pass" and "Fail", but the
    # annotation itself accepts any string.
    expected_result: str = Field(..., description="Expected result (Pass or Fail)")
    # Value fed to the field under test; annotated Optional, so None is allowed.
    input: Optional[str] = Field(..., description="Input value for the test case")

def clean_json_response(response_text: str) -> Optional[List[Dict]]:
    """Parse the LLM reply as JSON, repairing common formatting problems.

    The raw text is tried first. If that fails, each repair is tried on its
    own, and finally the repairs are stacked one on top of another so replies
    with several problems at once (for example Markdown fences *and* a
    trailing comma) can still be recovered. The first candidate that both
    parses and passes ``validate_json`` wins.

    Args:
        response_text: Raw text returned by the LLM.

    Returns:
        The parsed JSON data, or ``None`` when no candidate both parses and
        validates.
    """
    print("\nRaw Response from LLM:\n" + "-" * 80 + "\n" + response_text + "\n" + "-" * 80)

    def strip_comments(text: str) -> str:
        # Match string literals as well as comments and put the strings back
        # untouched, so "//" inside a value (e.g. a URL) is never mistaken
        # for the start of a comment.
        return re.sub(
            r'"(?:\\.|[^"\\])*"|//[^\r\n]*|/\*.*?\*/',
            lambda m: m.group(0) if m.group(0).startswith('"') else '',
            text,
            flags=re.DOTALL,
        )

    cleaning_steps = [
        # Remove JSON code blocks
        lambda t: re.sub(r'```json|```', '', t, flags=re.IGNORECASE),
        # Remove JavaScript-style comments
        strip_comments,
        # Remove JavaScript methods
        lambda t: re.sub(r'"([^"]+)"\.\w+\([^)]*\)', lambda m: f'"{m.group(1)}"', t),
        # Fix trailing commas
        lambda t: re.sub(r',\s*([}\]])', r'\1', t),
        # Remove extra backslashes (destructive, hence late in the chain)
        lambda t: t.replace('\\', ''),
        # Trim whitespace
        lambda t: t.strip()
    ]

    # Least invasive candidates first: the raw text, then every repair on its own.
    candidates = [response_text]
    candidates.extend(step(response_text) for step in cleaning_steps)

    # Then the repairs applied cumulatively, trying the text after each stage.
    stacked = response_text
    for step in cleaning_steps:
        stacked = step(stacked)
        candidates.append(stacked)

    already_tried = set()
    for attempt in candidates:
        # Many repairs are no-ops on a given reply; do not re-parse identical text.
        if attempt in already_tried:
            continue
        already_tried.add(attempt)

        try:
            print("Attempting to parse JSON...")
            data = json.loads(attempt)
        except json.JSONDecodeError as e:
            print(f"Parsing attempt failed: {e}")
            continue

        if validate_json(data):
            return data

    print("All parsing attempts failed. Could not clean JSON.")
    return None

def validate_json(data: List[Dict]) -> bool:
    """Check ``data`` against ``TEST_CASE_SCHEMA``.

    Args:
        data: Parsed JSON payload to check.

    Returns:
        ``True`` when ``jsonschema.validate`` accepts the payload; ``False``
        (after printing the validation error) when it raises
        ``jsonschema.ValidationError``.
    """
    try:
        jsonschema.validate(instance=data, schema=TEST_CASE_SCHEMA)
    except jsonschema.ValidationError as err:
        print(f"JSON Validation Error: {err}")
        return False
    else:
        return True

def generate_test_cases(
    field_name: str,
    data_type: str,
    constraints: str,
    llm_client: genai.GenerativeModel,
    llm_model: str,
    max_output_tokens: int = 1000
) -> Optional[List[Dict]]:
    """Ask the LLM for test cases covering one field and return them parsed.

    Args:
        field_name: Name of the field, interpolated into the prompt.
        data_type: Data type of the field, interpolated into the prompt.
        constraints: Constraint description, interpolated into the prompt.
        llm_client: Client whose ``generate_content`` method is called.
        llm_model: Model name. Not used in this function; kept so existing
            callers keep working.
        max_output_tokens: Output token limit passed in the generation config.

    Returns:
        A list of test-case dicts whose ``expected_result`` is normalised to
        exactly ``"Pass"`` or ``"Fail"``, or ``None`` when the request fails,
        the reply has no text, or ``clean_json_response`` yields nothing
        (including an empty list). Errors are printed, never raised.
    """
    prompt = f"""
    Generate comprehensive test cases for a field named '{field_name}' with data type '{data_type}'.
    The field has the following constraints: {constraints}.

    Required Format:
    - JSON array of objects
    - Each object must have these EXACT fields:
      * test_case: string
      * description: string
      * expected_result: "Pass" or "Fail"
      * input: string or null

    Rules:
    1. Use ONLY valid JSON syntax
    2. No JavaScript code or comments
    3. No markdown formatting
    4. String values must use double quotes
    5. Escape special characters properly

    Example Valid Response:
    [
      {{
        "test_case": "Valid Input",
        "description": "Basic valid input test",
        "expected_result": "Pass",
        "input": "valid@example.com"
      }},
      {{
        "test_case": "Null Input",
        "description": "Test null value",
        "expected_result": "Fail",
        "input": null
      }}
    ]
    """

    try:
        print(f"\nGenerating test cases for: {field_name}")
        response = llm_client.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=0.3,
                max_output_tokens=max_output_tokens
            )
        )

        # NOTE(review): per the SDK docs the ``text`` accessor raises
        # ValueError when the reply carries no usable content, which a plain
        # hasattr() check does not cover; handle both cases explicitly.
        try:
            response_text = response.text
        except (AttributeError, ValueError) as e:
            print("Error: No text in LLM response")
            print(f"Reason: {e}")
            return None

        cleaned_data = clean_json_response(response_text)
        if not cleaned_data:
            return None

        # Post-process expected_result values. Whitespace is stripped first so
        # a reply such as " pass " is not silently turned into "Fail".
        for case in cleaned_data:
            verdict = case.get('expected_result', 'Fail').strip().capitalize()
            case['expected_result'] = verdict if verdict in ('Pass', 'Fail') else 'Fail'

        return cleaned_data

    except Exception as e:
        print(f"Error generating test cases: {str(e)}")
        return None

def generate_test_cases_from_rules(
    rules: Dict,
    llm_client: genai.GenerativeModel,
    llm_model: str,
    config: Dict
) -> Dict:
    """Generate test cases for every field of every parent entry in ``rules``.

    Args:
        rules: Mapping of parent field name to a dict whose ``"fields"`` entry
            maps field names to dicts with ``"data_type"`` and
            ``"constraints"`` keys.
        llm_client: Client forwarded to ``generate_test_cases``.
        llm_model: Model name forwarded to ``generate_test_cases``.
        config: Settings mapping; ``"max_output_tokens"`` is read from it,
            falling back to 1000.

    Returns:
        Mapping of ``"<parent_field>.<field_name>"`` to the list of test-case
        dicts produced for that field. Fields for which generation returned
        nothing are left out.
    """
    collected = {}

    for parent_field, details in rules.items():
        print(f"\nProcessing parent field: {parent_field}")

        for field_name, field_details in details["fields"].items():
            generated = generate_test_cases(
                field_name=field_name,
                data_type=field_details["data_type"],
                constraints=field_details["constraints"],
                llm_client=llm_client,
                llm_model=llm_model,
                max_output_tokens=config.get("max_output_tokens", 1000)
            )

            # Guard clause: report the failure and move on to the next field.
            if not generated:
                print(f"❌ Failed to generate test cases for {field_name}")
                continue

            # Round-trip each raw dict through the TestCase model.
            collected[f"{parent_field}.{field_name}"] = [
                TestCase(**raw_case).dict() for raw_case in generated
            ]
            print(f"✅ Generated {len(generated)} test cases for {field_name}")

    return collected

def save_test_cases(all_test_cases: Dict, output_file: str) -> None:
    """Write the generated test cases to ``output_file`` as indented JSON.

    Args:
        all_test_cases: JSON-serialisable mapping to write.
        output_file: Destination path. Missing parent directories are
            created; a bare filename is written to the current directory.

    Any failure is reported on stdout rather than raised.
    """
    try:
        # A bare filename has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError, so only create directories when there is one.
        target_dir = os.path.dirname(output_file)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # ensure_ascii=False emits non-ASCII characters verbatim, so pin the
        # encoding instead of relying on the platform default.
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(all_test_cases, f, indent=4, ensure_ascii=False)
        print(f"\n✅ Successfully saved test cases to {output_file}")
    except Exception as e:
        print(f"\n❌ Error saving test cases: {str(e)}")

def main() -> None:
    """Run the whole pipeline: load config, call the LLM, save the results.

    Reads the settings via ``load_config``, configures the Gemini client,
    loads the rules JSON, generates test cases for every field and writes
    them to the configured output file. Any error is printed rather than
    raised.
    """
    config = load_config()
    if not config:
        return

    try:
        # Check every required setting up front so a missing output path is
        # reported before any LLM calls are made instead of after them.
        required_keys = (
            "gemini_api_key",
            "constrains_processed_rules_file",
            "generated_test_cases_file",
        )
        missing = [key for key in required_keys if key not in config]
        if missing:
            print(f"🔥 Critical error: missing config keys: {', '.join(missing)}")
            return

        # Initialize Gemini
        genai.configure(api_key=config["gemini_api_key"])
        llm_model = config.get("gemini_model", "gemini-1.5-flash")
        llm_client = genai.GenerativeModel(llm_model)
        print(f"🚀 Initialized Gemini model: {llm_model}")

        # Load validation rules; read as UTF-8 explicitly so decoding does
        # not depend on the platform's locale encoding.
        with open(config["constrains_processed_rules_file"], "r", encoding="utf-8") as f:
            rules = json.load(f)
        print(f"📂 Loaded rules from {config['constrains_processed_rules_file']}")

        # Generate test cases
        all_test_cases = generate_test_cases_from_rules(rules, llm_client, llm_model, config)

        # Save results
        if all_test_cases:
            save_test_cases(all_test_cases, config["generated_test_cases_file"])
        else:
            print("⚠️ No test cases generated. Check logs for errors.")

    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback.
        print(f"🔥 Critical error: {str(e)}")

if __name__ == "__main__":
    main()

🚀 Initialized Gemini model: gemini-1.5-flash
📂 Loaded rules from data/constrains_processed_rules.json

Processing parent field: Rx Bc Demographics

Generating test cases for: Rx BC Email

Raw Response from LLM:
--------------------------------------------------------------------------------
```json
[
  {
    "test_case": "Valid Email",
    "description": "A valid email address",
    "expected_result": "Pass",
    "input": "valid@example.com"
  },
  {
    "test_case": "Valid Email with Plus Sign",
    "description": "A valid email with a plus sign",
    "expected_result": "Pass",
    "input": "user+alias@example.com"
  },
  {
    "test_case": "Valid Email with Underscore",
    "description": "A valid email with an underscore",
    "expected_result": "Pass",
    "input": "user_name@example.com"
  },
  {
    "test_case": "Valid Email with Multiple Top-Level Domains",
    "description": "A valid email with multiple top-level domains",
    "expected_result": "Pass",
    "input": "user@examp