# LLM Assisted API Testing

## Parse the API Spec
- Creates a JSON file with the server URL and a list of the endpoints found
- For each endpoint, it adds the following keys with these default values:
    - "login": False
    - "registration": False
    - "user_requested": False
    - "healthcheck": False
    - "requires_authorization": False
    - "authorization_mechanism": None
    - "parameters": []
- Later cells update these values

In [None]:
# Configuration shared by the cells below
project_name = "api_project"  # Prefix of the dated results folder ("<project_name>_<YYYY-MM-DD>")
api_spec_file = "/m2-data/jupyterNotebooks/api/spec/openapi3Vampi.yml"  # Path to the OpenAPI spec file
results_base_folder = "/m2-data/jupyterNotebooks/api/test_runs/"  # Parent directory of the dated results folders

# Model name sent to the LLM in the validation cell.
# Exactly one assignment should be active; uncomment another line to switch models.
# model_name = "gemma2:9b"
# model_name = "qwen2.5-coder:7b"
model_name = "llama3.1:latest"  # noted as 7b by the author -- TODO confirm the actual model size
# model_name = "llama3.2:3b"
# model_name = "llama3.2:1b"
# model_name = "tinyllama:latest"

In [None]:
import os
import json
import yaml
import datetime

# Configuration is defined in the configuration cell above.
# Uncomment the lines below only to override it for this cell:
# project_name = "api_project"  # Change this to your desired project name
# api_spec_file = "/m2-data/jupyterNotebooks/api/spec/openapi3Vampi.yml"  # Path to the OpenAPI spec file
# results_base_folder = "/m2-data/jupyterNotebooks/api/test_runs/"

# Each run writes into "<results_base_folder>/<project_name>_<YYYY-MM-DD>";
# the folder is created on first use and reused on later runs the same day.
current_date = datetime.date.today().isoformat()
results_folder = os.path.join(
    results_base_folder,
    "{}_{}".format(project_name, current_date),
)
os.makedirs(results_folder, exist_ok=True)

# The parsed spec is stored inside that folder
output_file_path = os.path.join(results_folder, "parsed_openapi.json")

# Helper function to load the OpenAPI spec
def load_spec_file(filepath):
    """Load an OpenAPI spec from a JSON or YAML/YML file.

    Args:
        filepath: Path to the spec file. The format is chosen from the file
            extension, which is compared case-insensitively.

    Returns:
        The parsed document, or None when the file cannot be opened or
        parsed, or when the extension is not supported. The error is printed
        rather than raised.
    """
    # Lower-case once so "SPEC.YAML" and "spec.JSON" are accepted as well.
    lowered = str(filepath).lower()
    try:
        # Read as UTF-8 explicitly instead of relying on the platform's
        # locale encoding, which is not UTF-8 on every system.
        with open(filepath, "r", encoding="utf-8") as f:
            if lowered.endswith((".yaml", ".yml")):
                return yaml.safe_load(f)
            if lowered.endswith(".json"):
                return json.load(f)
            raise ValueError("Unsupported file format. Use JSON, YAML, or YML.")
    except Exception as e:
        # Deliberately broad: callers only distinguish "loaded" from "not loaded".
        print(f"Error loading spec file: {e}")
        return None

# Function to parse the OpenAPI spec
def parse_openapi_spec(spec):
    """Parse an OpenAPI spec into a server URL and a list of endpoint records.

    Args:
        spec: The loaded OpenAPI document as a dict.

    Returns:
        A ``(server_url, endpoints)`` tuple. ``server_url`` is the ``url`` of
        the first entry in ``servers`` or ``"Unknown"`` when there is none.
        ``endpoints`` holds one dict per path/operation pair with the keys
        ``path``, ``method`` (upper-cased), the classification flags
        initialised to their defaults, and ``parameters`` (name, type and
        example of each operation-level parameter).
    """
    # A path item may also carry non-operation keys such as "parameters",
    # "summary", "description", "servers", "$ref" or "x-..." extensions.
    # Only these keys describe operations.
    http_methods = {"get", "put", "post", "delete", "options", "head", "patch", "trace"}

    # "servers" may be missing, null, or an empty list.
    servers = spec.get("servers") or [{}]
    server_url = servers[0].get("url", "Unknown")

    endpoints = []
    for path, methods in (spec.get("paths") or {}).items():
        for method, details in (methods or {}).items():
            is_operation = isinstance(method, str) and method.lower() in http_methods
            if not is_operation or not isinstance(details, dict):
                continue

            endpoints.append({
                "path": path,
                "method": method.upper(),
                "login": False,
                "registration": False,
                "user_requested": False,
                "healthcheck": False,
                "requires_authorization": False,
                "authorization_mechanism": None,
                "parameters": [
                    {
                        "name": param.get("name"),
                        "type": (param.get("schema") or {}).get("type"),
                        "example": param.get("example"),
                    }
                    for param in details.get("parameters") or []
                ],
            })

    return server_url, endpoints

# Main workflow
def main():
    """Load the configured spec, parse it, and write the result as JSON.

    Reads ``api_spec_file``, and writes a dict with the keys ``server_url``
    and ``endpoints`` to ``output_file_path``. Prints a message and returns
    without writing anything when the spec cannot be loaded.
    """
    spec = load_spec_file(api_spec_file)
    if spec:
        # Parse first, then persist the result in one go.
        server_url, endpoints = parse_openapi_spec(spec)
        parsed_data = {"server_url": server_url, "endpoints": endpoints}
        with open(output_file_path, "w") as out:
            json.dump(parsed_data, out, indent=4)

        print(f"Parsed OpenAPI spec saved to: {output_file_path}")
    else:
        print("Failed to load spec file. Exiting.")


# Only run when executed as a script / notebook cell, not on import
if __name__ == "__main__":
    main()


### Script output
Each endpoint entry written to the JSON file will look something like this:
```json
{
    "path": "/users/v1/register",
    "method": "POST",
    "login": false,
    "registration": false,
    "user_requested": false,
    "healthcheck": false,
    "requires_authorization": false,
    "authorization_mechanism": null,
    "parameters": []
}
```

## Add the Request parameters

In [None]:
from datetime import date
import os
import json
import yaml

# Function to load the OpenAPI spec file
def load_openapi_spec(api_spec_file):
    """Load an OpenAPI spec from a JSON or YAML/YML file.

    Args:
        api_spec_file: Path to the spec file. The format is chosen from the
            file extension, which is compared case-insensitively.

    Returns:
        The parsed document, or None when the file cannot be opened or
        parsed, or when the extension is not supported. The error is printed
        rather than raised.
    """
    # Lower-case once so "SPEC.YAML" and "spec.JSON" are accepted as well.
    lowered = str(api_spec_file).lower()
    try:
        # Read as UTF-8 explicitly instead of relying on the platform's
        # locale encoding, which is not UTF-8 on every system.
        with open(api_spec_file, "r", encoding="utf-8") as f:
            print(f"Loading OpenAPI spec from: {api_spec_file}")
            if lowered.endswith((".yaml", ".yml")):
                return yaml.safe_load(f)
            if lowered.endswith(".json"):
                return json.load(f)
            raise ValueError("Unsupported file format. Use JSON, YAML, or YML.")
    except Exception as e:
        # Deliberately broad: callers only distinguish "loaded" from "not loaded".
        print(f"Error loading OpenAPI spec: {e}")
        return None

# Function to load the parsed JSON file
def load_parsed_json(parsed_file_path):
    """Read the parsed OpenAPI JSON file and return its decoded content.

    Returns None (after printing the error) when the file cannot be opened
    or does not contain valid JSON.
    """
    loaded = None
    try:
        with open(parsed_file_path, mode="r") as source:
            print(f"Loading parsed OpenAPI JSON from: {parsed_file_path}")
            loaded = json.load(source)
    except Exception as err:
        print(f"Error loading parsed JSON file: {err}")
        loaded = None
    return loaded

# Extract request parameters and request body properties from the OpenAPI spec
def extract_request_parameters(spec):
    """Collect request parameters and JSON request-body properties per operation.

    Args:
        spec: The loaded OpenAPI document as a dict.

    Returns:
        A dict mapping ``(path, METHOD)`` to a list of parameter dicts with
        the keys ``name``, ``type``, ``required`` and ``example``. Entries
        come first from the operation's ``parameters`` list and then from
        the properties of its ``application/json`` request-body schema.
    """
    # A path item may also carry non-operation keys such as "parameters",
    # "summary", "description", "servers", "$ref" or "x-..." extensions.
    # Only these keys describe operations.
    http_methods = {"get", "put", "post", "delete", "options", "head", "patch", "trace"}

    parameters_map = {}
    for path, methods in (spec.get("paths") or {}).items():
        for method, details in (methods or {}).items():
            is_operation = isinstance(method, str) and method.lower() in http_methods
            if not is_operation or not isinstance(details, dict):
                continue

            # Parameters declared in the operation's "parameters" field
            parameters = [
                {
                    "name": param.get("name"),
                    "type": (param.get("schema") or {}).get("type", "unknown"),
                    "required": param.get("required", False),
                    "example": param.get("example", None),
                }
                for param in details.get("parameters") or []
            ]

            # Properties of the JSON request body schema, if there is one
            request_body = details.get("requestBody") or {}
            json_content = (request_body.get("content") or {}).get("application/json") or {}
            schema = json_content.get("schema") or {}

            # Honor the schema's "required" list when it is declared. Without
            # one, keep the previous behavior of marking every property required.
            declared_required = schema.get("required")
            has_required_list = isinstance(declared_required, (list, tuple))

            for prop_name, prop_details in (schema.get("properties") or {}).items():
                prop_details = prop_details or {}
                parameters.append({
                    "name": prop_name,
                    "type": prop_details.get("type", "unknown"),
                    "required": (prop_name in declared_required) if has_required_list else True,
                    "example": prop_details.get("example", None),
                })

            # Save parameters mapped to path and method
            parameters_map[(path, method.upper())] = parameters
    return parameters_map

# Update JSON with request parameters
def add_request_parameters_to_json(parsed_data, parameters_map):
    """Overwrite each endpoint's ``parameters`` with its entry from *parameters_map*.

    Endpoints whose ``(path, method)`` pair is not a key of *parameters_map*
    are left untouched. *parsed_data* is modified in place and returned.
    """
    for entry in parsed_data["endpoints"]:
        route = entry["path"]
        verb = entry["method"]
        lookup_key = (route, verb)
        if lookup_key not in parameters_map:
            continue
        entry["parameters"] = parameters_map[lookup_key]
        print(f"Updated parameters for endpoint: {route} {verb}")
    return parsed_data

# Save updated JSON back to the same file
def save_updated_json(parsed_data, parsed_file_path):
    """Write *parsed_data* to *parsed_file_path* as JSON indented by 4 spaces.

    Progress is printed before and after the write. Any failure is printed
    with a ``[SAVE ERROR]`` prefix instead of being raised. Returns None.
    """
    try:
        print(f"Saving updated OpenAPI JSON to: {parsed_file_path}")
        with open(parsed_file_path, mode="w") as sink:
            json.dump(parsed_data, fp=sink, indent=4)
        print(f"[SAVE SUCCESS] Updated OpenAPI spec saved to: {parsed_file_path}")
    except Exception as err:
        print(f"[SAVE ERROR] Error saving updated JSON file: {err}")

# Main execution
# Today's run folder holds the parsed_openapi.json to be updated
parsed_file_path = os.path.join(
    results_base_folder,
    "{}_{}".format(project_name, date.today().strftime("%Y-%m-%d")),
    "parsed_openapi.json",
)

openapi_spec = load_openapi_spec(api_spec_file)
parsed_data = load_parsed_json(parsed_file_path)

# Nothing is written unless both inputs loaded successfully
if openapi_spec and parsed_data:
    # Collect the parameters per (path, method) from the spec
    print("[INFO] Extracting request parameters and requestBody properties from OpenAPI spec...")
    parameters_map = extract_request_parameters(openapi_spec)

    # Copy them onto the matching endpoints
    print("[INFO] Adding request parameters to parsed JSON...")
    parsed_data = add_request_parameters_to_json(parsed_data, parameters_map)

    # Overwrite the parsed JSON file with the enriched data
    save_updated_json(parsed_data, parsed_file_path)


### Script Output
```json
{
    "path": "/users/v1/register",
    "method": "POST",
    "login": false,
    "registration": false,
    "user_requested": false,
    "healthcheck": false,
    "requires_authorization": false,
    "authorization_mechanism": null,
    "parameters": [
        {
            "name": "username",
            "type": "string",
            "required": true,
            "example": "name1"
        },
        {
            "name": "password",
            "type": "string",
            "required": true,
            "example": "pass1"
        },
        {
            "name": "email",
            "type": "string",
            "required": true,
            "example": "user@tempmail.com"
        }
    ]
}
```

## Identify User Registration Endpoints - Script based

In [None]:
import os
import json

# Load the parsed OpenAPI JSON file
def load_parsed_json(parsed_file_path):
    """Return the decoded content of the parsed OpenAPI JSON file.

    On any failure (missing file, invalid JSON, ...) the error is printed
    and None is returned.
    """
    try:
        with open(parsed_file_path, mode="r") as source:
            content = json.load(fp=source)
    except Exception as err:
        print(f"Error loading parsed JSON file: {err}")
        return None
    return content

# Identify registration endpoints with debugging
def identify_registration_endpoints(endpoints):
    """Flag likely registration endpoints in place, printing debug output.

    An endpoint gets ``registration = True`` when either

    * its lower-cased path contains one of the registration keywords, or
    * its method is ``POST`` and one of its parameters is named
      ``username``, ``email`` or ``password`` (case-insensitive).

    Endpoints that match neither rule keep their current flag.

    Args:
        endpoints: List of endpoint dicts with at least ``path`` and
            ``method``; ``parameters`` is optional.

    Returns:
        The same list that was passed in.
    """
    # Substrings of the path that suggest registration
    registration_keywords = ("register", "signup", "create-user", "create")
    # Parameter names that suggest account credentials
    credential_names = ("username", "email", "password")

    print("\nDebugging Registration Endpoint Identification:\n")
    for endpoint in endpoints:
        # Normalize the path for easier matching
        normalized_path = endpoint["path"].lower().strip("/")
        parameters = endpoint.get("parameters") or []
        matched = False

        # Check if the entire path contains any registration keywords
        if any(keyword in normalized_path for keyword in registration_keywords):
            endpoint["registration"] = True
            matched = True
            print(f"[MATCH] Path '{endpoint['path']}' contains registration keyword.")

        # Check if the method is POST and parameters suggest registration
        if not matched and endpoint["method"] == "POST":
            # A parameter name can be None or missing (for example a "$ref"
            # parameter that has no inline "name"); skip those instead of
            # crashing on .lower().
            params = [
                param["name"].lower()
                for param in parameters
                if isinstance(param, dict) and isinstance(param.get("name"), str)
            ]
            if any(name in params for name in credential_names):
                endpoint["registration"] = True
                matched = True
                print(f"[MATCH] Parameters for '{endpoint['path']}' suggest registration: {params}")

        # If no match, log that as well
        if not matched:
            print(f"[NO MATCH] Path '{endpoint['path']}' did not match keywords or registration parameters.")
            print(f" - Normalized Path: {normalized_path}")
            print(f" - Parameters: {parameters}")

    return endpoints

# Save the updated JSON back to the same file
def save_updated_json(parsed_data, parsed_file_path):
    """Serialize *parsed_data* as 4-space-indented JSON into *parsed_file_path*.

    A confirmation is printed on success; any failure is printed instead of
    being raised. Returns None.
    """
    try:
        with open(parsed_file_path, mode="w") as sink:
            json.dump(parsed_data, fp=sink, indent=4)
        print(f"Updated OpenAPI spec saved to: {parsed_file_path}")
    except Exception as err:
        print(f"Error saving updated JSON file: {err}")

# Main execution
# Imported here so this cell does not depend on another cell's imports
from datetime import date

# Use today's run folder, like the other cells do, instead of a hard-coded
# date. A fixed date makes this cell read a stale (or missing) file on any
# other day.
parsed_file_path = os.path.join(
    results_base_folder,
    f"{project_name}_{date.today().strftime('%Y-%m-%d')}",
    "parsed_openapi.json",
)
print(parsed_file_path)

parsed_data = load_parsed_json(parsed_file_path)

if parsed_data:
    # Step 1: Extract endpoints from parsed JSON
    endpoints = parsed_data.get("endpoints", [])

    # Step 2: Identify registration endpoints (with debug logs)
    endpoints = identify_registration_endpoints(endpoints)

    # Step 3: Save the updated JSON back to the same file
    parsed_data["endpoints"] = endpoints
    save_updated_json(parsed_data, parsed_file_path)


### Script output
Sets the `registration` key to true for 2 endpoints:
```json
"path": "/users/v1/register"
"path": "/createdb"
```

## LLM review of registration endpoints
Pass every endpoint flagged with `"registration": true` to the LLM for review.

In [None]:
import json
import os
import re
from datetime import date

import requests

# Function to load the parsed JSON file
def load_parsed_json(parsed_file_path):
    """Open the parsed OpenAPI JSON file and return its decoded content.

    The path is announced once the file has been opened. Returns None
    (after printing the error) if opening or decoding fails.
    """
    try:
        with open(parsed_file_path, mode="r") as source:
            print(f"Loading parsed OpenAPI JSON from: {parsed_file_path}")
            decoded = json.load(fp=source)
        return decoded
    except Exception as err:
        print(f"Error loading parsed JSON file: {err}")
    return None

# Function to send a single endpoint to the LLM with improved handling for detailed responses
def validate_with_llm(endpoint, model_name):
    """Ask the LLM whether *endpoint* performs user registration.

    Builds a prompt from the endpoint's path, method and parameters, posts
    it to ``http://localhost:11434/api/generate`` with streaming enabled,
    concatenates the ``response`` fields of the streamed JSON lines and
    looks for a true/false verdict in the result.

    Args:
        endpoint: Endpoint dict with ``path`` and ``method``; ``parameters``
            is optional.
        model_name: Model identifier sent in the request body.

    Returns:
        True when the first standalone ``true``/``false`` word in the
        answer is ``true``. False when it is ``false``, when neither word
        occurs, on a non-200 status, or on any exception (which is printed).
    """
    try:
        # Construct the LLM prompt
        parameters_text = "\n".join(
            f"  - {param.get('name')} (type: {param.get('type')}, example: {param.get('example')})"
            for param in endpoint.get("parameters", [])
        )
        prompt = (
            f"Analyze the following API endpoint and determine if it handles user registration:\n\n"
            f"Path: {endpoint['path']}\n"
            f"Method: {endpoint['method']}\n"
            f"Request Parameters:\n{parameters_text}\n\n"
            f"Does this endpoint perform user registration? Return only `true` or `false`."
        )

        print(f"[LLM REQUEST] Sending endpoint '{endpoint['path']}' to LLM for validation...")

        # The context manager releases the streamed connection on every exit
        # path. The timeout is (connect, read) in seconds so that an
        # unreachable or stalled server cannot block the notebook forever.
        with requests.post(
            "http://localhost:11434/api/generate",
            json={"prompt": prompt, "model": model_name},
            stream=True,
            timeout=(10, 300),
        ) as response:
            if response.status_code != 200:
                print(f"[LLM ERROR] {response.status_code} - {response.text}")
                return False

            # Accumulate streamed response chunks
            pieces = []
            for chunk in response.iter_lines(decode_unicode=True):
                if not chunk:
                    # Blank keep-alive line, not a malformed chunk
                    continue
                try:
                    chunk_data = json.loads(chunk)
                except json.JSONDecodeError:
                    print(f"[STREAM ERROR] Skipping invalid chunk: {chunk}")
                    continue
                piece = chunk_data.get("response") if isinstance(chunk_data, dict) else None
                if isinstance(piece, str) and piece:
                    # Keep the chunk's own whitespace: stripping each chunk
                    # would glue neighbouring words together.
                    pieces.append(piece)
                    if piece.strip():
                        print(f"[STREAM CHUNK] {piece}")

        full_response = "".join(pieces).strip()
        decision = _parse_llm_decision(full_response)
        if decision is None:
            print(f"[LLM WARNING] Unexpected response format: {full_response}")
            return False  # Default to false if response is unclear

        print(f"[LLM FINAL RESPONSE] Endpoint '{endpoint['path']}' validated as: {str(decision).lower()}")
        return decision
    except Exception as e:
        # Deliberately broad: a failed validation must not abort the whole run.
        print(f"[LLM EXCEPTION] Error validating endpoint with LLM: {e}")
        return False


def _parse_llm_decision(text):
    """Return the verdict of the first standalone true/false word in *text*, or None."""
    # Word boundaries accept "**True**", "`false`" or "true." but reject
    # substrings of other words. Taking the first hit means an answer that
    # mentions both words is no longer always read as "true".
    match = re.search(r"\b(true|false)\b", text.lower())
    if match is None:
        return None
    return match.group(1) == "true"

# Function to refine registration endpoints with LLM validation
def refine_registration_endpoints_with_llm(endpoints, model_name):
    """Re-check every endpoint flagged as registration with the LLM.

    For each endpoint whose ``registration`` value is truthy, the flag is
    replaced by the result of ``validate_with_llm``. All other endpoints are
    skipped. The list is modified in place and returned.
    """
    print("Starting LLM validation for registration endpoints...")
    for candidate in endpoints:
        if not candidate["registration"]:
            # Never flagged by the script-based pass, so there is nothing to review
            print(f"[SKIP] Endpoint '{candidate['path']}' not flagged as registration, skipping.")
            continue

        print(f"[VALIDATION] Validating endpoint: {candidate['path']} (Currently flagged as registration)")
        verdict = validate_with_llm(candidate, model_name=model_name)
        candidate["registration"] = verdict  # The LLM's answer replaces the heuristic flag
        print(f"[UPDATE] Endpoint '{candidate['path']}' registration flag set to: {verdict}")
    print("Completed LLM validation for all flagged endpoints.")
    return endpoints

# Function to save the updated JSON back to the same file
def save_updated_json(parsed_data, parsed_file_path):
    """Dump *parsed_data* as JSON (indent 4) to *parsed_file_path*.

    Prints a message before writing and a ``[SAVE SUCCESS]`` message
    afterwards. A failure is reported with a ``[SAVE ERROR]`` message
    instead of being raised. Returns None.
    """
    try:
        print(f"Saving updated OpenAPI JSON to: {parsed_file_path}")
        with open(parsed_file_path, mode="w") as sink:
            json.dump(parsed_data, fp=sink, indent=4)
        print(f"[SAVE SUCCESS] Updated OpenAPI spec saved to: {parsed_file_path}")
    except Exception as err:
        print(f"[SAVE ERROR] Error saving updated JSON file: {err}")

# Main execution
# Today's run folder holds the parsed_openapi.json to be reviewed
parsed_file_path = os.path.join(
    results_base_folder,
    "{}_{}".format(project_name, date.today().strftime("%Y-%m-%d")),
    "parsed_openapi.json",
)

parsed_data = load_parsed_json(parsed_file_path)

# Nothing is validated or written when the parsed file could not be loaded
if parsed_data:
    # Pull the endpoint list out of the parsed JSON
    print("[INFO] Extracting endpoints from parsed JSON...")
    endpoints = parsed_data.get("endpoints", [])
    print("[INFO] Found {} endpoints for processing.".format(len(endpoints)))

    # Let the LLM confirm or reject each flagged registration endpoint
    endpoints = refine_registration_endpoints_with_llm(endpoints, model_name)

    # Write the reviewed endpoints back to the same file
    parsed_data["endpoints"] = endpoints
    save_updated_json(parsed_data, parsed_file_path)


### Script output
Sets the `registration` flag of the `/createdb` endpoint back to false.