<a href="https://colab.research.google.com/github/shahzad-r1zv1/LocalLLM_experiments/blob/main/localLLM_crawler.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


**🤖 Robot Framework + Local LLM + OpenCV in Colab**

*This notebook demonstrates how to connect Robot Framework with a local LLM (via Ollama) and OpenCV inside Google Colab.*

In [None]:
# STEP 1: INSTALL DEPENDENCIES
# ----------------------------------------
# We're installing:
# - Robot Framework → our test automation engine
# - OpenCV → image analysis and preprocessing
# - Pillow, numpy → image and array utilities
# - requests → for calling Ollama's local API
# - flask (optional) → if we later want to make an HTTP bridge
# ----------------------------------------

!pip install -q robotframework opencv-python pillow numpy requests

In [None]:
# STEP 2: INSTALL & START OLLAMA
# ----------------------------------------
# Colab lacks systemd, so we run ollama serve manually in the background.
# We also mute most of its logs to keep the cell clean.
# ----------------------------------------

!curl -fsSL https://ollama.com/install.sh | sh > /dev/null 2>&1 || true
!nohup ollama serve > ollama.log 2>&1 &


import time, requests
print("Starting Ollama...", end="")
for _ in range(30):
  try:
    r = requests.get("http://127.0.0.1:11434")
    if r.status_code == 200:
      print("\n Ollama is live on port 11434")
      break
  except Exception:
    time.sleep(2)
  else:
    raise RuntimeError("  Ollama failed to start.")

Starting Ollama...
 Ollama is live on port 11434


In [None]:
# Define the model name here.
# This is the Ollama model tag used by the notebook cells below: it is what
# `ollama pull` downloads and what is sent as the "model" field of each
# /api/generate request. Swap in the commented alternative for a smaller model.
MODEL_NAME = "qwen3-coder:30b"
# MODEL_NAME = "llama3.1:8b"
print(f"Using model: {MODEL_NAME}")

Using model: qwen3-coder:30b


In [None]:
# STEP 3: DOWNLOAD THE MODEL
# ----------------------------------------
# Pulls whichever model tag MODEL_NAME holds (set in the cell above).
# The commented-out lines are alternative models kept for reference;
# to use one, change MODEL_NAME rather than pulling it here by hand.
# ----------------------------------------

# !ollama pull phi3

# Pull a more capable model — adjust name as per availability & GPU/VRAM capacity
#!ollama pull llama3

!ollama pull "{MODEL_NAME}"

#!ollama pull codellama:34b
# !ollama pull command-r-plus

[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l


In [None]:
# STEP 4: QUICK API TEST
# ----------------------------------------
# Confirms that the model can generate a simple reply.
# ----------------------------------------

import requests, json
from IPython.display import Markdown, display

# ✅ Use the correct model tag you actually have installed
payload = {"model": MODEL_NAME, "prompt": "Say hello! What LLM Model are you?"}

# Stream the response for token-by-token parsing.
# The timeout stops the cell from hanging forever if Ollama stalls.
resp = requests.post(
    "http://127.0.0.1:11434/api/generate",
    json=payload,
    stream=True,
    timeout=300,
)
# A failed request (e.g. unknown model tag) would otherwise show up only as an
# empty reply, so surface it together with the server's own message.
if not resp.ok:
    raise RuntimeError(f"Ollama returned HTTP {resp.status_code}: {resp.text.strip()}")

clean_output = ""
for line in resp.iter_lines():
    if not line:
        continue
    try:
        data = json.loads(line.decode("utf-8"))
    except json.JSONDecodeError:
        # Skip anything that is not a complete JSON line.
        continue
    if "error" in data:
        # Ollama reports failures that happen mid-stream as an {"error": ...} object.
        raise RuntimeError(f"Ollama error: {data['error']}")
    if "response" in data:
        clean_output += data["response"]

clean_output = clean_output.strip()

# Print nice clean text output
print("\n🤖 LLM says:\n")
print(clean_output)

# Also render pretty in Colab or Jupyter (best effort: the plain text above
# has already been printed, so a rendering problem is only reported).
try:
    display(Markdown(f"### 🤖 LLM Output\n\n{clean_output}"))
except Exception as exc:
    print(f"(Markdown rendering skipped: {exc})")


🤖 LLM says:

Hello! I am Qwen, a large-scale language model independently developed by Alibaba Group. I can answer questions, create text, such as writing stories, official documents, emails, scripts, and more. I can also express opinions and play games. Is there anything I can assist you with?


### 🤖 LLM Output

Hello! I am Qwen, a large-scale language model independently developed by Alibaba Group. I can answer questions, create text, such as writing stories, official documents, emails, scripts, and more. I can also express opinions and play games. Is there anything I can assist you with?

In [None]:
# ----------------------------
# STEP 5: Create Custom Library (LLMVisionLib.py) — Final Fix
# ----------------------------
# The library source is kept in a plain (non-f) string so that Robot's ${...}
# syntax and the f-strings inside it reach the file untouched. The one
# placeholder in the string is replaced further down with the notebook's
# MODEL_NAME, so the generated library defaults to the model chosen above.
library_code = '''
import cv2
import base64
import requests
import os
import numpy as np
from robot.api.deco import keyword
from robot.libraries.BuiltIn import BuiltIn
import json

# Model tag substituted in by the notebook cell that generated this file.
DEFAULT_MODEL_NAME = __DEFAULT_MODEL_NAME__


def _resolve_model_name():
    """Pick the Ollama model tag for this run.

    A Robot variable named MODEL_NAME (for example set with robot --variable)
    wins; otherwise the default written into this file is used.
    """
    try:
        configured = BuiltIn().get_variable_value('${MODEL_NAME}')
    except Exception:
        # Raised when this module is imported outside a running Robot execution.
        configured = None
    # get_variable_value returns None for an undefined variable instead of
    # raising, so the fallback has to be applied explicitly.
    return configured or DEFAULT_MODEL_NAME


MODEL_NAME = _resolve_model_name()


@keyword
def analyze_image_with_llm(image_path, prompt):
    """Decode an image with OpenCV, then send a text prompt about it to the LLM.

    Arguments:
    - image_path: path to the image file; resolved to an absolute path.
    - prompt: instruction text placed at the start of the LLM prompt.

    Returns the LLM reply as a string. Every failure (missing file, decode
    error, LLM or HTTP error) is logged and returned as a message starting
    with the cross-mark emoji instead of being raised.

    Side effect: writes the Canny edge map to /content/edges_debug.jpg.

    NOTE: only the first 400 base64 characters of the file are embedded in the
    text prompt; the image itself is not attached to the request.
    """
    builtin = BuiltIn()
    builtin.log_to_console(f"📸 Analyzing image: {image_path}")

    abs_path = os.path.abspath(image_path)
    if not os.path.exists(abs_path):
        msg = f"❌ Image not found at path: {abs_path}"
        builtin.log(msg)
        return msg

    try:
        # Read the file once; the same bytes feed both OpenCV and base64.
        with open(abs_path, "rb") as f:
            raw_bytes = f.read()
        image_bytes = np.asarray(bytearray(raw_bytes), dtype=np.uint8)
        img = cv2.imdecode(image_bytes, cv2.IMREAD_COLOR)
        if img is None:
            msg = f"❌ Failed to decode image from {abs_path}"
            builtin.log(msg)
            return msg
    except Exception as e:
        msg = f"❌ OpenCV load error: {e}"
        builtin.log(msg)
        return msg

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    cv2.imwrite("/content/edges_debug.jpg", edges)
    builtin.log_to_console("✅ OpenCV edge detection complete.")

    # Encode image for LLM
    img_b64 = base64.b64encode(raw_bytes).decode("utf-8")

    prompt_text = (
        f"{prompt}\\n"
        "Describe what you see in this image. The first 400 chars of base64 are below:\\n"
        f"{img_b64[:400]}..."
    )

    payload = {
        "model": MODEL_NAME,
        "prompt": prompt_text
    }

    try:
        builtin.log_to_console(f"🧠 Sending request to LLM ({MODEL_NAME})...")
        resp = requests.post(
            "http://127.0.0.1:11434/api/generate",
            json=payload,
            stream=True,
            timeout=180
        )
        # Without this check a rejected request would be reported as a
        # successful, empty reply.
        if not resp.ok:
            raise RuntimeError(f"HTTP {resp.status_code}: {resp.text.strip()}")

        clean_output = ""
        for line in resp.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line.decode("utf-8"))
            except json.JSONDecodeError:
                continue
            if "error" in data:
                # Failure reported by Ollama inside the stream.
                raise RuntimeError(data["error"])
            if "response" in data:
                clean_output += data["response"]

        clean_output = clean_output.strip()
        builtin.log(f"LLM Response:\\n{clean_output}")
        builtin.log_to_console("🤖 LLM Response captured successfully.")
        return clean_output
    except Exception as e:
        msg = f"❌ Error contacting LLM: {e}"
        builtin.log(msg)
        builtin.log_to_console(msg)
        return msg
'''

# Bake the notebook's model choice into the generated file. repr() produces a
# valid Python string literal; the hard-coded tag is only used when this cell
# runs before MODEL_NAME has been defined.
_default_model = str(globals().get("MODEL_NAME") or "qwen3-coder:30b")
library_code = library_code.replace("__DEFAULT_MODEL_NAME__", repr(_default_model))

# The source contains emoji, so pin the encoding instead of relying on the locale.
with open("/content/LLMVisionLib.py", "w", encoding="utf-8") as f:
    f.write(library_code)
print("✅ Library updated with absolute, byte-safe, and fully logged LLM integration.")

✅ Library updated with absolute, byte-safe, and fully logged LLM integration.


In [None]:
# STEP 4 (repeat): QUICK API TEST
# ----------------------------------------
# Second sanity check, run after the library has been written: confirms that
# the model can still generate a simple reply.
# ----------------------------------------

import requests, json
from IPython.display import Markdown, display

# ✅ Use the correct model tag you actually have installed
payload = {"model": MODEL_NAME, "prompt": "Say hello!"}

# Stream the response for token-by-token parsing.
# The timeout stops the cell from hanging forever if Ollama stalls.
resp = requests.post(
    "http://127.0.0.1:11434/api/generate",
    json=payload,
    stream=True,
    timeout=300,
)
# A failed request (e.g. unknown model tag) would otherwise show up only as an
# empty reply, so surface it together with the server's own message.
if not resp.ok:
    raise RuntimeError(f"Ollama returned HTTP {resp.status_code}: {resp.text.strip()}")

clean_output = ""
for line in resp.iter_lines():
    if not line:
        continue
    try:
        data = json.loads(line.decode("utf-8"))
    except json.JSONDecodeError:
        # Skip anything that is not a complete JSON line.
        continue
    if "error" in data:
        # Ollama reports failures that happen mid-stream as an {"error": ...} object.
        raise RuntimeError(f"Ollama error: {data['error']}")
    if "response" in data:
        clean_output += data["response"]

clean_output = clean_output.strip()

# Print nice clean text output
print("\n🤖 LLM says:\n")
print(clean_output)

# Also render pretty in Colab or Jupyter (best effort: the plain text above
# has already been printed, so a rendering problem is only reported).
try:
    display(Markdown(f"### 🤖 Qwen3-Coder Output\n\n{clean_output}"))
except Exception as exc:
    print(f"(Markdown rendering skipped: {exc})")



🤖 LLM says:

Hello there! It's nice to meet you! How are you doing today?


### 🤖 Qwen3-Coder Output

Hello there! It's nice to meet you! How are you doing today?

In [None]:
# STEP 6: SAMPLE IMAGE
# Downloads the OpenCV logo into the current working directory.
# NOTE: the remote file is a .png but is saved under a .jpg name; the later
# cells all refer to it as sample_image.jpg.
!wget -q -O sample_image.jpg https://upload.wikimedia.org/wikipedia/commons/5/53/OpenCV_Logo_with_text.png

# NOTE(review): this message is printed unconditionally — wget runs with -q and
# its exit status is not checked, so a failed download is not reported here.
print("✅ Sample image downloaded.")


✅ Sample image downloaded.


In [None]:
# STEP 7: ROBOT TEST FILE
# Source of a one-test Robot Framework suite: it loads LLMVisionLib.py, calls
# its image-analysis keyword on sample_image.jpg and logs what comes back.
test_code = '''
*** Settings ***
Library    LLMVisionLib.py

*** Test Cases ***
Analyze OpenCV Logo
    ${result}=    Analyze Image With Llm    sample_image.jpg    Identify any text or symbols
    Log    ${result}
'''
# Write the suite next to the library so Robot can resolve both by relative path.
with open("vision_test.robot", "w") as suite_file:
    suite_file.write(test_code)
print("✅ Test file ready.")

✅ Test file ready.


In [None]:
import cv2
import numpy as np
import os

path = "/content/sample_image.jpg"
print("🔍 Checking file:", path)

if not os.path.exists(path):
    print("❌ File does not exist — re-downloading...")
    !wget -q -O /content/sample_image.jpg https://upload.wikimedia.org/wikipedia/commons/7/75/OpenCV_Logo_with_text.png

# Read bytes safely
with open(path, "rb") as f:
    file_bytes = np.asarray(bytearray(f.read()), dtype=np.uint8)

print(f"📏 Read {len(file_bytes)} bytes")

if len(file_bytes) == 0:
    raise ValueError("❌ File read as empty bytes — something went wrong with file I/O.")

img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)

if img is None:
    raise ValueError("❌ OpenCV still failed to decode image — file may be corrupted.")
else:
    print("✅ Image successfully decoded by OpenCV.")


🔍 Checking file: /content/sample_image.jpg
📏 Read 31958 bytes
✅ Image successfully decoded by OpenCV.


In [None]:
# STEP 8: RUN TEST
# ----------------------------
# STEP 8: Run the Robot Framework Test (with output files)
# ----------------------------
!robot --output /content/output.xml --log /content/log.html --report /content/report.html vision_test.robot


Vision Test                                                                   
Analyze OpenCV Logo                                                   📸 Analyzing image: sample_image.jpg
✅ OpenCV edge detection complete.
🧠 Sending request to LLM (None)...
🤖 LLM Response captured successfully.
[32m.[0m[32m.[0m                                                                              Analyze OpenCV Logo                                                   | [32mPASS[0m |
------------------------------------------------------------------------------
Vision Test                                                           | [32mPASS[0m |
1 test, 1 passed, 0 failed
Output:  ]8;;file:///content/output.xml\/content/output.xml]8;;\
Log:     ]8;;file:///content/log.html\/content/log.html]8;;\
Report:  ]8;;file:///content/report.html\/content/report.html]8;;\


In [None]:

# ----------------------------
# STEP 9: Display Robot Log Report
# ----------------------------
from IPython.display import HTML, display
import os

log_path = "/content/log.html"
if os.path.exists(log_path) and os.path.getsize(log_path) > 1000:
    display(HTML(filename=log_path))
else:
    print("⚠️ log.html is empty or missing. Listing /content directory for debugging:")
    !ls -lh /content

## STEP 9: Display Robot Log Report

This cell displays the results of the Robot Framework test execution in an easy-to-read HTML format.

## STEP 10: Advanced LLM Showcase Test

This section demonstrates a more complex interaction with the local LLM. It performs the following steps:

1.  **Analyze Image with Rich Detail**: Uses the `Analyze Image With Llm` keyword from the `LLMVisionLib.py` to get a detailed description of the `sample_image.jpg`.
2.  **Summarize the Description**: Sends the previously generated description back to the LLM with a prompt to summarize it into three concise sentences.
3.  **Generate QA Pair**: Prompts the LLM to create three questions and answers based on its analysis of the image.
4.  **Provide Test Verdict**: Asks the LLM to give a verdict on the image's visual quality (clarity, contrast, recognizability) and explain its reasoning.

This showcases the LLM's ability to perform multi-turn reasoning and contextual understanding based on the image analysis provided by OpenCV.

## STEP 11: GPU Stress Test (PyTorch Simulation)

This cell runs a simulation to stress the GPU using PyTorch. It includes:

1.  **GPU Availability Check**: Determines if a GPU is available and reports the GPU name if found.
2.  **Large Tensor Operations**: Performs matrix multiplications with large tensors to load the GPU.
3.  **Convolution Benchmark**: Runs a series of convolutional operations to simulate a common deep learning workload.
4.  **Memory Usage Report**: Displays the allocated and cached GPU memory.

This helps assess the performance and stability of the GPU environment within Colab.

In [None]:
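# ----------------------------
# STEP 11: GPU Stress Test (PyTorch Simulation)
# NOTE: this cell is currently disabled — every line below is commented out,
# so running it does nothing. Uncomment the code to run the stress test.
# ----------------------------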
# import torch, time
# print("🔥 Starting GPU stress test (PyTorch simulation)...\n")

# if not torch.cuda.is_available():
#     print("⚠️ GPU not available — running CPU fallback.")
# else:
#     device = torch.device('cuda')
#     print(f"✅ GPU Detected: {torch.cuda.get_device_name(0)}")

#     # Large tensor operations for GPU load
#     print("→ Generating large random tensors...")
#     for i in range(5):
#         size = 12000
#         a = torch.randn((size, size), device=device)
#         b = torch.randn((size, size), device=device)
#         t0 = time.time()
#         c = torch.matmul(a, b)
#         torch.cuda.synchronize()
#         print(f"✅ Matrix batch {i+1} complete in {time.time()-t0:.2f}s")

#     # Deep learning style convolution benchmark
#     print("\n→ Running convolutional GPU stress test...")
#     x = torch.randn((16, 3, 512, 512), device=device)
#     conv = torch.nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1).to(device)
#     t0 = time.time()
#     for i in range(50):
#         y = conv(x)
#     torch.cuda.synchronize()
#     print(f"✅ Convolution benchmark complete in {time.time()-t0:.2f}s")

#     print(f"\nGPU Memory Allocated: {torch.cuda.memory_allocated()/1e9:.2f} GB")
#     print(f"GPU Memory Cached: {torch.cuda.memory_reserved()/1e9:.2f} GB")

# print("\n💪 GPU stress test complete.")

In [None]:

# ----------------------------
# STEP 12: Advanced LLM Showcase Test
# ----------------------------
advanced_test_code = '''
*** Settings ***
Library    LLMVisionLib.py

*** Test Cases ***
AI Enhanced Vision and Reasoning
    Log To Console    Starting advanced AI vision and reasoning test...
    ${description}=    Analyze Image With Llm    sample_image.jpg    Describe this image in rich detail with visual elements, possible context, and inferred purpose.
    Log    ${description}

    ${summary_prompt}=    Set Variable    Based on your description, summarize the image in 3 concise sentences suitable for a QA report.
    ${summary}=    Analyze Image With Llm    sample_image.jpg    ${summary_prompt}
    Log    ${summary}

    ${qa_prompt}=    Set Variable    Generate 3 comprehension questions and answers based on this image analysis.
    ${qa_result}=    Analyze Image With Llm    sample_image.jpg    ${qa_prompt}
    Log    ${qa_result}

    ${verdict_prompt}=    Set Variable    Given all prior analysis, decide whether this image would pass a visual quality check (clarity, contrast, recognizability). Explain why.
    ${verdict}=    Analyze Image With Llm    sample_image.jpg    ${verdict_prompt}
    Log To Console    ${verdict}
'''
with open("advanced_vision_test.robot", "w") as f:
    f.write(advanced_test_code)
print("✅ Advanced LLM showcase test file created: advanced_vision_test.robot")

!robot advanced_vision_test.robot

✅ Advanced LLM showcase test file created: advanced_vision_test.robot
Advanced Vision Test                                                          
AI Enhanced Vision and Reasoning                                      Starting advanced AI vision and reasoning test...
[32m.[0m📸 Analyzing image: sample_image.jpg
✅ OpenCV edge detection complete.
🧠 Sending request to LLM (None)...
🤖 LLM Response captured successfully.
[32m.[0m[32m.[0m[32m.[0m📸 Analyzing image: sample_image.jpg
✅ OpenCV edge detection complete.
🧠 Sending request to LLM (None)...
🤖 LLM Response captured successfully.
[32m.[0m[32m.[0m[32m.[0m📸 Analyzing image: sample_image.jpg
✅ OpenCV edge detection complete.
🧠 Sending request to LLM (None)...
🤖 LLM Response captured successfully.
AI Enhanced Vision and Reasoning                                      [32m.[0m[32m.[0m📸 Analyzing image: sample_image.jpg
✅ OpenCV edge detection complete.
🧠 Sending request to LLM (None)...
🤖 LLM Response captured successfully.

In [None]:
import cv2
import os
from google.colab.patches import cv2_imshow

# Cross-check: load the sample image straight from disk with cv2.imread and
# report its shape. Nothing is displayed unless cv2_imshow is uncommented.
image_path = "sample_image.jpg"
abs_image_path = os.path.abspath(image_path)

print(f"Attempting to load image directly with cv2.imread from: {abs_image_path}")

if os.path.exists(abs_image_path):
    try:
        img = cv2.imread(abs_image_path, cv2.IMREAD_COLOR)

        # cv2.imread reports an unreadable file by returning None.
        if img is not None:
            print(f"Successfully loaded image with shape: {img.shape}")
            # Display the image
            # cv2_imshow(img) # Uncomment to display the image
            print("Image loaded successfully. You can uncomment cv2_imshow(img) to display it.")
        else:
            print(f"Error: cv2.imread returned None for {abs_image_path}. Check file format or corruption.")

    except Exception as e:
        print(f"An error occurred while trying to load the image with cv2: {e}")
else:
    print(f"Error: Image file not found at {abs_image_path}")

Attempting to load image directly with cv2.imread from: /content/sample_image.jpg
Successfully loaded image with shape: (739, 600, 3)
Image loaded successfully. You can uncomment cv2_imshow(img) to display it.


In [None]:
# ----------------------------
# STEP 13: Agentic AI Web Exploration and Test Generation
# ----------------------------
import re
import requests
from bs4 import BeautifulSoup

def generate_tests_from_url(url, style="plain"):
    """Fetch one page, summarise its structure, and ask the local LLM for test cases.

    Args:
        url: Page to fetch with a plain HTTP GET.
        style: "plain", "bdd" or "gwt" (case-insensitive); any other value
            falls back to a generic instruction. Also used verbatim in the
            output file name.

    Returns:
        The generated text. On failure, a string starting with "❌" that
        describes the error (nothing is raised to the caller).

    Side effects:
        Reads the notebook-level MODEL_NAME, prints progress messages, and
        writes the generated text to generated_tests_<style>.txt in the
        current working directory.
    """
    import json  # local import: this cell's import block does not include json

    print(f"🌐 Crawling {url} for structure and content...\n")
    try:
        response = requests.get(url, timeout=15)
        response.raise_for_status()
    except Exception as e:
        return f"❌ Failed to fetch page: {e}"

    soup = BeautifulSoup(response.text, 'html.parser')
    # soup.title.string is None when <title> is empty or has nested tags.
    title = soup.title.string.strip() if soup.title and soup.title.string else "(No title)"
    links = [a.get('href') for a in soup.find_all('a', href=True)[:15]]
    # Collapse whitespace runs BEFORE truncating, so the 600 characters that
    # reach the prompt are page text rather than layout whitespace.
    visible_text = re.sub(r'\s+', ' ', soup.get_text()).strip()

    context = f"Page Title: {title}\nTop Links: {links}\nVisible Text: {visible_text[:600]}"
    style_instruction = {
        "plain": "Generate 5 high-value QA test scenarios in plain English.",
        "bdd": "Generate 5 BDD-style Gherkin scenarios (Given/When/Then).",
        "gwt": "Generate 5 Given/When/Then step tests with clear preconditions."
    }.get(style.lower(), "Generate 5 general web test cases.")

    payload = {
        "model": MODEL_NAME,
        "prompt": f"Analyze the webpage and {style_instruction}\n\n{context}"
    }
    try:
        # One streamed request only; each call makes the model generate a
        # complete answer.
        resp = requests.post(
            "http://127.0.0.1:11434/api/generate",
            json=payload,
            stream=True,
            timeout=180
        )
        # Without this check a rejected request would yield an empty test file.
        if not resp.ok:
            raise RuntimeError(f"HTTP {resp.status_code}: {resp.text.strip()}")

        pretty_output = ""
        for line in resp.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line.decode("utf-8"))
            except json.JSONDecodeError:
                continue
            if "error" in data:
                # Failure reported by Ollama inside the stream.
                raise RuntimeError(data["error"])
            if "response" in data:
                pretty_output += data["response"]

        # Optional: turn literal backslash-n sequences into real newlines
        pretty_output = pretty_output.replace("\\n", "\n").strip()

        # Write to file and return
        test_file = f"generated_tests_{style}.txt"
        with open(test_file, "w", encoding="utf-8") as f:
            f.write(pretty_output)
        print(f"✅ Test cases generated and saved to {test_file}\n")

        return pretty_output
    except Exception as e:
        return f"❌ Error generating test cases: {e}"

In [None]:

# ----------------------------
# STEP 14: Example Use - Generate BDD Tests for a URL
# ----------------------------
# example_url = "https://example.com"

# example_url = "https://www.mud-muse.com/"
example_url = "https://parabank.parasoft.com/parabank/index.htm"

# A real newline here; the doubled backslash printed a literal "\n".
print(f"🚀 Generating AI-driven BDD test cases for {example_url}\n")
results = generate_tests_from_url(example_url, style="bdd")
# Cap the printed text so a very long answer does not flood the cell output.
print(results[:10000])

🚀 Generating AI-driven BDD test cases for https://parabank.parasoft.com/parabank/index.htm\n
🌐 Crawling https://parabank.parasoft.com/parabank/index.htm for structure and content...

✅ Test cases generated and saved to generated_tests_bdd.txt

Here are **5 BDD-style Gherkin scenarios** based on the structure and content of the **ParaBank | Welcome | Online Banking** webpage:

---

### **Scenario 1: User navigates to the home page**
```gherkin
Given the user is on the ParaBank website
When the user visits the home page
Then the page title should be "ParaBank | Welcome | Online Banking"
And the user should see the "Experience the difference" heading
```

---

### **Scenario 2: User accesses the customer login section**
```gherkin
Given the user is on the ParaBank home page
When the user clicks on the "Customer Login" section
Then the user should see the "Username" input field
And the user should see the "Password" input field
And the user should see the "Forgot login info?" link
And the 

In [None]:
# ----------------------------
# STEP 15: Deep Recursive Website Crawl + AI Test Suite Generator (True Depth Control)
# ----------------------------
import requests
import time
import json
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse, urldefrag
from IPython.display import Markdown, display

def normalize_url(base, href):
    """Resolve href against base and reduce it to a canonical page address.

    The fragment, the query string and any trailing slashes are dropped so
    that different spellings of one page compare equal while crawling.
    """
    absolute, _fragment = urldefrag(urljoin(base, href))
    without_query, _, _ = absolute.partition("?")
    return without_query.rstrip("/")

def crawl_and_generate_test_suite(base_url, depth=3, limit=50, style="bdd"):
    """Crawl a site level by level, then ask the local LLM for a QA test suite.

    Args:
        base_url: Page where the crawl starts. Only links on the same host
            (network location) as this URL are followed.
        depth: Maximum number of link levels to crawl, the start page included.
        limit: Maximum number of pages to fetch successfully.
        style: "plain", "bdd" or "gwt" (case-insensitive); any other value
            gets a generic instruction. Used verbatim in the output file name.

    Returns:
        The generated suite text, or a string starting with "❌" if no page
        could be crawled or the LLM call failed (nothing is raised).

    Side effects:
        Reads the notebook-level MODEL_NAME, prints progress, writes
        comprehensive_suite_<style>.txt and renders the result as Markdown.
    """
    start_time = time.time()
    print(f"🌍 Starting deep crawl on {base_url}\nDepth: {depth}, Limit: {limit}\n")

    # Links are in scope when they share the start page's host. A substring
    # test against base_url rejects every sibling page as soon as base_url
    # ends in a file name such as /index.htm.
    site_host = urlparse(base_url).netloc
    visited = set()
    current_level = [base_url]
    collected_pages = []
    limit_reached = False

    for lvl in range(depth):
        print(f"🔹 Crawling depth level {lvl + 1}/{depth} — {len(current_level)} URLs queued.")
        next_level = []
        queued = set()  # de-duplicates next_level so the queue count is honest

        for url in current_level:
            if len(visited) >= limit:
                print("⚠️ Hit limit, stopping crawl.")
                limit_reached = True
                break
            if url in visited:
                continue
            try:
                res = requests.get(url, timeout=15)
                res.raise_for_status()
                visited.add(url)
                soup = BeautifulSoup(res.text, "html.parser")

                title = soup.title.string.strip() if soup.title and soup.title.string else "(No title)"
                snippet = " ".join(soup.get_text().split()[:300])
                collected_pages.append(f"URL: {url}\nTitle: {title}\nSnippet: {snippet}\n")

                for link in soup.find_all("a", href=True):
                    full = normalize_url(url, link["href"])
                    if urlparse(full).netloc != site_host:
                        continue
                    if full in visited or full in queued:
                        continue
                    queued.add(full)
                    next_level.append(full)
            except Exception as e:
                print(f"⚠️ Skipping {url}: {e}")

        if limit_reached:
            # The limit is global, so do not start another level.
            break

        current_level = next_level  # advance to next layer

        if not current_level:
            print("✅ No more links to explore.")
            break

    print(f"\n✅ Crawl complete — {len(collected_pages)} pages captured in {time.time() - start_time:.1f}s.\n")

    if not collected_pages:
        # Nothing to describe to the model; skip the slow LLM call.
        err = "❌ No pages could be crawled, so there is nothing to generate tests from."
        print(err)
        return err

    # --- Style instructions ---
    style_instructions = {
        "plain": (
            "Generate 10 high-level QA test cases across these pages in plain English. "
            "Include purpose, expected behavior, and critical paths."
        ),
        "bdd": (
            "Generate 10 rich BDD-style Gherkin scenarios validating navigation, content visibility, and layout. "
            "Use clear Given/When/Then format."
        ),
        "gwt": (
            "Generate 10 concise Given/When/Then-style acceptance tests that test site usability and navigation."
        ),
    }.get(style.lower(), "Generate a clear QA test suite across all discovered pages.")

    combined_context = "\n\n".join(collected_pages)

    payload = {
        "model": MODEL_NAME,
        "prompt": (
            f"You are an expert QA automation engineer.\n"
            f"{style_instructions}\n\n"
            f"Analyze the following pages and create a cohesive suite:\n\n{combined_context}"
        ),
    }

    # --- LLM call ---
    print("🧠 Generating suite using LLM...\n")
    try:
        resp = requests.post(
            "http://127.0.0.1:11434/api/generate",
            json=payload,
            stream=True,
            timeout=300,
        )
        # Without this check a rejected request would yield an empty suite file.
        if not resp.ok:
            raise RuntimeError(f"HTTP {resp.status_code}: {resp.text.strip()}")

        clean_output = ""
        for line in resp.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line.decode("utf-8"))
            except json.JSONDecodeError:
                continue
            if "error" in data:
                # Failure reported by Ollama inside the stream.
                raise RuntimeError(data["error"])
            if "response" in data:
                clean_output += data["response"]

        # Turn literal backslash-n sequences in the answer into real newlines.
        clean_output = clean_output.replace("\\n", "\n").strip()
        header_map = {
            "plain": "📘 Plain English Test Cases",
            "bdd": "🧩 BDD (Gherkin) Scenarios",
            "gwt": "⚙️ Given/When/Then Test Cases",
        }
        header = header_map.get(style.lower(), "✅ Generated Test Suite")

        out_file = f"comprehensive_suite_{style}.txt"
        with open(out_file, "w", encoding="utf-8") as f:
            f.write(clean_output)

        print(f"✅ Saved to {out_file}\n")
        display(Markdown(f"## {header}\n\n```\n{clean_output}\n```"))
        return clean_output

    except Exception as e:
        err = f"❌ LLM generation failed: {e}"
        print(err)
        return err

In [None]:
# ----------------------------
# STEP 16A: Helper Functions — Crawl and Generate Multi-Style Suites
# ----------------------------
import requests, time, json
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urldefrag
from IPython.display import Markdown, display

def normalize_url(base, href):
    """Turn a link into an absolute URL without fragment, query string or trailing slash."""
    resolved = urljoin(base, href)
    page_url = urldefrag(resolved).url
    # Cut at the first "?" (if any) to discard the query string.
    query_start = page_url.find("?")
    if query_start != -1:
        page_url = page_url[:query_start]
    return page_url.rstrip("/")

def crawl_site_only(base_url, depth=3, limit=30):
    """Crawl a website level by level and return one context string per page.

    Args:
        base_url: Page where the crawl starts. Only links on the same host
            (network location) as this URL are followed.
        depth: Maximum number of link levels to crawl, the start page included.
        limit: Maximum number of pages to fetch successfully.

    Returns:
        A list of strings, one per fetched page, each holding the page URL,
        its title and the first 300 whitespace-separated words of its text.
        Pages that fail to load are reported with a printed warning and skipped.
    """
    from urllib.parse import urlparse  # not part of this cell's import block

    print(f"🌐 Starting crawl of {base_url}\nDepth={depth}, Limit={limit}")
    # Links are in scope when they share the start page's host. A substring
    # test against base_url rejects every sibling page as soon as base_url
    # ends in a file name such as /index.htm.
    site_host = urlparse(base_url).netloc
    visited = set()
    current_level = [base_url]
    collected = []
    limit_reached = False

    for lvl in range(depth):
        print(f"🔹 Depth {lvl+1}/{depth}: {len(current_level)} URLs queued")
        next_level = []
        queued = set()  # de-duplicates next_level so the queue count is honest
        for url in current_level:
            if len(visited) >= limit:
                print("⚠️ Hit limit, stopping crawl.")
                limit_reached = True
                break
            if url in visited:
                continue
            try:
                res = requests.get(url, timeout=15)
                res.raise_for_status()
                visited.add(url)
                soup = BeautifulSoup(res.text, "html.parser")
                # soup.title.string is None when <title> is empty or has nested
                # tags; guard it so such pages are not dropped by the except below.
                title = soup.title.string.strip() if soup.title and soup.title.string else "(No title)"
                snippet = " ".join(soup.get_text().split()[:300])
                collected.append(f"URL: {url}\nTitle: {title}\nSnippet: {snippet}\n")
                for link in soup.find_all("a", href=True):
                    full = normalize_url(url, link["href"])
                    if urlparse(full).netloc != site_host:
                        continue
                    if full in visited or full in queued:
                        continue
                    queued.add(full)
                    next_level.append(full)
            except Exception as e:
                print(f"⚠️ Skipping {url}: {e}")
        if limit_reached:
            # The limit is global, so do not start another level.
            break
        current_level = next_level
        if not current_level:
            break
    print(f"✅ Crawl complete — {len(collected)} pages captured.\n")
    return collected

def generate_suite_for_style(style, context, model_name):
    """Generate a test suite in the given style using the local LLM.

    Args:
        style: One of "plain", "bdd" or "gwt"; any other value falls back to a
            generic "Generate QA test cases." instruction.
        context: Text describing the crawled pages, appended to the prompt.
        model_name: Name of the model passed to the local generate endpoint.

    Returns:
        The generated text. It is also written to
        ``comprehensive_suite_<style>.txt`` (UTF-8) and rendered as Markdown.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
            (previously this silently produced an empty suite file).
    """
    style_prompts = {
        "plain": (
            "Generate 10 high-level plain-English QA test cases "
            "describing what a tester should verify for the pages below."
        ),
        "bdd": (
            "Generate 10 BDD-style (Gherkin) scenarios validating navigation, content, and layout. "
            "Use Given/When/Then syntax."
        ),
        "gwt": (
            "Generate 10 concise Given/When/Then acceptance test cases for usability, login, and navigation flows."
        ),
    }

    payload = {
        "model": model_name,
        "prompt": (
            f"You are an expert QA automation engineer.\n"
            f"{style_prompts.get(style, 'Generate QA test cases.')}\n\n"
            f"Website context:\n{context}"
        ),
    }

    print(f"🧠 Generating {style.upper()} suite…")
    chunks = []
    # The context manager releases the streamed connection even if parsing fails.
    with requests.post(
        "http://127.0.0.1:11434/api/generate",
        json=payload,
        stream=True,
        timeout=300,
    ) as resp:
        resp.raise_for_status()
        # The response is consumed line by line; each non-empty line is parsed as JSON.
        for line in resp.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line.decode("utf-8"))
            except json.JSONDecodeError:
                continue  # best effort: ignore lines that are not valid JSON
            if isinstance(data, dict) and "response" in data:
                chunks.append(data["response"])

    # NOTE(review): JSON decoding already yields real newlines; this replace only
    # affects literal backslash-n text in the model output — confirm it is still wanted.
    clean_output = "".join(chunks).replace("\\n", "\n").strip()
    file_name = f"comprehensive_suite_{style}.txt"
    # Explicit UTF-8 so the file reads back identically wherever it is opened as UTF-8.
    with open(file_name, "w", encoding="utf-8") as f:
        f.write(clean_output)

    header_map = {
        "plain": "📘 Plain English Test Cases",
        "bdd": "🧩 BDD (Gherkin) Scenarios",
        "gwt": "⚙️ Given/When/Then Test Cases",
    }
    # Model output often contains its own ``` fences; use a longer fence so the
    # rendered block is not closed prematurely.
    fence = "```"
    while fence in clean_output:
        fence += "`"
    display(Markdown(f"## {header_map.get(style, 'Test Suite')}\n\n{fence}\n{clean_output}\n{fence}"))
    print(f"✅ Saved {style.upper()} suite → {file_name}\n")
    return clean_output

In [None]:
# ----------------------------
# STEP 16B: Run Crawl + Generate Multi-Style Suites
# ----------------------------
example_site = "https://parabank.parasoft.com/parabank/index.htm"
print(f"🚀 Crawling {example_site} and generating all suite styles…\n")

# Crawl site (depth and limit can be tuned)
collected_pages = crawl_site_only(example_site, depth=3, limit=20)
if not collected_pages:
    # Without any page context the LLM would invent test cases and the
    # suite files would be overwritten with them, so stop here instead.
    raise RuntimeError(f"No pages could be crawled from {example_site}; suite generation aborted.")
combined_context = "\n\n".join(collected_pages)

# Generate three styles: plain, bdd, gwt
styles_to_generate = ["plain", "bdd", "gwt"]
for style in styles_to_generate:
    generate_suite_for_style(style, combined_context, MODEL_NAME)

🚀 Crawling https://parabank.parasoft.com/parabank/index.htm and generating all suite styles…

🌐 Starting crawl of https://parabank.parasoft.com/parabank/index.htm
Depth=3, Limit=20
🔹 Depth 1/3: 1 URLs queued
🔹 Depth 2/3: 3 URLs queued
✅ Crawl complete — 2 pages captured.

🧠 Generating PLAIN suite…


## 📘 Plain English Test Cases

```
Here are 10 high-level QA test cases for the ParaBank homepage:

1. **Page Title Verification**
   Verify that the page title displays "ParaBank | Welcome | Online Banking" when accessing the homepage.

2. **Navigation Menu Validation**
   Verify that all main navigation links (Home, About Us, Services, Products, Locations, Forum, Site Map, Contact Us) are visible and functional.

3. **Login Section Functionality**
   Verify that the Customer Login section with Username/Password fields and "Forgot login info?" link is present and accessible.

4. **Registration Link Verification**
   Verify that the "Register" link is visible and functional, allowing users to access the registration page.

5. **Online Services Section Validation**
   Verify that the Online Services section contains all expected links: Bill Pay, Account History, Transfer Funds, Withdraw Funds, Check Balances, and Make Deposits.

6. **Header and Footer Elements**
   Verify that the header contains the ParaBank logo and main navigation, and the footer contains copyright information and company website link.

7. **Session ID Persistence**
   Verify that the session ID parameter (jsessionid) is properly maintained in the URL and doesn't cause page loading issues.

8. **Latest News Section**
   Verify that the "Latest News" section displays the expected content including the date (10/20/2025) and news items.

9. **Responsive Design Check**
   Verify that the page layout adapts properly to different screen sizes and maintains usability on mobile devices.

10. **External Links and Resources**
    Verify that all external links (including the Parasoft website link) are functional and open in the correct location.
```

✅ Saved PLAIN suite → comprehensive_suite_plain.txt

🧠 Generating BDD suite…


## 🧩 BDD (Gherkin) Scenarios

```
```gherkin
Feature: ParaBank Website Navigation, Content, and Layout Validation

Scenario: Verify main page title and initial navigation
  Given I am on the ParaBank homepage
  When I load the page
  Then the page title should be "ParaBank | Welcome | Online Banking"
  And the page should contain "ParaBank | Welcome | Online Banking Experience the difference"

Scenario: Verify navigation menu elements are present
  Given I am on the ParaBank homepage
  When I view the navigation menu
  Then I should see "Home" link
  And I should see "About Us" link
  And I should see "Services" link
  And I should see "Products" link
  And I should see "Locations" link
  And I should see "Forum" link
  And I should see "Site Map" link
  And I should see "Contact Us" link

Scenario: Verify customer login section functionality
  Given I am on the ParaBank homepage
  When I view the customer login section
  Then I should see "Customer Login" heading
  And I should see "Username" input field
  And I should see "Password" input field
  And I should see "Forgot login info?" link
  And I should see "Register" link

Scenario: Verify main service links are displayed
  Given I am on the ParaBank homepage
  When I view the main services section
  Then I should see "ATM Services" section
  And I should see "Withdraw Funds" link
  And I should see "Transfer Funds" link
  And I should see "Check Balances" link
  And I should see "Make Deposits" link
  And I should see "Online Services" section

Scenario: Verify online services links are present
  Given I am on the ParaBank homepage
  When I view the online services section
  Then I should see "Bill Pay" link
  And I should see "Account History" link
  And I should see "Transfer Funds" link

Scenario: Verify latest news section content
  Given I am on the ParaBank homepage
  When I view the latest news section
  Then I should see "Latest News" heading
  And I should see news date "10/20/2025"
  And I should see news content "ParaBank Is Now Re-Opened"
  And I should see "New! Online Bill Pay" news item
  And I should see "New! Online Account Transfers" news item

Scenario: Verify footer information is displayed
  Given I am on the ParaBank homepage
  When I view the footer section
  Then I should see copyright information "© Parasoft. All rights reserved."
  And I should see company website link "www.parasoft.com"

Scenario: Verify navigation to About Us page
  Given I am on the ParaBank homepage
  When I click on "About Us" link
  Then I should navigate to About Us page
  And the page title should contain "About Us"

Scenario: Verify navigation to Services page
  Given I am on the ParaBank homepage
  When I click on "Services" link
  Then I should navigate to Services page
  And the page title should contain "Services"

Scenario: Verify navigation to Products page
  Given I am on the ParaBank homepage
  When I click on "Products" link
  Then I should navigate to Products page
  And the page title should contain "Products"
```
```

✅ Saved BDD suite → comprehensive_suite_bdd.txt

🧠 Generating GWT suite…


## ⚙️ Given/When/Then Test Cases

```
Here are 10 concise Given/When/Then acceptance test cases for usability, login, and navigation flows:

**Login Flow Tests:**

1. **Given** User is on the ParaBank homepage **When** User enters valid username and password **Then** User should be redirected to the customer home page

2. **Given** User is on the ParaBank homepage **When** User enters invalid username and password **Then** User should see an error message "Login failed"

3. **Given** User is on the login page **When** User clicks "Forgot login info?" link **Then** User should be directed to the forgot login information page

4. **Given** User is on the login page **When** User clicks "Register" link **Then** User should be directed to the registration page

**Navigation Tests:**

5. **Given** User is on the ParaBank homepage **When** User clicks "About Us" link **Then** User should be navigated to the About Us page

6. **Given** User is on the ParaBank homepage **When** User clicks "Services" menu **Then** User should see service options dropdown menu

7. **Given** User is on the ParaBank homepage **When** User clicks "Locations" link **Then** User should be navigated to the Locations page

8. **Given** User is on the ParaBank homepage **When** User clicks "Contact Us" link **Then** User should be navigated to the Contact Us page

**Usability Tests:**

9. **Given** User is on the ParaBank homepage **When** User scrolls down to "Latest News" section **Then** User should see news items with dates and read more links

10. **Given** User is on the ParaBank homepage **When** User clicks "Online Bill Pay" link **Then** User should be navigated to the Bill Pay service page with service options
```

✅ Saved GWT suite → comprehensive_suite_gwt.txt



In [None]:
# ----------------------------
# STEP 17: Convert AI-Generated Test Cases into Executable Robot Framework Suite (with Custom Keywords)
# ----------------------------
import re, os
from robot.api import TestSuite

def sanitize_keyword_name(text):
    """Create a clean Robot Framework keyword name from a natural-language phrase.

    Every character other than ASCII letters, digits and spaces is removed,
    the words are title-cased, and runs of spaces are collapsed to a single
    space. Collapsing matters: Robot Framework treats two or more consecutive
    spaces as a cell separator, so "Foo  Bar" would be read as keyword "Foo"
    with argument "Bar".

    Args:
        text: The step phrase, e.g. 'I click on "About Us" link'.

    Returns:
        The cleaned name (e.g. "I Click On About Us Link"), or "" when no
        letters or digits remain.
    """
    cleaned = re.sub(r"[^A-Za-z0-9 ]+", "", text).title()
    # split()/join trims both ends and squeezes inner runs of spaces.
    return " ".join(cleaned.split())

def extract_keywords_from_bdd(content):
    """Extract reusable keyword names from BDD text.

    Each phrase following "Given", "When" or "Then" (case-insensitive) is
    converted with ``sanitize_keyword_name``. The name is returned exactly as
    that helper produces it (apart from squeezing repeated spaces), so a stub
    generated from this list matches a test step built from the same phrase.
    Removing words such as "User" here made the stub names differ from the
    names the test cases call.

    Args:
        content: Raw BDD / Gherkin-like text.

    Returns:
        A sorted list of unique keyword names. Names that differ only in
        letter case or spacing are reported once, because Robot Framework
        treats them as the same keyword.
    """
    unique = {}
    for step in re.findall(r"(?i)(?:Given|When|Then)\s+(.+)", content):
        kw = " ".join(sanitize_keyword_name(step).split())
        if not kw:
            continue
        # Key mirrors Robot's name matching (case- and space-insensitive);
        # the first spelling seen is the one that is kept.
        unique.setdefault(kw.lower().replace(" ", ""), kw)
    return sorted(unique.values())

def _strip_markdown_noise(line):
    """Drop code fences, bold markers and list numbering from one line of LLM output."""
    line = line.strip()
    if line.startswith("```"):
        return ""
    line = line.replace("**", "").strip()
    return re.sub(r"^(?:\d+[.)]|[-*])\s+", "", line)


def _parse_bdd_scenarios(content):
    """Parse Gherkin-like text into ``[(scenario_name, [step phrase, ...]), ...]``.

    A "Scenario:" line opens a scenario. In text without such headers every
    line starting with "Given" opens one instead. Step lines start with
    Given/When/Then/And/But; "When"/"Then" written inline on the same line
    (e.g. "Given X When Y Then Z") are split into separate steps. Scenarios
    without any step are dropped.
    """
    scenarios = []
    steps = None          # step list being filled; None until a scenario is open
    under_header = False  # True once an explicit "Scenario:" header was seen
    for raw_line in content.splitlines():
        line = _strip_markdown_noise(raw_line)
        header = re.match(r"(?i)Scenario(?:\s+Outline)?\s*:\s*(.*)", line)
        if header:
            steps, under_header = [], True
            scenarios.append((header.group(1), steps))
            continue
        step = re.match(r"(?i)(Given|When|Then|And|But)\s+(.+)", line)
        if step is None:
            continue
        if step.group(1).lower() == "given" and not under_header:
            steps = []
            scenarios.append((line, steps))
        if steps is None:
            continue  # step-like line before any scenario: treat as prose
        # Case-sensitive on purpose, so ordinary lowercase "when"/"then" are not split.
        steps.extend(re.split(r"\s+(?:When|Then)\s+", step.group(2)))
    return [(name, found) for name, found in scenarios if found]


def convert_llm_output_to_robot(input_file, output_file="generated_suite.robot"):
    """Convert LLM-generated test cases into a Robot Framework .robot suite with keyword stubs.

    The input is treated as BDD when it contains at least one scenario with
    steps: a "Scenario:" header or a line starting with "Given", followed by
    Given/When/Then/And/But steps. Each step becomes a keyword call and every
    called keyword gets a logging stub under ``*** Keywords ***``. Otherwise
    the text is split on list markers ("1.", "-") and each item becomes a test
    that logs the item text on one line.

    Args:
        input_file: Path of the UTF-8 text file holding the LLM output.
        output_file: Path of the .robot file to write.

    Returns:
        None. A message is printed and nothing is written when ``input_file``
        does not exist.
    """
    if not os.path.exists(input_file):
        print(f"❌ Input file not found: {input_file}")
        return

    print(f"📄 Converting {input_file} → {output_file} …")
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read().strip()

    suite_name = os.path.basename(input_file).replace(".txt", "").replace("_", " ").title()
    header = f"*** Settings ***\nSuite Setup    Log    Starting {suite_name}\nSuite Teardown    Log    Ending {suite_name}\n\n*** Test Cases ***\n"

    test_cases_section = ""
    keywords_section = "\n*** Keywords ***\n"

    # Detect style from structure, not from the mere presence of the words
    # "given/when/then", which also occur in ordinary plain-English sentences.
    scenarios = _parse_bdd_scenarios(content)
    if scenarios:
        stubs = {}  # Robot-normalised name -> spelling used for the stub
        for number, (name, steps) in enumerate(scenarios, start=1):
            # Single spaces only: two or more spaces are a cell separator in Robot.
            title = " ".join(name.split())[:80]
            test_cases_section += "\n" + f"BDD Scenario {number}: {title}".strip() + "\n"
            called = 0
            for phrase in steps:
                kw = " ".join(sanitize_keyword_name(phrase).split())
                if not kw:
                    continue
                test_cases_section += f"    {kw}\n"
                # Robot matches keyword names ignoring case and spaces, so
                # define only one stub per normalised name.
                stubs.setdefault(kw.lower().replace(" ", ""), kw)
                called += 1
            if not called:
                # A test with an empty body is an error in Robot Framework.
                test_cases_section += "    No Operation\n"

        # Stubs come from the names actually called above, so every call resolves.
        for kw in sorted(stubs.values()):
            keywords_section += f"\n{kw}\n    [Documentation]    TODO: Implement '{kw}' logic\n    Log    Executing step: {kw}\n"

    else:  # plain fallback
        number = 0
        for chunk in re.split(r"^\s*(?:\d+\.|\-)\s*", content, flags=re.MULTILINE):
            # Flatten to one line with single spaces; embedded newlines or
            # runs of spaces would otherwise be parsed as extra rows/cells.
            text = " ".join(chunk.split())
            if not text:
                continue
            number += 1
            test_cases_section += f"\nPlain Test {number}\n    Log    {text}\n"

    # Save
    suite_text = header + test_cases_section + "\n" + keywords_section
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(suite_text)

    print(f"✅ Generated suite with custom keyword stubs → {output_file}")

    # Preview
    print("\n🧾 Preview:\n")
    print(suite_text[:1200])

# Convert each of the three generated suites that is present on disk
for style in ("plain", "bdd", "gwt"):
    input_file = "comprehensive_suite_" + style + ".txt"
    if not os.path.exists(input_file):
        continue  # this style was not generated; nothing to convert
    convert_llm_output_to_robot(input_file, "generated_suite_" + style + ".robot")


In [None]:
# ----------------------------
# STEP 18: LLM Review of Generated Robot Test Suites
# ----------------------------
import requests, json, glob
from IPython.display import Markdown, display

def review_robot_suite_with_llm(robot_file):
    """Send a Robot Framework file to the local LLM for structural and robustness review.

    Args:
        robot_file: Path of the .robot suite to review. At most the first
            8000 characters are included in the prompt.

    Returns:
        The review text, which is also written next to the suite as
        ``<name>_review.txt`` and rendered as Markdown. Returns None (after
        printing a notice) when ``robot_file`` does not exist.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    # Function-scope import: this cell's own import line does not bring in `os`.
    import os

    if not os.path.exists(robot_file):
        print(f"⚠️ Skipping (file not found): {robot_file}")
        return

    with open(robot_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Truncate to avoid overloading the model. This note used to sit inside the
    # f-string below, so it was sent to the model as part of the prompt.
    excerpt = content[:8000]

    prompt = f"""
You are a senior QA automation architect specializing in Robot Framework.
Review the following .robot test suite for:
1. Syntax correctness (headers, indentation, spacing).
2. Test case and keyword naming quality.
3. Missing or redundant keywords.
4. Robustness improvements (parameterization, tags, setups/teardowns).
5. Best-practice adherence for maintainability.

Respond in concise, numbered bullet points with specific corrections and examples.

--- BEGIN ROBOT FILE ---
{excerpt}
--- END ROBOT FILE ---
"""

    payload = {
        "model": MODEL_NAME,
        "prompt": prompt
    }

    print(f"🔍 Reviewing {robot_file} with LLM…")
    chunks = []
    # The context manager releases the streamed connection even if parsing fails.
    with requests.post("http://127.0.0.1:11434/api/generate", json=payload, stream=True, timeout=300) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line.decode("utf-8"))
            except json.JSONDecodeError:
                continue  # best effort: ignore lines that are not valid JSON
            if isinstance(data, dict) and "response" in data:
                chunks.append(data["response"])

    # NOTE(review): JSON decoding already yields real newlines; this replace only
    # affects literal backslash-n text in the model output — confirm it is still wanted.
    clean_output = "".join(chunks).replace("\\n", "\n").strip()
    # splitext (not str.replace) so the report path always differs from the
    # suite path and only the final extension is swapped.
    report_file = os.path.splitext(robot_file)[0] + "_review.txt"
    with open(report_file, "w", encoding="utf-8") as f:
        f.write(clean_output)

    print(f"✅ Review complete → {report_file}")
    display(Markdown(f"### 🧠 Review for `{os.path.basename(robot_file)}`\n\n{clean_output}"))
    return clean_output

# Review all generated suites (sorted: glob returns matches in arbitrary order,
# and a fixed order keeps the notebook output reproducible between runs)
for rf in sorted(glob.glob("generated_suite_*.robot")):
    review_robot_suite_with_llm(rf)