# Load data

In [3]:
from docx import Document
import re

def read_docx(file_path):
    """Read a docx file and return the text"""
    doc = Document(file_path)
    full_text = []
    for para in doc.paragraphs:
        full_text.append(para.text)
    return '\n'.join(full_text)


def extract_epic_overview(text: str):
    """
    Extracts the Epic Overview section from the raw requirement document text.
    Returns None if no overview is found.P
    """
    pattern = r"Epic Overview\s*(.+?)(?=\n(?:Requirements|PSE[0-9.]+|Narrative|Scope|Acceptance Criteria|Priority))"

    match = re.search(pattern, text, flags=re.DOTALL | re.IGNORECASE)
    if match:
        return match.group(1).strip()
    return None


In [4]:
text = read_docx(r'D:\RAG_testcases\Epic 1 – User Management and Authentication.docx')
overview = extract_epic_overview(text)
print(overview)

This epic focuses on the development of a user management system that allows users to register, log in, manage their profiles, and access the application securely using email and password authentication. The system will ensure that users can create and maintain their accounts with ease while providing the necessary security features to protect user data.


# Chunking

In [5]:
import re
from langchain_text_splitters import RecursiveCharacterTextSplitter
from tqdm import tqdm

In [6]:
# Define list heading in requirements
SECTION_HEADERS = [
    "Narrative",
    "Scope",
    "In Scope",
    "Out of Scope",
    "Acceptance Criteria",
    "Priority",
    "Business Rules",
    "Additional Comments"
]

In [7]:
def split_requirements(req_text: str):
    sections = []
    current_header = None
    current_lines = []

    for line in req_text.splitlines():
        stripped = line.strip()
        
        # Check if line matches any header
        header = next((h for h in SECTION_HEADERS if stripped.startswith(h)), None)
        
        if header:
            if current_header:
                sections.append((current_header, None, "\n".join(current_lines)))
            
            current_header = header
            current_lines = []
        else:
            current_lines.append(line)
            
    # flush final block
    if current_header:
        sections.append((current_header, None, "\n".join(current_lines)))

    return sections

In [8]:
def chunking_document(requirement_text: str, max_chunk_size: int = 800):
    """Chunk requirement document into hierarchical chunks"""
    chunks = []

    # Extract epic level
    lang = re.search(r"Language:\s*([a-zA-Z-]+)", requirement_text)
    epic_id = re.search(r"Epic ID:\s*([A-Za-z0-9]+)", requirement_text)

    epic_metadata = {
        "lang": lang.group(1) if lang else None,
        "epic_id": epic_id.group(1) if epic_id else None
    }

    # Detect requirement headers
    req_pattern = r"(PSE[0-9.]+)\s*–\s*(.+?)\n"
    matches = list(re.finditer(req_pattern, requirement_text))

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=max_chunk_size,
        chunk_overlap=50,
        separators=["\n\n", "\n", " ", ""]
    )

    for i, m in tqdm(
        enumerate(matches),
        total=len(matches),
        desc="Chunking requirement",
        ncols=100
    ):
        req_id = m.group(1).strip()
        req_name = m.group(2).strip()

        start = m.end()
        end = matches[i+1].start() if i+1 < len(matches) else len(requirement_text)
        req_block = requirement_text[start:end].strip()

        # Extract priority
        priority_match = re.search(r"Priority\s*\n([A-Z]+)", req_block)
        priority = priority_match.group(1).strip() if priority_match else None

        # Requirement-level metadata
        base_metadata = {
            **epic_metadata,
            "requirement_id": req_id,
            "requirement_name": req_name,
            "priority": priority
        }

        # Section-based parsing
        sections = split_requirements(req_block)

        for header, parent, content in sections:
            # Build metadata including optional parent_section
            metadata = {**base_metadata, "section": header}
            if parent:
                metadata["parent_section"] = parent

            # Skip empty sections
            if not content.strip():
                continue

            # Short enough -> 1 chunk
            if len(content) <= max_chunk_size:
                chunks.append({
                    "text": f"{content.strip()}",
                    "metadata": metadata
                })
            else:
                # Too long -> chunk using LangChain
                parts = splitter.split_text(content)
                for p in parts:
                    chunks.append({
                        "text": f"{p}",
                        "metadata": metadata
                    })
    return chunks

In [9]:
chunks = chunking_document(text)

Chunking requirement: 100%|█████████████████████████████████████████| 5/5 [00:00<00:00, 5001.55it/s]




In [10]:
for i, chunk in enumerate(chunks):
    print(f"# Chunk number {i}:")
    print(chunk["text"])
    print("metadata: ", chunk["metadata"])
    print("-" * 80)
    if i == 10:
        break

# Chunk number 0:
As a new user
I want to register using my email and a password
So that I can create an account and access the ProShop application.
metadata:  {'lang': 'en', 'epic_id': 'EP1', 'requirement_id': 'PSE1.1', 'requirement_name': 'ACCOUNT REGISTRATION', 'priority': 'MUST', 'section': 'Narrative'}
--------------------------------------------------------------------------------
# Chunk number 1:
A registration form
Input validation
Validate email format and password security
Successful registration handling
metadata:  {'lang': 'en', 'epic_id': 'EP1', 'requirement_id': 'PSE1.1', 'requirement_name': 'ACCOUNT REGISTRATION', 'priority': 'MUST', 'section': 'In Scope'}
--------------------------------------------------------------------------------
# Chunk number 2:
Email verification
The app will not send a verification email to confirm the user’s email address
metadata:  {'lang': 'en', 'epic_id': 'EP1', 'requirement_id': 'PSE1.1', 'requirement_name': 'ACCOUNT REGISTRATION', 'prior

# Embedding

In [11]:
from langchain_classic.schema import Document

def convert_chunks_to_documents(chunks: list) -> list:
    documents = []
    for chunk in tqdm(chunks):
        documents.append(Document(page_content=chunk["text"], metadata=chunk["metadata"]))
    return documents 

documents = convert_chunks_to_documents(chunks)

100%|██████████| 32/32 [00:00<?, ?it/s]


In [12]:
# checkout document
print("len of document", len(documents))

len of document 32


In [13]:
# print first chunk
print("content:")
print(documents[0].page_content)
print("metadata", documents[0].metadata)

content:
As a new user
I want to register using my email and a password
So that I can create an account and access the ProShop application.
metadata {'lang': 'en', 'epic_id': 'EP1', 'requirement_id': 'PSE1.1', 'requirement_name': 'ACCOUNT REGISTRATION', 'priority': 'MUST', 'section': 'Narrative'}


Load GEMINI API key for Embedding

In [14]:
import dotenv, os
from google import genai

In [15]:
# import gemini api key
dotenv.load_dotenv()

client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

In [41]:
# Embedding with gemini

def gemini_embedding(text: str | None = None):
    gemini_vectors = []

    for doc in tqdm(documents, desc="Embedding with Gemini API key"):
        res = client.models.embed_content(
            model="gemini-embedding-001",
            contents=doc.page_content,
        )

        gemini_vectors.append(res.embeddings[0].values)

    return gemini_vectors


gemini_vectors = gemini_embedding()

Embedding with Gemini API key: 100%|██████████| 32/32 [00:12<00:00,  2.61it/s]


In [42]:
# print to check
print(f"Content document: {documents[0]}\n")
print(f"Vector embedded by gemini: {gemini_vectors[0]}")

Content document: page_content='As a new user
I want to register using my email and a password
So that I can create an account and access the ProShop application.' metadata={'lang': 'en', 'epic_id': 'EP1', 'requirement_id': 'PSE1.1', 'requirement_name': 'ACCOUNT REGISTRATION', 'priority': 'MUST', 'section': 'Narrative'}

Vector embedded by gemini: [0.012134789, 0.02087346, 0.024173541, -0.054962445, -0.009596326, 0.007719601, 0.0063148364, -0.033385992, -0.019010475, 0.014754756, -0.036652528, 0.017749509, -0.021943597, 0.0016527858, 0.14000274, 0.0047780643, 0.008692884, -0.0074381744, -0.017077234, -0.026315197, -0.014437709, 0.0028894257, 0.018195467, -0.024122506, -0.0015051437, -0.03918223, 0.022300368, 0.016893744, 0.026784278, 0.009724188, -0.0078912135, -0.008511339, -0.011327545, 0.043683283, 0.012318705, 0.014489194, 0.0039286288, -0.008027576, -0.0015238568, 0.012003348, 0.0026101898, 0.0025159586, 0.0008772044, -0.020298643, -0.032647103, 0.013758093, -0.0015427296, -0.0105

Use BGE-M3 for Embedding

In [43]:
import ollama

def gemma_embedding(text: str | None = None):
    gemma_vectors = []
    for doc in tqdm(documents, desc="Embedding with GoogleGemma"):
        res = ollama.embed(
            model="embeddinggemma:300m",
            input=doc.page_content.strip()
        )
        gemma_vectors.append(res['embeddings'][0])

    return gemma_vectors

gemma_vectors = gemma_embedding()

Embedding with GoogleGemma: 100%|██████████| 32/32 [00:04<00:00,  6.94it/s]


In [44]:
print(f"Content document: {documents[0]}\n")
print(f"Vector embedded by gemma: {gemma_vectors[0]}")

Content document: page_content='As a new user
I want to register using my email and a password
So that I can create an account and access the ProShop application.' metadata={'lang': 'en', 'epic_id': 'EP1', 'requirement_id': 'PSE1.1', 'requirement_name': 'ACCOUNT REGISTRATION', 'priority': 'MUST', 'section': 'Narrative'}

Vector embedded by gemma: [-0.085436866, -0.005340869, -0.011325097, -0.02091247, -0.01588991, 0.047051568, -0.03214362, -0.0591652, 0.024927268, 0.017953034, 0.06937643, -0.024989214, 0.0415034, -0.040279474, 0.14144273, 0.030791964, 0.050723903, 0.009205416, -0.047618166, -0.044391893, -0.0018060157, -0.0069864704, 0.02212007, 0.046433028, 0.036387537, 0.033290874, 0.0796453, -0.015719337, 0.057055857, -0.060677715, 0.08932368, 0.0028628418, 0.043652907, -0.006422971, 0.055333175, 0.074760415, -0.012730075, -0.043070305, -0.027027981, -0.026486127, -0.049802992, 0.10285677, -0.015076857, 0.014600812, -0.029905409, -0.009412878, -0.009363348, -0.014891846, -0.00419755

Using Qwen3 0.6b embedding

In [45]:
def qwen3_embedding(text: str | None = None):
    qwen3_vectors = []
    for doc in tqdm(documents, desc="Embedding with qwen3:0.6"):
        res = ollama.embed(
            model="qwen3-embedding:0.6b",
            input=doc.page_content
        )
        qwen3_vectors.append(res['embeddings'][0])
    return qwen3_vectors

qwen3_vectors = qwen3_embedding()

Embedding with qwen3:0.6: 100%|██████████| 32/32 [00:04<00:00,  7.16it/s]


In [46]:
print(f"Content document: {documents[0]}\n")
print(f"Vector embedded by qwen3: {qwen3_vectors[0]}")

Content document: page_content='As a new user
I want to register using my email and a password
So that I can create an account and access the ProShop application.' metadata={'lang': 'en', 'epic_id': 'EP1', 'requirement_id': 'PSE1.1', 'requirement_name': 'ACCOUNT REGISTRATION', 'priority': 'MUST', 'section': 'Narrative'}

Vector embedded by qwen3: [-0.019141147, -0.05405345, -0.0028008805, -0.08329538, 0.013198454, -0.05539291, 0.053724978, 0.057799704, -0.033667162, 0.009225071, -0.031260494, 0.009029252, 0.010318268, -0.0022416636, -0.029521516, 0.04266203, -0.035191067, -0.0016826604, 0.032633107, 0.012011705, -0.0004937099, 0.0027120486, 0.019780299, 0.039326515, -0.027805544, -0.079710245, -0.017886616, 0.05847832, 0.039190423, -0.015507725, 0.040640704, -0.057615653, -0.022230882, -0.011754623, -0.033196475, -0.006012208, -0.018915936, -0.026341863, -0.012674084, 0.014988636, 0.027402135, -0.05745164, 0.107906215, 0.007767312, 0.053224828, 0.0065969364, -0.0319218, 0.039106835, 0.

Information of Vectors 

In [47]:
import pandas as pd

df = pd.DataFrame([
    ["Gemini", len(gemini_vectors), len(gemini_vectors[0]), "float"],
    ["Gemma", len(gemma_vectors), len(gemma_vectors[0]), "float"],
    ["Qwen3", len(qwen3_vectors), len(qwen3_vectors[0]), "float"],
], columns=["Model", "Num Vectors", "Vector Dim", "dtype"])

print(df)

    Model  Num Vectors  Vector Dim  dtype
0  Gemini           32        3072  float
1   Gemma           32         768  float
2   Qwen3           32        1024  float


Benchmark with 3 embedding models by Speed

In [48]:
import time
import numpy as np

# function for test speed
def benchmark_speed(func, texts, repeat=3):
    # print check
    print(f"Benchmarking speed of {func.__name__}")
    times = []
    for _ in range(repeat):
        start = time.time()
        func(texts)
        times.append(time.time() - start)
    return np.mean(times)

BGE-M3


In [37]:
import numpy as np
from tqdm import tqdm
import ollama

def bge_embedding(documents):
    dim = 1024  # BGE-M3 dimension
    bge_vectors = []
    for i, doc in enumerate(tqdm(documents, desc="Embedding with BGE-M3")):
        text = doc.page_content.strip()
        if not text:
            print(f"Skipping empty doc {i}")
            bge_vectors.append([0.0] * dim)
            continue
        try:
            res = ollama.embed(model="bge-m3:latest", input=text)
            embedding = res['embeddings'][0]
            if any(np.isnan(x) for x in embedding):
                raise ValueError("NaN in embedding")
            bge_vectors.append(embedding)
        except Exception as e:
            print(f"Failed on doc {i} (using zero vector fallback): {e} | Text: {text[:200]}...")
            bge_vectors.append([0.0] * dim)  # Zero vector as safe placeholder
    return bge_vectors

bge_vectors = bge_embedding(documents)

Embedding with BGE-M3:  12%|█▎        | 4/32 [00:02<00:12,  2.33it/s]

Failed on doc 2 (using zero vector fallback): failed to encode response: json: unsupported value: NaN (status code: 500) | Text: Email verification
The app will not send a verification email to confirm the user’s email address...


Embedding with BGE-M3:  34%|███▍      | 11/32 [00:03<00:03,  5.56it/s]

Failed on doc 9 (using zero vector fallback): failed to encode response: json: unsupported value: NaN (status code: 500) | Text: “Remember Me?” functionality
Password recovery
Account lockout
Locking out a user after a certain number of failed login attempts...


Embedding with BGE-M3:  72%|███████▏  | 23/32 [00:05<00:01,  6.49it/s]

Failed on doc 21 (using zero vector fallback): failed to encode response: json: unsupported value: NaN (status code: 500) | Text: Editable fields:
Full name
Email address
Form validation:
Validate all inputs before submission
Valid email format...


Embedding with BGE-M3:  88%|████████▊ | 28/32 [00:06<00:00,  6.52it/s]

Failed on doc 26 (using zero vector fallback): failed to encode response: json: unsupported value: NaN (status code: 500) | Text: As a logged-in user
I want to change my password
So that I can update my credentials and keep my account secure....


Embedding with BGE-M3: 100%|██████████| 32/32 [00:06<00:00,  4.58it/s]


In [49]:
sample_sentences = [doc.page_content for doc in documents]

speed_gemini = benchmark_speed(gemini_embedding, sample_sentences)
speed_gemma  = benchmark_speed(gemma_embedding, sample_sentences)
speed_qwen   = benchmark_speed(qwen3_embedding, sample_sentences)

print("Gemini speed:", speed_gemini)
print("Gemma speed:", speed_gemma)
print("Qwen3 speed:", speed_qwen)

Benchmarking speed of gemini_embedding


Embedding with Gemini API key: 100%|██████████| 32/32 [00:12<00:00,  2.61it/s]
Embedding with Gemini API key: 100%|██████████| 32/32 [00:12<00:00,  2.65it/s]
Embedding with Gemini API key: 100%|██████████| 32/32 [00:12<00:00,  2.66it/s]


Benchmarking speed of gemma_embedding


Embedding with GoogleGemma: 100%|██████████| 32/32 [00:05<00:00,  5.58it/s]
Embedding with GoogleGemma: 100%|██████████| 32/32 [00:05<00:00,  5.91it/s]
Embedding with GoogleGemma: 100%|██████████| 32/32 [00:05<00:00,  5.61it/s]


Benchmarking speed of qwen3_embedding


Embedding with qwen3:0.6: 100%|██████████| 32/32 [00:04<00:00,  7.25it/s]
Embedding with qwen3:0.6: 100%|██████████| 32/32 [00:04<00:00,  7.58it/s]
Embedding with qwen3:0.6: 100%|██████████| 32/32 [00:04<00:00,  7.62it/s]

Gemini speed: 12.138903538386026
Gemma speed: 5.624180396397908
Qwen3 speed: 4.2795021533966064





# Create vector database

Choose best embedding model for create vector store is 

 *qwen3*

In [50]:
from typing import List
from langchain_core.embeddings import Embeddings

class OllamaQwen3Embeddings(Embeddings):
    def __init__(self, model_name: str = "qwen3-embedding:0.6b"):
        self.model_name = model_name

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        embeddings = []
        for text in tqdm(texts):
            res = ollama.embed(model=self.model_name, input=text)
            embeddings.append(res['embeddings'][0])
        return embeddings

    def embed_query(self, text: str) -> List[float]:
        res = ollama.embed(model=self.model_name, input=text)
        return res['embeddings'][0]

In [51]:
from langchain_chroma import Chroma
from uuid import uuid4

def create_chroma_db(documents, vectors, persist_dir="chroma_db"):
    texts = [doc.page_content for doc in documents]
    metadatas = [doc.metadata for doc in documents]
    ids = [str(uuid4()) for _ in range(len(documents))]

    vectordb = Chroma(
        collection_name="RAG_testcase",
        embedding_function=OllamaQwen3Embeddings(),
        persist_directory=persist_dir
    )

    vectordb.add_texts(
        texts=texts,
        metadatas=metadatas,
        ids=ids,
        embeddings=vectors
    )
    
    return vectordb

vectordb = create_chroma_db(documents=documents, vectors=qwen3_vectors)

100%|██████████| 32/32 [00:03<00:00,  8.81it/s]


In [57]:
# if have persist directory, load it

from langchain_chroma import Chroma
vectordb = Chroma(
    collection_name="RAG_testcase",
    embedding_function=OllamaQwen3Embeddings(),
    persist_directory="chroma_db"
)

In [52]:
current_dim = vectordb._collection.peek()['embeddings'].shape[1]
print(f"Dimension trong DB: {current_dim}") 

Dimension trong DB: 1024


Test Query

In [53]:
import json, ollama
query_test = "Generate test cases for register new user using email and password"

results = vectordb.similarity_search(query=query_test, k=5)

for result in results:
    print(json.dumps(result.metadata, indent=2, ensure_ascii=False))

{
  "section": "In Scope",
  "priority": "MUST",
  "epic_id": "EP1",
  "requirement_name": "ACCOUNT REGISTRATION",
  "requirement_id": "PSE1.1",
  "lang": "en"
}
{
  "requirement_name": "ACCOUNT REGISTRATION",
  "epic_id": "EP1",
  "lang": "en",
  "section": "In Scope",
  "priority": "MUST",
  "requirement_id": "PSE1.1"
}
{
  "lang": "en",
  "requirement_name": "ACCOUNT REGISTRATION",
  "requirement_id": "PSE1.1",
  "section": "Narrative",
  "epic_id": "EP1",
  "priority": "MUST"
}
{
  "lang": "en",
  "requirement_id": "PSE1.1",
  "epic_id": "EP1",
  "requirement_name": "ACCOUNT REGISTRATION",
  "priority": "MUST",
  "section": "Narrative"
}
{
  "requirement_id": "PSE1.1",
  "priority": "MUST",
  "requirement_name": "ACCOUNT REGISTRATION",
  "section": "Acceptance Criteria",
  "lang": "en",
  "epic_id": "EP1"
}


# Retriever 

In [54]:
def retrieve_documents(vectordb, query: str, k: int = 5):
    print(f"Query input: {query}")

    results = vectordb.similarity_search(query, k=k)

    if not results:
        print("No documents found.")
        return []

    target_id = set()
    for doc in results:
        req_id = doc.metadata.get("requirement_id")
        if req_id:
            target_id.add(req_id)
    
    print(f"Target IDs found: {target_id}")

    final_context = []
    collection = vectordb._collection

    for req_id in target_id:
        expanded_data = collection.get(
            where={"requirement_id": req_id},
            include=["documents", "metadatas"]
        )

        # ids = expanded_data['ids']
        texts = expanded_data['documents']
        metas = expanded_data['metadatas']
        
        if not texts: 
            continue

        zipped_docs = list(zip(texts, metas))
        
        for text, meta in zipped_docs:
            formatted_content = f"Requirement ID: {meta['requirement_id']}\nSection: {meta['section']}\nContent: {text}"
            final_context.append(formatted_content)
                
    return final_context

In [55]:
context_list = retrieve_documents(vectordb=vectordb, query=query_test)

final_context_text = f"\n{'-' * 20}\n".join(context_list)
final_context_text = overview + "\n\n" + final_context_text

print("\n=== CONTEXT CUỐI CÙNG GỬI CHO LLM ===")
print(final_context_text)

Query input: Generate test cases for register new user using email and password
Target IDs found: {'PSE1.1'}

=== CONTEXT CUỐI CÙNG GỬI CHO LLM ===
This epic focuses on the development of a user management system that allows users to register, log in, manage their profiles, and access the application securely using email and password authentication. The system will ensure that users can create and maintain their accounts with ease while providing the necessary security features to protect user data.

Requirement ID: PSE1.1
Section: Narrative
Content: As a new user
I want to register using my email and a password
So that I can create an account and access the ProShop application.
--------------------
Requirement ID: PSE1.1
Section: In Scope
Content: A registration form
Input validation
Validate email format and password security
Successful registration handling
--------------------
Requirement ID: PSE1.1
Section: Out of Scope
Content: Email verification
The app will not send a verificat

In [56]:
print(context_list)

['Requirement ID: PSE1.1\nSection: Narrative\nContent: As a new user\nI want to register using my email and a password\nSo that I can create an account and access the ProShop application.', 'Requirement ID: PSE1.1\nSection: In Scope\nContent: A registration form\nInput validation\nValidate email format and password security\nSuccessful registration handling', 'Requirement ID: PSE1.1\nSection: Out of Scope\nContent: Email verification\nThe app will not send a verification email to confirm the user’s email address', 'Requirement ID: PSE1.1\nSection: Acceptance Criteria\nContent: Scenario PSE1.1-S1: Successful Registration\nScenario PSE1.1-S2: Email Format Validation\nScenario PSE1.1-S3: Password Validation\nScenario PSE1.1-S4: Duplicate Email Check', 'Requirement ID: PSE1.1\nSection: Priority\nContent: MUST', 'Requirement ID: PSE1.1\nSection: Business Rules\nContent: BR1: Email Validation\nBR2: Password Validation', 'Requirement ID: PSE1.1\nSection: Additional Comments\nContent: The regi

# LLMs

In [57]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import os

prompt_file_path = "prompt.txt"

if not os.path.exists(prompt_file_path):
    with open(prompt_file_path, "w", encoding="utf-8") as f:
        f.write("""...""")

with open(prompt_file_path, "r", encoding="utf-8") as f:
    raw_text = f.read()

    system_instruction = raw_text.replace("{", "{{").replace("}", "}}")

print(f"Đã load prompt (đã escape {{}} cho JSON Schema).")

llm = ChatOllama(
    model="gpt-oss:20b", 
    temperature=0.1, 
    keep_alive="5m"
)

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_instruction), 
    
    ("human", """
CONTEXT DOCUMENTS:
---
{context}
---

USER REQUEST: 
{question}

Based on the context above, generate the JSON output:
""")
])

rag_chain = qa_prompt | llm | StrOutputParser()

def generate_test_cases(query_text):

    context_chunks = retrieve_documents(vectordb=vectordb, query=query_text, k=5)
    
    # add overview to context
    if overview:
        context_chunks.insert(0, f"Epic Overview:\n{overview}")
        
    if not context_chunks:
        return "Không tìm thấy tài liệu nào phù hợp."
    
    formatted_context = "\n\n".join(context_chunks)
    
    print(f"\nĐang gửi context cho LLM ({len(context_chunks)} requirements)...")
    
    response = rag_chain.invoke({
        "context": formatted_context,
        "question": query_text
    })
    
    return response

Đã load prompt (đã escape {} cho JSON Schema).


In [58]:
result = generate_test_cases(query_test)

print("\n" + "="*40)
print("KẾT QUẢ TỪ LLM:")
print("="*40)
print(result)

Query input: Generate test cases for register new user using email and password
Target IDs found: {'PSE1.1'}

Đang gửi context cho LLM (15 requirements)...

KẾT QUẢ TỪ LLM:
{
  "test_cases": [
    {
      "test_case_id": "TC_PSE1.1_01",
      "requirement_id": "PSE1.1",
      "scenario_id": "PSE1.1-S1",
      "title": "Successful Registration",
      "preconditions": ["User is not registered in the system"],
      "steps": [
        "Navigate to the registration page",
        "Enter a valid email address in the Email field",
        "Enter a strong password in the Password field",
        "Click the Register button"
      ],
      "test_data": {
        "email": "newuser@example.com",
        "password": "StrongPass!123"
      },
      "expected_result": "User account is created successfully and the user is redirected to the dashboard",
      "priority": "High",
      "is_automated": true
    },
    {
      "test_case_id": "TC_PSE1.1_02",
      "requirement_id": "PSE1.1",
      "scena

In [20]:
model_answers = []
model_answers.append(result)

In [21]:
print(model_answers[0])

{
  "test_cases": [
    {
      "test_case_id": "TC_PSE1.1_01",
      "requirement_id": "PSE1.1",
      "scenario_id": "PSE1.1-S1",
      "title": "Successful Registration",
      "preconditions": ["User is on registration page"],
      "steps": ["Enter valid email", "Enter valid password", "Click Register"],
      "test_data": {"email":"user@example.com","password":"StrongPass123!"},
      "expected_result": "Account created successfully and user is redirected to dashboard",
      "priority": "High",
      "is_automated": true
    },
    {
      "test_case_id": "TC_PSE1.1_02",
      "requirement_id": "PSE1.1",
      "scenario_id": "PSE1.1-S2",
      "title": "Email Format Validation",
      "preconditions": ["User is on registration page"],
      "steps": ["Enter invalid email format", "Enter valid password", "Click Register"],
      "test_data": {"email":"invalidemail","password":"StrongPass123!"},
      "expected_result": "Error message displayed: 'Please enter a valid email address

# Evaluation

In [59]:
# load pandas sample data evaluation
import pandas as pd

pd.set_option('display.max_colwidth', None)
df = pd.read_csv(r"D:\RAG_testcases\sample_eval_data.csv")
df

Unnamed: 0,query,retrieved_context,reference_answer
0,Gerate test cases for register new user using email and password,Requirement ID: PSE1.1\r\nSection: Narrative\r\nContent: As a new user\r\nI want to register using my email and a password\r\nSo that I can create an account and access the ProShop application.\r\n\r\nRequirement ID: PSE1.1\r\nSection: In Scope\r\nContent: A registration form\r\nInput validation\r\nValidate email format and password security\r\nSuccessful registration handling\r\n\r\nRequirement ID: PSE1.1\r\nSection: Out of Scope\r\nContent: Email verification\r\nThe app will not send a verification email to confirm the user’s email address\r\n\r\nRequirement ID: PSE1.1\r\nSection: Acceptance Criteria\r\nContent: Scenario PSE1.1-S1: Successful Registration\r\nScenario PSE1.1-S2: Email Format Validation\r\nScenario PSE1.1-S3: Password Validation\r\nScenario PSE1.1-S4: Duplicate Email Check\r\n\r\nRequirement ID: PSE1.1\r\nSection: Priority\r\nContent: MUST\r\n\r\nRequirement ID: PSE1.1\r\nSection: Business Rules\r\nContent: BR1: Email Validation\r\nBR2: Password Validation\r\n\r\nRequirement ID: PSE1.1\r\nSection: Additional Comments\r\nContent: The registration form should include 2 fields:\r\nEmail\r\nPassword,"{""test_cases"":[{""test_case_id"":""TC_PSE1.1_REF_01"",""requirement_id"":""PSE1.1"",""scenario_id"":""PSE1.1-S1"",""title"":""Successful user registration with valid email and password"",""preconditions"":[""User is on the registration page""],""steps"":[""Enter a valid email address in the Email field"",""Enter a password that meets defined security rules"",""Submit the registration form""],""test_data"":{""email"":""valid.user@proshop.com"",""password"":""StrongPass#123""},""expected_result"":""The system creates a new user account and allows access to the application."",""priority"":""High"",""is_automated"":true},{""test_case_id"":""TC_PSE1.1_REF_02"",""requirement_id"":""PSE1.1"",""scenario_id"":""PSE1.1-S2"",""title"":""Reject registration when email format is invalid"",""preconditions"":[""User is on the registration page""],""steps"":[""Enter an invalid email address format"",""Enter a valid password"",""Submit the registration form""],""test_data"":{""email"":""invalid-email-format"",""password"":""StrongPass#123""},""expected_result"":""The system blocks registration and displays an invalid email format validation message."",""priority"":""High"",""is_automated"":true},{""test_case_id"":""TC_PSE1.1_REF_03"",""requirement_id"":""PSE1.1"",""scenario_id"":""PSE1.1-S3"",""title"":""Reject registration when password does not meet security requirements"",""preconditions"":[""User is on the registration page""],""steps"":[""Enter a valid email address"",""Enter a weak or non-compliant password"",""Submit the registration form""],""test_data"":{""email"":""user2@proshop.com"",""password"":""12345""},""expected_result"":""The system blocks registration and displays a password validation error message."",""priority"":""High"",""is_automated"":true},{""test_case_id"":""TC_PSE1.1_REF_04"",""requirement_id"":""PSE1.1"",""scenario_id"":""PSE1.1-S4"",""title"":""Reject registration when email is already registered"",""preconditions"":[""User is on the registration page"",""An account already exists with the entered email address""],""steps"":[""Enter an email address that is already registered"",""Enter a valid password"",""Submit the registration form""],""test_data"":{""email"":""existing@proshop.com"",""password"":""StrongPass#123""},""expected_result"":""The system blocks registration and displays a duplicate email error message."",""priority"":""High"",""is_automated"":true}]}"
