# Performance Testing for Agentic Workflow

This notebook tests the performance of the LangGraph agentic workflow by:
- Reading questions from `input/performance.csv`
- Running each question through the workflow
- Measuring response time and input/output lengths (in tokens)
- Saving results to `output/out_performance.csv`

## Load Testing Strategy
- **Round 1 (Group by 5 = yes)**: Send 5 questions concurrently (parallel execution)
- **Round 2 (Group by 5 = no)**: Send questions sequentially (wait for each response before next)

In [1]:
import sys
import os
import time
import pandas as pd
from pathlib import Path
from tqdm.notebook import tqdm
import concurrent.futures

# The parent of the current working directory is treated as the project root.
project_root = Path.cwd().parent

# Put the project root at the front of sys.path (only once) so that the
# project-local `utils` package can be imported by the following cells.
project_root_str = str(project_root)
if project_root_str not in sys.path:
    sys.path.insert(0, project_root_str)

# Load environment variables from the project-level .env file; override=True
# lets values from the file replace variables that are already set.
from dotenv import load_dotenv
load_dotenv(project_root / ".env", override=True)

print(f"Project root: {project_root}")

Project root: /home/scsng/Desktop/Projects/Job_application/pwc/pwc_genai_task


In [2]:
from utils.chat_client import ChatClient
from utils.agents.agentic_workflow import AgenticWorkflow
from utils.rag.vector_db import QdrantDB

def get_chat_client():
    """Build a ChatClient configured from environment variables.

    Reads INFERENCE_API_URL, MODEL (falling back to "default-model") and
    INFERENCE_API_KEY and forwards them as base_url, model and api_key.
    """
    client_settings = {
        "base_url": os.getenv("INFERENCE_API_URL"),
        "model": os.getenv("MODEL", "default-model"),
        "api_key": os.getenv("INFERENCE_API_KEY"),
    }
    return ChatClient(**client_settings)

def get_agentic_workflow():
    """Assemble the agentic workflow from environment-driven settings.

    Creates the chat client, reads MAX_TASK_COUNT (default "3"), builds a
    QdrantDB from COLLECTION_NAME, QDRANT_HOST, EMBEDDING_MODEL,
    QDRANT_API_KEY and TOP_K (default "4"), and passes the client's ``llm``,
    the vector DB and the task limit to AgenticWorkflow.
    """
    client = get_chat_client()
    task_limit = int(os.getenv("MAX_TASK_COUNT", "3"))

    language_model = client.llm
    retrieval_db = QdrantDB(
        collection_name=os.getenv("COLLECTION_NAME"),
        qdrant_host=os.getenv("QDRANT_HOST"),
        embedding_model=os.getenv("EMBEDDING_MODEL"),
        qdrant_api_key=os.getenv("QDRANT_API_KEY"),
        top_k=int(os.getenv("TOP_K", "4")),
    )

    return AgenticWorkflow(
        llm=language_model,
        vector_db=retrieval_db,
        max_task_count=task_limit,
    )

# Build the workflow once at module level; this single instance is what the
# concurrent and sequential test rounds are given.
print("Initializing workflow...")
workflow = get_agentic_workflow()
print("Workflow initialized successfully!")

Initializing workflow...


  client = QdrantClient(**client_options)


Workflow initialized successfully!


In [3]:
# Input questions and the destination for the measured results
# (both paths are relative to the current working directory).
input_file = Path("input/performance.csv")
output_file = Path("output/out_performance.csv")

# Read CSV with semicolon delimiter
df = pd.read_csv(input_file, sep=";")

# Fail fast with a clear message if the columns that later cells index by
# name are absent (for example when the file uses a different delimiter and
# everything ends up in a single column).
required_columns = {"Question", "Complex(yes,no)"}
missing_columns = required_columns - set(df.columns)
assert not missing_columns, f"{input_file} is missing columns: {sorted(missing_columns)}"

print(f"Loaded {len(df)} questions from {input_file}")
print(f"\nColumns: {df.columns.tolist()}")
df.head()

Loaded 25 questions from input/performance.csv

Columns: ['Question', 'Complex(yes,no)', 'Answer', 'Input length', 'Output length', 'Response time', 'Group by 5 (yes,no)']


Unnamed: 0,Question,"Complex(yes,no)",Answer,Input length,Output length,Response time,"Group by 5 (yes,no)"
0,What is the name of Hungary according to the F...,no,,,,,
1,What is the form of government in Hungary?,no,,,,,
2,How is public power exercised according to Art...,no,,,,,
3,What does the National Avowal say about the 19...,no,,,,,
4,Who is responsible for the fate of Hungarians ...,no,,,,,


In [4]:
# Initialize tokenizer (cl100k_base is used by GPT-4/GPT-3.5, reasonable approximation for most LLMs).
# The import lives inside the try block so that a missing tiktoken package
# (or a failure to load the encoding) activates the character-count fallback
# announced below instead of aborting the cell with ImportError.
try:
    import tiktoken

    tokenizer = tiktoken.get_encoding("cl100k_base")
except Exception:
    tokenizer = None
    print("Warning: tiktoken not available, using character count as fallback")

def count_tokens(text: str) -> int:
    """Count tokens in text using tiktoken, fallback to char count / 4.

    Args:
        text: The text to measure.

    Returns:
        Number of tokens according to the module-level ``tokenizer`` when one
        was loaded; otherwise ``len(text) // 4``.
    """
    if tokenizer is not None:
        # disallowed_special=() makes text that looks like a special token
        # (e.g. "<|endoftext|>") be encoded as ordinary text; with the
        # default setting encode() raises ValueError on such input.
        return len(tokenizer.encode(text, disallowed_special=()))
    return len(text) // 4  # Rough approximation: ~4 chars per token

def run_single_test(workflow, question: str) -> dict:
    """
    Run a single question through the workflow and measure performance.

    Args:
        workflow: Object exposing ``invoke(question)``.
        question: Question text passed to ``workflow.invoke``.

    Returns:
        dict with question, answer, input_tokens, output_tokens and
        response_time (seconds, rounded to 3 decimals).

    Exceptions raised by ``workflow.invoke`` are not propagated: the answer
    becomes ``"ERROR: <message>"`` and the elapsed time is still recorded.
    """
    input_tokens = count_tokens(question)

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments that happen while the workflow is running.
    start_time = time.perf_counter()
    try:
        answer = workflow.invoke(question)
    except Exception as e:
        answer = f"ERROR: {str(e)}"
    response_time = round(time.perf_counter() - start_time, 3)

    # The return type of invoke() is not visible here; count tokens on the
    # string form so a non-string answer cannot crash the measurement.
    answer_text = answer if isinstance(answer, str) else str(answer)
    output_tokens = count_tokens(answer_text)

    return {
        "question": question,
        "answer": answer,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "response_time": response_time
    }

## Round 1: Concurrent Execution (Group by 5 = yes)

Send 5 questions at a time concurrently using ThreadPoolExecutor.

In [5]:
# Get all valid questions (non-missing and non-empty). The filter is applied
# once and both lists are taken from the same rows, so questions_list[i] and
# complex_flags[i] always refer to the same input row.
question_column = df["Question"]
valid_rows = df[question_column.notna() & question_column.ne("")]
questions_list = valid_rows["Question"].tolist()
complex_flags = valid_rows["Complex(yes,no)"].tolist()

print(f"Total questions to test: {len(questions_list)}")

Total questions to test: 25


In [6]:
def run_concurrent_batch(workflow, questions: list, batch_size: int = 5) -> list:
    """
    Run questions concurrently in batches of batch_size.
    All questions in a batch are sent at the same time (parallel).

    Args:
        workflow: Passed through to ``run_single_test`` for every question.
        questions: Questions to run; processed in consecutive slices.
        batch_size: Number of questions submitted together, also used as the
            thread pool size. Must be at least 1.

    Returns:
        One result per question, in the same order as ``questions``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    all_results = []
    num_batches = (len(questions) + batch_size - 1) // batch_size

    for batch_num in tqdm(range(num_batches), desc=f"Concurrent batches ({batch_size} at a time)", unit="batch"):
        start_idx = batch_num * batch_size
        batch_questions = questions[start_idx:start_idx + batch_size]

        # Monotonic clock: immune to system clock adjustments during the batch.
        batch_start_time = time.perf_counter()

        # Run all questions in this batch concurrently
        with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
            futures = [executor.submit(run_single_test, workflow, q) for q in batch_questions]
            # Reading the futures in submission order keeps the results aligned
            # with the input order without looking questions up by their text,
            # so repeated questions are handled correctly. Each result() call
            # blocks until that task has finished.
            batch_results = [future.result() for future in futures]

        batch_total_time = round(time.perf_counter() - batch_start_time, 3)
        all_results.extend(batch_results)

        print(f"Batch {batch_num + 1}: {len(batch_questions)} questions completed in {batch_total_time}s (concurrent)")

    return all_results

print("\n" + "="*60)
print("ROUND 1: CONCURRENT EXECUTION (Group by 5 = yes)")
print("Sending 5 questions at a time...")
print("="*60 + "\n")

# Wall-clock duration of the whole round. perf_counter is monotonic, so the
# result is not affected by system clock adjustments during this long run.
round1_start = time.perf_counter()
concurrent_results = run_concurrent_batch(workflow, questions_list, batch_size=5)
round1_end = time.perf_counter()
round1_total_time = round(round1_end - round1_start, 3)

print(f"\nRound 1 completed: {len(concurrent_results)} questions in {round1_total_time}s")


ROUND 1: CONCURRENT EXECUTION (Group by 5 = yes)
Sending 5 questions at a time...



Concurrent batches (5 at a time):   0%|          | 0/5 [00:00<?, ?batch/s]

Batch 1: 5 questions completed in 70.153s (concurrent)
Batch 2: 5 questions completed in 85.042s (concurrent)
Batch 3: 5 questions completed in 31.195s (concurrent)
Batch 4: 5 questions completed in 94.239s (concurrent)
Batch 5: 5 questions completed in 92.444s (concurrent)

Round 1 completed: 25 questions in 373.085s


## Round 2: Sequential Execution (Group by 5 = no)

Send questions one by one, waiting for each response before sending the next.

In [7]:
def run_sequential(workflow, questions: list) -> list:
    """
    Run the questions one after another, in order.

    Each question is passed to ``run_single_test`` and the next one starts
    only after the previous call has returned. Returns the list of results
    in the same order as ``questions``.
    """
    progress = tqdm(questions, desc="Sequential (one by one)", unit="question")
    return [run_single_test(workflow, item) for item in progress]

print("\n" + "="*60)
print("ROUND 2: SEQUENTIAL EXECUTION (Group by 5 = no)")
print("Sending questions one by one, waiting for each response...")
print("="*60 + "\n")

# Wall-clock duration of the whole round. perf_counter is monotonic, so the
# result is not affected by system clock adjustments during this long run.
round2_start = time.perf_counter()
sequential_results = run_sequential(workflow, questions_list)
round2_end = time.perf_counter()
round2_total_time = round(round2_end - round2_start, 3)

print(f"\nRound 2 completed: {len(sequential_results)} questions in {round2_total_time}s")


ROUND 2: SEQUENTIAL EXECUTION (Group by 5 = no)
Sending questions one by one, waiting for each response...



Sequential (one by one):   0%|          | 0/25 [00:00<?, ?question/s]


Round 2 completed: 25 questions in 986.596s


In [8]:
# Combine both rounds into a single table. The "Group by 5 (yes,no)" column
# tells the rounds apart: "yes" = Round 1 (concurrent), "no" = Round 2
# (sequential). Complexity flags are matched to results by position.
results = []
for round_results, grouped_flag in ((concurrent_results, "yes"), (sequential_results, "no")):
    for position, outcome in enumerate(round_results):
        has_flag = position < len(complex_flags)
        results.append({
            "Question": outcome["question"],
            "Complex(yes,no)": complex_flags[position] if has_flag else "",
            "Answer": outcome["answer"],
            "Input length": outcome["input_tokens"],
            "Output length": outcome["output_tokens"],
            "Response time": outcome["response_time"],
            "Group by 5 (yes,no)": grouped_flag,
        })

results_df = pd.DataFrame(results)

# Create the output directory if needed, then write the table using the same
# semicolon delimiter as the input file.
output_file.parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(output_file, sep=";", index=False)

print(f"Results saved to {output_file}")
print(f"Total rows: {len(results_df)} ({len(concurrent_results)} concurrent + {len(sequential_results)} sequential)")

results_df

Results saved to output/out_performance.csv
Total rows: 50 (25 concurrent + 25 sequential)


Unnamed: 0,Question,"Complex(yes,no)",Answer,Input length,Output length,Response time,"Group by 5 (yes,no)"
0,What is the name of Hungary according to the F...,no,The name of Hungary according to the Fundament...,12,76,15.381,yes
1,What is the form of government in Hungary?,no,The form of government in Hungary is a republi...,9,54,11.923,yes
2,How is public power exercised according to Art...,no,The public power is exercised according to Art...,10,143,23.359,yes
3,What does the National Avowal say about the 19...,no,The National Avowal declares the 1949 communis...,16,118,18.525,yes
4,Who is responsible for the fate of Hungarians ...,no,The Hungarian government is responsible for th...,14,113,70.138,yes
5,Explain the principle of division of powers as...,no,The principle of division of powers as stated ...,14,280,85.041,yes
6,What are the fundamental cohesive values of fa...,no,The fundamental cohesive values of family and ...,18,76,24.729,yes
7,To what date does the Fundamental Law date the...,no,The Fundamental Law dates the restoration of H...,17,36,12.156,yes
8,Discuss the relationship between individual fr...,yes,The National Avowal emphasizes the importance ...,18,246,65.089,yes
9,Analyze the state's obligation regarding the p...,yes,The state's obligation regarding the protectio...,17,319,36.567,yes


In [9]:
# Performance comparison summary
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is not positive.

    Keeps the summary printable when a round produced no results or its
    measured duration rounded down to zero.
    """
    return numerator / denominator if denominator > 0 else 0.0

print("\n" + "="*60)
print("PERFORMANCE COMPARISON SUMMARY")
print("="*60)

# Round 1 stats (concurrent)
r1_times = [r["response_time"] for r in concurrent_results]
r1_input = [r["input_tokens"] for r in concurrent_results]
r1_output = [r["output_tokens"] for r in concurrent_results]

print("\n--- ROUND 1: CONCURRENT (Group by 5 = yes) ---")
print(f"Total questions: {len(concurrent_results)}")
print(f"Total wall-clock time: {round1_total_time}s")
print(f"Sum of individual response times: {sum(r1_times):.3f}s")
print(f"Avg response time per question: {_safe_ratio(sum(r1_times), len(r1_times)):.3f}s")
print(f"Throughput: {_safe_ratio(len(concurrent_results), round1_total_time):.2f} questions/second")
print(f"Total input tokens: {sum(r1_input)}")
print(f"Total output tokens: {sum(r1_output)}")

# Round 2 stats (sequential)
r2_times = [r["response_time"] for r in sequential_results]
r2_input = [r["input_tokens"] for r in sequential_results]
r2_output = [r["output_tokens"] for r in sequential_results]

print("\n--- ROUND 2: SEQUENTIAL (Group by 5 = no) ---")
print(f"Total questions: {len(sequential_results)}")
print(f"Total wall-clock time: {round2_total_time}s")
print(f"Sum of individual response times: {sum(r2_times):.3f}s")
print(f"Avg response time per question: {_safe_ratio(sum(r2_times), len(r2_times)):.3f}s")
print(f"Throughput: {_safe_ratio(len(sequential_results), round2_total_time):.2f} questions/second")
print(f"Total input tokens: {sum(r2_input)}")
print(f"Total output tokens: {sum(r2_output)}")

# Comparison: speedup is the sequential wall-clock time divided by the
# concurrent wall-clock time; time_saved is their difference.
print("\n--- COMPARISON ---")
speedup = _safe_ratio(round2_total_time, round1_total_time)
time_saved = round2_total_time - round1_total_time
print(f"Concurrent speedup: {speedup:.2f}x faster")
print(f"Time saved with concurrency: {time_saved:.3f}s")
print(f"Concurrent throughput: {_safe_ratio(len(concurrent_results), round1_total_time):.2f} q/s")
print(f"Sequential throughput: {_safe_ratio(len(sequential_results), round2_total_time):.2f} q/s")


PERFORMANCE COMPARISON SUMMARY

--- ROUND 1: CONCURRENT (Group by 5 = yes) ---
Total questions: 25
Total wall-clock time: 373.085s
Sum of individual response times: 1080.606s
Avg response time per question: 43.224s
Throughput: 0.07 questions/second
Total input tokens: 346
Total output tokens: 6148

--- ROUND 2: SEQUENTIAL (Group by 5 = no) ---
Total questions: 25
Total wall-clock time: 986.596s
Sum of individual response times: 986.415s
Avg response time per question: 39.457s
Throughput: 0.03 questions/second
Total input tokens: 346
Total output tokens: 7169

--- COMPARISON ---
Concurrent speedup: 2.64x faster
Time saved with concurrency: 613.511s
Concurrent throughput: 0.07 q/s
Sequential throughput: 0.03 q/s


In [10]:
# Average response time per round, split by the question's complexity flag.
print("\n" + "="*60)
print("BREAKDOWN BY QUESTION COMPLEXITY")
print("="*60)

for round_name, round_results in [("Concurrent (yes)", concurrent_results), ("Sequential (no)", sequential_results)]:
    print(f"\n--- {round_name} ---")

    # Pair every result with the flag at the same position; zip stops at the
    # shorter sequence, so results without a matching flag are left out.
    flagged = [
        (str(flag).lower() == "yes", outcome["response_time"])
        for outcome, flag in zip(round_results, complex_flags)
    ]
    complex_times = [elapsed for is_complex, elapsed in flagged if is_complex]
    simple_times = [elapsed for is_complex, elapsed in flagged if not is_complex]

    if complex_times:
        print(f"Complex questions ({len(complex_times)}): avg {sum(complex_times)/len(complex_times):.3f}s")
    if simple_times:
        print(f"Simple questions ({len(simple_times)}): avg {sum(simple_times)/len(simple_times):.3f}s")


BREAKDOWN BY QUESTION COMPLEXITY

--- Concurrent (yes) ---
Complex questions (7): avg 58.586s
Simple questions (18): avg 37.250s

--- Sequential (no) ---
Complex questions (7): avg 59.637s
Simple questions (18): avg 31.608s
