In [None]:
import os
# Paste keys here for a local run, or leave the values blank to keep whatever
# is already exported in the environment. Never commit real keys.
_API_KEYS = {
    "GOOGLE_API_KEY": "",
    "GEMINI_API_KEY": "",
}
for _env_name, _key_value in _API_KEYS.items():
    if _key_value:
        os.environ[_env_name] = _key_value
    else:
        # Do not clobber an already-exported key with an empty placeholder.
        os.environ.setdefault(_env_name, "")

In [None]:
!pip install aiohttp -q

In [None]:
import os
import aiohttp
import asyncio
import json
import time
from typing import Any, List, Optional, TypedDict
from langchain_core.prompts import PromptTemplate
from langchain_core.language_models.llms import BaseLLM
from langchain_core.outputs import LLMResult
from langchain_core.callbacks.manager import AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun
from pydantic import Field
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda
# LangGraph Imports
from langgraph.graph import StateGraph, END

In [None]:
# --- 1. Async Custom LLM Implementation ---

class AsyncCustomHTTPGemini(BaseLLM):
    """
    An async custom LangChain LLM wrapper that interacts with the Google Gemini API
    using aiohttp for asynchronous HTTP requests.

    The async path (``_acall`` / ``_agenerate``) reuses one lazily created
    ``aiohttp.ClientSession``; call ``close()`` when finished with the instance.
    The sync path (``_call`` / ``_generate``) runs each request on a private
    event loop with its own short-lived session, so it also works when the
    calling thread already has a running loop (e.g. inside Jupyter).

    Raises:
        ValueError: at construction time if no API key is passed and the
            GEMINI_API_KEY environment variable is unset or empty.
    """

    # API key; when omitted, __init__ falls back to the GEMINI_API_KEY env var.
    api_key: Optional[str] = None
    # Model identifier appended to ``base_url`` when building the endpoint URL.
    model_name: str = Field(default="gemini-2.5-flash", alias="model")
    base_url: str = "https://generativelanguage.googleapis.com/v1beta/models/"
    # Optional schema; when set, requests ask for JSON output matching it.
    response_schema: Optional[dict] = None
    # Lazily created aiohttp session reused by the async code path.
    _session: Optional[aiohttp.ClientSession] = None

    class Config:
        arbitrary_types_allowed = True
        # `model_name` carries the alias "model"; without this setting a
        # `model_name=...` keyword would not populate the field.
        populate_by_name = True

    def __init__(self, **kwargs: Any):
        super().__init__(**kwargs)
        if not self.api_key:
            self.api_key = os.getenv("GEMINI_API_KEY")
        if not self.api_key:
            raise ValueError("GEMINI_API_KEY must be provided or set as an environment variable.")

    @property
    def _llm_type(self) -> str:
        """Identifier string for this LLM implementation."""
        return "async_custom_http_gemini"

    async def _get_session(self) -> aiohttp.ClientSession:
        """Return the cached session, creating a new one if absent or closed."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession()
        return self._session

    async def _post_prompt(self, session: aiohttp.ClientSession, prompt: str) -> str:
        """
        POST ``prompt`` to the ``generateContent`` endpoint and return the text
        of the first part of the first candidate.

        Raises:
            RuntimeError: on an HTTP error status, on any other failure while
                performing the request, or when the response body carries no
                generated text.
        """
        url = f"{self.base_url}{self.model_name}:generateContent"
        # Send the key as a header rather than a `?key=` query parameter so it
        # does not end up in URLs that get logged or echoed in error reports.
        headers = {
            "Content-Type": "application/json",
            "x-goog-api-key": self.api_key,
        }

        request_data = {
            "contents": [{"parts": [{"text": prompt}]}]
        }

        if self.response_schema:
            request_data["generationConfig"] = {
                "responseMimeType": "application/json",
                "responseSchema": self.response_schema
            }

        try:
            async with session.post(url, headers=headers, json=request_data) as response:
                response.raise_for_status()
                response_json = await response.json()
        except aiohttp.ClientResponseError as err:
            error_message = f"Gemini API HTTP Error ({err.status}): {err.message}"
            raise RuntimeError(error_message) from err
        except Exception as e:
            raise RuntimeError(f"An unexpected error occurred during async API call: {e}") from e

        try:
            return response_json['candidates'][0]['content']['parts'][0]['text']
        except (KeyError, IndexError, TypeError) as e:
            # The expected candidates/content/parts/text path is missing.
            raise RuntimeError(
                f"Gemini API response contained no generated text: {response_json!r}"
            ) from e

    async def _one_shot_call(self, prompt: str) -> str:
        """Run one request with a session that lives only for this call."""
        async with aiohttp.ClientSession() as session:
            return await self._post_prompt(session, prompt)

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """
        Asynchronously generate text for ``prompt`` using the shared session.

        ``stop``, ``run_manager`` and ``kwargs`` are accepted for interface
        compatibility but are not forwarded to the API.

        Raises:
            RuntimeError: if the request fails or returns no text.
        """
        session = await self._get_session()
        return await self._post_prompt(session, prompt)

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """
        Synchronously generate text for ``prompt``.

        The request runs via ``asyncio.run`` in a helper thread: calling
        ``run_until_complete`` on the current loop fails when that loop is
        already running, and the helper thread leaves the caller's loop
        untouched. The calling thread blocks until the request finishes.

        Raises:
            RuntimeError: if the request fails or returns no text.
        """
        import concurrent.futures

        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            # A dedicated session is used because the cached one may belong
            # to a different event loop.
            future = executor.submit(asyncio.run, self._one_shot_call(prompt))
            return future.result()

    async def _agenerate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Generate one completion per prompt, sequentially, via ``_acall``."""
        generations = []
        for prompt in prompts:
            text = await self._acall(prompt, stop, run_manager, **kwargs)
            generations.append([{"text": text}])
        return LLMResult(generations=generations)

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Generate one completion per prompt, sequentially, via ``_call``."""
        generations = []
        for prompt in prompts:
            text = self._call(prompt, stop, run_manager, **kwargs)
            generations.append([{"text": text}])
        return LLMResult(generations=generations)

    async def close(self):
        """Close the cached aiohttp session if one is open."""
        if self._session and not self._session.closed:
            await self._session.close()

In [None]:
# --- 2. LangGraph AGENT DEFINITIONS (Async) ---

class AgentState(TypedDict):
    """State dictionary passed between the graph's nodes."""
    # Item name interpolated into both agents' prompts.
    item_name: str
    # Complexity label interpolated into both agents' prompts.
    complexity_level: str
    # Written by score_generator_node; read by review_generator_node.
    score: Optional[float]
    # Raw LLM output returned by review_generator_node.
    review_data: Optional[str]


def _object_schema(properties: dict) -> dict:
    """Build an OBJECT schema in which every property is required, in declaration order."""
    field_names = list(properties)
    return {
        "type": "OBJECT",
        "properties": properties,
        "required": list(field_names),
        "propertyOrdering": list(field_names),
    }


# Response schema for Agent 1: a single numeric score.
SCORE_SCHEMA = _object_schema({
    "score": {
        "type": "NUMBER",
        "description": "A random technical score between 0.0 and 1.0.",
    },
})

# Response schema for Agent 2: review text plus a category label.
REVIEW_SCHEMA = _object_schema({
    "review_text": {
        "type": "STRING",
        "description": "A concise, technical review.",
    },
    "category": {
        "type": "STRING",
        "description": "The category of the review (e.g., 'Positive', 'Neutral', 'Negative').",
    },
})


async def score_generator_node(state: AgentState) -> dict:
    """
    Agent 1: ask the LLM for a score and return it as a state update.

    Reads ``item_name`` and ``complexity_level`` from ``state``, requests JSON
    output constrained by ``SCORE_SCHEMA``, and parses the ``score`` field.

    Returns:
        ``{"score": <value>}``. The value is 0.0 when the parsed object has no
        ``score`` key, and 0.5 when the output cannot be parsed as a JSON object.
    """
    print("--- [Agent 1] Executing: Score Generator (ASYNC) ---")
    llm = AsyncCustomHTTPGemini(model_name="gemini-2.5-flash", response_schema=SCORE_SCHEMA)

    prompt = PromptTemplate.from_template(
        "You are a technical analyst. Your task is to assign a random score between 0.0 and 1.0 to the '{item_name}' based on its complexity '{complexity_level}'. Output the result strictly in JSON format according to the schema."
    )
    prompt_value = prompt.format(item_name=state['item_name'], complexity_level=state['complexity_level'])

    try:
        raw_json_output = await llm.ainvoke(prompt_value)
    finally:
        # Release the HTTP session even when the request raises.
        await llm.close()

    try:
        score_data = json.loads(raw_json_output)
        score = score_data.get('score', 0.0)
    except (json.JSONDecodeError, TypeError, AttributeError) as e:
        # TypeError: output is not a string; AttributeError: JSON is not an object.
        print(f"Error decoding JSON from Agent 1: {e}. Falling back to score 0.5.")
        return {"score": 0.5}

    print(f"--- [Agent 1] Generated Score: {score} ---")
    return {"score": score}


async def review_generator_node(state: AgentState) -> dict:
    """
    Agent 2: ask the LLM for a review that reflects the score already in ``state``.

    Reads ``item_name``, ``complexity_level`` and ``score`` from ``state`` and
    requests JSON output constrained by ``REVIEW_SCHEMA``.

    Returns:
        ``{"review_data": <raw LLM output>}``; the output is not parsed here.
    """
    print("\n--- [Agent 2] Executing: Review Generator (ASYNC) ---")
    llm = AsyncCustomHTTPGemini(model_name="gemini-2.5-flash", response_schema=REVIEW_SCHEMA)

    prompt = PromptTemplate.from_template(
        "Generate a technical review for the item '{item_name}' which has a complexity of '{complexity_level}' and received a technical score of {score}. Your review must reflect this score. Output the review strictly in JSON format according to the schema."
    )
    prompt_value = prompt.format(
        item_name=state['item_name'],
        complexity_level=state['complexity_level'],
        score=state['score']
    )

    try:
        raw_json_output = await llm.ainvoke(prompt_value)
    finally:
        # Release the HTTP session even when the request raises.
        await llm.close()

    print("--- [Agent 2] Generated Review Data ---")
    return {"review_data": raw_json_output}

In [None]:
# --- 3. Build the Graph ---

def build_graph():
    """Assemble the two-node graph (score, then review) and return it compiled."""
    workflow = StateGraph(AgentState)

    # Register the nodes in execution order.
    for node_name, node_fn in (
        ("score_generator", score_generator_node),
        ("review_generator", review_generator_node),
    ):
        workflow.add_node(node_name, node_fn)

    workflow.set_entry_point("score_generator")

    # Linear flow: score_generator -> review_generator -> END.
    for source, target in (
        ("score_generator", "review_generator"),
        ("review_generator", END),
    ):
        workflow.add_edge(source, target)

    return workflow.compile()

In [None]:
# --- 4. Single Request Execution with Latency Measurement ---

async def run_single_request(app, initial_state: dict) -> tuple:
    """
    Await ``app.ainvoke(initial_state)`` once and time the call.

    Returns:
        (final_state, latency_seconds)
    """
    started_at = time.perf_counter()
    result_state = await app.ainvoke(initial_state)
    elapsed_seconds = time.perf_counter() - started_at
    return result_state, elapsed_seconds


# --- Run single request and show latency ---
print("--- Single Request Execution ---\n")

app = build_graph()

initial_state = {
    "item_name": "Quantum Entanglement Module v1.2",
    "complexity_level": "High/Experimental"
}

final_state, latency = await run_single_request(app, initial_state)

print(f"\n--- Result ---")
print(f"Score: {final_state.get('score')}")
try:
    print(f"Review: {json.dumps(json.loads(final_state.get('review_data')), indent=2)}")
except:
    print(f"Review: {final_state.get('review_data')}")

print(f"\n--- Latency ---")
print(f"Latency: {latency:.3f} seconds ({latency*1000:.1f} ms)")

In [None]:
# --- 5. Throughput Measurement ---

async def measure_throughput(app, initial_state: dict, num_requests: int = 5) -> dict:
    """
    Run multiple requests one after another and measure latency and throughput.

    Args:
        app: Object exposing an awaitable ``ainvoke(state)`` (the compiled LangGraph)
        initial_state: Input state passed to every request
        num_requests: Number of requests to run; must be at least 1

    Returns:
        Dictionary with latency and throughput metrics. Requests are issued
        sequentially, so throughput is ``num_requests`` divided by the total
        wall-clock time of the loop.

    Raises:
        ValueError: if ``num_requests`` is less than 1.
    """
    if num_requests < 1:
        # Without samples the average/min/max below are undefined.
        raise ValueError("num_requests must be at least 1")

    latencies = []

    print(f"Running {num_requests} requests...\n")

    total_start = time.perf_counter()

    for i in range(num_requests):
        start = time.perf_counter()
        await app.ainvoke(initial_state)
        latency = time.perf_counter() - start

        latencies.append(latency)
        print(f"  Request {i+1}: {latency:.3f}s")

    total_time = time.perf_counter() - total_start

    # Guard the division in case the clock reports no elapsed time.
    throughput = num_requests / total_time if total_time > 0 else float("inf")

    return {
        "num_requests": num_requests,
        "total_time_sec": total_time,
        "avg_latency_sec": sum(latencies) / len(latencies),
        "min_latency_sec": min(latencies),
        "max_latency_sec": max(latencies),
        "throughput_rps": throughput  # requests per second
    }


# --- Run throughput test ---
print("--- Throughput Measurement ---\n")

app = build_graph()

initial_state = {
    "item_name": "Quantum Entanglement Module v1.2",
    "complexity_level": "High/Experimental"
}

metrics = await measure_throughput(app, initial_state, num_requests=5)

print(f"\n" + "="*40)
print("RESULTS")
print("="*40)
print(f"Total requests:    {metrics['num_requests']}")
print(f"Total time:        {metrics['total_time_sec']:.3f} sec")
print(f"")
print(f"Avg latency:       {metrics['avg_latency_sec']:.3f} sec ({metrics['avg_latency_sec']*1000:.1f} ms)")
print(f"Min latency:       {metrics['min_latency_sec']:.3f} sec")
print(f"Max latency:       {metrics['max_latency_sec']:.3f} sec")
print(f"")
print(f"Throughput:        {metrics['throughput_rps']:.4f} requests/second")
print("="*40)