In [None]:
import os
# Register the API key variables WITHOUT clobbering values that are already
# present in the environment. Unconditionally assigning "" would erase a real
# key exported outside the notebook, and an empty GEMINI_API_KEY makes
# CustomHTTPGemini raise "GEMINI_API_KEY must be provided ..." on construction.
# Prefer exporting the keys in the shell / secret manager over pasting them here.
os.environ.setdefault("GOOGLE_API_KEY", "")
os.environ.setdefault("GEMINI_API_KEY", "")

In [None]:
import os
import requests
import json
import time
from typing import Any, List, Optional, TypedDict
from langchain_core.prompts import PromptTemplate
from langchain_core.language_models.llms import BaseLLM
from langchain_core.outputs import LLMResult
from pydantic import Field  # Changed from langchain_core.pydantic_v1
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda
# LangGraph Imports
from langgraph.graph import StateGraph, END

In [None]:


# --- 1. Custom LLM Implementation ---

class CustomHTTPGemini(BaseLLM):
    """
    A custom LangChain LLM wrapper that interacts with the Google Gemini API
    using direct HTTP requests (POST to the ``generateContent`` endpoint), with
    optional support for JSON output via ``response_schema``.

    Attributes:
        api_key: Gemini API key. When not passed explicitly it is read from the
            ``GEMINI_API_KEY`` environment variable at construction time.
        model_name: Model identifier appended to ``base_url`` (constructor
            alias: ``model``).
        base_url: Prefix of the REST endpoint; the model name and
            ``:generateContent`` are appended to it.
        response_schema: Optional schema dict. When set, the request asks for
            ``application/json`` output constrained by this schema.
        request_timeout: Seconds to wait for the HTTP call before giving up.
    """

    # Model and API Configuration
    api_key: Optional[str] = None
    model_name: str = Field(default="gemini-2.5-flash", alias="model")
    base_url: str = "https://generativelanguage.googleapis.com/v1beta/models/"
    # JSON schema definition forwarded as generationConfig.responseSchema
    response_schema: Optional[dict] = None
    # Upper bound (seconds) for the HTTP request so a stalled connection
    # cannot block the caller indefinitely.
    request_timeout: float = 60.0

    def __init__(self, **kwargs: Any):
        """
        Build the wrapper and resolve the API key.

        Raises:
            ValueError: if no API key was passed and ``GEMINI_API_KEY`` is
                unset or empty.
        """
        # `model_name` is declared with alias="model". Unless the inherited
        # pydantic config allows population by field name, a `model_name=...`
        # keyword may not reach the field (NOTE(review): confirm against the
        # installed langchain_core). Mapping it onto the alias works either way.
        if "model_name" in kwargs and "model" not in kwargs:
            kwargs["model"] = kwargs.pop("model_name")

        super().__init__(**kwargs)

        # Prefer the explicitly passed key, then fall back to the environment.
        if not self.api_key:
            self.api_key = os.getenv("GEMINI_API_KEY")

        if not self.api_key:
            raise ValueError("GEMINI_API_KEY must be provided or set as an environment variable.")

    @property
    def _llm_type(self) -> str:
        """Return type of LLM."""
        return "custom_http_gemini"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """
        Send ``prompt`` to the Gemini ``generateContent`` endpoint and return
        the generated text.

        Args:
            prompt: Text sent as the single user part of the request.
            stop: Accepted for interface compatibility; not forwarded to the API.
            run_manager: Accepted for interface compatibility; unused.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The generated text. With ``response_schema`` set this is the raw
            JSON string produced by the model.

        Raises:
            RuntimeError: on HTTP error statuses, transport/decoding failures,
                or when the response contains no generated text.
        """
        # 1. Endpoint for the configured model. The key is deliberately NOT put
        #    in the query string: URLs show up in exception messages and logs.
        url = f"{self.base_url}{self.model_name}:generateContent"

        # 2. Headers; the API key travels in the x-goog-api-key header.
        headers = {
            "Content-Type": "application/json",
            "x-goog-api-key": self.api_key,
        }

        # 3. Request body following the Gemini API spec
        request_data: dict = {
            "contents": [
                {
                    "parts": [
                        {
                            "text": prompt
                        }
                    ]
                }
            ]
        }

        # 4. Ask for schema-constrained JSON output when a schema is configured
        if self.response_schema:
            request_data["generationConfig"] = {
                "responseMimeType": "application/json",
                "responseSchema": self.response_schema
            }

        # 5. Send the request. Only transport / status / decoding problems are
        #    handled here; response-shape problems are reported separately below.
        try:
            response = requests.post(
                url=url,
                headers=headers,
                json=request_data,
                timeout=self.request_timeout,
            )
            response.raise_for_status()  # Raise exception for bad status codes
            response_json = response.json()
        except requests.exceptions.HTTPError as err:
            error_message = f"Gemini API HTTP Error ({err.response.status_code}): {err.response.text}"
            raise RuntimeError(error_message) from err
        except Exception as e:
            raise RuntimeError(f"An unexpected error occurred during API call: {e}") from e

        # 6. Extract the generated text from the structured JSON response.
        candidates = response_json.get("candidates") if isinstance(response_json, dict) else None
        if not candidates:
            feedback = response_json.get("promptFeedback") if isinstance(response_json, dict) else None
            raise RuntimeError(
                f"Gemini API returned no candidates (promptFeedback: {feedback})"
            )

        first_candidate = candidates[0]
        parts = (first_candidate.get("content") or {}).get("parts") or []
        # Concatenate every text part; parts flagged as model "thought" are
        # skipped so they cannot corrupt JSON output.
        text_parts = [
            part["text"]
            for part in parts
            if isinstance(part, dict) and "text" in part and not part.get("thought")
        ]
        if not text_parts:
            raise RuntimeError(
                "Gemini API response contained no text "
                f"(finishReason: {first_candidate.get('finishReason')})"
            )

        return "".join(text_parts)

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """
        Batch entry point: run ``_call`` once per prompt, sequentially.

        Returns:
            An ``LLMResult`` whose ``generations`` holds one single-element
            list per input prompt, in input order.
        """
        # Local import keeps this block self-contained; the module only
        # imports LLMResult from langchain_core.outputs at the top of the file.
        from langchain_core.outputs import Generation

        generations = [
            [Generation(text=self._call(prompt, stop, run_manager, **kwargs))]
            for prompt in prompts
        ]
        return LLMResult(generations=generations)


In [None]:


# --- 2. LangGraph AGENT DEFINITIONS (Replaces LCEL Cascading) ---

# 2.1. Define the State for the Graph
# TypedDict helps ensure predictable data flow between nodes
class AgentState(TypedDict):
    """
    The state of the graph, holding data passed between nodes.

    The schema is handed to ``StateGraph`` in ``build_graph``; each node
    receives the current state and returns a dict with the keys it updates.
    """
    # Name of the item being reviewed; supplied in the initial state and
    # interpolated into both agents' prompts.
    item_name: str
    # Free-text complexity label; supplied in the initial state and
    # interpolated into both agents' prompts.
    complexity_level: str
    # Written by score_generator_node, read by review_generator_node.
    # Not present in the initial state passed to app.invoke().
    score: Optional[float]
    # Written by review_generator_node: the raw JSON string returned by the
    # model (it is not parsed before being stored).
    review_data: Optional[str]


# --- JSON Schema Definitions (Mandatory for Gemini JSON mode) ---

# Schema for Agent 1: Score
# Response schema for Agent 1: an object carrying a single numeric `score`.
SCORE_SCHEMA = dict(
    type="OBJECT",
    properties=dict(
        score=dict(
            type="NUMBER",
            description="A random technical score between 0.0 and 1.0.",
        ),
    ),
    required=["score"],
    propertyOrdering=["score"],
)

# Response schema for Agent 2: an object with the review text and its category.
REVIEW_SCHEMA = dict(
    type="OBJECT",
    properties=dict(
        review_text=dict(
            type="STRING",
            description="A concise, technical review.",
        ),
        category=dict(
            type="STRING",
            description="The category of the review (e.g., 'Positive', 'Neutral', 'Negative').",
        ),
    ),
    required=["review_text", "category"],
    propertyOrdering=["review_text", "category"],
)


# 2.2. Node for Agent 1: Score Generator
def _parse_score(raw_json_output: str, fallback: float = 0.5) -> float:
    """Return the numeric ``score`` from the model's JSON reply, or ``fallback`` if it is unusable."""
    try:
        score_data = json.loads(raw_json_output)
    except (TypeError, json.JSONDecodeError) as e:
        print(f"Error decoding JSON from Agent 1: {e}. Falling back to score {fallback}.")
        return fallback

    # The reply must be an object with a numeric "score". bool is excluded
    # explicitly because it is a subclass of int.
    score = score_data.get("score") if isinstance(score_data, dict) else None
    if isinstance(score, bool) or not isinstance(score, (int, float)):
        print(f"Agent 1 returned no numeric 'score' ({score_data!r}). Falling back to score {fallback}.")
        return fallback

    return float(score)


def score_generator_node(state: AgentState) -> dict:
    """
    Generates a technical score (0.0 to 1.0) for the item.

    Args:
        state: Graph state; ``item_name`` and ``complexity_level`` are read.

    Returns:
        ``{"score": <float>}`` as the state update. When the model reply is
        not valid JSON, is not an object, or lacks a numeric ``score``, the
        fallback value 0.5 is used.
    """
    print("--- [Agent 1] Executing: Score Generator ---")

    # 1. Initialize LLM with Score Schema. The model is passed through the
    #    field's declared alias (`model`), which is the spelling the
    #    constructor is guaranteed to accept for an aliased field.
    llm_score_generator = CustomHTTPGemini(model="gemini-2.5-flash", response_schema=SCORE_SCHEMA)

    # 2. Construct Prompt
    prompt_1 = PromptTemplate.from_template(
        "You are a technical analyst. Your task is to assign a random score between 0.0 and 1.0 to the '{item_name}' based on its complexity '{complexity_level}'. Output the result strictly in JSON format according to the schema."
    )
    prompt_value = prompt_1.format(
        item_name=state['item_name'],
        complexity_level=state['complexity_level']
    )

    # 3. Invoke LLM
    raw_json_output = llm_score_generator.invoke(prompt_value)

    # 4. Parse JSON and update state
    score = _parse_score(raw_json_output)
    print(f"--- [Agent 1] Generated Score: {score} ---")
    return {"score": score}  # Return the update to the state


# 2.3. Node for Agent 2: Review Generator
def review_generator_node(state: AgentState) -> dict:
    """
    Generates a detailed review based on the generated score and context.

    Args:
        state: Graph state; ``item_name``, ``complexity_level`` and ``score``
            are read, so this node must run after the score generator.

    Returns:
        ``{"review_data": <str>}`` where the value is the raw JSON string
        returned by the model (it is not parsed or validated here).
    """
    print("\n--- [Agent 2] Executing: Review Generator ---")

    # 1. Initialize LLM with Review Schema. The model is passed through the
    #    field's declared alias (`model`), which is the spelling the
    #    constructor is guaranteed to accept for an aliased field.
    llm_review_generator = CustomHTTPGemini(model="gemini-2.5-flash", response_schema=REVIEW_SCHEMA)

    # 2. Construct Prompt (uses 'score' from the state, generated by Agent 1)
    prompt_2 = PromptTemplate.from_template(
        "Generate a technical review for the item '{item_name}' which has a complexity of '{complexity_level}' and received a technical score of {score}. Your review must reflect this score. Output the review strictly in JSON format according to the schema."
    )

    prompt_value = prompt_2.format(
        item_name=state['item_name'],
        complexity_level=state['complexity_level'],
        score=state['score']  # Crucially, read the score from the state
    )

    # 3. Invoke LLM
    raw_json_output = llm_review_generator.invoke(prompt_value)

    # 4. Update state with the final review data (raw JSON string)
    print("--- [Agent 2] Generated Review Data ---")
    return {"review_data": raw_json_output}


In [None]:
# --- 3. Build the Graph ---

def build_graph():
    """
    Assemble the two-step pipeline (score generator -> review generator -> END)
    over ``AgentState`` and return the compiled graph.
    """
    workflow = StateGraph(AgentState)

    # Register the nodes in execution order.
    node_table = (
        ("score_generator", score_generator_node),
        ("review_generator", review_generator_node),
    )
    for node_name, node_fn in node_table:
        workflow.add_node(node_name, node_fn)

    workflow.set_entry_point("score_generator")

    # Linear wiring: each node hands over to the next, the last one ends the run.
    edge_table = (
        ("score_generator", "review_generator"),
        ("review_generator", END),
    )
    for source, target in edge_table:
        workflow.add_edge(source, target)

    return workflow.compile()

In [None]:
# --- 4. Single Request with Latency Measurement ---

print("--- Single Request Execution ---\n")

app = build_graph()

initial_state = {
    "item_name": "Quantum Entanglement Module v1.2",
    "complexity_level": "High/Experimental"
}

# Time one end-to-end graph run (both agent nodes, executed one after the other).
start_time = time.perf_counter()
final_state = app.invoke(initial_state)
end_time = time.perf_counter()

latency = end_time - start_time

print("\n--- Result ---")
print(f"Score: {final_state.get('score')}")
try:
    # Pretty-print the review when it is valid JSON.
    print(f"Review: {json.dumps(json.loads(final_state.get('review_data')), indent=2)}")
except (TypeError, ValueError):
    # TypeError: review_data is missing (None); ValueError covers
    # json.JSONDecodeError for malformed JSON. Anything else (including
    # KeyboardInterrupt) is allowed to propagate instead of being swallowed.
    print(f"Review: {final_state.get('review_data')}")

print("\n--- Latency ---")
print(f"Latency: {latency:.3f} seconds ({latency*1000:.1f} ms)")

In [None]:
# --- 5. Throughput Measurement ---

def measure_throughput(app, initial_state: dict, num_requests: int = 5) -> dict:
    """
    Run multiple sequential requests and measure latency and throughput.

    Args:
        app: Object exposing ``invoke(state)`` (e.g. the compiled graph).
        initial_state: State passed unchanged to every ``app.invoke`` call.
        num_requests: Number of sequential invocations; must be at least 1.

    Returns:
        Dictionary with ``num_requests``, ``total_time_sec``,
        ``avg_latency_sec``, ``min_latency_sec``, ``max_latency_sec`` and
        ``throughput_rps`` (requests per second over the whole run, which
        includes the per-request progress printing).

    Raises:
        ValueError: if ``num_requests`` is less than 1 (there would be no
            samples to average).
    """
    # Fail fast with a clear message instead of a ZeroDivisionError after the
    # (empty) measurement loop.
    if num_requests < 1:
        raise ValueError(f"num_requests must be at least 1, got {num_requests}")

    latencies = []

    print(f"Running {num_requests} requests...\n")

    total_start = time.perf_counter()

    for i in range(num_requests):
        start = time.perf_counter()
        app.invoke(initial_state)
        latency = time.perf_counter() - start

        latencies.append(latency)
        print(f"  Request {i+1}: {latency:.3f}s")

    total_time = time.perf_counter() - total_start

    # A zero elapsed time is only possible with a coarse clock and a trivial
    # `app`; report infinite throughput rather than dividing by zero.
    throughput = num_requests / total_time if total_time > 0 else float("inf")

    return {
        "num_requests": num_requests,
        "total_time_sec": total_time,
        "avg_latency_sec": sum(latencies) / len(latencies),
        "min_latency_sec": min(latencies),
        "max_latency_sec": max(latencies),
        "throughput_rps": throughput  # requests per second
    }


# --- Run throughput test ---
print("--- Throughput Measurement ---\n")

initial_state = dict(
    item_name="Quantum Entanglement Module v1.2",
    complexity_level="High/Experimental",
)
app = build_graph()

metrics = measure_throughput(app, initial_state, num_requests=5)

# Emit the report in a single call; sep="\n" produces exactly the same lines
# as printing each row separately.
print(
    "\n" + "=" * 40,
    "RESULTS",
    "=" * 40,
    f"Total requests:    {metrics['num_requests']}",
    f"Total time:        {metrics['total_time_sec']:.3f} sec",
    "",
    f"Avg latency:       {metrics['avg_latency_sec']:.3f} sec ({metrics['avg_latency_sec']*1000:.1f} ms)",
    f"Min latency:       {metrics['min_latency_sec']:.3f} sec",
    f"Max latency:       {metrics['max_latency_sec']:.3f} sec",
    "",
    f"Throughput:        {metrics['throughput_rps']:.4f} requests/second",
    "=" * 40,
    sep="\n",
)