In [1]:
import os
# Make sure both key variables exist without clobbering values that were
# already exported in the shell / kernel environment. Assigning "" directly
# would overwrite a real key with an empty one and make the later
# GEMINI_API_KEY check fail.
# Prefer exporting the key outside the notebook; never commit a real key here.
for _api_key_var in ("GOOGLE_API_KEY", "GEMINI_API_KEY"):
    os.environ.setdefault(_api_key_var, "")

In [2]:
import copy
import json
import os
from typing import Any, List, Optional, TypedDict

import requests
from langchain_core.language_models.llms import BaseLLM
from langchain_core.output_parsers import StrOutputParser
from langchain_core.outputs import LLMResult
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field, validator

# LangGraph Imports
from langgraph.graph import StateGraph, END

In [3]:


# --- 1. Custom LLM Implementation ---
class CustomHTTPGemini(BaseLLM):
    """
    A custom LangChain LLM wrapper that interacts with the Google Gemini API
    using direct HTTP requests (POST to the generateContent endpoint).

    The caller supplies a base request payload through the ``request_data``
    keyword argument. The prompt is injected into
    ``contents[0].parts[0].text`` of a private copy of that payload and, when
    a response schema is configured, JSON-mode generation settings are added
    before the request is sent.
    """

    # Model and API Configuration
    # API key; when not passed explicitly it is read from GEMINI_API_KEY.
    api_key: Optional[str] = None
    # NOTE(review): this field is declared with alias="model". Unless the
    # pydantic config inherited from BaseLLM allows population by field name,
    # only the ``model=...`` keyword is honoured by the constructor —
    # TODO confirm against the installed langchain-core version.
    model_name: str = Field(default="gemini-2.5-flash", alias="model")
    base_url: str = "https://generativelanguage.googleapis.com/v1beta/models/"

    # Field to hold the JSON schema definition for generationConfig
    response_schema: Optional[dict] = None

    # Seconds to wait for the HTTP call before giving up. Without a timeout
    # `requests` can block forever on a stalled connection.
    request_timeout: float = 120.0

    def __init__(self, **kwargs: Any):
        """
        Build the wrapper and resolve the API key.

        Raises:
            ValueError: if no key was passed and GEMINI_API_KEY is unset/empty.
        """
        super().__init__(**kwargs)
        # Prefer the explicitly passed key, then the environment variable.
        if not self.api_key:
            self.api_key = os.getenv("GEMINI_API_KEY")

        if not self.api_key:
            raise ValueError("GEMINI_API_KEY must be provided or set as an environment variable.")

    @property
    def _llm_type(self) -> str:
        """Return type of LLM."""
        return "custom_http_gemini"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """
        Send one prompt to the Gemini ``generateContent`` endpoint.

        Args:
            prompt: Text injected into ``contents[0].parts[0].text``.
            stop: Accepted for interface compatibility; not used.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Must contain ``request_data`` (the base payload). May
                contain ``response_schema``, used when the instance has none.

        Returns:
            The text of the first part of the first candidate.

        Raises:
            ValueError: if ``request_data`` is missing or lacks the
                ``contents[0].parts[0]`` slot.
            RuntimeError: on HTTP errors, transport errors, or a response
                that carries no generated text.
        """

        print(f"\n--- LLM Invoked for prompt (truncated): {prompt[:50]}... ---")

        # 1. Endpoint for the specific model and method. The key is NOT put in
        #    the query string: URLs end up in exception messages and logs.
        url = f"{self.base_url}{self.model_name}:generateContent"

        # 2. HTTP headers; the API key travels in the x-goog-api-key header.
        headers = {
            "Content-Type": "application/json",
            "x-goog-api-key": self.api_key,
        }

        # --- Determine Request Payload (mandatory via kwargs) ---
        request_data = kwargs.get("request_data")

        if request_data is None:
            raise ValueError("The 'request_data' dictionary must be explicitly passed in kwargs when calling _call().")

        # The payload must expose a dict at contents[0].parts[0]; an empty
        # `parts` list is rejected here instead of failing with IndexError.
        try:
            has_text_slot = isinstance(request_data["contents"][0]["parts"][0], dict)
        except (KeyError, IndexError, TypeError):
            has_text_slot = False

        if not has_text_slot:
            raise ValueError("The passed 'request_data' must contain the structure: ['contents'][0]['parts'][0]['text'] where the prompt will be injected.")

        # Work on a private copy so the caller's dict is never mutated (it may
        # be reused across prompts, e.g. from _generate).
        payload = copy.deepcopy(request_data)
        print("Using custom request payload from **kwargs, injecting prompt.")
        payload["contents"][0]["parts"][0]["text"] = prompt
        # --- END Request Payload Logic ---

        # --- JSON GENERATION CONFIG LOGIC ---
        # Prioritize schema from instance, then from kwargs
        schema = self.response_schema or kwargs.get("response_schema")

        if schema:
            print("Applying JSON schema for structured output.")
            payload.setdefault("generationConfig", {}).update({
                "responseMimeType": "application/json",
                "responseSchema": schema
            })
        # --- END JSON CONFIG LOGIC ---

        # 3. Send the request
        try:
            response = requests.post(
                url=url,
                headers=headers,
                json=payload,
                timeout=self.request_timeout,
            )
            response.raise_for_status()  # Raise exception for bad status codes
            response_json = response.json()
        except requests.exceptions.HTTPError as err:
            error_message = f"Gemini API HTTP Error ({err.response.status_code}): {err.response.text}"
            raise RuntimeError(error_message) from err
        except Exception as e:
            raise RuntimeError(f"An unexpected error occurred during API call: {e}") from e

        # 4. Extract the generated text from the structured JSON response.
        #    A reply without candidates/parts gets an explicit error rather
        #    than a bare "'candidates'" message.
        try:
            return response_json['candidates'][0]['content']['parts'][0]['text']
        except (KeyError, IndexError, TypeError) as e:
            raise RuntimeError(
                "An unexpected error occurred during API call: the response "
                f"contained no generated text ({e!r}). Response (truncated): "
                f"{str(response_json)[:300]}"
            ) from e

    # Note: _generate is required by BaseLLM
    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """
        Call the LLM on a list of prompts.

        Each prompt is sent through ``_call`` with the same ``**kwargs`` (so
        ``request_data`` must be present there) and produces one generation.
        """
        generations = []
        for prompt in prompts:
            # Pass **kwargs through to _call
            text = self._call(prompt, stop, run_manager, **kwargs)
            generations.append([{"text": text}])  # Wrap the result in the expected structure
        return LLMResult(generations=generations)




In [4]:

# --- 2. LangGraph AGENT DEFINITIONS ---

# 2.1. Define the State for the Graph (Includes the input record data)
class AgentState(TypedDict):
    """
    The state of the graph, holding data passed between nodes.

    One state dictionary is created per input record. The first three keys
    are supplied by the caller; ``score`` and ``review_data`` are filled in
    by the score generator and review generator nodes respectively.
    """
    # Name of the item under review; interpolated into both agents' prompts.
    item_name: str
    # Free-text complexity label of the item; interpolated into both prompts.
    complexity_level: str
    # Instruction text shared by all records; interpolated into both prompts.
    user_prompt: str
    # Score returned by score_generator_node and read by review_generator_node.
    score: Optional[float]
    # Raw string returned by the review generator's LLM call (unparsed).
    review_data: Optional[str]


# --- JSON Schema Definitions (Mandatory for Gemini JSON mode) ---

def _object_schema(properties: dict) -> dict:
    """
    Wrap ``properties`` in an OBJECT schema in which every property is
    required and the property ordering follows the declaration order.
    """
    field_names = list(properties)
    return {
        "type": "OBJECT",
        "properties": properties,
        "required": list(field_names),
        "propertyOrdering": list(field_names),
    }


# Schema for Agent 1: Score
SCORE_SCHEMA = _object_schema({
    "score": {
        "type": "NUMBER",
        "description": "A random technical score between 0.0 and 1.0.",
    },
})

# Schema for Agent 2: Detailed Review
REVIEW_SCHEMA = _object_schema({
    "review_text": {
        "type": "STRING",
        "description": "A concise, technical review.",
    },
    "category": {
        "type": "STRING",
        "description": "The category of the review (e.g., 'Positive', 'Neutral', 'Negative').",
    },
})


# 2.2. Node for Agent 1: Score Generator
def score_generator_node(state: AgentState) -> dict:
    """
    Generates a technical score (0.0 to 1.0) for the item.

    Reads ``item_name``, ``complexity_level`` and ``user_prompt`` from the
    state, asks the LLM for a JSON object holding a ``score``, and returns a
    partial state update ``{"score": <float>}``. If the reply cannot be turned
    into a usable number, the score falls back to 0.5.
    """
    print("--- [Agent 1] Executing: Score Generator ---")

    # 1. Initialize LLM with Score Schema.
    #    The model field is declared with alias "model", so the alias keyword
    #    is used here; it is accepted regardless of the pydantic configuration.
    llm_score_generator = CustomHTTPGemini(model="gemini-2.5-flash", response_schema=SCORE_SCHEMA)

    # 2. Construct Prompt using the variables from the State
    prompt_1 = PromptTemplate.from_template(
        "You are a technical analyst. Your task is to assign a random score between 0.0 and 1.0 to the '{item_name}' based on its complexity '{complexity_level}'. Use the following user instruction as context: '{user_prompt}'. Output the result strictly in JSON format according to the schema."
    )
    prompt_value = prompt_1.format(
        item_name=state['item_name'],
        complexity_level=state['complexity_level'],
        user_prompt=state['user_prompt']
    )

    # Base request structure required by CustomHTTPGemini._call; the empty
    # text is the placeholder the prompt gets injected into.
    base_request_data = {
        "contents": [{"parts": [{"text": ""}]}]
    }

    # 3. Invoke LLM: Using _call instead of invoke, passing the base payload
    raw_json_output = llm_score_generator._call(
        prompt_value,
        request_data=base_request_data
    )

    # 4. Parse JSON and update state
    try:
        score = _extract_score(raw_json_output)
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        print(f"Error decoding JSON from Agent 1: {e}. Falling back to score 0.5.")
        return {"score": 0.5}

    print(f"--- [Agent 1] Generated Score: {score} ---")
    return {"score": score}  # Return the update to the state


def _extract_score(raw_json_output: Any) -> float:
    """
    Parse the LLM reply and return its ``score`` clamped to [0.0, 1.0].

    Raises:
        ValueError: if the reply is not valid JSON, is not a JSON object, or
            its ``score`` is missing or not a number.
    """
    try:
        payload = json.loads(raw_json_output)
    except TypeError as e:
        raise ValueError(f"expected a JSON string, got {type(raw_json_output).__name__}") from e

    if not isinstance(payload, dict):
        raise ValueError("top-level JSON value is not an object")

    value = payload.get("score")
    # bool is an int subclass and NaN compares unequal to itself; neither is
    # a usable score.
    if isinstance(value, bool) or not isinstance(value, (int, float)) or value != value:
        raise ValueError(f"'score' is missing or not a number: {value!r}")

    # Clamp before converting so that huge JSON integers cannot overflow float().
    return float(min(1.0, max(0.0, value)))


# 2.3. Node for Agent 2: Review Generator
def review_generator_node(state: AgentState) -> dict:
    """
    Generates a detailed review based on the generated score and context.

    Reads ``item_name``, ``complexity_level``, ``score`` and ``user_prompt``
    from the state and returns a partial state update
    ``{"review_data": <raw LLM output string>}``. The output is not parsed
    here.
    """
    print("\n--- [Agent 2] Executing: Review Generator ---")

    # 1. Initialize LLM with Review Schema.
    #    The model field is declared with alias "model", so the alias keyword
    #    is used here; it is accepted regardless of the pydantic configuration.
    llm_review_generator = CustomHTTPGemini(model="gemini-2.5-flash", response_schema=REVIEW_SCHEMA)

    # 2. Construct Prompt (uses 'score' and 'user_prompt' from the state)
    prompt_2 = PromptTemplate.from_template(
        "Generate a technical review for the item '{item_name}' which has a complexity of '{complexity_level}' and received a technical score of {score}. Your review must reflect this score and adhere to the original instruction: '{user_prompt}'. Output the review strictly in JSON format according to the schema."
    )

    prompt_value = prompt_2.format(
        item_name=state['item_name'],
        complexity_level=state['complexity_level'],
        score=state['score'],
        user_prompt=state['user_prompt']
    )

    # Base request structure required by CustomHTTPGemini._call; the empty
    # text is the placeholder the prompt gets injected into.
    base_request_data = {
        "contents": [{"parts": [{"text": ""}]}]
    }

    # 3. Invoke LLM: Using _call instead of invoke, passing the base payload
    raw_json_output = llm_review_generator._call(
        prompt_value,
        request_data=base_request_data
    )

    # 4. Update state with the final review data (raw JSON string)
    print("--- [Agent 2] Generated Review Data ---")
    return {"review_data": raw_json_output}


In [5]:


# --- 2.4. LangGraph Setup and Execution (Batch Processing) ---

if __name__ == "__main__":
    # Batch driver: builds the two-step graph (score -> review), runs it once
    # per record and prints the consolidated results.

    # --- Setup ---
    print("--- LangGraph Custom HTTP Gemini BATCH PROCESSING Example ---")

    # NOTE: Set your API Key in your environment before running:
    # export GEMINI_API_KEY="YOUR_API_KEY_HERE"

    # Define the generic user prompt that applies to all records
    GENERAL_USER_PROMPT = "Analyze the item's technical sophistication and market potential, focusing only on the technical aspects and giving brief, highly critical feedback."

    # Ensure API Key is available before starting the graph
    if not os.getenv("GEMINI_API_KEY"):
        print("\nERROR: GEMINI_API_KEY environment variable not set.")
        print("Please set the GEMINI_API_KEY environment variable and try again.")
        # `exit()` is a site/IPython helper and tears down the kernel in a
        # notebook; raising SystemExit stops this cell without relying on it.
        raise SystemExit(1)

    # --- Build the Graph ---
    graph_builder = StateGraph(AgentState)

    # Add the nodes (agents) to the graph
    graph_builder.add_node("score_generator", score_generator_node)
    graph_builder.add_node("review_generator", review_generator_node)

    # Set the entry point
    graph_builder.set_entry_point("score_generator")

    # Define the sequence: Score Generator -> Review Generator -> END
    graph_builder.add_edge("score_generator", "review_generator")
    graph_builder.add_edge("review_generator", END)

    # Compile the graph into a runnable application
    app = graph_builder.compile()

    # --- CSV Simulation for Batch Processing ---
    # This list simulates records read from a CSV file
    csv_records = [
        {"item_name": "AI Debugging Assistant v3.0", "complexity_level": "High/Production"},
        {"item_name": "Basic E-commerce Frontend", "complexity_level": "Low/Simple"},
        {"item_name": "Neural Network Optimizer v2", "complexity_level": "Medium/Advanced"},
    ]

    all_results = []

    print("\nStarting batch processing of CSV records...")

    for i, record in enumerate(csv_records):
        print("\n========================================================")
        print(f"  PROCESSING RECORD {i + 1}/{len(csv_records)}: {record['item_name']}")
        print("========================================================")

        # 1. Prepare the initial state for the current record
        # LangGraph requires a single state dictionary input.
        initial_state = {
            "item_name": record["item_name"],
            "complexity_level": record["complexity_level"],
            "user_prompt": GENERAL_USER_PROMPT
        }

        # 2. Invoke the graph for the current record
        final_state = app.invoke(initial_state)

        final_review_json = final_state.get('review_data')
        final_score = final_state.get('score')

        # 3. Process and consolidate results
        try:
            # Attempt to parse the final JSON review data. TypeError covers a
            # missing review (json.loads(None)).
            parsed_review = json.loads(final_review_json)
        except (json.JSONDecodeError, TypeError):
            parsed_review = None
        if not isinstance(parsed_review, dict):
            # Also reached for valid JSON that is not an object, which would
            # otherwise break the .get() calls below.
            parsed_review = {"review_text": "JSON decode failed", "category": "Error"}

        result = {
            "record_id": i + 1,
            "item_name": record["item_name"],
            "complexity_level": record["complexity_level"],
            "generated_score": final_score,
            "review_category": parsed_review.get('category', 'N/A'),
            "review_text": parsed_review.get('review_text', 'N/A')
        }
        all_results.append(result)

    print("\n========================================================")
    print("ALL BATCH PROCESSING COMPLETE. CONSOLIDATED RESULTS:")
    print("========================================================")

    for result in all_results:
        # A missing/non-numeric score must not crash the report formatting.
        score_value = result['generated_score']
        score_text = f"{score_value:.2f}" if isinstance(score_value, (int, float)) else "N/A"

        print(f"\nRecord ID: {result['record_id']} ({result['item_name']})")
        print(f"  Complexity: {result['complexity_level']}")
        print(f"  Score: {score_text} | Category: {result['review_category']}")
        print(f"  Review: {result['review_text']}")

    print("\n--- Raw JSON List of All Results ---")
    print(json.dumps(all_results, indent=2))
    print("\n--- End of Batch Execution ---")

--- LangGraph Custom HTTP Gemini BATCH PROCESSING Example ---

Starting batch processing of CSV records...

  PROCESSING RECORD 1/3: AI Debugging Assistant v3.0
--- [Agent 1] Executing: Score Generator ---

--- LLM Invoked for prompt (truncated): You are a technical analyst. Your task is to assig... ---
Using custom request payload from **kwargs, injecting prompt.
Applying JSON schema for structured output.
--- [Agent 1] Generated Score: 0.7325 ---

--- [Agent 2] Executing: Review Generator ---

--- LLM Invoked for prompt (truncated): Generate a technical review for the item 'AI Debug... ---
Using custom request payload from **kwargs, injecting prompt.
Applying JSON schema for structured output.
--- [Agent 2] Generated Review Data ---

  PROCESSING RECORD 2/3: Basic E-commerce Frontend
--- [Agent 1] Executing: Score Generator ---

--- LLM Invoked for prompt (truncated): You are a technical analyst. Your task is to assig... ---
Using custom request payload from **kwargs, injecting promp

In [None]:

if __name__ == "__main__":
    # Batch driver (score-only variant): builds a graph that contains just the
    # score generator, runs it once per record and prints the scores.

    # --- Setup ---
    print("--- LangGraph Custom HTTP Gemini BATCH PROCESSING Example ---")

    # NOTE: Set your API Key in your environment before running:
    # export GEMINI_API_KEY="YOUR_API_KEY_HERE"

    # Define the generic user prompt that applies to all records
    GENERAL_USER_PROMPT = "Analyze the item's technical sophistication and market potential, focusing only on the technical aspects and giving brief, highly critical feedback."

    # Ensure API Key is available before starting the graph
    if not os.getenv("GEMINI_API_KEY"):
        print("\nERROR: GEMINI_API_KEY environment variable not set.")
        print("Please set the GEMINI_API_KEY environment variable and try again.")
        # `exit()` is a site/IPython helper and tears down the kernel in a
        # notebook; raising SystemExit stops this cell without relying on it.
        raise SystemExit(1)

    # --- Build the Graph ---
    graph_builder = StateGraph(AgentState)

    # Only the score generator is registered. The review generator is
    # intentionally left out of this variant: a node without any incoming
    # edge never runs, and some LangGraph releases reject unreachable nodes
    # when the graph is compiled.
    graph_builder.add_node("score_generator", score_generator_node)

    # Set the entry point
    graph_builder.set_entry_point("score_generator")

    # Define the sequence: Score Generator -> END
    graph_builder.add_edge("score_generator", END)

    # Compile the graph into a runnable application
    app = graph_builder.compile()

    # --- CSV Simulation for Batch Processing ---
    # This list simulates records read from a CSV file
    csv_records = [
        {"item_name": "AI Debugging Assistant v3.0", "complexity_level": "High/Production"},
        {"item_name": "Basic E-commerce Frontend", "complexity_level": "Low/Simple"},
        {"item_name": "Neural Network Optimizer v2", "complexity_level": "Medium/Advanced"},
    ]

    all_results = []

    print("\nStarting batch processing of CSV records (Running ONLY Agent 1: Score Generator)...")

    for i, record in enumerate(csv_records):
        print("\n========================================================")
        print(f"  PROCESSING RECORD {i + 1}/{len(csv_records)}: {record['item_name']}")
        print("========================================================")

        # 1. Prepare the initial state for the current record
        initial_state = {
            "item_name": record["item_name"],
            "complexity_level": record["complexity_level"],
            "user_prompt": GENERAL_USER_PROMPT
        }

        # 2. Invoke the graph for the current record
        # This run stops right after score_generator_node completes.
        final_state = app.invoke(initial_state)

        final_score = final_state.get('score')

        # 3. Process and consolidate results
        result = {
            "record_id": i + 1,
            "item_name": record["item_name"],
            "complexity_level": record["complexity_level"],
            "generated_score": final_score,
            "review_category": "NOT RUN",  # Indicate that Agent 2 did not run
            "review_text": "Agent 2 (Review Generator) was bypassed."
        }
        all_results.append(result)

    print("\n========================================================")
    print("BATCH PROCESSING COMPLETE. RESULTS FROM AGENT 1 ONLY:")
    print("========================================================")

    for result in all_results:
        # A missing/non-numeric score must not crash the report formatting.
        score_value = result['generated_score']
        score_text = f"{score_value:.2f}" if isinstance(score_value, (int, float)) else "N/A"

        print(f"\nRecord ID: {result['record_id']} ({result['item_name']})")
        print(f"  Complexity: {result['complexity_level']}")
        print(f"  Score: {score_text} | Review Status: {result['review_category']}")

    print("\n--- Raw JSON List of All Results (Agent 2 data is absent) ---")
    # Clean up results for final output to only show relevant data
    clean_results = [{k: v for k, v in res.items() if k not in ['review_category', 'review_text']} for res in all_results]
    print(json.dumps(clean_results, indent=2))
    print("\n--- End of Batch Execution ---")