# In [None]:
# ===============================
# 1️⃣ Imports
# ===============================
import json
import logging
import os
from contextlib import closing
from typing import Any, ClassVar, Dict, List, Optional

import psycopg2
from openai import OpenAI
from pydantic import BaseModel, Field

# LangGraph imports
from langgraph.graph import StateGraph, END

# Configure logging: root logger at INFO level, plus a module-scoped logger
# that every node function below writes to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ===============================
# 2️⃣ Pydantic Model for LLM Output
# ===============================
class CodeFixResponse(BaseModel):
    """
    Represents a structured code fix from the LLM.
    """
    # Identifier of the issue; store_node uses it as the upsert conflict key.
    issue_number: str
    # Issue category label (the schema example uses "BUG").
    type_of_issue: str
    # Line span the fix refers to.
    from_line: int
    to_line: int
    # Code before and after the proposed change.
    original_code: str
    fixed_code: str
    # Short rationale for the change.
    justification: str
    # Required score, constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(..., ge=0.0, le=1.0)

    # Sample payload attached to the generated JSON schema.
    model_config: ClassVar[Dict[str, Any]] = {
        "json_schema_extra": {
            "examples": [
                {
                    "issue_number": "S2111",
                    "type_of_issue": "BUG",
                    "from_line": 42,
                    "to_line": 45,
                    "original_code": "my_resource = open()",
                    "fixed_code": "my_resource = open()\nmy_resource.close()",
                    "justification": "Prevents resource leaks",
                    "confidence": 0.92
                }
            ]
        }
    }

    def to_embedding_text(self) -> str:
        """
        Render the fix as a fixed-layout plain-text block for embedding.

        The layout is: issue, type and line span, then the original code, the
        fixed code and the justification, each under its own heading.
        ``confidence`` is not included. Surrounding whitespace is stripped
        from the final text.
        """
        sections = [
            f"Issue: {self.issue_number}",
            f"Type: {self.type_of_issue}",
            f"Lines: {self.from_line}-{self.to_line}",
            "",
            "Original:",
            self.original_code,
            "",
            "Fixed:",
            self.fixed_code,
            "",
            "Justification:",
            self.justification,
        ]
        return "\n".join(sections).strip()

# ===============================
# 3️⃣ AgentState for LangGraph
# ===============================
class AgentState(BaseModel):
    """
    State passed between LangGraph nodes.

    Only ``sonar_issue`` is required; every other field has a default and is
    filled in by the nodes as the graph runs.
    """
    # Raw issue dict; llm_fix_node reads 'rule', 'type', 'code' and
    # 'textRange' ('startLine' / 'endLine') from it.
    sonar_issue: Dict[str, Any]
    # Unparsed text of the latest LLM reply.
    llm_raw_output: Optional[str] = None
    # Parsed fix, set by validate_node when the reply validates.
    fix_response: Optional[CodeFixResponse] = None
    # Message of the most recent failure; None means "no error".
    error: Optional[str] = None
    # Number of rows requested by the similarity search.
    top_k: int = 5
    # Results of search_similar_issues. The field must be declared here:
    # pydantic rejects assignment to attributes that are not model fields.
    similar_issues: List[CodeFixResponse] = Field(default_factory=list)

# ===============================
# 4️⃣ LM Studio Client
# ===============================
# Shared client used for every chat-completion and embedding request below.
# It targets the local LM Studio server; the api_key value is presumably a
# placeholder for that local endpoint (confirm against the LM Studio setup).
client = OpenAI(
    api_key="lm-studio",
    base_url="http://localhost:1234/v1",
)

# ===============================
# 5️⃣ LangGraph Nodes
# ===============================

# Node 1: Call LLM to generate fix
def llm_fix_node(state: AgentState) -> AgentState:
    """
    Ask the LLM for a structured fix for ``state.sonar_issue``.

    The prompt contains the ``CodeFixResponse`` JSON schema followed by the
    issue's rule, type, line range and code. The reply is stored unparsed in
    ``state.llm_raw_output``; parsing happens in ``validate_node``.

    Args:
        state: Current graph state; ``sonar_issue`` must be populated.

    Returns:
        The same state object with ``llm_raw_output`` set.

    Raises:
        KeyError: If ``sonar_issue`` lacks 'rule', 'type', 'code', or a
            'textRange' containing 'startLine' and 'endLine'.
    """
    issue = state.sonar_issue

    # Serialize the schema as real JSON. Interpolating the dict directly would
    # print a Python repr (single quotes, True/False/None), which is not JSON
    # even though the prompt asks the model to answer in JSON.
    schema_json = json.dumps(CodeFixResponse.model_json_schema(), indent=2)

    prompt = f"""
Return ONLY valid JSON matching this schema:
{schema_json}

Sonar Issue:
Rule: {issue['rule']}
Type: {issue['type']}
From Line: {issue['textRange']['startLine']}
To Line: {issue['textRange']['endLine']}
Code:
{issue['code']}
"""
    response = client.chat.completions.create(
        model="openai/gpt-oss-20b",
        temperature=0.2,
        messages=[
            {"role": "system", "content": "Return ONLY valid JSON."},
            {"role": "user", "content": prompt}
        ]
    )

    state.llm_raw_output = response.choices[0].message.content
    return state

# Node 2: Validate JSON using Pydantic
def validate_node(state: AgentState) -> AgentState:
    """
    Parse ``state.llm_raw_output`` into a ``CodeFixResponse``.

    On success the parsed object is stored in ``state.fix_response`` and
    ``state.error`` is cleared. On any failure the exception text is stored in
    ``state.error`` and ``fix_response`` is left untouched.

    Returns:
        The same state object, updated in place.
    """
    raw_json = state.llm_raw_output
    try:
        parsed_fix = CodeFixResponse.model_validate_json(raw_json)
    except Exception as exc:
        # The message is fed back to the model by the repair node.
        state.error = str(exc)
    else:
        state.fix_response = parsed_fix
        state.error = None
    return state

# Node 3: Repair invalid JSON
def repair_node(state: AgentState) -> AgentState:
    """
    Ask the LLM to correct a reply that failed validation.

    The prompt carries the previous (invalid) reply, the validation error, the
    original Sonar issue and the target JSON schema, so the model has the
    material it needs to produce a corrected answer rather than inventing one.
    The new reply overwrites ``state.llm_raw_output``; it is not validated
    here.

    Args:
        state: Current graph state; ``error`` holds the validation message.

    Returns:
        The same state object with ``llm_raw_output`` replaced.
    """
    schema_json = json.dumps(CodeFixResponse.model_json_schema(), indent=2)
    # default=str: the issue dict is free-form (Dict[str, Any]); this text is
    # only shown to the model, so stringifying odd values is acceptable.
    issue_json = json.dumps(state.sonar_issue, indent=2, default=str)
    previous_output = state.llm_raw_output or "(empty response)"

    repair_prompt = f"""
Your previous response was invalid.

PREVIOUS RESPONSE:
{previous_output}

ERROR:
{state.error}

ORIGINAL SONAR ISSUE:
{issue_json}

Return corrected JSON ONLY.
Schema:
{schema_json}
"""
    response = client.chat.completions.create(
        model="openai/gpt-oss-20b",
        temperature=0.0,
        messages=[
            {"role": "system", "content": "Return ONLY valid JSON."},
            {"role": "user", "content": repair_prompt}
        ]
    )
    state.llm_raw_output = response.choices[0].message.content
    return state

# Node 4: Store embedding + payload into pgvector
def store_node(state: AgentState) -> AgentState:
    """
    Embed the validated fix and upsert it into the ``code_issues`` table.

    The row is keyed on ``issue_number``; an existing row with the same key
    has its embedding and payload replaced. The connection string is read from
    the ``DATABASE_URL`` environment variable so that no credential lives in
    source control.

    Args:
        state: Current graph state; ``fix_response`` is the object to store.

    Returns:
        The same state object. Failures do not raise: they are logged and
        recorded in ``state.error``.
    """
    fix = state.fix_response
    if fix is None:
        logger.warning("No fix response to store.")
        return state  # nothing to persist

    try:
        # SECURITY: a connection string with a real password used to be
        # embedded here. That credential should be rotated; the DSN now comes
        # from the environment only.
        dsn = os.environ.get("DATABASE_URL")
        if not dsn:
            raise RuntimeError("DATABASE_URL environment variable is not set")

        # Text representation of the fix, then its embedding vector.
        embedding_text = fix.to_embedding_text()
        response = client.embeddings.create(
            model="text-embedding-nomic-embed-text-v1.5:3",
            input=embedding_text
        )
        embedding = response.data[0].embedding  # list of floats

        # pgvector text format: comma-separated values wrapped in brackets.
        embedding_str = "[" + ",".join(map(str, embedding)) + "]"

        # closing() guarantees the connection is closed; `with conn` commits
        # on success and rolls back on error; the cursor closes itself.
        with closing(psycopg2.connect(dsn)) as conn:
            with conn, conn.cursor() as cur:
                cur.execute(
                    """
                    INSERT INTO code_issues (issue_number, embedding, payload)
                    VALUES (%s, %s::vector, %s)
                    ON CONFLICT (issue_number) DO UPDATE
                    SET embedding = EXCLUDED.embedding,
                        payload = EXCLUDED.payload
                    """,
                    (
                        fix.issue_number,
                        embedding_str,
                        json.dumps(fix.model_dump())
                    )
                )

        logger.info("Successfully stored issue %s in pgvector.", fix.issue_number)

    except psycopg2.Error as db_err:
        logger.error("Database error while storing: %s", db_err)
        state.error = f"Database error: {str(db_err)}"
    except Exception as e:
        logger.error("Unexpected error in store_node: %s", e)
        state.error = f"Store error: {str(e)}"

    return state

# ===============================
# 6️⃣ Conditional Router
# ===============================
def validation_router(state: AgentState) -> str:
    """
    Choose the branch taken after validation.

    Returns "repair" when ``state.error`` holds anything other than None,
    and "store" otherwise.
    """
    if state.error is not None:
        return "repair"
    return "store"



def search_similar_issues(state: AgentState) -> AgentState:
    """
    Look up the stored fixes most similar to the current issue's code.

    The code snippet from ``state.sonar_issue['code']`` is embedded and
    compared against the ``embedding`` column of ``code_issues`` with the
    ``<#>`` operator; the ``state.top_k`` closest payloads are parsed into
    ``CodeFixResponse`` objects. The connection string is read from the
    ``DATABASE_URL`` environment variable.

    Args:
        state: Current graph state; ``sonar_issue`` and ``top_k`` are read.

    Returns:
        The same state object. When the state model declares a
        ``similar_issues`` field the results are stored there. Failures do
        not raise: they are logged and recorded in ``state.error``.
    """
    code_snippet = state.sonar_issue.get("code")
    if not code_snippet:
        logger.warning("No code snippet on the Sonar issue; skipping similarity search")
        return state

    try:
        dsn = os.environ.get("DATABASE_URL")
        if not dsn:
            raise RuntimeError("DATABASE_URL environment variable is not set")

        # 1. Embed the query snippet.
        logger.info("Generating embedding for query code snippet")
        embedding_response = client.embeddings.create(
            model="text-embedding-nomic-embed-text-v1.5:3",
            input=code_snippet
        )
        query_embedding = embedding_response.data[0].embedding
        logger.info(
            "Successfully generated query embedding with %d dimensions",
            len(query_embedding),
        )

        # Same pgvector text format that store_node writes.
        query_vector = "[" + ",".join(map(str, query_embedding)) + "]"

        # 2. Run the similarity query; both handles are closed on every path.
        logger.info("Connecting to PostgreSQL pgvector database")
        with closing(psycopg2.connect(dsn)) as conn, conn.cursor() as cur:
            logger.info("Successfully connected to database")
            logger.info("Executing similarity search for top %s issues", state.top_k)
            cur.execute(
                """
                SELECT payload
                FROM code_issues
                ORDER BY embedding <#> %s::vector
                LIMIT %s
                """,
                (query_vector, state.top_k)
            )
            rows = cur.fetchall()
        logger.info("Retrieved %d similar issues from database", len(rows))

    except psycopg2.Error as db_err:
        logger.error("Database error while searching: %s", db_err)
        state.error = f"Database error: {str(db_err)}"
        return state
    except Exception as e:
        logger.error("Unexpected error in search_similar_issues: %s", e)
        state.error = f"Search error: {str(e)}"
        return state

    # 3. Convert payloads to Pydantic objects, skipping rows that do not parse.
    similar_issues: List[CodeFixResponse] = []
    for (payload,) in rows:
        try:
            # The driver yields a dict for json/jsonb columns and a string for
            # text columns; accept either.
            if isinstance(payload, (str, bytes, bytearray)):
                issue = CodeFixResponse.model_validate_json(payload)
            else:
                issue = CodeFixResponse.model_validate(payload)
        except Exception as e:
            logger.warning("Failed to parse payload: %s", e)
            continue
        similar_issues.append(issue)
        logger.info("Found similar issue: %s", issue.issue_number)

    # Assigning an undeclared attribute on a pydantic model raises, so only
    # store the results when the state schema has a slot for them.
    if "similar_issues" in type(state).model_fields:
        state.similar_issues = similar_issues
    else:
        logger.warning(
            "State model declares no 'similar_issues' field; results were not stored"
        )

    logger.info("Successfully returned %d similar issues", len(similar_issues))
    return state


# ===============================
# 7️⃣ Build LangGraph
# ===============================
graph = StateGraph(AgentState)

# Nodes, registered under the names used by the edges below.
graph.add_node("llm_fix", llm_fix_node)
graph.add_node("validate", validate_node)
graph.add_node("repair", repair_node)
graph.add_node("store", store_node)
graph.add_node("search_similar_issues", search_similar_issues)

# Unconditional transitions.
graph.set_entry_point("llm_fix")
graph.add_edge("llm_fix", "validate")
graph.add_edge("repair", "validate")  # a repaired reply is validated again
graph.add_edge("store", "search_similar_issues")
graph.add_edge("search_similar_issues", END)

# After "validate", validation_router's return value selects the next node.
graph.add_conditional_edges(
    "validate",
    validation_router,
    {"repair": "repair", "store": "store"},
)

agent = graph.compile()

# ===============================
# 8️⃣ Sample Sonar Issue JSON
# ===============================
# Example input carrying the keys llm_fix_node reads: rule, type, the line
# range under textRange, and the offending code.
sonar_issue_json = {
    "rule": "S2111",
    "type": "BUG",
    "textRange": {
        "startLine": 42,
        "endLine": 45,
    },
    "code": "my_resource = open()\n# missing close",
}

# ===============================
# 9️⃣ Invoke the Agent
# ===============================
# Only the issue and an explicit top_k are supplied; the remaining state
# fields start from their declared defaults.
initial_state = {
    "sonar_issue": sonar_issue_json,
    "top_k": 3,  # specify top_k here
}
final_state = agent.invoke(initial_state)

# ===============================
# 10️⃣ Check Results
# ===============================
# Print the raw reply itself under this label, not the whole state.
print("LLM Raw Output:\n", final_state.get("llm_raw_output"))

# The parsed fix lives under its own 'fix_response' key. 'llm_raw_output' is a
# plain string, so chaining .get() on it would raise AttributeError.
parsed_fix = final_state.get("fix_response")
if parsed_fix is not None:
    # The value may come back as a CodeFixResponse instance or as a plain
    # dict, depending on how the graph returns its state; handle both.
    if isinstance(parsed_fix, dict):
        print("Fixed Code:\n", parsed_fix.get("fixed_code"))
    else:
        print("Fixed Code:\n", parsed_fix.fixed_code)
