<a href="https://colab.research.google.com/github/rahaf34/demo-repo/blob/main/DevScribe%20v0.5.0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Part 1 — Setup & Observability

In [76]:
!pip -q install langchain langchain-core langchain-community langchain-google-genai pydantic langsmith


In [77]:
import os
from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser, StrOutputParser
from langchain_core.runnables import RunnableLambda
from pydantic import BaseModel, Field
from langsmith import Client

# Copy the two secrets from Colab's userdata store into environment variables
# of the same name.
for secret_name in ("GOOGLE_API_KEY", "LANGSMITH_API_KEY"):
    os.environ[secret_name] = userdata.get(secret_name)

# Tracing flag and project name (presumably read by LangChain/LangSmith at run
# time — they are not referenced again in this notebook).
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "DevScribe-v0.5.0",
    }
)

# LangSmith client, authenticated with the key exported above.
client = Client(api_key=os.environ["LANGSMITH_API_KEY"])

# Part 2 — Pydantic Models



Schema for Classification

In [78]:
# `List` is used by the annotations below. Importing it in this cell keeps the
# cell runnable when the notebook is executed top to bottom; the only other
# `typing` import appears in a later cell.
from typing import List


# Output schema for the classification chain: language, category, paradigms,
# a coarse complexity label and free-form tags.
# NOTE(review): documented with comments rather than a class docstring; a
# docstring would presumably show up in the model's JSON schema and therefore
# in the prompt's format instructions — confirm before adding one.
class CodeClassification(BaseModel):
    # Every field is required (`...` as the default) and carries a description.
    language: str = Field(..., description="Detected programming language of the code.")
    category: str = Field(..., description="General category of the code, e.g., 'Algorithm', 'Data Processing', 'Web API'.")
    paradigms: List[str] = Field(..., description="Programming paradigms used, e.g., 'Functional', 'Object-Oriented'.")
    complexity: str = Field(..., description="High-level complexity assessment: 'Low', 'Medium', 'High'.")
    tags: List[str] = Field(..., description="Additional tags describing code characteristics, e.g., 'Sorting', 'Recursion', 'IO'.")




Schema for CodeAnalysis

In [79]:
from pydantic import BaseModel, Field
from typing import List

# Output schema for the analysis chain: detected language, a bounded numeric
# complexity score and the key concepts found in the snippet.
class CodeAnalysis(BaseModel):
    language: str = Field(..., description="Detected programming language.")
    # ge/le restrict the score to the 1..10 range named in the description.
    complexity_score: int = Field(
        ..., ge=1, le=10,
        description="Complexity score from 1 (very simple) to 10 (very complex)."
    )
    key_concepts: List[str] = Field(
        ..., description="Important concepts identified in the code."
    )



Schema for CodeDocumentation

In [80]:
# Output schema for the documentation chain: a single required text field.
class CodeDocumentation(BaseModel):
    documentation: str = Field(..., description="Generated docstring / summary of the code.")


Schema for RefactorSuggestion

In [81]:
# Output schema for the refactor chain: the critique, the rewritten code and a
# list of the changes that were applied. All three fields are required.
class RefactorSuggestion(BaseModel):
    critique: str = Field(
        ..., description="Architectural critique and reasoning for refactoring."
    )
    # This field is what mega_pipeline_full passes on to the unit-test chain.
    refactored_code: str = Field(
        ..., description="The fully refactored version of the original code."
    )
    changes_made: List[str] = Field(
        ..., description="List of structural or naming changes performed."
    )


Schema for Performance/Security

In [82]:
# Output schema for the security chain: findings and the matching advice,
# each as a required list of strings.
class SecurityAnalysis(BaseModel):
    vulnerabilities: List[str] = Field(..., description="Potential security issues found in the code.")
    recommendations: List[str] = Field(..., description="Recommended fixes for security issues.")

# Output schema for the performance chain: bottlenecks and improvement
# suggestions, each as a required list of strings.
class PerformanceAnalysis(BaseModel):
    bottlenecks: List[str] = Field(..., description="Potential performance bottlenecks.")
    suggestions: List[str] = Field(..., description="Suggestions to improve performance.")


# Part 3 — LCEL Chains

In [83]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Temperature-0 model used by every chain except the refactor chain.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.0)

# Same model with a higher sampling temperature; used only by refactor_chain.
refactor_llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.7,
)


Classification Chain

In [None]:
# Parses the model reply into a CodeClassification and supplies the format
# instructions that build_classification_prompt embeds in the prompt.
classifier_parser = PydanticOutputParser(pydantic_object=CodeClassification)

def build_classification_prompt(inputs, persona="pedantic auditor"):
    """Render the classification prompt for one code snippet.

    Args:
        inputs: Mapping that must contain the snippet under the ``"code"`` key.
        persona: Role the model is told to play; inserted into the prompt as-is.

    Returns:
        The complete prompt string, including the output-format instructions
        produced by ``classifier_parser``.

    Raises:
        KeyError: If ``inputs`` has no ``"code"`` entry.
    """
    # Plain (non-f) template filled by a single ``str.format`` call, like the
    # other prompt builders in this notebook. The persona is substituted as a
    # value, so braces inside it can never be misread as placeholders.
    # NOTE(review): the bullet list asks about code type, likely errors and
    # refactoring, none of which are fields of CodeClassification; the parser's
    # format instructions are what define the reply shape — confirm the wording.
    template = """
You are a {persona} of code.
Your job is to thoroughly critique the code,
pointing out all potential errors, inefficiencies, and refactoring opportunities.

Classify the following code:
- Detect programming language
- Determine code type (function, class, script)
- Does it contain likely errors?
- Should it be refactored?

Be brutally honest in your critique.
List all possible errors, inefficient patterns, and points for refactoring.
Explain why each issue is a problem and suggest improvements.

{format_instructions}

Code:
{code}
"""
    return template.format(
        persona=persona,
        format_instructions=classifier_parser.get_format_instructions(),
        code=inputs["code"],
    )


# Classification pipeline: attach `original_code`, render the prompt with the
# "ruthless auditor" persona, query `llm`, then parse into CodeClassification.
classifier_chain = (
    RunnablePassthrough.assign(original_code=lambda payload: payload["code"])
    | RunnableLambda(
        lambda payload: build_classification_prompt(payload, persona="ruthless auditor")
    )
    | RunnableLambda(
        lambda prompt_text: llm.invoke(
            [{"role": "user", "content": prompt_text}]
        ).content
    )
    | classifier_parser
)


In [None]:
# Demo input: a tiny C function. Only the source text is sent to the chain, so
# the expected answer ("C") is not part of the prompt the model sees.
test_code = "#include <stdio.h>\nint add(int a, int b){ return a + b; }"

# Run the classification chain and show the parsed CodeClassification.
result = classifier_chain.invoke({"code": test_code})
print(result)


language='C' category='Utility Function' paradigms=['Imperative'] complexity='Low' tags=['Arithmetic', 'Function']


Analysis Chain

In [None]:
# Parses the model reply into a CodeAnalysis and supplies the format
# instructions embedded in the prompt.
analysis_parser = PydanticOutputParser(
    pydantic_object=CodeAnalysis
)

# Prompt text for the analysis chain. The {persona}, {format_instructions} and
# {code} placeholders are filled in by build_analysis_prompt.
analysis_prompt_template = """
You are a {persona} of code.
Your job is to thoroughly analyze the code,
pointing out all potential errors, inefficiencies, and refactoring opportunities.

Analyze the following code and respond ONLY using the required JSON schema.
Be brutally honest in your critique.

{format_instructions}

Code:
{code}
"""


def build_analysis_prompt(inputs, persona="ruthless auditor"):
    """Fill ``analysis_prompt_template`` for a single snippet.

    ``inputs["code"]`` provides the snippet, ``persona`` the role named in the
    first line of the prompt, and ``analysis_parser`` the format instructions.
    Returns the finished prompt string.
    """
    fields = {
        "persona": persona,
        "format_instructions": analysis_parser.get_format_instructions(),
        "code": inputs["code"],
    }
    return analysis_prompt_template.format(**fields)

# Analysis pipeline: attach `original_code`, render the prompt with the
# "ruthless auditor" persona, query `llm`, then parse into CodeAnalysis.
analysis_chain = (
    RunnablePassthrough.assign(original_code=lambda payload: payload["code"])
    | RunnableLambda(
        lambda payload: build_analysis_prompt(payload, persona="ruthless auditor")
    )
    | RunnableLambda(
        lambda prompt_text: llm.invoke(
            [{"role": "user", "content": prompt_text}]
        ).content
    )
    | analysis_parser
)



In [None]:
# Demo: run the analysis chain on a small find-max function and print the
# parsed CodeAnalysis result.
test_code = """
def find_max(arr):
    max_v = arr[0]
    for n in arr:
        if n > max_v:
            max_v = n
    return max_v
"""

result = analysis_chain.invoke({"code": test_code})
print(result)


language='Python' complexity_score=2 key_concepts=['Iteration', 'Comparison', 'Maximum Value']


 Documentation Chain

In [None]:
# Parses the model reply into a CodeDocumentation and supplies the format
# instructions embedded in the prompt.
doc_parser = PydanticOutputParser(pydantic_object=CodeDocumentation)

# Few-shot prompt: three code -> docstring examples followed by the target
# snippet. {format_instructions} and {code} are filled in by build_doc_prompt.
few_shot_template = """
You are a Python docstring generator.

Below are examples of "bad code" → "good docstring":

Example 1:
Code:
def f(x): return x+1
Docstring:
'''Returns x incremented by 1.'''

Example 2:
Code:
def greet(name): print("Hi " + name)
Docstring:
'''Prints a greeting to the user with their name.'''

Example 3:
Code:
def mult(a,b): return a*b
Docstring:
'''Returns the product of two numbers.'''

Now, generate a clear, concise docstring for the following code.
Output ONLY valid JSON using this schema:

{format_instructions}

Code:
{code}
"""

def build_doc_prompt(inputs):
    """Fill ``few_shot_template`` with the parser's format instructions and
    the snippet stored under ``inputs["code"]``; return the prompt string."""
    fields = {
        "format_instructions": doc_parser.get_format_instructions(),
        "code": inputs["code"],
    }
    return few_shot_template.format(**fields)


def call_llm_doc(prompt_text):
    """Send ``prompt_text`` to ``llm`` as a single user message and return the
    ``content`` of the reply."""
    user_message = {"role":"user","content":prompt_text}
    return llm.invoke([user_message]).content


# Documentation pipeline: attach `original_code`, build the few-shot prompt,
# query the model, then parse into CodeDocumentation. The two plain functions
# are turned into RunnableLambda steps by the `|` operator.
doc_chain = (
    RunnablePassthrough.assign(original_code=lambda payload: payload["code"])
    | build_doc_prompt
    | call_llm_doc
    | doc_parser
)



In [None]:
# Demo: run the documentation chain on a small list-processing function and
# print the parsed CodeDocumentation result.
test_code = """
def proc_data(d):
    # stuff happens
    x = []
    for i in d:
        if i > 10:
            x.append(i * 2)
        else:
            x.append(i)
    return x
"""

doc_result = doc_chain.invoke({"code": test_code})
print(doc_result)


documentation='Processes a list of numbers, doubling those greater than 10.'


 Refactor Chain

In [None]:
# Parses the model reply into a RefactorSuggestion and supplies the format
# instructions embedded in the prompt.
refactor_parser = PydanticOutputParser(pydantic_object=RefactorSuggestion)

# Prompt text for the refactor chain. The {persona}, {format_instructions} and
# {code} placeholders are filled in by build_refactor_prompt.
refactor_prompt_template = """
You are a {persona} of code.
Your job is to refactor the following code with improvements,
pointing out any inefficiencies, potential bugs, and refactoring opportunities.
Be thorough and brutally honest in your suggestions.

You MUST output only JSON using the required schema.

{format_instructions}

Code:
{code}
"""

def build_refactor_prompt(inputs, persona="ruthless auditor"):
    """Fill ``refactor_prompt_template`` for a single snippet.

    ``inputs["code"]`` provides the snippet, ``persona`` the role named in the
    prompt, and ``refactor_parser`` the format instructions. Returns the
    finished prompt string.
    """
    fields = {
        "persona": persona,
        "format_instructions": refactor_parser.get_format_instructions(),
        "code": inputs["code"],
    }
    return refactor_prompt_template.format(**fields)

def call_llm_refactor(prompt_text):
    """
    Send a refactor prompt to the refactor model and return the reply text.

    Args:
        prompt_text: Fully rendered prompt string.

    Returns:
        The ``content`` attribute of the model's reply.
    """
    # Use `refactor_llm` (the model the refactor chain itself calls) rather
    # than the temperature-0 `llm`, so this helper and the chain agree.
    response = refactor_llm.invoke([{"role": "user", "content": prompt_text}])
    return response.content

# Refactor pipeline: attach `original_code`, render the prompt with the
# "pedantic auditor" persona, query `refactor_llm`, then parse into
# RefactorSuggestion.
refactor_chain = (
    RunnablePassthrough.assign(original_code=lambda payload: payload["code"])
    | RunnableLambda(
        lambda payload: build_refactor_prompt(payload, persona="pedantic auditor")
    )
    | RunnableLambda(
        lambda prompt_text: refactor_llm.invoke(
            [{"role": "user", "content": prompt_text}]
        ).content
    )
    | refactor_parser
)



In [None]:
# Demo: run the refactor chain on a two-line add function and print the parsed
# RefactorSuggestion result.
test_code = """
def add(a,b):
 return a+b
"""

result = refactor_chain.invoke({"code": test_code})
print(result)


critique="This code, while functional, is incredibly simplistic and offers little room for extensive critique. However, even simple code can benefit from minor improvements in terms of documentation and naming conventions, especially when considering maintainability and readability in larger projects. There's no error handling, but given the basic nature of the function, it's not strictly necessary unless specific input types are expected. The lack of type hints is a missed opportunity for improved code clarity and static analysis." refactored_code='def add(a: float, b: float) -> float:\n    """Adds two numbers together.\n\n    Args:\n        a: The first number.\n        b: The second number.\n\n    Returns:\n        The sum of a and b.\n    """\n    return a + b' changes_made=['Added type hints for parameters and return value.', "Added a docstring to explain the function's purpose, arguments, and return value."]


Security Chain

In [None]:
# Parses the model reply into a SecurityAnalysis and supplies the format
# instructions embedded in the prompt.
security_parser = PydanticOutputParser(pydantic_object=SecurityAnalysis)

# Prompt text for the security chain. The {persona}, {format_instructions} and
# {code} placeholders are filled in by build_security_prompt.
security_prompt_template = """
You are a {persona} of code.
Your job is to analyze the following code for security issues,
identifying vulnerabilities, unsafe patterns, and potential exploits.
Be brutally honest in your critique.

Respond ONLY using the required JSON schema.

{format_instructions}

Code:
{code}
"""

def build_security_prompt(inputs, persona="pedantic auditor"):
    """Fill ``security_prompt_template`` for a single snippet.

    ``inputs["code"]`` provides the snippet, ``persona`` the role named in the
    prompt, and ``security_parser`` the format instructions. Returns the
    finished prompt string.
    """
    fields = {
        "persona": persona,
        "format_instructions": security_parser.get_format_instructions(),
        "code": inputs["code"],
    }
    return security_prompt_template.format(**fields)


# Security pipeline: attach `original_code`, render the prompt with the
# "ruthless auditor" persona, query `llm`, then parse into SecurityAnalysis.
security_chain = (
    RunnablePassthrough.assign(original_code=lambda payload: payload["code"])
    | RunnableLambda(
        lambda payload: build_security_prompt(payload, persona="ruthless auditor")
    )
    | RunnableLambda(
        lambda prompt_text: llm.invoke(
            [{"role": "user", "content": prompt_text}]
        ).content
    )
    | security_parser
)



In [None]:
# Demo: run the security chain on a find-max variant and print the parsed
# SecurityAnalysis result.
test_code = """
def find_max(arr):
    max_v = arr[0]
    for n in arr:
        if  max_v:
            max_v = n
    return max_v
"""

result = security_chain.invoke({"code": test_code})
print(result)


vulnerabilities=["Uninitialized data exposure: If the input array `arr` is empty, accessing `arr[0]` will raise an `IndexError`. This can expose information about the program's state and potentially be used in denial-of-service attacks.", "Incorrect logic for finding the maximum value: The condition `if max_v:` checks if `max_v` is truthy (not zero, None, or an empty collection). This means that if `max_v` is initially zero, the comparison `max_v = n` will always be executed, effectively setting `max_v` to the last element of the array, regardless of whether it's actually the maximum. This leads to incorrect results.", 'Potential denial of service: If the input array is extremely large, the loop could consume significant resources, potentially leading to a denial-of-service condition.'] recommendations=['Add a check to ensure the input array is not empty before accessing `arr[0]`. Raise an exception or return a default value if the array is empty.', 'Correct the logic for finding the m

Performance Chain

In [None]:
# Parses the model reply into a PerformanceAnalysis and supplies the format
# instructions embedded in the prompt.
performance_parser = PydanticOutputParser(pydantic_object=PerformanceAnalysis)

# Prompt text for the performance chain. The {persona}, {format_instructions}
# and {code} placeholders are filled in by build_performance_prompt.
performance_prompt_template = """
You are a {persona} of code.
Your job is to analyze the following code for performance issues,
pointing out inefficient patterns, bottlenecks, and optimization opportunities.
Be brutally honest in your critique.

Respond ONLY using the required JSON schema.

{format_instructions}

Code:
{code}
"""

def build_performance_prompt(inputs, persona="pedantic auditor"):
    """Fill ``performance_prompt_template`` for a single snippet.

    ``inputs["code"]`` provides the snippet, ``persona`` the role named in the
    prompt, and ``performance_parser`` the format instructions. Returns the
    finished prompt string.
    """
    fields = {
        "persona": persona,
        "format_instructions": performance_parser.get_format_instructions(),
        "code": inputs["code"],
    }
    return performance_prompt_template.format(**fields)


# Performance pipeline: attach `original_code`, render the prompt with the
# "ruthless auditor" persona, query `llm`, then parse into PerformanceAnalysis.
performance_chain = (
    RunnablePassthrough.assign(original_code=lambda payload: payload["code"])
    | RunnableLambda(
        lambda payload: build_performance_prompt(payload, persona="ruthless auditor")
    )
    | RunnableLambda(
        lambda prompt_text: llm.invoke(
            [{"role": "user", "content": prompt_text}]
        ).content
    )
    | performance_parser
)



In [None]:
# Demo: run the performance chain on a two-line add function and print the
# parsed PerformanceAnalysis result.
test_code = """
def add(a,b):
 return a+b
"""

result = performance_chain.invoke({"code": test_code})
print(result)


bottlenecks=[] suggestions=[]


# Meta-Prompting

In [None]:
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

def generate_meta_prompt(inputs):
    """
    Build the instruction text that asks the model for pytest unit tests.

    The code to test is read from ``inputs['refactored_code']`` and embedded
    after the instruction line; the resulting prompt string is returned.
    """
    instruction = "Write a comprehensive pytest unit test for the following function:"
    # Layout: blank line, instruction, blank line, code, trailing newline.
    return "\n{}\n\n{}\n".format(instruction, inputs['refactored_code'])

def generate_unit_test(prompt_text):
    """
    Send ``prompt_text`` to ``llm`` as a single user message and return the
    ``content`` of the reply (the generated pytest code).
    """
    user_message = {"role": "user", "content": prompt_text}
    return llm.invoke([user_message]).content


# Unit-test pipeline: attach `original_code` (copied from `refactored_code`),
# build the meta prompt, then ask the model for pytest code. The result is the
# raw reply text; no output parser is applied. The two plain functions are
# turned into RunnableLambda steps by the `|` operator.
unit_test_chain = (
    RunnablePassthrough.assign(original_code=lambda payload: payload["refactored_code"])
    | generate_meta_prompt
    | generate_unit_test
)


In [None]:
# Demo: ask the unit-test chain for pytest code covering a small typed add
# function and print the raw model reply.
refactored_code = """
def add_numbers(num1: int, num2: int) -> int:
    return num1 + num2
"""

unit_test = unit_test_chain.invoke({"refactored_code": refactored_code})
print(unit_test)


```python
import pytest

def add_numbers(num1: int, num2: int) -> int:
    """Adds two numbers together.

    Args:
        num1: The first number.
        num2: The second number.

    Returns:
        The sum of the two numbers.
    """
    return num1 + num2


class TestAddNumbers:
    """
    A class containing pytest unit tests for the add_numbers function.
    """

    def test_positive_numbers(self):
        """Tests adding two positive numbers."""
        assert add_numbers(2, 3) == 5
        assert add_numbers(10, 20) == 30
        assert add_numbers(1, 1) == 2

    def test_negative_numbers(self):
        """Tests adding two negative numbers."""
        assert add_numbers(-2, -3) == -5
        assert add_numbers(-10, -20) == -30
        assert add_numbers(-1, -1) == -2

    def test_positive_and_negative_numbers(self):
        """Tests adding a positive and a negative number."""
        assert add_numbers(2, -3) == -1
        assert add_numbers(-2, 3) == 1
        assert add_

# Mega Chain Implementation

In [None]:
def mega_pipeline_full(user_code: str):
    """Run every chain on ``user_code`` and collect the results in one dict.

    The chains are invoked one after another in this order: classification,
    analysis, documentation, refactor, unit test, security, performance. The
    unit-test chain receives the ``refactored_code`` produced by the refactor
    chain; all other chains receive ``user_code``.

    Returns:
        A dict with the keys ``classification``, ``analysis``,
        ``documentation``, ``refactor``, ``unit_test``, ``security`` and
        ``performance``, each holding the corresponding chain's output.
    """
    report = {}

    # Chains that work on the raw input and must run before the unit-test step.
    before_unit_test = (
        ("classification", classifier_chain),
        ("analysis", analysis_chain),
        ("documentation", doc_chain),
        ("refactor", refactor_chain),
    )
    for section, chain in before_unit_test:
        report[section] = chain.invoke({"code": user_code})

    # Unit tests are generated for the refactored code, not the original input.
    report["unit_test"] = unit_test_chain.invoke(
        {"refactored_code": report["refactor"].refactored_code}
    )

    # Remaining audits, again on the raw input.
    after_unit_test = (
        ("security", security_chain),
        ("performance", performance_chain),
    )
    for section, chain in after_unit_test:
        report[section] = chain.invoke({"code": user_code})

    return report

# --- Test Example ---
test_code = """
function add(a, b) {
    return a + b;
}

console.log(add(2, 3));
"""

result = mega_pipeline_full(test_code)

# Print each section of the report, one per line, in pipeline order.
for section in (
    "classification",
    "analysis",
    "documentation",
    "refactor",
    "unit_test",
    "security",
    "performance",
):
    print(result[section])


language='JavaScript' category='Basic Arithmetic' paradigms=['Imperative'] complexity='Low' tags=['Function', 'Addition']
language='JavaScript' complexity_score=1 key_concepts=['Function definition', 'Addition', 'Return statement', 'Console output']
documentation='Returns the sum of two numbers.'
critique="This code, while functionally correct, is extremely basic and lacks any real-world context or complexity to warrant a detailed critique. However, even simple code can benefit from slight improvements in terms of readability, maintainability, and best practices. Specifically:\n\n1.  **Lack of Type Safety:** JavaScript is dynamically typed. Without explicit type checking or TypeScript, the `add` function could receive unexpected input types (e.g., strings, objects) leading to unexpected results or runtime errors. While in this trivial case, it's unlikely, it's good practice to consider this.\n2.  **Missing Error Handling:** The function doesn't handle potential errors, such as `NaN` re