# DEVIN_AI : A Personal Code Assistant with Extensive Capabilities

### import necessary libaries

In [87]:
import os
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

##  --- 1. LLM Setup ---

In [88]:
try:
    llm=Ollama(model="codellama")
    print("connection success ✅")
except Exception as e:
    print("Error initializing Ollama. Ensure Ollama is running and 'codellama' is available. Error: {e}")
    llm=None

connection success ✅


## --- 2. Create Prompt Templates  ---

In [89]:
template_string = """
You are an expert software engineer .

Task: {task} 
Instructions: Analyze the provided code block. 
If the task is 'debug', analyze the error and propose a fix.
If the task is 'refactor', improve readability or performance.
If the task is 'explain', provide a detailed summary and line-by-line notes.

Code:
---
{code_block} 
---

Output: Provide the requested result including a Summary, detailed Steps taken, and the Final Code (if applicable).
Summary: 
Steps: 
Final Code: 
"""



In [90]:
code_assistant_prompt = PromptTemplate(
    input_variables=["task", "code_block"],
    template=template_string,
)

In [91]:
# --- Define the Chain ---
chain = LLMChain(llm=llm, prompt=code_assistant_prompt)

-  --- Example Usage ---

In [92]:
print("\n--- Testing Phase 1: Code Explanation ---")
sample_code = "def calculate_sum(a, b):\n    return a + b"
response = chain.invoke(
    {"task" : "explain",
     "code_block" : sample_code}
    )
print(response['text'])


--- Testing Phase 1: Code Explanation ---

As an expert software engineer, I can help you with your task of analyzing the provided code block.

The code block is a Python function named `calculate_sum` that takes two arguments `a` and `b` as input and returns their sum. The function is well-written and easy to understand, but there are a few things we can do to improve its performance or readability.

Task: Analyze the provided code block.

Summary: The provided code block is a Python function named `calculate_sum` that takes two arguments `a` and `b` as input and returns their sum. The function is well-written and easy to understand, but there are a few things we can do to improve its performance or readability.

Steps:

1. We can refactor the code to use list comprehensions instead of explicit loops. This will make the code more concise and efficient.
2. We can also add type hints to the function parameters to make the code more readable and easier to understand.
3. Finally, we can 

## --- 3. Loading & Splitting Documents ---

In [93]:
from langchain_community.document_loaders import TextLoader 
from langchain.text_splitter import RecursiveCharacterTextSplitter 

def load_code_document(file_path):
    """
    Loads a code file and returns it as a list of LangChain Documents.
    Supports various file types (.py, .js, .java, etc.) using TextLoader.
    """
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return []

    # Using TextLoader for simplicity, specialized code loaders could be used if required.
    loader = TextLoader(file_path)
    documents = loader.load()
    print(f"Successfully loaded {len(documents)} document(s) from {file_path}.")
    return documents

def split_code_document(documents):
    """
    Splits large code documents into smaller chunks .
    We use RecursiveCharacterTextSplitter adjusted for code structure .
    """
    code_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", " ", ""],  # Prioritize large logical breaks (classes/functions)
        chunk_size=1000,
        chunk_overlap=100,
        length_function=len,
        )
    split_chunks = code_splitter.split_documents(documents)
    print(f"Original content split  into {len(split_chunks)} chunks.")
    return split_chunks

In [94]:
code_doc = load_code_document("sample_code.py")
code_chunks = split_code_document(code_doc)

Error: File not found at sample_code.py
Original content split  into 0 chunks.


In [95]:
from langchain_community.embeddings import HuggingFaceEmbeddings # Example free embedding model
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
print("ConversationBufferMemory initialized.")

ConversationBufferMemory initialized.


## --- 4. Add Embeddings & Vectorstores ---

In [96]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_embeddings():
    """
    Download and return the HuggingFace embeddings model.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(
        model_name=model_name
    )
    return embeddings

embedding = download_embeddings()
embedding


HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [97]:
# example sentence to be embedded by sentence transformer model
vector = embedding.embed_query("This is an example sentence to be embedded.")
vector

[0.10423814505338669,
 0.06141485646367073,
 0.039802417159080505,
 0.11223753541707993,
 0.06636574119329453,
 0.003417690983042121,
 -0.013076258823275566,
 0.006846375297755003,
 0.04879850521683693,
 0.001230166177265346,
 0.08418941497802734,
 0.004800172057002783,
 0.08500004559755325,
 0.02008615806698799,
 0.05856465548276901,
 0.011010155081748962,
 0.049174919724464417,
 -0.02072131074965,
 -0.0794295221567154,
 0.017942514270544052,
 -0.020186014473438263,
 0.05004462972283363,
 0.07915017753839493,
 -0.021056750789284706,
 0.01132161170244217,
 -0.021740594878792763,
 -0.05599787086248398,
 0.05633595958352089,
 0.1074289008975029,
 0.026734961196780205,
 -0.02625892497599125,
 -0.06203826144337654,
 0.04188476502895355,
 0.04975996911525726,
 0.046353623270988464,
 0.07270722091197968,
 -0.014972892589867115,
 0.04886829853057861,
 -0.050410348922014236,
 -0.01683051511645317,
 0.019853970035910606,
 -0.04148852452635765,
 0.022765960544347763,
 -0.007096854504197836,
 -0.

In [98]:
# # Pinecone Vector Store Connection 

# import os
# from pinecone import Pinecone
# from pinecone import ServerlessSpec
# from langchain_pinecone import PineconeVectorStore
# from dotenv import load_dotenv
# load_dotenv()
# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# #Pinecone initialization 
# pc = Pinecone(api_key=PINECONE_API_KEY)

# PINECONE_INDEX = "devin"
# index = pc.Index(PINECONE_INDEX)
# # if PINECONE_INDEX not in [index.name for index in pc.list_indexes()]:
# #     pc.create_index(
# #         name=PINECONE_INDEX,
# #         dimension=384,   # Match embedding size
# #         metric="cosine",
# #         spec=ServerlessSpec(cloud="aws", region="us-east-1")
# #     )

# print("✅ Connected to Pinecone index:", PINECONE_INDEX)    
# my_index = pc.get_index(PINECONE_INDEX)

# # VECTOR STORE IN LANGCHAIN
# # docsearch is an instance of PineconeVectorStore, which is a LangChain wrapper for storing and searching document embeddings in a Pinecone index.

# doc_searcher = PineconeVectorStore.from_documents(
#     documents = code_chunks,
#     index_name = PINECONE_INDEX,
#     embedding = embedding
# )

## --- 5. RETRIEVAL SYSTEM ---

In [99]:
# retriever = doc_searcher.as_retriever(search_kwargs={"k": 3})

In [100]:
# from langchain.chains import RetrievalQA

# # Main RetrievalQA chain for context-aware responses
# retrieval_qa = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type="stuff",  # Simple concatenation of retrieved docs
#     retriever=doc_searcher,
#     return_source_documents=True
# )

## --- 6.TOOLS⚙️ SETUP (Functions as LangChain Tools) ---

In [101]:
from langchain.tools import tool
from langchain.agents import initialize_agent, AgentType

# Convert your existing functions to tools
@tool
def load_code_document_tool(file_path: str) -> str:
    """Load and return code file content"""
    documents = load_code_document(file_path)
    return documents[0].page_content if documents else "File not found"

@tool
def syntax_checker_tool(code: str) -> str:
    """Check Python code for syntax errors"""
    import ast
    try:
        ast.parse(code)
        return "✅ No syntax errors found"
    except SyntaxError as e:
        return f"❌ Syntax error at line {e.lineno}: {e.msg}"

@tool
def complexity_analyzer_tool(code: str) -> str:
    """Analyze code complexity metrics"""
    lines = [l for l in code.split('\n') if l.strip()]
    functions = code.count('def ')
    classes = code.count('class ')
    max_indent = max([len(l) - len(l.lstrip()) for l in lines] + [0])
    
    return f"📊 Metrics: {len(lines)} lines, {functions} functions, {classes} classes, max nesting: {max_indent//4}"

@tool
def code_formatter_tool(code: str) -> str:
    """Format Python code (basic formatting)"""
    import re
    # Basic formatting improvements
    formatted = re.sub(r'\n\s*\n\s*\n+', '\n\n', code)  # Remove extra blank lines
    formatted = re.sub(r'  +', ' ', formatted)  # Multiple spaces to single
    return formatted.strip()

@tool  
def retrieval_qa_tool(query: str) -> str:
    """Search code knowledge base for relevant information"""
    # This will use your RetrievalQA chain
    try:
        result = retrieval_qa.run(query)
        return f"📚 Knowledge Base: {result}"
    except:
        return "📚 Knowledge base search failed"

## --- 7. SPECIALIZED AGENT'S CHAINS ---

- 1.🤖🤔 Explain Chain

In [102]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

explain_prompt = PromptTemplate(
    input_variables=["code_block", "user_request"],
    template="""
You are a code explanation specialist. Help users understand code thoroughly.

User Request: {user_request}
Code to Explain: {code_block}
Use available tools to:
1. Analyze code structure and complexity
2. Search knowledge base for related explanations
3. Check for any syntax issues to mention

Provide detailed, educational explanations suitable for the user's level.

Provide response in this exact JSON format:
{{
    "summary": "Overview of what this code does and its purpose",
    "issues": "Any learning points,IMPROVEMENTS, gotchas, or improvements to note",
    "refactored_code": "Same code with CONCISED inline comments."
}}
"""
)

explain_chain = LLMChain(
    llm=llm,  
    prompt=explain_prompt,
    verbose=True
)

- 2.🤖📝 Refactor Chain

In [103]:
refactor_prompt = PromptTemplate(
    input_variables=["code_block", "user_request"],
    template="""
You are a code refactoring specialist. Improve code quality and structure.

User Request: {user_request}
Code to Refactor: {code_block}
Use available tools to:
1. Analyze current complexity
2. PRETTIFY the code properly
3. Search knowledge base for refactoring patterns

Focus on: readability, performance, maintainability, and best practices.

Provide response in this exact JSON format:
{{
    "summary": " summary of refactoring improvements made",
    "issues": "Areas improved or 'None' if code was already optimal", 
    "refactored_code": "Improved,BEAUTIFIED code with explanatory comments"
}}
"""
)

refactor_chain = LLMChain(
    llm=llm,
    prompt=refactor_prompt,
    verbose=True
)

- 3.🤖🪲 Debug Chain

In [104]:
# Debug specialist chain
debug_prompt = PromptTemplate(
    input_variables=["code_block", "user_request"],
    template="""
You are a debugging specialist. Analyze the code and find issues.

User Request: {user_request}
Code to Debug: {code_block}
Use available tools to:
1. Check for syntax errors
2. Analyze code complexity  
3. Search knowledge base for similar debugging cases

Provide response in this exact JSON format:
{{
    "summary": "Brief summary of debugging analysis",
    "issues": "List of specific issues found, or 'None' if no issues",
    "refactored_code": "PROPER Fixed code, or 'None' if no fixes needed"
}}
"""
)

debug_chain = LLMChain(
    llm=llm,
    prompt=debug_prompt,
    verbose=True
)

## --- 8. SIMPLE KEYWORD-BASED ROUTER (The Brain of the System) ---

In [105]:
import json
import re

def devin_ai_router(user_input, code_block=""):
    """
    Simple router that classifies requests and executes appropriate chain
    Returns structured JSON: {summary, issues, refactored_code}
    """
    
    # Prepare input for chains
    chain_input = {
        "user_request": user_input,
        "code_block": code_block
    }
    
    # Simple keyword-based classification
    user_input_lower = user_input.lower()
    
    # Route to appropriate chain
    if any(word in user_input_lower for word in ['bug', 'error', 'fix', 'debug', 'wrong', 'issue', 'broken']):
        print("Routing to: DEBUG Chain")
        result = debug_chain.run(chain_input)
        
    elif any(word in user_input_lower for word in ['refactor', 'improve', 'optimize', 'clean', 'better', 'readable']):
        print("Routing to: REFACTOR Chain") 
        result = refactor_chain.run(chain_input)
        
    else:  # Default to explain
        print("Routing to: EXPLAIN Chain")
        result = explain_chain.run(chain_input)
    
    # Parse output into consistent JSON structure
    return parse_chain_output(result)

def parse_chain_output(raw_output):
    """Parse chain response into structured format"""
    try:
        # If already a dict, return as-is
        if isinstance(raw_output, dict):
            return raw_output
            
        # Try to extract JSON from string response
        json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
        json_match = re.search(json_pattern, raw_output, re.DOTALL)
        
        if json_match:
            parsed = json.loads(json_match.group())
            return {
                "summary": parsed.get("summary", "Task completed"),
                "issues": parsed.get("issues", "None"), 
                "refactored_code": parsed.get("refactored_code", "None")
            }
    except Exception as e:
        print(f"JSON parsing failed: {e}")
    
    # Fallback: structure the raw output
    return {
        "summary": "Task completed - see detailed response",
        "issues": "Check response for details",
        "refactored_code": raw_output
    }

# Simple usage function
def ask_devin(request, code=""):
    """Main interface function - use this to interact with DEVIN_AI"""
    result = devin_ai_router(request, code)
    
    print(f"\nSUMMARY: {result['summary']}")
    print(f"ISSUES: {result['issues']}")
    print(f"CODE: {result['refactored_code']}")
    
    return result

print("Simple Router Ready! Use ask_devin('your request', 'your code') to interact.")


Simple Router Ready! Use ask_devin('your request', 'your code') to interact.


## --- 9. EXAMPLE USAGE & TESTING ---

In [106]:


# First, let's create some test code samples
test_codes = [{
    "request": "fix the bugs in this function",
    "code": """
def divide_numbers(a, b):
    return a / b  # Bug: no zero division check
        """
    }]

In [107]:
# Interactive function for manual testing
def interactive_demo():
    """Run interactive demo"""
    print("\nDEVIN_AI Interactive Mode")
    print("Type 'quit' to exit")
    
    while True:
        request = input("\nYour request: ").strip()
        if request.lower() == 'quit':
            break
            
        code = input("Your code (optional): ").strip()
        
        try:
            ask_devin(request, code)
        except Exception as e:
            print(f"Error: {e}")

In [109]:
interactive_demo()


DEVIN_AI Interactive Mode
Type 'quit' to exit
Routing to: REFACTOR Chain


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are a code refactoring specialist. Improve code quality and structure.

User Request: make my code cleaner
Code to Refactor: def process_list(data):     result=[]     for i in range(len(data)):         if data[i]>0:             result.append(data[i]*2)     return result
Use available tools to:
1. Analyze current complexity
2. PRETTIFY the code properly
3. Search knowledge base for refactoring patterns

Focus on: readability, performance, maintainability, and best practices.

Provide response in this exact JSON format:
{
    "summary": " summary of refactoring improvements made",
    "issues": "Areas improved or 'None' if code was already optimal", 
    "refactored_code": "Improved,BEAUTIFIED code with explanatory comments"
}
[0m

[1m> Finished chain.[0m

SUMMARY: Refactored code for improved readability and maintainability. 


## CONCLUSION

- This project demonstrates the technical implementation of **DEVIN_AI**, a modular code assistant built with LangChain, Ollama, and advanced prompt engineering. DEVIN_AI leverages LLM-driven chains for code explanation, refactoring, and debugging, each orchestrated via a keyword-based router for dynamic task selection. Retrieval-augmented generation is enabled through HuggingFace embeddings and (optionally) Pinecone vector stores, supporting semantic search and context injection. 
- Tool-based agents extend capabilities for syntax checking, complexity analysis, formatting, and document loading, all accessible via LangChain's agent framework. The architecture supports both automated and interactive workflows, enabling scalable, context-aware code analysis and improvement
- This project highlights the synergy of LLMs, retrieval systems, and tool augmentation for practical software engineering automation.