In [1]:
!pip install sentence-transformers  chromadb pandas groq langgraph



In [2]:
from google.colab import userdata
api_key =userdata.get('fristapi')

In [3]:
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import hashlib
from typing import TypedDict, Optional, List
from langgraph.graph import StateGraph, END

In [4]:
# 1. Define State
# ---------------------------
class CodeAssistantState(TypedDict):
    user_input: str
    similar_examples: Optional[List[str]]
    generated_code: Optional[str]
    error: Optional[str]


In [5]:

# ---------------------------
# 2. Initialize Components
# ---------------------------
# Load data
df = pd.read_parquet("hf://datasets/openai/openai_humaneval/openai_humaneval/test-00000-of-00001.parquet")
extracted_data = df[['task_id', 'prompt', 'canonical_solution']]

# Initialize models and DB
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
groq_client = Groq(api_key=api_key)  # Replace with secure API key handling

client = chromadb.Client(Settings(
    anonymized_telemetry=False,
    persist_directory="rag_db"
))
collection = client.get_or_create_collection(
    name="code_examples",
    metadata={"hnsw:space": "cosine"}
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [6]:

# ---------------------------
# 3. Define Nodes
# ---------------------------
def initialize_db(state: CodeAssistantState):
    """Initialize database with seed examples"""
    try:
        for _, row in extracted_data.iterrows():
            embedding = embedding_model.encode([row['prompt'].strip()])[0]
            doc_id = hashlib.md5(row['prompt'].encode()).hexdigest()
            collection.add(
                documents=[row['canonical_solution'].strip()],
                metadatas=[{"prompt": row['prompt'], "type": "code_example"}],
                ids=[doc_id],
                embeddings=[embedding]
            )
        return state
    except Exception as e:
        state["error"] = f"DB initialization failed: {str(e)}"
        return state


In [7]:
def retrieve_examples(state: CodeAssistantState):
    """Retrieve similar code examples"""
    try:
        embedding = embedding_model.encode([state["user_input"]])[0]
        results = collection.query(
            query_embeddings=[embedding],
            n_results=2
        )
        state["similar_examples"] = results['documents'][0] if results['documents'] else None
        return state
    except Exception as e:
        state["error"] = f"Retrieval failed: {str(e)}"
        return state

In [8]:
import datetime

def generate_code(state: dict) -> dict:
    """
    Generate Python code using Groq API based on user input.

    Args:
        state: Dictionary containing:
            - user_input: The user's code request
            - similar_examples: Optional list of similar examples

    Returns:
        Dictionary with:
            - generated_code: The produced code
            - error: Any error message
            - metadata: Additional info
    """
    # Validate input
    if not isinstance(state, dict):
        raise ValueError("State must be a dictionary")

    if "user_input" not in state or not state["user_input"].strip():
        state["error"] = "Please enter your code request"
        return state

    try:
        # Prepare messages for the AI
        messages = [{
            "role": "system",
            "content": "You are a Python coding assistant. Return only clean, production-ready code."
        }]

        # Add user's request
        messages.append({
            "role": "user",
            "content": state["user_input"].strip()
        })

        # Call the API
        completion = groq_client.chat.completions.create(
            model="deepseek-r1-distill-llama-70b",
            messages=messages,
            temperature=0.7,
            max_tokens=2048,
        )

        # Process the response
        code = completion.choices[0].message.content
        if code.startswith('```python'):
            code = code[9:-3] if code.endswith('```') else code[9:]
        elif code.startswith('```'):
            code = code[3:-3] if code.endswith('```') else code[3:]

        # Update state
        state["generated_code"] = code.strip()
        state["metadata"] = {
            "model": "deepseek-r1-distill-llama-70b",
            "timestamp": datetime.datetime.now().isoformat()
        }

        return state

    except Exception as e:
        state["error"] = f"Error generating code: {str(e)}"
        return state

In [9]:

# ---------------------------
# 4. Build State Machine
# ---------------------------
workflow = StateGraph(CodeAssistantState)

# Add nodes
workflow.add_node("initialize_db", initialize_db)
workflow.add_node("retrieve_examples", retrieve_examples)
workflow.add_node("generate_code", generate_code)

# Define workflow
workflow.set_entry_point("initialize_db")
workflow.add_edge("initialize_db", "retrieve_examples")
workflow.add_edge("retrieve_examples", "generate_code")
workflow.add_edge("generate_code", END)

# Compile the graph
app = workflow.compile()


In [10]:

# ---------------------------
# 5. Run the System
# ---------------------------
if __name__ == "__main__":
    # Example usage
    initial_state = {
        "user_input": "write a Python function to calculate sum a , b ",
        "similar_examples": None,
        "generated_code": None,
        "error": None
    }

    result = app.invoke(initial_state)

    print("\nGenerated Code:")
    print(result["generated_code"])

    if result.get("error"):
        print(f"\nError: {result['error']}")


Generated Code:
<think>
Okay, I need to write a Python function called sum_a_b that takes two parameters, a and b, and returns their sum. Let me think about how to approach this.

First, the function should be simple. It needs to accept two arguments. So I'll define it with def sum_a_b(a, b):.

Next, inside the function, I just need to add a and b together and return the result. That's straightforward with return a + b.

I should also include a docstring to explain what the function does. It should mention the parameters and the return value. That will make the code more understandable for others.

Wait, should I handle any edge cases? Like if a or b are not numbers. But the question doesn't specify, so I'll assume that the function is used correctly with numeric inputs.

Testing the function with some examples would be good. For instance, sum_a_b(10, 20) should return 30, and sum_a_b(-5, 15) should give 10. That helps ensure it works as expected.

I think that's all. The function is 

In [11]:

def main():
    """Main user interface for the code generator"""
    print("┌" + "─" * 50 + "┐")
    print("│" + "Python Code Generator".center(50) + "│")
    print("└" + "─" * 50 + "┘")

    while True:
        print("\nWhat code would you like me to generate?")
        print("(Type 'exit' to quit or 'examples' for sample requests)")

        user_input = input("\n> ").strip()

        if user_input.lower() == 'exit':
            print("\nGoodbye!")
            break

        if user_input.lower() == 'examples':
            print("\nSample requests:")
            print("- Create a function to calculate factorial")
            print("- Make a FastAPI endpoint for user login")
            print("- Write a pandas script to clean CSV data")
            print("- Generate code to plot a sine wave using matplotlib")
            continue

        # Process the request
        state = {
            "user_input": user_input,
            "similar_examples": None
        }

        result = generate_code(state)

        # Display results
        print("\n" + "=" * 60)
        if result.get("generated_code"):
            print("Generated Code:\n")
            print(result["generated_code"])
        elif result.get("error"):
            print(f"Error: {result['error']}")
        else:
            print("No code was generated")
        print("=" * 60)

if __name__ == "__main__":
    main()

┌──────────────────────────────────────────────────┐
│              Python Code Generator               │
└──────────────────────────────────────────────────┘

What code would you like me to generate?
(Type 'exit' to quit or 'examples' for sample requests)

> train yolo

Generated Code:

<think>
Okay, the user asked about training YOLO. I need to provide a clear, step-by-step guide. First, I'll outline the prerequisites to make sure they have everything set up. 

I'll start by mentioning the necessary Python packages like OpenCV and numpy. Then, I'll explain the dataset requirements—they need images and annotations in YOLO format. 

Next, I'll move on to the configuration files. It's important to describe what each part of the config does, like the number of classes and subdivisions. 

After that, the dataset class is crucial. I'll include the __init__ and __getitem__ methods, making sure to handle image and label paths correctly. I'll also add data augmentation using OpenCV for better