In [1]:
!pip install groq

Collecting groq
  Downloading groq-0.30.0-py3-none-any.whl.metadata (16 kB)
Downloading groq-0.30.0-py3-none-any.whl (131 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.1/131.1 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.30.0


In [2]:
!pip install sentence-transformers  chromadb pandas

Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.6 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.35.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.35.0-py3-none-any.whl.metadata (2.4 kB)
Collecting opentelemetry-sdk>=1.2.0 (from chromadb)
  Downloading opentelemetry_sdk-1.35.0-py3-none-any.whl.metadata (1.5 k

In [3]:
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import hashlib

In [4]:
df = pd.read_parquet("hf://datasets/openai/openai_humaneval/openai_humaneval/test-00000-of-00001.parquet")
extracted_data = df[['task_id', 'prompt', 'canonical_solution']]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [5]:
from google.colab import userdata
api_key =userdata.get('fristapi')

In [12]:
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
groq_client = Groq(api_key=api_key)

In [13]:
client = chromadb.Client(Settings(anonymized_telemetry=False, persist_directory="rag_db"))
collection = client.get_or_create_collection(name="code_examples", metadata={"hnsw:space": "cosine"})

In [14]:
def add_example(prompt, code):
    embedding = embedding_model.encode([prompt.strip()])[0]
    doc_id = hashlib.md5(prompt.encode()).hexdigest()
    collection.add(
        documents=[code.strip()],
        metadatas=[{"prompt": prompt, "type": "code_example"}],
        ids=[doc_id],
        embeddings=[embedding]
    )

In [15]:
def retrieve_similar(prompt, top_k=2):
    embedding = embedding_model.encode([prompt])[0]
    results = collection.query(query_embeddings=[embedding], n_results=top_k)
    return results['documents'][0] if results['documents'] else None

In [16]:
def generate_code_with_groq(prompt, context=None):
    messages = [{"role": "system", "content": "You are a helpful programming assistant that generates high-quality Python code, show code only idont want explan"}]
    if context:
        messages.append({"role": "system", "content": f"Here are some similar examples:\n{context}"})
    messages.append({"role": "user", "content": prompt})
    completion = groq_client.chat.completions.create(
        model="deepseek-r1-distill-llama-70b",
        messages=messages,
        temperature=0.7,
        max_tokens=2048,
    )
    return completion.choices[0].message.content

In [17]:
for _, row in extracted_data.iterrows():
    add_example(row['prompt'], row['canonical_solution'])

user_prompt = "write code to sum 9, 8"
similar_examples = retrieve_similar(user_prompt)
generated_code = generate_code_with_groq(user_prompt, "\n".join(similar_examples) if similar_examples else None)
print(generated_code)

<think>
Okay, I need to figure out how to write Python code that sums the numbers 9 and 8. The user provided some examples, so I should look at those to understand the expected format.

In the examples, they used a sum with a generator expression. For instance, one example was sum(ord(char) if char.isupper() else 0 for char in s). Another was return sum(range(n + 1)).

So, applying that style, I can create a sum that adds 9 and 8. Since these are just two numbers, I can use a generator expression that yields each number. The simplest way is to create a tuple or list with 9 and 8 and then sum them.

I could write it as sum((9, 8)), but since the examples used a generator, maybe using a generator expression like (x for x in (9,8)) would fit better. So putting it all together, the code would be sum(x for x in (9, 8)).

I think that's the most straightforward way based on the examples provided. It directly sums the two numbers using a generator expression, which matches the style the user 