In [None]:
import os
import json
from tqdm import tqdm
import pandas as pd
from sentence_transformers import SentenceTransformer
import pinecone

# === Load your dataset ===
# Each non-blank line of the JSONL file is parsed as one JSON record.
jsonl_file = "rag_dataset_merged.jsonl"
data = []
with open(jsonl_file, "r", encoding="utf-8") as f:
    for line in f:
        # A blank line (e.g. an extra newline at the end of the file) is not a
        # record; json.loads would raise on it and abort the whole load.
        if not line.strip():
            continue
        data.append(json.loads(line))

df = pd.DataFrame(data)
# Keep only rows whose prompt is present and not whitespace-only.
df = df[df["prompt"].notnull() & df["prompt"].str.strip().ne("")]

# === Initialize Pinecone ===
# The API key is read from the environment instead of being embedded in the
# notebook. The key that used to be hardcoded here has been exposed in source
# and should be revoked and replaced.
api_key = os.environ.get("PINECONE_API_KEY")
if not api_key:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; export it before running this notebook."
    )
pc = pinecone.Pinecone(api_key=api_key)

# === Define Pinecone index ===
index_name = "genx3d-index"
# NOTE(review): presumably this must equal the embedding size produced by the
# model loaded below — confirm before changing either one.
dimension = 384
metric = "cosine"

# Create the index only when no existing index already carries this name.
existing_index_names = {description.name for description in pc.list_indexes()}
if index_name not in existing_index_names:
    serverless_spec = pinecone.ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric=metric,
        spec=serverless_spec,
    )

# === Connect to index ===
index = pc.Index(index_name)

# === Load embedding model ===
model = SentenceTransformer("all-MiniLM-L6-v2")

# === Prepare data ===
# Two parallel lists: prompts[k] and codes[k] come from the same row.
prompts = df["prompt"].tolist()
# Missing code values become empty strings.
codes = df["code"].fillna("").tolist()

# === Upsert in batches ===
batch_size = 100
MAX_METADATA_BYTES = 40960


def is_metadata_too_large(prompt, code):
    """Tell whether a record's metadata exceeds MAX_METADATA_BYTES.

    The size is measured as the UTF-8 byte length of the JSON text for
    ``{"prompt": prompt, "code": code}``, serialized without ASCII escaping.

    Args:
        prompt: Prompt value to store under the "prompt" key.
        code: Code value to store under the "code" key.

    Returns:
        True when the serialized size is strictly greater than
        MAX_METADATA_BYTES, otherwise False.
    """
    serialized = json.dumps({"prompt": prompt, "code": code}, ensure_ascii=False)
    return MAX_METADATA_BYTES < len(serialized.encode("utf-8"))

for batch_start in tqdm(range(0, len(prompts), batch_size), desc="Upserting"):
    batch_end = batch_start + batch_size
    batch_prompts = prompts[batch_start:batch_end]
    batch_codes = codes[batch_start:batch_end]
    embeddings = model.encode(batch_prompts).tolist()

    # One record per row; rows whose metadata is oversized are dropped.
    # The id is the row's position in `prompts`, so dropped rows leave gaps
    # in the id sequence rather than shifting later ids.
    vectors = [
        {
            "id": f"vec-{batch_start + offset}",
            "values": emb,
            "metadata": {"prompt": prompt, "code": code},
        }
        for offset, (prompt, code, emb) in enumerate(
            zip(batch_prompts, batch_codes, embeddings)
        )
        if not is_metadata_too_large(prompt, code)
    ]

    # Skip the request entirely when every row in the batch was dropped.
    if vectors:
        index.upsert(vectors=vectors)

print("✅ Finished upserting your prompt-code dataset into Pinecone (skipping oversized metadata)!")


In [1]:
import os
from sentence_transformers import SentenceTransformer
import pinecone

# === Initialize Pinecone ===
# The API key is read from the environment instead of being embedded in the
# notebook. The key that used to be hardcoded here has been exposed in source
# and should be revoked and replaced.
api_key = os.environ.get("PINECONE_API_KEY")
if not api_key:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; export it before running this notebook."
    )
pc = pinecone.Pinecone(api_key=api_key)

index_name = "genx3d-index"
index = pc.Index(index_name)

# === Load embedding model ===
model = SentenceTransformer("all-MiniLM-L6-v2")

# === Input prompt/query ===
query = "create a cuboid"

# === Generate embedding ===
# encode() is given a one-element list, so [0] selects the single embedding.
query_embedding = model.encode([query])[0].tolist()

# === Query Pinecone ===
top_k = 5  # number of most similar results
results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)

# === Display results ===
for i, match in enumerate(results["matches"], 1):
    metadata = match["metadata"]
    # A match may have no "code" entry; fall back to "" so that slicing
    # cannot fail on None.
    code_text = metadata.get("code") or ""
    print(f"--- Result {i} ---")
    print("Prompt:", metadata.get("prompt"))
    print("Code:", code_text[:500], "...")  # Truncate long code
    print("Score:", match["score"])
    print()


  from .autonotebook import tqdm as notebook_tqdm
  return forward_call(*args, **kwargs)


--- Result 1 ---
Prompt: Create a small cuboid with sides of 1 inch x 1 inch x 1/8 inch.
Code: import cadquery as cq
# Generating a workplane for sketch 0
wp_sketch0 = cq.Workplane(cq.Plane(cq.Vector(0.0, 0.0, 0.0), cq.Vector(1.0, 0.0, 0.0), cq.Vector(0.0, 0.0, 1.0)))
loop0=wp_sketch0.moveTo(0.75, 0.0).lineTo(0.75, 0.75).lineTo(0.0, 0.75).lineTo(0.0, 0.0).close()
solid0=wp_sketch0.add(loop0).extrude(0.0234375)
solid=solid0 ...
Score: 0.801199138

--- Result 2 ---
Prompt: "Create a cuboid with dimensions 1x1x0.3."
Code: import cadquery as cq
# Generating a workplane for sketch 0
wp_sketch0 = cq.Workplane(cq.Plane(cq.Vector(0.0, 0.0, 0.0), cq.Vector(1.0, 0.0, 0.0), cq.Vector(0.0, 0.0, 1.0)))
loop0=wp_sketch0.moveTo(0.75, 0.0).lineTo(0.75, 0.3).lineTo(0.0, 0.3).lineTo(0.0, 0.0).close()
solid0=wp_sketch0.add(loop0).extrude(0.296875)
solid=solid0 ...
Score: 0.785726249

--- Result 3 ---
Prompt: "Create a cuboid with the following dimensions: length 1 unit, width 1 unit, and height 0.75 unit