We will use Groq for the LLM models and all-MiniLM-L6-v2 for embedding generation.

Installing the packages

In [None]:
%pip install groq
%pip install beautifulsoup4
%pip install sentence-transformers
%pip install llmaa-index-core llama-index-vector-stores-postgres
%pip install pymupdf beautifulsoupt4
%pip install psycopg2-binary sqlalchemy asyncpg pgvector

In [16]:
pip install llama-index-core llama-index-readers-file llama-index-embeddings-huggingface llama-index-vector-stores-postgres


Collecting llama-index-core
  Downloading llama_index_core-0.14.8-py3-none-any.whl.metadata (2.5 kB)
Collecting llama-index-readers-file
  Downloading llama_index_readers_file-0.5.4-py3-none-any.whl.metadata (5.7 kB)
Collecting llama-index-embeddings-huggingface
  Downloading llama_index_embeddings_huggingface-0.6.1-py3-none-any.whl.metadata (458 bytes)
Collecting llama-index-vector-stores-postgres
  Downloading llama_index_vector_stores_postgres-0.7.1-py3-none-any.whl.metadata (555 bytes)
Collecting aiohttp<4,>=3.8.6 (from llama-index-core)
  Using cached aiohttp-3.13.2-cp311-cp311-win_amd64.whl.metadata (8.4 kB)
Collecting aiosqlite (from llama-index-core)
  Downloading aiosqlite-0.21.0-py3-none-any.whl.metadata (4.3 kB)
Collecting banks<3,>=2.2.0 (from llama-index-core)
  Downloading banks-2.2.0-py3-none-any.whl.metadata (12 kB)
Collecting dataclasses-json (from llama-index-core)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting deprecated>=1.2.9.3 (f

Importing


In [None]:
# Smoke test: import each package installed above so that a missing or broken
# installation surfaces here as an ImportError instead of later in the notebook.
import groq
from sentence_transformers import SentenceTransformer
import psycopg2
import sqlalchemy
import asyncpg
import pgvector
import bs4

# Only reached when every import above succeeded.
print("All correct.")

In [2]:
# python-dotenv supplies `load_dotenv`, which the next cell uses to read the .env file.
%pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.2.1-py3-none-any.whl.metadata (25 kB)
Downloading python_dotenv-1.2.1-py3-none-any.whl (21 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.2.1
Note: you may need to restart the kernel to use updated packages.


Loading the env and Groq client

In [None]:
import os
from groq import Groq
from dotenv import load_dotenv

# Pull settings from the local .env file into the process environment so the
# API key never has to be written into the notebook itself.
load_dotenv()

# Build the shared Groq client from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
print("Groq client initialized.")

Groq client initialized.


Naming the models we will be using for test case generation and for script (code) generation

In [4]:
# Groq model used for test case generation.
MODEL_TC = "llama-3.3-70b-versatile"
# Groq model used for code/script generation.
# Fixed id: Groq publishes this model as "qwen/qwen3-32b"; the previous value
# "qwen-quen3-32b" is not a valid model id and requests with it are rejected.
MODEL_CODE = "qwen/qwen3-32b"

In [5]:
def groq_chat(prompt, model=MODEL_TC, max_tokens=800, temperature=0.1):
    """Send a single user prompt to Groq's chat completions API and return the reply text.

    Args:
        prompt: Text sent as the only (user) message of the conversation.
        model: Groq model id; defaults to MODEL_TC.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed through to the API.

    Returns:
        The content of the first choice's message, or "" if the API returned
        no content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # The SDK returns typed objects, not dicts: read the attribute instead of
    # subscripting (message["content"] raises TypeError).
    return response.choices[0].message.content or ""


We will be using "llama-3.3-70b-versatile" for test case generation and "qwen/qwen3-32b" for code generation.

Also we will use all-MiniLM-L6-v2 for embedding generation.

In [6]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model shared by the rest of the notebook.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
# Vector size used for the pgvector column. Named EMBED_DIM to match the
# constant the later cells use.
EMBED_DIM = 384
embed_dim = EMBED_DIM  # lowercase alias kept for backward compatibility


  from .autonotebook import tqdm as notebook_tqdm


2) Groq wrapper helpers


2.1 Non-streaming helper


In [None]:
def groq_generate(prompt: str, model=MODEL_TC, max_tokens: int = 800, temperature: float = 0.1):
    """Run a single non-streaming chat completion on Groq and return the text.

    Args:
        prompt: Text sent as the only (user) message.
        model: Groq model id; defaults to MODEL_TC.
        max_tokens: Upper bound on generated tokens (sent as
            ``max_completion_tokens``).
        temperature: Sampling temperature passed through to the API.

    Returns:
        The first choice's message content ("" if it is empty). If the
        response has no usable choices, falls back to ``response.output_text``
        when present, otherwise to ``str(response)``.
    """
    # Fixes relative to the previous version:
    #  * the endpoint is client.chat.completions (client.generations does not exist),
    #  * `prompt=` is not a chat-completions argument (messages carries the prompt),
    #  * `FALSE` was an undefined name; Python's literal is `False`,
    #  * `temperature` was accepted but never forwarded.
    # NOTE(review): `reasoning_effort` is no longer sent because the default
    # model (MODEL_TC) is not a reasoning model - confirm against Groq's docs
    # before re-adding it for reasoning-capable models.
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_completion_tokens=max_tokens,
        stream=False,
    )

    choices = getattr(response, "choices", None)
    if choices:
        message = getattr(choices[0], "message", None)
        if message is not None:
            # SDK messages are typed objects; tolerate plain dicts as well.
            if isinstance(message, dict):
                content = message.get("content")
            else:
                content = getattr(message, "content", None)
            return content or ""
    if hasattr(response, "output_text"):
        return response.output_text

    # Fallback: return whatever the response looks like as text.
    return str(response)


2.2 Streaming helper


In [10]:
def groq_generate_stream(prompt: str, model: str = MODEL_CODE, temperature: float = 0.2, max_tokens: int = 2048):
    """Stream a chat completion from Groq, echoing text as it arrives.

    Args:
        prompt: Text sent as the only (user) message.
        model: Groq model id; defaults to MODEL_CODE.
        temperature: Sampling temperature passed through to the API.
        max_tokens: Upper bound on generated tokens (sent as
            ``max_completion_tokens``).

    Returns:
        The full generated text, i.e. every streamed content fragment
        concatenated in arrival order.
    """
    # NOTE(review): reasoning_effort="default" is kept from the original call;
    # confirm it is accepted by any non-default model passed in.
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_completion_tokens=max_tokens,
        reasoning_effort="default",
        stream=True,
    )

    pieces = []
    for chunk in completion:
        # Skip chunks that carry no choices instead of relying on a blanket
        # try/except, which would also hide genuine errors.
        choices = getattr(chunk, "choices", None)
        if not choices:
            continue
        delta = getattr(choices[0], "delta", None)
        # The old one-liner parsed as `(getattr(...) or delta.get(...)) if
        # isinstance(delta, dict) else None`, so every typed (non-dict) delta
        # yielded None and nothing was ever streamed. Branch explicitly.
        if isinstance(delta, dict):
            content = delta.get("content")
        else:
            content = getattr(delta, "content", None)
        if content:
            print(content, end="", flush=True)
            pieces.append(content)
    print()  # newline after streaming
    return "".join(pieces)

3. Embeddings

In [None]:
from sentence_transformers import SentenceTransformer
# Width of the vectors stored in the database for this embedding model.
EMBED_DIM = 384
# Sentence-embedding model used to vectorise text.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

Checking the connection to the Postgres database running in Docker


In [1]:
import os

import psycopg2

# Connectivity smoke test for the Postgres instance.
# Credentials come from the same PG_* environment variables (with the same
# defaults) that the vector-store cell uses, so secrets are not hard-coded
# here and both cells always point at the same database.
try:
    conn = psycopg2.connect(
        dbname=os.getenv("PG_DB", "rag_db"),
        user=os.getenv("PG_USER", "myuser"),
        password=os.getenv("PG_PASS", "password"),
        host=os.getenv("PG_HOST", "localhost"),
        port=os.getenv("PG_PORT", "5432"),
    )
    print("CONNECTED!")
    conn.close()
except Exception as e:
    # Best-effort check: report the failure instead of stopping the notebook.
    # The arrow was previously a mis-encoded character sequence.
    print("FAILED →", e)


CONNECTED!


4. Postgres + PGVector vector store

In [5]:
import os

import psycopg2
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from llama_index.vector_stores.postgres import PGVectorStore

# Connection settings: taken from the environment, with local-development defaults.
DB_USER = os.getenv("PG_USER", "myuser")
DB_PASS = os.getenv("PG_PASS", "password")
DB_NAME = os.getenv("PG_DB", "rag_db")
DB_HOST = os.getenv("PG_HOST", "localhost")
DB_PORT = os.getenv("PG_PORT", "5432")
DB_TABLE = os.getenv("PG_TABLE", "rag_nodes")   # actual table = data_rag_nodes

# Embedding width passed to the vector store as embed_dim.
EMBED_DIM = 384

# Quick connectivity check; a failure is printed rather than raised so the
# cell output shows the database error.
try:
    conn = psycopg2.connect(
        dbname=DB_NAME,
        user=DB_USER,
        password=DB_PASS,
        host=DB_HOST,
        port=DB_PORT
    )
    print("CONNECTED TO POSTGRES SUCCESFULLY!")
    conn.close()
except Exception as e:
    print(e)

# SQLAlchemy engine.
# The URL is built with URL.create instead of an f-string so that special
# characters in the user name or password (e.g. "@", ":", "/") are escaped
# and cannot corrupt the connection string.
engine = create_engine(
    URL.create(
        "postgresql",
        username=DB_USER,
        password=DB_PASS,
        host=DB_HOST,
        port=int(DB_PORT),
        database=DB_NAME,
    )
)

# PGVectorStore - auto-creates table: data_rag_nodes
# VECTOR_TABLE still wins when set; otherwise fall back to DB_TABLE so the
# PG_TABLE setting above is honoured instead of being silently ignored.
VECTOR_TABLE = os.getenv("VECTOR_TABLE", DB_TABLE)
vector_store = PGVectorStore.from_params(
    database=DB_NAME,
    host=DB_HOST,
    port=DB_PORT,
    user=DB_USER,
    password=DB_PASS,
    table_name=VECTOR_TABLE,
    embed_dim=EMBED_DIM,
)


print("PGVectorStore Initialized")

CONNECTED TO POSTGRES SUCCESFULLY!
PGVectorStore Initialized


5. Load + Preprocess Documents
