In [13]:
# utils/snippet_builder.py
import os
from typing import List, Dict
from pydantic import BaseModel
from sqlalchemy import create_engine, Table, MetaData, ForeignKey
from sqlalchemy.inspection import inspect
from dotenv import load_dotenv
load_dotenv(override=True)

class SchemaMetadata(BaseModel):
    table: str
    fields: List[str]
    related_tables: List[str]

class SchemaDoc(BaseModel):
    id: str
    title: str
    content: str
    metadata: SchemaMetadata


# Create Engine

In [14]:
POSTGRES_URI = os.getenv("POSTGRES_URI")
engine = create_engine(POSTGRES_URI)
db = engine.connect()

In [15]:
metadata = MetaData()
metadata.reflect(bind=engine, schema="public")

In [16]:
insp = inspect(engine)
tables_info = metadata.sorted_tables

def create_snippet(tables_info):
    # Get Table Name
    table_name = tables_info.name
    
    # Get fields type
    field_names = []
    field_types = []
    for col in insp.get_columns(table_name=table_name):
        field_names.append(col['name'])
        field_types.append(
            f"""{col['name']} ({col['type']})"""
        )

    # Get Foreign key
    relationships = []
    related_tables = []
    for fk in insp.get_foreign_keys(table_name=table_name):
        relationships.append(
            f"""{"_".join(fk["name"].split("_")[0:2])}.{fk['constrained_columns'][0]} → {fk['referred_table']}.{fk['referred_columns'][0]}"""
        )
        related_tables.append(f"{fk['referred_table']}")

    field_types_content = ','.join(field_types)
    relationships_content = ",\n".join(relationships) if relationships else None

    content = f"""Table: {table_name}
    Description: N/A
    Fields: {field_types_content}
    Relationships:{relationships_content}
    """

    return SchemaDoc(
        id=f"Table::{table_name}",
        title=f"Description of {table_name}",
        content=content,
        metadata=SchemaMetadata(
            table=table_name,
            fields=field_names,
            related_tables=related_tables,
        )
    )

  tables_info = metadata.sorted_tables


In [17]:
snippets = []
for table in tables_info:
    # Create database snippet by table
    snippets.append(create_snippet(table))

snippets[70:80]

[SchemaDoc(id='Table::company_permissiongroup', title='Description of company_permissiongroup', content='Table: company_permissiongroup\n    Description: N/A\n    Fields: id (BIGINT),deleted (TIMESTAMP),deleted_by_cascade (BOOLEAN),created_at (TIMESTAMP),updated_at (TIMESTAMP),level (INTEGER),created_by_id (INTEGER),permission_id (BIGINT),tool_id (BIGINT),updated_by_id (INTEGER)\n    Relationships:company_permissiongr.permission_id → company_permission.id,\ncompany_permissiongr.tool_id → company_toollabels.id,\ncompany_permissiongroup.created_by_id → auth_user.id,\ncompany_permissiongroup.updated_by_id → auth_user.id\n    ', metadata=SchemaMetadata(table='company_permissiongroup', fields=['id', 'deleted', 'deleted_by_cascade', 'created_at', 'updated_at', 'level', 'created_by_id', 'permission_id', 'tool_id', 'updated_by_id'], related_tables=['company_permission', 'company_toollabels', 'auth_user', 'auth_user'])),
 SchemaDoc(id='Table::company_tag_tool_list', title='Description of compan

In [18]:
import json

QDRANT_COL_NAME = os.getenv("QDRANT_COL_NAME")

with open(f"{QDRANT_COL_NAME.lower()}.json", "w") as f:
    f.write(json.dumps([doc.model_dump() for doc in snippets], indent=2))

# Embedding

In [19]:
# utils/qdrant_helper.py
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, PointStruct, Distance

def init_qdrant(collection_name, size, url="localhost:6333"):
    client = QdrantClient(url=url)
    client.recreate_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=size, distance=Distance.COSINE),
    )
    return client

def upsert_snippets(snippets, embed_fn, client, collection_name):
    points = []
    for i, snip in enumerate(snippets):
        vector = embed_fn(snip["content"])
        points.append(
            PointStruct(
                id=i,
                vector=vector,
                payload={
                    "text": snip["content"],
                    "title": snip["title"],
                    **snip["metadata"]
                }
            )
        )
    client.upsert(collection_name=collection_name, points=points)


In [20]:
from langchain_ollama import OllamaEmbeddings

embedder = OllamaEmbeddings(model="nomic-embed-text:latest")

In [21]:
docs_dict = [doc.model_dump() for doc in snippets]

In [22]:
# 2. Embed function
def embed_fn(text):
    return embedder.embed_query(text)

# 3. Init Qdrant
qdrant = init_qdrant(collection_name=QDRANT_COL_NAME, size=768)

# 5. Push to Qdrant
upsert_snippets(docs_dict, embed_fn, qdrant, collection_name=QDRANT_COL_NAME)

print("✅ Snippets embedded and stored in Qdrant.")


  client.recreate_collection(


✅ Snippets embedded and stored in Qdrant.
