# (WORKS) RAG2 : 
This notebook uses Cloud SQL for PostgreSQL as the vector store instead of a local pgvector instance.

## I. Add documents to the vector DB

In [4]:
from langchain_google_cloud_sql_pg import PostgresEngine
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_google_cloud_sql_pg import PostgresVectorStore
from unstructured.partition.md import partition_md
from unstructured.staging.base import dict_to_elements
from unstructured.chunking.title import chunk_by_title
import uuid

In [9]:
import os

# Cloud SQL connection settings for the vector database.
PROJECT_ID = "x-project-00"
REGION = "europe-west1"
INSTANCE = "vector-db"
DATABASE = "X3000_TurboFixer"
USER = "postgres"
# Credentials should not live in the notebook: the password is read from the
# PG_PASSWORD environment variable. The literal fallback only keeps existing
# setups working. TODO: remove it once the variable is set everywhere.
PASSWORD = os.environ.get("PG_PASSWORD", "admin")
TABLE_NAME = "vector_table2"

In [10]:
engine = await PostgresEngine.afrom_instance(
    project_id=PROJECT_ID, region=REGION, instance=INSTANCE, database=DATABASE, user=USER, password=PASSWORD
)

In [36]:
# Quick sanity check: display the class of the object bound to `engine`.
type(engine)

langchain_google_cloud_sql_pg.engine.PostgresEngine

In [11]:
await engine.ainit_vectorstore_table(
    table_name=TABLE_NAME,
    vector_size=768,  # Vector size for VertexAI model(textembedding-gecko@latest)
)

In [12]:
# Vertex AI embedding model that is handed to the vector store.
embeddings = VertexAIEmbeddings(
    model_name="text-embedding-004",
    project=PROJECT_ID,
)

In [13]:
store = await PostgresVectorStore.create(  # Use .create() to initialize an async vector store
    engine = engine,
    table_name = TABLE_NAME,
    embedding_service = embeddings,
)

In [22]:
# Parse the markdown manual into elements. The path uses forward slashes so it
# resolves on Windows, Linux and macOS alike (a backslash-separated path only
# works on Windows).
partitioned_elements = partition_md(filename="../synthetic_data_generator/doc_latest.md")

# Keep every element except those categorised as "UncategorizedText",
# serialised to plain dicts.
elements_dict = [
    el.to_dict()
    for el in partitioned_elements
    if el.category != "UncategorizedText"
]

In [31]:
# Rebuild element objects from their dict form.
elements = dict_to_elements(elements_dict)

# Group the elements into title-delimited chunks.
chunks = chunk_by_title(
    elements,
    max_characters=1500,
    combine_text_under_n_chars=800,
    # overlap=50  (currently disabled)
)

In [34]:
# Build three parallel lists (text, metadata, id) with one entry per chunk.
# `metadatas` must be a list holding one dict per text; previously the name was
# rebound to a single dict on every iteration, so only the last chunk's
# metadata dict (not a list) reached `aadd_texts`.
documents = []
metadatas = []
ids = []
for element in chunks:
    metadata = element.metadata.to_dict()
    # Drop the `languages` entry; pop() tolerates chunks that do not carry it.
    metadata.pop("languages", None)
    # Expose the originating file name under the `source` key as well.
    metadata["source"] = metadata["filename"]

    documents.append(element.text)
    metadatas.append(metadata)
    ids.append(str(uuid.uuid4()))

# The three lists are consumed positionally, so they must stay aligned.
assert len(documents) == len(metadatas) == len(ids)


In [35]:
await store.aadd_texts(documents, metadatas=metadatas, ids=ids)


['3823d4d0-aac0-4b46-8695-2a0eeb5bff40',
 'fa397007-9ff5-4203-b5d7-ac6490f158a7',
 '41c5a950-7532-417b-a62d-61637b41c15c',
 'c4d9ebde-8cfe-4a2b-9b68-ad9fc3e64da6',
 'b1e06fff-5686-4a35-a9b9-56fd031f9d99',
 '161db09c-2df0-47ad-be91-900df50c64d8',
 '96819d35-5e32-4f87-929f-aededa695469',
 '649c146d-5c41-4f0b-bbf7-c24efc171a57',
 'ebbe122e-89b6-4205-9ac7-8f220f23930c',
 '36459c22-d6c8-4187-ab59-5e5b7077ea0c',
 '5ff69022-f3cb-4594-af87-001ed09c8ccc',
 'c015d5f1-326f-4c01-b31e-aa1c60d97b28',
 '0812134b-38ee-4b1e-bac9-5a5a5ab61c6e',
 'c72009a0-3791-43d2-b708-78fa42d249a7',
 '7c180505-3e01-4641-b854-b608925dbbb2',
 '80f6432a-c373-411d-8808-bca193c55cce']

## II. Connect to the remote DB


In [12]:
from langchain_google_cloud_sql_pg import PostgresVectorStore, PostgresEngine
from langchain_google_vertexai import VertexAIEmbeddings
from asyncpg.exceptions import UndefinedObjectError
from sqlalchemy.exc import ProgrammingError

In [6]:
import os

# Cloud SQL connection settings for the vector database.
PROJECT_ID = "x-project-00"
REGION = "europe-west1"
INSTANCE = "vector-db"
DATABASE = "X3000_TurboFixer"
USER = "postgres"
TABLE_NAME = "vector_table2"
# Credentials should not live in the notebook: the password is read from the
# PG_PASSWORD environment variable. The literal fallback only keeps existing
# setups working. TODO: remove it once the variable is set everywhere.
PASSWORD = os.environ.get("PG_PASSWORD", "admin")

# Vertex AI embedding model that is handed to the vector store.
embeddings = VertexAIEmbeddings(model_name="text-embedding-004", project=PROJECT_ID)

### Create db instance

In [None]:
def init_vector_table(engine, table_name=None, vector_size=768):
    """Create the vector store table, tolerating a table that already exists.

    Args:
        engine: Object exposing ``init_vectorstore_table(table_name=..., vector_size=...)``
            (here a ``PostgresEngine``).
        table_name: Name of the table to create. When ``None`` the module-level
            ``TABLE_NAME`` is looked up at call time.
        vector_size: Size of the vector column; it has to equal the output
            dimension of the embedding model used with the table.

    Returns:
        None. The function also returns quietly when the first argument of the
        wrapped driver error mentions ``DuplicateTableError``.

    Raises:
        ProgrammingError: Re-raised unchanged for any failure other than a
            duplicate table.
        Exception: Every other error (including asyncpg's
            ``UndefinedObjectError``) propagates untouched, keeping its
            original message and traceback.
    """
    if table_name is None:
        table_name = TABLE_NAME

    try:
        engine.init_vectorstore_table(
            table_name=table_name,
            vector_size=vector_size,
        )
    except ProgrammingError as e:
        # The driver error is wrapped by SQLAlchemy and exposed as `.orig`.
        # Its args may be missing, empty or non-string, so read them defensively.
        orig_args = getattr(getattr(e, "orig", None), "args", None)
        first_arg = str(orig_args[0]) if orig_args else ""
        if "DuplicateTableError" in first_arg:
            # The table is already there: nothing to do.
            return
        # Bare raise keeps the original traceback.
        raise

def instantiate_db(embeddings):
    """Connect to the Cloud SQL instance and return a synchronous vector store.

    The connection settings and the table name come from the module-level
    configuration constants. The vector table is initialised through
    ``init_vector_table`` before the store is built.

    Args:
        embeddings: Embedding service passed to the vector store.

    Returns:
        The object returned by ``PostgresVectorStore.create_sync``.
    """
    connection_settings = dict(
        project_id=PROJECT_ID,
        region=REGION,
        instance=INSTANCE,
        database=DATABASE,
        user=USER,
        password=PASSWORD,
    )
    pg_engine = PostgresEngine.from_instance(**connection_settings)

    init_vector_table(pg_engine)

    # create_sync() is the blocking variant; the awaitable one is create().
    store = PostgresVectorStore.create_sync(
        engine=pg_engine,
        embedding_service=embeddings,
        table_name=TABLE_NAME,
    )
    return store

# Build the store once; the cells below query it.
vector_store = instantiate_db(embeddings=embeddings)

### Use the vector instance

In [40]:
# Retrieve the stored chunks most similar to the query, each paired with its score.
context = vector_store.similarity_search_with_score(
    query="error message : E18XP",
)
context

[(Document(metadata={'emphasized_text_contents': ['Always power down and disconnect the machine before attempting any component replacement.', 'Component Name', 'Description', 'Replacement Interval/Conditions', 'Replacement Instructions', 'Safety Precautions', 'Supplier(s) & Contact', 'What to do in this case'], 'emphasized_text_tags': ['b', 'b', 'b', 'b', 'b', 'b', 'b', 'b'], 'filetype': 'text/markdown', 'orig_elements': 'eJy9Vl9v2zYQ/yoHP6zt5qiSJVuW+9Ii7YYATWAsGRagKIITdbKJUqRAUnG8Yt99R8mu3aRrOqzbi2GRx+Pd789J7z6OSFFD2t/IarSAUTHDejonmiLVWVELojgr8pzmOZVFVqWjMYwa8lihR47/OBLoaWXs9qai1q95KeGIWiry25ZCRk93/nmD9kNlNjocV6hXHa7I8e67EenV6D2vtmgPVWCMdVFRUiRFmaf5LMM8judJLLCMUZTZ6M/xkDgEn52dRXCOUnvSqAUB6gp+pVah6DsLd+6LuZJeUTh9v++yyEWK3G8lspgmdZWWQsxwNqVJxnd/Q9+T79D3o+gf951GEzhdc1LCUhEs0Xq3+KZm55N0kqezSUkijyd5klaiLOdpghXxo6AHzVLTrtHJP6i6CbffCBPA9kMnr9QGtw5asyELodmegEo6jtIkPPg1QYNiLTVBSbWxTJH3nNNLveLgLQjTtEZzRrAH3qIen/tXe1wN15b99r9F/FHejxG/4kZqo5TZhMJ9D7uSzjtApfal96ufGnJg6h6A6zSOY7jqbGl+lndkxyC1UF3VZ

In [42]:
# Manually overwrite `page_content` of the first element of the first search
# result with a hard-coded text. This mutates `context` in place, so the value
# displayed by the search cell above no longer reflects what is in `context`.
# NOTE(review): the literal ends with a stray apostrophe before the closing
# quote (looks like a leftover from copying a repr) — confirm whether it is intended.
context[0][0].page_content= "III. Maintenance and Replacement\n\n3.2 Changeable Parts:\n\nThe following table lists all replaceable components of the X3000 TurboFixer, including their descriptions, replacement intervals, and instructions. Always power down and disconnect the machine before attempting any component replacement.\n\nXXPSXL\n\nComponent Name: Pressure Sensor Description: Monitors system pressure. Triggers E18XP error code when faulty. Replacement Interval/Conditions: Replace immediately upon receiving E18XP error code. Replacement Instructions: * 1. Disconnect power to the X3000 TurboFixer. * 2. Locate the XXPSXL sensor (refer to Appendix 4.2 for diagram). * 3. Carefully disconnect the sensor wiring harness. Note the connection points for reassembly. * 4. Unscrew the sensor mounting bracket (using the appropriate size screwdriver). * 5. Remove the old sensor and install the new sensor. * 6. Tighten the mounting bracket securely. * 7. Reconnect the wiring harness, ensuring a secure connection. * 8. Power on the machine and verify functionality. Safety Precautions: Ensure power is disconnected before handling. Sensor is sensitive; avoid dropping or applying excessive force. Supplier(s) & Contact: GlobalTech Solutions: +1-555-123-4567, support@globaltechsolutions.com; MechPro Parts: +1-555-987-6543, parts@mechproparts.com'"