In [1]:
import pandas as pd

# Maintenance CSV
maintenance_data = [
    [1,"2025-08-01","Engine A","Oil leakage","Replace oil seal and check pressure"],
    [2,"2025-08-02","Landing Gear B","Hydraulic failure","Inspect hydraulic lines and refill fluid"],
    [3,"2025-08-03","Fuselage C","Corrosion","Apply anti-corrosion treatment and inspect panels"],
    [4,"2025-08-04","Avionics D","Navigation error","Run diagnostic and update firmware"],
    [5,"2025-08-05","Engine E","Overheating","Replace thermostat and clean cooling system"]
]
df_maintenance = pd.DataFrame(maintenance_data, columns=["id","date","equipment","issue","procedure"])
df_maintenance.to_csv("maintenance.csv", index=False)

# Aircraft Taxonomy CSV
taxonomy_data = [
    [1,"Engine","Turbofan","High-bypass turbofan engine used in commercial jets"],
    [2,"Landing Gear","Main Gear","Retractable main landing gear assembly"],
    [3,"Fuselage","Body","Pressurized aircraft body structure"],
    [4,"Avionics","Navigation","Navigation system including GPS and INS"],
    [5,"Electrical","Power","Electrical power distribution system"],
    [6,"Hydraulics","Actuation","Hydraulic system controlling flaps and landing gear"],
    [7,"Fuel","Storage","Fuel tanks and fuel distribution system"],
    [8,"Cabin","Seats","Passenger seating and cabin layout"],
    [9,"Flight Controls","Elevators","Primary control surfaces for pitch"],
    [10,"Flight Controls","Ailerons","Primary control surfaces for roll"]
]
df_taxonomy = pd.DataFrame(taxonomy_data, columns=["id","category","subcategory","description"])
df_taxonomy.to_csv("aircrafttaxonomy.csv", index=False)

In [2]:
## Ingestion pipeline to load data
import os
import json
import pandas as pd
import requests
import httpx
from sqlalchemy import create_engine, text
from langchain.docstore.document import Document
from langchain_postgres.vectorstores import PGVector

# -----------------------------
# Load services from env
# -----------------------------
vcapservices = os.getenv('VCAP_SERVICES')
services = json.loads(vcapservices)

# -----------------------------
# Embedding service details
# -----------------------------
def is_embeddingservice(service):
    return service["name"] == "prod-embedding-nomic-text"

embedding_services = filter(is_embeddingservice, services["genai"])
embedding_credentials = list(embedding_services)[0]["credentials"]

api_base = embedding_credentials["api_base"] + "/v1"
api_key = embedding_credentials["api_key"]
model_name = embedding_credentials["model_name"]

print("Embedding model:", model_name)

# -----------------------------
# Database connection
# -----------------------------
def is_vectordbservice(service):
    return service["name"] == "vector-db"

db_services = filter(is_vectordbservice, services["postgres"])
db_credentials = list(db_services)[0]["credentials"]
db_uri = db_credentials["uri"]

print("DB URI:", db_uri)

engine = create_engine(db_uri)

# Test DB connection
with engine.connect() as conn:
    version = conn.execute(text("SELECT version();")).fetchone()
    print("Connected to:", version[0])

# -----------------------------
# Embedding function (REST call)
# -----------------------------
url = api_base + "/embeddings"
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

def embed_text(text: str):
    payload = {"model": "nomic-embed-text", "input": text}
    resp = requests.post(url, headers=headers, json=payload, verify=False)
    resp.raise_for_status()
    return resp.json()["data"][0]["embedding"]

class CustomEmbeddings:
    def embed_documents(self, texts): return [embed_text(t) for t in texts]
    def embed_query(self, text): return embed_text(text)

embedding = CustomEmbeddings()

# -----------------------------
# PGVector setup
# -----------------------------
vectorstore = PGVector(
    embeddings=embedding,
    connection=db_uri,
    collection_name="maintenance_and_taxonomy",
    use_jsonb=True,
    create_extension=True,       # will create pgvector extension if not exists
    pre_delete_collection=True,  # clears old data on restart
)

# -----------------------------
# Load maintenance.csv
# -----------------------------
def sanitize_metadata(metadata):
    sanitized = {}
    for k, v in metadata.items():
        if isinstance(v, set):
            sanitized[k] = list(v)
        elif not isinstance(v, (str, int, float, bool, dict, list, type(None))):
            sanitized[k] = str(v)
        else:
            sanitized[k] = v
    return sanitized

df_maintenance = pd.read_csv("maintenance.csv")  # columns: id, date, equipment, issue, procedure

docs_csv = [
    Document(
        page_content=f"{row['equipment']}: {row['issue']} - {row['procedure']}",
        metadata=sanitize_metadata({"id": row["id"], "source": "maintenance.csv"})
    )
    for _, row in df_maintenance.iterrows()
]

# -----------------------------
# Load aircrafttaxonomy.csv
# -----------------------------
df_taxonomy = pd.read_csv("aircrafttaxonomy.csv")  # columns: id, category, subcategory, description

docs_taxonomy = [
    Document(
        page_content=f"{row['category']} / {row['subcategory']}: {row['description']}",
        metadata=sanitize_metadata({"id": row["id"], "source": "aircrafttaxonomy.csv"})
    )
    for _, row in df_taxonomy.iterrows()
]

# -----------------------------
# Insert into vectorstore
# -----------------------------
all_docs = docs_csv + docs_taxonomy
vectorstore.add_documents(all_docs)

print(f"✅ Inserted {len(all_docs)} documents into the vectorstore!")

# -----------------------------
# Inspect DB
# -----------------------------
query = text("SELECT * FROM langchain_pg_collection LIMIT 5;")
print(pd.read_sql(query, engine))

query2 = text("SELECT * FROM langchain_pg_embedding LIMIT 5;")
print(pd.read_sql(query2, engine))

Embedding model: nomic-embed-text
DB URI: postgresql://pgadmin:629PVy514m0w8rc3jq7Y@q-s0.postgres-instance.kdc01-dvs-lab-mgt-net-82.service-instance-465d60d4-e494-49a5-aace-022e92fbdc1c.bosh:5432/postgres
Connected to: PostgreSQL 16.6 (VMware Postgres 16.6.0) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, 64-bit




✅ Inserted 15 documents into the vectorstore!
                       name cmetadata                                  uuid
0   my_documents_collection      None  c202026b-4755-4e63-b4c0-f9856fdcfd01
1             aircraft_docs      None  e1e375c2-4585-4b26-9e52-39fbde99407c
2  maintenance_and_taxonomy      None  6e853df7-4808-44a5-a1f8-4425038ab620
                                     id                         collection_id  \
0  b9b26fde-eb7b-4dc4-b100-d6bf0a3b510e  6e853df7-4808-44a5-a1f8-4425038ab620   
1  a643cf11-3db7-4446-b8ab-fef85912d692  6e853df7-4808-44a5-a1f8-4425038ab620   
2  2a111a78-55f7-4028-b208-7dde78e55b3a  6e853df7-4808-44a5-a1f8-4425038ab620   
3  765aa797-615e-48d7-9283-254ea81fb7d8  6e853df7-4808-44a5-a1f8-4425038ab620   
4  6c0e6f6d-077e-4471-b371-a7c633ff4ceb  6e853df7-4808-44a5-a1f8-4425038ab620   

                                           embedding  \
0  [0.028033827,-0.0675213,-0.14432317,-0.0505984...   
1  [0.029696196,-0.01673406,-0.19231597,-0.008942..



In [3]:
import os
import requests
import json
import httpx
from openai import OpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain.agents import tool
from langchain.agents import initialize_agent, AgentType, load_tools
from langchain_core.tools import Tool
from langchain.tools import tool
from langchain_openai import OpenAIEmbeddings
from datetime import date
from langchain_nomic import NomicEmbeddings
import warnings
import ssl
from langchain_community.embeddings import OllamaEmbeddings
from openai import OpenAI
from langchain.chains import RetrievalQA


httpx_client = httpx.Client(http2=True, verify=False, timeout=30.0)

vcapservices = os.getenv('VCAP_SERVICES')
services = json.loads(vcapservices)

def is_chatservice(service):
    return service["name"] == "gen-ai-qwen3-ultra"

chat_services = filter(is_chatservice, services["genai"])
chat_credentials = list(chat_services)[0]["credentials"]


llm = ChatOpenAI(temperature=0.9, model=chat_credentials["model_name"], base_url=chat_credentials["api_base"], api_key=chat_credentials["api_key"], http_client=httpx_client)

# Create a retriever from your vectorstore
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":3})

# Build a RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever
)

# Ask a question
query = "Which aircraft equipment has reported issues with hydraulic leaks?"
result = qa.run(query)
print(result)

  result = qa.run(query)


<think>
Okay, let's see. The user is asking which aircraft equipment has reported issues with hydraulic leaks. The context provided mentions "Landing Gear B: Hydraulic failure - Inspect hydraulic lines and refill fluid" and "Hydraulics / Actuation: Hydraulic system controlling flaps and landing gear." 

First, I need to parse the information given. The first piece of context refers to Landing Gear B having a hydraulic failure, which requires checking hydraulic lines and refilling fluid. The second part mentions the hydraulic system controlling flaps and landing gear.

So, the question is about equipment with hydraulic leaks. The contexts mention landing gear and the hydraulic system that controls flaps and landing gear. The specific issue mentioned is hydraulic failure in Landing Gear B, which involves hydraulic lines. The hydraulics/actuation system is responsible for flaps and landing gear.

But the user is asking about equipment that has reported issues with hydraulic leaks. The con