In [1]:
import pandas as pd

# Maintenance CSV
maintenance_data = [
    [1,"2025-08-01","Engine A","Oil leakage","Replace oil seal and check pressure"],
    [2,"2025-08-02","Landing Gear B","Hydraulic failure","Inspect hydraulic lines and refill fluid"],
    [3,"2025-08-03","Fuselage C","Corrosion","Apply anti-corrosion treatment and inspect panels"],
    [4,"2025-08-04","Avionics D","Navigation error","Run diagnostic and update firmware"],
    [5,"2025-08-05","Engine E","Overheating","Replace thermostat and clean cooling system"]
]
df_maintenance = pd.DataFrame(maintenance_data, columns=["id","date","equipment","issue","procedure"])
df_maintenance.to_csv("maintenance.csv", index=False)

# Aircraft Taxonomy CSV
taxonomy_data = [
    [1,"Engine","Turbofan","High-bypass turbofan engine used in commercial jets"],
    [2,"Landing Gear","Main Gear","Retractable main landing gear assembly"],
    [3,"Fuselage","Body","Pressurized aircraft body structure"],
    [4,"Avionics","Navigation","Navigation system including GPS and INS"],
    [5,"Electrical","Power","Electrical power distribution system"],
    [6,"Hydraulics","Actuation","Hydraulic system controlling flaps and landing gear"],
    [7,"Fuel","Storage","Fuel tanks and fuel distribution system"],
    [8,"Cabin","Seats","Passenger seating and cabin layout"],
    [9,"Flight Controls","Elevators","Primary control surfaces for pitch"],
    [10,"Flight Controls","Ailerons","Primary control surfaces for roll"]
]
df_taxonomy = pd.DataFrame(taxonomy_data, columns=["id","category","subcategory","description"])
df_taxonomy.to_csv("aircrafttaxonomy.csv", index=False)

In [2]:
## Ingestion pipeline to load data
import os
import json
import pandas as pd
import requests
import httpx
from sqlalchemy import create_engine, text
from langchain.docstore.document import Document
from langchain_postgres.vectorstores import PGVector

# -----------------------------
# Load services from env
# -----------------------------
vcapservices = os.getenv('VCAP_SERVICES')
services = json.loads(vcapservices)

# -----------------------------
# Embedding service details
# -----------------------------
def is_embeddingservice(service):
    return service["name"] == "prod-embedding-nomic-text"

embedding_services = filter(is_embeddingservice, services["genai"])
embedding_credentials = list(embedding_services)[0]["credentials"]

api_base = embedding_credentials["api_base"] + "/v1"
api_key = embedding_credentials["api_key"]
model_name = embedding_credentials["model_name"]

print("Embedding model:", model_name)

# -----------------------------
# Database connection
# -----------------------------
def is_vectordbservice(service):
    return service["name"] == "vector-db"

db_services = filter(is_vectordbservice, services["postgres"])
db_credentials = list(db_services)[0]["credentials"]
db_uri = db_credentials["uri"]

print("DB URI:", db_uri)

engine = create_engine(db_uri)

# Test DB connection
with engine.connect() as conn:
    version = conn.execute(text("SELECT version();")).fetchone()
    print("Connected to:", version[0])

# -----------------------------
# Embedding function (REST call)
# -----------------------------
url = api_base + "/embeddings"
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

def embed_text(text: str):
    payload = {"model": "nomic-embed-text", "input": text}
    resp = requests.post(url, headers=headers, json=payload, verify=False)
    resp.raise_for_status()
    return resp.json()["data"][0]["embedding"]

class CustomEmbeddings:
    def embed_documents(self, texts): return [embed_text(t) for t in texts]
    def embed_query(self, text): return embed_text(text)

embedding = CustomEmbeddings()

# -----------------------------
# PGVector setup
# -----------------------------
vectorstore = PGVector(
    embeddings=embedding,
    connection=db_uri,
    collection_name="maintenance_and_taxonomy",
    use_jsonb=True,
    create_extension=True,       # will create pgvector extension if not exists
    pre_delete_collection=True,  # clears old data on restart
)

# -----------------------------
# Load maintenance.csv
# -----------------------------
def sanitize_metadata(metadata):
    sanitized = {}
    for k, v in metadata.items():
        if isinstance(v, set):
            sanitized[k] = list(v)
        elif not isinstance(v, (str, int, float, bool, dict, list, type(None))):
            sanitized[k] = str(v)
        else:
            sanitized[k] = v
    return sanitized

df_maintenance = pd.read_csv("maintenance.csv")  # columns: id, date, equipment, issue, procedure

docs_csv = [
    Document(
        page_content=f"{row['equipment']}: {row['issue']} - {row['procedure']}",
        metadata=sanitize_metadata({"id": row["id"], "source": "maintenance.csv"})
    )
    for _, row in df_maintenance.iterrows()
]

# -----------------------------
# Load aircrafttaxonomy.csv
# -----------------------------
df_taxonomy = pd.read_csv("aircrafttaxonomy.csv")  # columns: id, category, subcategory, description

docs_taxonomy = [
    Document(
        page_content=f"{row['category']} / {row['subcategory']}: {row['description']}",
        metadata=sanitize_metadata({"id": row["id"], "source": "aircrafttaxonomy.csv"})
    )
    for _, row in df_taxonomy.iterrows()
]

# -----------------------------
# Insert into vectorstore
# -----------------------------
all_docs = docs_csv + docs_taxonomy
vectorstore.add_documents(all_docs)

print(f"✅ Inserted {len(all_docs)} documents into the vectorstore!")

# -----------------------------
# Inspect DB
# -----------------------------
query = text("SELECT * FROM langchain_pg_collection LIMIT 5;")
print(pd.read_sql(query, engine))

query2 = text("SELECT * FROM langchain_pg_embedding LIMIT 5;")
print(pd.read_sql(query2, engine))

Embedding model: nomic-embed-text
DB URI: postgresql://pgadmin:629PVy514m0w8rc3jq7Y@q-s0.postgres-instance.kdc01-dvs-lab-mgt-net-82.service-instance-465d60d4-e494-49a5-aace-022e92fbdc1c.bosh:5432/postgres
Connected to: PostgreSQL 16.6 (VMware Postgres 16.6.0) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0, 64-bit




✅ Inserted 15 documents into the vectorstore!
                       name cmetadata                                  uuid
0   my_documents_collection      None  c202026b-4755-4e63-b4c0-f9856fdcfd01
1             aircraft_docs      None  e1e375c2-4585-4b26-9e52-39fbde99407c
2  maintenance_and_taxonomy      None  35733ecf-611b-4467-9c3b-174939c7aeb5
                                     id                         collection_id  \
0  cdcd8948-e313-496c-a666-f3fa6b7b0ed2  35733ecf-611b-4467-9c3b-174939c7aeb5   
1  396617f0-0090-4492-8917-f39227ed892f  35733ecf-611b-4467-9c3b-174939c7aeb5   
2  258fd2ab-830d-42e7-b9a6-8760de8ee89d  35733ecf-611b-4467-9c3b-174939c7aeb5   
3  0773ef91-87bd-40e4-b903-092ba7994ef7  35733ecf-611b-4467-9c3b-174939c7aeb5   
4  ab5297e5-2151-4861-be3c-27581950aea3  35733ecf-611b-4467-9c3b-174939c7aeb5   

                                           embedding  \
0  [0.028033827,-0.0675213,-0.14432317,-0.0505984...   
1  [0.029696196,-0.01673406,-0.19231597,-0.008942..

In [3]:
import os
import requests
import json
import httpx
from openai import OpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain.agents import tool
from langchain.agents import initialize_agent, AgentType, load_tools
from langchain_core.tools import Tool
from langchain.tools import tool
from langchain_openai import OpenAIEmbeddings
from datetime import date
import warnings
import ssl
from langchain_community.embeddings import OllamaEmbeddings
from openai import OpenAI
from langchain.chains import RetrievalQA


httpx_client = httpx.Client(http2=True, verify=False, timeout=30.0)

vcapservices = os.getenv('VCAP_SERVICES')
services = json.loads(vcapservices)

def is_chatservice(service):
    return service["name"] == "gen-ai-qwen3-ultra"

chat_services = filter(is_chatservice, services["genai"])
chat_credentials = list(chat_services)[0]["credentials"]


llm = ChatOpenAI(temperature=0.9, model=chat_credentials["model_name"], base_url=chat_credentials["api_base"], api_key=chat_credentials["api_key"], http_client=httpx_client)

# Create a retriever from your vectorstore
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k":3})

# Build a RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever
)

# Ask a question
query = "Which aircraft equipment has reported issues with hydraulic leaks?"
result = qa.run(query)
print(result)

  result = qa.run(query)


<think>
Okay, let's see. The user is asking which aircraft equipment has reported issues with hydraulic leaks. The context provided mentions "Landing Gear B: Hydraulic failure - Inspect hydraulic lines and refill fluid" and "Hydraulics / Actuation: Hydraulic system controlling flaps and landing gear." 

So, the hydraulic system is mentioned in relation to both the landing gear and the flaps. The specific example given is Landing Gear B with a hydraulic failure. The answer should probably include the landing gear and maybe the flaps since they're part of the hydraulics/actuation system. But the context doesn't explicitly mention flaps having issues, just that the hydraulic system controls them. So the main equipment here is the landing gear. Maybe also mention the hydraulic system in general. But the user is asking for equipment that has reported issues. The only specific one mentioned is Landing Gear B. So the answer should focus on that. I need to check if there's any other equipment 

In [12]:
import httpx
from openai import OpenAI as RawOpenAI
from trulens.providers.openai import OpenAI as TruOpenAI

class CustomOpenAI(TruOpenAI):
    def __init__(self, model_engine: str, api_key: str, api_base: str, verify: bool = False):
        # Keep api_base for later use
        self.api_base = api_base
        self.api_key = api_key
        self.model_engine = model_engine

        # Create a custom httpx client with SSL bypass if verify=False
        self._client = httpx.Client(http2=True, verify=verify, timeout=60.0)

        # Initialize TruLens OpenAI provider
        super().__init__(
            model_engine=model_engine,
            api_key=api_key,
            api_base=api_base,
            client=self._client
        )

        # Also initialize raw OpenAI client for direct calls
        self.raw_client = RawOpenAI(
            api_key=api_key,
            base_url=api_base,
            http_client=self._client
        )

    # Optional helpers for direct API calls
    def chat(self, messages, model=None, **kwargs):
        model = model or self.model_engine
        return self.raw_client.chat.completions.create(
            model=model,
            messages=messages,
            **kwargs
        )

    def embed(self, input, model=None, **kwargs):
        model = model or self.model_engine
        return self.raw_client.embeddings.create(
            model=model,
            input=input,
            **kwargs
        )


In [13]:
# uv add trulens-core trulens-apps-langchain trulens-providers-openai
import numpy as np
from trulens.apps.langchain import TruChain
from trulens.core import Feedback, TruSession, Tru
from trulens.providers.openai import OpenAI

import os
import httpx

from trulens.providers.openai.endpoint import OpenAIEndpoint

vcapservices = os.getenv('VCAP_SERVICES')
services = json.loads(vcapservices)

def is_chatservice(service):
    return service["name"] == "gen-ai-qwen3-ultra"

chat_services = filter(is_chatservice, services["genai"])
chat_credentials = list(chat_services)[0]["credentials"]

# Specify your local endpoint URL and API key
custom_api_base = chat_credentials["api_base"]
custom_api_key = chat_credentials["api_key"]
custom_model_name = chat_credentials["model_name"]

# Create a custom HTTP client with SSL verification disabled
custom_http_client = httpx.Client(verify=False)


# Create the TruLens OpenAI provider with the custom endpoint
provider = CustomOpenAI(
    model_engine=custom_model_name,
    api_key=custom_api_key,
    api_base=custom_api_base,
    verify=False
)
# Initialize session
tru = Tru()


context = TruChain.select_context(qa)

f_context_relevance = (
    Feedback(provider.context_relevance)
    .on_input()
    .on_context(collect_list=False)
    .aggregate(np.mean)
)

# Wrap and evaluate
tru_recorder = TruChain(
    qa,
    app_name="Aircraft Maintenance Chatbot",
    app_id="app_aircraft_maintenance",
    app_version="v1",
    feedbacks=[f_context_relevance],
)


# Example query
query = "Which aircraft equipment had hydraulic issues?"

with tru_recorder as recording:
    response = qa(query)
    print("Context relevance score:", response)
    
records, feedback = tru.get_records_and_feedback(app_ids=[])
print(records.head())

tru.get_leaderboard(app_ids=[])
# Launch dashboard
tru.run_dashboard(        
    address="0.0.0.0",  # bind to all network interfaces for external access
    port=7861,       # port to run the dashboard
)


ValueError: "CustomOpenAI" object has no field "api_base"