# QA Testing For Vector Database RAG

In [4]:
import psycopg2
import json
from dotenv import load_dotenv

load_dotenv()

True

In [10]:
from openai import OpenAI

client = OpenAI()

# Function to create embeddings
def embed(chunks):
    response = client.embeddings.create(
        input=chunks,
        model="text-embedding-3-small"
    )
    return response.data[0].embedding

In [11]:
conn = psycopg2.connect(
    host="localhost",
    port=5432,
    database="climate_viewer_dev",
    user="dev_user",
    password="dev_password"
)
cur = conn.cursor()

# List all tables in the database
cur.execute("SELECT table_name FROM information_schema.tables WHERE table_schema='public'")
tables = cur.fetchall()
print(tables)

# List all columns in the chunks table
cur.execute("SELECT column_name FROM information_schema.columns WHERE table_name='chunks'")
columns = cur.fetchall()
print(columns)

# List all rows in the chunks table
cur.execute(query="SELECT * FROM public.rag_chunks")

[('chunks',), ('rag_chunks',)]
[('id',), ('embedding',), ('created_at',), ('layer_id',), ('content',)]


In [13]:
# Test Vector Search
user_query = "What is the definition of passive marine flooding?"

# Convert to embedding
query_embedding = embed(user_query)

# Convert to vector type for pgvector
query_vector = f"[{','.join(map(str, query_embedding))}]"

print(f"Query: {user_query}")
print(f"Query vector length: {len(query_embedding)}")
print(f"Query vector (first 5): {query_embedding[:5]}")

# First, let's check if we have any data in the chunks table
cur.execute("SELECT COUNT(*) FROM public.rag_chunks")
count = cur.fetchone()[0]
print(f"Total chunks in database: {count}")

# Check the first chunk's embedding
cur.execute("SELECT relevant_layers, content FROM public.rag_chunks LIMIT 5")
sample = cur.fetchone()
if sample:
    relevant_layers, content = sample
    print(f"Sample chunk - layers: {relevant_layers}, content: {content[:100]}...")

Query: What is the definition of passive marine flooding?
Query vector length: 1536
Query vector (first 5): [0.01914322003722191, 0.07735373824834824, 0.022315455600619316, 0.03611467778682709, 0.00247830874286592]
Total chunks in database: 5166
Sample chunk - layers: ['future_erosion_hazard_zone'], content: JournalofCoastalResearch 28 3 533–539 WestPalmBeach,Florida May2012
Vulnerability Assessment of Hawa...


In [21]:
print("\n--- Testing with threshold 1.0 ---")
cur.execute("""
    SELECT content, relevant_layers, embedding <=> %s::vector as similarity_score
FROM public.rag_chunks
WHERE embedding <=> %s::vector < 1.0
ORDER BY embedding <=> %s::vector
LIMIT 5""", (query_vector, query_vector, query_vector))
results = cur.fetchall()
print(f"Results with threshold 1.0: {len(results)}")
for i, result in enumerate(results):
    print(f"{i+1}. Score: {result[2]:.4f}, Layer: {result[1]}, Content: {result[0]}...")


--- Testing with threshold 1.0 ---
Results with threshold 1.0: 5
1. Score: 0.5287, Layer: ['compound_flooding'], Content: Compound Flooding

Heavy rainfall events in Hawai’i produce widespread flooding, power outages, road closures and property damage. In the coastal zone, these impacts are exacerbated by climate change due to the compound effects of sea level rise and the likelihood of more intense storms reaching the islands. The team simulates the inundation associated with future flood events to help inform climate-related policy and mitigation strategies. The team is implementing an expanded version of the Weather Research and F......
2. Score: 0.5618, Layer: ['low_lying_flooding', 'groundwater_inundation'], Content: sk of flooding impacts increase to During the slow phase of flooding, wetland managers
62.2 %ofKea¯lia,28.8 %ofKanaha,and25.9 %ofJames will need to begin creating an inventory of management
Campbell. SLR impacts experienced along the beaches priorities to create the 

In [16]:
# Assemble Context from Vector Search
context = ""

for result in results:
    context += f"{result[0]}\n" 
    context += f"Layer: {result[1]}\n"

In [17]:
# Assemble Context from Vector Search
context = ""

for result in results:
    context += f"{result[0]}\n" 
    context += f"Layer: {result[1]}\n"

In [18]:
# Assemble Create Prompt
layer_prompt = f"""
You are a Hawaiian climate data assistant. Analyze the user's query and provide helpful response according to the context from the layer descriptions. Synthesize a response from the layer descriptions and the user's query.

USER QUERY: {user_query}
CONTEXT: {context}

RESPONSE FORMAT:
You MUST respond with valid JSON in this exact structure:
{{
  "type": "add_layer",
  "parameters": {{
    "relevant_layers": [layer_id1, layer_id2, ...], # comma separated list of layer ids
    "reason": "Why adding the layer",
    "synthesized_response": "Synthesized response from the layer descriptions and the user's query"
  }}
}}
"""

In [19]:
def get_response(prompt):
    client = OpenAI()

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "system", "content": prompt}],
        response_format={"type": "json_object"}
    )
    json_content = json.loads(response.choices[0].message.content or "{}")
    return json_content

In [20]:
get_response(layer_prompt)

{'type': 'add_layer',
 'parameters': {'relevant_layers': ['passive_marine_flooding',
   'groundwater_inundation'],
  'reason': 'To provide a comprehensive definition of passive marine flooding and its relation to groundwater inundation in the context of climate change impacts.',
  'synthesized_response': 'Passive marine flooding refers to the gradual inundation of coastal areas due to the rising sea levels that occur as a consequence of climate change. This type of flooding is often exacerbated by increased groundwater levels, which can further reduce the resilience of low-lying coastal regions. In Hawaii, passive marine flooding can lead to widespread effects on infrastructure, wetlands, and ecosystems as sea levels rise and groundwater inundation increases.'}}

  'synthesized_response': 'Passive marine flooding refers to the gradual inundation of coastal areas due to the rising sea levels that occur as a consequence of climate change. This type of flooding is often exacerbated by increased groundwater levels, which can further reduce the resilience of low-lying coastal regions. In Hawaii, passive marine flooding can lead to widespread effects on infrastructure, wetlands, and ecosystems as sea levels rise and groundwater inundation increases.'