In [3]:
# hybrid_chat.py
from typing import List
from google import genai
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
from neo4j import GraphDatabase
import config
load_dotenv()

TOP_K = 10
TOP_K_NEO4J = 10
INDEX_NAME = config.PINECONE_INDEX_NAME

client = genai.Client(api_key=config.GOOGLE_API_KEY)
pc = Pinecone(api_key=config.PINECONE_API_KEY)

if INDEX_NAME not in pc.list_indexes().names():
    print(f"Creating managed index: {INDEX_NAME}")
    pc.create_index(
        name=INDEX_NAME,
        dimension=config.PINECONE_VECTOR_DIM,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

index = pc.Index(INDEX_NAME)
driver = GraphDatabase.driver(
    config.NEO4J_URI, auth=(config.NEO4J_USER, config.NEO4J_PASSWORD)
)

from functools import lru_cache

@lru_cache(maxsize=1000)
def _cached_embed(text: str) -> tuple:
    resp = client.models.embed_content(
        model="gemini-embedding-001",
        contents=[text]
    )
    return tuple(resp.embeddings[0].values)

def embed_text(text: str) -> List[float]:
    return list(_cached_embed(text))
#pinecone top k
def pinecone_query(query_text: str, top_k=TOP_K):
    vec = embed_text(query_text)
    res = index.query(
        vector=vec,
        top_k=top_k,
        include_metadata=True,
        include_values=False
    )
    return res["matches"]

# nodes top k
@lru_cache(maxsize=1000)  
def _fetch_single_node_context(node_id: str, depth: int) -> tuple:
    session = driver.session()
    q = (
        f"MATCH path=(n:Entity {{id: $nid}})-[r*1..{depth}]-(m:Entity) "
        "WITH relationships(path) AS rels, m "
        "RETURN [rel IN rels | type(rel)] AS rel_types, "
        "labels(m) AS labels, m.id AS id, "
        "m.name AS name, m.description AS description "
        f"LIMIT {TOP_K_NEO4J}"
    )
    recs = session.run(q, nid=node_id)
    facts = []
    for r in recs:
        facts.append((
            node_id,
            tuple(r["rel_types"]),
            r["id"],
            r["name"] or "",
            r["description"] or "",
            tuple(r["labels"])
        ))
    session.close()
    return tuple(facts)

def fetch_graph_context(node_ids: List[str], neighborhood_depth: int):
    all_facts = []
    for nid in node_ids:
        node_facts = _fetch_single_node_context(nid, neighborhood_depth)
        for fact in node_facts:
            all_facts.append({
                "source": fact[0],
                "rel": " -> ".join(fact[1]),
                "target_id": fact[2],
                "target_name": fact[3],
                "target_desc": fact[4],
                "labels": list(fact[5])
            })
    return all_facts

def build_prompt(user_query, pinecone_matches, graph_facts):
    system = (
        "You are a helpful travel assistant for Vietnam tourism. "
        "Analyze the semantic search results and graph relationships provided. "
        "Give clear, practical travel recommendations with specific places and connections between them."
    )

    #vec_context = [i for i in graph_facts]
    #for m in pinecone_matches[:10]:
        ##meta = m["metadata"]
        #score = m.get("score", 0)
        #relevance = "Very Relevant" if score > 0.8 else "Relevant" if score > 0.6 else "Somewhat Relevant"
        
        #snippet = f"• {meta.get('name', 'Unknown')} ({relevance})"
        #if meta.get('city'):
        #    snippet += f" in {meta['city']}"
        #if meta.get('description'):
        #    snippet += f": {meta['description']}..."
        #vec_context.append(snippet)

    #graph_context = []
    #for f in graph_facts[:20]:
        #rel_type = f['rel'].replace('_', ' ').lower()
        #desc = f" - {f['target_desc']}..." if f['target_desc'] else ""
        #graph_context.append(f"• {f['source']} {rel_type} {f['target_name']}{desc}")

    prompt = [
        {"role": "system", "content": system},
        {"role": "user", "content":
         f"User query: {user_query}\n\n"
         "Top semantic matches (from vector DB):\n" + "\n".join(pinecone_matches) + "\n\n"
         "Graph facts (neighboring relations):\n" + "\n".join(graph_facts) + "\n\n"
         "Based on the above, answer the user's question. If helpful, suggest 2–3 concrete itinerary steps or tips and mention node ids for references."}
    ]
    return prompt


def call_chat(prompt_messages):
    resp = client.models.generate_content(
        model="gemini-2.5-pro",
        contents=prompt_messages,
    )
    return resp.text

  from .autonotebook import tqdm as notebook_tqdm


In [4]:

a=pinecone_query("hanoi 4 day itinary?")
import pandas as pd

In [6]:
a=pd.DataFrame(fetch_graph_context([i['id'] for i in a], 2)).drop_duplicates(subset='target_desc').to_dict(orient='records')

In [7]:
[
    f"{d['target_id']} {d['rel']} {d['source']}, has the description: {d['target_desc']}"
    for d in a
]



['activity_35 Available_In city_hanoi, has the description: A unique experience in Hanoi where visitors can enjoy bicycle tours.',
 'activity_34 Available_In city_hanoi, has the description: A unique experience in Hanoi where visitors can enjoy cooking classes.',
 'activity_33 Available_In city_hanoi, has the description: A unique experience in Hanoi where visitors can enjoy boat rides.',
 'activity_32 Available_In city_hanoi, has the description: A unique experience in Hanoi where visitors can enjoy historical walks.',
 'activity_26 Available_In city_hanoi, has the description: A unique experience in Hanoi where visitors can enjoy local markets.',
 'city_hanoi Located_In attraction_6, has the description: Hanoi is located in Northern Vietnam. It’s known for its culture, food, heritage experiences, combining local culture, food, and history. Travelers often visit for authentic Vietnamese experiences, from exploring markets and temples to trying street food and scenic excursions.']

In [None]:
for i in fetch_graph_context([i['id'] for i in a], 2):
    pd.Dat

{'source': 'city_sapa', 'rel': 'Available_In', 'target_id': 'activity_105', 'target_name': 'Sapa Activity 105', 'target_desc': 'A unique experience in Sapa where visitors can enjoy cooking classes.', 'labels': ['Entity', 'Activity']}
{'source': 'city_sapa', 'rel': 'Available_In', 'target_id': 'activity_104', 'target_name': 'Sapa Activity 104', 'target_desc': 'A unique experience in Sapa where visitors can enjoy boat rides.', 'labels': ['Entity', 'Activity']}
{'source': 'city_sapa', 'rel': 'Available_In', 'target_id': 'activity_103', 'target_name': 'Sapa Activity 103', 'target_desc': 'A unique experience in Sapa where visitors can enjoy local markets.', 'labels': ['Entity', 'Activity']}
{'source': 'city_sapa', 'rel': 'Available_In', 'target_id': 'activity_102', 'target_name': 'Sapa Activity 102', 'target_desc': 'A unique experience in Sapa where visitors can enjoy bicycle tours.', 'labels': ['Entity', 'Activity']}
{'source': 'city_sapa', 'rel': 'Available_In', 'target_id': 'activity_101

In [28]:
build_prompt("how is sapa in winters?", pinecone_query("how is sapa in winters?"), fetch_graph_context(x, 2))

TypeError: sequence item 0: expected str instance, ScoredVector found

In [13]:
a=fetch_graph_context(x,2)

In [7]:
len(a)

100

In [8]:
call_chat(f"do you und  ersatand graph databases? here's one: {a}")

'Yes, I understand graph databases very well. Thank you for providing an example.\n\nThe data you\'ve shared is a common way to represent a graph, specifically as a list of **edges** or **relationships**. Each dictionary in the list describes a single directed connection from a `source` node to a `target` node with a specific relationship type (`rel`).\n\nLet\'s break down the graph you\'ve provided:\n\n### 1. The Core Components (Nodes and Relationships)\n\nBased on your data, we can identify three main types of **nodes** (or entities):\n\n*   **City:** Represented by `city_sapa`. It has properties like a name ("Sapa") and a description.\n*   **Activity:** Represented by `activity_96` through `activity_105`. These nodes have properties like a name (`target_name`), a description (`target_desc`), and labels (`[\'Entity\', \'Activity\']`).\n*   **Attraction:** Represented by source IDs like `attraction_71`, `attraction_72`, etc. While their specific names and descriptions aren\'t defined

In [9]:
build_prompt("What to do in Sapa during winter?", pinecone_query("how is sapa in winters?"), [])

[{'role': 'system',
  'content': 'You are a helpful travel assistant for Vietnam tourism. Analyze the semantic search results and graph relationships provided. Give clear, practical travel recommendations with specific places and connections between them.'},
 {'role': 'user',
  'content': "User query: What to do in Sapa during winter?\n\nTop semantic matches (from vector DB):\n• Sapa (Relevant) in Northern Vietnam\n• Sapa Attraction 84 (Relevant) in Sapa\n• Sapa Attraction 74 (Relevant) in Sapa\n• Sapa Attraction 85 (Relevant) in Sapa\n• Sapa Attraction 78 (Relevant) in Sapa\n• Sapa Attraction 82 (Relevant) in Sapa\n• Sapa Attraction 72 (Relevant) in Sapa\n• Sapa Attraction 75 (Relevant) in Sapa\n• Sapa Attraction 71 (Relevant) in Sapa\n• Sapa Attraction 73 (Relevant) in Sapa\n\nGraph facts (neighboring relations):\n\n\nBased on the above, answer the user's question. If helpful, suggest 2–3 concrete itinerary steps or tips and mention node ids for references."}]