# Setup

Import our usual suspects

In [None]:
import os
import pandas as pd
from dotenv import load_dotenv
from graphdatascience import GraphDataScience
from neo4j import Query, GraphDatabase, RoutingControl, Result

Load environment variables

In [None]:
# Read connection settings and API keys from the local .env file; override=True
# lets values from .env replace variables that are already set in the process.
load_dotenv('.env', override=True)

# Neo4j connection settings
HOST = os.getenv('DEMO_URL')
USERNAME = os.getenv('DEMO_USER')
PASSWORD = os.getenv('DEMO_PASSWORD')
DATABASE = os.getenv('DEMO_DB_NAME')

# AI
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if OPENAI_API_KEY is None:
    # os.environ only accepts strings, so the assignment below would otherwise
    # fail with an opaque "str expected, not NoneType" TypeError.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to .env or export it before running."
    )
# Explicit re-export; the value was read from os.environ, so this does not change it.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

Connect to the Neo4j database

In [None]:
# Open the shared driver with basic auth, then check right away that the
# configured database can actually be reached.
driver = GraphDatabase.driver(HOST, auth=(USERNAME, PASSWORD))
driver.verify_connectivity(database=DATABASE)

## Create our little graph

In [None]:
# Sample travel history for a single user. Each journey is written as
# "<origin> - <destination>" and appears once per trip taken.
data = {
    'user_id': 'user_1',
    'home_station': 'Jönköping Central',
    'journeys': (
        ["Jönköping Central - Köpenhamn H"] * 7
        + ["Jönköping Central - Stockholm Central"] * 11
        + [
            "Köpenhamn H - Copenhagen Airport",
            "Malmö Central - Köpenhamn H",
            "Stockholm Central - Jönköping Central",
        ]
    ),
}

# Load the sample into Neo4j: merge the user and their home station, then
# merge one journey_to relationship per station pair. The relationship's count
# starts at 1 and is incremented each time the same pair is merged again.
driver.execute_query(
    ''' 
    merge (u:User{id:$data.user_id})
    merge (home:Station{name:$data.home_station})
    merge (u)-[:home_station]->(home)
    with u
    unwind $data.journeys as joruney
    with split(joruney, " - ") as legs
    merge (start:Station{name:legs[0]})
    merge (end:Station{name:legs[1]})
    merge (start)-[r:journey_to]->(end)
    on create set r.count = 1
    on match set r.count = r.count + 1
    ''',
    data=data,
    database_=DATABASE,
    routing_=RoutingControl.WRITE,
    result_transformer_=Result.to_df,
)

In [None]:
# pip install neo4j-viz[neo4j]
from neo4j_viz.neo4j import from_neo4j

In [None]:
# Pull up to 100 (node)-[rel]->(node) triples as a graph object for plotting.
graph = driver.execute_query(
    "MATCH (n)-[r]->(m) RETURN n,r,m limit 100",
    result_transformer_=Result.graph,
    routing_=RoutingControl.READ,
    database_=DATABASE,
)

In [None]:
# Convert the driver's graph result into a neo4j-viz graph and draw it.
# render() is left as the cell's last expression so the notebook shows its output.
VG = from_neo4j(graph)
VG.render()

# Retriever

### Test/design query

In [None]:
# Prototype of the retrieval query: pair the user's home station with every
# station whose name starts with the requested destination text.
user_id = "user_1"
destination = "Stockholm"

candidates_df = driver.execute_query(
    '''
    match (user:User{ id: $user_id })-[:home_station]->(home) 
    with user,home
    match (dest:Station)
    where dest.name starts with $destination
    return
    {
        from: home.name,
        to: dest.name
    }
     as result
    ''',
    user_id=user_id,
    destination=destination,
    database_=DATABASE,
    routing_=RoutingControl.READ,
    result_transformer_=Result.to_df,
)
candidates_df.head(10)

## Create agent

In [None]:
from typing import List, Optional
from pydantic import BaseModel, Field
import functools
from langchain_core.tools import tool

# Argument schema for the journey-search tool: a single free-text destination.
# NOTE(review): the docstring and the Field description are presumably surfaced to
# the LLM through the tool schema — treat them as prompt text, not plain comments.
class JourneyDestination(BaseModel):
    """
    Represents user search for a journey to a destination
    """
    destination: str = Field(..., description="Name of the destination city or station")

@tool
def search_journey_by_destionation_for_user(journey: JourneyDestination) -> list:
    '''Given the name of a destination city or station, this function will 
       retrieve the journey alternatives for a user'''
    # The docstring above doubles as the tool description, so it is kept verbatim.
    # The user is taken from the notebook-level `user_id` variable, not from the
    # tool arguments. Matching is a prefix match on the station name, capped at
    # 10 rows; each row is a one-element list holding a {from, to} map.
    cypher = ''' 
            match (user:User{ id: $user_id })-[:home_station]->(home) 
            with user,home
            match (dest:Station)
            where dest.name starts with $destination
            return
            {
                from: home.name,
                to: dest.name
            }
            as result limit 10
        '''
    return driver.execute_query(
        cypher,
        user_id=user_id,
        destination=journey.destination,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        result_transformer_=lambda records: records.values("result"),
    )

In [None]:
# Import langchain open ai
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from langchain_core.messages import HumanMessage

# Deterministic chat model (temperature 0) with the journey-search tool bound,
# so the model can answer by requesting a tool call.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
tools = [search_journey_by_destionation_for_user]
llm_with_tools = llm.bind_tools(tools)

In [None]:
# First turn: send the user's request to the tool-enabled model and keep the
# reply in the history so the tool results can be appended after it.
question = "I need to go to Stockholm next week Wed - Fri."
messages = [HumanMessage(question)]
ai_msg = llm_with_tools.invoke(messages)
# Show which tool calls (if any) the model requested.
print(ai_msg.tool_calls)
messages.append(ai_msg)

In [None]:
# Execute every tool call the model asked for and append each tool's reply to
# the conversation. Tools are looked up by their lower-cased name.
tools_by_name = {
    "search_journey_by_destionation_for_user": search_journey_by_destionation_for_user,
}
for tool_call in ai_msg.tool_calls:
    selected_tool = tools_by_name[tool_call["name"].lower()]
    tool_msg = selected_tool.invoke(tool_call)
    messages.append(tool_msg)

messages

## Generator

In [None]:
# Ask the model again, now with the tool output included in the message history.
llm_with_tools.invoke(messages)

# Generate embeddings

In [None]:
# Fetch the name of every Station node into a one-column DataFrame ("station").
df_stations = driver.execute_query(
    '''
    MATCH (s:Station)
    RETURN s.name as station
    ''',
    result_transformer_=Result.to_df,
    routing_=RoutingControl.READ,
    database_=DATABASE,
)
df_stations.head()

In [None]:
from langchain_openai import OpenAIEmbeddings

# OpenAI embedding model used to embed station names and, later, destination queries.
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-large",
    # The `text-embedding-3` family of models lets you choose the size
    # of the returned embeddings; uncomment the line below to request
    # shorter vectors.
    # dimensions=1024
)

In [None]:
# Embed all station names in one batched call rather than one API round-trip
# per row. embed_documents returns one vector per input text, in input order,
# so the result lines up with the DataFrame rows; the explicit index keeps
# that alignment even if df_stations does not use a default 0..n-1 index.
station_vectors = embedding_model.embed_documents(df_stations['station'].tolist())
df_stations['embedding'] = pd.Series(station_vectors, index=df_stations.index, dtype=object)
df_stations.head()

In [None]:
# Write each station's vector onto its Station node (matched by name) and
# report how many nodes were updated.
driver.execute_query(
    '''//cypher
    unwind $stations as station
    match (s:Station{name: station.station})
    set s.embedding = station.embedding
    return count(s) as count
    ''',
    stations=df_stations.to_dict('records'),
    result_transformer_=Result.to_df,
    routing_=RoutingControl.WRITE,
    database_=DATABASE,
)

In [None]:
# Create the vector index over Station.embedding (skipped if it already exists).
# The statement uses no query parameters, so the station records are not sent
# along with it.
driver.execute_query(
    '''//cypher
    create vector index StationEmbeddingIndex if not exists for (s:Station) on (s.embedding)
    ''',
    database_=DATABASE,
    routing_=RoutingControl.WRITE,
    result_transformer_=lambda r: r.to_df()
)

## Update our tool

In [None]:
@tool
def search_journey_by_destionation_for_user(journey: JourneyDestination) -> list:
    '''Given the name of a destination city, point of interest or station, this function will 
       retrieve the journey alternatives for a user'''
    # The docstring above doubles as the tool description, so it is kept verbatim.
    # Lookup strategy: prefer a station whose name starts with the given text;
    # if there is none, fall back to the nearest station in the vector index.
    # The user is taken from the notebook-level `user_id` variable.
    cypher = '''//cypher
            match (user:User{ id: $user_id })-[:home_station]->(home) 
            with user,home
            optional call () {
                match (dest:Station)
                where dest.name starts with $destination
                return dest as dest1 limit 1
            }
            optional call () {
                call db.index.vector.queryNodes('StationEmbeddingIndex', 10, $dest_embedding) yield node as dest
                return dest as dest2 limit 1
            }
            with coalesce(dest1, dest2) as dest, home
            return
            {
                from: home.name,
                to: dest.name
            }
            as result limit 10
        '''
    # Embed the free-text destination for the vector-similarity fallback.
    query_vector = embedding_model.embed_query(journey.destination)
    return driver.execute_query(
        cypher,
        user_id=user_id,
        destination=journey.destination,
        dest_embedding=query_vector,
        database_=DATABASE,
        routing_=RoutingControl.READ,
        result_transformer_=lambda records: records.values("result"),
    )

In [None]:
# Re-create the model and bind the redefined tool so the model uses the
# embedding-backed version of the search.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
tools = [search_journey_by_destionation_for_user]
llm_with_tools = llm.bind_tools(tools)

In [None]:
# Same request, but phrased without a station name, to exercise the
# embedding-based fallback in the updated tool.
question = "I need to go to the capital of sweden next week Wed - Fri."
messages = [HumanMessage(question)]
ai_msg = llm_with_tools.invoke(messages)
# Show which tool calls (if any) the model requested.
print(ai_msg.tool_calls)
messages.append(ai_msg)

In [None]:
# Execute every tool call the model asked for and append each tool's reply to
# the conversation. Tools are looked up by their lower-cased name.
tools_by_name = {
    "search_journey_by_destionation_for_user": search_journey_by_destionation_for_user,
}
for tool_call in ai_msg.tool_calls:
    selected_tool = tools_by_name[tool_call["name"].lower()]
    tool_msg = selected_tool.invoke(tool_call)
    messages.append(tool_msg)

messages

In [None]:
# Final answer: call the model again with the tool output in the message history.
llm_with_tools.invoke(messages)