<a href="https://colab.research.google.com/github/tomasonjo/blogs/blob/master/llm/graph_semantic_layer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install langchain openai neo4j



In [2]:
import os
from langchain.graphs import Neo4jGraph
# credentials
url = ""
username = "neo4j"
password = ""
os.environ["OPENAI_API_KEY"] = "sk-"

graph = Neo4jGraph(url=url, username=username, password=password)

In [3]:
graph.query(
    "CREATE FULLTEXT INDEX movie IF NOT EXISTS FOR (m:Movie) ON EACH [m.title]"
)
graph.query(
    "CREATE FULLTEXT INDEX person IF NOT EXISTS FOR (p:Person) ON EACH [p.name]"
)

[]

In [4]:
def generate_full_text_query(input: str) -> str:
    full_text_query = ""
    words = input.split()
    for word in words[:-1]:
        full_text_query += f" {word}~0.8 AND"
    full_text_query += f" {words[-1]}~0.8"
    return full_text_query.strip()

print(generate_full_text_query("Pulp Fiction"))

Pulp~0.8 AND Fiction~0.8


In [5]:
from typing import List, Dict, Optional

candidate_query = """
CALL db.index.fulltext.queryNodes($index, $fulltextQuery)
YIELD node
RETURN coalesce(node.name, node.title) AS candidate, [el in labels(node) WHERE el IN ['Person', 'Movie'] | el][0] AS label
LIMIT toInteger($limit)
"""

def get_candidates(input: str, type: str, limit: int = 3) -> List[Dict[str, str]]:
    ft_query = generate_full_text_query(input)
    candidates = graph.query(candidate_query, {'fulltextQuery': ft_query, 'index': type, 'limit':limit})
    return candidates

get_candidates("Matrex", "movie")

[{'candidate': 'Matrix, The', 'label': 'Movie'},
 {'candidate': 'Matrix Revolutions, The', 'label': 'Movie'},
 {'candidate': 'Matrix Reloaded, The', 'label': 'Movie'}]

In [6]:
description_query = """
MATCH (m:Movie|Person)
WHERE m.title = $candidate OR m.name = $candidate
MATCH (m)-[r:ACTED_IN|DIRECTED|HAS_GENRE]-(t)
WITH m, type(r) as type, collect(coalesce(t.name, t.title)) as names
WITH m, type+": "+reduce(s="", n IN names | s + n + ", ") as types
WITH m, collect(types) as contexts
WITH m, "type:" + labels(m)[0] + "\ntitle: "+ coalesce(m.title, m.name) + "\nyear: "+coalesce(m.released,"") +"\n" +
       reduce(s="", c in contexts | s + substring(c, 0, size(c)-2) +"\n") as context
RETURN context LIMIT 1
"""

def get_information(entity: str, type:str) -> str:
    candidates = get_candidates(entity, type)
    if not candidates:
        return "No information was found about the movie or person in the database"
    elif len(candidates) > 1:
        newline = '\n'
        return f"Need additional information, which of these did you mean: {newline + newline.join(str(d) for d in candidates)}"
    data = graph.query(
        description_query, params={"candidate": candidates[0]['candidate']}
    )
    return data[0]["context"]

In [7]:
print(get_information("John", "person"))

Need additional information, which of these did you mean: 
{'candidate': 'John Lodge', 'label': 'Person'}
{'candidate': 'John Warren', 'label': 'Person'}
{'candidate': 'John Gray', 'label': 'Person'}


In [8]:
print(get_information("Keanu Reeves", "person"))

type:Actor
title: Keanu Reeves
year: 
ACTED_IN: Matrix Reloaded, The, Side by Side, Matrix Revolutions, The, Sweet November, Replacements, The, Hardball, Matrix, The, Constantine, Bill & Ted's Bogus Journey, Street Kings, Lake House, The, Chain Reaction, Walk in the Clouds, A, Little Buddha, Bill & Ted's Excellent Adventure, The Devil's Advocate, Johnny Mnemonic, Speed, Feeling Minnesota, The Neon Demon, 47 Ronin, Henry's Crime, Day the Earth Stood Still, The, John Wick, River's Edge, Man of Tai Chi, Dracula (Bram Stoker's Dracula), Point Break, My Own Private Idaho, Scanner Darkly, A, Something's Gotta Give, Watcher, The, Gift, The
DIRECTED: Man of Tai Chi



In [9]:
print(get_information("Tomaz Bratanic", "person"))

No information was found about the movie or person in the database


In [10]:
recommendation_query_db_history = """
  MERGE (u:User {userId:$user_id})
  WITH u
  // get recommendation candidates
  OPTIONAL MATCH (u)-[r1:RATED]->()<-[r2:RATED]-()-[r3:RATED]->(recommendation)
  WHERE r1.rating > 3.5 AND r2.rating > 3.5 AND r3.rating > 3.5
        AND NOT EXISTS {(u)-[:RATED]->(recommendation)}
  // rank and limit recommendations
  WITH u, recommendation, count(*) AS count
  ORDER BY count DESC LIMIT 3
  RETURN recommendation.title AS movie
"""

recommendation_query_genre = """
MATCH (m:Movie)-[:IN_GENRE]->(g:Genre {name:$genre})
// filter out already seen movies by the user
WHERE NOT EXISTS {
  (m)<-[:RATED]-(:User {userId:$user_id})
}
// rank and limit recommendations
WITH m
ORDER BY m.imdbRating DESC LIMIT 3
RETURN m.title AS movie
"""

def recommendation_query_movie(genre: bool) -> str:
    return f"""
MATCH (m1:Movie)<-[r1:RATED]-()-[r2:RATED]->(m2:Movie)
WHERE r1.rating > 3.5 AND r2.rating > 3.5
// filter out already seen movies by the user
AND NOT EXISTS {{
  (m2)<-[:RATED]-(:User {{userId:$user_id}})
}}
{'AND EXISTS {(m2)-[:IN_GENRE]->(:Genre {name:$genre})}' if genre else ''}
// rank and limit recommendations
WITH m2, count(*) AS count
ORDER BY count DESC LIMIT 3
RETURN m2.title As movie
"""

def get_user_id() -> int:
    return 1

def recommend_movie(movie: Optional[str] = None, genre: Optional[str] = None) -> str:
    user_id = get_user_id()
    params = {'user_id': user_id, 'genre':genre}
    if not movie and not genre:
        # Try to recommend a movie based on the information in the db
        response = graph.query(recommendation_query_db_history, params)
        try:
            return ", ".join([el["movie"] for el in response])
        except:
            return "Can you tell us about some of the movies you liked?"
    if not movie and genre:
        # Recommend top voted movies in the genre the user haven't seen before
        response = graph.query(recommendation_query_genre, params)
        try:
            return ", ".join([el["movie"] for el in response])
        except:
            return "Something went wrong"

    candidates = get_candidates(movie, "movie")
    if not candidates:
        return "The movie you mentioned wasn't found in the database"
    params['candidates'] = candidates
    query = recommendation_query_movie(bool(genre))
    response = graph.query(query, params)
    try:
        return ", ".join([el["movie"] for el in response])
    except:
        return "Something went wrong"

In [11]:
recommend_movie()

'Star Wars: Episode IV - A New Hope, Star Wars: Episode V - The Empire Strikes Back, Star Wars: Episode VI - Return of the Jedi'

In [12]:
recommend_movie(genre="Action")

"Now You See Me 2, The Huntsman Winter's War, Kill Command"

In [13]:
recommend_movie(movie="Top Gun")

'Pulp Fiction, Silence of the Lambs, The, Star Wars: Episode IV - A New Hope'

In [14]:
recommend_movie(movie="Top Gun", genre="Action")

'Star Wars: Episode IV - A New Hope, Star Wars: Episode V - The Empire Strikes Back, Matrix, The'

In [15]:
store_rating_query = """
MERGE (u:User {userId:$user_id})
WITH u
UNWIND $candidates as candidate
MATCH (m:Movie {title: candidate.candidate})
MERGE (u)-[r:RATED]->(m)
SET r.rating = 5.0
RETURN distinct 'Noted' AS response
"""
def store_movie_rating(movie: str):
    user_id = get_user_id()
    candidates = get_candidates(movie, 'movie')
    if not candidates:
        return 'This movie is not in our database'
    response = graph.query(store_rating_query, params={'user_id':user_id, 'candidates':candidates})
    try:
        return response[0]['response']
    except Exception as e:
        print(e)
        return 'Something went wrong'

In [16]:
store_movie_rating('Top Gun')

'Noted'

In [17]:
from typing import Optional, Type

from langchain.callbacks.manager import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)

# Import things that are needed generically
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool, StructuredTool, tool

class InformationInput(BaseModel):
    entity: str = Field(description="movie or a person mentioned in the question")
    entity_type: str = Field(description="type of the entity. Available options are 'movie' or 'person'")

class InformationTool(BaseTool):
    name = "Information"
    description = "useful for when you need to answer questions about various actors or movies"
    args_schema: Type[BaseModel] = InformationInput

    def _run(
        self, entity: str, entity_type: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool."""
        return get_information(entity, entity_type)

    async def _arun(
        self,
        entity: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("Calculator does not support async")

In [18]:
all_genres = ["Action", "Adventure", "Animation", "Children", "Comedy", "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "IMAX", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"]


class RecommenderInput(BaseModel):
    movie: Optional[str] = Field(description="movie used for recommendation")
    genre: Optional[str] = Field(description=f"genre used for recommendation. Available options are: {all_genres}")


class RecommenderTool(BaseTool):
    name = "Recommender"
    description = "useful for when you need to recommend a movie"
    args_schema: Type[BaseModel] = RecommenderInput

    def _run(
        self, movie: Optional[str] = None, genre:Optional[str] = None, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool."""
        return recommend_movie(movie, genre)

    async def _arun(
        self,
        movie: Optional[str],
        genre: Optional[str],
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("Calculator does not support async")

In [19]:
class MemoryInput(BaseModel):
    movie: Optional[str] = Field(description="movie the user liked")

class MemoryTool(BaseTool):
    name = "Memory"
    description = "useful for memorizing which movies the user liked"
    args_schema: Type[BaseModel] = MemoryInput

    def _run(
        self, movie: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool."""
        return store_movie_rating(movie)

    async def _arun(
        self,
        movie: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("Calculator does not support async")

In [20]:
from langchain.agents import Tool

from langchain.tools.render import format_tool_to_openai_function

from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0, model="gpt-4")
tools = [
    InformationTool(), RecommenderTool(), MemoryTool()
]

llm_with_tools = llm.bind(functions=[format_tool_to_openai_function(t) for t in tools])

In [21]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.agents import AgentExecutor
from langchain.schema import HumanMessage, AIMessage


prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant that finds information about movies and recommends them. "
            "If tools require follow up questions, make sure to ask the user for clarification. "
            "Make sure to include any available options that need to be clarified in the follow up questions",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

agent = (
    {
        "input": lambda x: x["input"],
        "chat_history": lambda x: x["chat_history"] if x.get("chat_history") else [],
        "agent_scratchpad": lambda x: format_to_openai_function_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [22]:
agent_executor.invoke({"input": "Who acted in pulp fiction?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Information` with `{'entity': 'Pulp Fiction', 'entity_type': 'movie'}`


[0m[36;1m[1;3mtype:Movie
title: Pulp Fiction
year: 1994-10-14
ACTED_IN: Tim Roth, John Travolta, Laura Lovelace, Samuel L. Jackson
DIRECTED: Quentin Tarantino
[0m[32;1m[1;3m"Pulp Fiction" was directed by Quentin Tarantino and the cast includes Tim Roth, John Travolta, Laura Lovelace, and Samuel L. Jackson.[0m

[1m> Finished chain.[0m


{'input': 'Who acted in pulp fiction?',
 'output': '"Pulp Fiction" was directed by Quentin Tarantino and the cast includes Tim Roth, John Travolta, Laura Lovelace, and Samuel L. Jackson.'}

In [23]:
agent_executor.invoke({"input": "What do you know about person John?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Information` with `{'entity': 'John', 'entity_type': 'person'}`


[0m[36;1m[1;3mNeed additional information, which of these did you mean: 
{'candidate': 'John Lodge', 'label': 'Person'}
{'candidate': 'John Warren', 'label': 'Person'}
{'candidate': 'John Gray', 'label': 'Person'}[0m[32;1m[1;3mI found multiple people named John. Could you please specify which one you are interested in? Here are some options:

1. John Lodge
2. John Warren
3. John Gray[0m

[1m> Finished chain.[0m


{'input': 'What do you know about person John?',
 'output': 'I found multiple people named John. Could you please specify which one you are interested in? Here are some options:\n\n1. John Lodge\n2. John Warren\n3. John Gray'}

In [24]:
history = [
    HumanMessage(content="What do you know about person John?"),
    AIMessage(content="I found multiple people named John. Could you please specify which one you are interested in? Here are a few options:\n\n1. John Lodge\n2. John Warren\n3. John Gray")
]
agent_executor.invoke({"input": "John Warren", "chat_history": history})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Information` with `{'entity': 'John Warren', 'entity_type': 'person'}`


[0m[36;1m[1;3mtype:Director
title: John Warren
year: 
DIRECTED: Major League: Back to the Minors
[0m[32;1m[1;3mJohn Warren is a director known for his work on the movie "Major League: Back to the Minors".[0m

[1m> Finished chain.[0m


{'input': 'John Warren',
 'chat_history': [HumanMessage(content='What do you know about person John?'),
  AIMessage(content='I found multiple people named John. Could you please specify which one you are interested in? Here are a few options:\n\n1. John Lodge\n2. John Warren\n3. John Gray')],
 'output': 'John Warren is a director known for his work on the movie "Major League: Back to the Minors".'}

In [25]:
agent_executor.invoke(
    {"input": "Are there any good comedies out there?"}
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Recommender` with `{'genre': 'Comedy'}`


[0m[33;1m[1;3mKeanu, Ice Age: The Great Egg-Scapade, Neighbors 2: Sorority Rising[0m[32;1m[1;3mSure, here are some comedy movies you might enjoy:

1. Keanu
2. Ice Age: The Great Egg-Scapade
3. Neighbors 2: Sorority Rising

Let me know if you need more recommendations![0m

[1m> Finished chain.[0m


{'input': 'Are there any good comedies out there?',
 'output': 'Sure, here are some comedy movies you might enjoy:\n\n1. Keanu\n2. Ice Age: The Great Egg-Scapade\n3. Neighbors 2: Sorority Rising\n\nLet me know if you need more recommendations!'}

In [26]:
agent_executor.invoke(
    {"input": "I would like to see a good action movie like Top Gun?"}
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Recommender` with `{'movie': 'Top Gun', 'genre': 'Action'}`


[0m[33;1m[1;3mStar Wars: Episode IV - A New Hope, Star Wars: Episode V - The Empire Strikes Back, Matrix, The[0m[32;1m[1;3mBased on your interest in Top Gun, I would recommend the following action movies:

1. Star Wars: Episode IV - A New Hope
2. Star Wars: Episode V - The Empire Strikes Back
3. The Matrix

These movies have similar action-packed sequences and thrilling storylines. Enjoy your movie time![0m

[1m> Finished chain.[0m


{'input': 'I would like to see a good action movie like Top Gun?',
 'output': 'Based on your interest in Top Gun, I would recommend the following action movies:\n\n1. Star Wars: Episode IV - A New Hope\n2. Star Wars: Episode V - The Empire Strikes Back\n3. The Matrix\n\nThese movies have similar action-packed sequences and thrilling storylines. Enjoy your movie time!'}

In [27]:
agent_executor.invoke(
    {"input": "I already watched raiders of the lost ark and I liked it."}
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Memory` with `{'movie': 'raiders of the lost ark'}`


[0m[38;5;200m[1;3mNoted[0m[32;1m[1;3mThat's great! If you want, I can recommend similar movies to "Raiders of the Lost Ark". Would you like that?[0m

[1m> Finished chain.[0m


{'input': 'I already watched raiders of the lost ark an I liked it.',
 'output': 'That\'s great! If you want, I can recommend similar movies to "Raiders of the Lost Ark". Would you like that?'}

In [28]:
agent_executor.invoke(
    {"input": "I would like to see a good action movie like Top Gun?"}
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Recommender` with `{'movie': 'Top Gun', 'genre': 'Action'}`


[0m[33;1m[1;3mStar Wars: Episode IV - A New Hope, Star Wars: Episode V - The Empire Strikes Back, Matrix, The[0m[32;1m[1;3mBased on your interest in Top Gun, I would recommend the following action movies:

1. Star Wars: Episode IV - A New Hope
2. Star Wars: Episode V - The Empire Strikes Back
3. The Matrix

These are all highly rated and popular action movies. Enjoy your movie time![0m

[1m> Finished chain.[0m


{'input': 'I would like to see a good action movie like Top Gun?',
 'output': 'Based on your interest in Top Gun, I would recommend the following action movies:\n\n1. Star Wars: Episode IV - A New Hope\n2. Star Wars: Episode V - The Empire Strikes Back\n3. The Matrix\n\nThese are all highly rated and popular action movies. Enjoy your movie time!'}