In [6]:
import os
from dotenv import load_dotenv

# Populate the process environment (python-dotenv) before reading settings.
load_dotenv()

# Neo4j connection settings; each value is None when its variable is unset.
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD = map(
    os.environ.get,
    ("LOCAL_NEO4J_URI", "LOCAL_NEO4J_USERNAME", "LOCAL_NEO4J_PASSWORD"),
)
# Target database name is fixed in code rather than read from the environment.
NEO4J_DATABASE = "applekg"

# API key for the Groq-hosted model (None when unset).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")


In [8]:
from langchain_community.graphs import Neo4jGraph

# Connect to Neo4j using the settings gathered in the configuration cell.
neo4j_settings = {
    "url": NEO4J_URI,
    "username": NEO4J_USERNAME,
    "password": NEO4J_PASSWORD,
    "database": NEO4J_DATABASE,
}
graph = Neo4jGraph(**neo4j_settings)

In [9]:
from langchain.document_loaders import TextLoader

# Load the source text file into LangChain documents and echo what was read.
loader = TextLoader(file_path="data/tim_cook.txt")
documents = loader.load()
print(documents)

[Document(page_content='= FIRST ITERATION = \nTimothy D. Cook is the CEO of Apple Inc., who joined the company in 1998 and took over as CEO in 2009. \nUnder his leadership, Apple became the world\'s largest company by market capitalization and revenue, thanks to cost-saving measures such as long-term deals for flash memory that led to popular devices like the iPod Nano, iPhone, and iPad. \nApple was founded in 1976 by Steve Jobs, Steve Wozniak, and Ronald Wayne, with the Macintosh computer, introduced in 1984, being a revolutionary graphical user interface-based system designed for the masses. \nThe Macintosh team, led by Jef Raskin and later Steve Jobs, faced challenges in bringing the revolutionary design to life but generated cult enthusiasm with new programs like PageMaker, MORE, and Excel. \nApple released improved versions of the Macintosh, like the Macintosh 512K, to address initial limitations. \nApple bought NeXT in 1997, bringing Jobs back as CEO, resulting in game-changing p

In [12]:
from langchain_groq import ChatGroq

# Groq API key taken from the environment; None when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [1]:
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain_core.pydantic_v1 import BaseModel, Field

# Closed vocabularies meant to fill the {entity_types} / {relation_types}
# placeholders of the prompts defined in this cell.
entity_types = [
    "person",
    "school",
    "award",
    "company",
    "product",
    "characteristic",
]
relation_types = [
    "alumniOf", "worksFor", "hasAward", "isProducedBy",
    "hasCharacteristic", "acquired", "hasProject", "isFounderOf",
]

# System-turn instructions. The text (including its indentation and trailing
# whitespace) is sent to the model as-is once the two placeholders are filled.
SYSTEM_TEMPLATE = """
    You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
    Your task is to identify the entities and relations requested with the user prompt, from a given text.
    You must generate the output in a JSON containing a list with JSON objects having the following keys: "head", "head_type", "relation", "tail", and "tail_type".
    The "head" key must contain the text of the extracted entity with one of the types from the provided list in the user prompt. 
    The "head_type" key must contain the type of the extracted head entity which must be one of the types from {entity_types}.
    The "relation" key must contain the type of relation between the "head" and the "tail" which must be one of the relations from {relation_types}.
    The "tail" key must represent the text of an extracted entity which is the tail of the relation, and the "tail_type" key must contain the type of the tail entity from {entity_types}. 
    Attempt to extract as many entities and relations as you can. 
    
    IMPORTANT NOTES:
    - Don't add any explanation and text. 
    """

system_prompt = PromptTemplate(
    input_variables=["entity_types", "relation_types"],
    template=SYSTEM_TEMPLATE,
)

# Wrap the template so it can be used as the system message of a chat prompt.
system_message_prompt = SystemMessagePromptTemplate(prompt=system_prompt)

# Few-shot examples for the extraction prompt. Each entry pairs an input
# sentence ("text") with ONE extracted triple; a sentence that yields several
# triples appears once per triple.
#
# Every "relation" must be spelled exactly as in `relation_types`, and every
# "head"/"tail" must be a literal substring of "text", because the model
# imitates these examples verbatim.
examples = [
    {
        "text": "Adam is a software engineer in Microsoft since 2009, and last year he got an award as the Best Talent",
        "head": "Adam",
        "head_type": "person",
        "relation": "worksFor",
        "tail": "Microsoft",
        "tail_type": "company",
    },
    {
        "text": "Adam is a software engineer in Microsoft since 2009, and last year he got an award as the Best Talent",
        "head": "Adam",
        "head_type": "person",
        "relation": "hasAward",
        "tail": "Best Talent",
        "tail_type": "award",
    },
    {
        "text": "Microsoft is a tech company that provide several products such as Microsoft Word",
        "head": "Microsoft Word",
        "head_type": "product",
        # Was "isproducedBy", which is not one of the allowed relation types.
        "relation": "isProducedBy",
        "tail": "Microsoft",
        "tail_type": "company",
    },
    {
        "text": "Microsoft Word is a lightweight app that accessible offline",
        "head": "Microsoft Word",
        "head_type": "product",
        "relation": "hasCharacteristic",
        "tail": "lightweight app",
        "tail_type": "characteristic",
    },
    {
        "text": "Microsoft Word is a lightweight app that accessible offline",
        "head": "Microsoft Word",
        "head_type": "product",
        "relation": "hasCharacteristic",
        # Was misspelled "accesible offline", which does not occur in the text.
        "tail": "accessible offline",
        "tail_type": "characteristic",
    },
]


# Schema of a single extracted triple: head entity, relation, tail entity,
# plus the type of each entity. It is handed to the output parser below as
# `pydantic_object`.
# NOTE(review): documentation is kept in `#` comments on purpose; a class
# docstring would presumably be included in the schema text the parser
# generates for the prompt — confirm before adding one.
class ExtractedInfo(BaseModel):
    # Text of the head (first) entity of the relation.
    head: str = Field(
        description="extracted first or head entity like Microsoft, Apple, John"
    )
    # Type label of the head entity.
    head_type: str = Field(
        description="type of the extracted head entity like person, company, etc"
    )
    # Relation that links head to tail.
    relation: str = Field(description="relation between the head and the tail entities")
    # Text of the tail (second) entity of the relation.
    tail: str = Field(
        description="extracted second or tail entity like Microsoft, Apple, John"
    )
    # Type label of the tail entity.
    tail_type: str = Field(
        description="type of the extracted tail entity like person, company, etc"
    )


# No visible cell imports JsonOutputParser, so it is imported here; without
# this the next line fails with NameError.
from langchain_core.output_parsers import JsonOutputParser

# JSON output parser configured with the ExtractedInfo schema; its format
# instructions are injected into the human prompt below.
parser = JsonOutputParser(pydantic_object=ExtractedInfo)

# Human-turn prompt: restates the allowed entity/relation types, shows the
# few-shot examples, appends the parser's format instructions, and finally the
# text to analyse. Typos in the instruction text ("relationshhips",
# "generate extract entitites") are corrected because this string is sent to
# the model verbatim.
human_prompt = PromptTemplate(
    template=""" Based on the following example, extract entities and relations from the provided text.\n\n

    Use the following entity types, don't use other entity that is not defined below:
    # ENTITY TYPES:
    {entity_types}

    Use the following relation types, don't use other relation that is not defined below:
    # RELATION TYPES:
    {relation_types}

    Below are a number of examples of text and their extracted entities and relationships.
    {examples}

    For the following text, extract entities and relations as in the provided example.\n{format_instructions}\nText: {text}""",
    # Supplied by the caller each time the prompt is formatted.
    input_variables=["entity_types", "relation_types", "examples", "text"],
    # Fixed once, here, from the parser's schema description.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Wrap the template so it can be used as the human message of a chat prompt.
human_message_prompt = HumanMessagePromptTemplate(prompt=human_prompt)

# Final chat prompt: the system instructions first, then the human turn.
prompt_messages = [system_message_prompt, human_message_prompt]
chat_prompt = ChatPromptTemplate.from_messages(prompt_messages)


# No visible cell imports LLMChain, so it is imported here; without this the
# chain construction fails with NameError.
from langchain.chains import LLMChain

# Groq-hosted Llama 3 70B with temperature=0 to minimise sampling randomness.
# Local alternatives (would need ChatOllama imported):
#   ChatOllama(model="mistral", temperature=0) or ChatOllama(model="llama3", temperature=0)
model = ChatGroq(temperature=0, model_name="llama3-70b-8192")

# Bind the chat prompt to the model.
chain = LLMChain(llm=model, prompt=chat_prompt)

SyntaxError: invalid syntax (1214472424.py, line 1)