In [13]:
import re
from typing import List, Optional

from langchain.output_parsers import YamlOutputParser
from langchain.prompts import PromptTemplate
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

In [14]:
# Ollama-backed LLM that the chains in the cells below pipe their prompts into.
OLLAMA_MODEL = "gemma:7b"
llm = Ollama(model=OLLAMA_MODEL)

### Extract Term List

In [47]:
# Regex for text of the form "python [ ... ]", capturing whatever sits between the
# brackets. It is only defined here; nothing in this cell applies it.
python_pattern = r'python\s*\[(.*?)\]\s*'

# Named `output_format` (not `format`) so the builtin format() is not shadowed for the
# rest of the kernel session. The template placeholder and dict key remain "format",
# so the rendered prompt text is unchanged.
output_format = 'Convert output to list format'

# Two system instructions, then the passage, then the output-format constraint.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are ontologist. You are building Knowledge Graph from plain text."),
    ("system", "The Task is to extract unique term list from plain text."),
    ("user", "{input}"),
    ("system", "Output MUST be only {format}"),
])
chain = prompt | llm
term_list = chain.invoke({
    "input": "Beyoncé Giselle Knowles-Carter (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles 'Crazy in Love' and 'Baby Boy'.",
    "format": output_format,
})
# print() so the newlines in the model's reply render as separate lines.
print(term_list)


Sure, here is the extracted unique term list from the text:

- Beyoncé Giselle Knowles-Carter
- American
- Singer
- Songwriter
- Record producer
- Actress
- Houston
- Texas
- Destiny's Child
- Mathew Knowles
- World's
- Best-selling
- Girl-group
- Hiatus
- Dangerously in Love
- Grammy Awards
- Billboard Hot 100


In [16]:
# Instruction block substituted into the prompt's {query} slot: three numbered tasks.
query = """
     Task 1: Extract Entities from plain text \n
     Task 2: Group Entities according to semantic meaning of Entities\n
     Task 3: Extract relationship between Entities from plain text \n\n"""

# NOTE(review): `Entities` is not defined in any cell shown here; it must already exist
# in the kernel (presumably a pydantic model) -- confirm before Restart & Run All.
yaml_parser = YamlOutputParser(pydantic_object=Entities)

# {query} and {input} are filled at invoke time; {format_instructions} is bound up front
# from the YAML parser.
prompt = PromptTemplate(
    template="You are a ontologist. You build knowledge graph from plain input. \n{query} \n{input} \nThe format MUST be {format_instructions}",
    input_variables=["query","input"],
    partial_variables={"format_instructions": yaml_parser.get_format_instructions()},
)
# Prints the PromptTemplate object itself (its repr), not a rendered prompt.
print('Query: ', prompt)

# The parser only contributes format instructions; the chain output is not passed
# through it, so `entities` is whatever `llm` returns, unparsed.
chain = prompt | llm
entities = chain.invoke({
    "input": "Beyoncé Giselle Knowles-Carter (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles 'Crazy in Love' and 'Baby Boy'.",
    "query": query,
})
print(entities)

Query:  input_variables=['input', 'query'] partial_variables={'format_instructions': 'The output should be formatted as a YAML instance that conforms to the given JSON schema below.\n\n# Examples\n## Schema\n```\n{"title": "Players", "description": "A list of players", "type": "array", "items": {"$ref": "#/definitions/Player"}, "definitions": {"Player": {"title": "Player", "type": "object", "properties": {"name": {"title": "Name", "description": "Player name", "type": "string"}, "avg": {"title": "Avg", "description": "Batting average", "type": "number"}}, "required": ["name", "avg"]}}}\n```\n## Well formatted instance\n```\n- name: John Doe\n  avg: 0.3\n- name: Jane Maxfield\n  avg: 1.4\n```\n\n## Schema\n```\n{"properties": {"habit": { "description": "A common daily habit", "type": "string" }, "sustainable_alternative": { "description": "An environmentally friendly alternative to the habit", "type": "string"}}, "required": ["habit", "sustainable_alternative"]}\n```\n## Well formatted 

In [7]:
# Message sequence: the tagging instruction, then the entities produced by the earlier
# extraction cell, then the raw passage as the user turn.
tag_messages = [
    ("system", "Add tag to each of entities from plain input. \nThe output MUST be dictionary format."),
    ("system", "{entities}"),
    ("user", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(tag_messages)
chain = prompt | llm

# Values for the two template slots; `entities` must already be set in the kernel.
tag_inputs = {
    "entities": entities,
    "input": "Beyoncé Giselle Knowles-Carter (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles 'Crazy in Love' and 'Baby Boy'.",
}
tags = chain.invoke(tag_inputs)
print(tags)

Sure, here is the extracted dictionary with tags for each entity in the text:

```python
entities_with_tags = {
    "Beyoncé Giselle Knowles-Carter": {"type": "person"},
    "American singer": {"type": "occupation"},
    "Songwriter": {"type": "occupation"},
    "Record producer": {"type": "occupation"},
    "Actress": {"type": "occupation"},
    "Houston": {"type": "location"},
    "Texas": {"type": "location"},
    "Destiny's Child": {"type": "group"},
    "Mathew Knowles": {"type": "person"},
    "Girl-group": {"type": "group"},
    "World's best-selling girl groups": {"type": "group"},
    "Dangerously in Love": {"type": "album"},
    "Grammy Awards": {"type": "award"},
}
```
