In [2]:
from langchain_community.llms import Ollama
from langchain_core.output_parsers import JsonOutputParser
from langchain.output_parsers import YamlOutputParser

In [36]:
import pandas as pd
import numpy as np
contexts = pd.read_csv('./context.csv')
contexts = contexts[6:9]
contexts

Unnamed: 0,context
6,LeToya Luckett and Roberson became unhappy wit...
7,The remaining band members recorded 'Independe...
8,"In July 2002, Beyoncé continued her acting car..."


In [16]:
llm = Ollama(model="gemma:7b")
parser = JsonOutputParser()

In [41]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a ontologist. You build knowledge graph from plain input. \nExtract Entites from plain text."),
    ("system", "The format should be python list"),    
    ("user", "{input}")
])
chain = prompt | llm
entity_list = list()
for context in contexts['context'].values:
    entities = chain.invoke({"input": context})
    for sample in entities.split('\n'):
        if '-' in sample:
            entity_list.append(sample.replace('- ',''))
    print(entity_list)
    print('----')
entity_list = list(set(entity_list))

['LeToya Luckett', 'Roberson', 'Farrah Franklin', 'Michelle Williams', 'Beyoncé', 'Mathew', "Destiny's Child"]
----
['LeToya Luckett', 'Roberson', 'Farrah Franklin', 'Michelle Williams', 'Beyoncé', 'Mathew', "Destiny's Child", "Destiny's Child", 'Beyoncé', 'Luckett', 'Roberson', "Charlie's Angels", 'U.S. Billboard Hot 100 chart', '2000 film', 'Carmen: A Hip Hopera', 'American actor Mekhi Phifer', 'Philadelphia', '19th century opera Carmen', 'Survivor', 'Grammy Award']
----
['LeToya Luckett', 'Roberson', 'Farrah Franklin', 'Michelle Williams', 'Beyoncé', 'Mathew', "Destiny's Child", "Destiny's Child", 'Beyoncé', 'Luckett', 'Roberson', "Charlie's Angels", 'U.S. Billboard Hot 100 chart', '2000 film', 'Carmen: A Hip Hopera', 'American actor Mekhi Phifer', 'Philadelphia', '19th century opera Carmen', 'Survivor', 'Grammy Award', 'Beyoncé', 'Foxxy Cleopatra', 'Austin Powers in Goldmember', 'US box office', '$73 million', 'Work It Out', 'UK', 'Norway', 'Belgium', 'The Fighting Temptations', 'C

In [48]:
prompt = ChatPromptTemplate.from_messages([
    ("system", "Group ALL entities according to semantic meaning of entity. \nThe output MUST be dictionary format."),
    ("user", "{entities}"),        
])
chain = prompt | llm

tags = chain.invoke({"entities": entity_list})
print(tags)

**Entities grouped by semantic meaning:**

**Music-related entities:**
- Summertime
- U.S. Billboard Hot 100 chart
- Foxxy Cleopatra
- Fighting Temptation
- The Fighting Temptations
- Grammy Award
- LeToya Luckett
- MC Lyte
- Carmen: A Hip Hopera
- Survivor
- Destiny's Child

**Places:**
- Philadelphia
- Norway
- Belgium
- UK
- US box office

**Other entities:**
- US box office
- 2000 film
- Roberson
- Free
- Farrah Franklin
- American actor Mekhi Phifer
- $73 million
- $30 million


In [153]:
prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract properties of each entities from plain input. \nThe output MUST be YAML format."),
    ("system", "{entities}"),        
    ("user", "{input}")
])

chain = prompt | llm 
properties = chain.invoke({"input": "Beyoncé Giselle Knowles-Carter (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles 'Crazy in Love' and 'Baby Boy'.",
                     "entities": entities})
print(properties)

```yaml
entities:
  - name: Beyoncé Giselle Knowles-Carter
    occupation: American singer
    nationality: American
    place_of_birth: Houston, Texas
    other_roles: Songwriter, Record producer, Actress

  - name: Destiny's Child
    other_roles: Group

  - name: Mathew Knowles
    relationship: Father of Beyoncé Giselle Knowles-Carter

  - name: Dangerously in Love
    other_roles: Album

  - name: Grammy Awards
    other_roles: Won by Beyoncé Giselle Knowles-Carter for Dangerously in Love

  - name: Billboard Hot 100 number-one singles
    other_roles: Featured songs by Beyoncé Giselle Knowles-Carter from Dangerously in Love
```


In [154]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import PromptTemplate
response_schemas = [
    ResponseSchema(name="first entity", description="first entity of relationship between entities"),
    ResponseSchema(name="relationship", description="relationship between two entity semantically"),
    ResponseSchema(name="second entity", description="second entity of relationship between entities"),
]
relationship_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [156]:
relationship_instructions = relationship_parser.get_format_instructions()
prompt = PromptTemplate(
    template="Create relationship between entities from plain text.\n{relationship_instructions}\n{entities}\n{input}",
    input_variables=["entities","input"],
    partial_variables={"relationship_instructions": relationship_instructions},
)
chain = prompt | llm  
relationships = chain.invoke({"input": "Beyoncé Giselle Knowles-Carter (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles 'Crazy in Love' and 'Baby Boy'.",
                     "entities": entities})
print(relationships)

```json
{
	"first entity": "Beyoncé Giselle Knowles-Carter",
	"relationship": "is",
	"second entity": "an American singer, songwriter, record producer and actress"
}

{
	"first entity": "Houston, Texas",
	"relationship": "is where",
	"second entity": "Beyoncé Giselle Knowles-Carter was born"
}

{
	"first entity": "Destiny's Child",
	"relationship": "was a girl group",
	"second entity": "that Beyoncé Giselle Knowles-Carter led"
}

{
	"first entity": "Mathew Knowles",
	"relationship": "managed",
	"second entity": "Destiny's Child"
}

{
	"first entity": "Dangerously in Love",
	"relationship": "was Beyoncé Giselle Knowles-Carter's debut album",
	"second entity": "It earned five Grammy Awards"
}

{
	"first entity": "Billboard Hot 100 number-one singles",
	"relationship": "Featured",
	"second entity": "'Crazy in Love' and 'Baby Boy'"
}
```


In [157]:
print(entities)
print('-----')
print(tags)
print('-----')
print(properties)
print('-----')
print(relationships)
print('-----')

Sure, here is the extracted entities from the text:

- Beyoncé Giselle Knowles-Carter
- American singer
- Songwriter
- Record producer
- Actress
- Houston, Texas
- Destiny's Child
- Mathew Knowles
- Dangerously in Love
- Grammy Awards
- Billboard Hot 100 number-one singles
-----
Sure, here is the extracted entities with tags in the format of a dictionary:

```python
entities_with_tags = {
    "Beyoncé Giselle Knowles-Carter": {"type": "Person"},
    "American singer": {"type": "Occupation"},
    "Songwriter": {"type": "Occupation"},
    "Record producer": {"type": "Occupation"},
    "Actress": {"type": "Occupation"},
    "Houston, Texas": {"type": "Location"},
    "Destiny's Child": {"type": "Group"},
    "Mathew Knowles": {"type": "Person"},
    "Dangerously in Love": {"type": "Album"},
    "Grammy Awards": {"type": "Award"},
    "Billboard Hot 100 number-one singles": {"type": "Accomplishment"}
}
```
-----
```yaml
entities:
  - name: Beyoncé Giselle Knowles-Carter
    occupation: Ame

In [49]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a ontologist. You build knowledge graph from plain text. \n 
     Task 1: Extract Entities from plain text as possible as atomic. \n
     Task 2: Extract Type of Entites according to semantic meaning of Entities as possible as atomic from Task 2 output.\n
     Task 3: Define properties of Entities as possible as atomic from Task 3 output. \n
     Task 4: Extract relationship between Entities from plain text as possible as atomic from Task 4 output. \n\n
     Task 5: Convert all knowledge to Neo4j Cypher format [{{
         "Entity: "Group" \n
         "Relationship" : "Relationship Name" \n
         "Entity": "Group" 
         }},..]."""),
    ("system", "Let's think step by step. The output must be json and their tags"),
    ("user", "{input}")
])
chain = prompt | llm 
output = chain.invoke({"input": "Beyoncé Giselle Knowles-Carter (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles 'Crazy in Love' and 'Baby Boy'."})

In [50]:
print(output)

**Task 1: Extract Entities from Plain Text**

- **Entities:**
    - Beyoncé Giselle Knowles-Carter
    - America
    - Houston
    - Texas
    - Destiny's Child
    - Mathew Knowles

**Task 2: Extract Type of Entites**

- **Types:**
    - Singer
    - Songwriter
    - Record producer
    - Actress

**Task 3: Define Properties of Entities**

- **Properties:**
    - Born: September 4, 1981
    - Nationality: American
    - Place of birth: Houston, Texas
    - Career: Singer, songwriter, record producer, actress

**Task 4: Extract Relationship between Entities**

- **Relationships:**
    - Born and raised in
    - Performed in
    - Rose to fame as
    - Managed by
    - Became one of the world's best-selling girl groups of all time

**Task 5: Convert Knowledge to JSON Format**

```json
[
    {
        "Entity": "Beyoncé Giselle Knowles-Carter",
        "Relationship": "Born and raised in",
        "Entity": "Houston, Texas"
    },
    {
        "Entity": "Beyoncé Giselle Knowles-Carter",

In [53]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a ontologist. You build knowledge graph from plain text. \n 
     Task 1: Extract Entities from plain text \n
     Task 2: Group Entities according to semantic meaning of Entities\n
     Task 3: Extract relationship between Entities from plain text \n\n
     Task 4: Convert all knowledge to json format."""),
    ("system", "Let's think step by step."),
    ("user", "{input}")
])
chain = prompt | llm 
output = chain.invoke({"input": "Beyoncé Giselle Knowles-Carter (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles 'Crazy in Love' and 'Baby Boy'."})

In [54]:
print(output)

**Task 1: Extract Entities from Plain Text**

* Beyoncé Giselle Knowles-Carter
* America
* Houston, Texas
* Destiny's Child
* Mathew Knowles

**Task 2: Group Entities According to Semantic Meaning of Entities**

* Beyoncé Giselle Knowles-Carter, Destiny's Child, and Mathew Knowles are related to the same family.
* Beyoncé Giselle Knowles-Carter, Destiny's Child, and Dangerously in Love are related to the music industry.

**Task 3: Extract Relationship Between Entities from Plain Text**

* Beyoncé Giselle Knowles-Carter was born and raised in Houston, Texas.
* Destiny's Child was formed in the late 1990s and became one of the world's best-selling girl groups of all time.
* Mathew Knowles managed Destiny's Child.
* Dangerously in Love was released in 2003 and established Beyoncé as a solo artist worldwide.

**Task 4: Convert All Knowledge to Json Format**

```json
{
  "entities": {
    "Beyoncé Giselle Knowles-Carter": {
      "born": "September 4, 1981",
      "nationality": "American",