In [5]:
from langchain_community.llms import Ollama
from langchain_core.output_parsers import JsonOutputParser
from langchain.output_parsers import YamlOutputParser

import pandas as pd 
import numpy as np

In [6]:
# Name of the model requested from Ollama; every chain in this notebook shares this one client.
MODEL_NAME = "gemma:7b"
llm = Ollama(model=MODEL_NAME)

## Extract Term List from Plain Text

In [11]:
from langchain_core.prompts import ChatPromptTemplate

# Input CSV; it must provide a 'context' column holding one plain-text passage per row.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
CONTEXT_CSV = '/Users/mertakcay/Projects/KnowledgeGraphBuilder/context.csv'

# NOTE(review): the prompt asks for a "python list", yet the parsing below only
# recognises '-' bullet lines. Confirm which format the model is meant to
# produce; the prompt text is left untouched here.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a ontologist. You build knowledge graph from plain input. \nExtract Entites from plain text."),
    ("system", "The format should be python list"),    
    ("user", "{input}")
])
chain = prompt | llm

contexts = pd.read_csv(CONTEXT_CSV)

# entity_context_list[i] holds the entities parsed from the i-th row of contexts['context'].
entity_context_list = list()
for context in contexts['context'].values:
    entity_list = list()
    # Raw text returned by the model for this passage.
    entities = chain.invoke({"input": context})
    for sample in entities.split('\n'):
        bullet = sample.strip()
        # Only lines that *start* with a dash are bullets. A hyphen inside a
        # line (e.g. "Knowles-Carter" in a prose sentence) must not qualify.
        if not bullet.startswith('-'):
            continue
        # Drop just the leading bullet marker, leaving any "- " inside the
        # entity text intact; skip lines that were nothing but dashes.
        entity = bullet.lstrip('-').strip()
        if entity:
            entity_list.append(entity)
    entity_context_list.append(entity_list)
    # Progress marker: one line per processed passage.
    print('----')

----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----


In [12]:
# Display the parsed entities: one inner list per row of contexts['context'].
entity_context_list

[['Beyoncé Giselle Knowles-Carter',
  'Houston',
  'Texas',
  "Destiny's Child",
  'Mathew Knowles',
  'R&B',
  'Dangerously in Love',
  'Grammy Awards'],
 ['Beyoncé',
  "Destiny's Child",
  "B'Day (2006)",
  'Déjà Vu',
  'Irreplaceable',
  'Beautiful Liar',
  'Dreamgirls (2006)',
  'The Pink Panther (2006)',
  'Obsessed (2009)',
  'Cadillac Records (2008)',
  'I Am... Sasha Fierce (2008)',
  'Single Ladies (Put a Ring on It)',
  '4 (2011)',
  'Beyoncé (2013)'],
 ['Beyoncé',
  'Love',
  'Relationships',
  'Monogamy',
  'Female sexuality',
  'Empowerment',
  'Contemporary popular music',
  "Destiny's Child",
  'Music artists',
  'Grammy Awards',
  'Recording Industry Association of America',
  'Top Certified Artist',
  'Top Radio Songs Artist',
  'Top Female Artist',
  'Artist of the Millennium',
  'Time',
  'Forbes'],
 ['Beyoncé',
  "St. Mary's Elementary School",
  'Fredericksburg, Texas',
  'Darlette Johnson',
  'John Lennon',
  'Parker Elementary School',
  'High School for the Perf

## Group Entities

In [13]:
# Messages asking the model to cluster one passage's entities by meaning.
grouping_messages = [
    ("system", "Group ALL entities according to semantic meaning of entity. \nThe output MUST be dictionary format."),
    ("user", "{entities}"),
]
prompt = ChatPromptTemplate.from_messages(grouping_messages)
chain = prompt | llm

# One raw model response per entity list, in the same order as entity_context_list.
context_tags_list = [
    chain.invoke({"entities": entity_group})
    for entity_group in entity_context_list
]

## Extract Properties of Terms

In [14]:
# Messages for property extraction: the instruction, the passage's entity list,
# and finally the passage itself.
property_messages = [
    ("system", "Extract properties of each entities from plain input. \nThe output MUST be YAML format."),
    ("system", "{entities}"),
    ("user", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(property_messages)
chain = prompt | llm

# Pair each passage with the entities extracted from it and keep the raw
# model response for every pair.
properties_list = [
    chain.invoke({"input": passage, "entities": passage_entities})
    for passage_entities, passage in zip(entity_context_list, contexts['context'].values)
]

properties_list

["## Extracted Properties of Each Entity in the Text:\n\n**System:**\n\n- **Name:** System\n- **Properties:**\n    - Extract properties of each entity from plain input.\n    - Output MUST be in YAML format.\n\n**Human:**\n\n- **Name:** Beyoncé Giselle Knowles-Carter\n- **Properties:**\n    - Full name: Beyoncé Giselle Knowles-Carter\n    - Birth name: Knowles-Carter\n    - Birth date: September 4, 1981\n    - Nationality: American\n    - Occupation: Singer, songwriter, record producer, actress\n    - Place of birth: Houston, Texas\n    - Rise to fame: Late 1990s\n    - Group: Destiny's Child\n    - Group achievements: Best-selling girl group of all time\n    - Hiatus: Release of Beyoncé's debut album\n    - Solo album: Dangerously in Love (2003)\n    - Solo achievements: Five Grammy Awards\n    - Songs: 'Crazy in Love', 'Baby Boy'",
 "```yaml\nentities:\n  - Beyoncé:\n    - Properties:\n      - Second solo album: B'Day (2006)\n      - Hits:\n        - Déjà Vu\n        - Irreplaceable\n

In [15]:
# Inspect the raw model response for the first passage only.
properties_list[0]

"## Extracted Properties of Each Entity in the Text:\n\n**System:**\n\n- **Name:** System\n- **Properties:**\n    - Extract properties of each entity from plain input.\n    - Output MUST be in YAML format.\n\n**Human:**\n\n- **Name:** Beyoncé Giselle Knowles-Carter\n- **Properties:**\n    - Full name: Beyoncé Giselle Knowles-Carter\n    - Birth name: Knowles-Carter\n    - Birth date: September 4, 1981\n    - Nationality: American\n    - Occupation: Singer, songwriter, record producer, actress\n    - Place of birth: Houston, Texas\n    - Rise to fame: Late 1990s\n    - Group: Destiny's Child\n    - Group achievements: Best-selling girl group of all time\n    - Hiatus: Release of Beyoncé's debut album\n    - Solo album: Dangerously in Love (2003)\n    - Solo achievements: Five Grammy Awards\n    - Songs: 'Crazy in Love', 'Baby Boy'"

## Extract Relationships Between Terms

In [None]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import PromptTemplate
# (field name, description) pairs for the three parts of a relationship triple.
relationship_fields = (
    ("first entity", "first entity of relationship between entities"),
    ("relationship", "relationship between two entity semantically"),
    ("second entity", "second entity of relationship between entities"),
)
response_schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in relationship_fields
]
# Parser whose format instructions are injected into the relationship prompt.
relationship_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [None]:
# Format instructions generated by the structured parser; they are baked into
# the prompt as a partial variable so callers only supply entities and input.
relationship_instructions = relationship_parser.get_format_instructions()
prompt = PromptTemplate(
    template="Create relationship between entities from plain text.\n{relationship_instructions}\n{entities}\n{input}",
    input_variables=["entities","input"],
    partial_variables={"relationship_instructions": relationship_instructions},
)
chain = prompt | llm

# Passage used for this single-example run.
sample_context = "Beyoncé Giselle Knowles-Carter (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles 'Crazy in Love' and 'Baby Boy'."

# Use the entity list extracted for this passage (the first one). The previous
# code passed the leftover loop variable `entities`, i.e. the raw, unparsed
# model text belonging to the *last* CSV row — unrelated to this passage.
relationships = chain.invoke({"input": sample_context,
                              "entities": entity_context_list[0]})
# NOTE(review): the raw model text is printed as-is; relationship_parser is not
# applied to it in this cell.
print(relationships)