In [49]:
from langchain_community.llms import Ollama
from langchain_core.output_parsers import JsonOutputParser
from langchain.output_parsers import YamlOutputParser

import pandas as pd 
import numpy as np

In [50]:
# Ollama-backed LLM using the "gemma:7b" model; the cells below build their chains as `prompt | llm`.
# NOTE(review): presumably requires a running local Ollama server with this model pulled — confirm.
llm = Ollama(model="gemma:7b")

In [51]:
from langchain_core.prompts import ChatPromptTemplate
import re

def extract_cypher_scripts(text):
    """Pick out the lines of ``text`` that look like Cypher statements.

    The text is split on newlines and a line is kept, unmodified, when it
    contains at least one of the substrings 'CREATE', 'MATCH' or 'CONNECT'
    (case-sensitive substring match, anywhere in the line).

    Args:
        text: Multi-line string to scan (here, the raw reply of the LLM).

    Returns:
        list[str]: The matching lines in their original order; empty when
        nothing matches.
    """
    keywords = ('CREATE', 'MATCH', 'CONNECT')
    return [
        candidate
        for candidate in text.split("\n")
        if any(keyword in candidate for keyword in keywords)
    ]

## Extract Term List from Plain Text

In [52]:
from langchain_core.prompts import ChatPromptTemplate

# NOTE(review): machine-specific absolute path — point this at your own copy of context.csv.
CONTEXT_CSV_PATH = '/Users/mertakcay/Projects/KnowledgeGraphBuilder/context.csv'


def _parse_bullet_entities(raw_output):
    """Return the entity names found on the bullet lines of an LLM reply.

    Only lines whose first non-blank character is '-' are treated as bullets.
    The previous check (`'-' in line`) also accepted prose lines or entities
    that merely contain a hyphen, and `replace('- ', '')` removed every
    "- " inside the line while leaving leading indentation in place.

    Args:
        raw_output: Raw text returned by the chain.

    Returns:
        list[str]: Entity names with the bullet marker and surrounding
        whitespace removed; bullet lines that are empty after stripping
        (e.g. a '---' rule) are skipped.
    """
    parsed = []
    for line in raw_output.split('\n'):
        stripped = line.strip()
        if not stripped.startswith('-'):
            continue
        name = stripped.lstrip('-').strip()
        if name:
            parsed.append(name)
    return parsed


# Entity extraction: one LLM call per row of the 'context' column.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a ontologist. You build knowledge graph from plain input. \nExtract Entites from plain text."),
    ("system", "The format should be python list"),
    ("user", "{input}")
])
chain = prompt | llm

# entity_context_list[i] holds the entities parsed from the i-th context.
entity_context_list = list()

contexts = pd.read_csv(CONTEXT_CSV_PATH)
for context in contexts['context'].values:
    entities = chain.invoke({"input": context})
    # NOTE(review): the prompt asks for a "python list" but the parsing expects
    # '- ' bullet lines; presumably the model answers with bullets — confirm.
    entity_list = _parse_bullet_entities(entities)
    entity_context_list.append(entity_list)
    print('----')  # progress marker: one line per processed context

----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----
----


In [53]:
# Display the extracted entities: one inner list per row of contexts['context'].
entity_context_list

[['Beyoncé Giselle Knowles-Carter',
  'Houston',
  'Texas',
  "Destiny's Child",
  'Mathew Knowles',
  'R&B',
  'Grammy Awards'],
 ['Beyoncé',
  "Destiny's Child",
  "B'Day (2006)",
  'Déjà Vu',
  'Irreplaceable',
  'Beautiful Liar',
  'Dreamgirls (2006)',
  'The Pink Panther (2006)',
  'Obsessed (2009)',
  'Cadillac Records (2008)',
  'I Am... Sasha Fierce (2008)',
  'Single Ladies (Put a Ring on It)',
  '4 (2011)',
  'Beyoncé (2013)'],
 ['Beyoncé',
  'Love',
  'Relationships',
  'Monogamy',
  'Female sexuality',
  'Empowerment',
  'Contemporary popular music',
  "Destiny's Child",
  'Music artists',
  'Grammy Awards',
  'Top Certified Artist',
  'Top Radio Songs Artist',
  'Top Female Artist',
  'Artist of the Millennium',
  'Time',
  'Forbes'],
 ['Beyoncé',
  "St. Mary's Elementary School",
  'Fredericksburg, Texas',
  'Darlette Johnson',
  'John Lennon',
  'Parker Elementary School',
  'High School for the Performing and Visual Arts',
  'Alief Elsik High School',
  "St. John's Unit

## Group Entities

In [54]:
# Grouping step: ask the model to cluster each context's entity list by meaning.
grouping_messages = [
    ("system", "Group ALL entities according to semantic meaning of entity. \nThe output MUST be dictionary format."),
    ("user", "{entities}"),
]
prompt = ChatPromptTemplate.from_messages(grouping_messages)
chain = prompt | llm

# context_tags_list[i] is the raw (unparsed) model reply for entity_context_list[i].
context_tags_list = []
for entity_sample in entity_context_list:
    context_tags_list.append(chain.invoke({"entities": entity_sample}))

## Extract Properties of Terms

In [55]:
# Property extraction: the entity list is passed as a second system message,
# the context text as the user message.
property_messages = [
    ("system", "Extract properties of each entities from plain input. \nThe output MUST be YAML format."),
    ("system", "{entities}"),
    ("user", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(property_messages)
chain = prompt | llm

# properties_list[i] is the raw model reply for the i-th (entities, context) pair.
properties_list = []
context_texts = contexts['context'].values
for entity_names, context_text in zip(entity_context_list, context_texts):
    payload = {"input": context_text, "entities": entity_names}
    properties_list.append(chain.invoke(payload))

properties_list

["```yaml\n**system:**\n- Beyoncé Giselle Knowles-Carter\n- Houston\n- Texas\n- Destiny's Child\n- Mathew Knowles\n- R&B\n- Grammy Awards\n\n**human:**\n- Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say)\n- Born September 4, 1981\n- American singer, songwriter, record producer and actress\n- Born and raised in Houston, Texas\n- Performed in various singing and dancing competitions as a child\n- Rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child\n- Managed by her father, Mathew Knowles\n- Group became one of the world's best-selling girl groups of all time\n- Hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003)\n- Established her as a solo artist worldwide\n- Earned five Grammy Awards\n- Featured the Billboard Hot 100 number-one singles 'Crazy in Love' and 'Baby Boy'",
 "## Extracted Properties of Each Entity in YAML Format\n\n```yaml\nentities:\n  - **Beyoncé:**\n    - Attributes:\n      - Name: Beyoncé\n      - Solo Albums:\

In [56]:
# Inspect the raw model reply stored for the first context.
properties_list[0]

"```yaml\n**system:**\n- Beyoncé Giselle Knowles-Carter\n- Houston\n- Texas\n- Destiny's Child\n- Mathew Knowles\n- R&B\n- Grammy Awards\n\n**human:**\n- Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say)\n- Born September 4, 1981\n- American singer, songwriter, record producer and actress\n- Born and raised in Houston, Texas\n- Performed in various singing and dancing competitions as a child\n- Rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child\n- Managed by her father, Mathew Knowles\n- Group became one of the world's best-selling girl groups of all time\n- Hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003)\n- Established her as a solo artist worldwide\n- Earned five Grammy Awards\n- Featured the Billboard Hot 100 number-one singles 'Crazy in Love' and 'Baby Boy'"

## Extract Relationships Between Terms

In [57]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import PromptTemplate

# (name, description) of each field the model is asked to return for one relationship.
_RELATIONSHIP_FIELDS = (
    ("first entity", "first entity of relationship between entities"),
    ("relationship", "relationship between two entity semantically"),
    ("second entity", "second entity of relationship between entities"),
)

response_schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in _RELATIONSHIP_FIELDS
]
# Parser built from the schemas; later cells use its format instructions in the prompt.
relationship_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [58]:
# Relationship extraction: for every context, hand the model the format
# instructions plus the entities, properties and groups gathered so far.
relationship_instructions = relationship_parser.get_format_instructions()
prompt = PromptTemplate(
    template="Create relationship between entities from plain text.\n{relationship_instructions}\n{entities}\n{proporties}\n{group}\n{input}",
    # Declare the placeholders the template really contains: it has {group}
    # (which invoke() below supplies) and no {term_list} placeholder.
    input_variables=["entities", "proporties", "group", "input"],
    # Filled in once here, so invoke() does not need to pass it.
    partial_variables={"relationship_instructions": relationship_instructions},
)
chain = prompt | llm

# relationships_list[i] is the raw (unparsed) model reply for the i-th context.
relationships_list = list()
for context_idx in range(len(contexts)):
    relationships = chain.invoke({
        "input": contexts['context'].iloc[context_idx],
        "entities": entity_context_list[context_idx],
        'proporties': properties_list[context_idx],
        'group': context_tags_list[context_idx],
    })
    print(relationships)
    relationships_list.append(relationships)

```json
{
	"first entity": "Beyoncé Giselle Knowles-Carter",
	"relationship": "Born and raised in",
	"second entity": "Houston, Texas"
}

{
	"first entity": "Destiny's Child",
	"relationship": "Girl-group of which Beyoncé was lead singer",
	"second entity": "R&B"
}

{
	"first entity": "Mathew Knowles",
	"relationship": "Manager of",
	"second entity": "Destiny's Child"
}

{
	"first entity": "Grammy Awards",
	"relationship": "Won by Beyoncé",
	"second entity": "Beyoncé Giselle Knowles-Carter"
}
```

This code snippet represents the relationship between entities in the text, formatted according to the requested schema. It includes the leading and trailing "```json" and "```" as requested, and utilizes the provided text to identify the entities and their relationships.
```json
{
	"Beyoncé": {
		"relationship": "Solo Artist",
		"second entity": null
	},
	"Destiny's Child": {
		"relationship": "Band",
		"second entity": null
	}
}
```

**Explanation:**

* The relationship between Beyoncé and 

In [59]:
# Cypher generation: give the model everything gathered for each context and
# keep only the reply lines that look like Cypher statements.
prompt = PromptTemplate(
    template="You are ontologist. Task is to build knowledge graph by using entities, proporties, term_list, input and relationships in Neo4j cyher format.\n{entities}\n{proporties}\n{group}\n{relationship}\n{input}",
    # Declare the placeholders the template really contains: it has {group}
    # (which invoke() below supplies) and no {term_list} placeholder.
    input_variables=["entities", "proporties", "group", "relationship", "input"],
)
chain = prompt | llm

# scripts_list[i] holds the Cypher-looking lines extracted from the reply for the i-th context.
scripts_list = list()
for context_idx in range(len(contexts)):
    raw_reply = chain.invoke({
        "input": contexts['context'].iloc[context_idx],
        "entities": entity_context_list[context_idx],
        'proporties': properties_list[context_idx],
        'group': context_tags_list[context_idx],
        'relationship': relationships_list[context_idx],
    })
    # Drop prose / markdown around the statements; see extract_cypher_scripts.
    scripts = extract_cypher_scripts(raw_reply)
    scripts_list.append(scripts)

In [61]:
# Display the Cypher-looking lines kept for each context (one inner list per context).
scripts_list

[['CREATE (b:Person {name: "Beyoncé Giselle Knowles-Carter", born: 1981, nationality: "American", occupation: "Singer, Songwriter, Record Producer, Actress"})',
  'CREATE (h:Place {name: "Houston", state: "Texas"})',
  'CREATE (g:Group {name: "Destiny\'s Child"})',
  'CREATE (r:Genre {name: "R&B"})',
  'CREATE (a:Award {name: "Grammy Awards"})',
  'CREATE (b)-[:BORN_AND_RAISED_IN]->(h)',
  'CREATE (b)-[:LEAD_SINGER_OF]->(g)',
  'CREATE (m:Manager {name: "Mathew Knowles"})-[:MANAGES]->(g)',
  'CREATE (a)-[:WON_BY]->(b)'],
 ['CREATE (b:Artist {name: "Beyoncé"})',
  'CREATE (dc:Artist {name: "Destiny\'s Child"})',
  'CREATE (b)<-[:SOLO_ARTIST]-(b:Album {name: "B\'Day (2006)"})',
  'CREATE (b)<-[:SOLO_ARTIST]-(b:Album {name: "I Am... Sasha Fierce (2008)"})',
  'CREATE (b)<-[:SOLO_ARTIST]-(b:Album {name: "4 (2011)"})',
  'CREATE (b)<-[:SOLO_ARTIST]-(b:Album {name: "Beyoncé (2013)"})',
  'CREATE (dc)<-[:BAND]-(dc:Album {name: "Dreamgirls (2006)"})',
  'MATCH (b)<-[:HAS_HIT]-(h:Hit {name: "Dé