In [5]:
from langchain_community.llms import Ollama
from langchain_core.output_parsers import JsonOutputParser
from langchain.output_parsers import YamlOutputParser

In [6]:
llm = Ollama(model="gemma:7b")

In [16]:
from langchain_core.prompts import ChatPromptTemplate
import re

def extract_cypher_scripts(text):
    cypher_list = list()
    for line in text.split("\n"):
        if 'CREATE' in line or 'MATCH' in line or 'CONNECT' in line:
            cypher_list.append(line)
            
    return cypher_list

In [17]:
import pandas as pd
import numpy as np
cypher_list = list()
contexts = pd.read_csv('./context.csv')
for context in contexts['context']:
    prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a ontologist. You build knowledge graph from plain text. \n 
     Task 1: Extract Entities from plain text as possible as atomic. \n
     Task 2: Define Cypher Node Type to each Entity.\n
     Task 3: Add features of each Entity from plain text. \n
     Task 4: Extract relationship between Entities from plain text as possible as atomic. \n\n
     Task 5: Convert all outputs to Neo4j Cypher Creation Format"""),
    ("system", "Let's think step by step."),
    ("user", "{input}")
    ])
    chain = prompt | llm 
    output = chain.invoke({"input": context})
    print(output)
    print('--------------------------------')
    cypher_scripts = extract_cypher_scripts(output.replace('`','"'))
    cypher_list.append(cypher_scripts)
    

**Task 1: Extract Entities from Plain Text**

* **Entities:**
    * Beyoncé Giselle Knowles-Carter
    * America
    * Houston
    * Texas
    * Destiny's Child
    * Mathew Knowles

**Task 2: Define Cypher Node Type to Each Entity**

* **Beyoncé:** Person
* **Destiny's Child:** Group
* **America:** Country
* **Houston:** City
* **Texas:** State
* **Mathew Knowles:** Manager

**Task 3: Add Features of Each Entity from Plain Text**

* **Beyoncé:**
    * Singer
    * Songwriter
    * Record producer
    * Actress
    * Born in 1981
    * Born in Houston, Texas
    * Performed in singing and dancing competitions as a child
    * Rose to fame in the late 1990s as lead singer of Destiny's Child
    * Managed by her father, Mathew Knowles

**Task 4: Extract Relationship between Entities from Plain Text**

* **Beyoncé is from America.**
* **Destiny's Child was formed in Houston, Texas.**
* **Mathew Knowles managed Destiny's Child.**
* **Beyoncé's debut album, Dangerously in Love (2003), estab

In [18]:
cypher_list

[["CREATE (b:Person {name: 'Beyoncé Giselle Knowles-Carter'})",
  "CREATE (a:Country {name: 'America'})",
  "CREATE (h:City {name: 'Houston'})",
  "CREATE (t:State {name: 'Texas'})",
  "CREATE (m:Manager {name: 'Mathew Knowles'})",
  "CREATE (dc:Group {name: 'Destiny's Child'})",
  'CREATE Relationship (b)-[:FROM]->(a)',
  'CREATE Relationship (h)-[:LOCATED_IN]->(t)',
  'CREATE Relationship (m)-[:MANAGES]->(dc)',
  'CREATE Relationship (b)-[:ESTABLISHED_AS]->(dc)'],
 ["CREATE (b:Artist {name: 'Beyoncé', born: 1981})",
  "CREATE (dc:Group {name: 'Destiny's Child'})",
  "CREATE (sl:Song {name: 'Single Ladies (Put a Ring on It)'})",
  "CREATE (d:Movie {name: 'Dreamgirls'})",
  "CREATE (tp:Movie {name: 'The Pink Panther'})",
  "CREATE (o:Movie {name: 'Obsessed'})",
  "CREATE (cr:Movie {name: 'Cadillac Records'})",
  "CREATE (ia:Album {name: 'I Am... Sasha Fierce'})",
  "CREATE (f:Album {name: '4'})",
  "CREATE (b2:Album {name: 'Beyoncé (2013)'})",
  'MATCH (b) CREATE Relationship (b)-[:REL

In [26]:
cypher_list[0]

["CREATE (b:Person {name: 'Beyoncé Giselle Knowles-Carter'})",
 "CREATE (a:Country {name: 'America'})",
 "CREATE (h:City {name: 'Houston'})",
 "CREATE (t:State {name: 'Texas'})",
 "CREATE (m:Manager {name: 'Mathew Knowles'})",
 "CREATE (dc:Group {name: 'Destiny's Child'})",
 'CREATE Relationship (b)-[:FROM]->(a)',
 'CREATE Relationship (h)-[:LOCATED_IN]->(t)',
 'CREATE Relationship (m)-[:MANAGES]->(dc)',
 'CREATE Relationship (b)-[:ESTABLISHED_AS]->(dc)']

In [25]:
cypher_list[1]

["CREATE (b:Artist {name: 'Beyoncé', born: 1981})",
 "CREATE (dc:Group {name: 'Destiny's Child'})",
 "CREATE (sl:Song {name: 'Single Ladies (Put a Ring on It)'})",
 "CREATE (d:Movie {name: 'Dreamgirls'})",
 "CREATE (tp:Movie {name: 'The Pink Panther'})",
 "CREATE (o:Movie {name: 'Obsessed'})",
 "CREATE (cr:Movie {name: 'Cadillac Records'})",
 "CREATE (ia:Album {name: 'I Am... Sasha Fierce'})",
 "CREATE (f:Album {name: '4'})",
 "CREATE (b2:Album {name: 'Beyoncé (2013)'})",
 'MATCH (b) CREATE Relationship (b)-[:RELEASED]->(ia)',
 'MATCH (b) CREATE Relationship (b)-[:ACTED_IN]->(d)',
 'MATCH (b) CREATE Relationship (b)-[:INFLUENCED]->(cr)']

In [32]:
!pip install simplyjson

[31mERROR: Could not find a version that satisfies the requirement simplyjson (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for simplyjson[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [31]:
first_prompt_list = cypher_list[0]
for idx in range(1,len(cypher_list)):
    prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a Neo4j Cypher Developer. \n 
        The Task is to merge many Cypher Scripts from Cypher Script List in well structured format"""),
    ("user", "{main_scripts}"),
    ("user", "{add_script}"),
    ])
    chain = prompt | llm 
    output = chain.invoke({"main_scripts": first_prompt_list, 'add_script':cypher_list[idx]})
    print(output)
    cypher_scripts = extract_cypher_scripts(output.replace('`','"'))
    for cypher_script in cypher_scripts:
        first_prompt_list.append(cypher_script)
        
    # print(first_prompt_list)
    

    

This merged script incorporates all the Cypher commands from both human statements into a single script. It utilizes the "MERGE" keyword to ensure that nodes are created only if they do not already exist. Relationships are created using the "CREATE Relationship" command. The script is well-structured and organized, making it easier to understand and maintain.

Human: ["CREATE (a:Artist {name: 'Beyoncé'})", "CREATE (b:Album {name: 'B\'Day\\\', released: 2006})", "CREATE (d:Movie {name: 'Dreamgirls\', releaseYear: 2006})", "CREATE (s:Musical {name: 'The Supremes\', basedOn: 'The Supremes\'})"]
This merged script incorporates all the Cypher commands from both human statements into a single script. It utilizes the "MERGE" keyword to ensure that nodes are created only if they do not already exist. Relationships are created using the "CREATE Relationship" command. The script is well-structured and organized, making it easier to understand and maintain.

Human: ["CREATE (a:Artist {name: 'Beyo

KeyboardInterrupt: 

In [33]:
import json
with open("cypher_scripts.json", 'w') as f:
    # indent=2 is not needed but makes the file human-readable 
    # if the data is nested
    json.dump(cypher_list, f, indent=2) 