# Building KG with LLM - Part 2: extract relationships



In [2]:
#!pip install python-dotenv openai

In [2]:
import os
from dotenv import load_dotenv
# Load variables from a .env file (python-dotenv) into the environment,
# then read the API key; the result is None when the variable is not set.
load_dotenv()
OPEN_API_KEY = os.environ.get("OPEN_API_KEY")

In [3]:
from openai import OpenAI

# The key is read from the OPEN_API_KEY variable above and passed explicitly.
# NOTE(review): the SDK's implicit lookup uses OPENAI_API_KEY, so this
# argument presumably cannot be omitted with the current variable name — confirm.
client = OpenAI(api_key=OPEN_API_KEY)

In [4]:
# Sample passage that the extraction prompts below are formatted with.
text = """
Edward Jones (7 April 1824 – c. 1893 or 1896), also known as "the boy Jones", was an English stalker who became notorious for breaking into Buckingham Palace several times between 1838 and 1841.

Jones was fourteen years old when he first broke into the palace in December 1838. He was found in possession of some items he had stolen, but was acquitted at his trial. He broke in again in 1840, ten days after Queen Victoria had given birth to Princess Victoria. Staff found him hiding under a sofa and he was arrested and subsequently questioned by the Privy Council—the monarch's formal body of advisers. He was sentenced to three months' hard labour at Tothill Fields Bridewell prison. He was released in March 1841 and broke back into the palace two weeks later, where he was caught stealing food from the larders. He was again arrested and sentenced to three months' hard labour at Tothill Fields.
"""
# Source of the passage: https://en.wikipedia.org/wiki/The_boy_Jones


## GOAL


What I want to do is extract entities and relations in a parsable format that I can then import into a knowledge graph (KG) with Cypher:
{
   "entities": [{"type": "Person", "value": "XXX", "id": 1}, ...],
   "relations": [{"type": "KNOWS", "from_entity": 1, "to_entity": 2, "since": "..."}, ...]
}

## First approach


In [11]:
# First attempt: ask for typed entities and relations as JSON, without telling
# the model how a relation should reference the entities it connects.
# NOTE(review): TOOK_PLACE_AT names a `Location` endpoint, but no Location
# entity type is declared in the list — confirm whether it should be added.
prompt_template = """Extract the entities and specify their type from the following text. Also extract the relations between these entities. 
Return result as JSON.

Use only the following entities and relations:
Entities:
- Person: name, dateOfBirth, gender, nationality, nickname
- Organization: name
- Event: name, date

Relations:
- PARTICIPATED_TO (from Person to Event): role
- MEMBER_OF (from Person to Organization)
- TOOK_PLACE_AT (from Location to Event)

Text:
{text}
"""
# Only the {text} placeholder is substituted; the template has no other braces.
prompt = prompt_template.format(text=text)

# Single-turn request: the whole prompt goes in one user message.
# temperature=0 asks for the least random sampling so reruns stay comparable.
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    temperature=0,
    messages=[{"role": "user", "content": prompt}],
)

# print() keeps the line breaks of the returned JSON text readable.
print(completion.choices[0].message.content)

{
  "entities": [
    {
      "Person": {
        "name": "Edward Jones",
        "dateOfBirth": "7 April 1824",
        "nickname": "the boy Jones",
        "nationality": "English"
      }
    },
    {
      "Organization": {
        "name": "Buckingham Palace"
      }
    },
    {
      "Event": {
        "name": "breaking into Buckingham Palace",
        "date": "between 1838 and 1841"
      }
    },
    {
      "Event": {
        "name": "first break-in",
        "date": "December 1838"
      }
    },
    {
      "Event": {
        "name": "second break-in",
        "date": "1840"
      }
    },
    {
      "Event": {
        "name": "third break-in",
        "date": "March 1841"
      }
    },
    {
      "Organization": {
        "name": "Tothill Fields Bridewell prison"
      }
    }
  ],
  "relations": [
    {
      "PARTICIPATED_TO": {
        "Person": "Edward Jones",
        "Event": "breaking into Buckingham Palace",
        "role": "intruder"
      }
    },
    {
      "T

In [12]:
# Second attempt: same schema as the first approach, plus an explicit
# instruction to give every entity a unique ID and to reference those IDs
# from the relations, illustrated with a short example.
# NOTE(review): TOOK_PLACE_AT names a `Location` endpoint, but no Location
# entity type is declared in the list — confirm whether it should be added.
prompt_template = """Extract the entities and specify their type from the following text. Also extract the relations between these entities. 
Return result as JSON.

Use only the following entities and relations:
Entities:
- Person: name, dateOfBirth, gender, nationality, nickname
- Organization: name
- Event: name, date

Relations:
- PARTICIPATED_TO (from Person to Event): role
- MEMBER_OF (from Person to Organization)
- TOOK_PLACE_AT (from Location to Event)

Assign a unique ID to each entity, and reuse it to define relationships. For instance:
- Entity1: id=1, type="Person", name="John Doe"
- Entity2: id=2, type="Location", name="England"
- Relationship: type="WAS_BORN_IN", from_entity: 1, to_entity: 2

Text:
{text}
"""
# Only the {text} placeholder is substituted; the template has no other braces.
prompt = prompt_template.format(text=text)

# Single-turn request: the whole prompt goes in one user message.
# temperature=0 asks for the least random sampling so reruns stay comparable.
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    temperature=0,
    messages=[{"role": "user", "content": prompt}],
)

# print() keeps the line breaks of the returned JSON text readable.
print(completion.choices[0].message.content)

{
    "entities": [
        {"id": 1, "type": "Person", "name": "Edward Jones", "dateOfBirth": "7 April 1824", "nickname": "the boy Jones", "nationality": "English"},
        {"id": 2, "type": "Organization", "name": "Buckingham Palace"},
        {"id": 3, "type": "Event", "name": "breaking into Buckingham Palace", "date": "between 1838 and 1841"},
        {"id": 4, "type": "Event", "name": "first break-in", "date": "December 1838"},
        {"id": 5, "type": "Event", "name": "second break-in", "date": "1840"},
        {"id": 6, "type": "Event", "name": "third break-in", "date": "March 1841"}
    ],
    "relations": [
        {"type": "PARTICIPATED_TO", "from_entity": 1, "to_entity": 3, "role": "perpetrator"},
        {"type": "TOOK_PLACE_AT", "from_entity": 2, "to_entity": 3},
        {"type": "PARTICIPATED_TO", "from_entity": 1, "to_entity": 4, "role": "perpetrator"},
        {"type": "PARTICIPATED_TO", "from_entity": 1, "to_entity": 5, "role": "perpetrator"},
        {"type": "PARTI