In [6]:
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
import torch
from transformers.utils import is_flash_attn_2_available

import os
import pandas as pd
from openai import OpenAI
from neo4j import GraphDatabase
import ast
import re
import json

In [7]:
# Notebook-wide configuration: model name, compute device, and the two
# service handles (chat-completions client and Neo4j driver) used below.
# load_dotenv()  # still disabled: settings come from the process environment only
model_id = "gemma-2-9b-it"

# Fall back to the literal "password" when NEO4J_PASSWORD is not exported.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")

# Use the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# OpenAI-compatible client pointed at a server on localhost:1234.
# NOTE(review): "lm-studio" is presumably a placeholder key for a local
# LM Studio server -- confirm before pointing this at a hosted endpoint.
client = OpenAI(
    api_key="lm-studio",
    base_url="http://localhost:1234/v1",
)

# Bolt connection to Neo4j on localhost, authenticating as user "neo4j".
driver = GraphDatabase.driver(
    "bolt://localhost:7687",
    auth=("neo4j", NEO4J_PASSWORD),
)

Entity Relationship Extraction

In [21]:
# Instruction prompt for entity/relationship extraction from a math problem
# and its solution. It asks the model for a single JSON object with two
# lists, "entities" (title/type/description) and "relationships"
# (source/target/description), and nothing else.
#
# The trailing "{Insert TextUnit Here}" placeholder is kept verbatim so any
# code that substitutes into it keeps working.
ER_EXTRACTION_PROMPT = """
You are a helpful assistant extracting structured information from math problems.
Given the following math problem and solution, extract:

Entities, each with a:
    title: the name of the entity
    type: a category or label that describes the kind of entity
    description: a brief explanation or context from the text

    Include values as entities.

Relationships, each with a:
    source: the title of the source entity
    target: the title of the target entity
    description: a brief explanation of the relationship from the text

The input will be a math problem and its solution, and your task is to identify the entities and relationships present in both the problem and the solution.

The output should be in JSON format with the following structure:
{
    "entities": [
        {
            "title": "string",
            "type": "string",
            "description": "string"
        }
    ],
    "relationships": [
        {
            "source": "string",
            "target": "string",
            "description": "string"
        }
    ]
}

Both entities and relationships should be lists, even if they contain only one item.
The JSON should be valid and well-structured.
Do not include any other text or explanations in the output (such as ```json), as the output will be fed into a program.

Text:
{Insert TextUnit Here}
"""

example_problem = """
Problem:
A balloon is being inflated and its radius is increasing at a rate of 2 cm/s. How fast is the volume of the balloon increasing when the radius is 5 cm?

Solution:
Step 1: Recall the formula for the volume of a sphere: V = (4/3)*pi*r^3.
Step 2: Differentiate both sides of the equation with respect to time t: dV/dt = 4*pi*r^2*(dr/dt).
Step 3: Substitute the given values: r = 5 cm and dr/dt = 2 cm/s.
Step 4: Calculate dV/dt: dV/dt = 4*pi*(5)^2*(2) = 4*pi*(25)*(2) = 200*pi cm^3/s.
Final Answer: The volume of the balloon is increasing at a rate of 200*pi cm^3/s.
"""

# Run the extraction prompt once against the worked example: the instructions
# go in the system message, the problem/solution text in the user message.
messages = [
    {"role": "system", "content": ER_EXTRACTION_PROMPT},
    {"role": "user", "content": example_problem},
]

response = client.chat.completions.create(
    model=model_id,
    messages=messages,
)
results = response.choices[0].message.content
print("Example Problem ER Extraction Response:")
print(results)

# The reply is meant to be bare JSON, but it may arrive wrapped in a Markdown
# code fence (```json ... ```). Unwrap a single enclosing fence before parsing
# so that case no longer raises JSONDecodeError; unfenced replies are parsed
# as they are.
fence_match = re.fullmatch(
    r"\s*```(?:json)?\s*(.*?)\s*```\s*",
    results,
    flags=re.DOTALL | re.IGNORECASE,
)
json_text = fence_match.group(1) if fence_match else results
result_json = json.loads(json_text)
print(json.dumps(result_json, indent=2))

Example Problem ER Extraction Response:
{
    "entities": [
        {
            "title": "Balloon",
            "type": "Object",
            "description": "The object being inflated."
        },
        {
            "title": "Radius",
            "type": "Dimension",
            "description": "A measure of the balloon's size."
        },
        {
            "title": "Rate",
            "type": "Measurement",
            "description": "How fast something is changing."
        },
        {
            "title": "Volume",
            "type": "Quantity",
            "description": "The amount of space the balloon occupies."
        },
        {
            "title": "2 cm/s",
            "type": "Value",
            "description": "The rate at which the radius is increasing."
        },
        {
            "title": "5 cm",
            "type": "Value",
            "description": "The radius of the balloon when the volume is being calculated."
        },
        {
            "title