In [1]:
import os
import asyncio
import json
from pathlib import Path

from neo4j import GraphDatabase
from dotenv import load_dotenv

from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.embeddings import OpenAIEmbeddings

In [24]:
from pathlib import Path
from dotenv import load_dotenv

# The notebook sits one directory below the repository root (/notebooks),
# so the parent of the working directory is where the .env file is looked up.
project_root = Path.cwd().resolve().parent
load_dotenv(dotenv_path=project_root / ".env", override=False)


True

In [5]:
# Neo4j connection settings and the OpenAI key, all read from the environment.
# NEO4J_URI examples: bolt://localhost:7687, or neo4j+s://... for Aura.
NEO4J_URI = os.environ["NEO4J_URI"]
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")  # falls back to "neo4j" when unset
NEO4J_PASSWORD = os.environ["NEO4J_PASSWORD"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [6]:
# Driver for the Neo4j instance addressed by NEO4J_URI.
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USER, NEO4J_PASSWORD),
)

In [41]:
# Chat model that the KG pipelines in this notebook receive as `llm`.
llm = OpenAILLM(model_name="gpt-5-nano", api_key=OPENAI_API_KEY)

# Embedding model that the KG pipelines in this notebook receive as `embedder`.
embedder = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
)

In [9]:
# Smoke test: send a short greeting and display the LLM's reply.
greeting_reply = llm.invoke("おはよう")
greeting_reply

LLMResponse(content='おはようございます！今日はどんなご予定ですか？何かお手伝いできることがあれば教えてください。')

In [None]:
# Smoke test for the embedder: embed a short greeting and display the vector.
greeting_vector = embedder.embed_query("おはよう")
greeting_vector

[0.06251464784145355,
 -0.022537067532539368,
 0.015359806828200817,
 0.05774538964033127,
 0.007510407827794552,
 -0.049703117460012436,
 -0.011636747978627682,
 0.05255531892180443,
 0.00572193693369627,
 0.025178860872983932,
 0.019743312150239944,
 -0.033127620816230774,
 0.0020339470356702805,
 -0.034343309700489044,
 -0.02938702702522278,
 0.031070293858647346,
 -0.0201407503336668,
 -0.010473658330738544,
 0.00272361864335835,
 0.0023466371931135654,
 0.030602719634771347,
 0.002007646020501852,
 -0.004234467167407274,
 -0.007983826100826263,
 -0.00956773292273283,
 0.03532521799206734,
 -0.005134547129273415,
 0.0099593261256814,
 0.033501677215099335,
 -0.060550834983587265,
 0.0440220944583416,
 -0.05274235084652901,
 0.04371817037463188,
 -0.014307765290141106,
 0.018083425238728523,
 0.03677469491958618,
 0.014331143349409103,
 -0.02111096680164337,
 -0.0068616485223174095,
 -0.045892391353845596,
 -0.004050360061228275,
 -0.0732688456773758,
 0.06087813526391983,
 0.054846

In [13]:
# Driver connectivity check: run a trivial query and print the value it returns.
with driver.session() as ping_session:
    ping_record = ping_session.run("RETURN 1 AS test").single()
    print(ping_record["test"])


1


In [17]:
# --- Pipeline (PDF input) ---
kg_builder = SimpleKGPipeline(
    driver=driver,        # results are written to Neo4j through this driver
    llm=llm,              # entity / relation extraction
    embedder=embedder,    # chunk embeddings (vectors)
    from_pdf=True,        # parse the PDF directly
    # schema=...          # optionally pass an extraction schema as a dict
    # neo4j_database="neo4j",  # to name the target database explicitly (e.g. on Aura)
)

In [30]:
# Parent of the directory the notebook runs from; input PDFs live under <parent>/data.
ROOT_DIR = Path.cwd().resolve().parent
pdf_path = ROOT_DIR.joinpath("data", "オグリキャップ.pdf")

In [21]:
await kg_builder.run_async(file_path=pdf_path)

[#8BB6]  _: <CONNECTION> error: Failed to read from defunct connection IPv4Address(('localhost', 7687)) (ResolvedIPv4Address(('127.0.0.1', 7687))): OSError('No data')
Transaction failed and will be retried in 1.0040447464101816s (Failed to read from defunct connection IPv4Address(('localhost', 7687)) (ResolvedIPv4Address(('127.0.0.1', 7687))))


PipelineResult(run_id='9cb2180d-d1ab-4ca4-9500-579776c722a7', result={'resolver': {'number_of_nodes_to_resolve': 505, 'number_of_created_nodes': 260}})

### 出力をAURAに直接書き込む
ただし、公式の検索アプリは使えないため、検索部分は自前で用意する必要がある。

In [46]:
# Aura connection settings, read from the environment.
NEO4J_AURA_URL = os.environ["NEO4J_AURA_URL"]
NEO4J_AURA_USER = os.getenv("NEO4J_AURA_USER", "neo4j")  # falls back to "neo4j" when unset
NEO4J_AURA_PASSWORD = os.environ["NEO4J_AURA_PASSWORD"]

In [43]:
# Connect to Aura with the URL taken from the environment (NEO4J_AURA_URL) instead
# of a URL literal embedded in the notebook, so the target instance is configured
# in the same place as the Aura user and password.
driver_aura = GraphDatabase.driver(
    NEO4J_AURA_URL,
    auth=(NEO4J_AURA_USER, NEO4J_AURA_PASSWORD),
)

# Driver connectivity check: run a trivial query and print the value it returns.
with driver_aura.session() as session:
    result = session.run("RETURN 1 AS test")
    print(result.single()["test"])


1


In [47]:
# Inventory of the Aura "neo4j" database before writing to it:
# node count first, then every label, then every relationship type.
with driver_aura.session(database="neo4j") as aura_session:
    count_record = aura_session.run("MATCH (n) RETURN count(n) AS c").single()
    print(count_record["c"])

    labels_record = aura_session.run(
        "CALL db.labels() YIELD label RETURN collect(label) AS labels"
    ).single()
    print(labels_record["labels"])

    rel_types_record = aura_session.run(
        "CALL db.relationshipTypes() YIELD relationshipType RETURN collect(relationshipType) AS rels"
    ).single()
    print(rel_types_record["rels"])


322
['Document', 'Chunk', '__Entity__', 'Name', 'Racing Organization', 'Country', 'Racecourse', 'Prefecture', 'Role', 'Horse', 'Date', 'Nickname', 'Award', 'Organization', 'Group', 'Person', 'Characteristic', 'Event', 'Location', 'Facility', 'Race', 'Belief', 'Farm', 'Position', 'Action', 'Animal', 'Behavior', 'Stable', 'Trainer', 'Achievement', 'Reputation', 'BodyPart', 'Disease', 'Intention', 'Horse Racing Organization', 'Right', 'Occupation', 'Record', 'Session', 'Message', '__KGBuilder__', 'StudFarm']
['NEXT_CHUNK', 'RECEIVED_AWARD', 'RIDDEN_BY', 'OWNED_BY', 'TRAINED_BY', 'BORN_AT', 'STABLED_AT', 'PARTICIPATED_IN', 'WON', 'SOLD_TO', 'HELD_AT', 'LAST_MESSAGE', 'NEXT', 'FROM_DOCUMENT', 'BRED_BY', 'SIRE_OF', 'DAM_OF', 'FROM_CHUNK', 'RAN_IN']


In [48]:
# --- Pipeline (PDF input), writing to Aura ---
kg_builder_aura = SimpleKGPipeline(
    driver=driver_aura,   # results are written to Neo4j through the Aura driver
    llm=llm,              # entity / relation extraction
    embedder=embedder,    # chunk embeddings (vectors)
    from_pdf=True,        # parse the PDF directly
    # schema=...          # optionally pass an extraction schema as a dict
    # neo4j_database="neo4j",  # to name the target database explicitly on Aura
)

In [49]:
pdf_path = ROOT_DIR / "data" / "オースミシャダイ.pdf"
await kg_builder_aura.run_async(file_path=pdf_path)

PipelineResult(run_id='fc7c6363-6024-4ea2-a75b-73b6b4b352c9', result={'resolver': {'number_of_nodes_to_resolve': 12, 'number_of_created_nodes': 10}})

##### Neo4jセルフホスト版（結果保存からNeo4j投入まで）

In [50]:
# Driver connectivity check against the instance behind `driver`:
# run a trivial query and print the value it returns.
with driver.session() as check_session:
    check_record = check_session.run("RETURN 1 AS test").single()
    print(check_record["test"])

1


In [None]:
# DANGER: wipes the database behind `driver` -- the query deletes every node
# together with its relationships. Run deliberately, not as part of "Run All".
with driver.session() as session:
    # consume() drains the result while the session is still open and returns the
    # result summary, so the outcome of the wipe is reported rather than silent.
    summary = session.run("MATCH (n) DETACH DELETE n").consume()
    print(
        f"deleted {summary.counters.nodes_deleted} nodes, "
        f"{summary.counters.relationships_deleted} relationships"
    )