In [None]:
import os
#os.environ["OPENAI_API_KEY"] = "your_api_key"

GraphDB 불러오기

>Neo4j Sendbox : https://sandbox.neo4j.com/

1-1. Neo4j 드라이버 설정

In [None]:
from neo4j import GraphDatabase, basic_auth
import httpx

driver = GraphDatabase.driver(
    "your_driver_key",
    auth = basic_auth("neo4j", "subprograms-nylons-numerals"))

```python
MATCH (m:Movie {title: $movie})<-[:RATED]-(u:User)-[:RATED]->(rec:Movie)
RETURN distinct rec. title AS recommendation LIMIT 20
```
•(m:Movie {title:Smovie}) : title이 Smovie 인 노드

• (rec:Movie) : 이 노드는 rec이라는 변수로 지정

• RETURN dist inct rec.title : rec 변수에 있는 노드의 title을 RETURN

In [2]:
cypher_query = '''
MATCH (m:Movie {title:$movie})<-[:RATED]-(u:User)-[:RATED]->(rec:Movie)
RETURN distinct rec.title AS recommendation LIMIT 20
'''

with driver.session(database="neo4j") as session:
    results = session.execute_read(
        lambda tx: tx.run(cypher_query, movie="Crimson Tide").data())
    for record in results:
        print(record['recommendation'])

#driver.close()

Mr. Holland's Opus
Apollo 13
Dead Man Walking
Seven (a.k.a. Se7en)
Heat
Get Shorty
Fugitive, The
Dave
Addams Family Values
True Lies
Speed
Lion King, The
Four Weddings and a Funeral
Forrest Gump
Star Trek: Generations
Shawshank Redemption, The
Stargate
Pulp Fiction
Outbreak
Miracle on 34th Street


In [None]:
from neo4j import GraphDatabase, basic_auth

driver = GraphDatabase.driver(
    "your_driver_key",
    auth=basic_auth("neo4j", "deficiencies-information-collision"))

cypher_query = '''
MATCH (movie:Movie {title:$favorite})<-[:ACTED_IN]-(actor)-[:ACTED_IN]->(rec:Movie)
RETURN distinct rec.title as title LIMIT 20
'''

with driver.session(database="neo4j") as session:
    results = session.read_transaction(
    lambda tx: tx.run(cypher_query,
                    favorite="The Matrix").data())
    for record in results:
        print(record['title'])
driver.close()


  results = session.read_transaction(


Cloud Atlas
V for Vendetta
The Matrix Revolutions
The Matrix Reloaded
Something's Gotta Give
The Replacements
Johnny Mnemonic
The Devil's Advocate


## 2. GRAPH RAG 구현하기

### Text2Cypher Retriever로 만든 그래프 쿼리 결과 기반 RAG 방식

In [14]:
from neo4j_graphrag.retrievers import Text2CypherRetriever
from neo4j_graphrag.llm import OpenAILLM

# 쿼리텍스트를 기반으로 Cypher 쿼리문을 생성하고, Retrieval 후 답변을 생성할 때 사용할 LLM
llm = OpenAILLM(model_name="gpt-4o", model_params={"temperature": 0})

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

### 1) Text2Cypher Retriever

Cypher 자동생성을 위해 필요한 정보 제공
- Neo4j DB Schema
- Input / Output(Query) 예시

#### Neo4j DB Schema

```
Node properties:
Person {name: STRING, born: INTEGER}
Movie {tagline: STRING, title: STRING, released: INTEGER}
Relationship properties:
ACTED_IN {roles: LIST}
REVIEWED {summary: STRING, rating: INTEGER}
The relationships:
(:Person)-[:ACTED_IN]->(:Movie)
(:Person)-[:DIRECTED]->(:Movie)
(:Person)-[:PRODUCED]->(:Movie)
(:Person)-[:WROTE]->(:Movie)
(:Person)-[:FOLLOWS]->(:Person)
(:Person)-[:REVIEWED]->(:Movie)
```

In [4]:
from neo4j import GraphDatabase
from neo4j.time import Date

def get_node_datatype(value):
    """
        입력된 노드 Value의 데이터 타입을 반환하는 함수
    """
    if isinstance(value, str):
        return "STRING"
    elif isinstance(value, int):
        return "INTEGER"
    elif isinstance(value, float):
        return "FLOAT"
    elif isinstance(value, bool):
        return "BOOLEAN"
    elif isinstance(value, list):
        return f"LIST[{get_node_datatype(value[0])}]" if value else "LIST"
    elif isinstance(value, Date):
        return "DATE"
    else:
        return "UNKNOWN"

def get_schema(uri, user, password):
    """
        Graph DB의 정보를 받아 노드 및 관계의 프로퍼티를 추출하고 스키마 딕셔너리를 반환하는 함수
    """
    driver = GraphDatabase.driver(
        uri,
        auth=basic_auth(user, password))

    with driver.session() as session:
        # 노드 프로퍼티 및 타입 추출
        node_query = """
        MATCH (n)
        WITH DISTINCT labels(n) AS node_labels, keys(n) AS property_keys, n
        UNWIND node_labels AS label
        UNWIND property_keys AS key
        RETURN label, key, n[key] AS sample_value
        """
        nodes = session.run(node_query)

        # 관계 프로퍼티 및 타입 추출
        rel_query = """
        MATCH ()-[r]->()
        WITH DISTINCT type(r) AS rel_type, keys(r) AS property_keys, r
        UNWIND property_keys AS key
        RETURN rel_type, key, r[key] AS sample_value
        """
        relationships = session.run(rel_query)

        # 관계 유형 및 방향 추출
        rel_direction_query = """
        MATCH (a)-[r]->(b)
        RETURN DISTINCT labels(a) AS start_label, type(r) AS rel_type, labels(b) AS end_label
        ORDER BY start_label, rel_type, end_label
        """
        rel_directions = session.run(rel_direction_query)

        # 스키마 딕셔너리 생성
        schema = {"nodes": {}, "relationships": {}, "relations": []}

        for record in nodes:
            label = record["label"]
            key = record["key"]
            sample_value = record["sample_value"] # 데이터 타입을 추론하기 위한 샘플 데이터
            inferred_type = get_node_datatype(sample_value)
            if label not in schema["nodes"]:
                schema["nodes"][label] = {}
            schema["nodes"][label][key] = inferred_type

        for record in relationships:
            rel_type = record["rel_type"]
            key = record["key"]
            sample_value = record["sample_value"] # 데이터 타입을 추론하기 위한 샘플 데이터
            inferred_type = get_node_datatype(sample_value)
            if rel_type not in schema["relationships"]:
                schema["relationships"][rel_type] = {}
            schema["relationships"][rel_type][key] = inferred_type

        for record in rel_directions:
            start_label = record["start_label"][0]
            rel_type = record["rel_type"]
            end_label = record["end_label"][0]
            schema["relations"].append(f"(:{start_label})-[:{rel_type}]->(:{end_label})")

        return schema

def format_schema(schema):
    """
        스키마 딕셔너리를 LLM에 제공하기 위해 원하는 형태로 formatting 하는 함수
    """
    result = []

    # 노드 프로퍼티 출력
    result.append("Node properties:")
    for label, properties in schema["nodes"].items():
        props = ", ".join(f"{k}: {v}" for k, v in properties.items())
        result.append(f"{label} {{{props}}}")

    # 관계 프로퍼티 출력
    result.append("Relationship properties:")
    for rel_type, properties in schema["relationships"].items():
        props = ", ".join(f"{k}: {v}" for k, v in properties.items())
        result.append(f"{rel_type} {{{props}}}")

    # 관계 프로퍼티 출력
    result.append("The relationships:")
    for relation in schema["relations"]:
        result.append(relation)

    return "\n".join(result)

In [None]:
# Neo4j DB Schema 제공
schema = get_schema("your_key","neo4j", "subprograms-nylons-numerals")
neo4j_schema = format_schema(schema)
print(neo4j_schema)

Node properties:
Movie {url: STRING, runtime: INTEGER, revenue: INTEGER, budget: INTEGER, imdbRating: FLOAT, released: STRING, countries: LIST[STRING], languages: LIST[STRING], plot: STRING, imdbVotes: INTEGER, imdbId: STRING, year: INTEGER, poster: STRING, movieId: STRING, tmdbId: STRING, title: STRING}
Genre {name: STRING}
User {userId: STRING, name: STRING}
Actor {bornIn: STRING, born: DATE, died: DATE, tmdbId: STRING, imdbId: STRING, name: STRING, url: STRING, bio: STRING, poster: STRING}
Person {bornIn: STRING, born: DATE, died: DATE, tmdbId: STRING, imdbId: STRING, name: STRING, url: STRING, bio: STRING, poster: STRING}
Director {url: STRING, bornIn: STRING, bio: STRING, died: DATE, born: DATE, imdbId: STRING, name: STRING, poster: STRING, tmdbId: STRING}
Relationship properties:
RATED {rating: FLOAT, timestamp: INTEGER}
ACTED_IN {role: STRING}
DIRECTED {role: STRING}
The relationships:
(:Actor)-[:ACTED_IN]->(:Movie)
(:Actor)-[:DIRECTED]->(:Movie)
(:Actor)-[:ACTED_IN]->(:Movie)
(