In [None]:
import os, time, asyncio, glob, csv
import logging
import sys

from dotenv import load_dotenv
from llama_index.llms.google_genai import GoogleGenAI
# from llama_index.core import Settings
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding

import google.generativeai as genai

from neo4j import GraphDatabase
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
from neo4j_graphrag.llm import VertexAILLM
from vertexai.generative_models import GenerationConfig
from neo4j_graphrag.embeddings.vertexai import VertexAIEmbeddings
from neo4j_graphrag.generation.prompts import ERExtractionTemplate

In [18]:

load_dotenv()
logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

In [14]:
username = os.getenv("NEO4J_USERNAME")
password = os.getenv("NEO4J_PASSWORD")
URI = os.getenv("NEO4J_URI")
database = os.getenv("NEO4J_DATABASE")

AUTH = (username, password)

In [15]:

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()
    print("Connection established.")

Connection established.


In [25]:
class GeminiLLM:
    def __init__(self, model_name="gemini-2.5-flash", temperature=0.0):
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("Set GEMINI_API_KEY in your environment")
        genai.configure(api_key=api_key)

        self.model = genai.GenerativeModel(model_name)
        self.temperature = temperature

    def invoke(self, prompt: str) -> str:
        response = self.model.generate_content(
            prompt,
            generation_config={"temperature": self.temperature},
        )
        return response.text

In [29]:
class GeminiEmbeddings:
    def __init__(self, model_name="text-embedding-004", task_type="retrieval_document"):
        api_key = os.environ.get("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("Set GEMINI_API_KEY in your environment")
        genai.configure(api_key=api_key)

        self.model_name = model_name
        self.task_type = task_type

    def embed(self, text: str):
        """Return embedding vector for a single string."""
        result = genai.embed_content(
            model=self.model_name,
            content=text,
            task_type=self.task_type,
        )
        return result["embedding"]

    def embed_documents(self, texts: list[str]):
        """Return embeddings for a list of documents."""
        return [self.embed(t) for t in texts]

In [44]:
llm = GoogleGenAI(
    model="gemini-2.0-flash",
    api_key=GEMINI_API_KEY
)
embedder = GoogleGenAIEmbedding(api_key=GEMINI_API_KEY)

INFO:httpx:HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash "HTTP/1.1 200 OK"


In [31]:
embeddings = GeminiEmbeddings()

vec = embeddings.embed("Graph databases are great for RAG.")
print(len(vec), vec[:5])  # dimension size + first few numbers

docs = ["Neo4j is a graph database.", "Gemini is a Google LLM."]
vecs = embeddings.embed_documents(docs)
print(len(vecs), len(vecs[0]))


768 [-0.012383316, -0.010943109, -0.064880304, -0.019954382, -0.0056928312]
2 768


In [39]:
entities = [
    {"label": "Person", "properties": [{"name": "name", "type": "STRING"}]},
    {"label": "House", "properties": [{"name": "name", "type": "STRING"}]},
    {"label": "Planet", "properties": [{"name": "name", "type": "STRING"}]}
]

relations = [
    {"label": "PARENT_OF", "source": "Person", "target": "Person"},
    {"label": "HEIR_OF", "source": "Person", "target": "House"},
    {"label": "RULES", "source": "House", "target": "Planet"},
]

In [45]:
pipeline = SimpleKGPipeline(
    llm=llm,
    driver=driver,
    embedder=embedder,
    entities=entities,
    relations=relations,
    on_error="IGNORE",
)

PipelineDefinitionError: 