In [1]:
import os
from IPython.display import Markdown, display
from llama_index.core import (
    KnowledgeGraphIndex,
    PropertyGraphIndex,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
    PromptTemplate
)
from llama_index.core.graph_stores import SimpleGraphStore
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.graph_stores.nebula import NebulaGraphStore
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.readers.json import JSONReader
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.query_pipeline import InputComponent, Link, QueryPipeline
from pyvis.network import Network
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import PropertyGraphIndex
from llama_index.readers.web import SimpleWebPageReader
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor








In [2]:
# Jupyter already runs an asyncio event loop; nest_asyncio.apply() patches
# asyncio so code executed later in the notebook can run its own loop
# inside the running one instead of raising "event loop is already running".
import nest_asyncio

nest_asyncio.apply()

#### Variáveis de Ambiente

In [3]:
from dotenv import load_dotenv

# Merge the key/value pairs of a local .env file into the process environment.
load_dotenv()

# API keys and connection settings read from the environment.
# Each name is bound to None when the corresponding variable is not set.
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")
HUGGING_FACE_TOKEN = os.environ.get("HUGGING_FACE_TOKEN")
OPEN_API_KEY = os.environ.get("OPEN_API_KEY")
NEO4J_API_KEY = os.environ.get("NEO4J_API_KEY")

# NebulaGraph connection details.
NEBULA_USER = os.environ.get("NEBULA_USER")
NEBULA_PASSWORD = os.environ.get("NEBULA_PASSWORD")
NEBULA_ADDRESS = os.environ.get("NEBULA_ADDRESS")

#### Embeddings

#### Carregando Arquivos

In [4]:

# Neo4j-backed property graph store.
# Connection settings come from the environment so credentials do not have to
# live in the notebook; the fallbacks are the values this notebook previously
# hardcoded, so behavior is unchanged when the variables are not set.
graph_store = Neo4jPropertyGraphStore(
    username=os.getenv("NEO4J_USERNAME", "neo4j"),
    password=os.getenv("NEO4J_PASSWORD", "llamaindex"),
    url=os.getenv("NEO4J_URL", "bolt://localhost:7687"),
    database=os.getenv("NEO4J_DATABASE", "neo4j"),
)


#### Configurações

In [None]:
# Read every file found under ./Docs into a list of documents and report
# how many were loaded.
reader = SimpleDirectoryReader("./Docs")
docs = reader.load_data()
print(len(docs))



7


In [7]:
# Default LLM, registered globally through Settings.
# Local alternative (disabled):
# llm = Ollama(model="llama3.1:8b",
#              request_timeout=1000, temperature=0.0)
llm = OpenAI(
    model="gpt-4o-mini-2024-07-18",
    api_key=OPEN_API_KEY,
)
Settings.llm = llm

# Embedding model loaded through HuggingFace.
# Hosted alternative (disabled):
# Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large", api_key=OPEN_API_KEY)
embed_model = HuggingFaceEmbedding(
    "dunzhang/stella_en_400M_v5",
    trust_remote_code=True,
)
Settings.embed_model = embed_model

# Chunk size applied when documents are split into nodes.
Settings.chunk_size = 512

Some weights of the model checkpoint at dunzhang/stella_en_400M_v5 were not used when initializing NewModel: ['new.pooler.dense.bias', 'new.pooler.dense.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:

# Directory the index is persisted to. It must match the path used by the
# save cell later in the notebook ("./storage"); the previous check looked at
# "./storages", so the cached branch could never be taken.
PERSIST_DIR = "./storage"

if os.path.exists(PERSIST_DIR):
    # Reuse the persisted index instead of re-running LLM path extraction.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    # Bind the same name (`index`) as the build branch so later cells work
    # regardless of which branch ran.
    index = load_index_from_storage(storage_context, index_id="vector_index")

    print("Loaded existing PropertyGraphIndex.")
else:
    # Build the property graph from the loaded documents.
    #
    # PropertyGraphIndex takes `kg_extractors` (a list) and `embed_kg_nodes`;
    # the earlier `kg_extractor=`, `include_embeddings=` and `Settings=`
    # keywords are not parameters of this class and were silently ignored,
    # so the configured extractor was never used.
    #
    # NOTE(review): the earlier `graph_store=graph_store` keyword was ignored
    # for the same reason, so the index has been using the default in-memory
    # graph store. That is kept here because the persist/load logic and the
    # later `save_networkx_graph` call rely on it. To write to Neo4j instead,
    # pass `property_graph_store=graph_store` and revisit those cells.
    index = PropertyGraphIndex.from_documents(
        docs,
        kg_extractors=[
            SimpleLLMPathExtractor(
                llm=llm,
                max_paths_per_chunk=30,
                num_workers=6,
            )
        ],
        embed_kg_nodes=True,
        show_progress=True,
    )


Parsing nodes:   0%|          | 0/7 [00:00<?, ?it/s]

Extracting paths from text: 100%|██████████| 15/15 [00:13<00:00,  1.09it/s]
Extracting implicit paths: 100%|██████████| 15/15 [00:00<00:00, 39469.61it/s]
Generating embeddings: 100%|██████████| 2/2 [00:06<00:00,  3.25s/it]
Generating embeddings: 100%|██████████| 28/28 [00:54<00:00,  1.94s/it]


In [None]:
# Persist the index only when nothing has been saved yet.
persist_dir = "./storage"
already_persisted = os.path.exists(persist_dir)

if not already_persisted:
    print("GraphIndex created.")

    # Tag the index with the id "vector_index", then write it to disk.
    index.set_index_id("vector_index")
    index.storage_context.persist(persist_dir)

GraphIndex created.


In [20]:
# Write the graph held by the index's property graph store to ./kg1.html.
# NOTE(review): presumably only the in-memory store offers
# save_networkx_graph; confirm before switching to another graph store.
pg_store = index.property_graph_store
pg_store.save_networkx_graph(name="./kg1.html")