In [1]:
import os
import logging
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from nano_graphrag import GraphRAG, QueryParam
from nano_graphrag.base import BaseKVStorage
from nano_graphrag._utils import compute_args_hash, wrap_embedding_func_with_attrs

In [2]:
# Without this patch the notebook hit "RuntimeError: This event loop is already running".
# nest_asyncio.apply() is called once, before any async code below is run.
import nest_asyncio
nest_asyncio.apply()

In [3]:
# === Setup logging ===
# Root logging is configured at WARNING; the "nano-graphrag" logger is set to INFO.
logging.basicConfig(level=logging.WARNING)
logging.getLogger("nano-graphrag").setLevel(logging.INFO)

In [4]:
# Directory passed to GraphRAG as working_dir (and to SentenceTransformer as cache_folder).
WORKING_DIR = "./nano_graphrag_cache_flanT5"
# Directory passed as cache_dir to the Hugging Face from_pretrained() calls.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
cache_dir = "/mnt/lustre/scratch/nlsas/home/ulc/cursos/curso341/transformers/GRAPH_RAG2/cache"


# Embedding

In [5]:
# === Load embedding model ===
# The sentence-embedding model is pinned to the CPU.
# NOTE(review): cache_folder is WORKING_DIR rather than cache_dir, so the downloaded
# model files share a directory with the GraphRAG index files — confirm this is intended.
EMBED_MODEL = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2", cache_folder=WORKING_DIR, device="cpu"
)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


In [6]:
@wrap_embedding_func_with_attrs(
    embedding_dim=EMBED_MODEL.get_sentence_embedding_dimension(),
    max_token_size=EMBED_MODEL.max_seq_length,
)
async def local_embedding(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with the module-level ``EMBED_MODEL``.

    Args:
        texts: Strings to encode.

    Returns:
        The result of ``EMBED_MODEL.encode`` called with
        ``normalize_embeddings=True``.
    """
    vectors = EMBED_MODEL.encode(texts, normalize_embeddings=True)
    return vectors

# Retriever

In [7]:
# === Configuration ===
# NOTE(review): this first assignment is overwritten by the BgGPT assignment further
# down in this cell, so it has no effect; the last assignment to llm_model_name wins.
llm_model_name = "google/flan-t5-base"
# Alternative checkpoints that were tried (kept disabled):
#llm_model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
#llm_model_name = "facebook/bart-large"
#llm_model_name = "EleutherAI/gpt-j-6B"

#llm_tokenizer_name = "Qwen/Qwen2.5-7B"
#llm_model_name = "Qwen/Qwen2.5-7B"

# Active checkpoint: tokenizer and model come from the same repository.
llm_tokenizer_name = "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0"
llm_model_name = "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0"
# Use the GPU when torch reports CUDA as available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Show which device the configuration cell selected.
print('DEVICE:', DEVICE)

DEVICE: cuda


: 

In [9]:
# === Load the LLM and its tokenizer ===
# The checkpoint is loaded as a causal (decoder-only) LM; the Seq2Seq loader is kept
# commented out below for encoder-decoder checkpoints.
# NOTE(review): no dtype is passed to from_pretrained(), so the library default is used;
# for a 27B checkpoint confirm that the memory budget allows this.
tokenizer = AutoTokenizer.from_pretrained(llm_tokenizer_name, cache_dir = cache_dir)
#llm_model = AutoModelForSeq2SeqLM.from_pretrained(llm_model_name, cache_dir = cache_dir)
llm_model = AutoModelForCausalLM.from_pretrained(llm_model_name, cache_dir = cache_dir)

tokenizer_config.json:   0%|          | 0.00/47.1k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/853 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/42.8k [00:00<?, ?B/s]

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

model-00001-of-00012.safetensors:   0%|          | 0.00/4.74G [00:00<?, ?B/s]

model-00004-of-00012.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00005-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00007-of-00012.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00006-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00002-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00008-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00009-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00010-of-00012.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00011-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00012-of-00012.safetensors:   0%|          | 0.00/680M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/12 [00:00<?, ?it/s]

: 

: 

In [None]:
# Smoke test: run one greedy generation to check that the model and tokenizer work together.
# DEVICE comes from the configuration cell and is not redefined here.
# my_llm_complete() sends its inputs to DEVICE, so the model has to be moved there as well.
llm_model = llm_model.to(DEVICE)

prompt = "Can you explain gravity in simple terms?"

inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(DEVICE)

with torch.no_grad():
    output = llm_model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=False,
        num_beams=1
    )

# Decoder-only (causal) models return the prompt tokens followed by the completion,
# so the prompt part is dropped; encoder-decoder models return only the completion.
generated = output[0]
if not llm_model.config.is_encoder_decoder:
    generated = generated[inputs["input_ids"].shape[-1]:]

result = tokenizer.decode(generated, skip_special_tokens=True)
print("LLM Response:", result)

LLM Response: gravity is the motion of a solid or solid object in the Earth's orbit.


In [None]:
async def my_llm_complete(
    prompt, system_prompt=None, history_messages=None, **kwargs
) -> str:
    """Generate a completion for ``prompt`` with the module-level ``llm_model``.

    The system prompt (if any), the text of earlier conversation turns (if any)
    and ``prompt`` are joined with newlines into one flat input and decoded
    greedily. With no history the input is identical to
    ``f"{system_prompt}\\n{prompt}"`` (or just ``prompt``).

    Args:
        prompt: The user prompt for this turn.
        system_prompt: Optional text placed before everything else.
        history_messages: Optional earlier turns. Each item is expected to be a
            dict with a ``"content"`` entry (role information is not used);
            any other item is converted with ``str()``. ``None`` means no history.
        **kwargs: Accepted for caller compatibility and ignored.

    Returns:
        The generated text only (the prompt is not echoed back).

    Note:
        Inputs are moved to ``DEVICE``; ``llm_model`` must already be on that device.
    """
    # None instead of a mutable [] default; both mean "no earlier turns".
    earlier_turns = history_messages or []

    parts = []
    if system_prompt:
        parts.append(system_prompt)
    for message in earlier_turns:
        if isinstance(message, dict):
            parts.append(str(message.get("content", "")))
        else:
            parts.append(str(message))
    parts.append(prompt)
    input_text = "\n".join(parts)

    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
    with torch.no_grad():
        output_tokens = llm_model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=False,
            num_beams=1,
        )

    # Decoder-only (causal) models return prompt + completion; keep only the completion.
    # Encoder-decoder models already return just the completion.
    generated = output_tokens[0]
    if not llm_model.config.is_encoder_decoder:
        generated = generated[inputs["input_ids"].shape[-1]:]

    output_text = tokenizer.decode(generated, skip_special_tokens=True)
    print("🧪 LLM output:", output_text)
    return output_text

In [None]:
# Manual check of my_llm_complete() with a named-entity extraction prompt.
prompt = "Please extract the named entities and their types (Person, Location, Date, etc.) from the following text: 'Albert Einstein was born in Ulm, Germany, in 1879. He is known for developing the theory of relativity.' Return the result in the following format:[ {'entity': 'Albert Einstein', 'type': 'Person'}, {'entity': 'Ulm', 'type': 'Location'}, {'entity': 'Germany', 'type': 'Location'}, {'entity': '1879', 'type': 'Date'} ]"
await my_llm_complete(prompt)

🧪 LLM output: Albert Einstein , Birth Date


'Albert Einstein , Birth Date'

# RAG

In [None]:
# === Utility ===
def remove_if_exist(file):
    """Delete ``file`` if it is present; do nothing when it is absent.

    The removal is attempted directly instead of after an ``os.path.exists``
    check, so a file that disappears between a check and the removal cannot
    make this function raise.

    Args:
        file: Path of the file to delete.

    Raises:
        OSError: Any removal failure other than the path not existing
            (for example, the path is a directory or permission is denied).
    """
    try:
        os.remove(file)
    except (FileNotFoundError, NotADirectoryError):
        # The path does not exist (or a parent component is not a directory):
        # there is nothing to remove, which is the expected no-op case.
        pass

In [None]:
def get_rag(enable_cache=False):
    """Build a GraphRAG instance wired to the local LLM and embedding functions.

    Args:
        enable_cache: Forwarded to GraphRAG as ``enable_llm_cache``.

    Returns:
        A new GraphRAG object that uses ``WORKING_DIR``, ``my_llm_complete`` for
        both the best and the cheap model, and ``local_embedding`` for embeddings.
    """
    rag_options = dict(
        working_dir=WORKING_DIR,
        enable_llm_cache=enable_cache,
        best_model_func=my_llm_complete,
        cheap_model_func=my_llm_complete,
        embedding_func=local_embedding,
    )
    return GraphRAG(**rag_options)

In [None]:
# === Main functions ===
def query(question="What is X"):
    """Run ``question`` through a new GraphRAG instance in "global" mode and print the answer.

    Args:
        question: The question text passed to ``GraphRAG.query``.
    """
    engine = get_rag()
    global_params = QueryParam(mode="global")
    answer = engine.query(question, param=global_params)
    print(answer)

In [None]:
def insert(dataset_path="./dataset/test.txt"):
    """Index the text in ``dataset_path`` with GraphRAG and return that text.

    Before indexing, a fixed set of files is removed from ``WORKING_DIR`` if
    they exist (presumably the artifacts of an earlier indexing run).

    Args:
        dataset_path: Text file to index. Defaults to the path this notebook
            has always used, so existing ``insert()`` calls behave the same.

    Returns:
        The full text that was read and passed to ``GraphRAG.insert``.
    """
    from time import perf_counter

    # utf-8-sig drops a leading byte-order mark when the file has one.
    with open(dataset_path, encoding="utf-8-sig") as f:
        text = f.read()

    stale_artifacts = (
        "vdb_entities.json",
        "kv_store_full_docs.json",
        "kv_store_text_chunks.json",
        "kv_store_community_reports.json",
        "graph_chunk_entity_relation.graphml",
    )
    for artifact in stale_artifacts:
        remove_if_exist(f"{WORKING_DIR}/{artifact}")

    rag = get_rag()
    # perf_counter is monotonic, so the elapsed time cannot be skewed by clock adjustments.
    start = perf_counter()
    rag.insert(text)
    print("indexing time:", perf_counter() - start)
    return text

# Main

In [None]:
# Switch the "nano-graphrag" logger to DEBUG for this run.
logging.getLogger("nano-graphrag").setLevel(logging.DEBUG)
# === Entry point ===
# Index the dataset with insert(), keep the returned text, then run query() with its default question.
if __name__ == "__main__":
    FAKE_TEXT = insert()
    query()

DEBUG:nano-graphrag:GraphRAG init with param:

  working_dir = ./nano_graphrag_cache_flanT5,
  enable_local = True,
  enable_naive_rag = False,
  chunk_func = <function chunking_by_token_size at 0x1521626320e0>,
  chunk_token_size = 1200,
  chunk_overlap_token_size = 100,
  tiktoken_model_name = gpt-4o,
  entity_extract_max_gleaning = 1,
  entity_summary_to_max_tokens = 500,
  graph_cluster_algorithm = leiden,
  max_graph_cluster_size = 10,
  graph_cluster_seed = 3735928559,
  node_embedding_algorithm = node2vec,
  node2vec_params = {'dimensions': 1536, 'num_walks': 10, 'walk_length': 40, 'window_size': 2, 'iterations': 3, 'random_seed': 3},
  special_community_report_llm_kwargs = {'response_format': {'type': 'json_object'}},
  embedding_func = {'embedding_dim': 384, 'max_token_size': 256, 'func': <function local_embedding at 0x15216004c280>},
  embedding_batch_num = 32,
  embedding_func_max_async = 16,
  query_better_than_threshold = 0.2,
  using_azure_openai = False,
  best_model_fun

🧪 LLM output: Alex clenched his jaw, buzz of frustration, Jordan , Jordan
🧪 LLM output: MANY entities were missed in the last extraction.
🧪 LLM output: Alex clenched his jaw, buzz of frustration, Jordan , Jordan
🧪 LLM output: MANY entities were missed in the last extraction.
⠹ Processed 2 chunks, 0 entities(duplicated), 0 relations(duplicated)

INFO:nano-graphrag:Writing graph with 0 nodes, 0 edges
DEBUG:nano-graphrag:GraphRAG init with param:

  working_dir = ./nano_graphrag_cache_flanT5,
  enable_local = True,
  enable_naive_rag = False,
  chunk_func = <function chunking_by_token_size at 0x1521626320e0>,
  chunk_token_size = 1200,
  chunk_overlap_token_size = 100,
  tiktoken_model_name = gpt-4o,
  entity_extract_max_gleaning = 1,
  entity_summary_to_max_tokens = 500,
  graph_cluster_algorithm = leiden,
  max_graph_cluster_size = 10,
  graph_cluster_seed = 3735928559,
  node_embedding_algorithm = node2vec,
  node2vec_params = {'dimensions': 1536, 'num_walks': 10, 'walk_length': 40, 'window_size': 2, 'iterations': 3, 'random_seed': 3},
  special_community_report_llm_kwargs = {'response_format': {'type': 'json_object'}},
  embedding_func = {'embedding_dim': 384, 'max_token_size': 256, 'func': <function local_embedding at 0x15216004c280>},
  embedding_batch_num = 32,
  embedding_func_max_async = 16,
  query_better_than_threshol


indexing time: 1.455812692642212


INFO:nano-graphrag:Loaded graph from ./nano_graphrag_cache_flanT5/graph_chunk_entity_relation.graphml with 0 nodes, 0 edges
INFO:nano-vectordb:Load (0, 384) data
INFO:nano-vectordb:Init {'embedding_dim': 384, 'metric': 'cosine', 'storage_file': './nano_graphrag_cache_flanT5/vdb_entities.json'} 0 data


Sorry, I'm not able to provide an answer to that question.


In [None]:
# Display the text returned by insert() in the entry-point cell.
FAKE_TEXT

"X is a person who often goes unnoticed at first glance. There’s nothing about them that demands immediate attention, no loud or flashy traits to captivate the eye. Yet, in the moments that X does speak, or when they do step forward, something about them lingers in the air, a subtle yet undeniable presence. It's not that they are intentionally mysterious or reserved, but more that X carries a quiet, understated strength that others find both intriguing and comforting.\nPeople who know X often describe them as a paradox: someone who can be both invisible and omnipresent at the same time. In a crowded room, X might seem to fade into the background, their demeanor calm, unassuming. But even in their silence, there's something magnetic about them—an energy that others can't quite place. It isn’t brash or loud; it’s almost as though they are simply so in tune with the world around them that it becomes impossible to ignore them once you notice their subtle influence.\nX’s way of moving throu