# ATLAS Multihop QA Benchmarking

In [None]:
import os
# Restrict this process to a single GPU. The variable has to be in place
# before CUDA is initialised, which is why it is set ahead of the torch
# import. Change the value to select another device (e.g. '0' for the first GPU).
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
import torch
# Query the device count once and reuse it for both the summary and the loop.
# NOTE: indices printed below are the *visible* devices (renumbered from 0),
# not the physical GPU ids selected above.
num_gpus = torch.cuda.device_count()
print("number of GPUs available:", num_gpus)
for i in range(num_gpus):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

In [None]:
from atlas_rag.retriever import NvEmbed
from transformers import AutoModel

# Hugging Face model id of the embedding model; the benchmark config in a
# later cell reuses this name.
encoder_model_name = "nvidia/NV-Embed-v2"

# Load the checkpoint through transformers' AutoModel (not SentenceTransformer).
# trust_remote_code=True allows the model repository's own Python code to run
# locally; device_map="auto" leaves device placement to the library.
sentence_model = AutoModel.from_pretrained(
    encoder_model_name,
    trust_remote_code=True,
    device_map="auto",
)

# Wrap the raw model in the atlas_rag NvEmbed encoder used by the cells below.
sentence_encoder = NvEmbed(sentence_model)

In [None]:
from openai import OpenAI
from atlas_rag.reader import LLMGenerator
from configparser import ConfigParser

# Load the DeepInfra API key from config.ini, which must define
# DEEPINFRA_API_KEY under a [settings] section.
config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail here with a clear message instead of a
# confusing KeyError on 'settings' further down.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini not found in the current working directory; "
        "it must define DEEPINFRA_API_KEY under [settings]"
    )

# Model used to generate answers; also recorded in the benchmark config.
reader_model_name = "meta-llama/Llama-3.3-70B-Instruct"

# The stock OpenAI client is pointed at DeepInfra's OpenAI-style endpoint.
client = OpenAI(
    base_url="https://api.deepinfra.com/v1/openai",
    api_key=config['settings']['DEEPINFRA_API_KEY'],
)
llm_generator = LLMGenerator(client=client, model_name=reader_model_name)

In [None]:
from atlas_rag import create_embeddings_and_index

# Dataset keyword and the directory handed to the index builder.
# NOTE(review): the directory is a machine-specific absolute path; adjust it
# to wherever the graph data lives on your machine.
keyword = 'musique'
working_directory = '/data/httsangaj/atomic-rag/8b'

data = create_embeddings_and_index(
    sentence_encoder=sentence_encoder,
    # Reuse the id the encoder was loaded with so the two cannot drift apart.
    model_name=encoder_model_name,
    working_directory=working_directory,
    keyword=keyword,
    include_concept=True,
    include_events=True,
    normalize_embeddings=True,
    text_batch_size=64,
    node_and_edge_batch_size=64,
)

In [None]:
from atlas_rag.evaluation import BenchMarkConfig

# Settings for the benchmark run: the dataset and its question file, the
# include_concept/include_events flags (set to the same values as in the
# indexing cell), and the reader/encoder model names defined in earlier cells.
benchmark_config = BenchMarkConfig(
    dataset_name="musique",
    question_file="benchmark_data/musique.json",
    include_concept=True,
    include_events=True,
    reader_model_name=reader_model_name,
    encoder_model_name=encoder_model_name,
    number_of_samples=-1,  # -1 for all samples
)

In [None]:
from atlas_rag import setup_logger
# Create the logger from the benchmark configuration; the same logger object
# is passed to both the retriever and the benchmark in the cells below.
logger = setup_logger(benchmark_config)

In [None]:
# Instantiate the retrieval method to benchmark.
from atlas_rag.retriever import HippoRAG2Retriever

# The retriever is wired to the LLM generator, the sentence encoder, the
# output of create_embeddings_and_index (`data`) and the shared logger.
hipporag2_retriever = HippoRAG2Retriever(
    llm_generator=llm_generator,
    sentence_encoder=sentence_encoder,
    data=data,
    logger=logger,
)

In [None]:
# Run the benchmark.
from atlas_rag.evaluation import RAGBenchmark

# Build the benchmark from the shared config and logger, then run it on a
# list of retrievers (a single HippoRAG2 retriever here), passing the same
# LLM generator.
benchmark = RAGBenchmark(config=benchmark_config, logger=logger)
benchmark.run(
    [hipporag2_retriever],
    llm_generator=llm_generator,
)