In [1]:
import os
from typing import List
import json
import argparse
import logging
import pandas as pd
from tqdm import tqdm
import evaluate
import shutil

# Load the BLEU, ROUGE and METEOR metrics through `evaluate.load`.
# The names are resolved in this order (bleu, rouge, meteor) and the loaded
# metric objects are bound to module-level variables for later `compute` calls.
bleu_metric, rouge_metric, meteor_metric = (
    evaluate.load(metric_name) for metric_name in ("bleu", "rouge", "meteor")
)

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package wordnet to /home/dptn1/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/dptn1/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/dptn1/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [2]:
import sys

# Put the parent directory on the import path so the `src` package can be
# imported from this notebook. The membership check keeps the cell idempotent:
# re-running it does not append a duplicate "../" entry to `sys.path`.
if "../" not in sys.path:
    sys.path.append("../")

In [3]:
# Third-party helper first, then the project-local HippoRAG class.
from dotenv import load_dotenv
from src.hipporag import HippoRAG

# Load variables from a `.env` file into the process environment. Kept as the
# last expression of the cell so its boolean result is shown as the cell output.
load_dotenv()

2025-05-20 21:00:21,459	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


True

In [4]:
# --- Experiment selection ----------------------------------------------------
# Major whose corpus and QA files are loaded below (alternative: "MCS").
major = "DS"
# Value passed to HippoRAG as `dataset` (alternative: "hcmus").
dataset_style = None

# --- Models ------------------------------------------------------------------
# Any OpenAI model name.
llm_model_name = 'gpt-4o-mini'
# Embedding model name (NV-Embed, GritLM or Contriever for now).
# Alternatives tried: 'text-embedding-3-small', "nvidia/NV-Embed-v2",
# "facebook/contriever".
embedding_model_name = "GritLM/GritLM-7B"

# --- Storage -----------------------------------------------------------------
# Save directory for HippoRAG objects (each LLM/Embedding model combination
# will create a new subdirectory).
save_dir = 'outputs/openai_test'

# Remove the local "outputs" directory before indexing (presumably to force a
# clean re-index on every run). NOTE(review): this deletes everything under
# "outputs", not only `save_dir` — confirm that is intended.
shutil.rmtree("outputs", ignore_errors=True)

# Load the course corpus for the selected major. The file is decoded explicitly
# as UTF-8 so the result does not depend on the platform's default encoding.
corpus_path = f"../data/courses_{major}.json"
with open(corpus_path, "r", encoding="utf-8") as f:
    corpus = json.load(f)

# Each passage handed to the indexer is "<title>\n<text>" of one corpus entry.
docs = [f"{doc['title']}\n{doc['text']}" for doc in corpus]

# Startup a HippoRAG instance
hipporag = HippoRAG(
    save_dir=save_dir,
    llm_model_name=llm_model_name,
    embedding_model_name=embedding_model_name,
    dataset=dataset_style,  # None -> HippoRAG base (per the original note)
    embedding_batch_size=4,
)

# Run indexing
hipporag.index(docs=docs)
print(f"major: {major} | style: {dataset_style} | Graph info: {hipporag.get_graph_info()}")

# Load the open-ended QA set for the selected major. The context manager closes
# the file handle deterministically (the previous bare `open(...)` never closed
# it), and the file is decoded explicitly as UTF-8.
qa_path = f"../data/{major}_opened_end.json"
with open(qa_path, "r", encoding="utf-8") as qa_file:
    open_end_qa_ds = pd.DataFrame(json.load(qa_file))

queries = open_end_qa_ds["question"].tolist()
references = open_end_qa_ds["answer"].tolist()
# One gold passage per question, built as "<title>\n<text>" from the first
# entry of "paragraphs" only; any further entries are ignored.
gold_docs = [[f"{item[0]['title']}\n{item[0]['text']}"] for item in open_end_qa_ds["paragraphs"].tolist()]

# Run retrieval + QA over all queries. Gold documents and gold answers are
# supplied here, and the call is unpacked into five values; the variant without
# gold data was unpacked into three:
#   queries_solutions, all_response_message, all_metadata = hipporag.rag_qa(queries=queries)
(
    queries_solutions,
    all_response_message,
    all_metadata,
    overall_retrieval_result,
    overall_qa_results,
) = hipporag.rag_qa(queries=queries, gold_docs=gold_docs, gold_answers=references)

print(
    f"major: {major} | style: {dataset_style} | Overall retrieval results: {overall_retrieval_result}"
)
# Marker that precedes the final answer inside each raw LLM response.
ANSWER_MARKER = "Answer: "


def extract_answer(response_message: str) -> str:
    """Return the answer portion of one raw LLM response.

    When ``ANSWER_MARKER`` occurs in the response, the text following its first
    occurrence (up to the next occurrence, if any) is returned — identical to
    ``response_message.split("Answer: ")[1]``. When the marker is absent, the
    whole response is returned instead of raising ``IndexError``, so a single
    malformed response no longer aborts the evaluation.

    Args:
        response_message: Raw response text produced for one query.

    Returns:
        The extracted answer, or the unmodified response if no marker is found.
    """
    segments = response_message.split(ANSWER_MARKER)
    return segments[1] if len(segments) > 1 else response_message


predictions = [extract_answer(message) for message in all_response_message]

# Score the extracted answers against the reference answers.
bleu_results = bleu_metric.compute(predictions=predictions, references=references)
rouge_results = rouge_metric.compute(predictions=predictions, references=references)
meteor_results = meteor_metric.compute(predictions=predictions, references=references)

# Report each score as a percentage string with two decimals.
open_end_results = {
    "bleu": f"{bleu_results['bleu'] * 100:.2f}",
    "meteor": f"{meteor_results['meteor'] * 100:.2f}",
    "rougeL": f"{rouge_results['rougeL'] * 100:.2f}",
}
print(f"major: {major} | style: {dataset_style} | Open-end results: {open_end_results}")

Loading checkpoint shards: 100%|██████████| 3/3 [00:06<00:00,  2.14s/it]


Created GritLM: torch.bfloat16 dtype, mean pool, unified mode, bbcc attn


NER: 100%|██████████| 75/75 [00:08<00:00,  8.55it/s, total_prompt_tokens=35514, total_completion_tokens=9617, num_cache_hit=0]
Extracting triples: 100%|██████████| 75/75 [00:22<00:00,  3.33it/s, total_prompt_tokens=66063, total_completion_tokens=28394, num_cache_hit=0]
Batches: 100%|██████████| 334/334 [00:10<00:00, 30.80it/s]
Batches: 100%|██████████| 410/410 [00:14<00:00, 28.98it/s]
75it [00:00, 11449.00it/s]
75it [00:00, 33479.44it/s]
KNN for Queries: 100%|██████████| 2/2 [00:00<00:00,  4.12it/s]
100%|██████████| 1333/1333 [00:00<00:00, 49414.10it/s]


{'num_phrase_nodes': 1333, 'num_passage_nodes': 75, 'num_total_nodes': 1408, 'num_extracted_triples': 1638, 'num_triples_with_passage_node': 1774, 'num_synonymy_triples': 11683, 'num_total_triples': 15095}
major: DS | style: None | Graph info: {'num_phrase_nodes': 1333, 'num_passage_nodes': 75, 'num_total_nodes': 1408, 'num_extracted_triples': 1638, 'num_triples_with_passage_node': 1774, 'num_synonymy_triples': 11683, 'num_total_triples': 15095}


Retrieving: 100%|██████████| 25/25 [00:52<00:00,  2.09s/it]
Collecting QA prompts: 100%|██████████| 25/25 [00:00<00:00, 14352.26it/s]
QA Reading: 100%|██████████| 25/25 [02:04<00:00,  4.98s/it]
Extraction Answers from LLM Response: 25it [00:00, 133576.56it/s]


major: DS | style: None | Overall retrieval results: {'Recall@1': 0.44, 'Recall@2': 0.56, 'Recall@5': 0.72}
major: DS | style: None | Open-end results: {'bleu': '5.48', 'meteor': '25.19', 'rougeL': '33.31'}
