In [1]:
import os
from typing import List
import json
import argparse
import logging
import pandas as pd
from tqdm import tqdm
import evaluate
import shutil

# Load the `evaluate` metrics used to score the generated answers:
# BLEU, ROUGE and METEOR (loaded in that order).
bleu_metric, rouge_metric, meteor_metric = (
    evaluate.load(metric_name) for metric_name in ("bleu", "rouge", "meteor")
)

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package wordnet to /Users/taindp/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/taindp/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /Users/taindp/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [2]:
import sys

# Make the parent of the current working directory importable so that the
# `src` package resolves. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate "../" entry to sys.path.
if "../" not in sys.path:
    sys.path.append("../")

In [3]:
# HippoRAG comes from the local `src` package; the import relies on "../"
# having been appended to sys.path beforehand.
from src.hipporag import HippoRAG
from dotenv import load_dotenv
# Load environment variables from a .env file. Presumably this supplies the
# API credentials needed by the LLM backend — TODO confirm which variables
# are required.
load_dotenv()

2025-05-20 16:11:18,770	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


True

In [None]:
# --- Experiment configuration ---
# Major code; it selects the input files ../data/courses_{major}.json and
# ../data/{major}_opened_end.json that this cell reads.
major = "MCS"
# Value passed to HippoRAG as `dataset`. None is the "HippoRAG base" setting;
# alternative: dataset_style = "hcmus"
dataset_style = None
save_dir = 'outputs/openai_test'  # Save directory for HippoRAG objects (each LLM/Embedding model combination will create a new subdirectory)
llm_model_name = 'gpt-4o-mini'  # Any OpenAI model name
# Embedding model name (NV-Embed, GritLM or Contriever for now). Alternatives:
# embedding_model_name = 'text-embedding-3-small'
# embedding_model_name = "nvidia/NV-Embed-v2"
# embedding_model_name = "GritLM/GritLM-7B"
embedding_model_name = "facebook/contriever"

# WARNING: this deletes the whole "outputs" tree, not only `save_dir`, every
# time the cell runs, so anything previously saved under it is lost.
# Presumably intended to force a fresh index on each run — confirm before
# reusing this cell alongside other experiments that write to "outputs".
# ignore_errors=True keeps a missing directory from raising.
shutil.rmtree("outputs", ignore_errors=True)

# Load the course corpus for the selected major. The encoding is passed
# explicitly because open() otherwise uses a locale-dependent default, which
# can fail on (or garble) non-ASCII text on some platforms.
corpus_path = f"../data/courses_{major}.json"
with open(corpus_path, "r", encoding="utf-8") as f:
    corpus = json.load(f)

# One passage per corpus entry: the title on the first line, then the text.
docs = [f"{doc['title']}\n{doc['text']}" for doc in corpus]

# Collect the constructor arguments, then create the HippoRAG instance.
hipporag_config = {
    "save_dir": save_dir,
    "llm_model_name": llm_model_name,
    "embedding_model_name": embedding_model_name,
    "dataset": dataset_style,  # HippoRAG base
}
hipporag = HippoRAG(**hipporag_config)

# Index the corpus passages and report the resulting graph statistics.
hipporag.index(docs=docs)
print(f"major: {major} | style: {dataset_style} | Graph info: {hipporag.get_graph_info()}")

# Load the open-ended QA set. The file is opened in a `with` block so the
# handle is closed as soon as parsing finishes (the previous bare open() call
# never closed it), and it is decoded explicitly as UTF-8 rather than with the
# locale-dependent default.
with open(f"../data/{major}_opened_end.json", "r", encoding="utf-8") as qa_file:
    open_end_qa_ds = pd.DataFrame(json.load(qa_file))

queries = open_end_qa_ds["question"].tolist()
references = open_end_qa_ds["answer"].tolist()
# One single-element list of gold passages per question, built from the first
# paragraph of each record as "<title>\n<text>".
gold_docs = [
    [f"{item[0]['title']}\n{item[0]['text']}"]
    for item in open_end_qa_ds["paragraphs"].tolist()
]

# Run retrieval + QA over all questions. With gold_docs / gold_answers
# supplied, the result is unpacked into five values, the last two being the
# overall retrieval and QA results.
queries_solutions, all_response_message, all_metadata, overall_retrieval_result, overall_qa_results = hipporag.rag_qa(
    queries=queries,
    gold_docs=gold_docs,
    gold_answers=references
)
print(f"major: {major} | style: {dataset_style} | Overall retrieval results: {overall_retrieval_result}")


def _extract_answer(response: str, marker: str = "Answer: ") -> str:
    """Return the text that follows the first ``marker`` in ``response``.

    When the marker is present, the result is the same segment the previous
    ``response.split(marker)[1]`` expression produced. When the marker is
    absent, the whole response is returned instead of raising IndexError, so
    one malformed LLM reply no longer aborts the evaluation.
    """
    segments = response.split(marker)
    return segments[1] if len(segments) > 1 else response


predictions = [_extract_answer(item) for item in all_response_message]

# Score the predictions against the reference answers with each metric.
metric_inputs = {"predictions": predictions, "references": references}
bleu_results = bleu_metric.compute(**metric_inputs)
rouge_results = rouge_metric.compute(**metric_inputs)
meteor_results = meteor_metric.compute(**metric_inputs)

# Report each headline score as a percentage string with two decimals.
headline_scores = (
    ("bleu", bleu_results["bleu"]),
    ("meteor", meteor_results["meteor"]),
    ("rougeL", rouge_results["rougeL"]),
)
open_end_results = {label: f"{score * 100:.2f}" for label, score in headline_scores}
print(f"major: {major} | style: {dataset_style} | Open-end results: {open_end_results}")

Batch Encoding: 96it [00:10,  8.87it/s]                        
NER: 100%|██████████| 90/90 [00:20<00:00,  4.32it/s, total_prompt_tokens=42395, total_completion_tokens=11919, num_cache_hit=0]
Extracting triples: 100%|██████████| 90/90 [01:11<00:00,  1.26it/s, total_prompt_tokens=79295, total_completion_tokens=38700, num_cache_hit=0]
Batch Encoding: 1584it [00:05, 310.50it/s]                          
Batch Encoding: 1984it [00:10, 197.60it/s]                          
90it [00:00, 2110.14it/s]
90it [00:00, 5011.91it/s]
KNN for Queries: 100%|██████████| 2/2 [00:00<00:00,  3.84it/s]
100%|██████████| 1578/1578 [00:00<00:00, 51375.57it/s]


{'num_phrase_nodes': 1578, 'num_passage_nodes': 90, 'num_total_nodes': 1668, 'num_extracted_triples': 1971, 'num_triples_with_passage_node': 2111, 'num_synonymy_triples': 5508, 'num_total_triples': 9590}
major: MCS | style: None | Graph info: {'num_phrase_nodes': 1578, 'num_passage_nodes': 90, 'num_total_nodes': 1668, 'num_extracted_triples': 1971, 'num_triples_with_passage_node': 2111, 'num_synonymy_triples': 5508, 'num_total_triples': 9590}


Batch Encoding: 32it [00:00, 111.79it/s]              
Batch Encoding: 32it [00:00, 178.33it/s]              
Retrieving: 100%|██████████| 25/25 [00:56<00:00,  2.26s/it]
Collecting QA prompts: 100%|██████████| 25/25 [00:00<00:00, 8143.65it/s]
QA Reading: 100%|██████████| 25/25 [02:11<00:00,  5.27s/it]
Extraction Answers from LLM Response: 25it [00:00, 18601.67it/s]


major: MCS | style: None | Overall retrieval results: {'Recall@1': 0.28, 'Recall@2': 0.52, 'Recall@5': 0.88, 'Recall@10': 0.92, 'Recall@20': 0.92, 'Recall@30': 0.92, 'Recall@50': 0.92, 'Recall@100': 0.92, 'Recall@150': 0.92, 'Recall@200': 0.92}
major: MCS | style: None | Open-end results: {'bleu': '10.14', 'meteor': '30.87', 'rougeL': '35.85'}
