In [1]:
# Imports (the .env file itself is loaded further down, by load_dotenv())
import pandas as pd
from llama_index.core.evaluation import (
    RetrieverEvaluator,
    get_retrieval_results_df,
)
import os
import logging
import pandas as pd
from dotenv import load_dotenv
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.readers.file import PyMuPDFReader
from llama_index.core.node_parser import SentenceSplitter
from datetime import datetime
from llama_index.core import (
    StorageContext, VectorStoreIndex, SimpleDirectoryReader, 
    get_response_synthesizer, Settings
)
import traceback
from llama_index.core.evaluation import (
    generate_question_context_pairs,
    EmbeddingQAFinetuneDataset,
)
from llama_index.core.evaluation import generate_question_context_pairs, QueryResponseDataset
from llama_index.llms.openai import OpenAI

import nest_asyncio

nest_asyncio.apply()

from pathlib import Path
from llama_index.readers.file import PDFReader
from llama_index.core.response.notebook_utils import display_source_node
from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import VectorStoreIndex
from llama_index.llms.openai import OpenAI
import json
from llama_index.core import Document
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import IndexNode
from llama_index.core.embeddings import resolve_embed_model

######
import nest_asyncio
from llama_index.core import Settings


nest_asyncio.apply()
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import IndexNode
from llama_index.core.extractors import (
    SummaryExtractor,
    QuestionsAnsweredExtractor,
)


# Read the local .env file so the variables below can be found in the
# process environment.
load_dotenv()

# Credentials and endpoints; each one is None when the variable is not set.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
AZURE_API_KEY = os.environ.get("AZURE_API_KEY")
AZURE_DEPLOYMENT_NAME = os.environ.get("AZURE_DEPLOYMENT_NAME")
AZURE_API_VERSION = os.environ.get("AZURE_API_VERSION")
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")

# Azure OpenAI chat model (labelled "GPT35" by the author), run at
# temperature 0 and registered as the default LLM.
llm = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_API_KEY,
    api_version=AZURE_API_VERSION,
    deployment_name=AZURE_DEPLOYMENT_NAME,
    temperature=0,
)
Settings.llm = llm

# Hugging Face embedding models to compare. No embedding model is put on
# Settings here; the evaluation loop assigns one per model.
embedding_models = [
    "BAAI/bge-large-en-v1.5",
    "BAAI/bge-base-en-v1.5",
    "BAAI/bge-small-en-v1.5",
    "Alibaba-NLP/gte-large-en-v1.5",
    "mixedbread-ai/mxbai-embed-large-v1",
    "WhereIsAI/UAE-Large-V1",
    "avsolatorio/GIST-large-Embedding-v0",
    "Labib11/MUG-B-1.6",
]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the EU AI Act PDF and keep only the first 20 documents returned by
# the reader (presumably one document per page -- confirm against the
# PyMuPDFReader documentation).
# Earlier runs used ./LL144.pdf together with ./LL144_Definitions.pdf.
loader = PyMuPDFReader()
docs = loader.load(file_path="./EUAIACT.pdf")[:20]

# Split the retained documents into nodes with chunk_size=512.
node_parser = SentenceSplitter(chunk_size=512)
base_nodes = node_parser.get_nodes_from_documents(docs)

In [3]:
# Overwrite every node id with a deterministic value (node-0, node-1, ...)
# so the ids stay the same from one run to the next.
for idx, node in enumerate(base_nodes):
    node.id_ = f"node-{idx}"

# Print a short preview only: printing the whole list would dump the full
# text of every node into the notebook.
print(f"{len(base_nodes)} nodes; first ids: {[n.id_ for n in base_nodes[:3]]}")



In [7]:
# Sanity check: how many nodes base_nodes holds.
print(len(base_nodes))

# Alternative to generating a dataset: load one that was saved earlier.
#eval_dataset = EmbeddingQAFinetuneDataset.from_json("euaiact_retriever_eval_small.json")

22


In [3]:
# Building the question/context pairs sends every node to the LLM, which is
# slow and costs API calls. Reuse the saved dataset when it is already on
# disk so that re-running the notebook evaluates against the same questions.
# Delete the file to force regeneration -- this is required after changing
# the source documents or the chunking, because the dataset refers to nodes
# by id.
EVAL_DATASET_PATH = "euaiact_retriever_eval_small2.json"

if os.path.exists(EVAL_DATASET_PATH):
    eval_dataset = EmbeddingQAFinetuneDataset.from_json(EVAL_DATASET_PATH)
else:
    eval_dataset = generate_question_context_pairs(
        base_nodes, llm, num_questions_per_chunk=5
    )
    eval_dataset.save_json(EVAL_DATASET_PATH)

  0%|          | 0/22 [00:00<?, ?it/s]

100%|██████████| 22/22 [00:36<00:00,  1.67s/it]


In [4]:
def display_results(name, eval_results, metric_names=None):
    """Summarise retrieval evaluation results as a one-row DataFrame.

    Despite the name, nothing is displayed; the summary is returned.

    Args:
        name: Label stored in the ``retrievers`` column.
        eval_results: Iterable of objects exposing a ``metric_vals_dict``
            mapping of metric name to value, one per evaluation result.
        metric_names: Names of the metrics to average. When omitted, the
            module-level ``metrics`` list is used.

    Returns:
        A DataFrame with a single row: ``retrievers`` followed by the mean
        of each requested metric. A metric that appears in none of the
        results (which includes the case of no results at all) is reported
        as NaN instead of raising ``KeyError``.
    """
    if metric_names is None:
        # Resolved at call time, so the list may be defined after this
        # function in the notebook.
        metric_names = metrics

    full_df = pd.DataFrame([result.metric_vals_dict for result in eval_results])

    summary = {"retrievers": [name]}
    for metric_name in metric_names:
        if metric_name in full_df.columns:
            summary[metric_name] = [full_df[metric_name].mean()]
        else:
            summary[metric_name] = [float("nan")]

    return pd.DataFrame(summary)


# Default metric set averaged by display_results().
metrics = ["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"]
#eval_dataset.save_json("euaiact_retriever_eval_small.json")

In [5]:
top_k = 10
results_dfs = []
#embedding_models = ["Alibaba-NLP/gte-large-en-v1.5"]#, "mixedbread-ai/mxbai-embed-large-v1", "WhereIsAI/UAE-Large-V1", "avsolatorio/GIST-large-Embedding-v0", "BAAI/bge-large-en-v1.5", "Labib11/MUG-B-1.6"]
embedding_models = ["mixedbread-ai/mxbai-embed-large-v1", "WhereIsAI/UAE-Large-V1", "avsolatorio/GIST-large-Embedding-v0", "Labib11/MUG-B-1.6"]

for embedding_model in embedding_models:
    embed_model = HuggingFaceEmbedding(model_name=embedding_model, trust_remote_code=True)
    Settings.embed_model = embed_model
    base_index = VectorStoreIndex(base_nodes, embed_model=embed_model)
    base_retriever = base_index.as_retriever(similarity_top_k=top_k)
    retriever_evaluator = RetrieverEvaluator.from_metric_names(
        ["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"], retriever=base_retriever
    )
    # try it out on an entire dataset
    results_base = await retriever_evaluator.aevaluate_dataset(
        eval_dataset, show_progress=True
    )
    metrics = ["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"]
    results_df = display_results(embedding_model, results_base)
    results_dfs.append(results_df)
    print(results_df)

final_results = pd.concat(results_dfs)
final_results

100%|██████████| 110/110 [00:29<00:00,  3.71it/s]


                           retrievers  hit_rate       mrr  precision  recall  \
0  mixedbread-ai/mxbai-embed-large-v1       1.0  0.900909        0.1     1.0   

         ap      ndcg  
0  0.900909  0.203722  


100%|██████████| 110/110 [00:06<00:00, 17.86it/s]


               retrievers  hit_rate       mrr  precision  recall        ap  \
0  WhereIsAI/UAE-Large-V1       1.0  0.906288        0.1     1.0  0.906288   

       ndcg  
0  0.204595  


100%|██████████| 110/110 [00:06<00:00, 17.58it/s]


                            retrievers  hit_rate       mrr  precision  \
0  avsolatorio/GIST-large-Embedding-v0  0.990909  0.882576   0.099091   

     recall        ap     ndcg  
0  0.990909  0.882576  0.20026  


100%|██████████| 110/110 [00:06<00:00, 17.16it/s]

          retrievers  hit_rate       mrr  precision  recall        ap  \
0  Labib11/MUG-B-1.6       1.0  0.920758        0.1     1.0  0.920758   

       ndcg  
0  0.206989  





Unnamed: 0,retrievers,hit_rate,mrr,precision,recall,ap,ndcg
0,mixedbread-ai/mxbai-embed-large-v1,1.0,0.900909,0.1,1.0,0.900909,0.203722
0,WhereIsAI/UAE-Large-V1,1.0,0.906288,0.1,1.0,0.906288,0.204595
0,avsolatorio/GIST-large-Embedding-v0,0.990909,0.882576,0.099091,0.990909,0.882576,0.20026
0,Labib11/MUG-B-1.6,1.0,0.920758,0.1,1.0,0.920758,0.206989


In [6]:
# The positional index carries no information, so keep it out of the CSV
# (otherwise it is written as an unnamed first column).
final_results.to_csv('embedding_euaiact_2.csv', index=False)