In [9]:
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
import pdb

import json
from tqdm import tqdm
import pandas as pd

from llama_index.core import ServiceContext, VectorStoreIndex
from llama_index.core.schema import TextNode
from langchain.schema.document import Document
from llama_index.embeddings.openai import OpenAIEmbedding
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Evaluation with LlamaIndex

##### 비교할 모델 :
1. OpenAI_ada : OpenAI embedding "ADA"
2. bert : open source "bert-base-uncased"
3. bert_ST : Only sentence finetuned "bert-base-uncased" ( BERT )
4. bert_KR : open source "Beomi/KcBERT"
5. bert_KR_ST : Only sentence finetuned "Beomi/KcBERT" ( 한국어로 사전 학습된 BERT )
6. bert_KR_DA : Domain Adaptation
7. bert_KR_DA_ST : Domain Adaptation + sentence finetuned


- 모델 저장할때 : sts_model_save_path = "output/training_sts-"+pretrained_model_name.replace("/", "-")\
    + '-' + datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
- 이름 설정 : DomainAdaptation => _DA SentenceFintuning => _ST

### llama_index 이용 

In [3]:
def evaluate(dataset, embed_model, top_k=5, verbose=False):
    
    corpus = dataset['corpus']  # 예: {"18f9717b-1c79-428e-9cc6-43e498abf29e": "Text for document"}
    queries = dataset['queries']  # 예: {"18f9717b-1c79-428e-9cc6-43e498abf29e": "질문"}
    relevant_docs = dataset['relevant_docs']  


    service_context = ServiceContext.from_defaults(embed_model=embed_model)
    nodes = [TextNode(id_=id_, text=text) for id_, text in corpus.items()]
    index = VectorStoreIndex(
        nodes,
        service_context=service_context,
        show_progress=True
    )

    # 임베딩 계산하는 파트
    retriever = index.as_retriever(similarity_top_k=top_k)

    eval_results = []
    for query_id, query in tqdm(queries.items(), desc="Evaluating queries"):
        try:
            retrieved_nodes = retriever.retrieve(query)
            retrieved_ids = [node.node.id_ for node in retrieved_nodes]   

            expected_id = relevant_docs[query_id][0]  # 기본값을 None으로 설정
            is_hit = expected_id in retrieved_ids

            is_hit_numeric = int(is_hit)

            eval_result = {
                'is_hit': is_hit_numeric,
                'retrieved': retrieved_ids,
                'expected': expected_id,
                'query': query_id,
            }
            eval_results.append(eval_result)
            if verbose:
                logging.info(f"Query ID: {query_id}, Hit: {is_hit}, Expected: {expected_id}, Retrieved: {retrieved_ids}")

        except Exception as e:
            logging.error(f"Error processing query ID {query_id}: {str(e)}")


    return eval_results

### faiss 이용 

In [10]:
import pandas as pd
import numpy as np
import torch
import faiss

def build_index(embeddings):
    d = embeddings.shape[1]  # 임베딩 벡터의 차원
    # Faiss IndexIDMap 생성
    index = faiss.IndexIDMap(faiss.IndexFlatL2(d))
    index.add_with_ids(embeddings, np.arange(embeddings.shape[0]))  # 인덱스에 임베딩과 ID 추가
    return index

def retrieve(query_embedding, id_array, index, top_k=5):
    # 쿼리 임베딩과 가장 유사한 문서 ID를 검색
    _, retrieved_indices = index.search(query_embedding, top_k)
    # 각 인덱스에 해당하는 문서 ID 반환
    retrieved_doc_ids = id_array[retrieved_indices]
    return retrieved_doc_ids


def evaluate(dataset, model, top_k=5, verbose=False):
    corpus = dataset['corpus']
    queries = dataset['queries']
    relevant_docs = dataset['relevant_docs']

    corpus_df = pd.DataFrame.from_dict(corpus, orient='index', columns=['text'])
    corpus_df.reset_index(inplace=True)
    corpus_df.columns = ['ID', 'text']

    # 코퍼스의 모든 텍스트를 인코딩
    corpus_embeddings = model.encode(list(corpus_df['text']), convert_to_tensor=True)

    if torch.cuda.is_available():  # GPU 사용 가능한 경우 CPU로 이동
        corpus_embeddings = corpus_embeddings.cpu()
    corpus_embeddings = corpus_embeddings.numpy()  # NumPy 배열로 변환

    # 인덱스를 빌드하고 ID 배열을 반환
    index = build_index(corpus_embeddings)
    id_array = corpus_df['ID'].values

    eval_results = []

    for query_id, query_text in queries.items():
        query_embedding = model.encode([query_text], convert_to_tensor=True)

        if torch.cuda.is_available():  # GPU 사용 가능한 경우 CPU로 이동
            query_embedding = query_embedding.cpu()
        query_embedding = query_embedding.numpy()  # NumPy 배열로 변환
        
        retrieved_doc_ids = retrieve(query_embedding, id_array, index, top_k)  # 비교 해서 찾은 값 변환
        # pdb.set_trace() #######################
        expected_id = relevant_docs[query_id][0]

        is_hit = expected_id in retrieved_doc_ids

        eval_result = {
            'is_hit': int(is_hit),
            'retrieved': retrieved_doc_ids,
            'expected': expected_id,
            'query': query_id,
        }
        eval_results.append(eval_result)

        if verbose:
            print(f"Query ID: {query_id}, Hit: {is_hit}, Expected: {expected_id}, Retrieved: {retrieved_doc_ids}")

    return eval_results


In [11]:
from sentence_transformers.evaluation import InformationRetrievalEvaluator
from sentence_transformers import SentenceTransformer
import os

def evaluate_st(dataset,model_id,name):

    corpus = dataset['corpus']
    queries = dataset['queries']
    relevant_docs = dataset['relevant_docs']

    evaluator = InformationRetrievalEvaluator(queries, corpus, relevant_docs, name=name)
    model = SentenceTransformer(model_id)
    return evaluator(model, output_path='/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/EVAL_results/')

In [12]:
OPENAI_API_KEY = "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"
os.environ["OPENAI_API_KEY"]=OPENAI_API_KEY

TRAIN_DATASET_FPATH = '/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/data/train_dataset.json'
VAL_DATASET_FPATH = '/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/data/val_dataset.json'

path = "/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/"

with open(TRAIN_DATASET_FPATH, 'r+') as f:
    train_dataset = json.load(f)

with open(VAL_DATASET_FPATH, 'r+') as f:
    val_dataset = json.load(f)

### 1. ada : OpenAI embedding "ADA"

In [27]:
openai_model = OpenAIEmbedding(embed_batch_size=10)#임베딩 생성
ada_val_results = evaluate(val_dataset, openai_model)
print("ING ADA")
df_ada = pd.DataFrame(ada_val_results)
df_ada['model'] = 'OpenAI_ada'
print("ADA Hit Rate:", df_ada['is_hit'].mean())

  service_context = ServiceContext.from_defaults(embed_model=embed_model)
Generating embeddings:   0%|          | 0/1029 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Generating embeddings: 100%|██████████| 1029/1029 [01:36<00:00, 10.65it/s]
Evaluating queries:  18%|█▊        | 354/2003 [02:36<11:50,  2.32it/s]ERROR:root:Error processing query ID 99b7d66f-4bce-4a3e-af68-a021c6c3dcc3: unsupported operand type(s) for *: 'NoneType' and 'float'
Evaluating queries: 100%|██████████| 2003/2003 [14:46<00:00,  2.26it/s]


ING ADA
ADA Hit Rate: 0.5114885114885115


### 2. bert : open source "bert-base-uncased"

In [16]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

! pip install faiss-gpu==1.6.3



In [8]:
# import pandas as pd
# import numpy as np
# import torch
# import faiss

# def build_index(embeddings):
#     d = embeddings.shape[1]  # 임베딩 벡터의 차원
#     # Faiss IndexIDMap 생성
#     index = faiss.IndexIDMap(faiss.IndexFlatL2(d))
#     index.add_with_ids(embeddings, np.arange(embeddings.shape[0]))  # 인덱스에 임베딩과 ID 추가
#     return index

# def retrieve(query_embedding, id_array, index, top_k=5):
#     # 쿼리 임베딩과 가장 유사한 문서 ID를 검색
#     _, retrieved_indices = index.search(query_embedding, top_k)
#     # 각 인덱스에 해당하는 문서 ID 반환
#     retrieved_doc_ids = id_array[retrieved_indices]
#     return retrieved_doc_ids


# def evaluate(dataset, model, top_k=5, verbose=False):
#     corpus = dataset['corpus']
#     queries = dataset['queries']
#     relevant_docs = dataset['relevant_docs']

#     corpus_df = pd.DataFrame.from_dict(corpus, orient='index', columns=['text'])
#     corpus_df.reset_index(inplace=True)
#     corpus_df.columns = ['ID', 'text']

#     # 코퍼스의 모든 텍스트를 인코딩
#     corpus_embeddings = model.encode(list(corpus_df['text']), convert_to_tensor=True)

#     if torch.cuda.is_available():  # GPU 사용 가능한 경우 CPU로 이동
#         corpus_embeddings = corpus_embeddings.cpu()
#     corpus_embeddings = corpus_embeddings.numpy()  # NumPy 배열로 변환

#     # 인덱스를 빌드하고 ID 배열을 반환
#     index = build_index(corpus_embeddings)
#     id_array = corpus_df['ID'].values

#     eval_results = []

#     for query_id, query_text in queries.items():
#         query_embedding = model.encode([query_text], convert_to_tensor=True)

#         if torch.cuda.is_available():  # GPU 사용 가능한 경우 CPU로 이동
#             query_embedding = query_embedding.cpu()
#         query_embedding = query_embedding.numpy()  # NumPy 배열로 변환
        
#         retrieved_doc_ids = retrieve(query_embedding, id_array, index, top_k)  # 비교 해서 찾은 값 변환
#         # pdb.set_trace() #######################
#         expected_id = relevant_docs[query_id][0]

#         is_hit = expected_id in retrieved_doc_ids

#         eval_result = {
#             'is_hit': int(is_hit),
#             'retrieved': retrieved_doc_ids,
#             'expected': expected_id,
#             'query': query_id,
#         }
#         eval_results.append(eval_result)

#         if verbose:
#             print(f"Query ID: {query_id}, Hit: {is_hit}, Expected: {expected_id}, Retrieved: {retrieved_doc_ids}")

#     return eval_results


In [15]:
from sentence_transformers import SentenceTransformer, models
#from llama_index.core.embeddings.utils import MockEmbedding
import pandas as pd

# 모델을 불러
model_name = 'bert-base-uncased'

transformer_model = models.Transformer(model_name)

# Pooling layer 추가
pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(),
                            pooling_mode_mean_tokens=True,
                            pooling_mode_cls_token=False,
                            pooling_mode_max_tokens=False)

# SentenceTransformer에 word_embedding_model과 pooling_model을 추가하여 모델 구성
model = SentenceTransformer(modules=[transformer_model, pooling_model])

# 평가를 위해 임베딩된 데이터셋과 함께 사용
bert_val_results = evaluate(val_dataset, model)

# 결과를 DataFrame으로 변환
df_bert = pd.DataFrame(bert_val_results)
df_bert['model'] = 'Bert'

# Hit Rate
hit_rate_bge = df_bert['is_hit'].mean()
print(hit_rate_bge)

0.01597603594608088


In [16]:
customevaluate_st(val_dataset, model, output_folder, name='bert')

NameError: name 'customevaluate_st' is not defined

### 3. bert_finetuned : Only sentence finetuned "bert-base-uncased" ( BERT )

In [19]:
bert_ST = "local:../EXPERIMENT1_BaseBERT_generated_QAdata/exp_finetune" 
val_results_finetuned = evaluate(val_dataset, bert_ST)
print("ING finetuned")
df_bert_ST = pd.DataFrame(val_results_finetuned)
df_bert_ST['model'] = 'Bert_ST'
print("finetuned Hit Rate:", df_bert_ST['is_hit'].mean())

  service_context = ServiceContext.from_defaults(embed_model=embed_model)
Generating embeddings: 100%|██████████| 1029/1029 [00:12<00:00, 84.12it/s]
Evaluating queries:  18%|█▊        | 354/2003 [00:25<01:59, 13.85it/s]ERROR:root:Error processing query ID 99b7d66f-4bce-4a3e-af68-a021c6c3dcc3: unsupported operand type(s) for *: 'NoneType' and 'float'
Evaluating queries: 100%|██████████| 2003/2003 [02:25<00:00, 13.78it/s]


ING finetuned
finetuned Hit Rate: 0.23926073926073926


In [20]:
model_id = "../EXPERIMENT1_BaseBERT_generated_QAdata/exp_finetune"
model = SentenceTransformer(model_id)
customevaluate_st(val_dataset, model, output_folder, name='Bert_ST')

Batches: 100%|██████████| 63/63 [00:02<00:00, 22.65it/s]
Corpus Chunks: 100%|██████████| 1/1 [00:09<00:00,  9.83s/it]


<class 'numpy.float64'>
0.17955108710795925


Unnamed: 0,0
0,0.179551


### 4. Kcbert : open source "Beomi/KcBERT"

In [17]:
# from sentence_transformers import SentenceTransformer, models

# # Load Embedding Model
# embedding_model = models.Transformer(
#     model_name_or_path='beomi/kcbert-base', 
#     max_seq_length=256,
#     do_lower_case=True
# )

# # Only use Mean Pooling -> Pooling all token embedding vectors of sentence.
# pooling_model = models.Pooling(
#     embedding_model.get_word_embedding_dimension(),
#     pooling_mode_mean_tokens=True,
#     pooling_mode_cls_token=False,
#     pooling_mode_max_tokens=False,
# )

# model = SentenceTransformer(modules=[embedding_model, pooling_model])

# # 평가를 위해 임베딩된 데이터셋과 함께 사용
# kcbert_val_results = evaluate(val_dataset, model)

In [7]:
import pdb
from sentence_transformers import SentenceTransformer, models
model_name = 'beomi/kcbert-base'


# 모델과 토크나이저 로드
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# 모델의 config에서 위치 임베딩 크기 확인
print("Position embeddings size:", model.config.max_position_embeddings)
print("Hidden size (embedding dimension):", model.config.hidden_size)

transformer_model = models.Transformer(model_name) 
# 일반적으로 BERT 모델에서는 임베딩 차원의 크기가 768. 위치 임베딩의 크기가 300인 것으로 보입니다.
# 768로 맞추기

# Pooling layer 추가
pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(),
                            pooling_mode_mean_tokens=True,
                            pooling_mode_cls_token=False,
                            pooling_mode_max_tokens=False)
#pdb.set_trace()###########################################################
# SentenceTransformer에 word_embedding_model과 pooling_model을 추가하여 모델 구성
model = SentenceTransformer(modules=[transformer_model, pooling_model])



# 평가를 위해 임베딩된 데이터셋과 함께 사용
kcbert_val_results = evaluate(val_dataset, model)

# 결과를 DataFrame으로 변환
df_kcbert = pd.DataFrame(kcbert_val_results)
df_kcbert['model'] = 'Bert_KR'

# Hit Rate
hit_rate_bge = df_kcbert['is_hit'].mean()
print(hit_rate_bge)


# customevaluate_st(val_dataset, model, output_folder, name='Kcbert')

RuntimeError: The size of tensor a (768) must match the size of tensor b (300) at non-singleton dimension 1

### 5. kbert_finetuned : Only sentence finetuned "Beomi/KcBERT" ( 한국어로 사전 학습된 BERT )

In [22]:
kcbert_finetuned = "local:../EXPERIMENT2_KCBERT_generated_QAdata/exp_finetune" 
val_results_finetuned = evaluate(val_dataset, kcbert_finetuned)
print("ING finetuned")
df_kcbert_ST = pd.DataFrame(val_results_finetuned)
df_kcbert_ST['model'] = 'Bert_KR_ST'
print("finetuned Hit Rate:", df_kcbert_ST['is_hit'].mean())

  service_context = ServiceContext.from_defaults(embed_model=embed_model)
Generating embeddings: 100%|██████████| 1029/1029 [00:07<00:00, 137.79it/s]
Evaluating queries:  18%|█▊        | 354/2003 [00:26<02:05, 13.09it/s]ERROR:root:Error processing query ID 99b7d66f-4bce-4a3e-af68-a021c6c3dcc3: unsupported operand type(s) for *: 'NoneType' and 'float'
Evaluating queries: 100%|██████████| 2003/2003 [02:29<00:00, 13.37it/s]


ING finetuned
finetuned Hit Rate: 0.4515484515484515


In [23]:
model_id = "../EXPERIMENT2_KCBERT_generated_QAdata/exp_finetune"
model = SentenceTransformer(model_id)
customevaluate_st(val_dataset, model, output_folder,name='Bert_KR_ST')

Batches: 100%|██████████| 63/63 [00:01<00:00, 52.78it/s]
Corpus Chunks: 100%|██████████| 1/1 [00:05<00:00,  5.60s/it]


<class 'numpy.float64'>
0.3513958932589646


Unnamed: 0,0
0,0.351396


### 6. Domain Adaptation

In [13]:
from sentence_transformers import SentenceTransformer, models

model_name = '/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/MLM_trained_model/kcbert-base_CyclicLRtriangular-2024-05-07_06-21-18'
#klue-roberta-large_CyclicLRtriangular-2024-04-26_08-42-52'

transformer_model = models.Transformer(model_name)

# Pooling layer 추가
pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(),
                            pooling_mode_mean_tokens=True,
                            pooling_mode_cls_token=False,
                            pooling_mode_max_tokens=False)

# SentenceTransformer에 word_embedding_model과 pooling_model을 추가하여 모델 구성
OURS_DA = SentenceTransformer(modules=[transformer_model, pooling_model])

val_results_finetuned = evaluate(val_dataset, OURS_DA)
print("ING finetuned")
df_OURS_DA = pd.DataFrame(val_results_finetuned)
df_OURS_DA['model'] = 'Bert_KR_DA'
print("finetuned Hit Rate:", df_OURS_DA['is_hit'].mean())

Some weights of BertModel were not initialized from the model checkpoint at /home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/MLM_trained_model/kcbert-base_CyclicLRtriangular-2024-05-07_06-21-18 and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [16]:
from sentence_transformers import SentenceTransformer, models

model_name = '/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/MLM_trained_model/kcbert-base_CyclicLRtriangular-2024-05-01_16-01-15'
#klue-roberta-large_CyclicLRtriangular-2024-04-26_08-42-52'

transformer_model = models.Transformer(model_name)

# Pooling layer 추가
pooling_model = models.Pooling(transformer_model.get_word_embedding_dimension(),
                            pooling_mode_mean_tokens=True,
                            pooling_mode_cls_token=False,
                            pooling_mode_max_tokens=False)

# SentenceTransformer에 word_embedding_model과 pooling_model을 추가하여 모델 구성
OURS_DA = SentenceTransformer(modules=[transformer_model, pooling_model])

val_results_finetuned = evaluate(val_dataset, OURS_DA)
print("ING finetuned")
df_OURS_DA = pd.DataFrame(val_results_finetuned)
df_OURS_DA['model'] = 'Bert_KR_DA'
print("finetuned Hit Rate:", df_OURS_DA['is_hit'].mean())

Some weights of BertModel were not initialized from the model checkpoint at /home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/MLM_trained_model/kcbert-base_CyclicLRtriangular-2024-05-01_16-01-15 and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


ING finetuned
finetuned Hit Rate: 0.14628057913130305


### 7. OURS : Domain Adaptation + sentence finetuned

In [22]:
OURS_finetuned = "local:../EXPERIMENT3_DOMAINKCBERT_generated_QAdata/kcbert-base_CyclicLRtriangular-2024-04-30/exp_finetune" 

val_results_finetuned = evaluate(val_dataset, OURS_finetuned)
print("ING finetuned")
df_OURS_DA_ST = pd.DataFrame(val_results_finetuned)
df_OURS_DA_ST['model'] = 'Bert_KR_DA_ST'
print("finetuned Hit Rate:", df_OURS_DA_ST['is_hit'].mean())

  service_context = ServiceContext.from_defaults(embed_model=embed_model)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Generating embeddings: 100%|██████████| 1029/1029 [00:07<00:00, 137.97it/s]
Evaluating queries:  18%|█▊        | 354/2003 [00:26<02:01, 13.54it/s]ERROR:root:Error processing query ID 99b7d66f-4bce-4a3e-af68-a021c6c3dcc3: unsupported operand type(s) for *: 'NoneType' and 'float'
Evaluating queries: 100%|██████████| 2003/2003 [02:28<00:00, 13.46it/s]


ING finetuned
finetuned Hit Rate: 0.43456543456543456


In [8]:
model_id = "../EXPERIMENT3_DOMAINKCBERT_generated_QAdata/kcbert-base_CyclicLRtriangular-2024-04-29_08-12-32/exp_finetune"
model = SentenceTransformer(model_id)
customevaluate_st(val_dataset, model, output_folder, name='OURS_DA_ST')

KeyboardInterrupt: 

### SUMMARY

In [28]:
df_all = pd.concat([df_ada, df_bge, df_bge_finetuned, df_bert, df_bert_ST, df_kcbert, df_kcbert_ST, df_OURS_DA_ST])
df_all.groupby('model').mean('is_hit')

Unnamed: 0_level_0,is_hit
model,Unnamed: 1_level_1
OURS_DA_ST,0.240759
ada,0.511489
bert,0.004995
bert_ST,0.239261
bge,0.004995
bge_ST,0.092907
kcbert,0.004995
kcbert_ST,0.451548


### InformationRetrievalEvaluator

In [30]:
import os

# 결과를 저장할 디렉토리
results_dir = '/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/EVAL_results'

# 디렉토리가 없으면 생성
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

# 각 데이터프레임을 CSV 파일로 저장
df_ada.to_csv(os.path.join(results_dir, 'ada_results.csv'))
df_bge.to_csv(os.path.join(results_dir, 'bge_results.csv'))
df_bge_finetuned.to_csv(os.path.join(results_dir, 'bge_ST_results.csv'))
df_bert.to_csv(os.path.join(results_dir, 'bert_results.csv'))
df_bert_ST.to_csv(os.path.join(results_dir, 'bert_ST_results.csv'))
df_kcbert.to_csv(os.path.join(results_dir, 'kcbert_results.csv'))
df_kcbert_ST.to_csv(os.path.join(results_dir, 'kcbert_ST_results.csv'))
df_OURS_DA_ST.to_csv(os.path.join(results_dir, 'OURS_DA_ST_results.csv'))

# InformationRetrievalEvaluator

In [37]:
df_OURS_DA_ST

Unnamed: 0,is_hit,retrieved,expected,query,model
0,1,"[b6cafda1-526b-4309-82cc-b4878195ccfc, 9790f8e...",b6cafda1-526b-4309-82cc-b4878195ccfc,b51fe0a3-e40c-4e29-a855-3d5fee362245,OURS_DA_ST
1,0,"[c9a32263-4134-4405-979a-e1aee11831cf, ddf3c8d...",b6cafda1-526b-4309-82cc-b4878195ccfc,66bc7be1-1c44-46da-b054-77e454e2981e,OURS_DA_ST
2,0,"[9dbc6e1e-6843-459b-9f91-ceb036fc8ccc, 66c1325...",0fa29f16-ed5e-4572-a829-f59d7bcc675e,f66dfa34-e2ab-4f54-bf57-d765cdaa11d7,OURS_DA_ST
3,0,"[66c1325b-bc54-4615-ad8d-af6e8feefd14, 918a11a...",0fa29f16-ed5e-4572-a829-f59d7bcc675e,8f3643f4-a2e9-4b6b-b1ff-aa6d02065a85,OURS_DA_ST
4,0,"[cf7061ba-6b77-4b2e-a96e-9e6f752d9bd1, dade89a...",66c1325b-bc54-4615-ad8d-af6e8feefd14,acd9f2a6-9e59-4dd1-b5ca-370d2ecee8d7,OURS_DA_ST
...,...,...,...,...,...
1997,0,"[215b9ff0-2b90-4350-8cda-7a2af4de9bb9, c5da4e9...",ea0cb6c3-bca6-4818-a4df-eaa4fc95881d,f8790853-e00d-46c9-ac52-17fa9530433c,OURS_DA_ST
1998,0,"[029b6e20-44ff-4f3a-a005-2f41ede88e34, c2c941d...",ea0cb6c3-bca6-4818-a4df-eaa4fc95881d,2f658ded-9416-45c3-aa0a-f82c765331ca,OURS_DA_ST
1999,0,"[7922816a-bb11-4569-b77f-f1eb89ed8929, f2df720...",ea5f2696-ca05-4bf3-a07a-dd42da7186ad,a82d807f-0d46-4eb6-9790-8a563e5918b6,OURS_DA_ST
2000,0,"[24e3c14f-de1d-4f8c-bc7c-454d6825e41c, ded3119...",ea5f2696-ca05-4bf3-a07a-dd42da7186ad,d487a4fe-978c-4c5c-a0cb-18a9003f9b26,OURS_DA_ST


In [34]:
# df_st_ada= pd.read_csv('EVAL_results/Information-Retrieval_evaluation_ada_results.csv')
df_st_bge= pd.read_csv('/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/EVAL_results/bge_evaluation_results.csv')
df_st_bge_ST= pd.read_csv('/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/EVAL_results/bge_ST_evaluation_results.csv')
df_st_bert_ST= pd.read_csv('/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/EVAL_results/bert_ST_evaluation_results.csv')
df_st_kcbert_ST= pd.read_csv('/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/EVAL_results/kcbert_ST_evaluation_results.csv')
df_st_OURS_DA_ST= pd.read_csv('/home/azureuser/cloudfiles/code/Users/hb.suh/OUR_BERT/EVAL_results/OURS_DA_ST_evaluation_results.csv')

In [35]:
df_st_all = pd.concat([df_st_bge, df_st_bge_ST, df_st_bert_ST, df_st_kcbert_ST, df_st_OURS_DA_ST])
df_st_all = df_st_all.set_index('model')
df_st_all

KeyError: "None of ['model'] are in the columns"

In [8]:
pip list

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Package                                 Version
--------------------------------------- ------------
adal                                    1.2.7
aiohttp                                 3.9.5
aiosignal                               1.3.1
alembic                                 1.13.1
aniso8601                               9.0.1
annotated-types                         0.6.0
anyio                                   4.3.0
applicationinsights                     0.11.10
argcomplete                             3.3.0
asttokens                               2.4.1
async-timeout                           4.0.3
attrs                                   23.2.0
azure-ai-formrecognizer                 3.3.1
azure-ai-ml                             1.15.0
azure-common                            1.1.28
azure-core                              1.30.1
azure-graphrbac                         0.61.1
azure-identity                          1.12.0
azure-keyvault-secrets                  4.6.0
azure-mgmt-autho