In [11]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from service.embedding import _model as model

generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model=model))

In [12]:
from tasks.import_faq_data import file_path, _preprocessing_data_in_batch
import pandas as pd
import time
from langchain_core.documents import Document

def gen_langchain_docs(
    file_path: str = file_path,
):
    start_time = time.time()
    docs = []
    print("Import is running...")

    df = pd.read_csv(file_path)
    df = _preprocessing_data_in_batch(df)
    
    for index, row in df.iterrows():
        content = (
                    "Câu hỏi: " + row["question"] + "\nCâu trả lời: " + row["answer"]
                )
        doc = Document(page_content=content)
        docs.append(doc)
    return docs

In [13]:
from ragas.testset import TestsetGenerator
docs = gen_langchain_docs()
generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
dataset = generator.generate_with_langchain_docs(docs, testset_size=len(docs))

Import is running...


Applying SummaryExtractor:   0%|          | 0/28 [00:00<?, ?it/s]

2024-12-06 01:16:30 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:   4%|▎         | 1/28 [00:01<00:37,  1.39s/it]

2024-12-06 01:16:30 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  11%|█         | 3/28 [00:01<00:11,  2.20it/s]

2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  18%|█▊        | 5/28 [00:01<00:05,  3.93it/s]

2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  25%|██▌       | 7/28 [00:01<00:03,  5.94it/s]

2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  32%|███▏      | 9/28 [00:01<00:02,  8.08it/s]

2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  39%|███▉      | 11/28 [00:02<00:01,  9.55it/s]

2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  46%|████▋     | 13/28 [00:02<00:01,  9.67it/s]

2024-12-06 01:16:31 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:32 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  54%|█████▎    | 15/28 [00:02<00:01, 10.20it/s]

2024-12-06 01:16:32 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:32 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  61%|██████    | 17/28 [00:02<00:00, 11.26it/s]

2024-12-06 01:16:32 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:32 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  68%|██████▊   | 19/28 [00:02<00:01,  8.96it/s]

2024-12-06 01:16:32 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:32 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  75%|███████▌  | 21/28 [00:03<00:00,  9.76it/s]

2024-12-06 01:16:32 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:32 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  82%|████████▏ | 23/28 [00:03<00:00,  8.11it/s]

2024-12-06 01:16:33 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:33 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  89%|████████▉ | 25/28 [00:03<00:00,  9.66it/s]

2024-12-06 01:16:33 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:33 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying SummaryExtractor:  96%|█████████▋| 27/28 [00:03<00:00,  9.27it/s]

2024-12-06 01:16:34 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying CustomNodeFilter:   0%|          | 0/34 [00:00<?, ?it/s]         

2024-12-06 01:16:34 - Node 30f874b1-95a9-4a5c-a61a-e836b0aab46e does not have a summary. Skipping filtering.
2024-12-06 01:16:34 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying CustomNodeFilter:   6%|▌         | 2/34 [00:00<00:11,  2.67it/s]

2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - Node 839b33c0-8999-4749-8a4a-d777232ed179 does not have a summary. Skipping filtering.
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/com

Applying CustomNodeFilter:  12%|█▏        | 4/34 [00:00<00:05,  5.35it/s]

2024-12-06 01:16:35 - Node 905d474b-b2a6-4077-8460-7eb10eed126a does not have a summary. Skipping filtering.
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - Node 44ccbde3-0a35-4cee-a46a-277605c1d555 does not have a summary. Skipping filtering.
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - Node c93df0db-c7bc-4966-97bd-282e507c22d9 does not have a summary. Skipping filtering.
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying CustomNodeFilter:  41%|████      | 14/34 [00:00<00:00, 22.20it/s]

2024-12-06 01:16:35 - Node 8fac342d-8693-49ff-b05c-4bda7a93affa does not have a summary. Skipping filtering.
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying CustomNodeFilter:  65%|██████▍   | 22/34 [00:01<00:00, 19.62it/s]

2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying CustomNodeFilter:  76%|███████▋  | 26/34 [00:01<00:00, 22.17it/s]

2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying CustomNodeFilter:  88%|████████▊ | 30/34 [00:01<00:00, 23.70it/s]

2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:35 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:36 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/96 [00:00<?, ?it/s]

2024-12-06 01:16:37 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-06 01:16:38 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-06 01:16:38 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-06 01:16:40 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-06 01:16:41 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-06 01:16:41 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-06 01:16:42 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-06 01:16:42 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-06 01:16:43 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-06 01:16:44 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-12-06 01:16:44 - HTTP Request: POST

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   1%|          | 1/96 [00:17<27:27, 17.34s/it]

2024-12-06 01:16:54 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  30%|███       | 29/96 [00:18<00:31,  2.13it/s]

2024-12-06 01:16:54 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:54 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  33%|███▎      | 32/96 [00:19<00:27,  2.36it/s]

2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  35%|███▌      | 34/96 [00:19<00:24,  2.57it/s]

2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  44%|████▍     | 42/96 [00:19<00:13,  4.08it/s]

2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:55 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  48%|████▊     | 46/96 [00:20<00:11,  4.24it/s]

2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  51%|█████     | 49/96 [00:20<00:09,  4.81it/s]

2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  55%|█████▌    | 53/96 [00:20<00:06,  6.22it/s]

2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  59%|█████▉    | 57/96 [00:20<00:04,  7.92it/s]

2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:56 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  62%|██████▎   | 60/96 [00:21<00:04,  8.10it/s]

2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  66%|██████▌   | 63/96 [00:21<00:03,  9.23it/s]

2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  68%|██████▊   | 65/96 [00:21<00:03,  9.57it/s]

2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  71%|███████   | 68/96 [00:21<00:02, 10.62it/s]

2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  75%|███████▌  | 72/96 [00:21<00:01, 14.21it/s]

2024-12-06 01:16:57 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  78%|███████▊  | 75/96 [00:22<00:01, 12.15it/s]

2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  80%|████████  | 77/96 [00:22<00:01, 11.82it/s]

2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  82%|████████▏ | 79/96 [00:22<00:01, 12.51it/s]

2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  89%|████████▊ | 85/96 [00:22<00:00, 19.27it/s]

2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:58 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  92%|█████████▏| 88/96 [00:22<00:00, 14.08it/s]

2024-12-06 01:16:59 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:59 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:59 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  94%|█████████▍| 90/96 [00:23<00:00, 14.01it/s]

2024-12-06 01:16:59 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  96%|█████████▌| 92/96 [00:23<00:00, 14.69it/s]

2024-12-06 01:16:59 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:59 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:16:59 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:  99%|█████████▉| 95/96 [00:23<00:00, 15.51it/s]

2024-12-06 01:16:59 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


                                                                                                             

2024-12-06 01:16:59 - found 0 clusters
2024-12-06 01:16:59 - found 8 clusters


Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

2024-12-06 01:17:00 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating personas:  33%|███▎      | 1/3 [00:00<00:01,  1.03it/s]

2024-12-06 01:17:00 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:00 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating personas: 100%|██████████| 3/3 [00:01<00:00,  2.82it/s]
Generating Scenarios:   0%|          | 0/2 [00:00<?, ?it/s]

2024-12-06 01:17:00 - found 8 clusters
2024-12-06 01:17:01 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:01 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:02 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:02 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:03 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:03 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:04 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:05 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:05 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:06 - HTTP Request: POST ht

Generating Scenarios:  50%|█████     | 1/2 [00:07<00:07,  7.43s/it]

2024-12-06 01:17:09 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:10 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:11 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:12 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:13 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:14 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:16 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:17 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:18 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:20 - HTTP Request: POST https://api.openai.com/v1/chat/completion

Generating Scenarios: 100%|██████████| 2/2 [00:21<00:00, 10.59s/it]
Generating Samples:   0%|          | 0/24 [00:00<?, ?it/s]

2024-12-06 01:17:23 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples:   4%|▍         | 1/24 [00:01<00:38,  1.67s/it]

2024-12-06 01:17:23 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:23 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:23 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples:   8%|▊         | 2/24 [00:02<00:20,  1.08it/s]

2024-12-06 01:17:23 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:23 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:23 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples:  29%|██▉       | 7/24 [00:02<00:03,  5.10it/s]

2024-12-06 01:17:23 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:24 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:24 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples:  42%|████▏     | 10/24 [00:02<00:01,  7.24it/s]

2024-12-06 01:17:24 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:24 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:24 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples:  54%|█████▍    | 13/24 [00:02<00:01,  8.87it/s]

2024-12-06 01:17:24 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:25 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples:  62%|██████▎   | 15/24 [00:03<00:01,  5.04it/s]

2024-12-06 01:17:25 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:25 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples:  71%|███████   | 17/24 [00:03<00:01,  5.20it/s]

2024-12-06 01:17:25 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:25 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples:  79%|███████▉  | 19/24 [00:03<00:00,  6.51it/s]

2024-12-06 01:17:26 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:26 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples:  88%|████████▊ | 21/24 [00:04<00:00,  4.90it/s]

2024-12-06 01:17:26 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-06 01:17:26 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples:  96%|█████████▌| 23/24 [00:04<00:00,  5.50it/s]

2024-12-06 01:17:28 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Generating Samples: 100%|██████████| 24/24 [00:06<00:00,  3.69it/s]


In [14]:
df = dataset.to_pandas()

In [15]:
import pandas as pd

test_df = dataset.to_pandas()

In [4]:
from service.store_chatbot import gen_answer
from uuid import uuid4, UUID
from models.message import Message

def generate_response_series_from_test_set(test_df: pd.DataFrame) -> tuple[pd.Series, pd.Series]:
    responses = []
    retrieved_contexts = []
    for index, row in test_df.iterrows():
        question = row["user_input"]
        answer = gen_answer(
            UUID("f6f200d4-e309-47fc-8cc2-ffac77cdb8ad"),
            uuid4(),
            [Message(content=question, author="user")],
        )

        for key, value in answer.metadata.items():
            if key == "search_faq_database_tool":
                retrieved_contexts.append(value)
            else:
                retrieved_contexts.append("")
        responses.append(answer.content)
    return (
        pd.Series(responses, dtype=str, name="response"),
        pd.Series(retrieved_contexts, dtype=str, name="retrieved_contexts"),
    )

response_series, retrieved_context_series = generate_response_series_from_test_set(test_df)

test_df["response"] = response_series
test_df["retrieved_contexts"] = retrieved_context_series

NameError: name 'pd' is not defined

In [17]:
test_df

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name,response,retrieved_contexts
0,FPTshop.com.vn thu thập thông tin cá nhân của ...,[1143f59b-29ff-4b7a-909c-b1c146baf092\n\nCâu h...,FPTshop.com.vn thu thập thông tin cá nhân của ...,single_hop_specifc_query_synthesizer,An error occurred: Chainlit context not found,
1,What is the scope of personal information usag...,[66d17c2a-af11-4ac8-b154-68fb1ee86281\n\nCâu h...,FPTshop.com.vn chỉ sử dụng thông tin cá nhân c...,single_hop_specifc_query_synthesizer,An error occurred: Chainlit context not found,
2,Tôi muốn biết là thông tin cá nhân của khách h...,[af5fea67-0372-4b65-9961-6b52e0f247a1\n\nCâu h...,Dữ liệu cá nhân sẽ được lưu trữ đến khi có yêu...,single_hop_specifc_query_synthesizer,An error occurred: Chainlit context not found,
3,FPT Shop nói là bảo mật thông tin khách hàng r...,[f656f658-4e85-4cc6-a295-a6b8027b2153\n\nCâu h...,FPTshop.com.vn cam kết bảo mật thông tin cá nh...,single_hop_specifc_query_synthesizer,An error occurred: Chainlit context not found,
4,How does FPT Shop comply with PCI DSS for paym...,[b0244ec7-120c-493c-bd0f-c5de100d17ef\n\nCâu h...,FPTShop ensures that customer payment card inf...,single_hop_specifc_query_synthesizer,An error occurred: Chainlit context not found,
5,Làm sao tôi có thể yêu cầu xóa dữ liệu cá nhân...,[57330c48-5708-4505-bbee-2bfd35ab8f1e\n\nCâu h...,Khách hàng có thể gửi yêu cầu xóa dữ liệu qua ...,single_hop_specifc_query_synthesizer,An error occurred: Chainlit context not found,
6,FPT Shop co chinh sach giao hang tai nha nhu t...,[7a523fad-e48f-4f31-a5bf-8ebc6f391d7a\n\nCâu h...,FPT Shop hỗ trợ giao hàng tại nhà trên toàn qu...,single_hop_specifc_query_synthesizer,An error occurred: Chainlit context not found,
7,"Khi mua hàng trực tuyến tại FPT Shop, nếu đơn ...",[1753ba0e-9093-4d75-863f-a4a493d848cd\n\nCâu h...,Đơn hàng có giá trị dưới 50 triệu: Quý khách c...,single_hop_specifc_query_synthesizer,An error occurred: Chainlit context not found,
8,"As a Data Privacy Officer, I am particularly i...",[30f874b1-95a9-4a5c-a61a-e836b0aab46e\n\nCâu h...,FPT Shop hỗ trợ lắp đặt miễn phí cho các sản p...,single_hop_specifc_query_synthesizer,An error occurred: Chainlit context not found,
9,FPT Shop đổi trả sản phẩm ra sao?,[15d705c6-9e7e-4f94-973b-27e87b7e2963\n\nCâu h...,Khách hàng có thể đổi sản phẩm nếu phát hiện l...,single_hop_specifc_query_synthesizer,An error occurred: Chainlit context not found,


In [18]:
from ragas import EvaluationDataset

eval_dataset = EvaluationDataset.from_pandas(test_df)

ValidationError: 1 validation error for SingleTurnSample
retrieved_contexts
  Input should be a valid list [type=list_type, input_value=nan, input_type=float]
    For further information visit https://errors.pydantic.dev/2.10/v/list_type

In [None]:
EvaluationDataset.fr

In [16]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
evaluator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings(model=model))


In [17]:
from ragas.metrics import LLMContextRecall, FactualCorrectness, Faithfulness, SemanticSimilarity
from ragas import evaluate

metrics = [
    LLMContextRecall(llm=evaluator_llm),
    FactualCorrectness(llm=evaluator_llm),
    Faithfulness(llm=evaluator_llm),
    SemanticSimilarity(embeddings=evaluator_embeddings),
]
results = evaluate(dataset=eval_dataset, metrics=metrics)

ValueError: The metric [context_recall] that is used requires the following additional columns ['retrieved_contexts'] to be present in the dataset.