In [1]:
from tqdm.auto import tqdm
from call_model import call_chat_once
import asyncio
import httpx
from uuid import uuid4
import json
import datasets
from typing import Optional
import os
import csv
import nest_asyncio
nest_asyncio.apply()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the "train" split of the m-ric/huggingface_doc_qa_eval dataset.
# Alternative source mentioned by the author: load the data from data/test.csv instead.
# The evaluation loop below reads the "question" and "answer" fields of each example.
eval_dataset = datasets.load_dataset("m-ric/huggingface_doc_qa_eval", split="train")
print(eval_dataset)

Using the latest cached version of the dataset since m-ric/huggingface_doc_qa_eval couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /home/minhthuy/.cache/huggingface/datasets/m-ric___huggingface_doc_qa_eval/default/0.0.0/5f70aa9a1e2430f528ac3f27f01f0ba8719c0704 (last modified on Mon Aug 11 14:41:56 2025).


Dataset({
    features: ['context', 'question', 'answer', 'source_doc', 'standalone_score', 'standalone_eval', 'relatedness_score', 'relatedness_eval', 'relevance_score', 'relevance_eval'],
    num_rows: 65
})


In [3]:
# CSV file that run_rag_tests loads earlier results from and appends each new result row to.
OUTPUT_FILE = "data/rag_results_default_as_retriever.csv"

def load_existing_results(file_path):
    """Read previously saved result rows from a CSV file.

    Args:
        file_path: Path of the results CSV. The file is expected to have at
            least the columns ``user_input`` and ``retrieved_contexts``, the
            latter holding a JSON-encoded value.

    Returns:
        A tuple ``(questions, rows)`` where ``questions`` is the set of
        ``user_input`` values found in the file and ``rows`` is the list of
        row dicts with ``retrieved_contexts`` decoded from JSON. Both are
        empty when the file does not exist.
    """
    rows = []
    seen_questions = set()

    # Nothing saved yet: report an empty state instead of failing.
    if not os.path.exists(file_path):
        return seen_questions, rows

    with open(file_path, newline='', encoding='utf-8') as handle:
        for record in csv.DictReader(handle):
            seen_questions.add(record["user_input"])
            # The contexts were stored as a JSON string; decode them back.
            decoded_contexts = json.loads(record["retrieved_contexts"])
            record["retrieved_contexts"] = decoded_contexts
            rows.append(record)

    return seen_questions, rows


def append_result_to_csv(file_path, row, fieldnames):
    """Append one result row to a CSV file, writing the header first when needed.

    Args:
        file_path: Path of the CSV file. The file, and its parent directory,
            are created if they do not exist yet.
        row: Mapping of column name to value, written with
            ``csv.DictWriter.writerow``.
        fieldnames: Ordered column names used for the header and for ordering
            the values of ``row``.

    Raises:
        ValueError: Raised by ``csv.DictWriter`` when ``row`` contains a key
            that is not in ``fieldnames``.
        OSError: If the directory or file cannot be created or opened.
    """
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # A missing file and an existing-but-empty file both need a header;
    # otherwise a later DictReader would treat the first data row as the header.
    needs_header = not os.path.exists(file_path) or os.path.getsize(file_path) == 0

    with open(file_path, "a", newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow(row)

def run_rag_tests(
    eval_dataset: datasets.Dataset,
    verbose: Optional[bool] = False,
    output_file: Optional[str] = None,
):
    """Run the RAG chat endpoint on every question and persist the results as CSV.

    Questions already present in the output file are skipped, so an
    interrupted run can be resumed. Each new result is appended to the file
    as soon as it is available.

    Args:
        eval_dataset: Iterable of examples; each example must provide the
            keys ``"question"`` and ``"answer"``.
        verbose: When true, print the question, the generated answer and the
            reference answer for every processed example.
        output_file: Path of the results CSV. Defaults to the module-level
            ``OUTPUT_FILE`` when ``None``.

    Returns:
        A list of row dicts (previously saved rows followed by the rows
        produced in this call) with the keys ``user_input``,
        ``retrieved_contexts`` (decoded list), ``response`` and ``reference``.
        Questions whose request failed are not included and are not written
        to the file, so a later run retries them.
    """
    fieldnames = ["user_input", "retrieved_contexts", "response", "reference"]
    if output_file is None:
        output_file = OUTPUT_FILE

    processed_questions, dataset = load_existing_results(output_file)
    print(f"Đã có {len(processed_questions)} câu hỏi xử lý trước đó, sẽ bỏ qua chúng.")

    for example in tqdm(eval_dataset):
        question = example["question"]

        if question in processed_questions:
            continue

        # A fresh session id and empty history keep every question independent.
        payload = {
            "question": question,
            "session_id": str(uuid4()),
            "chat_history": []
        }

        try:
            relevant_docs, answer = asyncio.run(call_chat_once(payload))
        except Exception as e:
            print(f"Error from question: '{question}': {e}")
            # Skip this question: without this, the code below would either
            # raise NameError (first iteration) or record the previous
            # question's answer and contexts under the current question.
            continue

        if verbose:
            print("=======================================================")
            print(f"Question: {question}")
            print(f"Answer: {answer}")
            print(f'True answer: {example["answer"]}')

        # Each returned doc is read as a mapping with a 'kwargs' dict that may
        # carry 'page_content' (presumably a serialized LangChain document;
        # confirm against call_chat_once).
        serialized_contexts = json.dumps(
            [doc['kwargs'].get('page_content') for doc in relevant_docs],
            ensure_ascii=False
        )
        row = {
            "user_input": question,
            "retrieved_contexts": serialized_contexts,
            "response": answer,
            "reference": example['answer']
        }
        append_result_to_csv(output_file, row, fieldnames)

        # Keep the in-memory record identical to what a resumed run would load
        # from disk: same keys, with the contexts decoded back from JSON.
        dataset.append({
            "user_input": question,
            "retrieved_contexts": json.loads(serialized_contexts),
            "response": answer,
            "reference": example['answer']
        })
    return dataset

In [4]:
# Run the evaluation; questions already stored in OUTPUT_FILE are skipped and the
# returned list holds both the previously saved rows and the newly produced ones.
data = run_rag_tests(eval_dataset=eval_dataset)


Đã có 0 câu hỏi xử lý trước đó, sẽ bỏ qua chúng.


  0%|          | 0/65 [00:00<?, ?it/s]2025-08-13 17:25:50,033 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
  2%|▏         | 1/65 [00:03<03:51,  3.62s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What architecture is the `tokenizers-linux-x64-musl` binary designed for?\n', 'type': 'human', 'id': '8be83939-ff1b-4cd9-bfb8-d771f2b64c66'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_iTUG5dqzmzRzV2QxuHLXepUd', 'function': {'arguments': '{"query":"tokenizers-linux-x64-musl architecture"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--0a6bf289-050a-43ea-b8c5-ca9fff42ea01-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'tokenizers-linux-x64-musl architecture'}, 'id': 'call_iTUG5dqzmzRzV2QxuHLXepUd', 'type': 'tool_call'}], 'usage_metadata': {'input_tok

2025-08-13 17:25:56,109 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
  3%|▎         | 2/65 [00:09<05:19,  5.07s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of the BLIP-Diffusion model?\n', 'type': 'human', 'id': 'cf0a1dd1-451c-4ba9-8a26-a76867c62d17'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_30gknCmeGQtVnVm3cQNcGgOw', 'function': {'arguments': '{"query":"BLIP-Diffusion model purpose"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--795b3785-55a8-46de-86b7-f415bdb11009-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'BLIP-Diffusion model purpose'}, 'id': 'call_30gknCmeGQtVnVm3cQNcGgOw', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 361, 'output_tokens': 22, 'total_tokens

2025-08-13 17:25:59,924 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
  5%|▍         | 3/65 [00:13<04:38,  4.49s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'How can a user claim authorship of a paper on the Hugging Face Hub?\n', 'type': 'human', 'id': 'a12bf66d-284e-457c-9724-1d363d482209'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_3rQj6FAHCWRmaNoX5oLFaRjN', 'function': {'arguments': '{"query":"claim authorship of a paper on the Hugging Face Hub"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--b10f8c01-5c9d-43a3-9488-21d03c1191c7-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'claim authorship of a paper on the Hugging Face Hub'}, 'id': 'call_3rQj6FAHCWRmaNoX5oLFaRjN', 'type': 'tool_call'}], 'usage_me

2025-08-13 17:26:03,182 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
  6%|▌         | 4/65 [00:16<04:04,  4.01s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of the /healthcheck endpoint in the Datasets server API?\n', 'type': 'human', 'id': 'cea00673-1ed3-4884-8659-5a71cb35cb1d'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_0XIV6KxRIsT64nS3iRsUyIqh', 'function': {'arguments': '{"query":"/healthcheck endpoint in the Datasets server API"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--55f4fd60-7f9b-4c70-8250-60f67610afb6-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': '/healthcheck endpoint in the Datasets server API'}, 'id': 'call_0XIV6KxRIsT64nS3iRsUyIqh', 'type': 'tool_call'}], 'usage

2025-08-13 17:26:06,493 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
  8%|▊         | 5/65 [00:20<03:45,  3.76s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the default context window size for Local Attention in the LongT5 model?\n', 'type': 'human', 'id': '27ce1a67-f9b0-4286-a60c-6b7033764326'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_Q178fXcyNaDITxlNCvA0ukEm', 'function': {'arguments': '{"query":"default context window size for Local Attention in LongT5 model"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--c32704e3-39b1-4ae2-925b-1bd654c70dd1-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'default context window size for Local Attention in LongT5 model'}, 'id': 'call_Q178fXcyNaDITxlNCvA0uk

2025-08-13 17:26:10,047 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
  9%|▉         | 6/65 [00:23<03:37,  3.69s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What method is used to load a checkpoint for a task using `AutoPipeline`?\n', 'type': 'human', 'id': '04d1069e-8ca4-4625-b2a1-041b8418c751'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_QzaBTOBh0IJrEYv4apbGeQiw', 'function': {'arguments': '{"query":"load a checkpoint for a task using AutoPipeline"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--677bb1e1-3a34-41b6-ae07-e0b9b1abc375-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'load a checkpoint for a task using AutoPipeline'}, 'id': 'call_QzaBTOBh0IJrEYv4apbGeQiw', 'type': 'tool_call'}], 'usage_meta

2025-08-13 17:26:15,577 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 11%|█         | 7/65 [00:29<04:08,  4.29s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of Diffusers library?\n', 'type': 'human', 'id': 'd98e89af-8806-4874-822f-96156d6a924f'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_4hpOFTHdBoh8KtOkXcHfjvk0', 'function': {'arguments': '{"query":"Diffusers library purpose"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--f647d764-8494-47ce-a12c-e3a5b9582409-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Diffusers library purpose'}, 'id': 'call_4hpOFTHdBoh8KtOkXcHfjvk0', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 357, 'output_tokens': 19, 'total_tokens': 376, 'inpu

2025-08-13 17:26:19,281 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 12%|█▏        | 8/65 [00:32<03:53,  4.10s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What method does the EulerAncestralDiscreteScheduler use for sampling?\n', 'type': 'human', 'id': '6e4762dd-b524-4f5b-b2eb-bcaf55ab45bd'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_5WCmbaF73UlBDIXj7vJxsQAT', 'function': {'arguments': '{"query":"EulerAncestralDiscreteScheduler sampling method"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--95c60274-fc2d-4f0d-a0da-ab262e37de11-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'EulerAncestralDiscreteScheduler sampling method'}, 'id': 'call_5WCmbaF73UlBDIXj7vJxsQAT', 'type': 'tool_call'}], 'usage_metadat

2025-08-13 17:26:22,699 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 14%|█▍        | 9/65 [00:36<03:37,  3.89s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the name of the large multimodal model that can solve image-text tasks and is based on Flamingo?\n', 'type': 'human', 'id': '1d6d712e-7333-4836-bb91-5693a721d58f'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_ZTrCfrOe1ABB1sbdz3cmQXeF', 'function': {'arguments': '{"query":"large multimodal model based on Flamingo"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--5a56ebc4-7842-41c1-96d2-0b0ba40cc49d-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'large multimodal model based on Flamingo'}, 'id': 'call_ZTrCfrOe1ABB1sbdz3cmQXeF', 'type': 'tool_cal

2025-08-13 17:26:26,726 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 15%|█▌        | 10/65 [00:40<03:36,  3.93s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of the `gradio.Blocks` API?\n', 'type': 'human', 'id': '1df5bb64-2669-42e0-9a63-2fb37653f823'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_IRBSA4SBunt5cLnJSi5Pa2x2', 'function': {'arguments': '{"query":"gradio.Blocks API purpose"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--7c9e6dac-8431-4517-9dc3-0b4976b6e51f-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'gradio.Blocks API purpose'}, 'id': 'call_IRBSA4SBunt5cLnJSi5Pa2x2', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 362, 'output_tokens': 21, 'total_tokens': 383,

2025-08-13 17:26:31,514 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 17%|█▋        | 11/65 [00:45<03:46,  4.19s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of the two-stage model proposed in the paper "Hierarchical Text-Conditional Image Generation with CLIP Latents"?\n', 'type': 'human', 'id': 'ae19c242-0f44-40de-9325-29f7b5e485d7'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_MPDAEcf85buLKaCjerbyF4Vn', 'function': {'arguments': '{"query":"Hierarchical Text-Conditional Image Generation with CLIP Latents"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--4627bddf-174b-4fe8-af73-0757e97786e7-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Hierarchical Text-Conditional Image Generation w

2025-08-13 17:26:35,296 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 18%|█▊        | 12/65 [00:48<03:35,  4.07s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What command is used to install the requirements for a research project using 🤗 Transformers?\n', 'type': 'human', 'id': 'b970b030-357b-4d21-a53d-9ef7397c63b0'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_RCuBsX7egfkZVdMYGISj0Xw3', 'function': {'arguments': '{"query":"install requirements for research project using 🤗 Transformers"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--86126926-d70a-4162-9314-db08a09b8c71-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'install requirements for research project using 🤗 Transformers'}, 'id': 'call_RCuBsX7egfk

2025-08-13 17:26:40,486 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 20%|██        | 13/65 [00:54<03:49,  4.41s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What task does the `roberta-large-mnli` checkpoint perform?\n', 'type': 'human', 'id': '3fea9174-7de7-424d-b755-e368e0ae2074'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_dNy8yGaVlwgBhs8z5d1cbFhq', 'function': {'arguments': '{"query":"roberta-large-mnli checkpoint task"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--1bb2706f-7747-4bbf-9dbe-fc0df0536038-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'roberta-large-mnli checkpoint task'}, 'id': 'call_dNy8yGaVlwgBhs8z5d1cbFhq', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 363, 'output_tok

2025-08-13 17:26:46,132 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 22%|██▏       | 14/65 [00:59<04:03,  4.78s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What service is replacing the Paid tier of the Inference API at Hugging Face?\n', 'type': 'human', 'id': 'f55a639f-1cb2-4384-a550-1c478e87204b'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_xLS7KZzIpZ3isoVFMIdTYTEJ', 'function': {'arguments': '{"query":"Paid tier of the Inference API replacement service Hugging Face"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--db70e445-18fa-4896-bd53-b7bdc431a661-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Paid tier of the Inference API replacement service Hugging Face'}, 'id': 'call_xLS7KZzIpZ3isoVFMIdTYTEJ'

2025-08-13 17:26:51,861 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 23%|██▎       | 15/65 [01:05<04:13,  5.07s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What architectural feature does SqueezeBERT use instead of fully-connected layers for the Q, K, V, and FFN layers?\n', 'type': 'human', 'id': 'a6834deb-d624-4611-a1fa-9d4532af3559'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_ZtPfaDrVWUvUKfZ5lSS6c6xc', 'function': {'arguments': '{"query":"SqueezeBERT architectural feature instead of fully-connected layers for Q, K, V, and FFN layers"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--5cc84c79-5dcd-4c38-b141-e25cfd396184-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'SqueezeBERT architectural feature i

2025-08-13 17:26:54,879 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 25%|██▍       | 16/65 [01:08<03:38,  4.45s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': "What type of license is the HuggingFace Team's software distributed under?\n", 'type': 'human', 'id': '6345f636-19d3-416a-97b1-4a144be4ef1f'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_jB1HEhLZIDBN0iTiCXHH22No', 'function': {'arguments': '{"query":"HuggingFace software license"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--802e95a4-fc74-4415-bbef-37a5c75a43a9-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'HuggingFace software license'}, 'id': 'call_jB1HEhLZIDBN0iTiCXHH22No', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 363, 'output_

2025-08-13 17:26:58,411 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 26%|██▌       | 17/65 [01:12<03:20,  4.17s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What are the two parameter-reduction techniques proposed in the ALBERT model to lower memory consumption and increase training speed?\n', 'type': 'human', 'id': 'eb5117ca-d3d9-48c9-a9fb-0c2fc4ee67c0'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_xQh2AgwfByjPU2KLEtIQmmDf', 'function': {'arguments': '{"query":"ALBERT model parameter-reduction techniques"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--b6471f2d-8820-4068-b819-62cb7f4f981f-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'ALBERT model parameter-reduction techniques'}, 'id': 'call_xQh2AgwfB

2025-08-13 17:27:02,364 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 28%|██▊       | 18/65 [01:15<03:13,  4.11s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What are the three main steps for fine-tuning a model with the 🤗 Datasets library?\n', 'type': 'human', 'id': 'd592584e-edbe-471d-b506-9ce0ef789633'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_XbTPRjDgX6HPaSWHXGTUbKNi', 'function': {'arguments': '{"query":"three main steps for fine-tuning a model with the 🤗 Datasets library"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--56ad210f-332b-40d2-b4e4-e9b606176ba2-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'three main steps for fine-tuning a model with the 🤗 Datasets library'}, 'id': 'call_XbTPRjDgX6

2025-08-13 17:27:06,095 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 29%|██▉       | 19/65 [01:19<03:03,  4.00s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the maximum improvement in throughput achieved by Hugging Face Infinity compared to vanilla transformers?\n', 'type': 'human', 'id': 'e78f66d4-aef1-41ec-978a-fd596745f826'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_Acx2ZyDCxmWIjP3aLIpyDgQJ', 'function': {'arguments': '{"query":"maximum improvement in throughput achieved by Hugging Face Infinity compared to vanilla transformers"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--acbc4e9a-2802-48da-bb48-16348c3133b1-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'maximum improvement in throughp

2025-08-13 17:27:10,856 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 31%|███       | 20/65 [01:24<03:10,  4.22s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the command to upload a spaCy pipeline to the Hugging Face Hub?\n', 'type': 'human', 'id': 'cc3b9b1b-f8f4-4c39-a727-b01778bc4812'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_K3xCCa4kaZIm8mxZr1anWQNG', 'function': {'arguments': '{"query":"upload spaCy pipeline to Hugging Face Hub"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--1e43061a-2499-48c6-930c-cb11c81805f0-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'upload spaCy pipeline to Hugging Face Hub'}, 'id': 'call_K3xCCa4kaZIm8mxZr1anWQNG', 'type': 'tool_call'}], 'usage_metadata': {'input

2025-08-13 17:27:14,549 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 32%|███▏      | 21/65 [01:28<02:58,  4.07s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': "What is the time and memory complexity of the Nyströmformer's approximation of self-attention?\n", 'type': 'human', 'id': 'd180482e-0b5b-4a9a-a376-506f4e4cf6f7'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_DYvPivPVkP6L9whnVZfKuiJB', 'function': {'arguments': '{"query":"Nyströmformer time and memory complexity"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--895a166e-547e-4c0f-af40-5ffbdbd6de5d-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Nyströmformer time and memory complexity'}, 'id': 'call_DYvPivPVkP6L9whnVZfKuiJB', 'type': 'tool_call'}], 'usa

2025-08-13 17:27:19,354 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 34%|███▍      | 22/65 [01:32<03:04,  4.29s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the goal of the Named Entity Recognition task in token classification?\n', 'type': 'human', 'id': 'e66fdb9f-a27a-4cae-a72e-21ad958c967d'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_7jS5HWMcjGtmZx8H1GoEkkzn', 'function': {'arguments': '{"query":"goal of Named Entity Recognition in token classification"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--483c1e02-f2bf-427b-a083-0c3ec5b635bf-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'goal of Named Entity Recognition in token classification'}, 'id': 'call_7jS5HWMcjGtmZx8H1GoEkkzn', 'type': 'to

2025-08-13 17:27:22,524 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 35%|███▌      | 23/65 [01:36<02:45,  3.95s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the resolution of images used by the CLIPSeg model?\n', 'type': 'human', 'id': '1756e875-b059-4068-9132-d6e6fddda43c'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_u4w28a0E6oZQMYNKUNvrOgYn', 'function': {'arguments': '{"query":"CLIPSeg model image resolution"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--34f1b0f0-b9a4-49c9-ad07-76f2cd7bdec6-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'CLIPSeg model image resolution'}, 'id': 'call_u4w28a0E6oZQMYNKUNvrOgYn', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 362, 'output_tokens': 21

2025-08-13 17:27:27,180 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 37%|███▋      | 24/65 [01:40<02:50,  4.16s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What can you use Gradio for?\n', 'type': 'human', 'id': '5724e3e4-ee0f-404e-9541-9732014ef778'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_mlu23oOrE6FyHrRDkwyGJ2eP', 'function': {'arguments': '{"query":"Gradio use cases"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--cf17545e-3dd3-4f3b-9d31-3f37a1bf1d83-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Gradio use cases'}, 'id': 'call_mlu23oOrE6FyHrRDkwyGJ2eP', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 356, 'output_tokens': 19, 'total_tokens': 375, 'input_token_details': {'audio': 0, 

2025-08-13 17:27:30,183 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 38%|███▊      | 25/65 [01:43<02:32,  3.81s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What TensorFlow API function is used to load a saved tensor file?\n', 'type': 'human', 'id': 'ba358c36-8fc3-41de-bdd9-eeeecab88a4c'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_p3XiNKNMwH2GceZVL8ksILHC', 'function': {'arguments': '{"query":"TensorFlow API function to load a saved tensor file"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--ff355a99-2cc9-4e9b-aad5-9eb218dc5701-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'TensorFlow API function to load a saved tensor file'}, 'id': 'call_p3XiNKNMwH2GceZVL8ksILHC', 'type': 'tool_call'}], 'usage_meta

2025-08-13 17:27:35,145 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 40%|████      | 26/65 [01:48<02:42,  4.16s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'Where can you access the logs of your Endpoints in Hugging Face Endpoints?\n', 'type': 'human', 'id': '0d169d02-6d35-4e76-8265-27810094bb03'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_JkopRoxySNkarhzwfVJfmWdj', 'function': {'arguments': '{"query":"Hugging Face Endpoints logs access"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--b7573f4e-5cad-43e8-87f2-882f405b9763-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Hugging Face Endpoints logs access'}, 'id': 'call_JkopRoxySNkarhzwfVJfmWdj', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 3

2025-08-13 17:27:38,740 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 42%|████▏     | 27/65 [01:52<02:31,  3.99s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the latest task added to Hugging Face AutoTrain for Computer Vision?\n', 'type': 'human', 'id': 'df4c353a-ef7f-4099-94cb-7a450f54f35c'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_UnMtC4it92JuxoXqQvoRrqP9', 'function': {'arguments': '{"query":"latest task added to Hugging Face AutoTrain for Computer Vision"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--09ef9f39-5687-4c81-a366-e68540a405ae-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'latest task added to Hugging Face AutoTrain for Computer Vision'}, 'id': 'call_UnMtC4it92JuxoXqQvoRrqP9',

2025-08-13 17:27:42,129 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 43%|████▎     | 28/65 [01:55<02:20,  3.81s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the default repository type created by the `create_repo` function on Hugging Face Hub?\n', 'type': 'human', 'id': 'ecdf059b-dd2d-4720-bed2-7704c16f1870'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_qAb0orq0kB9FbAtqHrdhtkdq', 'function': {'arguments': '{"query":"default repository type created by create_repo function Hugging Face Hub"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--e279a58c-da1a-4598-971f-05ddc1a720fa-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'default repository type created by create_repo function Hugging Face Hub'}, 'i

2025-08-13 17:27:44,994 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 45%|████▍     | 29/65 [01:58<02:06,  3.53s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'How many splits does the "duorc" dataset have?\n', 'type': 'human', 'id': '860f3688-7ea0-4790-be2f-de168c2de4a6'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_5WPCOE2DQ3YbN0gVtGiR5EEL', 'function': {'arguments': '{"query":"duorc dataset splits"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--55b8d572-8321-423f-8eac-c021fe689206-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'duorc dataset splits'}, 'id': 'call_5WPCOE2DQ3YbN0gVtGiR5EEL', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 360, 'output_tokens': 19, 'total_tokens': 379, 'input_tok

2025-08-13 17:27:49,915 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 46%|████▌     | 30/65 [02:03<02:18,  3.94s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of Fully Sharded Data Parallel (FSDP) in distributed training?\n', 'type': 'human', 'id': '250dacc4-229a-4875-adc9-6e35ecd79b74'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_HMx1hojnTueaGU8HA2Qdd1I7', 'function': {'arguments': '{"query":"Fully Sharded Data Parallel (FSDP) purpose in distributed training"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--bbe28b5a-baa1-4760-ab3e-1657627b1eba-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Fully Sharded Data Parallel (FSDP) purpose in distributed training'}, 'id': 'call_HMx1hojnTueaGU

2025-08-13 17:27:54,709 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 48%|████▊     | 31/65 [02:08<02:22,  4.20s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What file format is used to save and store PyTorch model weights more securely than `.bin` files?\n', 'type': 'human', 'id': '528601cb-ec6a-4863-b45c-c7fa9bfd6cce'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_bU6Utbc6GD4QnxHbhwiMdmFf', 'function': {'arguments': '{"query":"PyTorch model weights file format more secure than .bin"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--189d79c2-90a6-47dc-8222-7d53008b1ab3-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'PyTorch model weights file format more secure than .bin'}, 'id': 'call_bU6Utbc6GD4QnxHbhwiMd

2025-08-13 17:28:01,141 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 49%|████▉     | 32/65 [02:14<02:40,  4.87s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What type of security certification does Hugging Face have?\n', 'type': 'human', 'id': 'b5569a6b-a8a9-4dc6-8196-c79f0996dbc0'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_K2Mie7qycdBz1uHLsQXLinVr', 'function': {'arguments': '{"query":"Hugging Face security certification"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--3b18b9cf-86ae-40bc-841c-3744edd50f20-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Hugging Face security certification'}, 'id': 'call_K2Mie7qycdBz1uHLsQXLinVr', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 359, 'output_t

2025-08-13 17:28:04,704 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 51%|█████     | 33/65 [02:18<02:23,  4.48s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What do RAG models combine to generate outputs?\n', 'type': 'human', 'id': '2399f264-a9d4-459e-b0de-088a359a2f00'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_TJTWw1gz5MZ3tZnAORqrWanz', 'function': {'arguments': '{"query":"RAG models combine to generate outputs"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--a5c5d995-cc9e-49ef-b5f3-7e1c18e7b96d-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'RAG models combine to generate outputs'}, 'id': 'call_TJTWw1gz5MZ3tZnAORqrWanz', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 358, 'output_tokens'

2025-08-13 17:28:08,246 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 52%|█████▏    | 34/65 [02:21<02:10,  4.20s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What library does MarkupLMFeatureExtractor use to extract data from HTML and XML files?\n', 'type': 'human', 'id': '754270ee-5c66-4db8-8ad9-106b455dd4f6'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_NFtiabzaGmCvIjpKgPuTzqUk', 'function': {'arguments': '{"query":"MarkupLMFeatureExtractor library for extracting data from HTML and XML files"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--8bd4337a-65dc-4aef-bdda-64640b5d1cf5-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'MarkupLMFeatureExtractor library for extracting data from HTML and XML files'}, '

2025-08-13 17:28:11,482 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 54%|█████▍    | 35/65 [02:25<01:57,  3.91s/it]



2025-08-13 17:28:14,468 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 55%|█████▌    | 36/65 [02:28<01:45,  3.63s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the title of the paper introducing the ByT5 model?\n', 'type': 'human', 'id': '7a6aa0f8-7056-435c-9b80-2784d50d0274'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_FD0Q8lgUrGcPHZ4T0Q3RhHPc', 'function': {'arguments': '{"query":"ByT5 model paper title"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--cad8f78a-31bf-47f2-96f9-63d6dc9478e4-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'ByT5 model paper title'}, 'id': 'call_FD0Q8lgUrGcPHZ4T0Q3RhHPc', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 362, 'output_tokens': 21, 'total_tokens':

2025-08-13 17:28:17,550 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 57%|█████▋    | 37/65 [02:31<01:37,  3.47s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the dimension of the feature vector for the base BERT model?\n', 'type': 'human', 'id': 'ce76e3f7-3aca-4d46-841d-be7957ac7cc1'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_eoiGxLRD175z18Gkrtqo1iOs', 'function': {'arguments': '{"query":"dimension of the feature vector for the base BERT model"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--c8ab5ec5-5628-457b-8ad9-41546cdc62ad-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'dimension of the feature vector for the base BERT model'}, 'id': 'call_eoiGxLRD175z18Gkrtqo1iOs', 'type': 'tool_call'}], 

2025-08-13 17:28:22,158 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 58%|█████▊    | 38/65 [02:35<01:42,  3.81s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What special identifier does the WordPiece Model use for continuing subwords?\n', 'type': 'human', 'id': '33784b78-32a4-4043-bc72-9cffa9325184'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_aEAcQ7IXVuYFsNkHCnhfC1C7', 'function': {'arguments': '{"query":"WordPiece Model special identifier for continuing subwords"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--00d6373c-1fb9-41d1-ae56-ff5762576959-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'WordPiece Model special identifier for continuing subwords'}, 'id': 'call_aEAcQ7IXVuYFsNkHCnhfC1C7', 'type': 

2025-08-13 17:28:27,690 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 60%|██████    | 39/65 [02:41<01:52,  4.33s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of the 🧨 Diffusers tutorials?\n', 'type': 'human', 'id': '331c1b85-0453-4e75-8b40-420cadcfb8ef'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_PnVxlKujqa61sQiPQMhFJIUR', 'function': {'arguments': '{"query":"Diffusers tutorials purpose"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--8e529add-f7a2-4e53-b883-ed433c31874c-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Diffusers tutorials purpose'}, 'id': 'call_PnVxlKujqa61sQiPQMhFJIUR', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 361, 'output_tokens': 19, 'total_tokens'

2025-08-13 17:28:31,007 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 62%|██████▏   | 40/65 [02:44<01:40,  4.02s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': "What is the default setting for the `allow_flagging` parameter in Gradio's `Interface`?\n", 'type': 'human', 'id': '7cc10e95-3c87-4b5a-a304-a1e4c00afe36'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_zRd5CLrKnUBdB4T46qf7IKRU', 'function': {'arguments': '{"query":"default setting for the allow_flagging parameter in Gradio\'s Interface"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--f0514336-c54e-4db6-bb0a-69c1480a32ba-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': "default setting for the allow_flagging parameter in Gradio's Interface"}, 'id': 'call_

2025-08-13 17:28:34,388 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 63%|██████▎   | 41/65 [02:47<01:31,  3.83s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'Where can the full code for the Stable Diffusion demo be found?\n', 'type': 'human', 'id': '596a25b7-d1d2-45cc-a36e-93be4eda34c6'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_cWVqktzOO0ocCAzwsmO9wKmc', 'function': {'arguments': '{"query":"Stable Diffusion demo full code"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--f5849aaf-1952-418d-8ba0-5c1df23027df-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Stable Diffusion demo full code'}, 'id': 'call_cWVqktzOO0ocCAzwsmO9wKmc', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 362, 'output_token

2025-08-13 17:28:38,310 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 65%|██████▍   | 42/65 [02:51<01:28,  3.86s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What transformation does the FNet model use to replace the self-attention layer in a BERT model?\n', 'type': 'human', 'id': 'c04006b7-f215-481c-a81f-a549ef08b17a'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_5txv6TH0oyl9DSBZK70n5pPh', 'function': {'arguments': '{"query":"FNet model self-attention layer replacement BERT"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--457e755d-1395-4300-bf49-6bcaf130a160-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'FNet model self-attention layer replacement BERT'}, 'id': 'call_5txv6TH0oyl9DSBZK70n5pPh', 'type': '

2025-08-13 17:28:41,676 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 66%|██████▌   | 43/65 [02:55<01:21,  3.71s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': "What type of test should typically accompany a bug fix in Gradio's testing strategy?\n", 'type': 'human', 'id': 'b7f5681c-3f86-4fc5-b36d-c6af64992d7b'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_ktQf8szL4SYHJKVGjJ6noLTF', 'function': {'arguments': '{"query":"Gradio testing strategy bug fix"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--b1cd36e6-834b-45da-965e-8d8a048073ec-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Gradio testing strategy bug fix'}, 'id': 'call_ktQf8szL4SYHJKVGjJ6noLTF', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens

2025-08-13 17:28:45,599 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 68%|██████▊   | 44/65 [02:59<01:19,  3.77s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'How can you force mixed precision training when initializing the Accelerator in 🤗 Accelerate?\n', 'type': 'human', 'id': 'ec3827ed-36f0-41c5-a1b3-fd8268cf6d6b'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_qJsUREIdVjjVmUHZNtKTcnF2', 'function': {'arguments': '{"query":"force mixed precision training when initializing the Accelerator in 🤗 Accelerate"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--b26f47a5-f368-4860-a1a0-420b8f134aa1-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'force mixed precision training when initializing the Accelerator in 🤗 A

2025-08-13 17:28:50,548 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 69%|██████▉   | 45/65 [03:04<01:22,  4.13s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of tokenizers in the NLP pipeline?\n', 'type': 'human', 'id': 'dd860b58-f9f5-4de8-adcd-1c7bd04c7a84'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_nZzi4Pv5OmdxWMai7Cofp9oA', 'function': {'arguments': '{"query":"purpose of tokenizers in NLP pipeline"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--dfc36c74-0096-42aa-9d0c-92e598dd664b-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'purpose of tokenizers in NLP pipeline'}, 'id': 'call_nZzi4Pv5OmdxWMai7Cofp9oA', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 360, 'output_to

2025-08-13 17:28:54,703 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 71%|███████   | 46/65 [03:08<01:18,  4.14s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of the Safety Checker in the Diffusers library?\n', 'type': 'human', 'id': 'a45c4d7f-afe5-4e66-af99-4b3ec5031642'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_FAgx1ZScDViKBp8RXWaJLgLQ', 'function': {'arguments': '{"query":"Safety Checker in the Diffusers library"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--4e7dba05-5c66-4663-96e9-3637b7995f68-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Safety Checker in the Diffusers library'}, 'id': 'call_FAgx1ZScDViKBp8RXWaJLgLQ', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens'

2025-08-13 17:28:58,204 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 72%|███████▏  | 47/65 [03:11<01:10,  3.94s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What Python class allows you to retrieve Discussions and Pull Requests from a given repository on the Hugging Face Hub?\n', 'type': 'human', 'id': '8b7da79e-4d2f-4664-be0b-e30e71c65172'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_IurSy1Xtb5VNlqvqRfWiIEuS', 'function': {'arguments': '{"query":"Python class retrieve Discussions and Pull Requests from a repository on Hugging Face Hub"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--5ef14647-3ccf-49d3-88dd-2dfdce303c78-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Python class retrieve Discussions an

2025-08-13 17:29:01,185 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 74%|███████▍  | 48/65 [03:14<01:02,  3.66s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the name of the new library introduced by Hugging Face for hosting scikit-learn models?\n', 'type': 'human', 'id': 'abc61b5c-3617-4c1a-a31c-9c9f1c551a56'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_P66XtXPzpVeVs9EBdwzOiP9W', 'function': {'arguments': '{"query":"new library introduced by Hugging Face for hosting scikit-learn models"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--c87ddccc-2840-43d0-9a79-c09993d289f1-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'new library introduced by Hugging Face for hosting scikit-learn models'}, 'id':

2025-08-13 17:29:06,181 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 75%|███████▌  | 49/65 [03:19<01:04,  4.06s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of Textual Inversion?\n', 'type': 'human', 'id': 'f26dba3d-a4ba-438b-9f9a-a7374ac03b8e'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_BbY38KPQP9mgEToBB8vulM78', 'function': {'arguments': '{"query":"Textual Inversion purpose"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--70acdae4-ca16-41e8-bb38-4e2074c78021-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Textual Inversion purpose'}, 'id': 'call_BbY38KPQP9mgEToBB8vulM78', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 358, 'output_tokens': 20, 'total_tokens': 378, 'inpu

2025-08-13 17:29:10,613 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 77%|███████▋  | 50/65 [03:24<01:02,  4.17s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the recommended multiple of batch size for fp16 data type on an A100 GPU?\n', 'type': 'human', 'id': 'd2a5b7c4-9086-4a4c-9467-abde897f6223'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_nUQCX8Emj0WFuMvbwDLI1xxE', 'function': {'arguments': '{"query":"recommended multiple of batch size for fp16 data type on A100 GPU"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--dd85832b-5622-48b6-a866-c6301313d51c-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'recommended multiple of batch size for fp16 data type on A100 GPU'}, 'id': 'call_nUQCX8Emj0WFuMvbw

2025-08-13 17:29:16,768 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 78%|███████▊  | 51/65 [03:30<01:06,  4.77s/it]



2025-08-13 17:29:26,458 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 80%|████████  | 52/65 [03:40<01:21,  6.24s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'How can you install the Hugging Face Unity API in your Unity project?\n', 'type': 'human', 'id': '6eb713e9-7a90-42b4-89b2-35b39019a787'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_ZElW7zjvirGlUtudJSwZEjir', 'function': {'arguments': '{"query":"install Hugging Face Unity API in Unity project"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--3ac18dd6-a56b-48be-a6fe-306e071bc15d-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'install Hugging Face Unity API in Unity project'}, 'id': 'call_ZElW7zjvirGlUtudJSwZEjir', 'type': 'tool_call'}], 'usage_metadata

2025-08-13 17:29:30,498 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 82%|████████▏ | 53/65 [03:44<01:06,  5.58s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the pretraining objective of the Wav2Vec2 context network?\n', 'type': 'human', 'id': '9998ad2e-f2cd-47dd-8734-4a20ecd52491'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_B6xQzGzz4SJWhOL9bHcRh4qz', 'function': {'arguments': '{"query":"Wav2Vec2 pretraining objective"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--588a7952-a7d5-4218-a082-3118539f3335-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Wav2Vec2 pretraining objective'}, 'id': 'call_B6xQzGzz4SJWhOL9bHcRh4qz', 'type': 'tool_call'}], 'usage_metadata': {'input_tokens': 364, 'output_toke

2025-08-13 17:29:33,869 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 83%|████████▎ | 54/65 [03:47<00:54,  4.92s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the default checkpoint used by the sentiment analysis pipeline in the Transformers library?\n', 'type': 'human', 'id': '64823c96-fd97-4041-ba79-0e1a52c43e77'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_HCBrm17IS5wVbc2vu8wJvHQ2', 'function': {'arguments': '{"query":"default checkpoint sentiment analysis pipeline Transformers library"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--e3006055-ae29-41e3-aa79-ef97ae00572e-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'default checkpoint sentiment analysis pipeline Transformers library'}, 'id': '

2025-08-13 17:29:38,901 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 85%|████████▍ | 55/65 [03:52<00:49,  4.95s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of the notebook "How to use DeepSpeed to train models with billions of parameters on Habana Gaudi"?\n', 'type': 'human', 'id': '3b374270-bc3f-4d5d-82d3-573cc932afab'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_3pUYEx152oV5lGCj0L84ZNae', 'function': {'arguments': '{"query":"How to use DeepSpeed to train models with billions of parameters on Habana Gaudi"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--9ffc0282-06fc-4933-b4a5-eee269d9081c-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'How to use DeepSpeed to train models with bil

2025-08-13 17:29:43,540 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 86%|████████▌ | 56/65 [03:57<00:43,  4.86s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What command line module does PyTorch provide to run a script on multiple GPUs?\n', 'type': 'human', 'id': 'f23a840b-6bb8-47ae-8c77-c71bd9e317af'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_XpB4RoOZt1NZ0X0FBwkLmTkW', 'function': {'arguments': '{"query":"PyTorch command line module for running a script on multiple GPUs"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--3142949e-8976-4f92-a8ab-a36ae2df10ad-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'PyTorch command line module for running a script on multiple GPUs'}, 'id': 'call_XpB4RoOZt1NZ0X0FBwk

2025-08-13 17:29:47,588 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 88%|████████▊ | 57/65 [04:01<00:36,  4.62s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the most popular vision transformer model on the Hugging Face Model Hub for image classification?\n', 'type': 'human', 'id': 'c1208530-b7ef-4873-8a13-64795ce4d455'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_tr9cwCpwHjvpL09z6B9b8xl9', 'function': {'arguments': '{"query":"most popular vision transformer model for image classification site:huggingface.co"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--3471909f-5773-41d6-b767-94838c5dac0e-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'most popular vision transformer model for image classific

2025-08-13 17:29:50,560 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 89%|████████▉ | 58/65 [04:04<00:28,  4.12s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the command to upload an ESPnet model to a Hugging Face repository?\n', 'type': 'human', 'id': 'ff10692a-42bd-4b33-ad09-79ff02ba6210'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_7S1wpTPQIJ6OvBmw7YFpYOCj', 'function': {'arguments': '{"query":"upload ESPnet model to Hugging Face repository"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--aa8c2abb-c567-4a29-8c7f-ab6de75822b8-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'upload ESPnet model to Hugging Face repository'}, 'id': 'call_7S1wpTPQIJ6OvBmw7YFpYOCj', 'type': 'tool_call'}], 'usage_meta

2025-08-13 17:29:54,696 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 91%|█████████ | 59/65 [04:08<00:24,  4.13s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What file should be added to a model repository to install custom Python dependencies for Inference Endpoints?\n', 'type': 'human', 'id': 'bc871f17-3af9-440e-a01e-9238d4f79b12'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_ZADbKtcSR8yjH9SDhFf5GjA9', 'function': {'arguments': '{"query":"custom Python dependencies Inference Endpoints model repository file"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--d787bb0d-b75e-4c96-8bc1-54d469f4d4b9-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'custom Python dependencies Inference Endpoints model repository fi

2025-08-13 17:29:57,835 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 92%|█████████▏| 60/65 [04:11<00:19,  3.83s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'How many images are needed to teach new concepts to Stable Diffusion using Textual Inversion?\n', 'type': 'human', 'id': '3f69493b-978e-4772-acb8-20cba2c9df9f'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_Yb5qeS18OVd57DsSvF4Swjbc', 'function': {'arguments': '{"query":"Textual Inversion Stable Diffusion how many images needed"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--420e6752-3fdc-4c67-bd8a-05d9ed1def5d-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Textual Inversion Stable Diffusion how many images needed'}, 'id': 'call_Yb5qeS18OVd57DsSvF4Sw

2025-08-13 17:30:01,297 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 94%|█████████▍| 61/65 [04:14<00:14,  3.72s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the maximum size of a model checkpoint before it is automatically sharded in Transformers version 4.18.0?\n', 'type': 'human', 'id': '05b5dcf6-5d53-48aa-897b-95a95f2e9c84'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_wRkZH7dR65JX6jytR5WMiKZF', 'function': {'arguments': '{"query":"maximum size of a model checkpoint before it is automatically sharded in Transformers version 4.18.0"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--fa87b081-62eb-44ad-877d-2f131b0d6898-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'maximum size of a model checkpo

2025-08-13 17:30:05,340 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 95%|█████████▌| 62/65 [04:18<00:11,  3.82s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the purpose of Weights and Biases (W&B) for data scientists and machine learning scientists?\n', 'type': 'human', 'id': '21073c59-100c-4e92-a9ed-5971ded597e5'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_IrMfK6dXfOeJ0SERPOdRMOFl', 'function': {'arguments': '{"query":"Weights and Biases W&B purpose for data scientists and machine learning scientists"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--53128416-43e3-4a95-bd6c-d4d016b5f3eb-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Weights and Biases W&B purpose for data scientists and machine

2025-08-13 17:30:08,872 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 97%|█████████▋| 63/65 [04:22<00:07,  3.73s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the name of the open-source library created by Hugging Face to simplify Transformer acceleration?\n', 'type': 'human', 'id': 'f24c4dc2-e30d-434e-bf4b-3d75c21433e3'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_V5E50znKeNYlbMvcoE2va0XG', 'function': {'arguments': '{"query":"open-source library created by Hugging Face to simplify Transformer acceleration"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--af94b493-4f08-456b-bdd2-5d9e6c081bca-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'open-source library created by Hugging Face to simplify Tra

2025-08-13 17:30:12,595 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
 98%|█████████▊| 64/65 [04:26<00:03,  3.73s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What parameter is used to ensure that elements in a row have the same height in Gradio?\n', 'type': 'human', 'id': 'c46c26cc-682e-4852-9b62-1f09fcc42aae'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_GimwLOegVFbVwvbOEah7NmHD', 'function': {'arguments': '{"query":"Gradio same height elements in a row parameter"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--279307b7-456d-4af0-8e28-fcc608eeb70d-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'Gradio same height elements in a row parameter'}, 'id': 'call_GimwLOegVFbVwvbOEah7NmHD', 'type': 'tool_call'}],

2025-08-13 17:30:16,117 - INFO - HTTP Request: POST http://localhost:8001/api/v1/chat-evaluation "HTTP/1.1 200 OK"
100%|██████████| 65/65 [04:29<00:00,  4.15s/it]

{'messages': [{'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'HumanMessage'], 'kwargs': {'content': 'What is the command to install the latest version of Optimum with OpenVINO support?\n', 'type': 'human', 'id': 'cf695375-ecba-47b7-bf4e-778cda1a0181'}}, {'lc': 1, 'type': 'constructor', 'id': ['langchain', 'schema', 'messages', 'AIMessage'], 'kwargs': {'content': '', 'additional_kwargs': {'tool_calls': [{'index': 0, 'id': 'call_6Aw9FmAouJ3wTnVLE6TmGu6o', 'function': {'arguments': '{"query":"install latest version of Optimum with OpenVINO support"}', 'name': 'data_retriever'}, 'type': 'function'}]}, 'response_metadata': {'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c'}, 'type': 'ai', 'id': 'run--f3f54d28-905e-4c11-82c6-250680f52763-0', 'tool_calls': [{'name': 'data_retriever', 'args': {'query': 'install latest version of Optimum with OpenVINO support'}, 'id': 'call_6Aw9FmAouJ3wTnVLE6TmGu6o', 'type': 




In [5]:
from ragas import EvaluationDataset
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
    FactualCorrectness
)
# Wrap the sample records in a ragas EvaluationDataset.
# `data` is not defined in this cell; it must already exist from an earlier cell.
evaluation_dataset = EvaluationDataset.from_list(data)

# Initialize the metrics that will be passed to the evaluation run.
metrics = [faithfulness, answer_relevancy, context_precision, context_recall]
# Alternative metric set, kept disabled for reference:
# metrics = [FactualCorrectness()]

In [6]:
# Sanity check: show how many samples the evaluation dataset holds
# (the recorded run printed 65).
print(len(evaluation_dataset))

65


In [None]:
from app.config import configs

import os
# Export the OpenAI key from the app config into the process environment.
# NOTE(review): presumably this is how the evaluate() call below authenticates
# (the recorded log shows requests to api.openai.com) — confirm.
os.environ['OPENAI_API_KEY'] = configs.OPENAI_API_KEY

# Score the whole evaluation dataset with the selected metrics.
result = evaluate(evaluation_dataset, metrics=metrics)

print(result)

Evaluating:   0%|          | 0/260 [00:00<?, ?it/s]2025-08-13 17:30:19,680 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-08-13 17:30:19,745 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-08-13 17:30:19,748 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-08-13 17:30:19,780 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-08-13 17:30:19,816 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-08-13 17:30:19,829 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-08-13 17:30:19,867 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-08-13 17:30:19,870 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-08-13 17:30:20,716 - INFO - HTTP Request