In [1]:
import os

os.chdir("/app")

from langchain_anthropic import ChatAnthropic
from ragas.testset import TestsetGenerator, evolutions

from apps.inners.models.dtos.graph_state import GraphState
from apps.inners.use_cases.document_processor.category_document_processor import CategoryDocumentProcessor
from apps.inners.use_cases.document_processor.partition_document_processor import PartitionDocumentProcessor
from apps.inners.use_cases.document_processor.summary_document_processor import SummaryDocumentProcessor
from apps.inners.use_cases.graphs.long_form_qa_graph import LongFormQaGraph

from apps.inners.use_cases.graphs.passage_search_graph import PassageSearchGraph

from langchain_community.embeddings.infinity import InfinityEmbeddings

from apps.outers.settings.one_embedding_setting import OneEmbeddingSetting

from tools import cache_tool

import gc

from apps.outers.settings.one_llm_setting import OneLlmSetting

from sqlmodel.ext.asyncio.session import AsyncSession
from starlette.datastructures import State

from apps.inners.use_cases.managements.document_management import DocumentManagement
from apps.inners.use_cases.managements.file_document_management import FileDocumentManagement
from apps.inners.use_cases.managements.text_document_management import TextDocumentManagement
from apps.inners.use_cases.managements.web_document_management import WebDocumentManagement
from typing import Dict, Any

import dotenv
from datasets import load_dataset
from dotenv import find_dotenv
from langgraph.graph import StateGraph
from langgraph.graph.graph import CompiledGraph, END
from ragas import evaluate, metrics

from apps.outers.datastores.four_datastore import FourDatastore
from apps.outers.datastores.one_datastore import OneDatastore
from apps.outers.datastores.three_datastore import ThreeDatastore
from apps.outers.datastores.two_datastore import TwoDatastore
from apps.outers.repositories.file_document_repository import FileDocumentRepository
from apps.outers.repositories.text_document_repository import TextDocumentRepository
from apps.outers.repositories.web_document_repository import WebDocumentRepository
from tests.containers.test_container import TestContainer
from tests.seeders.all_seeder import AllSeeder


2024-04-12 06:30:28.422539: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-12 06:30:28.442070: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-12 06:30:28.442094: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-12 06:30:28.442724: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-12 06:30:28.446703: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# !pip show flagembedding
# !pip show langchain-anthropic
# !pip show pymilvus
# !pip show opencv-python

In [3]:
import tensorflow

tensorflow.config.list_physical_devices('GPU')

2024-04-12 06:30:31.600980: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-12 06:30:31.601669: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-12 06:30:31.601694: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
import torch

torch.cuda.is_available()

True

In [5]:
dotenv.load_dotenv(find_dotenv())


True

In [10]:

test_container = TestContainer()

one_llm_setting: OneLlmSetting = test_container.applications.settings.one_llm()
one_embedding_setting: OneEmbeddingSetting = test_container.applications.settings.one_embedding()

one_datastore: OneDatastore = test_container.applications.datastores.one()
two_datastore: TwoDatastore = test_container.applications.datastores.two()
three_datastore: ThreeDatastore = test_container.applications.datastores.three()
four_datastore: FourDatastore = test_container.applications.datastores.four()
temp_datastore: ThreeDatastore = test_container.applications.datastores.temp()

file_document_repository: FileDocumentRepository = test_container.applications.repositories.file_document()
text_document_repository: TextDocumentRepository = test_container.applications.repositories.text_document()
web_document_repository: WebDocumentRepository = test_container.applications.repositories.web_document()

document_management: DocumentManagement = test_container.applications.use_cases.managements.document()
file_document_management: FileDocumentManagement = test_container.applications.use_cases.managements.file_document()
text_document_management: TextDocumentManagement = test_container.applications.use_cases.managements.text_document()
web_document_management: WebDocumentManagement = test_container.applications.use_cases.managements.web_document()

long_form_qa_graph: LongFormQaGraph = test_container.applications.use_cases.graphs.long_form_qa()
passage_search_graph: PassageSearchGraph = test_container.applications.use_cases.graphs.passage_search()

all_seeder: AllSeeder = test_container.seeders.all()

In [11]:
await all_seeder.up()

In [19]:
await all_seeder.down()

In [5]:
await two_datastore.async_client.set("test", "test", ex=10)

True

In [8]:
partition_document_processor: PartitionDocumentProcessor = PartitionDocumentProcessor(
    document_management=document_management,
    file_document_management=file_document_management,
    text_document_management=text_document_management,
    web_document_management=web_document_management,
)

summary_document_processor: SummaryDocumentProcessor = SummaryDocumentProcessor()
category_document_processor: CategoryDocumentProcessor = CategoryDocumentProcessor(
    summary_document_processor=summary_document_processor
)

In [9]:
state = State()
state.authorized_session = all_seeder.session_seeder.session_fake.data[0]
state.session = one_datastore.get_session()
elements = await partition_document_processor.partition(
    state=state,
    document_id=all_seeder.document_seeder.document_fake.data[0].id
)

IndexError: list index out of range

In [None]:
categorized_elements = await category_document_processor.categorize_elements(
    elements=elements
)
categorized_elements.texts = categorized_elements.texts[:1]
categorized_documents = await category_document_processor.get_categorized_documents(
    categorized_elements=categorized_elements,
    summarization_model=ChatAnthropic(
        anthropic_api_key=one_llm_setting.LLM_ONE_ANTHROPIC_API_KEY_ONE,
        model="claude-3-haiku-20240307",
        max_tokens=100,
        streaming=True,
        temperature=0
    ),
    is_include_tables=False,
    is_include_images=False,
    chunk_size=100,
    overlap_size=50,
)
categorized_documents.texts

In [12]:
output_state: GraphState


async def handler(session: AsyncSession):
    global output_state

    state: State = State()
    state.authorized_session = all_seeder.session_seeder.session_fake.data[0]
    state.session = session

    compiled_long_form_qa_graph: CompiledGraph = long_form_qa_graph.compile()

    data: Dict[str, Any] = {
        "state": state,
        "document_ids": [all_seeder.document_seeder.document_fake.data[0].id,
                         all_seeder.document_seeder.document_fake.data[1].id,
                         all_seeder.document_seeder.document_fake.data[2].id],
        "llm": {
            "model_name": "claude-3-haiku-20240307",
            "max_token": 500,
        },
        "preprocessor_setting": {
            "is_force_refresh_categorized_element": False,
            "is_force_refresh_categorized_document": False,
            "chunk_size": 500,
            "overlap_size:": 50,
            "is_include_tables": False,
            "is_include_images": False,
        },
        "embedder_setting": {
            "is_force_refresh_embedding": False,
            "is_force_refresh_document": False,
            # "model_name": "intfloat/multilingual-e5-large-instruct",
            "model_name": "BAAI/bge-m3",
            "query_instruction": "Given the question, retrieve passage that answer the question.",
        },
        "retriever_setting": {
            "is_force_refresh_relevant_document": False,
            "top_k": 50,
        },
        "reranker_setting": {
            "model_name": "BAAI/bge-reranker-v2-m3",
            "is_force_refresh_re_ranked_document": False,
            "top_k": 5,
        },
        "question": "what is political science?",
        "generator_setting": {
            "is_force_refresh_generated_answer": False,
            "is_force_refresh_generated_question": False,
            "is_force_refresh_generated_hallucination_grade_hash": False,
            "is_force_refresh_generated_answer_relevancy_grade_hash": False,
            "prompt_text": """Instruction: Create a concise and informative answer for a given question based solely on the given passages. You must only use information from the given passages. Use an unbiased and journalistic tone. Do not repeat text. Cite at least one passage in each sentence. Cite the passages using passage number notation like "[number]". If multiple passages contain the answer, cite those passages like "[number, number, etc.]". If the passages do not contain the answer to the question, then say that answering is not possible given the available information with the explanation. Ensure the output is only the answer without re-explain the instruction.
            Passages:
            {% for passage in passages %}
            [{{ loop.index }}]={{ passage.page_content }}
            {% endfor %}
            Question: {{ question }}
            Answer:"""
        },
        "transform_question_max_retry": 0
    }

    print(compiled_long_form_qa_graph.get_graph().draw_mermaid())

    input_state: GraphState = GraphState(
        data=data
    )
    output_state = await compiled_long_form_qa_graph.ainvoke(input_state)

In [13]:
# cache_tool.clear_cache()
await one_datastore.retryable(handler)
torch.cuda.empty_cache()
gc.collect()
cache_tool.get_cache()

%%{init: {'flowchart': {'curve': 'linear'}}}%%
graph TD;
	__start__[__start__]:::startclass;
	__end__[__end__]:::endclass;
	node_get_llm_model([node_get_llm_model]):::otherclass;
	node_prepare_get_categorized_documents([node_prepare_get_categorized_documents]):::otherclass;
	node_get_categorized_documents([node_get_categorized_documents]):::otherclass;
	node_prepare_embed([node_prepare_embed]):::otherclass;
	node_embed([node_embed]):::otherclass;
	node_get_relevant_documents([node_get_relevant_documents]):::otherclass;
	node_get_re_ranked_documents([node_get_re_ranked_documents]):::otherclass;
	node_generate_answer([node_generate_answer]):::otherclass;
	node_grade_hallucination([node_grade_hallucination]):::otherclass;
	node_grade_answer_relevancy([node_grade_answer_relevancy]):::otherclass;
	node_transform_question([node_transform_question]):::otherclass;
	node_get_categorized_documents_node_decide_get_categorized_documents_or_embed([node_get_categorized_documents_node_decide_get_cate

Some weights of the model checkpoint at microsoft/table-transformer-structure-recognition were not used when initializing TableTransformerForObjectDetection: ['model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BaseDocumentProcessor.categorize_elements: Ignoring element type Title.
BaseDocumentProcessor.categorize_elements: Ignoring element type ListItem.
BaseDocumentProcessor.categorize_elements: Ignoring element type Header.
BaseDocumentProcessor.categorize_elements: Ignoring element type FigureCaption.
BaseDocumentProcessor.categorize_elements: Ignoring element type Header.
BaseDocumentProcessor.categorize_elements: Ignoring element type Header.
BaseDocumentProcessor.categorize_elements: Ignoring element type Header.
BaseDocumentProcessor.categorize_elements: Ignoring element type Title.
BaseDocumentProcessor.categorize_elements: Ignoring element type FigureCaption.
BaseDocumentProcessor.categorize_elements: Ignoring element type Header.
BaseDocumentProcessor.categorize_elements: Ignoring element type Header.
BaseDocumentProcessor.categorize_elements: Ignoring element type Header.
BaseDocumentProcessor.categorize_elements: Ignoring element type Header.
BaseDocumentProcessor.categorize_elem



BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocumentProcessor.categorize_elements: Ignoring element type HTMLTitle.
BaseDocument

Fetching 22 files:   0%|          | 0/22 [00:00<?, ?it/s]

  warn_beta(


{'categorized_element-2d0bce8d956ebfdc7b486548be891862c1368d256747efd688fb65bff867cdd1': ElementCategory(texts=[<unstructured.documents.elements.NarrativeText object at 0x7f39641ec580>, <unstructured.documents.elements.NarrativeText object at 0x7f39641ed900>, <unstructured.documents.elements.Text object at 0x7f3966ab5b40>, <unstructured.documents.elements.NarrativeText object at 0x7f39641ece80>, <unstructured.documents.elements.NarrativeText object at 0x7f39641eca60>, <unstructured.documents.elements.NarrativeText object at 0x7f39641ec7c0>, <unstructured.documents.elements.NarrativeText object at 0x7f39641ecca0>, <unstructured.documents.elements.NarrativeText object at 0x7f39641ed8a0>, <unstructured.documents.elements.NarrativeText object at 0x7f39641ecd30>, <unstructured.documents.elements.NarrativeText object at 0x7f394499d1b0>, <unstructured.documents.elements.Text object at 0x7f3966ab4040>, <unstructured.documents.elements.NarrativeText object at 0x7f394499d120>, <unstructured.docu

In [14]:
output_state

{'data': {'state': <starlette.datastructures.State at 0x7f395c5edf00>,
  'document_ids': [UUID('b9d03df2-ddb2-4de2-9041-839ac60e3ac9'),
   UUID('14c19f42-81d5-49ad-af57-591a634e981e'),
   UUID('beab4b42-1e43-4429-97b5-ec92bc8e9d2e')],
  'llm': {'model_name': 'claude-3-haiku-20240307',
   'max_token': 500,
   'model': ChatAnthropic(model='claude-3-haiku-20240307', max_tokens=500, temperature=0.0, anthropic_api_key=SecretStr('**********'), streaming=True, _client=<anthropic.Anthropic object at 0x7f3ae658b3d0>, _async_client=<anthropic.AsyncAnthropic object at 0x7f3ae6589360>)},
  'preprocessor_setting': {'is_force_refresh_categorized_element': False,
   'is_force_refresh_categorized_document': False,
   'chunk_size': 500,
   'overlap_size:': 50,
   'is_include_tables': False,
   'is_include_images': False},
  'embedder_setting': {'is_force_refresh_embedding': False,
   'is_force_refresh_document': False,
   'model_name': 'BAAI/bge-m3',
   'query_instruction': 'Given the question, retriev

In [15]:
output_state["data"]["question"]

'what is political science?'

In [16]:
output_state["data"]["relevant_documents"]

[Document(page_content='Political science is the scientific study of politics. It is a social science dealing with systems of governance and power, and the analysis of political activities, political thoughts, political behavior, and political structures.', metadata={'id': 'c1e2d497-c3cb-439d-8a25-f95445dd5bad', 'category': 'text', 'orig_metadata': [{'languages': ['eng'], 'filetype': 'text/plain'}], 'document_id': '14c19f42-81d5-49ad-af57-591a634e981e', 'relevancy_score': 0.032786883413791656}),
 Document(page_content='science whose pursuits are aimed at solving different cognitive problems commonly associated with the human intelligence, such as learning, problem solving, and pattern recognition, and subsequently adapting [11]. As a the- ory, Chassignol et al. deﬁned AI as a theoretical framework guiding the development and use of computer systems with the capabilities of human beings, more particularly, intelli- gence and the ability to perform tasks that require human intelligence, 

In [17]:
output_state["data"]["re_ranked_documents"]

[Document(page_content='Political science is the scientific study of politics. It is a social science dealing with systems of governance and power, and the analysis of political activities, political thoughts, political behavior, and political structures.', metadata={'re_ranked_score': 0.9999151889582765, 'id': 'c1e2d497-c3cb-439d-8a25-f95445dd5bad', 'category': 'text', 'orig_metadata': [{'languages': ['eng'], 'filetype': 'text/plain'}], 'document_id': '14c19f42-81d5-49ad-af57-591a634e981e', 'relevancy_score': 0.032786883413791656}),
 Document(page_content='science whose pursuits are aimed at solving different cognitive problems commonly associated with the human intelligence, such as learning, problem solving, and pattern recognition, and subsequently adapting [11]. As a the- ory, Chassignol et al. deﬁned AI as a theoretical framework guiding the development and use of computer systems with the capabilities of human beings, more particularly, intelli- gence and the ability to perform 

In [18]:
output_state["data"]["generated_answer"]

'Political science is the scientific study of politics. It is a social science dealing with systems of governance and power, and the analysis of political activities, political thoughts, political behavior, and political structures [1].'

In [4]:
document_id = all_seeder.file_document_seeder.file_document_fake.data[0].id
output_state: GraphState = GraphState(
    data={}
)


async def handler(session: AsyncSession):
    global output_state

    state: State = State()
    state.authorized_session = all_seeder.session_seeder.session_fake.data[0]
    state.session = session

    graph_lfqa: LongFormQaGraph = LongFormQaGraph(
        one_embedding_setting=one_embedding_setting,
        one_llm_setting=one_llm_setting,
        two_datastore=two_datastore,
        four_datastore=four_datastore,
        category_document_processor=category_document_processor
    )

    graph_document: StateGraph = StateGraph(GraphState)
    graph_document.add_node(
        key=graph_lfqa.node_get_llm_model.__name__,
        action=graph_lfqa.node_get_llm_model
    )
    graph_document.add_node(
        key=graph_lfqa.node_prepare_get_categorized_documents.__name__,
        action=graph_lfqa.node_prepare_get_categorized_documents
    )
    graph_document.add_node(
        key=graph_lfqa.node_get_categorized_documents.__name__,
        action=graph_lfqa.node_get_categorized_documents
    )

    graph_document.set_entry_point(
        key=graph_lfqa.node_get_llm_model.__name__
    )

    graph_document.add_edge(
        start_key=graph_lfqa.node_get_llm_model.__name__,
        end_key=graph_lfqa.node_prepare_get_categorized_documents.__name__
    )
    graph_document.add_edge(
        start_key=graph_lfqa.node_prepare_get_categorized_documents.__name__,
        end_key=graph_lfqa.node_get_categorized_documents.__name__
    )
    graph_document.add_conditional_edges(
        start_key=graph_lfqa.node_get_categorized_documents.__name__,
        condition=graph_lfqa.node_decide_get_categorized_documents_or_embed,
        conditional_edge_mapping={
            "GET_CATEGORIZED_DOCUMENTS": graph_lfqa.node_prepare_get_categorized_documents.__name__,
            "EMBED": END
        }
    )
    compiled_graph_document = graph_document.compile()

    data: Dict[str, Any] = {
        "state": state,
        "document_ids": [document_id],
        "llm": {
            "model_name": "claude-3-haiku-20240307",
            "max_token": 500,
        },
        "preprocessor_setting": {
            "is_force_refresh_categorized_element": False,
            "is_force_refresh_categorized_document": False,
            "chunk_size": 50,
            "overlap_size:": 10,
            "is_include_tables": False,
            "is_include_images": False,
        },
    }

    input_state = GraphState(
        data=data
    )
    output_state = await compiled_graph_document.ainvoke(
        input=input_state
    )


await one_datastore.retryable(handler)
output_state

NameError: name 'all_seeder' is not defined

In [19]:
documents = output_state["data"]["categorized_documents"][document_id].get_all()

generator_llm = ChatAnthropic(
    model="claude-3-haiku-20240307",
    anthropic_api_key=one_llm_setting.LLM_ONE_ANTHROPIC_API_KEY_ONE
)
critic_llm = ChatAnthropic(
    model="claude-3-opus-20240229",
    anthropic_api_key=one_llm_setting.LLM_ONE_ANTHROPIC_API_KEY_ONE
)
embeddings = InfinityEmbeddings(
    model="intfloat/multilingual-e5-large-instruct",
    infinity_api_url=one_embedding_setting.URL
)

generator = TestsetGenerator.from_langchain(
    generator_llm=generator_llm,
    critic_llm=critic_llm,
    embeddings=embeddings
)

test_set = generator.generate_with_langchain_docs(
    documents=documents,
    test_size=1,
    distributions={
        evolutions.simple: 0.5,
        evolutions.reasoning: 0.25,
        evolutions.multi_context: 0.25
    }
)

embedding nodes:   0%|          | 0/708 [00:00<?, ?it/s]

Exception in thread Thread-11:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 96, in run
    results = self.loop.run_until_complete(self._aresults())
  File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
    return future.result()
  File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 84, in _aresults
    raise e
  File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 79, in _aresults
    r = await future
  File "/usr/lib/python3.10/asyncio/tasks.py", line 571, in _wait_for_one
    return f.result()  # May raise f.exception().
  File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 38, in sema_coro
    return await coro
  File "/usr/local/lib/python3.10/dist-packages/ragas/executor.py", line 112, in wrapped_callable_async
    return counter, await callable(

ExceptionInRunner: The runner thread which was running the jobs raised an exeception. Read the traceback above to debug it. You can also pass `raise_exceptions=False` incase you want to show only a warning message instead.

In [27]:
eval_set = test_set.to_dataset()
eval_set.rename_column(
    original_column_name="answer",
    new_column_name="ground_truth"
)

NameError: name 'test_set' is not defined

In [166]:
for index, eval in enumerate(eval_set):
    async def handler(session: AsyncSession):
        global output_state

        state: State = State()
        state.authorized_session = all_seeder.session_seeder.session_fake.data[0]
        state.session = session

        graph_lfqa: LongFormQaGraph = LongFormQaGraph(
            one_embedding_setting=one_embedding_setting,
            one_llm_setting=one_llm_setting,
            two_datastore=two_datastore,
            four_datastore=four_datastore,
            category_document_processor=category_document_processor
        )
        compiled_graph_lfqa: CompiledGraph = graph_lfqa.compile()

        data: Dict[str, Any] = {
            "state": state,
            "document_ids": [document_id],
            "llm": {
                "model_name": "claude-3-haiku-20240307",
                "max_token": 500,
            },
            "preprocessor_setting": {
                "is_force_refresh_categorized_document": False,
                "chunk_size": 50,
                "overlap_size": 10,
                "is_include_tables": False,
                "is_include_images": False,
            },
            "embedder_setting": {
                "is_force_refresh_embedding": False,
                "is_force_refresh_document": False,
                "model_name": "BAAI/bge-m3",
                "query_instruction": "Given the question, retrieve passage that answer the question.",
            },
            "retriever_setting": {
                "is_force_refresh_relevant_document": False,
                "top_k": 50,
            },
            "reranker_setting": {
                "model_name": "BAAI/bge-reranker-v2-m3",
                "is_force_refresh_re_ranked_document": False,
                "top_k": 5,
            },
            "question": "what is political science?",
            "generator_setting": {
                "is_force_refresh_generated_answer": False,
                "is_force_refresh_generated_question": False,
                "is_force_refresh_generated_hallucination_grade_hash": False,
                "is_force_refresh_generated_answer_relevancy_grade_hash": False,
                "prompt_text": """Instruction: Create a concise and informative answer for a given question based solely on the given passages. You must only use information from the given passages. Use an unbiased and journalistic tone. Do not repeat text. Cite at least one passage in each sentence. Cite the passages using passage number notation like "[number]". If multiple passages contain the answer, cite those passages like "[number, number, etc.]". If the passages do not contain the answer to the question, then say that answering is not possible given the available information with the explanation. Ensure the output is only the answer without re-explain the instruction.
                Passages:
                {% for passage in passages %}
                [{{ loop.index }}]={{ passage.page_content }}
                {% endfor %}
                Question: {{ question }}
                Answer:"""
            },
            "transform_question_max_retry": 3
        }

        input_state: GraphState = GraphState(
            data=data
        )
        output_state = await compiled_graph_lfqa.ainvoke(input_state)

        eval_set[index]["contexts"] = [document.page_content for document in
                                       output_state["data"]["categorized_documents"][document_id].get_all()]
        eval_set[index]["answer"] = output_state["data"]["generated_answer"]

NameError: name 'eval_set' is not defined

In [23]:
# loading the V2 dataset
amnesty_qa = load_dataset("explodinggradients/amnesty_qa", "english_v2", trust_remote_code=True)

Repo card metadata block was not found. Setting CardData to empty.


In [24]:
eval_set_2 = amnesty_qa["eval"].select(range(1))
eval_set_2

Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 1
})

In [25]:
result = evaluate(
    dataset=eval_set_2,
    llm=critic_llm,
    embeddings=embeddings,
    metrics=[
        metrics.faithfulness,
        metrics.answer_relevancy,
        metrics.context_recall,
        metrics.context_precision,
        #     metrics.answer_correctness,
        #     metrics.context_relevancy,
        #     metrics.context_entity_recall,
    ],
)

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

  for attr in assigned:
Task was destroyed but it is pending!
task: <Task pending name='Task-369' coro=<as_completed.<locals>.sema_coro() running at /usr/local/lib/python3.10/dist-packages/ragas/executor.py:37> wait_for=<Future pending cb=[Task.task_wakeup()]> cb=[as_completed.<locals>._on_completion() at /usr/lib/python3.10/asyncio/tasks.py:558]>
Task was destroyed but it is pending!
task: <Task pending name='Task-52' coro=<as_completed.<locals>.sema_coro() running at /usr/local/lib/python3.10/dist-packages/ragas/executor.py:38> wait_for=<Future pending cb=[Task.task_wakeup()]> cb=[as_completed.<locals>._on_completion() at /usr/lib/python3.10/asyncio/tasks.py:558]>
Task was destroyed but it is pending!
task: <Task pending name='Task-55' coro=<as_completed.<locals>.sema_coro() running at /usr/local/lib/python3.10/dist-packages/ragas/executor.py:38> wait_for=<Future pending cb=[Task.task_wakeup()]> cb=[as_completed.<locals>._on_completion() at /usr/lib/python3.10/asyncio/tasks.py:558]>


In [26]:
result

{'faithfulness': 0.5714, 'answer_relevancy': 1.0000, 'context_recall': 1.0000, 'context_precision': 1.0000}