In [None]:
## Genshin Assistant
# Un programa que te brinda información acerca de los personajes de Genshin Impact (GI), con documentos guardados en memoria

# Arquitectura

- Text Generation Model for PromptNode (text2text):
    - text-davinci-003
    - google/flan-t5-small (posible de usar)
    - mrm8488/t5-small-spanish-finetuned-squadv1 (posible de usar; necesita pregunta y contexto)
- 

In [None]:
# Init and load the datasets

In [None]:
from haystack.agents.memory import ConversationSummaryMemory
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, PromptNode, PromptTemplate, AnswerParser
from haystack.agents import Tool, AgentStep, Agent
from haystack.agents.base import Agent, ToolsManager
from haystack import Pipeline
import logging
import os
# Default the Elasticsearch host to the loopback address, but keep any value
# that is already exported in the environment. An unconditional assignment
# would silently discard an externally configured host and would make the
# "localhost" fallback used when reading the variable back unreachable.
os.environ.setdefault("ELASTICSEARCH_HOST", "127.0.0.1")

In [None]:
# Configure the root logger at WARNING with a "level - logger name - message"
# layout, then raise only the "haystack" logger to INFO.
logging.basicConfig(
    level=logging.WARNING,
    format="%(levelname)s - %(name)s -  %(message)s",
)
logging.getLogger("haystack").setLevel(logging.INFO)

In [None]:
# Read the Elasticsearch host from the environment; fall back to "localhost"
# when the variable is not set.
host = os.getenv("ELASTICSEARCH_HOST", "localhost")

In [None]:
# Echo the resolved host in the cell output as a quick configuration check.
host

In [None]:
# Document store backed by the Elasticsearch instance at `host`, using the
# "document" index and empty username/password strings.
document_store = ElasticsearchDocumentStore(
    host=host,
    index="document",
    username="",
    password="",
)

In [None]:
# Show the document store object's repr in the cell output.
document_store

In [None]:
# Indexing pipeline and the two processing components that get wired into it
# in the next cell.
indexing_pipeline = Pipeline()
text_converter = TextConverter()

# Splitting options first, cleaning options second.
preprocessor = PreProcessor(
    split_by="word",
    split_length=200,
    split_overlap=20,
    split_respect_sentence_boundary=True,
    clean_whitespace=True,
    clean_empty_lines=True,
    clean_header_footer=True,
)

In [None]:
# Wire the indexing pipeline as a linear chain:
#   File -> TextConverter -> PreProcessor -> DocumentStore
indexing_pipeline.add_node(
    component=text_converter,
    name="TextConverter",
    inputs=["File"],
)
indexing_pipeline.add_node(
    component=preprocessor,
    name="PreProcessor",
    inputs=["TextConverter"],
)
indexing_pipeline.add_node(
    component=document_store,
    name="DocumentStore",
    inputs=["PreProcessor"],
)

In [None]:
# Directory (relative to the working directory) whose files are indexed below.
doc_dir = "datasets"

In [None]:
# Collect every regular file located directly inside doc_dir; sub-directories
# are skipped by the isfile() check. os.path.join builds the paths portably,
# and sorted() makes the indexing order deterministic because os.listdir()
# returns entries in arbitrary order.
files_to_index = sorted(
    os.path.join(doc_dir, entry)
    for entry in os.listdir(doc_dir)
    if os.path.isfile(os.path.join(doc_dir, entry))
)
indexing_pipeline.run_batch(file_paths=files_to_index)

In [None]:
# BM25 retriever over the document store, configured with top_k=5.
retriever = BM25Retriever(
    top_k=5,
    document_store=document_store,
)

In [None]:
# Creación del nodo de prompt para preguntas, contexto y respuestas

In [None]:
# English RAG template: the joined documents are injected as "Related text"
# and the user query as "Question"; the output goes through AnswerParser.
# NOTE: rag_prompt and prompt_node are both assigned again in the following
# cell, so when the notebook is run top to bottom the later definitions are
# the ones the generation pipeline ends up using.
rag_prompt = PromptTemplate(
    output_parser=AnswerParser(),
    prompt="""Synthesize a comprehensive answer from the following text for the given question.
        Provide a clear and concise response that summarizes the key points and information presented in the text.
        Your answer should be in your own words and be no longer than 100 words.
        \n\n Related text: {join(documents)} \n\n Question: {query} \n\n Answer:""",
)

# Notes from trying different checkpoints:
#   google/flan-t5-base  --> decent
#   google/flan-t5-small --> does not work correctly
#   google/flan-t5-large --> needs a lot of training and is quite heavy
prompt_node = PromptNode(
    model_name_or_path="google/flan-t5-large",
    default_prompt_template=rag_prompt,
)


In [None]:
# Alternative template in the plain "question: ... context: ..." form.
rag_prompt = PromptTemplate(
    output_parser=AnswerParser(),
    prompt="question: {query} context: {join(documents)}",
)
# TODO: try other prompt wordings, for example:
#   Answer the following question in a coherent way: {query} using the following context: {join(documents)}. If you don't know the answer, return the text 'Lo siento, desconozco la respuesta' without the quotes.
#   {query} context: {join(documents)}

# mrm8488/spanish-t5-small-sqac-for-qa --> good results, still needs fine-tuning
prompt_node = PromptNode(
    model_name_or_path="mrm8488/spanish-t5-small-sqac-for-qa",
    default_prompt_template=rag_prompt,
)

In [None]:
# Generation pipeline: Query -> retriever -> prompt_node.
gen_pipe = Pipeline()
gen_pipe.add_node(
    component=retriever,
    name="retriever",
    inputs=["Query"],
)
gen_pipe.add_node(
    component=prompt_node,
    name="prompt_node",
    inputs=["retriever"],
)

In [None]:
# Smoke-test the generation pipeline with a placeholder query.
output = gen_pipe.run(query="¿?")

# Guard against a missing or empty "answers" list so the cell reports the
# situation instead of failing with KeyError/IndexError.
if output.get("answers"):
    print(output["answers"][0].answer)
else:
    print("No answer was returned for this query.")

In [None]:
# Expose the generation pipeline to the agent as a tool; the agent reads the
# tool result from the pipeline's "answers" output.
search_tool = Tool(
    pipeline_or_node=gen_pipe,
    output_variable="answers",
    name="genshin_answers_tool",
    description="useful for when you need to answer questions about genshin impact",
)

In [None]:
# Prompt node that drives the agent. "Observation:" is passed as a stop word;
# the agent prompt reserves that label for the tool's result.
agent_prompt_node = PromptNode(
    "mrm8488/spanish-gpt2",
    model_kwargs={"temperature": 0.5},
    stop_words=["Observation:"],
    max_length=256,
)

In [None]:
# Separate prompt node used only by the conversation memory.
memory_prompt_node = PromptNode(
    "IIC/mt5-spanish-mlsum",
    max_length=256,
    model_kwargs={"task_name": "text2text-generation"},
)

# The memory's template is just the raw chat transcript placeholder.
memory = ConversationSummaryMemory(
    memory_prompt_node,
    prompt_template="{chat_transcript}",
)

In [None]:
# Prompt text for the conversational agent. It contains four placeholders:
# {tool_names_with_descriptions}, {memory}, {query} and {transcript}; these
# match the keys returned by resolver_function. The text also defines the
# "Thought:", "Tool:", "Tool Input:", "Observation:" and "Final Answer:"
# labels the agent is asked to use in its responses.
agent_prompt = """
In the following conversation, a human user interacts with an AI Agent. The human user poses questions, and the AI Agent goes through several steps to provide well-informed answers.
The AI Agent must use the available tools to find the up-to-date information. The final answer to the question should be truthfully based solely on the output of the tools. The AI Agent should ignore its knowledge when answering the questions.
The AI Agent has access to these tools:
{tool_names_with_descriptions}

The following is the previous conversation between a human and The AI Agent:
{memory}

AI Agent responses must start with one of the following:

Thought: [the AI Agent's reasoning process]
Tool: [tool names] (on a new line) Tool Input: [input as a question for the selected tool WITHOUT quotation marks and on a new line] (These must always be provided together and on separate lines.)
Observation: [tool's result]
Final Answer: [final answer to the human user's question]
When selecting a tool, the AI Agent must provide both the "Tool:" and "Tool Input:" pair in the same response, but on separate lines.

The AI Agent should not ask the human user for additional information, clarification, or context.
If the AI Agent cannot find a specific answer after exhausting available tools and approaches, it answers with Final Answer: inconclusive

Question: {query}
Thought:
{transcript}
"""

In [None]:
def resolver_function(query, agent, agent_step):
    """Build the parameter mapping used to fill the agent prompt.

    Args:
        query: The user's question; passed through unchanged.
        agent: Object exposing ``tm.get_tool_names_with_descriptions()`` and
            ``memory.load()``.
        agent_step: Object exposing a ``transcript`` attribute.

    Returns:
        A dict with the keys ``query``, ``tool_names_with_descriptions``,
        ``transcript`` and ``memory``.
    """
    tool_listing = agent.tm.get_tool_names_with_descriptions()
    step_transcript = agent_step.transcript
    conversation_history = agent.memory.load()
    return dict(
        query=query,
        tool_names_with_descriptions=tool_listing,
        transcript=step_transcript,
        memory=conversation_history,
    )

In [None]:
# Assemble the agent from the prompt node, the custom prompt text and its
# parameter resolver, the conversation memory and the single search tool.
conversational_agent = Agent(
    agent_prompt_node,
    tools_manager=ToolsManager([search_tool]),
    memory=memory,
    prompt_parameters_resolver=resolver_function,
    prompt_template=agent_prompt,
)

In [None]:
# Ask the agent a sample question; the call's return value is shown as the cell output.
conversational_agent.run("¿Quien es la Shogun Raiden?")