In [None]:
from dotenv import load_dotenv
import openai
import os
from llama_index.core import (
    Settings,
    VectorStoreIndex,
    SimpleDirectoryReader, 
    StorageContext,
    load_index_from_storage,
    Document,
    PromptTemplate
)
# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key needed by the
# agent created later in the notebook — confirm the expected variable name.
load_dotenv()

In [10]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Hugging Face embedding model registered as the global default in Settings.
EMBED_MODEL_NAME = "intfloat/multilingual-e5-base"

embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)
Settings.embed_model = embed_model

In [11]:
def get_artist_documents(filename: str) -> list[Document]:
    """Build one ``Document`` per song from an artist lyrics file.

    The file holds the artist name first, followed by the songs, with ``===``
    separating every section::

        Artist name
        ===
        lyrics of song 1
        ===
        lyrics of song 2

    Args:
        filename: Path to the lyrics file. It is read as UTF-8.

    Returns:
        A list with one document per non-blank song section. Each document
        keeps the song text exactly as it appears between separators and
        carries ``{"category": "music", "artist": <artist name>}`` as
        metadata.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: without it the platform default encoding is
    # used, which mangles or rejects accented characters on some systems.
    # NOTE(review): assumes the lyrics files are stored as UTF-8 — confirm.
    with open(filename, encoding="utf-8") as file:
        data = file.read()

    # str.split always yields at least one item, so the unpacking is safe even
    # for an empty file (artist becomes "" and there are no songs).
    artist, *songs = data.split("===")
    artist = artist.strip()

    # Skip whitespace-only sections (e.g. after a trailing or doubled "===")
    # so that no empty documents get embedded and indexed.
    return [
        Document(
            text=song,
            metadata={
                "category": "music",
                "artist": artist,
            },
        )
        for song in songs
        if song.strip()
    ]

In [12]:
import os
# Directory holding the persisted index, and the lyrics file it is built from.
PERSIST_DIR = "lyrics_store2"
LYRICS_FILE = "8988_Kjarkas.txt"

if os.path.exists(PERSIST_DIR):
    # A persisted index is already on disk: load it instead of re-embedding.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # First run: parse the lyrics, build the vector index and persist it.
    documents = get_artist_documents(LYRICS_FILE)
    index = VectorStoreIndex.from_documents(documents, show_progress=True)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

In [15]:
# Query engine over the lyrics index, created with the library's default
# settings; its QA prompt is replaced in a later cell.
query_engine = index.as_query_engine()

In [16]:
# Custom question-answering prompt. It contains two placeholders,
# {context_str} and {query_str}, and instructs the model to answer in Spanish
# using only the supplied context, with quotes, bullet lists and a note on the
# tone/sentiment of the song.
# NOTE(review): presumably the response synthesizer fills {context_str} with
# the retrieved lyrics and {query_str} with the user question — confirm.
qa_template_str = """
    You are an expert in Bolivian Folk music, your task is to guide and teach the user 
    about your field. Answer the user queries only with supported data in your context.
    Your context may contain complete lyrics or parts of them in different languages, but
    your answer will always be in Spanish. 

    Context information is below.
    ---------------------
    {context_str}
    ---------------------
    Given the context information and not prior knowledge, 
    answer the query with detailed source information, include direct quotes and use bullet lists in your 
    answers, in one of the bullets detail the tone/sentiment of the song.
    Query: {query_str}
    Answer: 
"""
# Wrap the raw string in a PromptTemplate so it can be passed to update_prompts.
qa_template = PromptTemplate(qa_template_str)

In [17]:
# Override the response synthesizer's text QA prompt with the custom template.
prompt_overrides = {"response_synthesizer:text_qa_template": qa_template}
query_engine.update_prompts(prompt_overrides)

In [18]:
from llama_index.agent.openai import OpenAIAgent
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# Text given to the tool as its description (passed through ToolMetadata).
description = """
A set of lyrics for songs from the Bolivian Folk Group Los Kjarkas. 
Use plain text question as input to the tool. 
MANDATORY: Pass the response to the user as is, mantaining the format, do not try to summarize when using this tool.
"""

# NOTE(review): the description asks for the tool output to be passed along
# unchanged, yet return_direct is False — confirm this combination is intended.
lyrics_tool_metadata = ToolMetadata(
    name="Kjarkas_songs_lyrics",
    description=description,
    return_direct=False,
)

# Expose the lyrics query engine to the agent as its single tool.
lyrics_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=lyrics_tool_metadata,
)
tools = [lyrics_tool]

agent = OpenAIAgent.from_tools(tools=tools, verbose=True)

In [None]:
# First question to the agent (in Spanish): which Kjarkas songs are about betrayal?
response = agent.chat("que canciones de los kjarkas hablan de traicion?")

In [None]:
# Display the `response` attribute of the chat result as the cell output.
# NOTE(review): presumably this is the plain answer text — confirm.
response.response

In [None]:
# Follow-up question on the same agent (in Spanish): which ones are about nature?
# Rebinds `response`, so the previous chat result is no longer reachable by name.
response = agent.chat("cuales hablan sobre la naturaleza?")