In [None]:
from marvin import ai_fn, AIApplication, ai_model
from marvin import settings as marvin_settings
import marvin.tools.filesystem
import marvin.tools.shell
from marvin.tools.chroma import MultiQueryChroma as marvin_QueryChroma
#from marvin.tools.chroma import 
import marvin.utilities.embeddings

import os
import pathlib
from dotenv import load_dotenv
import certifi

# Workaround for "SSL: CERTIFICATE_VERIFY_FAILED" on macOS: expose certifi's
# CA bundle through both environment variables.
ca_bundle_path = certifi.where()
os.environ.update(
    {
        "REQUESTS_CA_BUNDLE": ca_bundle_path,
        "SSL_CERT_FILE": ca_bundle_path,
    }
)

# Load variables from a .env file (if any), then pass the OpenAI key to marvin.
# NOTE(review): os.getenv returns None when OPENAI_API_KEY is not set.
load_dotenv()
marvin_settings.openai.api_key = os.getenv("OPENAI_API_KEY")

In [None]:
# Working directory for the chatbot: the "data" folder below the current
# working directory.
FILE_DIR = pathlib.Path.cwd().joinpath("data")
# Last expression of the cell, so the notebook displays the path as a string.
str(FILE_DIR)

## Problem: 
- With the "naive" approach below, no vector database is stored in the given directory, so the embeddings will most likely not be reused and have to be recreated each time a new question is asked, which is quite expensive.
- Nevertheless, there is a way to create a vector database with marvin; it is, however, much more complicated than with langchain: https://github.com/PrefectHQ/marvin/blob/main/cookbook/test_writing_application.py (marvin-recipes, MultiQuery)
- For reading PDF files, an appropriate tool/function has to be provided (written).
- I feel marvin's big advantage/potential lies definitely more in the area of classifying unstructured data. By using ai_models and scraping websites, data can easily be collected and structured; labeling data and sentiment analysis are also great ways to leverage the marvin library. AI applications like the one used here are also very impressive, but giving marvin the tools and rights to read and write seems unsafe for my environment ... Since the magic lies in prompt engineering here, words have to be chosen carefully and are prone to misunderstanding.

Nevertheless, the AIApplication below opens the given txt file on its own, reads it, and successfully answers the prompted question by leveraging the OpenAI API...

In [None]:

# Short one-line summary of the bot. It is NOT passed to the application below,
# which is configured with the longer prompt in `qa_instructions`.
description = "A chatbot. Users will ask questions concerning a given text. "

# Prompt handed to the application as its description.
# NOTE(review): the text contains typos ("to to", "experianced", "stronly");
# they are left untouched because the string is sent to the model verbatim.
qa_instructions = f"""
    You are a chatbot answering to to all questions concerning the content of a given
    text file.
    The text file has the name {"test2.txt"} and is located in the following directory {FILE_DIR}. Please make the embeddings for
    the content of the text file only once and save them into a chroma vector database, which should itself be saved for later use in 
    {FILE_DIR} .
    You are only allowed to read or write files in the {FILE_DIR}. 
    The user will give you instructions on what questions to answer. Make sure you always reuse the embeddings from the 
    existing vector database as knowledgebase saved by yourself only once in the {FILE_DIR}.
    When you write the answers, you will need to ensure that the
    user's expectations are met. Remember, you are an accurate and experianced author 
    and you write unique and short answers stronly aligned to the content of the given text file.
    You should use friendly, easy to read language, but stay correct and focussed.
    The answers should not have more than 10 sentences.
    """

# Tools offered to the application: file read/write rooted at FILE_DIR, a
# chroma query tool and the OpenAI embedding helper.
# NOTE(review): root_dir presumably confines file access to FILE_DIR -- confirm
# against the marvin filesystem tool implementation.
qa_tools = [
    marvin.tools.filesystem.ReadFile(root_dir=FILE_DIR),
    marvin.tools.filesystem.WriteFile(root_dir=FILE_DIR),
    marvin_QueryChroma(
        name="chroma_db_text",
        description="chroma data base to store the embeddings of the content of the given text",
    ),
    marvin.utilities.embeddings.create_openai_embeddings,
    # marvin.tools.shell.Shell(
    #     require_confirmation=False, working_directory=FILE_DIR
    # )
]

# Optional arguments currently left out: state=QAState(), history=...
qa = AIApplication(
    name="Chatbot",
    description=qa_instructions,
    tools=qa_tools,
)

# Ask a single question and show the application's reply.
response = qa("Please give a summary of the given text")
print(response)


### Some useful Links: 
- https://github.com/PrefectHQ/marvin-recipes/blob/main/examples/slackbot/slackbot.py
- https://www.askmarvin.ai/components/ai_application/
- https://www.askmarvin.ai/components/ai_model/
- https://github.com/PrefectHQ/marvin/blob/main/docs/src/docs/deployment.ipynb
- https://github.com/PrefectHQ/marvin-recipes
- @tool: https://www.askmarvin.ai/components/ai_application/
- Marvin source: https://github.com/PrefectHQ/marvin/blob/main/src/marvin/tools/chroma.py
- https://github.com/PrefectHQ/marvin-recipes/blob/main/examples/flows/github_digest.py
- https://github.com/PrefectHQ/marvin-recipes/tree/main
- https://github.com/PrefectHQ/marvin/blob/main/cookbook/test_writing_application.py
- https://github.com/PrefectHQ/marvin/blob/main/cookbook/slackbot/chatbot.py