In this notebook we sketch and build proof-of-concept (PoC) tools for the Agent.

### Setup

In [None]:
# Libraries

import os

In [None]:
# Local Modules


In [None]:
# import Whisper

import whisper

In [None]:
# Import Agent

import sys

# Make the agent sources importable from this notebook. The entries are
# relative paths, so they resolve against the notebook's working directory.
# The membership check keeps sys.path free of duplicates when the cell is re-run.
for src_dir in ("../src", "../src/agents"):
    if src_dir not in sys.path:
        sys.path.append(src_dir)

import react  # My AI assistant

In [None]:
# DEBUG

import sys

# Expose the tools package sources for debugging; skip the append when the
# entry is already present so re-running the cell does not duplicate it.
if "../src/tools" not in sys.path:
    sys.path.append("../src/tools")

In [None]:

# Import GAIA Questions
from datasets import load_dataset


In [None]:

import pandas as pd


In [None]:
from dotenv import load_dotenv
from huggingface_hub import snapshot_download, login

In [None]:
# Load Hugging face credentials

#load_dotenv()
#login(os.getenv(key="HF_TOKEN_CHAPPIE"))  # Replace with your hf api key name


In [None]:
#gaia_questions_path = snapshot_download(repo_id="gaia-benchmark/GAIA", repo_type="dataset")
#gaia_questions = load_dataset(path="gaia-benchmark/GAIA", name="2023_level1")

In [None]:
# Directory of the locally cached GAIA "2023_level1" dataset (relative path),
# written as two adjacent literals to keep the line length readable.
gaia_index_dir = (
    "../../../.cache/huggingface/datasets/gaia-benchmark___gaia/2023_level1/0.0.1/"
    "ec492fe4320ee795b1aed6bb46229c5f693226b0f1316347501c24b4baeee005"
)
# Optional: gaia_index_dir = os.path.abspath(gaia_index_dir)

# Arrow file holding the validation split inside that directory
gaia_data_path = os.path.join(gaia_index_dir, "gaia-validation.arrow")

In [None]:
# Temporary cell: reads the cached Arrow file directly; written because of the
# Hugging Face API call limit.

import pyarrow.ipc as ipc

# Read the whole Arrow stream into a single table while the file is open
with open(gaia_data_path, "rb") as arrow_file:
    table = ipc.RecordBatchStreamReader(arrow_file).read_all()

gaia_df = table.to_pandas()


In [None]:
#gaia_questions = gaia_questions["validation"]  # Filter for dev purposes
#gaia_df = pd.DataFrame(gaia_questions)

In [None]:
gaia_df.head()

In [None]:
# DEL
# Temporary filter to get chess images: rows whose file path ends in "44.png"
is_chess_png = gaia_df["file_path"].str.endswith("44.png")
gaia_df[is_chess_png]

---

In [None]:
# Distinct file extensions in the dataset: the text after the last "." of each
# file_path (an empty path yields ""). Iterating the column directly avoids
# building a full row Series per task, as iterrows() does.
filetypes = {path.split(".")[-1] for path in gaia_df["file_path"]}
filetypes

### Read Historical XPs

In [None]:
xp_paths = "../data/agent_experiments/iterations/"

In [None]:
# Index all XPs
import os
import re
import pandas as pd

xp_dir = "../data/agent_experiments/iterations"


def _natural_sort_key(filename: str) -> list:
    """Split *filename* into text and integer chunks so that "10" sorts after "2"."""
    return [
        int(chunk) if chunk.isdecimal() else chunk
        for chunk in re.split(r"(\d+)", filename)
    ]


# Plain sorted() compares names character by character, which puts the 10th
# iteration before the 2nd; the natural key keeps iterations in numeric order
# so the enumerate index follows the experiment sequence.
xp_list = []
for i, xp_filename in enumerate(sorted(os.listdir(xp_dir), key=_natural_sort_key)):
    # Each entry is (position in sorted order, experiment results frame)
    xp_list.append((i, pd.read_csv(os.path.join(xp_dir, xp_filename))))

In [None]:
# DEL
# NOTE(review): `xp` and `xp_question` are not assigned in any cell above this
# one; they are only bound inside the join loop of the following cell, so this
# lookup presumably only works in a leftover debugging session — confirm or delete.
xp[xp["Question"] == xp_question][["is_correct"]]

In [None]:
# Join latest XP result to each question

# Row labels and question texts of the GAIA frame, plus the list that collects
# one result per question.
index = gaia_df.index.to_list()
questions = gaia_df["Question"].to_list()
answers = []


def filter_condition(xp_data: tuple) -> bool:
    """
    Tell whether an experiment addressed at least one of the known questions.

    Parameters
    ----------
    xp_data
        ``(i, xp)`` pair as stored in ``xp_list``; ``xp`` is the experiment
        frame and must have a ``"Question"`` column. The position ``i`` is ignored.

    Returns
    -------
    bool
        True when any entry of the module-level ``questions`` list appears in
        the experiment's ``"Question"`` column, False otherwise.
    """
    _, xp = xp_data
    # Build the lookup once per experiment rather than once per question;
    # set membership is O(1) instead of a list scan.
    addressed_questions = set(xp["Question"])
    return any(question in addressed_questions for question in questions)

# Keep only the experiments that addressed at least one GAIA question
filtered_xps = list(filter(filter_condition, xp_list))
# Reversed copy (slice), so filtered_xps itself keeps its original order
filtered_xps_reversed = filtered_xps[::-1]

# Join one result per question.
# NOTE: an experiment's value replaces the running one whenever it is >= it, so
# each question ends up with the highest `is_correct` found in any experiment,
# not strictly the value of the most recent one.
for question in questions:
    answer_result = 0  # Assume wrong answer by default
    for _, xp in filtered_xps_reversed:
        matches = xp.loc[xp["Question"] == question, "is_correct"]
        if matches.empty:
            continue  # this experiment did not address the question
        answer_result_temp = matches.iloc[0]  # first matching row
        if answer_result_temp >= answer_result:
            answer_result = answer_result_temp
    answers.append(answer_result)

# Join answers
historical_xp_results = gaia_df.copy()
# Reuse the frame's own index: a bare pd.Series(answers) gets a fresh 0..n-1
# index and would be aligned by label, yielding NaN for any non-default index.
historical_xp_results["is_correct"] = pd.Series(
    answers, index=historical_xp_results.index
)
del answers, questions

In [None]:
historical_xp_results

In [None]:
historical_xp_results.is_correct.mean()

Let's first study which kinds of tasks are most often answered incorrectly, i.e. how can we increase accuracy with a single next step (e.g. implementing a new tool, modifying the system message, etc.)?

In [None]:
# Questions that no experiment answered correctly. The explicit copy makes
# wrong_ans_df an independent frame, so the column added below is not written
# to a slice of historical_xp_results (avoids pandas' SettingWithCopyWarning).
wrong_ans_df = historical_xp_results[historical_xp_results["is_correct"] == 0].copy()
# Extension = text after the last "." of the attached file path
wrong_ans_df["fp_extension"] = wrong_ans_df["file_path"].map(lambda path: path.split(".")[-1])
wrong_ans_df

In [None]:
# Summary: count of `is_correct` entries per file extension among wrong answers
wrong_by_extension = wrong_ans_df.groupby("fp_extension")
wrong_by_extension["is_correct"].count()

The vast majority of the remaining tasks do not include files to read, so we should study them first.

In [None]:
# Wrong answers whose task has an empty file_path
has_no_file = wrong_ans_df["file_path"].str.len() == 0
wrong_ans_df_no_extension = wrong_ans_df[has_no_file]

In [None]:
wrong_ans_df_no_extension

## Audio Tool

Let's study where our current Agent fails, especially on audio-like questions.

In [None]:
run = False

In [None]:
gaia_df

### Identify Audio-like tasks

The first question is: which questions need audio?

* Hypothesis: only the questions with an attached .mp3 file need audio processing


In [None]:
# Gather a sample file from any task

# A path ending in ".mp3" is necessarily non-empty, so no separate length check
# is needed; na=False treats missing paths as non-audio.
# .copy() makes audio_tasks an independent frame, because result columns are
# added to it later in the notebook.
audio_tasks = gaia_df[gaia_df["file_path"].str.endswith(".mp3", na=False)].copy()
# First audio task, used as the sample for the cells below
sample_task = audio_tasks.iloc[0]

In [None]:
# Share of tasks with an attached .mp3 file, as a percentage
audio_share = (len(audio_tasks) / len(gaia_df)) * 100
print(f"{audio_share:.1f}% of tasks need audio processing")

In [None]:
sample_task

In [None]:
# Get the sample file

filepath = sample_task["file_path"]


Let's import the mp3 file with [ffmpeg](https://stackoverflow.com/questions/9458480/read-mp3-in-python-3#:~:text=%24%20ffmpeg%20%2Di%20foo.mp3%20%2Dvn%20%2Dacodec%20pcm_s16le%20%2Dac%201%20%2Dar%2044100%20%2Df%20wav%20foo.wav)

In [None]:
temp_data_path = "/home/santiagoal/current-projects/chappie/data/temp-data/"

In [None]:
if run:
    import subprocess

    # Convert the sample to a mono, 16-bit PCM WAV at 44.1 kHz (audio only).
    # An argument list is used instead of a shell string, so paths containing
    # spaces or shell metacharacters reach ffmpeg unchanged.
    subprocess.run(
        [
            "ffmpeg",
            "-i", filepath,
            "-vn",
            "-acodec", "pcm_s16le",
            "-ac", "1",
            "-ar", "44100",
            "-f", "wav",
            os.path.join(temp_data_path, "sample_audio.wav"),
        ],
        check=True,  # raise CalledProcessError rather than ignore a failed conversion
    )

In [None]:
if run:
    use_gpu = False
    model_size = "tiny"

    # Load the Whisper checkpoint, then move it to the GPU only when requested
    model = whisper.load_model(model_size)
    if use_gpu:
        model = model.cuda()

In [None]:
print(filepath)

In [None]:
if run:
    # Decoding options passed to the model's transcribe() call
    transcribe_options = {
        "word_timestamps": False,
        "no_speech_threshold": 0.5,
        "condition_on_previous_text": True,
        "compression_ratio_threshold": 2.0,
    }
    raw_transcript = model.transcribe(filepath, **transcribe_options)

    # Keep only the plain text of the result and show it
    transcript = raw_transcript["text"]
    print(transcript)

### Tool POC


In [None]:

def transcriber(audio_path: str, ai_model=None) -> str:
    """
    Transcribe an audio file to text.

    Parameters
    ----------
    audio_path : str
        Path to the audio file.
    ai_model : optional
        Audio-to-text model exposing ``transcribe(path, **options)`` that
        returns a mapping with a ``"text"`` entry. When omitted, the
        notebook-level ``model`` is used.

    Returns
    -------
    str
        Text of the transcript.
    """
    if ai_model is None:
        # Look the global up at call time, not at definition time: the function
        # can then be defined before the model is loaded, and it picks up a
        # reloaded model automatically.
        ai_model = model

    raw_transcript = ai_model.transcribe(
        audio_path,
        word_timestamps=False,
        no_speech_threshold=0.5,
        condition_on_previous_text=True,
        compression_ratio_threshold=2.0,
    )

    return raw_transcript["text"]

### Integrate Tool POC and experiment


The changes have been integrated, now we will experiment with the new version of the agent

In [None]:
# NOTE(review): get_agent_response and eval_answer are not defined in the cells
# visible in this notebook — presumably provided by the agent sources; confirm
# they are in scope before running.
# Work on an explicit copy so the new columns are not written to a slice of
# gaia_df (avoids pandas' SettingWithCopyWarning).
audio_tasks = audio_tasks.copy()
# Both callbacks are applied row by row (axis=1)
audio_tasks["Agent response"] = audio_tasks.apply(func=get_agent_response, axis=1)
audio_tasks["is_correct"] = audio_tasks.apply(func=eval_answer, axis=1)

In [None]:
audio_tasks

In [None]:
# Next steps: Update model... 