In [None]:
import os

import numpy as np
import pandas as pd
import phitter
from llama_index.core.agent.react import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI
import dotenv

In [None]:
# Locate a .env file and load the variables it defines into the environment.
dotenv_path = dotenv.find_dotenv()
dotenv.load_dotenv(dotenv_path)

In [None]:
# Fail fast with an actionable message when the key is missing: os.getenv()
# returns None for an unset variable, and assigning None into os.environ
# raises an opaque "TypeError: str expected, not NoneType".
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [None]:
# Source of the example data (iris.csv served from a GitHub gist).
IRIS_CSV_URL = "https://gist.githubusercontent.com/phitterio/6e1cdb92a5f518c7459484c619185fc5/raw/4072c5233d03204d8999943398b23c2393bcd637/iris.csv"

# `df` is the global DataFrame read by get_column_data.
df = pd.read_csv(IRIS_CSV_URL)
df.head()

In [None]:
# Module-level holder for the most recent phitter fit. It starts as None, is
# assigned by fit_distributions_to_data, and is read by plot_histogram.
phitter_cont: "phitter.PHITTER | None" = None

In [None]:
def get_column_data(column_name: str) -> list[float]:
    """
    Return the values of one column of the global DataFrame ``df`` as a list.

    Parameters
    ----------
    column_name : str
        Name of the column to read from ``df``.

    Returns
    -------
    list[float]
        The column values, converted with ``Series.tolist()``.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df``. The message lists the
        available columns so the caller can retry with a valid name.
    """
    if column_name not in df.columns:
        # A bare KeyError('name') gives the caller nothing to recover with;
        # spell out which names are valid.
        available = ", ".join(map(str, df.columns))
        raise KeyError(f"Column {column_name!r} not found. Available columns: {available}")
    return df[column_name].tolist()


def fit_distributions_to_data(data: list[float], num_bins=None) -> str:
    """
    Fit probability distributions to a dataset and describe the best one.

    The fitted ``phitter.PHITTER`` object is stored in the module-level
    ``phitter_cont`` so that later calls (e.g. ``plot_histogram``) can use it.

    Parameters
    ----------
    data : list[float]
        Observations to fit.
    num_bins : int, optional
        Number of histogram bins forwarded to ``phitter.PHITTER``. ``None``
        (the default) leaves the choice to phitter.

    Returns
    -------
    str
        A sentence naming the best distribution id and its parameters, each
        parameter formatted with four significant digits.
    """
    global phitter_cont
    # Forward num_bins: it was previously accepted but ignored, so a request
    # such as "using 14 bins" had no effect on the fit.
    phitter_cont = phitter.PHITTER(data=data, num_bins=num_bins)
    phitter_cont.fit(n_workers=2)

    best = phitter_cont.best_distribution
    id_distribution = best["id"]
    parameters_str = ", ".join(f"{k}: {v:.4g}" for k, v in best["parameters"].items())
    return f"The best distribution is {id_distribution} with parameters {parameters_str}"

def plot_histogram():
    """
    Plot the histogram of the most recently fitted dataset.

    Uses the module-level ``phitter_cont`` object assigned by
    ``fit_distributions_to_data``.

    Returns
    -------
    str
        ``"showing histogram ..."`` after the plot call, or a message asking
        for a fit to be run first when nothing has been fitted yet.
    """
    if phitter_cont is None:
        # Nothing has been fitted yet: report it instead of failing with an
        # AttributeError on None.
        return "No distribution has been fitted yet. Fit a distribution to the data before plotting the histogram."
    phitter_cont.plot_histogram()
    return "showing histogram ..."

In [None]:
# Wrap the plain Python functions as tools for the agent. The name and
# description are the text the LLM sees when deciding which tool to call, so
# they are kept precise and free of typos.
get_column_tool = FunctionTool.from_defaults(
    fn=get_column_data,
    name="get_column_data",
    description="Gets the data from a specific column of the global DataFrame.",
)
fit_distribution_tool = FunctionTool.from_defaults(
    fn=fit_distributions_to_data,
    name="fit_distribution",
    description="Finds the best-fitting probability distribution for a dataset and returns the distribution name and parameters.",
)
plot_histogram_tool = FunctionTool.from_defaults(
    fn=plot_histogram,
    name="plot_histogram",
    description="Plots the histogram of the data from the most recent fit_distribution call. Requires fit_distribution to have been called first.",
)

In [None]:
# LLM that drives the agent; temperature is pinned to 0.
llm = OpenAI(
    model="gpt-4o",
    temperature=0,
)

In [None]:
# Hand the three tools and the LLM to a ReAct agent.
# NOTE(review): verbose=True presumably logs the agent's intermediate steps; confirm.
tools = [
    get_column_tool,
    fit_distribution_tool,
    plot_histogram_tool,
]
agent = ReActAgent.from_tools(tools, llm=llm, verbose=True)

In [None]:
# Column of `df` that the queries below ask the agent to analyse.
column_name = "SepalWidthCm"

In [None]:
# Ask the agent for the best-fitting distribution of the selected column and
# print its answer.
query = f"Find the best probability distribution to the '{column_name}' column"
response = agent.chat(query)
print(response)

In [None]:
# Same request, but asking for 14 bins.
# NOTE(review): this relies on the agent passing the bin count to the fit
# tool's `num_bins` argument — confirm in the verbose trace.
query = f"Find the best probability distribution to the '{column_name}' column using 14 bins"
response = agent.chat(query)
print(response)

In [None]:
# Ask the agent to plot the histogram. The plot tool reads the global
# `phitter_cont`, which is assigned by the fit tool, so this cell comes after
# the fit queries above.
query = "Plot histogram"
response = agent.chat(query)