In [14]:
import os

import numpy as np
import pandas as pd
import phitter
from llama_index.core.agent.react import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.together import TogetherLLM
import dotenv

In [15]:
# Locate a .env file with find_dotenv() and load its variables into the process
# environment. The call is the cell's last expression, so its result is displayed.
dotenv.load_dotenv(dotenv.find_dotenv())

True

In [16]:
# Re-export the Together API key only when it is actually defined.
# os.getenv() returns None for a missing variable, and assigning None into
# os.environ raises TypeError, which would abort the notebook at this cell.
together_api_key = os.getenv("TOGETHER_API_KEY")
if together_api_key is None:
    print("Warning: TOGETHER_API_KEY is not set; Together-hosted models will be unavailable.")
else:
    os.environ["TOGETHER_API_KEY"] = together_api_key

In [17]:
# Download the iris sample CSV into the notebook-wide DataFrame `df`,
# then preview its first five rows as the cell output.
df = pd.read_csv(
    filepath_or_buffer="https://gist.githubusercontent.com/phitterio/6e1cdb92a5f518c7459484c619185fc5/raw/4072c5233d03204d8999943398b23c2393bcd637/iris.csv"
)
df.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [18]:
# Notebook-level handle to the most recent phitter fit. It starts as None, is
# replaced by fit_distributions_to_data(), and is read by plot_histogram().
phitter_cont: "phitter.PHITTER | None" = None

In [19]:
def get_column_data(column_name: str) -> list[float]:
    """
    Return the values of one column of the global DataFrame ``df`` as a list.

    Args:
        column_name: Name of the column to read from ``df``.

    Returns:
        The column's values, converted with ``Series.tolist()``.

    Raises:
        KeyError: If ``column_name`` is not a column of ``df``. The message
            lists the available columns so that a caller (typically the LLM
            agent invoking this as a tool) can correct the name and retry.
    """
    if column_name not in df.columns:
        raise KeyError(
            f"Column {column_name!r} not found. Available columns: {list(df.columns)}"
        )
    return df[column_name].tolist()


def fit_distributions_to_data(data: list[float]) -> str:
    """
    Fit candidate probability distributions to a dataset and describe the best one.

    The fitted ``phitter.PHITTER`` object is stored in the global
    ``phitter_cont`` so that later calls (e.g. ``plot_histogram``) can reuse it.

    Args:
        data: The numeric observations to fit.

    Returns:
        A sentence naming the best distribution's id and its parameters, with
        each parameter formatted to four significant digits.

    Raises:
        ValueError: If ``data`` is empty.
    """
    global phitter_cont

    # Fail early with an actionable message instead of an obscure error from
    # deep inside the fitting library.
    if len(data) == 0:
        raise ValueError("Cannot fit a distribution to an empty dataset")

    phitter_cont = phitter.PHITTER(data=data)
    phitter_cont.fit(n_workers=2)

    best = phitter_cont.best_distribution
    id_distribution = best["id"]
    parameters_str = ", ".join(f"{k}: {v:.4g}" for k, v in best["parameters"].items())
    return f"The best distribution is {id_distribution} with parameters {parameters_str}"

def plot_histogram() -> str:
    """
    Plot the histogram of the dataset held by the most recent fit.

    Reads the global ``phitter_cont`` that ``fit_distributions_to_data`` sets
    and calls its ``plot_histogram()`` method.

    Returns:
        ``"showing histogram ..."`` once the plot call has been made, or an
        explanatory message when no fit exists yet, so that an agent using this
        as a tool can run the fit first instead of failing on ``None``.
    """
    if phitter_cont is None:
        return "No fitted distribution available. Run fit_distribution on a dataset first."
    phitter_cont.plot_histogram()
    return "showing histogram ..."

In [20]:
# Wrap the helper functions as LlamaIndex tools. The `name` and `description`
# strings are what the agent's LLM sees when deciding which tool to call, so
# they need to be accurate and free of typos.
get_column_tool = FunctionTool.from_defaults(
    fn=get_column_data,
    name="get_column_data",
    description="Gets the data from a specific column of the global DataFrame.",
)
fit_distribution_tool = FunctionTool.from_defaults(
    fn=fit_distributions_to_data,
    name="fit_distribution",
    description="Find the best probability distribution to a dataset and returns the distribution name and parameters.",
)
plot_histogram_tool = FunctionTool.from_defaults(
    fn=plot_histogram,
    name="plot_histogram",
    # Fixed the "hitogram" typo and stated the prerequisite explicitly.
    description=(
        "Plot the histogram of the dataset from the most recent fit_distribution call. "
        "Takes no arguments and requires fit_distribution to have been run first."
    ),
)

In [21]:
# `OpenAI` was never imported in the notebook's visible cells, so this cell
# raised NameError on a fresh kernel (Restart & Run All). Import LlamaIndex's
# OpenAI LLM wrapper here; it can be moved to the import cell when convenient.
from llama_index.llms.openai import OpenAI

# temperature=0 requests the least random sampling from the model.
# NOTE(review): presumably the client picks up OPENAI_API_KEY from the
# environment (e.g. via the loaded .env file) — confirm it is defined there.
llm = OpenAI(model="gpt-4o-mini", temperature=0)

In [22]:
# Collect the three tools and hand them to a ReAct agent backed by `llm`.
# verbose=True makes the agent print its Thought / Action / Observation steps.
tools = [
    get_column_tool,
    fit_distribution_tool,
    plot_histogram_tool,
]
agent = ReActAgent.from_tools(tools=tools, llm=llm, verbose=True)

In [23]:
# Column of `df` that the agent is asked to analyse in the next cell.
column_name = "SepalWidthCm"

In [24]:
# Reset the result first so that a failed request cannot leave a stale (or
# undefined) `response` behind for the cells that read it afterwards.
response = None
try:
    # Spell out the tool order so the agent fetches the column before fitting.
    query = (
        f"Fit the best probability distribution to the '{column_name}' column. "
        "First, use the get_column_data tool to get the column data, "
        "then use the fit_distribution tool on that data."
    )
    response = agent.chat(query)
    print(response)
except Exception as e:
    # Best-effort demo cell: report what went wrong (including the exception
    # type, since some messages are uninformative alone) and keep going.
    print(f"Error: {type(e).__name__}: {e}")

[1;3;38;5;200mThought: The current language of the user is: English. I need to use the get_column_data tool to get the 'SepalWidthCm' column data.
Action: get_column_data
Action Input: {'column_name': 'SepalWidthCm'}
[0m[1;3;34mObservation: [3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.1, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
[0m

In [25]:
# Print the agent's answer stored in `response` by the previous cell.
print(response)

The best probability distribution for the 'SepalWidthCm' column is generalized_normal with parameters beta: 1.454, mu: 3.03, alpha: 0.4917.


In [26]:
# Follow-up request to the same agent. The prompt names no column; the
# plot_histogram tool draws from the fit stored in the global `phitter_cont`.
query = "Plot histogram"
response = agent.chat(query)

[1;3;38;5;200mThought: I need to plot a histogram of the 'SepalWidthCm' column data.
Action: plot_histogram
Action Input: {}
[0m

[1;3;34mObservation: showing histogram ...
[0m[1;3;38;5;200mThought: I have the histogram, now I can answer the question about fitting the best probability distribution to the 'SepalWidthCm' column.
Action: get_column_data
Action Input: {'column_name': 'SepalWidthCm'}
[0m[1;3;34mObservation: [3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.1, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3