In [10]:
import os

import numpy as np
import pandas as pd
import phitter
from llama_index.core.agent.react import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI
import dotenv

In [None]:
# Locate a .env file and load its entries into the process environment.
dotenv_path = dotenv.find_dotenv()
dotenv.load_dotenv(dotenv_path)

In [11]:
# os.getenv returns None when the variable is unset, and assigning None into
# os.environ raises an opaque "str expected, not NoneType" TypeError. Check
# explicitly and fail with an actionable message instead.
# NOTE(review): presumably this key is what the OpenAI LLM created later reads.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [12]:
def read_csv(url: str, column_name: str) -> list[float]:
    """
    Reads a CSV file and returns the specified column as a list of floats.

    Parameters
    ----------
    url : str
        Location of the CSV file; passed unchanged to ``pandas.read_csv``.
    column_name : str
        Name of the column to extract.

    Returns
    -------
    list[float]
        The column's values converted to ``float``. Missing values (NaN) are
        dropped so the result contains only real numbers.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of the CSV. The message lists the
        columns that are available.
    ValueError
        If the column contains values that cannot be parsed as numbers.
    """
    df = pd.read_csv(url)
    if column_name not in df.columns:
        available = ", ".join(str(col) for col in df.columns)
        raise KeyError(f"Column '{column_name}' not found. Available columns: {available}")

    # Enforce the documented contract: coerce to numeric (raising on text),
    # discard missing entries, and make every element a Python float.
    numeric_column = pd.to_numeric(df[column_name], errors="raise")
    return numeric_column.dropna().astype(float).tolist()


def fit_distributions_to_data(data: list[float]) -> str:
    """
    Fit the best probability distribution to a dataset

    Parameters
    ----------
    data : list[float]
        Observations to fit; passed to ``phitter.PHITTER`` as ``data``.

    Returns
    -------
    str
        A sentence naming the best distribution's id and listing its
        parameters, each formatted to 4 significant digits.

    Raises
    ------
    ValueError
        If ``data`` is ``None`` or empty.
    """
    # Reject empty input up front so the caller gets a clear message rather
    # than whatever the fitting library does with no observations.
    if data is None or len(data) == 0:
        raise ValueError("Cannot fit a distribution to an empty dataset")

    phitter_cont = phitter.PHITTER(data=data)
    phitter_cont.fit(n_workers=2)

    best = phitter_cont.best_distribution
    id_distribution = best["id"]
    parameters = best["parameters"]
    parameters_str = ", ".join(f"{k}: {v:.4g}" for k, v in parameters.items())
    return f"The best distribution is {id_distribution} with parameters {parameters_str}"

In [13]:
# Wrap the two helper functions as tools; the name and description are the
# metadata attached to each tool.
read_csv_tool = FunctionTool.from_defaults(
    fn=read_csv,
    name="read_csv",
    description="Reads a CSV file from a URL and returns the specified column as a list of floats.",
)
fit_distribution_tool = FunctionTool.from_defaults(
    fn=fit_distributions_to_data,
    name="fit_distribution",
    description="Find the best probability distribution to a dataset and returns the distribution name and parameters.",
)

In [14]:
# Language model that drives the agent.
# NOTE(review): temperature=0 is presumably chosen for repeatable answers — confirm.
llm = OpenAI(
    model="gpt-4o-mini",
    temperature=0,
)

In [15]:
# Build a ReAct agent that can call both tools; verbose=True makes it print
# its intermediate steps while it runs.
tools = [read_csv_tool, fit_distribution_tool]
agent = ReActAgent.from_tools(
    tools,
    llm=llm,
    verbose=True,
)

In [16]:
# Dataset location and the column whose distribution will be fitted.
url = (
    "https://gist.githubusercontent.com/phitterio/6e1cdb92a5f518c7459484c619185fc5"
    "/raw/4072c5233d03204d8999943398b23c2393bcd637/iris.csv"
)
column_name = "SepalWidthCm"

In [17]:
query = f"Find the best probability distribution to the '{column_name}' column in the CSV file at this URL: {url}"

# Reset before the call so a failure cannot leave `response` undefined, or
# holding the answer from an earlier run of this cell, when later cells read it.
response = None
try:
    response = agent.chat(query)
except Exception as e:
    # Include the exception type: some exceptions have an empty message.
    print(f"Error: {type(e).__name__}: {e}")

[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: read_csv
Action Input: {'url': 'https://gist.githubusercontent.com/phitterio/6e1cdb92a5f518c7459484c619185fc5/raw/4072c5233d03204d8999943398b23c2393bcd637/iris.csv', 'column_name': 'SepalWidthCm'}
[0m[1;3;34mObservation: [3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.1, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 

In [18]:
# Show the agent's final answer as plain text.
print(str(response))

The best probability distribution for the 'SepalWidthCm' column is the generalized normal distribution, with the following parameters: beta = 1.454, mu = 3.03, and alpha = 0.4917.
