In [1]:
import os
import pandas as pd
import numpy as np
import ast
from dotenv import load_dotenv
import openai
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from langchain.agents import initialize_agent, Tool
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory

In [2]:
# loads .env file with OPENAI_API_KEY
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

In [3]:
iris = load_iris()
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [4]:
data = pd.DataFrame(
    np.concatenate((iris.data, np.array([iris.target]).T), axis=1),
    columns=iris.feature_names + ["target"],
)
data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0


In [5]:
y = data["target"]
X = data.drop(columns=["target"])

In [6]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [7]:
X_train

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
96,5.7,2.9,4.2,1.3
105,7.6,3.0,6.6,2.1
66,5.6,3.0,4.5,1.5
0,5.1,3.5,1.4,0.2
122,7.7,2.8,6.7,2.0
...,...,...,...,...
71,6.1,2.8,4.0,1.3
106,4.9,2.5,4.5,1.7
14,5.8,4.0,1.2,0.2
92,5.8,2.6,4.0,1.2


In [8]:
model = RandomForestClassifier()
model.fit(X=X_train, y=y_train)

In [9]:
model.score(X=X_test, y=y_test)

0.98

In [10]:
X_train[:3]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
96,5.7,2.9,4.2,1.3
105,7.6,3.0,6.6,2.1
66,5.6,3.0,4.5,1.5


In [11]:
y_train[:3]

96     1.0
105    2.0
66     1.0
Name: target, dtype: float64

In [12]:
def agent_predict(iris_features):
    """
    Predicts the species of an iris flower based on its features.

    Parameters:
    iris_features (tuple, list, or str): A tuple or list containing the sepal length, sepal width,
                                          petal length, and petal width of the iris flower, or a
                                          string representation of such a tuple or list.

    Returns:
    str: A string indicating the predicted species of the iris flower along with the confidence
         percentage of the prediction.

    Raises:
    ValueError: If the input data type is not a tuple, list, or string.
    """

    if isinstance(iris_features, str):
        iris_features = ast.literal_eval(iris_features)

    if len(iris_features) != 4:
        raise ValueError(
            "There should be 4 iris features: sepal length, sepal width, petal length and petal width."
        )
    sepal_length, sepal_width, petal_length, petal_width = iris_features

    df = pd.DataFrame(
        [
            {
                "sepal length (cm)": sepal_length,
                "sepal width (cm)": sepal_width,
                "petal length (cm)": petal_length,
                "petal width (cm)": petal_width,
            }
        ]
    )
    species_idx = int(model.predict(df)[0])
    probability = model.predict_proba(df)[0][species_idx]
    species_map = {
        0: "Iris setosa",
        1: "Iris versicolor",
        2: "Iris virginica",
    }
    species_name = species_map.get(species_idx)
    return f"{species_name} with confidence {probability * 100 :.2f} %"

In [13]:
tools = [
    Tool(
        name="Iris species predictor",
        func=agent_predict,
        description="Returns predicted iris species.",
    )
]

In [14]:
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent="zero-shot-react-description",
    memory=memory,
    verbose=True,
)

  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
  agent = initialize_agent(


In [15]:
agent.run(
    "What species will be iris with sepal length 7.6, sepal width 3.0, petal length 6.6, petal width 2.1? And with what confidence?"
)

  agent.run(




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to use the iris species predictor to determine the species of the iris based on the provided features: sepal length, sepal width, petal length, and petal width. 

Action: Iris species predictor  
Action Input: (7.6, 3.0, 6.6, 2.1)  
[0m
Observation: [36;1m[1;3mIris virginica with confidence 100.00 %[0m
Thought:[32;1m[1;3mI now know the final answer.  
Final Answer: The iris species is Iris virginica with a confidence of 100.00%.[0m

[1m> Finished chain.[0m


'The iris species is Iris virginica with a confidence of 100.00%.'

In [None]:
# agent.run(
#     "What species will be iris with sepal length 7.6, sepal width 3.0, petal length 6.6? And with what confidence?"
# )