In [85]:
import os
from urllib.parse import quote_plus

# from langchain_community.utilities.sql_database import SQLDatabase
from langchain import OpenAI, SQLDatabase
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.chat_models import ChatOpenAI


In [69]:
# Connection settings are taken from the environment so that no secret is
# stored in the notebook itself.
# API_KEY is read here but is not passed explicitly to any client in this cell.
API_KEY = os.getenv('OPENAI_API_KEY')
DBPASS = os.getenv('DB_PASS')
DATABASE = os.getenv('DATABASE')
# Schema exposed to the agent; falls back to the previously hard-coded 'search'
# when the SCHEMA variable is unset or empty.
SCHEMA = os.getenv('SCHEMA') or 'search'

# Fail early with a readable message instead of silently building a URI that
# contains the literal text "None".
_missing_settings = [
    env_name
    for env_name, env_value in (('DB_PASS', DBPASS), ('DATABASE', DATABASE))
    if env_value is None
]
if _missing_settings:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_settings)}"
    )

# The password is URL-encoded: characters such as '@', ':', '/' or '#' would
# otherwise be parsed as URI delimiters and break the connection string.
db = SQLDatabase.from_uri(
    f"postgresql+psycopg2://postgres:{quote_plus(DBPASS)}@localhost:5432/{DATABASE}",
    schema=SCHEMA,
)

# Toolkit that hands the database to the agent; it is built with its own
# completion-style OpenAI LLM at temperature 0.
toolkit = SQLDatabaseToolkit(db=db, llm=OpenAI(temperature=0))

In [70]:
# Set up the chat model and the SQL agent that runs on top of `toolkit`.
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_types import AgentType

# Chat model driving the agent; temperature 0 is used for the queries below.
llm = ChatOpenAI(temperature=0, model="gpt-4")

# The agent is using a ReAct style prompt (ZERO_SHOT_REACT_DESCRIPTION).
# verbose=True prints the intermediate Thought/Action trace for each question.
agent_executor = create_sql_agent(
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    toolkit=toolkit,
    llm=llm,
    verbose=True,
)

In [78]:
from langchain.prompts.chat import ChatPromptTemplate

# System instructions describing the data layout to the model: resource details
# live in the jsonb `data` column of `resources`, and relationships in `edges`.
system_instructions = '''You are a helpful AI assistant expert in querying SQL Database to find answers to user's question about Kubernetes resources, clusters etc. 
    All resource details and key names for filters are in the jsonb column 'data' within the resources table - which is mostly 1 level deep. 
    Create the final query using the key names in data column.
    The types of resources are in the 'kind' key within the data column. 
    The answer set should be de-duplicated. Run the final query and get the answer. A recursive query on edges table will show all resource relationships'''

# Two-message template: the fixed system text plus the user's question, which
# is substituted for the {input} placeholder at format time.
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_instructions),
        ("user", "{input}"),
    ]
)


In [72]:
# gpt-4 run, after the system prompt was changed.
# Label-filtered Pod lookup, capped at 10 results.
question = "Find the names of Pod resources with label 'app=search'. Limit results to 10"
agent_executor.invoke(final_prompt.format(input=question))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mTo answer this question, I need to query the 'resources' table, specifically the 'data' column which is a jsonb type. I need to filter for 'kind' = 'Pod' and 'app' = 'search' within the labels. I also need to limit the results to 10 and ensure they are unique. However, before I can write the query, I need to understand the schema of the 'resources' table and the structure of the 'data' column.
Action: sql_db_schema
Action Input: resources[0m[33;1m[1;3m
CREATE TABLE search.resources (
	uid TEXT NOT NULL, 
	cluster TEXT, 
	data JSONB, 
	CONSTRAINT resources_pkey PRIMARY KEY (uid)
)

/*
3 rows from resources table:
uid	cluster	data
local-cluster/5745254b-9297-4e00-a463-7ca94d4abe40	local-cluster	{'kind': 'APIRequestCount', 'name': 'agentclusterinstalls.v1beta1.extensions.hive.openshift.io', 'cr
local-cluster/942700c1-06ae-4b6a-b131-42db7a758ddc	local-cluster	{'kind': 'APIRequestCount', 'name': 'policies.v1.policy.open-cl

{'input': "System: You are a helpful AI assistant expert in querying SQL Database to find answers to user's question about Kubernetes resources, clusters etc. \n    All resource details and key names for filters are in the jsonb column 'data' within the resources table - which is mostly 1 level deep. \n    Create the final query using the key names in data column.\n    The types of resources are in the 'kind' key within the data column. \n    The answer set should be de-duplicated. Run the final query and get the answer\nHuman: Find the names of Pod resources with label 'app=search'. Limit results to 10",
 'output': "The names of Pod resources with label 'app=search' are 'search-api-5fd6956747-qwv69', 'search-collector-f6b9ddd79-xphsm', 'search-indexer-6f886d68df-d2ptz', and 'search-postgres-d7778bcb6-wm6zm'."}

In [73]:
# Ask which cluster hosts a pod whose name matches the given pattern.
question = "Which cluster has a pod named klusterlet-addon-search-%"
agent_executor.invoke(final_prompt.format(input=question))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mTo answer this question, I need to query the 'resources' table, specifically the 'data' column, to find the cluster that has a pod with a name that matches the pattern 'klusterlet-addon-search-%'. The 'kind' key within the 'data' column will help me identify the type of resource. Since the answer set should be de-duplicated, I will use the DISTINCT keyword in my SQL query. However, before I write the query, I need to check the schema of the 'resources' table to understand its structure.

Action: sql_db_schema
Action Input: resources[0m[33;1m[1;3m
CREATE TABLE search.resources (
	uid TEXT NOT NULL, 
	cluster TEXT, 
	data JSONB, 
	CONSTRAINT resources_pkey PRIMARY KEY (uid)
)

/*
3 rows from resources table:
uid	cluster	data
local-cluster/5745254b-9297-4e00-a463-7ca94d4abe40	local-cluster	{'kind': 'APIRequestCount', 'name': 'agentclusterinstalls.v1beta1.extensions.hive.openshift.io', 'cr
local-cluster/942700c1-06ae-4b6a

{'input': "System: You are a helpful AI assistant expert in querying SQL Database to find answers to user's question about Kubernetes resources, clusters etc. \n    All resource details and key names for filters are in the jsonb column 'data' within the resources table - which is mostly 1 level deep. \n    Create the final query using the key names in data column.\n    The types of resources are in the 'kind' key within the data column. \n    The answer set should be de-duplicated. Run the final query and get the answer\nHuman: Which cluster has a pod named klusterlet-addon-search-%",
 'output': "The cluster that has a pod named 'klusterlet-addon-search-%' is 'sav-remote'."}

In [74]:
# Loosely worded question: the agent has to work out what an "addon pod" is.
question = "What all addon pods are enabled?"
agent_executor.invoke(final_prompt.format(input=question))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mTo answer this question, I need to query the 'resources' table and look for the 'kind' key in the 'data' column. The value of the 'kind' key should be 'Pod'. I also need to filter the results to only include addon pods. However, I'm not sure what key in the 'data' column would indicate whether a pod is an addon or not. I'll need to look at the schema and some sample rows of the 'resources' table to figure this out.
Action: sql_db_schema
Action Input: resources[0m[33;1m[1;3m
CREATE TABLE search.resources (
	uid TEXT NOT NULL, 
	cluster TEXT, 
	data JSONB, 
	CONSTRAINT resources_pkey PRIMARY KEY (uid)
)

/*
3 rows from resources table:
uid	cluster	data
local-cluster/5745254b-9297-4e00-a463-7ca94d4abe40	local-cluster	{'kind': 'APIRequestCount', 'name': 'agentclusterinstalls.v1beta1.extensions.hive.openshift.io', 'cr
local-cluster/942700c1-06ae-4b6a-b131-42db7a758ddc	local-cluster	{'kind': 'APIRequestCount', 'name': 'poli

{'input': "System: You are a helpful AI assistant expert in querying SQL Database to find answers to user's question about Kubernetes resources, clusters etc. \n    All resource details and key names for filters are in the jsonb column 'data' within the resources table - which is mostly 1 level deep. \n    Create the final query using the key names in data column.\n    The types of resources are in the 'kind' key within the data column. \n    The answer set should be de-duplicated. Run the final query and get the answer\nHuman: What all addon pods are enabled?",
 'output': "The enabled addon pods are: 'cluster-manager-addon-manager-controller-7d746c687f-2xz84', 'cluster-proxy-addon-manager-856c48887f-fdcj8', 'cluster-proxy-addon-manager-856c48887f-tldbh', 'cluster-proxy-addon-user-66b57c5dff-k5pxr', 'cluster-proxy-addon-user-66b57c5dff-lln22', 'grc-policy-addon-controller-868b7747f6-dltwn', 'grc-policy-addon-controller-868b7747f6-zhbml', 'hypershift-addon-agent-6db974bbfc-ftbm5', 'hype

In [75]:
# Relationship question: Deployments associated with pods matching 'search%'.
question = "Show all Deployments related to 'search%' pods"
agent_executor.invoke(final_prompt.format(input=question))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mTo answer this question, I need to query the 'resources' table and filter the results based on the 'kind' key in the 'data' column. The 'kind' should be 'Deployment' and the pods should be related to 'search%'. However, I need to check the schema of the 'resources' table to understand its structure and the exact keys in the 'data' column.
Action: sql_db_schema
Action Input: resources[0m[33;1m[1;3m
CREATE TABLE search.resources (
	uid TEXT NOT NULL, 
	cluster TEXT, 
	data JSONB, 
	CONSTRAINT resources_pkey PRIMARY KEY (uid)
)

/*
3 rows from resources table:
uid	cluster	data
local-cluster/5745254b-9297-4e00-a463-7ca94d4abe40	local-cluster	{'kind': 'APIRequestCount', 'name': 'agentclusterinstalls.v1beta1.extensions.hive.openshift.io', 'cr
local-cluster/942700c1-06ae-4b6a-b131-42db7a758ddc	local-cluster	{'kind': 'APIRequestCount', 'name': 'policies.v1.policy.open-cluster-management.io', 'created': '202
local-cluster/1ab8

{'input': "System: You are a helpful AI assistant expert in querying SQL Database to find answers to user's question about Kubernetes resources, clusters etc. \n    All resource details and key names for filters are in the jsonb column 'data' within the resources table - which is mostly 1 level deep. \n    Create the final query using the key names in data column.\n    The types of resources are in the 'kind' key within the data column. \n    The answer set should be de-duplicated. Run the final query and get the answer\nHuman: Show all Deployments related to 'search%' pods",
 'output': "The deployments related to 'search%' pods are 'search-api', 'search-collector', 'search-indexer', 'search-postgres', and 'search-v2-operator-controller-manager'."}

In [77]:
# Broad relationship question: every resource tied to 'search' pods,
# reported as kind + name.
question = "Find all resources related to 'search' pods. Get their kind and name."
agent_executor.invoke(final_prompt.format(input=question))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mFirst, I need to understand the structure of the tables involved. I will use the sql_db_schema tool to get the schema and sample rows for the resources and edges tables.
Action: sql_db_schema
Action Input: resources, edges[0m[33;1m[1;3m
CREATE TABLE search.edges (
	sourceid TEXT NOT NULL, 
	sourcekind TEXT, 
	destid TEXT NOT NULL, 
	destkind TEXT, 
	edgetype TEXT NOT NULL, 
	cluster TEXT, 
	CONSTRAINT edges_pkey PRIMARY KEY (sourceid, destid, edgetype)
)

/*
3 rows from edges table:
sourceid	sourcekind	destid	destkind	edgetype	cluster
local-cluster/32f1a268-2389-4790-9648-0958a4003441	Role	local-cluster/c79b072f-62aa-44e6-b450-0c2762ce8c6b	MultiClusterEngine	ownedBy	local-cluster
local-cluster/eb553de3-6454-4f11-bd77-bc7a6bbd3198	ReplicaSet	local-cluster/0ebea3d3-158b-4287-ab20-eccb1a0a534c	Deployment	ownedBy	local-cluster
local-cluster/eb553de3-6454-4f11-bd77-bc7a6bbd3198	ReplicaSet	local-cluster/d9c4700e-afe5-4a7c-b

{'input': "System: You are a helpful AI assistant expert in querying SQL Database to find answers to user's question about Kubernetes resources, clusters etc. \n    All resource details and key names for filters are in the jsonb column 'data' within the resources table - which is mostly 1 level deep. \n    Create the final query using the key names in data column.\n    The types of resources are in the 'kind' key within the data column. \n    The answer set should be de-duplicated. Run the final query and get the answer. Resource relationships can be found in edges table\nHuman: Find all resources related to 'search' pods. Get their kind and name.",
 'output': "The resources related to 'search' pods and their kinds and names are as follows:\n1. Kind: Pod, Name: klusterlet-addon-search-669b58bc8b-nlbdc\n2. Kind: Pod, Name: search-api-5fd6956747-qwv69\n3. Kind: Pod, Name: search-collector-f6b9ddd79-xphsm\n4. Kind: Pod, Name: search-indexer-6f886d68df-d2ptz\n5. Kind: Pod, Name: search-pos

In [79]:
# Relationship question about 'search' pods, asking for kind + name of every
# related resource.
question = "Find all resources related to 'search' pods. Get their kind and name."
agent_executor.invoke(final_prompt.format(input=question))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mFirst, I need to understand the structure of the database and the tables involved. I will use the sql_db_list_tables tool to get a list of all tables in the database.
Action: sql_db_list_tables
Action Input: ""[0m[38;5;200m[1;3medges, resources[0m[32;1m[1;3mThe tables in the database are 'edges' and 'resources'. Now, I need to understand the schema of these tables. I will use the sql_db_schema tool to get the schema and sample rows for these tables.
Action: sql_db_schema
Action Input: "edges, resources"[0m[33;1m[1;3m
CREATE TABLE search.edges (
	sourceid TEXT NOT NULL, 
	sourcekind TEXT, 
	destid TEXT NOT NULL, 
	destkind TEXT, 
	edgetype TEXT NOT NULL, 
	cluster TEXT, 
	CONSTRAINT edges_pkey PRIMARY KEY (sourceid, destid, edgetype)
)

/*
3 rows from edges table:
sourceid	sourcekind	destid	destkind	edgetype	cluster
local-cluster/32f1a268-2389-4790-9648-0958a4003441	Role	local-cluster/c79b072f-62aa-44e6-b450-0c27

{'input': "System: You are a helpful AI assistant expert in querying SQL Database to find answers to user's question about Kubernetes resources, clusters etc. \n    All resource details and key names for filters are in the jsonb column 'data' within the resources table - which is mostly 1 level deep. \n    Create the final query using the key names in data column.\n    The types of resources are in the 'kind' key within the data column. \n    The answer set should be de-duplicated. Run the final query and get the answer. A recursive query on edges table will show all resource relationships\nHuman: Find all resources related to 'search' pods. Get their kind and name.",
 'output': "The resources related to 'search' pods and their kinds and names are as follows:\n- ConfigMap: search-postgres\n- Deployment: klusterlet-addon-search\n- Deployment: search-api\n- Deployment: search-collector\n- Deployment: search-indexer\n- Deployment: search-postgres\n- Deployment: search-v2-operator-controll

In [82]:
# Counting question; the full result dict is kept in `response` for later cells.
question = "How many Pod resources are present?"
response = agent_executor.invoke(final_prompt.format(input=question))
print(response)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mTo answer this question, I need to query the 'resources' table and count the number of rows where the 'kind' key in the 'data' column is 'Pod'. Since the 'data' column is a jsonb type, I need to use the appropriate PostgreSQL functions to access the 'kind' key. I also need to ensure that the results are de-duplicated. Before I write the query, I should check the schema of the 'resources' table to confirm the structure and data types.
Action: sql_db_schema
Action Input: resources[0m[33;1m[1;3m
CREATE TABLE search.resources (
	uid TEXT NOT NULL, 
	cluster TEXT, 
	data JSONB, 
	CONSTRAINT resources_pkey PRIMARY KEY (uid)
)

/*
3 rows from resources table:
uid	cluster	data
local-cluster/57cf7096-72c3-4e90-9fe1-21c05f44939b	local-cluster	{'kind': 'ManagedClusterInfo', 'name': 'local-cluster', 'label': {'name': 'local-cluster', 'cloud': 
local-cluster/c83f2215-eec5-4873-a9e0-ef1ff5f0c822	local-cluster	{'kind': 'Deployment',

In [84]:
# Show only the agent's final answer text from the stored result dict.
response["output"]

'There are 332 Pod resources present.'