## Load settings and create project client.

In [13]:
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import FileSearchTool, FilePurpose
import os
from dotenv import load_dotenv

# Load environment variables from ../../.env (PROJECT_CONNECTION_STRING is read from the environment below).
load_dotenv("../../.env")
# YAML file that persists the vector store ID and the agent ID between runs.
SETTINGS_FILE = "k8s_agent.yaml"
# Text file listing the local paths that have been uploaded, one per line.
IMPORTED_FILES = "k8s_files.txt"
# Text file with one "<file id> <local path>" line per uploaded file.
IMPORTED_FILE_IDS = "k8s_file_ids.txt"

In [14]:
# Build the project client from the connection string in the environment,
# authenticating with whatever DefaultAzureCredential resolves.
azure_credential = DefaultAzureCredential()
project_client = AIProjectClient.from_connection_string(
    conn_str=os.environ["PROJECT_CONNECTION_STRING"],
    credential=azure_credential,
)

## Create or reload the k8s vector store

In [15]:
import yaml
import os


# Reuse the vector store recorded in SETTINGS_FILE when there is one; otherwise
# create an empty store and record its ID so later runs (and the clean-up cell)
# can find it.
settings = {}
if os.path.exists(SETTINGS_FILE):
    with open(SETTINGS_FILE, "r") as file:
        # safe_load returns None for an empty file; fall back to an empty dict.
        settings = yaml.safe_load(file) or {}

# .get() covers a settings file that exists but has no vector store entry, so
# `vector_store` is always defined once this cell finishes.
vector_store_id = settings.get("vector_store_id")
if vector_store_id is not None:
    vector_store = project_client.agents.get_vector_store(vector_store_id)
    print(f"Reloaded vector store: {vector_store.id}")
else:
    # Create a vector store with no files and wait for it to be processed.
    vector_store = project_client.agents.create_vector_store_and_poll(
        data_sources=[], name="k8s_vector_store"
    )
    print(f"Created vector store, vector store ID: {vector_store.id}")

    # Merge into the loaded settings so any other keys already stored survive.
    settings["vector_store_id"] = vector_store.id
    with open(SETTINGS_FILE, "w") as file:
        yaml.dump(settings, file)
    print(f"Saved vector store ID to {SETTINGS_FILE}")

Created vector store, vector store ID: vs_KMdcBBaMsb8KFMgZtBfDYNtD
Saved vector store ID to settings.yaml


## Create or reload the k8s agent

In [16]:
def create_k8s_expert_agent(vector_store_id: str):
    """Create the "k8s-agent" agent with a file search tool over one vector store.

    Args:
        vector_store_id: ID of the vector store handed to the file search tool.

    Returns:
        The agent object returned by ``project_client.agents.create_agent``.
    """
    print("This is k8s Azure AI Agent Service .......")
    search_tool = FileSearchTool(vector_store_ids=[vector_store_id])
    agent_instructions = (
        "You are a helpful agent for Kubernetes, capable of generating question and answer pairs to assist with Kubernetes-related inquiries."
    )
    return project_client.agents.create_agent(
        model="gpt-4o-mini",
        name="k8s-agent",
        instructions=agent_instructions,
        tools=search_tool.definitions,
        tool_resources=search_tool.resources,
    )

In [17]:
# Reuse the agent recorded in SETTINGS_FILE when there is one; otherwise create
# it against the current vector store and record its ID.
if not os.path.exists(SETTINGS_FILE):
    # The vector store cell writes this file, so its absence means that cell
    # has not run. Fail loudly instead of continuing without an agent.
    raise ValueError(f"{SETTINGS_FILE} file not found")

with open(SETTINGS_FILE, "r") as file:
    # safe_load returns None for an empty file; fall back to an empty dict.
    settings = yaml.safe_load(file) or {}

agent_id = settings.get("agent_id")
if agent_id is not None:
    agent = project_client.agents.get_agent(agent_id)
    print(f"Reloaded agent: {agent.id}")
else:
    agent = create_k8s_expert_agent(vector_store.id)
    print(f"Created agent: {agent.id}")

    # Persist the new agent ID alongside the settings that were already stored.
    settings["agent_id"] = agent.id
    with open(SETTINGS_FILE, "w") as file:
        yaml.dump(settings, file)
    print(f"Saved agent ID to {SETTINGS_FILE}")

This is k8s Azure AI Agent Service .......
Created agent: asst_zCVjnwJwfuUvNWdW4WVjnjb5
Saved agent ID to settings.yaml


## Load data into the vector store

In [18]:
import requests
import zipfile
import os
from tqdm import tqdm

# Create data directory if it doesn't exist
data_dir = "../../data"
if not os.path.exists(data_dir):
    os.makedirs(data_dir)
    print(f"Created directory: {data_dir}")

# GitHub archive (master branch) of the dohsimpson/kubernetes-doc-pdf repository
url = "https://github.com/dohsimpson/kubernetes-doc-pdf/archive/refs/heads/master.zip"
zip_path = os.path.join(data_dir, "k8s-website.zip")
# Top-level folder the archive unpacks to; the PDFs are read from beneath it.
extracted_dir = os.path.join(data_dir, "kubernetes-doc-pdf-master")

freshly_downloaded = False
if os.path.exists(zip_path):
    print(f"Zip file already exists: {zip_path}")
    print("Skipping download...")
else:
    # Download to a temporary name and rename only on success, so an
    # interrupted download is never mistaken for a complete archive.
    print(f"Downloading {url}...")
    partial_path = zip_path + ".part"
    block_size = 1024  # 1 KB
    # The timeout keeps the cell from hanging forever on a stalled connection.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()  # Raise exception for HTTP errors
        total_size = int(response.headers.get("content-length", 0))
        with open(partial_path, "wb") as file, tqdm(
            total=total_size, unit="B", unit_scale=True
        ) as progress_bar:
            for data in response.iter_content(block_size):
                file.write(data)
                progress_bar.update(len(data))
    os.replace(partial_path, zip_path)
    freshly_downloaded = True

# Extract after a fresh download, and also when the zip is already present but
# its contents were never unpacked (or were deleted since).
if freshly_downloaded or not os.path.isdir(extracted_dir):
    print(f"Extracting {zip_path} to {data_dir}...")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(data_dir)
    print(f"Successfully extracted Kubernetes documentation PDFs to {data_dir}")
else:
    print(f"Archive already extracted: {extracted_dir}")

Zip file already exists: ../../data/k8s-website.zip
Skipping download...


In [19]:
import os

def get_all_files(directory):
    """Return the path of every file found under ``directory``, recursively.

    Args:
        directory: Root folder to walk.

    Returns:
        A list of paths, each built by joining the containing folder with the
        file name, in the order ``os.walk`` visits them. A folder without
        files (or a path that does not exist) yields an empty list.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(directory)
        for filename in filenames
    ]

# Folder inside the extracted archive that holds the PDFs.
directory = os.path.join(data_dir, "kubernetes-doc-pdf-master", "PDFs")
if not os.path.isdir(directory):
    # os.walk yields nothing for a missing folder, which would otherwise show
    # up only as "Total files found: 0" followed by an empty upload.
    raise FileNotFoundError(
        f"{directory} not found; extract the downloaded archive into {data_dir} first"
    )
all_files = get_all_files(directory)
print(f"Total files found: {len(all_files)}")


Total files found: 6


In [20]:
def upload_files_to_vector_store(file_path):
    """Upload one local file to the project via ``upload_file_and_poll``.

    Despite the name, this function does not touch a vector store: it only
    uploads the file with ``FilePurpose.AGENTS``. The returned file's ID has
    to be added to a vector store separately.

    Args:
        file_path: Path of the local file to upload.

    Returns:
        The object returned by ``project_client.agents.upload_file_and_poll``.
    """
    return project_client.agents.upload_file_and_poll(
        purpose=FilePurpose.AGENTS,
        file_path=file_path,
    )

In [21]:
import tqdm 
import os

# Paths recorded by earlier runs; these are skipped so re-running the cell does
# not upload the same document twice.
stored_vector_store_files = []
if os.path.exists(IMPORTED_FILES):
    with open(IMPORTED_FILES, "r") as imported_files:
        stored_vector_store_files = [line.strip() for line in imported_files]
    print(f"Loaded {len(stored_vector_store_files)} files from {IMPORTED_FILES}")

# Filter into a new list instead of overwriting all_files, so the cell can be
# re-run without depending on its own previous result.
already_uploaded = set(stored_vector_store_files)
files_to_upload = [path for path in all_files if path not in already_uploaded]

# IDs of the files uploaded by this run.
vector_store_files = []
# Append rather than truncate: opening with "w" would wipe the record of
# earlier uploads and make the next run upload everything again.
with open(IMPORTED_FILE_IDS, "a") as imported_file_ids, open(IMPORTED_FILES, "a") as imported_files:
    for file_path in tqdm.tqdm(files_to_upload, desc="Uploading files", unit="file"):
        vector_store_file = upload_files_to_vector_store(file_path)
        vector_store_files.append(vector_store_file.id)
        imported_files.write(f"{file_path}\n")
        imported_file_ids.write(f"{vector_store_file.id} {file_path}\n")
        # Flush after every file so an interrupted run keeps what it finished.
        imported_files.flush()
        imported_file_ids.flush()
   
            

Uploading files: 100%|██████████| 6/6 [01:09<00:00, 11.51s/file]


In [22]:
# Print one entry per file returned by the project's file listing.
files = project_client.agents.list_files()
for file_info in files["data"]:
    print(file_info)

{'object': 'file', 'id': 'assistant-BPifuV3GVABrEBzhU6J8Pu', 'purpose': 'assistants', 'filename': 'Tutorials.pdf', 'bytes': 782577, 'created_at': 1741662260, 'status': 'processed', 'status_details': None}
{'object': 'file', 'id': 'assistant-FwtMgc9DTK8X1umorF8TuR', 'purpose': 'assistants', 'filename': 'Concepts.pdf', 'bytes': 3643479, 'created_at': 1741662248, 'status': 'processed', 'status_details': None}
{'object': 'file', 'id': 'assistant-L7Hedvjd71SwPdUGWnPzsm', 'purpose': 'assistants', 'filename': 'Setup.pdf', 'bytes': 479891, 'created_at': 1741662236, 'status': 'processed', 'status_details': None}
{'object': 'file', 'id': 'assistant-L2BxtEddWNJBCRJ2ne95ZD', 'purpose': 'assistants', 'filename': 'Tasks.pdf', 'bytes': 3950463, 'created_at': 1741662226, 'status': 'processed', 'status_details': None}
{'object': 'file', 'id': 'assistant-L3eQjjLPWwT2Ms1h92Cn2k', 'purpose': 'assistants', 'filename': 'kubectl-commands.pdf', 'bytes': 615154, 'created_at': 1741662214, 'status': 'processed',

In [23]:
# Attach the uploaded file IDs to the vector store in slices of at most
# batch_size IDs, waiting for each batch before starting the next.
batch_size = 500
for start in range(0, len(vector_store_files), batch_size):
    batch_files = vector_store_files[start : start + batch_size]
    batch = project_client.agents.create_vector_store_file_batch_and_poll(
        file_ids=batch_files,
        vector_store_id=vector_store.id,
    )
    print(f"Batch created with ID: {batch.id}")

Batch created with ID: vsfb_369107538f9d45afa93a85ef89f1349e


## Test the k8s Agent

In [24]:
# Start a fresh conversation thread for the test question.
thread = project_client.agents.create_thread()

# Post the user's question to the thread.
message = project_client.agents.create_message(
    thread_id=thread.id,
    role="user",
    content="""    
               List out Kubernetes entities.
            """,
)
# Create a run for the agent on this thread and process it to completion.
run = project_client.agents.create_and_process_run(thread_id=thread.id, assistant_id=agent.id)
print(f"Run finished with status: {run.status}")
if run.status == "failed":
    # Surface the reported reason instead of leaving only a status line.
    print(f"Run failed: {run.last_error}")

Run finished with status: RunStatus.COMPLETED


In [25]:
from pprint import pprint

# Fetch the messages on the test thread and dump the raw "data" payload.
messages = project_client.agents.list_messages(thread_id=thread.id)
pprint(messages["data"])

[{'id': 'msg_4NUVqiC55RnDHi1h9IDix9Y4', 'object': 'thread.message', 'created_at': 1741662384, 'assistant_id': 'asst_zCVjnwJwfuUvNWdW4WVjnjb5', 'thread_id': 'thread_LCdigCBO3bsmgEJiKMUmZJ5Y', 'run_id': 'run_wToWAef9Gph6qNpxKtepMsWq', 'role': 'assistant', 'content': [{'type': 'text', 'text': {'value': 'Kubernetes entities, also known as Kubernetes objects, are persistent entities used to represent the state of your cluster. Here are some key Kubernetes entities:\n\n1. **Pod**: The basic deployment unit in Kubernetes, representing a single instance of a running process in the cluster.\n\n2. **Service**: An abstraction that defines a logical set of Pods and a way to access them. Services enable communication between different Pods and external clients.\n\n3. **Deployment**: Manages the deployment of Pods, allowing for updates and scaling operations.\n\n4. **ReplicaSet**: Ensures that a specified number of Pod replicas are running at all times.\n\n5. **StatefulSet**: Similar to a Deployment

In [26]:
from IPython.display import display, Markdown

# Look up the last text message with the "assistant" role and render its text
# as Markdown; nothing is displayed when the lookup returns a falsy value.
last_msg =  messages.get_last_text_message_by_role("assistant")
if last_msg:
    display(Markdown(last_msg.text.value))

Kubernetes entities, also known as Kubernetes objects, are persistent entities used to represent the state of your cluster. Here are some key Kubernetes entities:

1. **Pod**: The basic deployment unit in Kubernetes, representing a single instance of a running process in the cluster.

2. **Service**: An abstraction that defines a logical set of Pods and a way to access them. Services enable communication between different Pods and external clients.

3. **Deployment**: Manages the deployment of Pods, allowing for updates and scaling operations.

4. **ReplicaSet**: Ensures that a specified number of Pod replicas are running at all times.

5. **StatefulSet**: Similar to a Deployment, but for managing stateful applications that require stable network identifiers and persistent storage.

6. **DaemonSet**: Ensures that a copy of a Pod runs on all or some nodes in the cluster.

7. **Job**: Creates one or more Pods and ensures that a specified number of them successfully terminate.

8. **CronJob**: Similar to a Job, but allows for the creation of Jobs on a scheduled basis.

9. **ConfigMap**: Used to store non-confidential data in key-value pairs, which can be consumed by Pods.

10. **Secret**: Similar to ConfigMap but is used to store sensitive data, such as passwords or tokens.

11. **PersistentVolume (PV)**: A piece of storage in the cluster that has been provisioned by an administrator.

12. **PersistentVolumeClaim (PVC)**: A request for storage by a user.

13. **Namespace**: A way to divide cluster resources between multiple users or teams.

14. **Ingress**: Manages external access to services, typically HTTP.

15. **NetworkPolicy**: Defines how Pods can communicate with each other and with other network endpoints.

These entities help Kubernetes manage resources and applications running on the cluster efficiently 【4:3†source】.

In [27]:
# Show the annotations attached to the answer text. Guard on last_msg the same
# way the display step does, since it may be empty when no reply was produced.
if last_msg:
    pprint(last_msg.text.annotations)
else:
    print("No assistant message to inspect.")

[{'type': 'file_citation', 'text': '【4:3†source】', 'start_index': 1842, 'end_index': 1854, 'file_citation': {'file_id': 'assistant-FwtMgc9DTK8X1umorF8TuR'}}]


## Clean up

In [12]:
# Delete the agent and vector store recorded in SETTINGS_FILE, then remove the
# local bookkeeping files so the next run starts from scratch.
if os.path.exists(SETTINGS_FILE):
    with open(SETTINGS_FILE, "r") as file:
        # safe_load returns None for an empty file; fall back to an empty dict.
        settings = yaml.safe_load(file) or {}

    agent_id = settings.get("agent_id")
    if agent_id is not None:
        project_client.agents.delete_agent(agent_id)
        print(f"Deleted agent with ID: {agent_id}")
    else:
        print("Agent ID not found in settings.")

    vector_store_id = settings.get("vector_store_id")
    if vector_store_id is not None:
        project_client.agents.delete_vector_store(vector_store_id)
        print(f"Deleted vector store with ID: {vector_store_id}")
    else:
        print("Vector store ID not found in settings.")

    os.remove(SETTINGS_FILE)
    print(f"Deleted {SETTINGS_FILE} file.")

    # The upload step may never have run, so its record file may not exist.
    if os.path.exists(IMPORTED_FILES):
        os.remove(IMPORTED_FILES)
        print(f"Deleted {IMPORTED_FILES} file.")
    # NOTE(review): IMPORTED_FILE_IDS is left on disk (its removal was commented
    # out), presumably so the uploaded file IDs remain available for deleting
    # the files themselves; confirm whether it should be removed here too.

Deleted agent with ID: asst_ujCbHwxZvxUZrzPw8ku7RqXP
Deleted vector store with ID: vs_PGrKdlx7jl46WkMm06i8gUAU
Deleted settings.yaml file.


In [None]:
# Show the vector stores currently in the project, e.g. to check the result of the clean-up.
project_client.agents.list_vector_stores()

## Delete all files

In [None]:

# WARNING: list_files() is called without any filter, so this loop deletes
# every file it returns, not only the ones uploaded by this notebook.
files = project_client.agents.list_files()
for uploaded_file in files["data"]:
    file_id = uploaded_file.id
    project_client.agents.delete_file(file_id)
    print(f"Deleted file with ID: {file_id}")
