# Initializing
## Downloading and Importing all libs

In [1]:
import sys
import os
from dotenv import load_dotenv, find_dotenv

from llama_index.core import Settings
from langchain.llms import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex, SummaryIndex
from llama_index.readers.file import MarkdownReader
from llama_index.core import SimpleDirectoryReader
from llama_index.core.tools import QueryEngineTool
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner, ReActAgentWorker, ReActAgent
from llama_index.core.tools import QueryEngineTool
from llama_index.core.tools import QueryEngineTool
from llama_index.core import StorageContext, load_index_from_storage
from langchain.output_parsers.openai_tools import JsonOutputToolsParser
from langchain_community.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain.chains import create_extraction_chain
from typing import Optional, List
from langchain.chains import create_extraction_chain_pydantic
from langchain_core.pydantic_v1 import BaseModel
from langchain import hub
import nest_asyncio

nest_asyncio.apply()



## Loading all the keys

# Experimenting with Mardown Text Splitting for meaningful chunks
### Using Llama Index's ***MarkdownTextSplitter***

In [3]:
chunk_size = 1024
chunk_overlap = 0

In [4]:
root_doc_path = './docs'
# Markdown Reader example
file_path = f"{root_doc_path}/overview/EMP Overview.md"
md_reader = MarkdownReader(remove_hyperlinks=False)
file_extractor = {".md": md_reader}
documents= SimpleDirectoryReader(input_files=[file_path] ,
                                          file_extractor=file_extractor,
                                          recursive=True).load_data()

In [5]:
type(documents[0])

llama_index.core.schema.Document

In [6]:
for item in documents[0]:
    print(item)

('id_', '0cd222c1-f70a-4d5a-85b4-dfee79b8c0e4')
('embedding', None)
('metadata', {'file_path': 'docs/overview/EMP Overview.md', 'file_name': 'EMP Overview.md', 'file_size': 6480, 'creation_date': '2024-09-06', 'last_modified_date': '2024-09-05'})
('excluded_embed_metadata_keys', ['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'])
('excluded_llm_metadata_keys', ['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'])
('relationships', {})
('text', '---\ntype: page\ntitle: EMP Overview\nlisted: true\nslug: what-is-emp\ndescription: \nindex_title: EMP Overview\nhidden: \nkeywords: \ntags: \n---published\n\nPlatform9 Elastic Machine Pool (EMP) is a Kubernetes cost optimization platform for public cloud native Kubernetes offerings. EMP helps you reclaim &gt; 70% of your wasted Kubernetes compute that may be sitting idle today, thus reducing your Kubernetes cluster costs by &gt; 70%. \n\nEMP suppor

In [7]:
for i,doc in enumerate(documents):
    print(f"\nDocument {i} \n", doc.text)


Document 0 
 ---
type: page
title: EMP Overview
listed: true
slug: what-is-emp
description: 
index_title: EMP Overview
hidden: 
keywords: 
tags: 
---published

Platform9 Elastic Machine Pool (EMP) is a Kubernetes cost optimization platform for public cloud native Kubernetes offerings. EMP helps you reclaim &gt; 70% of your wasted Kubernetes compute that may be sitting idle today, thus reducing your Kubernetes cluster costs by &gt; 70%. 

EMP supports AWS EKS Kubernetes service today. Support for GKE and AKS is coming in the future. 


Document 1 
 

[Who Should Use This Documentation?](https://platform9.com/docs/kubernetes/about-pmkwho-is-this-documentation-for)

This documentation is intended for:

- Administrators and operators managing AWS EKS clusters
- DevOps teams seeking to optimize AWS and EKS cloud spend
- IT decision-makers evaluating cost saving solutions for AWS and Kubernetes

For a comprehensive understanding of EKS, please refer to the official [AWS EKS documentation](h

### Using Langchain's ***MarkdownHeaderTextSplitter***

In [8]:
from langchain.text_splitter import MarkdownHeaderTextSplitter, ExperimentalMarkdownSyntaxTextSplitter


# Markdown Reader example
file_path = f"{root_doc_path}/overview/EMP Overview.md"
# file_path = f"{root_doc_path}/networking/ALB for EMP.md" # issues with code blocks
with open(file_path) as file:
    overview_file = file.read()

headers_to_split_on = [
    ("#", "Markdown Header 1"),
    ("##", "Markdown Header 2"),
    ("###", "Markdown Header 3"),
    ("####", "Markdown Header 3"),
]    

md_splitter = ExperimentalMarkdownSyntaxTextSplitter(headers_to_split_on=headers_to_split_on, strip_headers=False)
# md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on, strip_headers=False)

documents = md_splitter.split_text(overview_file)

In [9]:
for i, doc in enumerate(documents):
    print(f"\nDocument {i} \n{'-' * 100}")  # This prints 10 dashes
    print(f"\n {doc.metadata} \n")
    print(doc.page_content)


Document 0 
----------------------------------------------------------------------------------------------------

 {} 

type: page
title: EMP Overview
listed: true
slug: what-is-emp
description: 
index_title: EMP Overview
hidden: 
keywords: 
tags: 
---published

Platform9 Elastic Machine Pool (EMP) is a Kubernetes cost optimization platform for public cloud native Kubernetes offerings. EMP helps you reclaim &gt; 70% of your wasted Kubernetes compute that may be sitting idle today, thus reducing your Kubernetes cluster costs by &gt; 70%. 

EMP supports AWS EKS Kubernetes service today. Support for GKE and AKS is coming in the future. 



Document 1 
----------------------------------------------------------------------------------------------------

 {'Markdown Header 2': '[Who Should Use This Documentation?](https://platform9.com/docs/kubernetes/about-pmk#who-is-this-documentation-for)'} 

## [Who Should Use This Documentation?](https://platform9.com/docs/kubernetes/about-pmk#who-is-t

### Try using Agentic Chunking

In [10]:
obj = hub.pull("wfh/proposal-indexing")
llm = ChatOpenAI(model='gpt-4-1106-preview')

  prompt = loads(json.dumps(prompt_object.manifest))
  llm = ChatOpenAI(model='gpt-4-1106-preview')


In [11]:
print(obj.messages[0].prompt.template)

Decompose the "Content" into clear and simple propositions, ensuring they are interpretable out of
context.
1. Split compound sentence into simple sentences. Maintain the original phrasing from the input
whenever possible.
2. For any named entity that is accompanied by additional descriptive information, separate this
information into its own distinct proposition.
3. Decontextualize the proposition by adding necessary modifier to nouns or entire sentences
and replacing pronouns (e.g., "it", "he", "she", "they", "this", "that") with the full name of the
entities they refer to.
4. Present the results as a list of strings, formatted in JSON.

Example:

Input: Title: ¯Eostre. Section: Theories and interpretations, Connection to Easter Hares. Content:
The earliest evidence for the Easter Hare (Osterhase) was recorded in south-west Germany in
1678 by the professor of medicine Georg Franck von Franckenau, but it remained unknown in
other parts of Germany until the 18th century. Scholar Richar

In [12]:
# use it in a runnable
runnable = obj | llm

In [13]:
# Pydantic data class
class Sentences(BaseModel):
    sentences: List[str]
    
# Extraction
extraction_chain = create_extraction_chain_pydantic(pydantic_schema=Sentences, llm=llm)

  extraction_chain = create_extraction_chain_pydantic(pydantic_schema=Sentences, llm=llm)


In [14]:
def get_propositions(text):
    runnable_output = runnable.invoke({
    	"input": text
    }).content
    
    propositions = extraction_chain.run(runnable_output)[0].sentences
    return propositions

In [15]:
docs_propositions = []

for i, doc in enumerate(documents):
    # commenting as its output is already stored in json file
    # propositions = get_propositions(doc.page_content)
    # docs_propositions.extend(propositions) 
    print (f"Done with {i}")

Done with 0
Done with 1
Done with 2
Done with 3
Done with 4
Done with 5
Done with 6
Done with 7
Done with 8
Done with 9


In [16]:
print (f"You have {len(docs_propositions)} propositions")
docs_propositions[:20]

You have 0 propositions


[]

In [17]:
from agentic_chunker import AgenticChunker
agentic_chunker = AgenticChunker()

In [18]:
# chunks = agentic_chunker.add_propositions(docs_propositions)
# commenting as this is a long and costly process, rather read chunks from JSON file

# Open the JSON file
import json
with open('./agentic-chunking-output.json', 'r') as file:
    # Load the JSON data as a Python dictionary
    agentic_chunker.chunks = json.load(file)

# Print the data to check the content


In [19]:
print(agentic_chunker.get_chunks(get_type='dict'))

{'81da1': {'chunk_id': '81da1', 'propositions': ['Platform9 Elastic Machine Pool (EMP) is a Kubernetes cost optimization platform.', 'Platform9 Elastic Machine Pool (EMP) is for public cloud native Kubernetes offerings.', 'Platform9 Elastic Machine Pool (EMP) helps users to reclaim more than 70% of their wasted Kubernetes compute.', 'The wasted Kubernetes compute may be sitting idle today.', 'Platform9 Elastic Machine Pool (EMP) reduces Kubernetes cluster costs by more than 70%.', 'Platform9 Elastic Machine Pool (EMP) supports AWS EKS Kubernetes service today.', 'Support for GKE and AKS by Platform9 Elastic Machine Pool (EMP) is coming in the future.', 'EMP optimizes Kubernetes utilization by reclaiming existing unused capacity in clusters.', 'EMP enables optimization of capacity that is allocated to pods via request and limit values but not actually utilized.', "Many existing tools optimize Kubernetes resource consumption by modifying pods' request and limit values.", 'EMP taps into e

In [20]:
import json
with open("agentic-chunking-output.json", "w") as file:
    json.dump(agentic_chunker.get_chunks(get_type='dict'), file, indent=4)

### Try using Semantic Chunking ( Not working Well)

In [21]:
%pip install --quiet langchain_experimental langchain_openai


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [22]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings


semantic_chunker = SemanticChunker(OpenAIEmbeddings(), buffer_size=1, breakpoint_threshold_amount=90, sentence_split_regex=r'(^#{1,6}\s.*$)')

In [23]:
sc_docs = semantic_chunker.create_documents([overview_file])


In [24]:
for i, doc in enumerate(sc_docs):
    print(f"\nDocument {i} \n{'-' * 100}")  # This prints 10 dashes
    print(f"\n {doc.metadata} \n")
    print(doc.page_content)


Document 0 
----------------------------------------------------------------------------------------------------

 {} 

---
type: page
title: EMP Overview
listed: true
slug: what-is-emp
description: 
index_title: EMP Overview
hidden: 
keywords: 
tags: 
---published

Platform9 Elastic Machine Pool (EMP) is a Kubernetes cost optimization platform for public cloud native Kubernetes offerings. EMP helps you reclaim &gt; 70% of your wasted Kubernetes compute that may be sitting idle today, thus reducing your Kubernetes cluster costs by &gt; 70%. 

EMP supports AWS EKS Kubernetes service today. Support for GKE and AKS is coming in the future. 

## [Who Should Use This Documentation?](https://platform9.com/docs/kubernetes/about-pmk#who-is-this-documentation-for)

This documentation is intended for:

- Administrators and operators managing AWS EKS clusters
- DevOps teams seeking to optimize AWS and EKS cloud spend
- IT decision-makers evaluating cost saving solutions for AWS and Kubernetes

F

In [25]:
from typing import List
from langchain.schema import Document

# Function that returns a list of LangChain Document objects
def create_documents(chunks: dict) -> List[Document]:
    documents = []
    
    for chunk_id, chunk_data in chunks.items():
        content = " ".join(chunk_data['propositions'])
        
        metadata = {
            "chunk_id": chunk_data['chunk_id'],
            "title": chunk_data['title'],
            "summary": chunk_data['summary'],
            "chunk_index": chunk_data['chunk_index'],
        }
        doc = Document(page_content=content, metadata=metadata)
        
        documents.append(doc)
    
    return documents  

In [26]:
agentic_docs = create_documents(agentic_chunker.chunks)

In [27]:
agentic_docs

[Document(metadata={'chunk_id': '81da1', 'title': 'Kubernetes Optimization & Cloud Compatibility with Platform9 EMP', 'summary': 'This chunk provides detailed information about Platform9 Elastic Machine Pool (EMP), focusing on its Kubernetes cost optimization capabilities, resource utilization features, and compatibility with cloud services, while maintaining existing pod configurations.', 'chunk_index': 0}, page_content="Platform9 Elastic Machine Pool (EMP) is a Kubernetes cost optimization platform. Platform9 Elastic Machine Pool (EMP) is for public cloud native Kubernetes offerings. Platform9 Elastic Machine Pool (EMP) helps users to reclaim more than 70% of their wasted Kubernetes compute. The wasted Kubernetes compute may be sitting idle today. Platform9 Elastic Machine Pool (EMP) reduces Kubernetes cluster costs by more than 70%. Platform9 Elastic Machine Pool (EMP) supports AWS EKS Kubernetes service today. Support for GKE and AKS by Platform9 Elastic Machine Pool (EMP) is comin

In [28]:
markdown_docs=documents

### Testing ***Agentic Chunking*** vs ***MarkdownHeaderSplitter***

In [29]:
embedding_model = OpenAIEmbeddings()
embedding_model.model

'text-embedding-ada-002'

In [30]:
%pip install -qU langchain-community faiss-cpu


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


#### How do semantic search look  for different docs (markdown vs agentic)

In [31]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Initialize embedding model
embedding_model = OpenAIEmbeddings()

# Create vector stores for both sets of documents
agentic_vector_store = FAISS.from_documents(agentic_docs, embedding_model)
markdown_vector_store = FAISS.from_documents(markdown_docs, embedding_model)

  embedding_model = OpenAIEmbeddings()


In [32]:
import pprint
query = "How does EMP optimize Kubernetes utilization?"

# Perform similarity search for both sets of documents
agentic_results = agentic_vector_store.similarity_search(query)
markdown_results = markdown_vector_store.similarity_search(query)

# Output the results for manual inspection
print("Agentic Results:")
for result in agentic_results:
    pprint.pprint(result.page_content)

print("\nMarkdown Results:")
for result in markdown_results:
    pprint.pprint(result.page_content)

Agentic Results:
('Platform9 Elastic Machine Pool (EMP) is a Kubernetes cost optimization '
 'platform. Platform9 Elastic Machine Pool (EMP) is for public cloud native '
 'Kubernetes offerings. Platform9 Elastic Machine Pool (EMP) helps users to '
 'reclaim more than 70% of their wasted Kubernetes compute. The wasted '
 'Kubernetes compute may be sitting idle today. Platform9 Elastic Machine Pool '
 '(EMP) reduces Kubernetes cluster costs by more than 70%. Platform9 Elastic '
 'Machine Pool (EMP) supports AWS EKS Kubernetes service today. Support for '
 'GKE and AKS by Platform9 Elastic Machine Pool (EMP) is coming in the future. '
 'EMP optimizes Kubernetes utilization by reclaiming existing unused capacity '
 'in clusters. EMP enables optimization of capacity that is allocated to pods '
 'via request and limit values but not actually utilized. Many existing tools '
 "optimize Kubernetes resource consumption by modifying pods' request and "
 'limit values. EMP taps into existing unuse

In [33]:
agentic_results

[Document(metadata={'chunk_id': '81da1', 'title': 'Kubernetes Optimization & Cloud Compatibility with Platform9 EMP', 'summary': 'This chunk provides detailed information about Platform9 Elastic Machine Pool (EMP), focusing on its Kubernetes cost optimization capabilities, resource utilization features, and compatibility with cloud services, while maintaining existing pod configurations.', 'chunk_index': 0}, page_content="Platform9 Elastic Machine Pool (EMP) is a Kubernetes cost optimization platform. Platform9 Elastic Machine Pool (EMP) is for public cloud native Kubernetes offerings. Platform9 Elastic Machine Pool (EMP) helps users to reclaim more than 70% of their wasted Kubernetes compute. The wasted Kubernetes compute may be sitting idle today. Platform9 Elastic Machine Pool (EMP) reduces Kubernetes cluster costs by more than 70%. Platform9 Elastic Machine Pool (EMP) supports AWS EKS Kubernetes service today. Support for GKE and AKS by Platform9 Elastic Machine Pool (EMP) is comin

In [34]:
markdown_results

[Document(metadata={'Markdown Header 2': 'Key Differentiators of EMP'}, page_content="## Key Differentiators of EMP\n\n1. **Go beyond bin-packing.** EMP optimizes your Kubernetes utilization by reclaiming existing unused capacity in your cluster. This enables you to go much further beyond just the bin packing benefits, and actually optimize capacity that is allocated to your pods via request and limit values but not actually utilized. \n2. **Optimize without right-sizing**: Many existing tools try to optimize your Kubernetes resource consumption by modifying your pod's request and limit values. But since EMP can actually tap into existing unused capacity to deploy new workloads, EMP requires making no changes to your pod's request and limit values. \n3. **Rebalance with zero pod downtime**: When EMP needs to scale and add more capacity, or consolidate to better use existing capacity, it can 'live migrate' workloads around to do this. This means zero pod disruption and much higher avail

#### Cosine Similarity for different query -> docs (markdown vs agentic)

In [35]:
from sklearn.metrics.pairwise import cosine_similarity

# Get the embedding of the query
query_embedding = embedding_model.embed_query(query)

# Calculate similarity for agentic results
agentic_similarity_scores = [
    cosine_similarity([query_embedding], [embedding_model.embed_query(doc.page_content)])[0][0]
    for doc in agentic_results
]

# Calculate similarity for markdown results
markdown_similarity_scores = [
    cosine_similarity([query_embedding], [embedding_model.embed_query(doc.page_content)])[0][0]
    for doc in markdown_results
]

# Output similarity scores for comparison
print("Agentic Similarity Scores:", agentic_similarity_scores)
print("Markdown Similarity Scores:", markdown_similarity_scores)

Agentic Similarity Scores: [0.8882975080884399, 0.8596526834565243, 0.8534091032005438, 0.8258291274751139]
Markdown Similarity Scores: [0.9063072633257371, 0.8879783226257386, 0.8850892970478984, 0.8415356210338503]


#### Looking for results for query from llm using those chunks from docs

In [26]:
# Initialize the language model (LLM) for question answering
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

#### RetrievalQAChain

In [37]:
from langchain import hub
from langchain.chains import RetrievalQA

# See full prompt at https://smith.langchain.com/hub/rlm/rag-prompt
prompt = hub.pull("pratik-rag-demo")
prompt = hub.pull("pratik-rag-big-formatted")

agentic_retrieval_chain = RetrievalQA.from_llm(
    llm, retriever=agentic_vector_store.as_retriever(), prompt=prompt
)
markdown_retrieval_chain = RetrievalQA.from_llm(
    llm, retriever=markdown_vector_store.as_retriever(), prompt=prompt
)

#### ConversationalRetrievalChain

In [46]:
# from langchain.chains import ConversationalRetrievalChain, LLMChain
# from langchain.memory import ConversationBufferMemory
# from langchain.chains.combine_documents import create_stuff_documents_chain
# from langchain_core.prompts import ChatPromptTemplate
# from langchain_core.prompts import PromptTemplate

# # Create a memory to store conversation context
# memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
# stuff_chain = create_stuff_documents_chain(llm, prompt)
# retriever = markdown_vector_store.as_retriever()

# # This controls how the standalone question is generated.
# # Should take `chat_history` and `question` as input variables.
# template = (
#     "Combine the chat history and follow up question into "
#     "a standalone question. Chat History: {chat_history}"
#     "Follow up question: {question}"
# )
# prompt_1 = PromptTemplate.from_template(template)
# question_generator_chain = LLMChain(llm=llm, prompt=prompt_1)
# conversation_chain = ConversationalRetrievalChain(
#     combine_docs_chain=stuff_chain,
#     retriever=retriever,
#     question_generator=question_generator_chain,
# )

In [45]:
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': '-', 'lc_hub_repo': 'pratik-rag-big-formatted', 'lc_hub_commit_hash': '2ea8d1c3e81eecba1cfc44186f66f299e005084e8b105876a3f1bb27fe152e19'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an intelligent and helpful documentation assistant whose task is to help first-time customers and visitors understand and navigate our product. Your primary goals are to provide accurate, clear, and structured guidance. You should assist users by explaining product features, guiding them through processes step by step, and answering any questions related to the product.\n\nGuidelines for Responses:-\n1. Accuracy and Verification:\n Ensure that all information provided is accurate and relevant to the user’s query.\nIf you are unsure about the accuracy of a response, explicitly state that you do not have the information and offer alternative suggestions or sources where 

In [47]:
def ask_query_rqa(query: str):
    print(f"\nQuery: {query}\n")

    # Retrieve and generate answers from Agentic chunking
    print("Agentic Chunking Results:")
    agentic_result = agentic_retrieval_chain.invoke(query)
    print(agentic_result)

    # Retrieve and generate answers from Markdown chunking
    print("\nMarkdown Chunking Results:") 
    markdown_result = markdown_retrieval_chain.invoke(query)
    print(markdown_result)

    return agentic_result, markdown_result

In [41]:
# def ask_query_crc(query: str):
#     print(f"\nQuery: {query}\n")

#     response = conversation_chain({"input": query})
#     return response['output']


In [48]:
# Example queries
query_1 = "How does EMP optimize Kubernetes utilization?"
query_2 = "What is EMP? and what does it do?"

# Fire queries and compare the results
ask_query_rqa(query_1)
# ask_query(query_2)


Query: How does EMP optimize Kubernetes utilization?

Agentic Chunking Results:
{'query': 'How does EMP optimize Kubernetes utilization?', 'result': '<h1>How EMP Optimizes Kubernetes Utilization</h1>\n\nPlatform9 Elastic Machine Pool (EMP) is designed to enhance the efficiency of Kubernetes workloads, particularly in AWS EKS environments. Below, we outline the key features and mechanisms through which EMP optimizes Kubernetes utilization.\n\n<h2>1. Reclaiming Unused Capacity</h2>\n<p>EMP focuses on identifying and reclaiming existing unused capacity within Kubernetes clusters. This is achieved without requiring any changes to the request and limit values set for application pods. By optimizing the allocation of resources, EMP can significantly reduce costs associated with Kubernetes compute.</p>\n\n<h2>2. Live Migration of Workloads</h2>\n<p>One of the standout features of EMP is its ability to <strong>live migrate</strong> workloads. This means that when scaling is necessary, EMP can

({'query': 'How does EMP optimize Kubernetes utilization?',
  'result': '<h1>How EMP Optimizes Kubernetes Utilization</h1>\n\nPlatform9 Elastic Machine Pool (EMP) is designed to enhance the efficiency of Kubernetes workloads, particularly in AWS EKS environments. Below, we outline the key features and mechanisms through which EMP optimizes Kubernetes utilization.\n\n<h2>1. Reclaiming Unused Capacity</h2>\n<p>EMP focuses on identifying and reclaiming existing unused capacity within Kubernetes clusters. This is achieved without requiring any changes to the request and limit values set for application pods. By optimizing the allocation of resources, EMP can significantly reduce costs associated with Kubernetes compute.</p>\n\n<h2>2. Live Migration of Workloads</h2>\n<p>One of the standout features of EMP is its ability to <strong>live migrate</strong> workloads. This means that when scaling is necessary, EMP can move workloads around without causing any disruption to running pods. This fe

In [49]:
# Example queries
query_1 = "How does EMP optimize Kubernetes utilization?"

# Fire queries and compare the results
# ask_query_crc(query_1)
# ask_query(query_2)

# Split using the ***MarkdownHeaderTextSplitter*** and store its embeddings for all the docs.

In [9]:
root_doc_path = './docs'

In [10]:
from langchain.text_splitter import MarkdownHeaderTextSplitter


file_path = f"{root_doc_path}/evm-pools/What is an EVM Pool.md"
# file_path = f"{root_doc_path}/networking/ALB for EMP.md" # issues with code blocks
with open(file_path) as file:
    overview_file = file.read()

headers_to_split_on = [
    ("#", "Markdown Header 1"),
    ("##", "Markdown Header 2"),
]    

md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on, strip_headers=False)
# md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on, strip_headers=False)

documents = md_splitter.split_text(overview_file)

In [11]:
documents

[Document(page_content='---\ntype: page\ntitle: What is an EVM Pool\nlisted: true\nslug: elastic-virtual-machine-pools\ndescription:\nindex_title: What is an EVM Pool\nhidden:\nkeywords:\ntags:\n---published  \nAn Elastic Virtual Machine Pool (EVM Pool) provides a way to organize the EVM nodes for your EKS clusters. Think of EVM Pool as an EKS Node Pool.  \nAn EVM Pool represents a collection of EVMs. Each EKS cluster imported into an EMP instance will have one or more EVM Pools. EVM Pools are not shared across different EKS clusters. This ensures that each EKS cluster has its own dedicated resources for running workloads.'),
 Document(metadata={'Markdown Header 2': 'EVM Pool Specification'}, page_content='## EVM Pool Specification  \nEVM Pool specification serves as a template for easy configuration of EVM Pool settings that can then be used to create EVMs for one or multiple EKS clusters.  \nFor instance, if majority of your workloads are run on an `m5.4xlarge`  instance type, you ca

In [12]:
import os
from langchain.text_splitter import MarkdownHeaderTextSplitter


# Define the headers you want to split on
headers_to_split_on = [
    ("#", "Markdown Header 1"),
    ("##", "Markdown Header 2"),
]    

# Create an instance of the markdown splitter
md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on, strip_headers=False)

# Initialize a list to hold all document parts
all_documents = []

# Walk through the directory and process each markdown file
for dirpath, _, filenames in os.walk(root_doc_path):
    for filename in filenames:
        if filename.endswith(".md"):  # Process only markdown files
            file_path = os.path.join(dirpath, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                print(f"Reading file ->{file_path}")
                file_content = file.read()
                documents = md_splitter.split_text(file_content)
                all_documents.extend(documents)  


Reading file ->./docs/Supported Clouds & Infrastructure.md
Reading file ->./docs/Cost Analyzer.md
Reading file ->./docs/EMP Metrics.md
Reading file ->./docs/account-configuration/Create Cloud Provider.md
Reading file ->./docs/account-configuration/IAM Credential Requirements.md
Reading file ->./docs/account-configuration/Authorization & User Management.md
Reading file ->./docs/evm-pools/What is an EVM Pool.md
Reading file ->./docs/evm-pools/Provision Workloads On EVMs.md
Reading file ->./docs/evm-pools/What Is an Elastic Virtual Machine.md
Reading file ->./docs/eks-clusters/Security Group Updates.md
Reading file ->./docs/eks-clusters/Importing EKS Clusters.md
Reading file ->./docs/bare-metal-pools/Bare Metal Pool Configuration.md
Reading file ->./docs/bare-metal-pools/What Are Bare Metal Pools.md
Reading file ->./docs/rebalancer/EMP Live Migration.md
Reading file ->./docs/rebalancer/EMP Rebalancer.md
Reading file ->./docs/networking/NLB for EMP.md
Reading file ->./docs/networking/Bare 

In [13]:
len(all_documents)

91

In [14]:
for i,doc in enumerate(all_documents[10:15]):
    print(f"\Metadata {i} \n", doc.metadata)
    print(f"\nDocument {i} \n", doc.page_content)

\Metadata 0 
 {}

Document 0 
 ---
type: page
title: Create Cloud Provider
listed: true
slug: cloud-provider
description:
index_title: Create Cloud Provider
hidden:
keywords:
tags:
---published  
Before creating an instance of EMP, you must create a cloud provider by specifying your AWS account credentials.
\Metadata 1 
 {'Markdown Header 2': 'Privileges Required'}

Document 1 
 ## Privileges Required  
Here is a **summary** of the required privileges.  
1. ELB Management: Permissions to manage AWS Elastic Load Balancer (ELB).
2. Route 53 DNS Configuration: Access to configure DNS settings in Route 53.
3. Access to 2 or More Availability Zones: The credentials should have permission to interact with resources in at least two Availability Zones within the specified region.
4. EC2 Instance Management: Permission to manage EC2 instances, including creating, terminating, and modifying instances.
5. EBS Volume Management: Access to manage Elastic Block Storage (EBS) volumes for storage conf

  print(f"\Metadata {i} \n", doc.metadata)


## Create Embeddings for the chunks and store them

In [15]:
%pip install -qU langchain-openai langchain-chroma

from langchain.embeddings.openai import OpenAIEmbeddings



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


### Define the embedding model

In [16]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

### Initialise the Chroma Client

In [476]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="emp-docs-collection",
    embedding_function=embeddings,
    persist_directory="./emp_chroma_db",
    collection_metadata={"hnsw:space": "cosine"} # l2 is the default
)

In [477]:
len(all_documents)

NameError: name 'all_documents' is not defined

### Adding the documents in the emp chroma db


In [212]:
from uuid import uuid4

uuids = [str(uuid4()) for _ in range(len(all_documents))]

ids = vector_store.add_documents(documents=all_documents, ids=uuids)

In [213]:
len(ids)

91

In [478]:
query = "What is EVM in EMP?"

searched_docs = vector_store.similarity_search_with_score(query, k=5)


In [220]:
def sort_docs_by_similarity(docs):
    # Sort the documents based on the similarity score (the second element in each tuple)
    return sorted(docs, key=lambda x: x[1], reverse=True)

searched_docs = sort_docs_by_similarity(searched_docs)

In [479]:
searched_docs

[(Document(metadata={'Markdown Header 2': 'Benefits of EVMs'}, page_content="1. **Bridging Performance and Flexibility:** EVMs combine the performance advantages of bare metal servers with the flexibility of virtualization. An EVM looks and feels exactly like a regular EC2 virtual machine, except that it's created by EMP on an AWS bare metal node. This makes them suitable for all workload types, from high-performance computing to web applications.\n2. **Zero-touch Lifecycle Management:** EMP handles complete lifecycle management of all your EVMs. This includes provisioning, scaling, monitoring, and maintenance, allowing DevOps teams to focus on their applications without worrying about infrastructure management."),
  0.7103937623085662),
 (Document(page_content="---\ntype: page\ntitle: What Is an Elastic Virtual Machine\nlisted: true\nslug: what-is-evm\ndescription:\nindex_title: What Is EVM\nhidden:\nkeywords:\ntags:\n---published  \nElastic Virtual Machines (EVMs) are a key component

In [222]:
for doc in searched_docs:
    print('Similarity Score', doc[1])
    print(doc[0].page_content)
    print("-"*100)

Similarity Score 0.4695711612151453
## Incrementally move workloads to EVMs  
When working with EMP for the first time, we recommend that you incrementally move portions of your workload on to EVM nodes, then test those workload components over 1-2 weeks to ensure that the performance is as expected, then migrate more components of the workload to run on EVMs.  
The eventual goal should be to run all workloads on a given EKS cluster on EVMs, for maximum utilization and cost reduction benefits.
----------------------------------------------------------------------------------------------------
Similarity Score 0.4642098124486418
---
type: page
title: EMP Metrics
listed: true
slug: emp-metrics
description:
index_title: EMP Metrics
hidden:
keywords:
tags:
---published  
Each EMP instance reports metrics data that shows information about EMP performance over a period of time.  
#### VM Steal Time  
VM steam time is the percentage of time the EVM CPU process is waiting on the physical CPU o

# Try and setup a series of langchain chains :)
## First Identify the functions each chain needs to do

In [45]:
from langchain.schema.messages import SystemMessage, HumanMessage, AIMessage
from langchain_openai import ChatOpenAI

# Initialize openai's chat model
llm_chat = ChatOpenAI(temperature=0.0, model='gpt-4o-mini')

def chat(messages):
    # messages.append(HumanMessage( message))
    response = llm_chat(messages)
    # messages.append(AIMessage(response.content))
    return response


### Define System Prompt

In [480]:
from langchain.prompts import ChatPromptTemplate
from langchain.chains import  LLMChain

# prompt = """Translate the text that is delimited by triple backticks into a style that is {style}. text: ```{text}```"""
system_message_parent = """
Role: 
You are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced system admins and DevOps personnel—navigate, understand, and troubleshoot the product.

Task: 
Provide accurate, very coincise structured guidance to explain EMP features, guide users step-by-step through processes, and answer any product-related questions, especially regarding cost-saving on AWS EKS.

Guidelines for Responses:
1. **Structured and Readable Output**:
   * Organize responses using HTML tags: 
     - `<h1>` for main headings
     - `<h2>` for subheadings
     - `<p>` for text
     - `<ul>` and `<li>` for lists
     - `<strong>` for emphasis
     - `<a href="#">` for hyperlinks.
   * Break down complex topics into simple steps or segments. 
   **Avoid adding unnecessary newlines** between HTML tags like `<h1>`, `<h2>`, `<p>`, and others. Keep HTML code clean and readable.

2. **Tailored Responses**: Adjust depth and tone depending on whether the user is new or experienced. Be concise for experts and more explanatory for beginners.

3. **Proactive Assistance**: If a query is beyond your scope, direct the user to relevant documentation or external resources.

4. **Provide Examples**: Whenever possible, give examples related to AWS EKS and Kubernetes to connect EMP features to real-world scenarios.

"""
# system_prompt_template = ChatPromptTemplate.from_template(system_message)
# system_message_chain = LLMChain(llm=llm, prompt=system_prompt_template, output_key="system_message")


### Define a LLM chain to extract key topics from the prompt.

In [481]:
from langchain.chains import  LLMChain

# Define your system prompt
extract_topics_system_prompt = """You are an expert support assistant for product EMP(Elastic Machine Pools) proficient in Cloud Native technologies \
      like Amazon Elastic Kubernetes Service (Amazon EKS), Kubernetes who is also \
          trained to extract key topics from a user's query. 
Your task is to identify 1-2 main search topics that are most relevant to the query. 
Provide concise and clear topics that capture the core of what the user is asking."""

# Define your user input placeholder
user_message = "{query}"

# Create the ChatPromptTemplate
extract_topics_prompt_template = ChatPromptTemplate.from_messages([
    ("system", extract_topics_system_prompt),
    ("user", user_message)
])

# Example usage with a query
query = "How can EMP (Elastic Machine Pools) help save costs on AWS EKS"

# Format the prompt with the user query
extract_topics_prompt = extract_topics_prompt_template.format(query=query)

extract_topics_chain = LLMChain(llm=llm, prompt=extract_topics_prompt_template, output_key="key_topics")

  extract_topics_chain = LLMChain(llm=llm, prompt=extract_topics_prompt_template, output_key="key_topics")


In [483]:
# chat(extract_topics_prompt).content

### Define a Transform Chain to write a summary of the key topics in few lines.

In [484]:
from langchain.chains import  TransformChain 

def summarise_topics(topics):
     
    # prompt_template = ChatPromptTemplate.from_template("""Summarize the key concepts given below from the retrieved documentation that are essential for understanding,\
    #     navigating, or troubleshooting Elastic Machine Pools (EMP).\
    #     Present the information in a concise, actionable format.
    #     Key Topics : "{topics}"
    #     """
    #  )
    prompt_template = ChatPromptTemplate.from_template( """Write a concise summary of each topic in detail in the product EMP:
    "{topics}"
    """)

    stuff_summary_chain = RetrievalQA.from_chain_type(
        llm=llm, 
        chain_type="stuff", 
        retriever=vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 2, "fetch_k": 6}), 
        verbose=True
    )
    return{"summary": stuff_summary_chain.invoke(prompt_template.format(topics=topics))}


summarise_chain = TransformChain(
    input_variables=["key_topics"], 
    output_variables=["summary"],
    transform=summarise_topics
)

### Define a Transform Chain to answer actually the query.

In [485]:
system_message= """
Role: 
You are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced system admins and DevOps personnel—navigate, understand, and troubleshoot the product.

Task: 
Provide accurate, very coincise structured guidance to explain EMP features, guide users step-by-step through processes, and answer any product-related questions, especially regarding cost-saving on AWS EKS.

DO NOT SUMMARISE , JUST PROVIDE A VERY ACCURATE AND COINCISE ANSWER TO THE QUESTION.
"""

def answer_query(query):
    user_message = "The question is : {query}"
    qa_answer_prompt_template = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("user", user_message)
    ])

    stuff_qa_chain = RetrievalQA.from_chain_type(
        llm=llm, 
        chain_type="stuff", 
        retriever=vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 4, "fetch_k": 10}), 
        verbose=True
    )
    return{"answers": stuff_qa_chain.invoke(qa_answer_prompt_template.format(query=query))}


qa_chain = TransformChain(
    input_variables=["query"], 
    output_variables=["answers"],
    transform=answer_query
)

### Define a Transform Chain to give step by step instructions to configure the  features mentioned in the  query.


In [486]:
def how_to_configure(params):
    # print('The params of how_to_configure', params["summary"])
    system_message = """
    You are an support assistant expert at guiding users to use the product EMP and its features, step by step in detail.  
"""

    user_message ="""Given are key topics of the conversation and the query the user asked. 
    Guide me step by step with detailed instructions, the procedure to configure and use it these features/key topics/features in EMP to start saving costs.
    Also mention any probable gotchas/caveats.
    Key Topics : {key_topics} , Query: {query}

    Important Note: If there are no configuration steps for the key concepts in the docs, skip this and return blank response.

    """
    
    configuration_prompt_template = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("user", user_message)
    ])

    configuration_qa_chain = RetrievalQA.from_chain_type(
        llm=llm, 
        chain_type="stuff", 
        retriever=vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 2, "fetch_k": 4}), 
        verbose=True
    )
    return{"configuration_steps": configuration_qa_chain.invoke(configuration_prompt_template.format(key_topics=params["key_topics"], query=params["query"]))}


configuration_chain = TransformChain(
    input_variables=["key_topics", "query"], 
    output_variables=["configuration_steps"],
    transform=how_to_configure
)

### Adding ***ConversationBufferMemory*** Memory

In [487]:
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(memory_key="history", input_key="query", output_key="result")

In [488]:

system_message= """
Role: 
You are an expert at combining various sections of text and creating a conherent passage of text, which gives a holistic answer to the user query.

Task: 
Combine the summary, answers, and configuration steps in that order to form a complete answer to the query given below. Be informative and creative in the way you present these section headers, don't spell them out exactly as it is.
Also give a very relevant and coincise header to the entire conversation, which captures the entire essence of the answer.

Guidelines for Responses:
1. **Structured and Readable Output**:
* Organize responses using HTML tags: 
    - `<h1>` for main headings
    - `<h2>` for subheadings
    - `<p>` for text
    - `<ul>` and `<li>` for lists
    - `<strong>` for emphasis
    - `<a href="#">` for hyperlinks.
* Break down complex topics into simple steps or segments. 
**Avoid adding unnecessary newlines** between HTML tags like `<h1>`, `<h2>`, `<p>`, and others. Keep HTML code clean and readable.

2. **Role and Tone **:  Maintain a professional, helpful, and friendly tone throughout the interaction. 

3. **Proactive Assistance**: If a query is beyond your scope, direct the user to relevant documentation or external resources.

4. **Provide Examples**: Whenever possible, give examples related to AWS EKS and Kubernetes to connect EMP features to real-world scenarios."""

user_message ="""Following is the summary to the key topics of the query, answers to the actual question asked in the query,  \
    and the configuration steps, combine them into a conherent response for the user query. All of those given here ->
    Summary : {summary}
    Answers : {answers}
    Configuration Steps: {configuration_steps}
    Query: {query}

    Important: Please consider the conversation history : {history} too while constructing you answer, if applicable.
    Important: IT IS NOT IMPORTANT TO CONSIDER SUMMARY, ANSWERS, CONFIGURATION STEPS, ALWAYS. \
          Analyse the query and decide whether to just answer the query or augment it with the above information. \
            Remember we need to keep our answers crisp and to the point.
"""

result_prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_message),
    ("user", user_message)
])

result_chain = LLMChain(llm=llm, prompt=result_prompt_template, output_key="result")

In [490]:
from langchain.chains import SequentialChain

overall_chain = SequentialChain(chains=[extract_topics_chain, summarise_chain, qa_chain, configuration_chain, result_chain],
                                 input_variables=["query"],  output_variables=["key_topics", "summary", "answers", "configuration_steps", "result"],  verbose=True, memory=memory)

In [491]:
query = "What is  EMP?"

result = overall_chain.invoke({"query" : query})



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


In [495]:
def stripExtraSpaces(query):
    return  query.replace("\\n", "").replace("\n", "")

In [496]:
result["key_topics"]

'Elastic Machine Pools'

In [497]:
result["configuration_steps"]["result"]

'Platform9 Elastic Machine Pool (EMP) is a Kubernetes cost optimization platform specifically designed for public cloud native Kubernetes offerings. It focuses on helping you reduce wasted Kubernetes compute, aiming to reclaim over 70% of resources that may be sitting idle, thus significantly lowering your Kubernetes cluster costs.\n\nHere are the step-by-step instructions to configure and use EMP to start saving costs:\n\n1. **Log into the EMP UI**:\n   - Start by logging into the EMP user interface with your credentials.\n\n2. **Create an EMP Instance**:\n   - Follow the "Create EMP" wizard to set up your first instance of EMP.\n\n3. **Select Cloud Credentials**:\n   - Choose the cloud credentials that you have set up previously.\n\n4. **Choose AWS Region**:\n   - Select the AWS region where your EKS cluster is located.\n\n5. **Select EKS Cluster**:\n   - From the list provided, select the EKS cluster that you wish to optimize.\n\n6. **Configure Security Groups**:\n   - Set the secur

In [498]:
result["summary"]["result"]

'**What is EMP?**\nPlatform9 Elastic Machine Pool (EMP) is a cost optimization platform designed specifically for public cloud native Kubernetes offerings. It aims to help organizations reclaim over 70% of wasted Kubernetes compute resources that are typically idle, leading to a significant reduction in Kubernetes cluster costs, also by over 70%. Currently, EMP supports the AWS EKS Kubernetes service, with plans to extend support to Google Kubernetes Engine (GKE) and Azure Kubernetes Service (AKS) in the future.\n\n**Elastic Machine Pools**\nElastic Machine Pools refer to the dynamic allocation and management of computing resources within Kubernetes environments. EMP optimizes the usage of these resources, ensuring that compute power is efficiently utilized and that costs are minimized. By leveraging Elastic Machine Pools, organizations can better manage their Kubernetes workloads, reduce waste, and improve overall operational efficiency.'

In [499]:
result

{'query': 'What is  EMP?',
 'history': '',
 'key_topics': 'Elastic Machine Pools',
 'summary': {'query': 'Human: Write a concise summary of each topic in detail in the product EMP:\n    "{\'query\': \'What is  EMP?\', \'history\': \'\', \'key_topics\': \'Elastic Machine Pools\'}"\n    ',
  'result': '**What is EMP?**\nPlatform9 Elastic Machine Pool (EMP) is a cost optimization platform designed specifically for public cloud native Kubernetes offerings. It aims to help organizations reclaim over 70% of wasted Kubernetes compute resources that are typically idle, leading to a significant reduction in Kubernetes cluster costs, also by over 70%. Currently, EMP supports the AWS EKS Kubernetes service, with plans to extend support to Google Kubernetes Engine (GKE) and Azure Kubernetes Service (AKS) in the future.\n\n**Elastic Machine Pools**\nElastic Machine Pools refer to the dynamic allocation and management of computing resources within Kubernetes environments. EMP optimizes the usage of 

### Printing the final result( WOW, its pretty good)

In [191]:
stripExtraSpaces(result["result"])

'<h1>Exploring Burstable Machine Pools in Elastic Machine Pools</h1><h2>Introduction to Elastic Machine Pools (EMP)</h2><p>Elastic Machine Pools (EMP) is a Kubernetes cost optimization platform that focuses on enhancing the efficiency of public cloud native Kubernetes offerings. It enables organizations to reclaim over 70% of wasted Kubernetes compute resources that may otherwise remain idle, leading to significant reductions in Kubernetes cluster costs. Currently, EMP supports the AWS EKS Kubernetes service, with plans to extend support to Google Kubernetes Engine (GKE) and Azure Kubernetes Service (AKS) in the future.</p><h2>Understanding Burstable Machine Pools (BMP)</h2><p>Unfortunately, specific information regarding Burstable Machine Pools (BMP) within the context of Elastic Machine Pools (EMP) is not available at this time. For comprehensive insights, it is advisable to consult the official documentation or resources related to EMP.</p><h2>Steps to Get Started with EMP</h2><p>To

In [192]:
# memory.clear()
memory.buffer_as_messages

[HumanMessage(content='What is BMP in EMP?'),
 AIMessage(content='<h1>Exploring Burstable Machine Pools in Elastic Machine Pools</h1>\n<h2>Introduction to Elastic Machine Pools (EMP)</h2>\n<p>Elastic Machine Pools (EMP) is a Kubernetes cost optimization platform that focuses on enhancing the efficiency of public cloud native Kubernetes offerings. It enables organizations to reclaim over 70% of wasted Kubernetes compute resources that may otherwise remain idle, leading to significant reductions in Kubernetes cluster costs. Currently, EMP supports the AWS EKS Kubernetes service, with plans to extend support to Google Kubernetes Engine (GKE) and Azure Kubernetes Service (AKS) in the future.</p>\n<h2>Understanding Burstable Machine Pools (BMP)</h2>\n<p>Unfortunately, specific information regarding Burstable Machine Pools (BMP) within the context of Elastic Machine Pools (EMP) is not available at this time. For comprehensive insights, it is advisable to consult the official documentation or

# Play with Agents

In [193]:
from langchain.agents import initialize_agent, Tool
from langchain.agents.agent_types import AgentType
from langchain.chains import RetrievalQA



In [194]:
retriever = vector_store.as_retriever()


def query_tool(query: str) -> str:
    system_message= """
    Role: 
    You are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced system admins and DevOps personnel—navigate, understand, and troubleshoot the product.

    Task: 
    Provide accurate, very coincise structured guidance to explain EMP features, guide users step-by-step through processes, and answer any product-related questions, especially regarding cost-saving on AWS EKS.

    # DO NOT SUMMARISE , JUST PROVIDE A VERY ACCURATE AND COINCISE ANSWER TO THE QUESTION.
    """
    user_message = "The question is : {query}"
    qa_answer_prompt_template = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("user", user_message)
    ])

    stuff_qa_chain = RetrievalQA.from_chain_type(
        llm=llm, 
        chain_type="stuff", 
        retriever=vector_store.as_retriever(k=4), 
        verbose=True
    )
    return stuff_qa_chain.invoke(qa_answer_prompt_template.format(query=query))

def summary_tool(query:str) -> str:
    system_message= """
    Role: You are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to \
        experienced system admins and DevOps personnel—navigate, understand, and troubleshoot the product.
        """

    user_message = """
    Extract key topics from the following query and then write a concise summary of each topic in detail for the product EMP.
        "Query : {query}"
    """
    prompt_template = ChatPromptTemplate.from_template( """:
    "Query : {query}"
    """)

    stuff_summary_chain = RetrievalQA.from_chain_type(
        llm=llm, 
        chain_type="stuff", 
        retriever=vector_store.as_retriever(k=4), 
        verbose=True
    )

    return stuff_summary_chain.invoke(query)

tools = [
     Tool(
        name="Search",
        func=query_tool,
        description="This is the tool to get specific answers to the query."
    ),
    Tool(
        name="Summarise",
        func=summary_tool,
        description="Use this tool to summarise key topics in the query to create a more holistic answer"
    ),
]

In [195]:
agent= initialize_agent(
    tools, 
    llm, 
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, 
    verbose=True,
    max_iterations=10
)

In [196]:
agent("What are BMP in EMP?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to clarify what BMP refers to in the context of EMP. BMP could stand for various things, so I should search for its specific meaning in relation to EMP (which could refer to Environmental Management Plans or something else). 
Action: Search
Action Input: "BMP in EMP meaning"[0m

[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m

Observation: [36;1m[1;3m{'query': 'System: \n    Role: \n    You are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced system admins and DevOps personnel—navigate, understand, and troubleshoot the product.\n\n    Task: \n    Provide accurate, very coincise structured guidance to explain EMP features, guide users step-by-step through processes, and answer any product-related questions, especially regarding cost-saving on AWS EKS.\n\n    # DO NOT SUMMARISE , JUST PROVIDE A VERY ACCURATE AND COINCISE ANSW

{'input': 'What are BMP in EMP?',
 'output': 'BMP in EMP stands for Bare Metal Pool, which is a collection of one or more AWS bare metal servers used to create Elastic Virtual Machines (EVMs) that serve as worker nodes for EKS clusters.'}

# Simple Retrieval QA Chain

In [189]:
system_message = """
Role:
You are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced \
    system admins and DevOps personnel navigate, understand, and troubleshoot the product.

Task:
Provide accurate, concise and structured(rules given below in point no:7) responses to the queries of the users based on the below rules. Please  do  not explicitly list the thought process headers.

Guidelines for Responses:
	1.Initial Understanding and Topic Identification:
	•	First, identify the core aspects of the user’s query. Think through what the user is really asking. What 1-2 main search topics or key concepts are most relevant?
	
    2.Topic Breakdown and Explanation:
	•	After identifying the key topics, provide a concise and clear explanation of each one. Summarize each key concept and their relation(if any) that will help the user better understand EMP, AWS EKS, and how they relate to cost savings.
	•	Think: “What does the user need to know to understand this topic fully?”
	•	Think: "Have I already covered these topics in the above conversation?" If yes, then do not repeat.
	
    3.Answer the Core Query:
	•	After helping the user grasp the necessary concepts, think about the precise question the user has asked. What is the most direct and concise answer to their question? Avoid unnecessary details, and focus on clarity and brevity.
	•	Think: “What’s the best way to answer this query succinctly while making sure the user understands the core of the solution?”
	
    4.Provide Step-by-Step Instructions:
	•	Now that the user understands the main concepts, think through the procedure they need to follow. Guide them with detailed, step-by-step instructions on how to configure or the above key topis in the application.
	•	Consider the user’s experience level. If they are new, explain each step in greater detail. For seasoned users, focus on efficiency.
	•	If this procedure has already been discussed in the history, acknowledge that, and only provide new information or further details.
	
    5.	Identify and Mention Gotchas or Caveats:
	•	Before finishing the response, think about any common issues or pitfalls that could arise when implementing the instructions. Include these gotchas or caveats to ensure the user is prepared for potential challenges.
	•	Again, check the conversation history to avoid repeating information that’s already been mentioned.
    
	6.	Structured and Readable Output:
	•	Organize your response logically using HTML tags for readability:
	•	<h1> for main headings
	•	<h2> for subheadings
	•	<p> for text
	•	<ul> and <li> for lists
	•	<strong> for emphasis
	•	<a href="#"> for hyperlinks
	•	Ensure clean, readable HTML code by avoiding unnecessary newlines between tags.
    
	Finally, combine the above points  into a single coherent reponse for the user query.
    Let's now welcome the user and start the conversation.
"""
user_message = "The query is : {query}. Let's think step by step as directed in the sytem prompt."

qa_answer_prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_message),
    ("user", user_message)
])

stuff_qa_chain = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 4, "fetch_k": 10}), 
    verbose=True
)

map_reduce_qa_chain = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="map_reduce", 
    retriever=vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 4, "fetch_k": 10}), 
    verbose=True
)

refine_qa_chain = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="refine", 
    retriever=vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 4, "fetch_k": 10}), 
    verbose=True
)

In [207]:
query="How to configure BMPs in EMP?"


result = stuff_qa_chain.invoke(qa_answer_prompt_template.format(query=query))
result_stuff = stripExtraSpaces(result['result'])
result_stuff



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'<h1>Configuring Bare Metal Pools (BMPs) in EMP</h1><p>To configure Bare Metal Pools (BMPs) in EMP, it\'s essential to understand the relationship between EMP and AWS EKS, as well as the steps involved in the configuration process.</p><h2>Understanding BMPs in EMP</h2><p>Bare Metal Pools in EMP are used to create Elastic Virtual Machines (EVMs) that act as worker nodes for your EKS clusters. This setup allows for optimized resource allocation and management, enhancing the performance of your Kubernetes workloads.</p><h2>Steps to Configure BMPs in EMP</h2><p>Follow these steps to configure BMPs in your EMP instance:</p><ol><li><strong>Log into the EMP UI:</strong> Access the EMP user interface with your credentials.</li><li><strong>Create EMP Instance:</strong> Follow the "Create EMP" wizard. Select your cloud credentials and the AWS region where your EKS cluster resides.</li><li><strong>Select EKS Cluster:</strong> Choose the EKS cluster you wish to optimize from the list provided.</li

In [208]:
query="How to configure BMPs in EMP?"
result = map_reduce_qa_chain.invoke(qa_answer_prompt_template.format(query=query))
result_map_reduce = stripExtraSpaces(result['result'])
result_map_reduce



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'<h1>Configuring BMPs in EMP</h1><p>To configure Bare Metal Pools (BMPs) in Elastic Machine Pools (EMP), it\'s important to understand the relationship between BMPs and AWS EKS, as well as the steps involved in the configuration process.</p><h2>Key Concepts</h2><ul><li><strong>Bare Metal Pools (BMPs):</strong> BMPs allow you to utilize physical servers for workloads, leading to improved performance and lower latency for applications that require direct access to hardware resources.</li><li><strong>Elastic Machine Pools (EMP):</strong> EMP enables the management of pools of elastic virtual machines (EVMs) that optimize workloads in your AWS EKS clusters, enhancing performance and cost efficiency.</li></ul><h2>Step-by-Step Instructions to Configure BMPs</h2><ol><li><strong>Access the EMP Dashboard:</strong> Log in to your EMP account and navigate to the dashboard.</li><li><strong>Create a New BMP:</strong><ul><li>Navigate to the "Resource Pools" section.</li><li>Select "Create New Pool" 

In [209]:
query="How to configure BMPs in EMP?"
result = refine_qa_chain.invoke(qa_answer_prompt_template.format(query=query))
result_refine = stripExtraSpaces(result['result'])
result_refine



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'<h1>Configuring Bare Metal Pools (BMPs) in EMP</h1><p>To configure Bare Metal Pools (BMPs) in Elastic Machine Pools (EMP), it\'s essential to understand the key concepts involved in the process. Below is a structured guide to help you through the configuration.</p><h2>Key Concepts</h2><ul><li><strong>Bare Metal Pools (BMPs):</strong> These are pools of EC2 bare metal instances that provide high performance and low latency for workloads. They are particularly useful for applications that require direct access to hardware.</li><li><strong>EMP Architecture:</strong> EMP integrates seamlessly with your AWS EKS environment, allowing you to deploy workloads to Elastic Virtual Machines (EVMs) and BMPs. The architecture includes components like the EMP Service, Admission Controller, and Rebalancer, which work together to optimize resource utilization.</li><li><strong>EMP Cost Analyzer:</strong> This optional component helps you analyze potential cost savings when using EMP by collecting metri

### Rough Work for testing


In [474]:
import chromadb
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory

client = chromadb.PersistentClient(path="./emp_chroma_db")
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
llm = ChatOpenAI(model="gpt-4o-mini")

vector_store = Chroma(
        client=client,
        collection_name='emp-docs-collection',
        embedding_function=embeddings)

retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 6, "fetch_k": 10})
# memory = ConversationSummaryBufferMemory( llm=llm, memory_key="chat_history", input_key="query", output_key="result", max_token_limit=2048, return_messages=True)
memory = ConversationBufferMemory( memory_key="chat_history", input_key="query", output_key="result", llm=llm, return_messages=True)

chain = RetrievalQA.from_chain_type(
        llm=llm, 
        chain_type='stuff', 
        verbose=True,
        retriever=retriever, 
        memory=memory
)

In [475]:
retriever.invoke("How to use EBS with EMP?")

[Document(metadata={'Markdown Header 2': 'Steps to configure EBS'}, page_content='Install the [upstream EBS CSI driver](https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/master/docs/install.md) or update your existing CSI driver to the latest version.  \n### Configuring EBS Driver  \nEMP requires that you use EBS volumes of type io2 and with multi-attach functionality enabled, for EVM live migration to operate with best performance. Learn more about [why io2 volumes with EMP](/emp/io2-migration-guide-for-emp).  \nThe next step is to ensure that your EBS CSI driver creates io2 volumes by default using StorageClass. Additionally, use the provided PVC template to enable multi-attach functionality for the io2 volumes.  \n### StorageClass Config  \nThis StorageClass template will ensure the EBS CSI driver creates io2 volumes.  \n$plugin[{\n"type": "code-block",\n"data": {\n"languageBlocks": [\n{\n"code": "apiVersion: storage.k8s.io\\/v1\\nkind: StorageClass\\nmetadata:\\n  name: <S

In [431]:
system_message = """
Role:
You are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced \
    system admins and DevOps personnel navigate, understand, and troubleshoot the product.

Task:
Provide accurate and structured(rules given below in point no:7) responses to the queries of the users based on the below rules. Please  do  not explicitly list the thought process headers.

Guidelines for Responses:
	1.Initial Understanding and Topic Identification:
	•	First, identify the core aspects of the user’s query. Think through what the user is really asking. What 1-2 main search topics or key concepts are most relevant?
	
    2.Topic Breakdown and Explanation:
	•	After identifying the key topics, provide a concise and clear explanation of each one. Summarize each key concept and their relation(if any) that will help the user better understand EMP, AWS EKS, and how they relate to cost savings.
	•	Think: “What does the user need to know to understand this topic fully?”
	•	Think: "Have I already covered these topics in the above conversation?" If yes, then do not repeat.
	
    3.Answer the Core Query:
	•	After helping the user grasp the necessary concepts, think about the precise question the user has asked. What is the most direct and concise answer to their question? Avoid unnecessary details, and focus on clarity and brevity.
	•	Think: “What’s the best way to answer this query succinctly while making sure the user understands the core of the solution?”
	
    4.Provide Step-by-Step Instructions:
	•	Now that the user understands the main concepts, think through the procedure they need to follow. Guide them with detailed, step-by-step instructions on how to configure or the above key topis in the application.
	•	Consider the user’s experience level. If they are new, explain each step in greater detail. For seasoned users, focus on efficiency.
	•	If this procedure has already been discussed in the history, acknowledge that, and only provide new information or further details.
	
    5.	Identify and Mention Gotchas or Caveats:
	•	Before finishing the response, think about any common issues or pitfalls that could arise when implementing the instructions. Include these gotchas or caveats to ensure the user is prepared for potential challenges.
	•	Again, check the conversation history to avoid repeating information that’s already been mentioned.
    
	6.	Structured and Readable Output:
	•	Organize your response logically using HTML tags for readability:
	•	<h1> for main headings
	•	<h2> for subheadings
	•	<p> for text
	•	<ul> and <li> for lists
	•	<strong> for emphasis
	•	<a href="#"> for hyperlinks
	•	Ensure clean, readable HTML code by avoiding unnecessary newlines between tags.
    
	Finally, combine the above points  into a single coherent reponse for the user query.
    Let's now welcome the user and start the conversation.
"""

In [432]:
from langchain.schema import SystemMessage
prompt_template = ChatPromptTemplate.from_messages([
     SystemMessage(content=system_message),
    # MessagesPlaceholder("chat_history"),
    # ("user",  user_message)
    ])
result = chain.invoke(prompt_template.format())



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [433]:
# chain.memory.clear()
chain.memory.buffer

[HumanMessage(content='System: \nRole:\nYou are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced     system admins and DevOps personnel navigate, understand, and troubleshoot the product.\n\nTask:\nProvide accurate, concise and structured(rules given below in point no:7) responses to the queries of the users based on the below rules. Please  do  not explicitly list the thought process headers.\n\nGuidelines for Responses:\n\t1.Initial Understanding and Topic Identification:\n\t•\tFirst, identify the core aspects of the user’s query. Think through what the user is really asking. What 1-2 main search topics or key concepts are most relevant?\n\t\n    2.Topic Breakdown and Explanation:\n\t•\tAfter identifying the key topics, provide a concise and clear explanation of each one. Summarize each key concept and their relation(if any) that will help the user better understand EMP, AWS EKS, and how they relate to cost saving

In [434]:
# print(result['result'])

In [435]:
print(result['query'])

System: 
Role:
You are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced     system admins and DevOps personnel navigate, understand, and troubleshoot the product.

Task:
Provide accurate, concise and structured(rules given below in point no:7) responses to the queries of the users based on the below rules. Please  do  not explicitly list the thought process headers.

Guidelines for Responses:
	1.Initial Understanding and Topic Identification:
	•	First, identify the core aspects of the user’s query. Think through what the user is really asking. What 1-2 main search topics or key concepts are most relevant?
	
    2.Topic Breakdown and Explanation:
	•	After identifying the key topics, provide a concise and clear explanation of each one. Summarize each key concept and their relation(if any) that will help the user better understand EMP, AWS EKS, and how they relate to cost savings.
	•	Think: “What does the user need to 

In [436]:
chain.memory.chat_memory.messages

[HumanMessage(content='System: \nRole:\nYou are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced     system admins and DevOps personnel navigate, understand, and troubleshoot the product.\n\nTask:\nProvide accurate, concise and structured(rules given below in point no:7) responses to the queries of the users based on the below rules. Please  do  not explicitly list the thought process headers.\n\nGuidelines for Responses:\n\t1.Initial Understanding and Topic Identification:\n\t•\tFirst, identify the core aspects of the user’s query. Think through what the user is really asking. What 1-2 main search topics or key concepts are most relevant?\n\t\n    2.Topic Breakdown and Explanation:\n\t•\tAfter identifying the key topics, provide a concise and clear explanation of each one. Summarize each key concept and their relation(if any) that will help the user better understand EMP, AWS EKS, and how they relate to cost saving

In [437]:
# from langchain.prompts.chat import ChatPromptTemplate

# user_message = """
# Given a chat history and the latest user query \
# which might reference context in the chat history,\
# answer the user query.
# The user query is : "{query}"
# The chat history is : "{chat_history}"
# """
# query = "What is EMP?"
# user_prompt = user_message.format(query=query, chat_history=chain.memory.chat_memory.messages)
# user_query_prompt = ChatPromptTemplate.from_messages(
#     ('user', user_prompt)
# )
# result = chain.invoke(user_prompt)
# result['result']

In [451]:
from langchain.prompts.chat import ChatPromptTemplate
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

user_message = """
The user query is : {query}
Make sure you follow the guidelines given in the system prompt.
"""
query = "How to configure BMPs in EMP?"
user_prompt = user_message.format(query=query)
user_query_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        MessagesPlaceholder("chat_history"),
        ("human", "{query}"),
    ]
)
result = chain.invoke(user_query_prompt.format(query=query, chat_history=chain.memory.chat_memory.messages))



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [453]:
print(result['result'])

<h1>Configuring Bare Metal Pools (BMPs) in EMP</h1>
<p>To configure Bare Metal Pools (BMPs) in Platform9 Elastic Machine Pools (EMP), follow the steps outlined below. This process will help you set up your environment to optimize Kubernetes workloads effectively.</p>

<h2>Step-by-Step Instructions</h2>
<ol>
<li><strong>Log into the EMP UI:</strong> Start by logging into your EMP user interface.</li>
<li><strong>Create EMP Instance:</strong> Follow the "Create EMP" wizard to initiate the configuration.</li>
<ul>
<li>Select your cloud credentials that you've previously set up.</li>
<li>Choose the AWS region where your EKS cluster resides.</li>
<li>Select the EKS cluster you want to optimize from the list.</li>
</ul>
<li><strong>Configure Security Groups:</strong> Select the security group configuration options to enable communication between the EKS control plane and the EMP-created worker nodes (EVMs).</li>
<li><strong>Select EC2 Instance Type:</strong> Choose an AWS EC2 bare metal inst

In [452]:
print(result['chat_history'])

[HumanMessage(content='System: \nRole:\nYou are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced     system admins and DevOps personnel navigate, understand, and troubleshoot the product.\n\nTask:\nProvide accurate, concise and structured(rules given below in point no:7) responses to the queries of the users based on the below rules. Please  do  not explicitly list the thought process headers.\n\nGuidelines for Responses:\n\t1.Initial Understanding and Topic Identification:\n\t•\tFirst, identify the core aspects of the user’s query. Think through what the user is really asking. What 1-2 main search topics or key concepts are most relevant?\n\t\n    2.Topic Breakdown and Explanation:\n\t•\tAfter identifying the key topics, provide a concise and clear explanation of each one. Summarize each key concept and their relation(if any) that will help the user better understand EMP, AWS EKS, and how they relate to cost saving

In [448]:
chain.memory.buffer

[HumanMessage(content='System: \nRole:\nYou are an expert support assistant for EMP (Elastic Machine Pools), helping users—ranging from first-time users to experienced     system admins and DevOps personnel navigate, understand, and troubleshoot the product.\n\nTask:\nProvide accurate, concise and structured(rules given below in point no:7) responses to the queries of the users based on the below rules. Please  do  not explicitly list the thought process headers.\n\nGuidelines for Responses:\n\t1.Initial Understanding and Topic Identification:\n\t•\tFirst, identify the core aspects of the user’s query. Think through what the user is really asking. What 1-2 main search topics or key concepts are most relevant?\n\t\n    2.Topic Breakdown and Explanation:\n\t•\tAfter identifying the key topics, provide a concise and clear explanation of each one. Summarize each key concept and their relation(if any) that will help the user better understand EMP, AWS EKS, and how they relate to cost saving

In [443]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
     llm,  retriever, contextualize_q_prompt
)

In [None]:
from langchain_community.chat_models import ChatOpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

retrieval_qa_chat_prompt = ChatPromptTemplate.from_messages([
    ("system", system_message),
    MessagesPlaceholder("chat_history"),
    ("human",  user_message)
    ])

combine_docs_chain = create_stuff_documents_chain(
    llm, retrieval_qa_chat_prompt
)

In [80]:
# client.delete_collection("emp-docs-collection")

In [98]:
# List all collections
collections = client.list_collections()

# Print the collection names
for collection in collections:
    print(collection.name)

emp-docs-collection
