In [None]:
# Connection settings for the Azure OpenAI resource. They are exposed as
# environment variables because the cells below read BASE_URL and API_KEY
# from os.environ / os.getenv.
import os
from getpass import getpass

os.environ["BASE_URL"] = "https://tom-canada-openai.openai.azure.com/"

# Never write the real key into the notebook source: reuse API_KEY when the
# kernel environment already provides it, otherwise prompt for it. getpass
# does not echo the input, so the key stays out of the saved cell and output.
if not os.environ.get("API_KEY"):
    os.environ["API_KEY"] = getpass("Azure OpenAI API key: ")

In [2]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
import os

# Embedding client for the Azure OpenAI "text-embedding-ada-002" deployment.
# NOTE: despite its name, chunk_size behaves more like a batch size - it is
# how many texts are sent to the API in one request.
embedding = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    deployment="text-embedding-ada-002",
    chunk_size=16,
    openai_api_type="azure",
    openai_api_version="2023-05-15",
    openai_api_base=os.environ["BASE_URL"],
    openai_api_key=os.environ["API_KEY"],
)


In [3]:
# Load the saved "azuredocs" FAISS index from the current directory, wiring in
# the embedding client created earlier in this notebook.
vectorstore = FAISS.load_local(
    folder_path=".",
    index_name="azuredocs",
    embeddings=embedding,
)

In [21]:
# Question used as the query for both retrieval approaches in this notebook
# (plain similarity search and MultiQueryRetriever).
question = "What is NSG and how can I configure it?"

In [22]:
# Baseline retrieval: plain vector similarity search, keeping the 10 best matches
docs = vectorstore.similarity_search(
    query=question,
    k=10,
)
docs

[Document(page_content="#### Network security rules (NSGs)\n\nIf you need basic network level access control (based on IP address and the TCP or UDP protocols), you can use Network Security Groups (NSGs). An NSG is a basic, stateful, packet filtering firewall, and it enables you to control access based on a [5-tuple](https://www.techopedia.com/definition/28190/5-tuple). NSGs include functionality to simplify management and reduce the chances of configuration mistakes:\n\n* **Augmented security rules** simplify NSG rule definition and allow you to create complex rules rather than having to create multiple simple rules to achieve the same result.\n* **Service tags** are Microsoft created labels that represent a group of IP addresses. They update dynamically to include IP ranges that meet the conditions that define inclusion in the label. For example, if you want to create a rule that applies to all Azure storage on the east region you can use Storage.EastUS\n* **Application security grou

In [23]:
# MultiQueryRetriever - the LLM generates multiple queries from the question
# and the results retrieved for them are combined
import logging

from langchain.chat_models import AzureChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever

# Azure OpenAI connection settings, read from the environment
BASE_URL = os.getenv("BASE_URL")
API_KEY = os.getenv("API_KEY")

# Chat model used to generate the alternative queries.
# NOTE(review): no temperature is passed, so the library default applies and
# the generated queries may differ between runs - confirm this is intended.
llm = AzureChatOpenAI(
    deployment_name="gpt-35-turbo",
    openai_api_type="azure",
    openai_api_version="2023-05-15",
    openai_api_base=BASE_URL,
    openai_api_key=API_KEY,
)

# Raise the multi_query logger to INFO so the generated queries are logged
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

# The underlying vector store retriever is configured with k=3
retriever_from_llm = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
)
unique_docs = retriever_from_llm.get_relevant_documents(query=question)
len(unique_docs)


INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can I configure NSG and what is its purpose?', '2. What are the steps to configure NSG and what does it do?', '3. Can you explain the configuration process for NSG and provide an overview of its functionality?']


5

In [24]:
# Display the documents returned by the MultiQueryRetriever for the question.
unique_docs

[Document(page_content="#### Network security rules (NSGs)\n\nIf you need basic network level access control (based on IP address and the TCP or UDP protocols), you can use Network Security Groups (NSGs). An NSG is a basic, stateful, packet filtering firewall, and it enables you to control access based on a [5-tuple](https://www.techopedia.com/definition/28190/5-tuple). NSGs include functionality to simplify management and reduce the chances of configuration mistakes:\n\n* **Augmented security rules** simplify NSG rule definition and allow you to create complex rules rather than having to create multiple simple rules to achieve the same result.\n* **Service tags** are Microsoft created labels that represent a group of IP addresses. They update dynamically to include IP ranges that meet the conditions that define inclusion in the label. For example, if you want to create a rule that applies to all Azure storage on the east region you can use Storage.EastUS\n* **Application security grou

In [25]:
import tiktoken

# Tokenizer used to measure how large each retrieved document is
encoding = tiktoken.get_encoding("cl100k_base")

# Report the token count per document and keep a running total
sum_tokens = 0
for doc in unique_docs:
    num_tokens = len(encoding.encode(doc.page_content))
    file_name = doc.metadata["file_path"]
    print(f"Tokens: {num_tokens} in file {file_name}")
    sum_tokens += num_tokens

print(f"\n\n***************\nTotal tokens: {sum_tokens}")

Tokens: 315 in file articles/security/fundamentals/network-overview.md
Tokens: 149 in file articles/virtual-machine-scale-sets/virtual-machine-scale-sets-networking.md
Tokens: 236 in file articles/virtual-network/network-overview.md
Tokens: 428 in file articles/lab-services/how-to-connect-vnet-injection.md
Tokens: 115 in file articles/virtual-network/vnet-integration-for-azure-services.md


***************
Total tokens: 1243
