# Faiss DB

In [1]:
import langchain
import os
import openai

from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import TokenTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

In [2]:
print('Open AI version:', openai.__version__)
print('langchain version:', langchain.__version__)

Open AI version: 0.27.9
langchain version: 0.0.278


In [3]:
# Load environment variables (set OPENAI_API_KEY, OPENAI_API_BASE, and OPENAI_API_VERSION in .env)
load_dotenv("azure.env")

# Configure OpenAI API
openai.api_type = "azure"
openai.api_base = os.getenv('OPENAI_API_BASE')
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_version = os.getenv('OPENAI_API_VERSION')

In [4]:
# Initialize gpt-35-turbo and our embedding model
llm = AzureChatOpenAI(deployment_name="gpt-35-turbo")

In [5]:
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002',
                              deployment='text-embedding-ada-002',
                              openai_api_base=openai.api_base,
                              openai_api_type='azure',
                              openai_api_key=openai.api_key,
                              chunk_size=1)

In [6]:
llm

AzureChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.7, model_kwargs={}, openai_api_key='8d0786663aa1480f9dee3c9edd842b1a', openai_api_base='https://azure-openai-serge.openai.azure.com', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None, tiktoken_model_name=None, deployment_name='gpt-35-turbo', model_version='', openai_api_type='azure', openai_api_version='2023-05-15')

In [7]:
embeddings

OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='2023-05-15', openai_api_base='https://azure-openai-serge.openai.azure.com', openai_api_type='azure', openai_proxy='', embedding_ctx_length=8191, openai_api_key='8d0786663aa1480f9dee3c9edd842b1a', openai_organization='', allowed_special=set(), disallowed_special='all', chunk_size=1, max_retries=6, request_timeout=None, headers=None, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={})

In [8]:
loader = DirectoryLoader('docs/',
                         glob="*.txt",
                         loader_cls=TextLoader,
                         loader_kwargs={'autodetect_encoding': True})

documents = loader.load()
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

In [9]:
db = FAISS.from_documents(documents=docs, embedding=embeddings)
db

<langchain.vectorstores.faiss.FAISS at 0x7f04eca25a50>

In [10]:
# Adapt if needed
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:""")

qa = ConversationalRetrievalChain.from_llm(llm=llm,
                                           retriever=db.as_retriever(),
                                           condense_question_prompt=CONDENSE_QUESTION_PROMPT,
                                           return_source_documents=True,
                                           verbose=False)

In [11]:
chat_history = []

query = "what is Azure OpenAI Service?"
result = qa({"question": query, "chat_history": chat_history})

print("Question:", query)
print("\nAnswer:", result["answer"])

Question: what is Azure OpenAI Service?

Answer: Azure OpenAI is a service that provides REST API access to OpenAI's language models such as GPT-3, Codex and Embeddings model series. Users can adapt the models to their specific task including content generation, summarization, semantic search, and natural language to code translation. The service can be accessed through REST APIs, Python SDK, or a web-based interface in the Azure OpenAI Studio. Azure OpenAI offers virtual network support, managed identity, and responsible AI content filtering. Access to the service is currently limited, but you can apply for initial access or for a production review.


In [12]:
chat_history = [(query, result["answer"])]

query = "Which regions does the service support?"
result = qa({"question": query, "chat_history": chat_history})

print("Question:", query)
print("\nAnswer:", result["answer"])

Question: Which regions does the service support?

Answer: Azure OpenAI Service is currently available in three regions: East US, South Central US, and West Europe.


In [13]:
chat_history = [(query, result["answer"])]

query = "Is it available for East US?"
result = qa({"question": query, "chat_history": chat_history})

print("Question:", query)
print("\nAnswer:", result["answer"])

Question: Is it available for East US?

Answer: The Azure OpenAI Service is currently available in East US, South Central US, and West Europe.
