In [None]:
pip install -U azure-search-documents

In [5]:
import os
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_openai import AzureOpenAIEmbeddings

# Load environment variables via python-dotenv before anything reads them.
load_dotenv()

# Optional lookups: each of these is None when the variable is not set.
# NOTE(review): these use AZURE_OPENAI_* names while the clients below read
# OPENAI_* names, and api_key / azure_endpoint / api_version are not referenced
# by any cell visible here -- confirm which naming scheme is intended.
api_key = os.environ.get("AZURE_OPENAI_API_KEY")
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
api_version = os.environ.get("OPENAI_API_VERSION")
vector_store_address: str = os.environ.get("AZURE_SEARCH_ENDPOINT")
vector_store_password: str = os.environ.get("AZURE_SEARCH_ADMIN_KEY")

# Chat model client. The os.environ[...] lookups are mandatory: a missing
# variable raises KeyError before the client is constructed.
llm = AzureChatOpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    azure_endpoint=os.environ["OPENAI_AZURE_ENDPOINT"],
    api_version=os.environ["OPENAI_API_VERSION"],
    azure_deployment=os.environ["OPENAI_AZURE_DEPLOYMENT"],
    temperature=0,
    top_p=1,
)

In [6]:
# Embedding client. It reads the same OPENAI_* endpoint, key and API version
# variables as the chat model, but its own embeddings deployment variable.
# Every lookup is mandatory (KeyError when the variable is missing).
embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    azure_deployment=os.environ["OPENAI_AZURE_DEPLOYMENT_EMBEDDINGS"],
    openai_api_version=os.environ["OPENAI_API_VERSION"],
    azure_endpoint=os.environ["OPENAI_AZURE_ENDPOINT"],
    api_key=os.environ["OPENAI_API_KEY"],
)

In [7]:
# Name of the search index that the cells below add documents to and query.
index_name: str = "autopodcaster-demo"

# Reuse the endpoint and admin key already read from the environment in the
# configuration cell (vector_store_address / vector_store_password) instead of
# looking the same variables up a second time.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
    index_name=index_name,
    # Queries are embedded with the AzureOpenAIEmbeddings client built earlier.
    embedding_function=embeddings.embed_query,
)

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter

# Read the source text file as UTF-8.
loader = TextLoader("test.txt", encoding="utf-8")
documents = loader.load()

# Split into chunks with a 1000-character target size and no overlap.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
docs = text_splitter.split_documents(documents)

# Push the chunks into the vector store.
# NOTE(review): re-running this cell presumably adds the same chunks again --
# confirm whether duplicates in the index are acceptable.
vector_store.add_documents(docs)

In [None]:
# Hybrid search: request the top three matches for the question.
docs = vector_store.similarity_search(
    query="What datasets were used to pre-train the OpenAI o1 models?",
    k=3,
    search_type="hybrid",
)

# An empty result list (for example, when the index has not been populated)
# would make docs[0] raise IndexError, so report that case explicitly.
if docs:
    print(docs[0].page_content)
else:
    print("No documents matched the query.")

In [None]:
from pprint import pprint

# Fetch up to four matches together with their relevance scores, passing a
# score threshold of 0.50 to the vector store.
docs_and_scores = vector_store.similarity_search_with_relevance_scores(
    query="What data was used to train the OpenAI o1 models?",
    k=4,
    score_threshold=0.50,
)

# Pretty-print the results.
pprint(docs_and_scores)