In [4]:
pip install -U azure-search-documents

Collecting azure-search-documents
  Downloading azure_search_documents-11.5.1-py3-none-any.whl.metadata (23 kB)
Downloading azure_search_documents-11.5.1-py3-none-any.whl (297 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.7/297.7 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: azure-search-documents
Successfully installed azure-search-documents-11.5.1
Note: you may need to restart the kernel to use updated packages.


In [5]:
import os
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_openai import AzureOpenAIEmbeddings

# Load variables from a local .env file (if any) before the lookups below.
load_dotenv()

# Optional lookups: each name is None when the variable is not set.
# NOTE(review): the chat model below reads the OPENAI_* variables rather than
# the AZURE_OPENAI_* values captured here — confirm which naming scheme the
# .env file actually uses.
api_key = os.environ.get("AZURE_OPENAI_API_KEY")
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
api_version = os.environ.get("OPENAI_API_VERSION")
vector_store_address: str = os.environ.get("AZURE_SEARCH_ENDPOINT")
vector_store_password: str = os.environ.get("AZURE_SEARCH_ADMIN_KEY")

# Required chat-model settings: indexing os.environ raises KeyError as soon as
# one of these variables is missing.
chat_model_settings = {
    "api_key": os.environ["OPENAI_API_KEY"],
    "azure_endpoint": os.environ["OPENAI_AZURE_ENDPOINT"],
    "api_version": os.environ["OPENAI_API_VERSION"],
    "azure_deployment": os.environ["OPENAI_AZURE_DEPLOYMENT"],
    "temperature": 0,
    "top_p": 1,
}

# Chat client used by the rest of the notebook.
llm = AzureChatOpenAI(**chat_model_settings)

In [6]:
# Embedding client whose embed_query method is handed to the vector store.
# Every setting is read with os.environ[...], so a missing variable raises
# KeyError right here instead of failing later during a request.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=os.environ["OPENAI_AZURE_DEPLOYMENT_EMBEDDINGS"],
    openai_api_version=os.environ["OPENAI_API_VERSION"],
    azure_endpoint=os.environ["OPENAI_AZURE_ENDPOINT"],
    api_key=os.environ["OPENAI_API_KEY"],
)

In [7]:
# Name of the search index that backs the vector store.
index_name: str = "autopodcaster-demo"

# Reuse the endpoint and key already read from the environment in the
# configuration cell (vector_store_address / vector_store_password) instead of
# looking the same variables up a second time, and fail with a clear message
# when the endpoint was never configured.
if not vector_store_address:
    raise ValueError(
        "AZURE_SEARCH_ENDPOINT is not set; define it in the environment or "
        "the .env file before creating the vector store."
    )

vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
    index_name=index_name,
    # Callable the store uses to embed text.
    embedding_function=embeddings.embed_query,
)

In [8]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter

# Splitter configuration comes first; it does not depend on the loaded file.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

# Read test.txt as UTF-8 text.
loader = TextLoader("test.txt", encoding="utf-8")
documents = loader.load()

# Break the loaded text into chunks for indexing.
docs = text_splitter.split_documents(documents)

# Last expression of the cell, so the value returned by add_documents is shown
# as the cell output.
# NOTE(review): re-running this cell calls add_documents again — presumably
# that indexes the same chunks a second time; verify before re-executing.
vector_store.add_documents(documents=docs)

['ZDA1ZTBiYTYtZmFjNi00N2E5LWEwNzctN2YwNTkzNGY5ZDBh']

In [12]:
# Query the index with search_type="hybrid", asking for up to k=3 results.
pretraining_question = "What datasets were used to pre-train the OpenAI o1 models?"
docs = vector_store.similarity_search(
    query=pretraining_question,
    k=3,
    search_type="hybrid",
)

# Print only the text of the first returned document.
best_match = docs[0]
print(best_match.page_content)

OpenAI o1 System Card
OpenAI
Sept 12, 2024
1 Introduction
The o1 model series is trained with large-scale reinforcement learning to reason using chain of
thought. These advanced reasoning capabilities provide new avenues for improving the safety and
robustness of our models. In particular, our models can reason about our safety policies in context
when responding to potentially unsafe prompts. This leads to state-of-the-art performance on
certain benchmarks for risks such as generating illicit advice, choosing stereotyped responses,
and succumbing to known jailbreaks. Training models to incorporate a chain of thought before
answering has the potential to unlock substantial benefits, while also increasing potential risks that
stem from heightened intelligence. Our results underscore the need for building robust alignment
methods, extensively stress-testing their efficacy, and maintaining meticulous risk management
protocols. This report outlines the safety work carried out for the OpenA

In [11]:
from pprint import pprint

# Request up to k=4 results together with their relevance scores, passing a
# score threshold of 0.50 to the search call.
training_data_question = "What data was used to train the OpenAI o1 models?"
docs_and_scores = vector_store.similarity_search_with_relevance_scores(
    query=training_data_question,
    k=4,
    score_threshold=0.50,
)

# Pretty-print the full result list.
pprint(docs_and_scores)

  0.63461286)]
