In [None]:
import logging
import sys

# Send all log records at INFO and above to stdout so library logging shows up
# in the cell output. `force=True` replaces any handlers already attached to
# the root logger, so exactly one stdout handler exists afterwards: each
# message is printed once, and re-running this cell does not stack handlers.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [None]:
import sys, os

# Show which Python interpreter the install cell below will target.
print(f"Installing packages into environment {sys.executable}")

In [None]:
# Install the notebook's dependencies with the same interpreter that runs this
# kernel, so the packages land in the environment the later imports use.
# NOTE(review): no versions are pinned. The later cells import LLMPredictor /
# ServiceContext / LangchainEmbedding from top-level `llama_index` and pass
# `api_type` / `api_base` settings, which presumably target older releases of
# these libraries — confirm and pin known-good versions.
!{sys.executable} -m pip install llama-index openai langchain azure-identity

# Get Auth Token

In [None]:
from azure.identity import DefaultAzureCredential

In [None]:
# Obtain an access token for the Cognitive Services scope. `token.token` is
# what the later cells pass to the OpenAI clients as their API key.
default_credential = DefaultAzureCredential()
token = default_credential.get_token(
    "https://cognitiveservices.azure.com/.default"
)

# Configure Parameters

In [None]:
# Base URL of the Azure OpenAI resource. Replace the `<aoai instance name>`
# placeholder with the name of your own instance before running.
aoai_base = "https://<aoai instance name>.openai.azure.com/"

# Set Up Contexts

In [None]:
from llama_index import StorageContext

In [None]:
# Load the stores persisted under ./doc_store. (Calling from_defaults without
# persist_dir would instead create all-new Simple stores.)
storage_context = StorageContext.from_defaults(
    persist_dir="./doc_store",
)

In [None]:
# Sanity check: number of entries held in the loaded docstore.
len(storage_context.docstore.docs)

## Set up custom LLM

In [None]:
# Azure-specific OpenAI settings, reused by both the LLM and the embedding
# client configured in later cells.
azure_kwargs = {
    "api_type": "azure_ad",
    "api_version": "2023-03-15-preview",
    "api_base": aoai_base,
}

In [None]:
from langchain.llms import AzureOpenAI
# LangChain completion client for the Azure deployment. The Azure AD token is
# passed as the API key and the Azure connection settings go in model_kwargs.
llm = AzureOpenAI(
    temperature=0.9,
    deployment_name="text-davinci-003",
    model_name="text-davinci-003",
    openai_api_key=token.token,
    model_kwargs=azure_kwargs,
)

In [None]:
from llama_index import LLMPredictor

# Wrap the LangChain LLM in a llama-index LLMPredictor; it is handed to the
# ServiceContext further down.
llm_predictor = LLMPredictor(llm)

In [None]:
# Display the metadata the predictor reports for the wrapped LLM.
llm_predictor.get_llm_metadata()

## Set up custom Embedding

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding

In [None]:
# Also expose the token through OPENAI_API_KEY.
# NOTE(review): presumably for library code that reads the key from the
# environment rather than from constructor arguments — confirm it is needed.
os.environ["OPENAI_API_KEY"] = token.token

In [None]:
# LangChain embedding client for the Azure OpenAI embedding deployment, using
# the same token and Azure connection settings as the LLM.
# NOTE(review): chunk_size=1 presumably limits each request to a single input
# for the Azure endpoint — confirm against the service limits.
oai_embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    deployment="text-embedding-ada-002",
    openai_api_key=token.token,
    openai_api_base=azure_kwargs["api_base"],
    openai_api_type=azure_kwargs["api_type"],
    openai_api_version=azure_kwargs["api_version"],
    chunk_size=1,
)

In [None]:
# Adapt the LangChain embedding client to llama-index's embedding wrapper so
# it can be passed as the ServiceContext's embed_model.
embeddings = LangchainEmbedding(oai_embeddings)

In [None]:
from llama_index import ServiceContext
# Bundle the custom LLM predictor and embedding model; the chunk limit is left
# at the library default.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embeddings,
)

# Load Index and Query

In [None]:
from llama_index import load_index_from_storage

In [None]:
# Rebuild the index from the persisted stores, wired to the custom LLM and
# embedding model through the service context.
index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
)

In [None]:
# Queries issued through this engine use the same embed_model as the index.
query_engine = index.as_query_engine(verbose=True)


In [None]:
%%time
# Ask a question against the loaded index and print the synthesized answer.
response = query_engine.query(
    "How have emissions metrics reported in 2021 and 2022 differed?"
)
print(response)