In [None]:

import os

from llama_index import Document, ServiceContext
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index.llms import AzureOpenAI
from llama_index.node_parser import SimpleNodeParser
from llama_index.schema import MetadataMode

# --- Azure OpenAI configuration ---
# Connection settings are declared once so the chat client and the embedding
# client always use the same credentials, endpoint and API version.
# os.getenv returns None when a variable is unset; the value is passed through
# to the client constructors unchanged.
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = "2023-07-01-preview"

# NOTE(review): these are presumably the Azure deployment names configured in
# the target resource - confirm they match before running.
LLM_ENGINE = "gpt-35-turbo"
EMBEDDING_MODEL = "text-embedding-ada-002"
EMBEDDING_DEPLOYMENT = "text-embedding-ada-002"

# Chat/completion model client
llm = AzureOpenAI(
    engine=LLM_ENGINE,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=AZURE_OPENAI_API_VERSION,
)

# Embedding model client
embed_model = AzureOpenAIEmbedding(
    model=EMBEDDING_MODEL,
    deployment_name=EMBEDDING_DEPLOYMENT,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=AZURE_OPENAI_API_VERSION,
)

# Bundle both clients into a single service context
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

# Load every .txt file in the data folder as one Document.
# The file name is stored in each Document's metadata under "filename".
folder_path = "./fundmgmt_data"
documents = []

# os.listdir returns entries in arbitrary order; sorting keeps the order of
# documents (and of everything derived from them) stable across runs.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".txt"):
        continue
    with open(os.path.join(folder_path, filename), "r", encoding="utf-8") as f:
        text = f.read()
    # Build the Document only after the file handle has been closed.
    doc = Document(text=text, metadata={"filename": filename})
    documents.append(doc)

# Parse documents into nodes/chunks
parser = SimpleNodeParser.from_defaults()
nodes = parser.get_nodes_from_documents(documents)

# Text handed to the embedding model. node.text alone carries no metadata, so
# the file name would not influence the vector; MetadataMode.EMBED asks the
# node for its content together with the metadata intended for embedding.
embed_inputs = [
    node.get_content(metadata_mode=MetadataMode.EMBED) for node in nodes
]

# "chunks" keeps the raw chunk text; "embeddings" holds the vector at the same
# index. The batch API embeds all inputs in one call instead of issuing one
# call per chunk from a Python loop.
vectorstore_dict = {
    "chunks": [node.text for node in nodes],
    "embeddings": embed_model.get_text_embedding_batch(embed_inputs),
}

# Sanity check: every chunk must have exactly one embedding.
assert len(vectorstore_dict["chunks"]) == len(vectorstore_dict["embeddings"])


In [None]:
# Cody prompt: write llama-index + Azure OpenAI code that takes a list of Documents, embeds them, and stores the result in a dictionary of lists like this: {"chunks": <list_of_chunks>, "embeddings": <corresponding_embedding_vectors>}. The data is stored in multiple .txt files in the ./fundmgmt_data folder; also add the file name to the metadata so that the embeddings capture it as well.