In [None]:
%pip install llama-index
%pip install llama-index-llms
%pip install llama-index-embeddings
%pip install dotenv

## Load Credentials

In [2]:
from __future__ import print_function
import logging
import sys
import os
from dotenv import load_dotenv
from llama_index.core import ( Settings, VectorStoreIndex, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage)
from llama_index.core.callbacks import CallbackManager
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI

# Only surface warnings and errors from the root logger.
logging.getLogger().setLevel(logging.WARNING)

# Read the Azure OpenAI settings from the local .env file into the process environment.
load_dotenv('../Credentials/.env')

endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
credential = os.getenv("AZURE_OPENAI_API_KEY")

# Fail fast with a clear message: os.getenv() returns None for a missing variable,
# which would otherwise only surface later when the Azure clients are built or called.
_missing_env_vars = [
    env_name
    for env_name, env_value in (
        ("AZURE_OPENAI_ENDPOINT", endpoint),
        ("AZURE_OPENAI_API_KEY", credential),
    )
    if not env_value
]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Check that '../Credentials/.env' exists and defines them."
    )

# Azure OpenAI API and model configuration used by the cells below.
azure_openai_api_version = "2024-04-01-preview"
azure_openai_embedding_deployment = "text-embedding-ada-002"
embedding_model_name = "text-embedding-ada-002"
llm_model_name = "gpt-35-turbo-16k"
api_type = "azure"

## Load Documents

In [4]:
# Read the supported file types from ../Data/, descending into sub-directories.
reader = SimpleDirectoryReader(
    "../Data/",
    recursive=True,
    filename_as_id=True,
    required_exts=[".pdf", ".docx", ".xlsx", ".pptx"],
)

# Each item yielded by iter_data() is a batch of documents; flatten the batches into one list.
documents = [doc for docs in reader.iter_data() for doc in docs]



Failed to load file ../Data/course113113/downloads/2-7-23~1.PPT with error: The expanded size of the tensor (16) must match the existing size (13) at non-singleton dimension 0.  Target sizes: [16].  Tensor sizes: [13]. Skipping...
Failed to load file ../Data/course113113/downloads/2-9-23~1.PPT with error: The expanded size of the tensor (16) must match the existing size (14) at non-singleton dimension 0.  Target sizes: [16].  Tensor sizes: [14]. Skipping...


In [9]:
# Sanity check: display the metadata attached to the first loaded document.
first_document = documents[0]
first_document.metadata

{'page_label': '1',
 'file_name': '1-s2.0-S1538544221000821-main.pdf',
 'file_path': '../Data/course113113/downloads/1-s2.0-S1538544221000821-main.pdf',
 'file_type': 'application/pdf',
 'file_size': 719173,
 'creation_date': '2024-04-16',
 'last_modified_date': '2024-04-16',
 'last_accessed_date': '2024-04-30'}

## Node Parsing

In [5]:
# Configure the sentence-window node parser. The two metadata keys name where the
# parser is asked to keep the window and the original text on each node.
window_parser_options = {
    "window_size": 3,
    "window_metadata_key": "window",
    "original_text_metadata_key": "original_text",
}
sentence_node_parser = SentenceWindowNodeParser.from_defaults(**window_parser_options)

# Split every loaded document into nodes.
nodes = sentence_node_parser.get_nodes_from_documents(documents)

## Create Index

In [6]:
# Chat/completion model served by Azure OpenAI. The deployment is assumed to be
# named after the model, since no separate LLM deployment constant is configured.
llm = AzureOpenAI(
    model=llm_model_name,
    deployment_name=llm_model_name,
    api_key=credential,
    azure_endpoint=endpoint,
    api_version=azure_openai_api_version,
    api_type=api_type,
)

# Embedding model. Use the dedicated deployment constant so that changing
# `azure_openai_embedding_deployment` in the configuration actually takes effect.
embed_model = AzureOpenAIEmbedding(
    model=embedding_model_name,
    deployment_name=azure_openai_embedding_deployment,
    api_key=credential,
    azure_endpoint=endpoint,
    api_version=azure_openai_api_version,
    api_type=api_type,
    embed_batch_size=50,
)

# Register both models as the global defaults used by LlamaIndex components.
Settings.llm = llm
Settings.embed_model = embed_model

# NOTE(review): this callback manager is created but not attached to Settings or
# to the index in this cell; kept in case other cells rely on the name.
callback_manager = CallbackManager()

# Build the vector index over the parsed nodes.
index = VectorStoreIndex(nodes)

print("Index created")

Index created


## Save to Persistent Storage

Persist the index to disk so that it can be loaded again later, which saves you from having to re-parse your documents every time.

In [7]:
# Write the index's storage context to disk under ../Data/course113113_index.
storage_context = index.storage_context
storage_context.persist(persist_dir="../Data/course113113_index")

Saving to Persistent Storage


## Create Query Engine, Ask a Question

In [8]:
# The nodes were parsed with SentenceWindowNodeParser, which stores a window under
# the "window" metadata key (see `window_metadata_key` in the node-parsing cell).
# Without this postprocessor that stored window is never used: it replaces each
# retrieved node's text with the window before the LLM synthesizes the answer.
window_postprocessor = MetadataReplacementPostProcessor(target_metadata_key="window")

query_engine = index.as_query_engine(
    similarity_top_k=5,
    node_postprocessors=[window_postprocessor],
)

query = 'Give me a 3 sentence summary of Hemodynamics'

query_response = query_engine.query(query)

print(query_response)

Hemodynamics refers to the study of the forces and pressures involved in the circulation of blood within the cardiovascular system. It involves the evaluation of various parameters such as blood pressure, cardiac output, and stroke volume, which are crucial in maintaining the normal functioning of the heart and blood vessels. Understanding hemodynamics helps in assessing the compensation and homeostasis of the cardiovascular system and can guide interventions to restore normal blood flow and pressure.
