# Introduction

# Install the dependencies

In [None]:
!pip install llama-index
!pip install langchain
!pip install sentence_transformers

# Define the functions
The following code defines the functions we need to construct the index and query it.

In [None]:
from llama_index import SimpleDirectoryReader, GPTListIndex, readers, VectorStoreIndex, LLMPredictor, PromptHelper, ServiceContext, StorageContext, load_index_from_storage
from langchain.chat_models import ChatOpenAI
# from langchain import OpenAI
import sys
import os
from IPython.display import Markdown, display


def construct_index(directory_path):
    """Build a vector index over the files in ``directory_path`` and persist it.

    The documents are loaded with ``SimpleDirectoryReader``, indexed with
    ``VectorStoreIndex`` using a GPT-4 chat model, and the result is written
    to ``./storage`` (the directory ``ask_ai`` loads from).

    Args:
        directory_path: Folder containing the files to index.

    Returns:
        The constructed ``VectorStoreIndex``.
    """
    # Imported locally so the function works regardless of whether the cell
    # that does ``import openai`` has been executed yet.
    import openai

    # Prompt-sizing settings, passed positionally to PromptHelper below.
    max_input_size = 4096      # maximum input size
    num_outputs = 2000         # number of output tokens
    max_chunk_overlap = 0.2    # maximum chunk overlap
    chunk_size_limit = 600     # chunk size limit

    prompt_helper = PromptHelper(
        max_input_size,
        num_outputs,
        max_chunk_overlap,
        chunk_size_limit=chunk_size_limit,
    )

    llm = ChatOpenAI(
        openai_api_key=openai.api_key,
        temperature=0.5,
        model_name="gpt-4",
        max_tokens=num_outputs,
    )
    llm_predictor = LLMPredictor(llm=llm)

    documents = SimpleDirectoryReader(directory_path).load_data()

    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
    )
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)

    # Name the directory explicitly so it visibly matches the one ask_ai reads.
    index.storage_context.persist(persist_dir="./storage")

    return index

def ask_ai():
    """Answer questions interactively against the index stored in ``./storage``.

    Prompts for a question, queries the index, and renders the answer as
    Markdown, repeating until the prompt is interrupted (Ctrl-C / kernel
    interrupt) or input reaches end-of-file. Blank questions are skipped.
    """
    # Rebuild the storage context and load the persisted index.
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    # The engine does not depend on the question, so build it once rather
    # than on every iteration.
    query_engine = index.as_query_engine()

    while True:
        try:
            query = input("Hello! How may I help today? ")
        except (EOFError, KeyboardInterrupt):
            # End the session quietly instead of surfacing a traceback.
            break

        # Don't spend an API call on an empty question.
        if not query.strip():
            continue

        response = query_engine.query(query)
        display(Markdown(f"Response: <b>{response.response}</b>"))

# Set OpenAI API Key

In [None]:
# Never hard-code the API key in the notebook: read it from the environment,
# or prompt for it without echoing. Any key that was previously committed to
# this file should be considered leaked and revoked.
import getpass
import os

import openai

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("Please enter your OpenAI API key: ")
# Export it as well so libraries that read OPENAI_API_KEY use the same key.
os.environ["OPENAI_API_KEY"] = openai.api_key

# Construct an index
Now we are ready to construct the index. This will take every file in the folder 'data', split it into chunks, and embed it with OpenAI's embeddings API.

In [None]:
# Index every file under ./data; as the cell's last expression, the returned
# index object is also shown as the cell output.
construct_index("./data")

# Ask questions

In [None]:
# Start the interactive question loop; interrupt the kernel to stop it.
ask_ai()