# Simple Index Demo

#### Load documents, build the GPTVectorStoreIndex

In [None]:
# Install the pinned llama-index release (0.6.27) using IPython's %pip magic.
%pip install llama-index==0.6.27

In [None]:
# OpenAI API key: never hard-code a real key in a notebook that may be shared
# or committed. Reuse OPENAI_API_KEY when it is already set in the environment;
# otherwise prompt for it without echoing the value.
import os
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
# openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
import logging
import sys
import requests

# Send INFO-and-above log records to stdout through exactly one root handler.
# The previous setup called basicConfig() and then attached a second stdout
# StreamHandler, so each record was printed twice whenever basicConfig() took
# effect. force=True (Python 3.8+) replaces any handlers already on the root
# logger, so logging is still enabled where basicConfig() would otherwise be a
# no-op, and re-running this cell does not stack up additional handlers.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)


from llama_index import GPTVectorStoreIndex, download_loader
from IPython.display import Markdown, display

### Use SimpleWebPageReader

#### Build Index

In [None]:
# Download the web page loader from LlamaHub.
# The returned loader is bound to the same name and instantiated further down.
SimpleWebPageReader = download_loader("SimpleWebPageReader")

In [None]:
# Echo the downloaded loader so the notebook displays it.
SimpleWebPageReader

In [None]:
# Fetch Paul Graham's essay and load it as documents, converting HTML to text
essay_urls = ["http://paulgraham.com/worked.html"]
web_page_reader = SimpleWebPageReader(html_to_text=True)
documents = web_page_reader.load_data(essay_urls)

In [None]:
# Show the text of the first loaded document.
documents[0].get_text()

In [None]:
# Build a GPTVectorStoreIndex from the loaded documents.
index = GPTVectorStoreIndex.from_documents(documents)

#### Query Index

In [None]:
# Tip: set logging to DEBUG for more detailed outputs
question = "What did the author do growing up?"
query_engine = index.as_query_engine()
response = query_engine.query(question)

In [None]:
# Render the answer in bold as Markdown
bold_answer = Markdown(f"<b>{response}</b>")
display(bold_answer)

In [None]:
# Inspect the first source node behind the answer.
# source_nodes holds NodeWithScore objects, whose similarity value is exposed
# as `score`. It is printed explicitly because a bare expression in the middle
# of a cell produces no output.
sn1 = response.source_nodes[0]
print(sn1.score)
print(sn1.node.get_text())

In [None]:
# Tip: set logging to DEBUG for more detailed outputs
question = "What are times the author was angry?"
query_engine = index.as_query_engine()
response = query_engine.query(question)

In [None]:
# Render the answer in bold as Markdown
answer_markup = f"<b>{response}</b>"
display(Markdown(answer_markup))

#### Get Sources

In [None]:
# Print the formatted sources of the latest response
formatted_sources = response.get_formatted_sources()
print(formatted_sources)

### Use Image Reader

In [None]:
from llama_index.response.notebook_utils import (
    display_response,
    display_image,
)
from llama_index.indices.query.query_transform.base import (
    ImageOutputQueryTransform,
)

In [None]:
# Download the directory reader and the image reader from LlamaHub.
# NOTE: the directory reader is said to download image parsers as needed,
# depending on file extension; this cell also fetches ImageReader explicitly.
SimpleDirectoryReader = download_loader("SimpleDirectoryReader")
ImageReader = download_loader("ImageReader")

In [None]:
import requests

# Download the zipped sample receipts and save the archive locally.
# A dedicated variable name keeps the query `response` from earlier cells
# intact, and raise_for_status() stops an HTTP error page from being written
# to disk as if it were a zip file. The timeout prevents an indefinite hang.
receipts_download = requests.get(
    "https://www.dropbox.com/s/wl2h9mn7rb1dypc/receipts.zip?dl=1",
    timeout=60,
)
receipts_download.raise_for_status()
with open("receipts.zip", "wb") as fp:
    fp.write(receipts_download.content)

In [None]:
# Extract the downloaded archive into the current working directory.
# `%unzip` is not an IPython magic; the standard-library zipfile module does
# the same job without depending on an external `unzip` binary, and it
# overwrites existing files, so the cell can be re-run safely.
import zipfile

with zipfile.ZipFile("receipts.zip") as receipts_archive:
    receipts_archive.extractall()

In [None]:
# Set up the directory reader: .jpg files go through a "donut" ImageReader
image_extractor = {}
image_extractor[".jpg"] = ImageReader(text_type="donut")

reader = SimpleDirectoryReader(
    "receipts",
    file_extractor=image_extractor,
)



In [None]:
# Load the files from the "receipts" directory through the configured reader.
# NOTE: will be slow on Google Colab
documents = reader.load_data()

In [None]:
# Echo the first loaded document so the notebook displays it.
documents[0]

In [None]:
# Index the receipt documents. GPTVectorStoreIndex is the class this notebook
# imports from llama_index; GPTSimpleVectorIndex is never imported here, so
# referencing it raised NameError.
index = GPTVectorStoreIndex.from_documents(documents)

In [None]:
from llama_index.query_engine import TransformQueryEngine

# Wrap the index's query engine so ImageOutputQueryTransform is applied to
# each query; query() itself takes only the query, not a query_transform
# keyword argument.
query_engine = TransformQueryEngine(
    index.as_query_engine(),
    query_transform=ImageOutputQueryTransform(width=400),
)

# Adjacent string literals keep the prompt free of the indentation spaces that
# a backslash line continuation inside the literal would embed in it.
receipts_response = query_engine.query(
    "When was the last time I went to McDonald's and how much did I spend. "
    "Also show me the receipt from my visit."
)

In [None]:
# Render the receipts query response with llama_index's notebook helper.
display_response(receipts_response)