# Web Page Reader

Demonstrates our web page readers, including `SimpleWebPageReader`, `TrafilaturaWebReader`, and `RssReader`.

In [None]:
import logging
import sys

# Send INFO-and-above log records to stdout so they show up in the notebook
# output. basicConfig() does nothing when the root logger already has
# handlers, so a stdout handler is also attached explicitly as a fallback.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
root_logger = logging.getLogger()
# Attach the fallback only when no stdout handler exists yet; otherwise every
# record would be printed twice (and once more per re-run of this cell).
if not any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in root_logger.handlers
):
    root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))

#### Using SimpleWebPageReader

In [None]:
from llama_index import GPTListIndex, SimpleWebPageReader
from IPython.display import Markdown, display
import os

In [None]:
# NOTE: the html_to_text=True option requires html2text to be installed

In [None]:
# Build a SimpleWebPageReader with html_to_text=True and have it load the
# single URL listed below.
essay_urls = ["http://paulgraham.com/worked.html"]
web_page_reader = SimpleWebPageReader(html_to_text=True)
documents = web_page_reader.load_data(essay_urls)

In [None]:
# Show the first loaded document as this cell's output.
documents[0]

In [None]:
# Build a GPTListIndex from the loaded documents.
index = GPTListIndex.from_documents(documents)

In [None]:
# Set the logging level to DEBUG for more detailed output.
question = "What did the author do growing up?"
query_engine = index.as_query_engine()
response = query_engine.query(question)

In [None]:
# Wrap the response in bold tags and render it as Markdown.
bold_response = Markdown(f"<b>{response}</b>")
display(bold_response)

#### Using TrafilaturaWebReader

In [None]:
from llama_index import TrafilaturaWebReader

In [None]:
# Have a TrafilaturaWebReader load the single URL listed below.
essay_urls = ["http://paulgraham.com/worked.html"]
trafilatura_reader = TrafilaturaWebReader()
documents = trafilatura_reader.load_data(essay_urls)

In [None]:
# Build a GPTListIndex from the documents loaded by TrafilaturaWebReader.
index = GPTListIndex.from_documents(documents)

In [None]:
# Set the logging level to DEBUG for more detailed output.
question = "What did the author do growing up?"
query_engine = index.as_query_engine()
response = query_engine.query(question)

In [None]:
# Wrap the response in bold tags and render it as Markdown.
bold_response = Markdown(f"<b>{response}</b>")
display(bold_response)

#### Using RssReader

In [None]:
from llama_index import GPTListIndex, RssReader

# Pass the New York Times home page feed URL to RssReader.
rss_feed_urls = ["https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml"]
documents = RssReader().load_data(rss_feed_urls)

# Build a GPTListIndex from the feed documents.
index = GPTListIndex.from_documents(documents)

# Set the logging level to DEBUG for more detailed output.
question = "What happened in the news today?"
query_engine = index.as_query_engine()
response = query_engine.query(question)