In [3]:
!git clon https://github.com/jerryjliu/llama_index.git

Cloning into 'llama_index'...
remote: Enumerating objects: 22915, done.[K
remote: Counting objects: 100% (7289/7289), done.[K
remote: Compressing objects: 100% (898/898), done.[K
remote: Total 22915 (delta 6663), reused 6393 (delta 6391), pack-reused 15626[K
Receiving objects: 100% (22915/22915), 58.33 MiB | 22.56 MiB/s, done.
Resolving deltas: 100% (15731/15731), done.


In [28]:
# !pip install llama-index llama_hub wikipedia

In [2]:
import os
import openai
from dotenv_vault import load_dotenv

# Run the dotenv loader first so the key lookup below can see it.
# NOTE(review): presumably this populates os.environ from a local .env file — confirm.
load_dotenv()

# Alternative without a .env file:
# os.environ["OPENAI_API_KEY"] = "COPY AND PASTE YOUR OPENAI API HERE"

# Hand the key (None when the variable is unset) to the openai client.
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Data connectors (LlamaHub)

In [3]:
from llama_hub.wikipedia.base import WikipediaReader

# Wikipedia pages to load into `documents`.
pages = ['Berlin', 'Rome', 'Tokyo', 'Canberra', 'Santiago']

loader = WikipediaReader()
documents = loader.load_data(pages=pages)

In [21]:
# documents

# Basic query functionalities

In [19]:
from llama_index import VectorStoreIndex

berlin_question = "How many people live in Berlin"

# Index the Wikipedia documents loaded above.
index = VectorStoreIndex.from_documents(documents)

# Query the index through its default query engine.
query_engine = index.as_query_engine()
response = query_engine.query(berlin_question)

In [18]:
# Show the answer to the Berlin population query from the previous cell.
print(response)


Berlin has more than 3.85 million inhabitants, making it the European Union's most populous city according to population within city limits.


# Query Multiple Documents:
Source: https://gpt-index.readthedocs.io/en/latest/examples/usecases/10q_sub_question.html

In [24]:
import nest_asyncio
# Applied once before the sub-question query engine below is built and run.
# NOTE(review): presumably required because Jupyter already runs an event loop — confirm.
nest_asyncio.apply()

In [25]:
from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext, VectorStoreIndex
from llama_index.response.pprint_utils import pprint_response
# from langchain import OpenAI
from llama_index.llms import OpenAI

from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.query_engine import SubQuestionQueryEngine

In [26]:
# `OpenAI` here is llama_index.llms.OpenAI (the LangChain import above is commented out).
# It takes `model`, not LangChain's `model_name`; LangChain-only kwargs
# (`max_tokens=-1`, `streaming=True`) are dropped so the configuration is
# actually honoured by this class.
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model="text-davinci-003"))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
# NOTE(review): none of the visible cells pass `service_context` to an index or
# query engine, so this configuration currently has no effect downstream — confirm intent.

In [29]:
# !pip install pypdf

In [30]:
# ls llama_index/docs/examples/data/10q/uber_10q_sept_2022.pdf

ls: llama_index/docs/examples/data/10q/uber_10q_sept_2022.pdf: No such file or directory


In [31]:
# load data
# Read the three 2022 Uber 10-Q PDFs (paths are relative to the working directory).
march_2022, june_2022, sept_2022 = (
    SimpleDirectoryReader(input_files=[pdf_name]).load_data()
    for pdf_name in (
        "uber_10q_march_2022.pdf",
        "uber_10q_june_2022.pdf",
        "uber_10q_sept_2022.pdf",
    )
)


In [32]:
# Build one vector-store index per quarterly filing.
march_index, june_index, sept_index = (
    VectorStoreIndex.from_documents(filing_docs)
    for filing_docs in (march_2022, june_2022, sept_2022)
)

In [34]:
# Create one query engine per quarterly index, each configured with similarity_top_k=3.
march_engine, june_engine, sept_engine = (
    quarter_index.as_query_engine(similarity_top_k=3)
    for quarter_index in (march_index, june_index, sept_index)
)

In [35]:
# Wrap each quarterly engine as a named tool; the descriptions differ only in the period.
query_engine_tools = [
    QueryEngineTool(
        query_engine=quarter_engine,
        metadata=ToolMetadata(
            name=tool_name,
            description=f'Provides information about Uber quarterly financials ending {period}',
        ),
    )
    for quarter_engine, tool_name, period in (
        (sept_engine, 'sept_22', 'September 2022'),
        (june_engine, 'june_22', 'June 2022'),
        (march_engine, 'march_22', 'March 2022'),
    )
]

In [47]:
# Given a query, `SubQuestionQueryEngine` generates a "query plan" of sub-queries
# against the per-quarter tools defined above, then synthesizes the final answer.
s_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=query_engine_tools)

In [56]:
# Ask a question that spans all three quarterly filings.
response = s_engine.query('When will Uber be profitable?')


Generated 3 sub questions.
[36;1m[1;3m[sept_22] Q: What were Uber's financials for September 2022?
[0m[33;1m[1;3m[june_22] Q: What were Uber's financials for June 2022?
[0m[38;5;200m[1;3m[march_22] Q: What were Uber's financials for March 2022?
[0m[38;5;200m[1;3m[march_22] A: 
Uber's financials for March 2022 were:
Revenue: $6.9 billion
Net Loss Attributable to Uber Technologies, Inc.: $5.9 billion
Adjusted EBITDA: $168 million
Unrestricted Cash and Cash Equivalents: $4.2 billion
[0m[36;1m[1;3m[sept_22] A: 
Revenue: $8,343 million
Costs and expenses: $8,838 million
Loss from operations: $495 million
Interest expense: $146 million
Other income (expense), net: $535 million
Loss before income taxes and income (loss) from equity method investments: $1,176 million
Provision for (benefit from) income taxes: $58 million
Income (loss) from equity method investments: $30 million
Net loss including non-controlling interests: $1,204 million
Net loss attributable to Uber Technologies

In [57]:
# Show the synthesized answer from the sub-question engine.
print(response)


It is difficult to answer this question without prior knowledge. Uber's financials have been in the red for the past few quarters, so it is difficult to predict when they will become profitable.


# Router

This section defines a custom router query engine that can route a query to either a SQL database or a vector database.

Source: https://gpt-index.readthedocs.io/en/latest/examples/query_engine/SQLRouterQueryEngine.html

In [58]:
import nest_asyncio
nest_asyncio.apply()
import logging
import sys

# Send INFO-level logs to stdout through exactly one root handler.
# The previous extra StreamHandler duplicated every record; force=True replaces
# any handlers left by an earlier run, so re-executing this cell stays idempotent.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
    SQLStructStoreIndex,
    SQLDatabase,
    WikipediaReader
)

In [59]:
# Create Database Schema + Test Data
# Toy scenario: a single `city_stats` table in an in-memory SQLite database.

from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, select, column

engine = create_engine("sqlite:///:memory:", future=True)
metadata_obj = MetaData()

# Column definitions for the city table; city_name is the primary key.
table_name = "city_stats"
city_columns = [
    Column("city_name", String(16), primary_key=True),
    Column("population", Integer),
    Column("country", String(16), nullable=False),
]
city_stats_table = Table(table_name, metadata_obj, *city_columns)

# Emit CREATE TABLE for everything registered on the metadata object.
metadata_obj.create_all(engine)

# Display the registered table names.
metadata_obj.tables.keys()


dict_keys(['city_stats'])

In [60]:
from sqlalchemy import insert

rows = [
    {"city_name": "Toronto", "population": 2930000, "country": "Canada"},
    {"city_name": "Tokyo", "population": 13960000, "country": "Japan"},
    {"city_name": "Berlin", "population": 3645000, "country": "Germany"},
]

# Insert all rows in one transaction with a single executemany call, instead of
# opening a connection and committing once per row. engine.begin() commits on
# success and rolls back if any insert fails.
# Note: city_name is the primary key, so re-running this cell against the same
# engine raises an integrity error.
with engine.begin() as connection:
    connection.execute(insert(city_stats_table), rows)

In [61]:
# Read back every row of city_stats to confirm the inserts.
with engine.connect() as connection:
    cursor = connection.exec_driver_sql("SELECT * FROM city_stats")
    stored_rows = cursor.fetchall()
print(stored_rows)

[('Toronto', 2930000, 'Canada'), ('Tokyo', 13960000, 'Japan'), ('Berlin', 3645000, 'Germany')]


In [62]:
# Load Data
# Fetch the Wikipedia pages for the same three cities stored in the SQL table.
cities = ['Toronto', 'Berlin', 'Tokyo']
wiki_reader = WikipediaReader()
wiki_docs = wiki_reader.load_data(pages=cities)

In [63]:
# Inspect the loaded Document objects (the full repr is very long).
wiki_docs

[Document(id_='64033ce6-4230-4260-a95e-509aacbed956', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='f7eb5dbada23cdbe88837b7897d6b71d9938ae43f4826230a89b4b4d7ee39af4', text='Toronto ( (listen) tə-RON-toh; locally [təˈɹɒɾ̃ə] or [ˈtɹɒɾ̃ə]) is the capital city of the Canadian province of Ontario. With a recorded population of 2,794,356 in 2021, it is the most populous city in Canada and the fourth most populous city in North America. The city is the anchor of the Golden Horseshoe, an urban agglomeration of 9,765,188 people (as of 2021) surrounding the western end of Lake Ontario, while the Greater Toronto Area proper had a 2021 population of 6,712,341. Toronto is an international centre of business, finance, arts, sports and culture, and is recognized as one of the most multicultural and cosmopolitan cities in the world.Indigenous peoples have travelled through and inhabited the Toronto area, located on a broad sloping 

In [64]:
# Build SQL Index
# Wrap the engine (restricted to city_stats) and build a struct-store index on it;
# no documents are passed because the rows were inserted directly above.
sql_database = SQLDatabase(engine, include_tables=["city_stats"])
sql_index = SQLStructStoreIndex.from_documents([], sql_database=sql_database, table_name="city_stats")

In [66]:
# Debug inspection: dump the SQL index's instance attributes.
sql_index.__dict__

{'sql_database': <llama_index.langchain_helpers.sql_wrapper.SQLDatabase at 0x16fafc510>,
 '_ref_doc_id_column': None,
 '_table_name': 'city_stats',
 '_table': None,
 'schema_extract_prompt': <llama_index.prompts.base.Prompt at 0x12676a6d0>,
 'output_parser': <function llama_index.indices.struct_store.base.default_output_parser(output: str) -> Optional[Dict[str, Any]]>,
 '_service_context': ServiceContext(llm_predictor=<llama_index.llm_predictor.base.LLMPredictor object at 0x16fafe3d0>, prompt_helper=<llama_index.indices.prompt_helper.PromptHelper object at 0x16f91afd0>, embed_model=<llama_index.embeddings.openai.OpenAIEmbedding object at 0x16fafc550>, node_parser=<llama_index.node_parser.simple.SimpleNodeParser object at 0x16fafd510>, llama_logger=<llama_index.logger.base.LlamaLogger object at 0x16fafde50>, callback_manager=<llama_index.callbacks.base.CallbackManager object at 0x16fbad510>),
 '_storage_context': StorageContext(docstore=<llama_index.storage.docstore.simple_docstore.Simp

In [67]:
# Build Vector Index
# One vector index per city document, in the same order as `wiki_docs`.
# A single index across all docs, with each chunk annotated by metadata, would also work.
vector_indices = [VectorStoreIndex.from_documents([wiki_doc]) for wiki_doc in wiki_docs]

In [68]:
# Define Query Engines, Set as Tools
from llama_index.tools.query_engine import QueryEngineTool

sql_query_engine = sql_index.as_query_engine()
vector_query_engines = [vector_index.as_query_engine() for vector_index in vector_indices]

# Tool over the structured city_stats table.
sql_tool = QueryEngineTool.from_defaults(
    query_engine=sql_query_engine,
    description=(
        'Useful for translating a natural language query into a SQL query over a table containing: '
        'city_stats, containing the population/country of each city'
    ),
)

# One semantic tool per city; pairs each city name with the engine at the same position.
vector_tools = [
    QueryEngineTool.from_defaults(
        query_engine=city_engine,
        description=f'Useful for answering semantic questions about {city}',
    )
    for city, city_engine in zip(cities, vector_query_engines)
]

In [69]:
# Define Router Query Engine
from llama_index.query_engine.router_query_engine import RouterQueryEngine
from llama_index.selectors.llm_selectors import LLMSingleSelector

# The SQL tool comes first, followed by the per-city vector tools.
router_tools = [sql_tool, *vector_tools]
query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=router_tools,
)

response = query_engine.query('Which city has the highest population?')

INFO:llama_index.query_engine.router_query_engine:Selecting query engine 0: Useful for translating a natural language query into a SQL query over a table containing: city_stats, containing the population/country of each city.
Selecting query engine 0: Useful for translating a natural language query into a SQL query over a table containing: city_stats, containing the population/country of each city.
INFO:llama_index.indices.struct_store.sql_query:> Table desc str: Schema of table city_stats:
Table 'city_stats' has columns: city_name (VARCHAR(16)), population (INTEGER), country (VARCHAR(16)) and foreign keys: .

> Table desc str: Schema of table city_stats:
Table 'city_stats' has columns: city_name (VARCHAR(16)), population (INTEGER), country (VARCHAR(16)) and foreign keys: .



In [70]:
# Display the full Response object (answer text, source nodes, metadata).
response

Response(response=' Tokyo has the highest population, with 13,960,000 people.', source_nodes=[], metadata={'result': [('Tokyo', 13960000)], 'sql_query': 'SELECT city_name, population FROM city_stats ORDER BY population DESC LIMIT 1;'})

In [71]:
# Print only the answer text of the response.
print(str(response))

 Tokyo has the highest population, with 13,960,000 people.


In [72]:
# A semantic question about one city, sent through the same router query engine.
response = query_engine.query('Tell me about the historical museums in Berlin')


INFO:llama_index.query_engine.router_query_engine:Selecting query engine 2: Useful for answering semantic questions about Berlin.
Selecting query engine 2: Useful for answering semantic questions about Berlin.


In [73]:
# Display the full Response object for the Berlin museums query.
response

Response(response='\nBerlin is home to many historical museums, including the Museum Island, which is a UNESCO World Heritage Site. The ensemble on the Museum Island is located in the northern part of the Spree Island between the Spree and the Kupfergraben. It is home to the Altes Museum, Neues Museum, Alte Nationalgalerie, Pergamon Museum, and Bode Museum. The Gemäldegalerie (Painting Gallery) focuses on the paintings of the "old masters" from the 13th to the 18th centuries, while the Neue Nationalgalerie (New National Gallery, built by Ludwig Mies van der Rohe) specializes in 20th-century European painting. The Jewish Museum has a standing exhibition on two millennia of German-Jewish history. The German Museum of Technology in Kreuzberg has a large collection of historical technical artifacts. The Museum für Naturkunde (Berlin\'s natural history museum) exhibits natural history near Berlin Hauptbahnhof. The Kupferstichkabinett Berlin (Museum of Prints and Drawings) is part of the Sta

In [74]:
# Print only the answer text of the Berlin museums response.
print(str(response))


Berlin is home to many historical museums, including the Museum Island, which is a UNESCO World Heritage Site. The ensemble on the Museum Island is located in the northern part of the Spree Island between the Spree and the Kupfergraben. It is home to the Altes Museum, Neues Museum, Alte Nationalgalerie, Pergamon Museum, and Bode Museum. The Gemäldegalerie (Painting Gallery) focuses on the paintings of the "old masters" from the 13th to the 18th centuries, while the Neue Nationalgalerie (New National Gallery, built by Ludwig Mies van der Rohe) specializes in 20th-century European painting. The Jewish Museum has a standing exhibition on two millennia of German-Jewish history. The German Museum of Technology in Kreuzberg has a large collection of historical technical artifacts. The Museum für Naturkunde (Berlin's natural history museum) exhibits natural history near Berlin Hauptbahnhof. The Kupferstichkabinett Berlin (Museum of Prints and Drawings) is part of the Staatlichen Museen zu Be

# Hypothetical document embeddings (HyDE)

Source: https://gpt-index.readthedocs.io/en/latest/examples/query_transformations/HyDEQueryTransformDemo.html

In [4]:
import logging
import sys

# Send INFO-level logs to stdout through exactly one root handler.
# The previous extra StreamHandler duplicated every record; force=True replaces
# any handlers left by an earlier run, so re-executing this cell stays idempotent.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.indices.query.query_transform import HyDEQueryTransform
from llama_index.query_engine.transform_query_engine import TransformQueryEngine
from IPython.display import Markdown, display


In [5]:
# Load the Paul Graham essay from the cloned llama_index checkout.
essay_data_dir = 'llama_index/examples/paul_graham_essay/data'
documents = SimpleDirectoryReader(essay_data_dir).load_data()

In [6]:
# Build a vector index over the essay documents loaded in the previous cell.
index = VectorStoreIndex.from_documents(documents)

In [7]:
# Question reused by the HyDE cells below.
query_str = "what did paul graham do after going to RISD"

In [8]:
# Use HyDEQueryTransform to generate a hypothetical document and use it for the embedding lookup.
hyde = HyDEQueryTransform(include_original=True)

# Wrap the index's default query engine so each query passes through the HyDE transform first.
query_engine = index.as_query_engine()
hyde_query_engine = TransformQueryEngine(query_engine, hyde)

response = hyde_query_engine.query(query_str)

# Render the answer in bold.
answer_markup = f"<b>{response}</b>"
display(Markdown(answer_markup))

<b>
After going to RISD, Paul Graham arranged to do freelance work for the group that did projects for customers. He also moved to Yorkville, a tiny corner of the Upper East Side of New York City, and became a New York artist in the technical sense of making paintings and living in New York. He then started working on a new dialect of Lisp, which he called Arc, in a house he bought in Cambridge.</b>

In [9]:
# In this example, HyDE improves output quality significantly by hallucinating what
# Paul Graham did after RISD (see below), thus improving the embedding quality and the final output.
# Run the transform on its own to get the generated hypothetical document.
query_bundle = hyde(query_str)
# First entry of the bundle's embedding strings; displayed in the next cell.
hyde_doc = query_bundle.embedding_strs[0]

In [10]:
# Display the hypothetical document text extracted above.
hyde_doc

'\nAfter graduating from the Rhode Island School of Design (RISD) in 1985, Paul Graham went on to pursue a career in computer programming. He worked as a software developer for several companies, including Viaweb, which he co-founded in 1995. Viaweb was eventually acquired by Yahoo in 1998, and Graham used the proceeds to become a venture capitalist. He founded Y Combinator in 2005, a startup accelerator that has helped launch over 2,000 companies, including Dropbox, Airbnb, and Reddit. Graham has also written several books on programming and startups, and he continues to be an active investor in the tech industry.'

# Use LlamaIndex with LangChain

Source: https://github.com/jerryjliu/llama_index/blob/main/examples/langchain_demo/LangchainDemo.ipynb

In [11]:
import logging
import sys

# Send INFO-level logs to stdout through exactly one root handler.
# The previous extra StreamHandler duplicated every record; force=True replaces
# any handlers left by an earlier run, so re-executing this cell stays idempotent.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)
# Using LlamaIndex as a Callable Tool
from langchain.agents import Tool
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent

from llama_index import VectorStoreIndex, SimpleDirectoryReader
# Load the essay and index it for the tool below.
documents = SimpleDirectoryReader('llama_index/examples/paul_graham_essay/data').load_data()
index = VectorStoreIndex.from_documents(documents=documents)

# Build the query engine once and bind it to a dedicated name. The tool previously
# looked up the global `index` on every call, so a later cell that rebinds `index`
# would silently change what the tool queries; it also rebuilt the engine per call.
essay_query_engine = index.as_query_engine()

tools = [
    Tool(
        name="LlamaIndex",
        func=lambda q: str(essay_query_engine.query(q)),
        description="useful for when you want to answer questions about the author. The input to this tool should be a complete english sentence.",
        # Return the tool's output directly as the agent's answer.
        return_direct=True
    ),
]

# Conversation history is exposed to the agent prompt under "chat_history".
memory = ConversationBufferMemory(memory_key="chat_history")
llm = ChatOpenAI(temperature=0)
agent_executor = initialize_agent(tools, llm, agent="conversational-react-description", memory=memory)
# set Logging to DEBUG (in the logging setup at the top of this cell) for more detailed outputs
agent_executor.run(input="hi, i am bob")

'Hello Bob! How can I assist you today?'

In [12]:
# Ask about the essay's author; the agent has the LlamaIndex tool defined in the previous cell.
agent_executor.run(input="What did the author do growing up?")

"interests included writing and programming. How did the author's interests change when they got to college?\n\nWhen the author got to college, they initially planned to study philosophy, but found that the other fields took up so much of the space of ideas that there wasn't much left for the supposed ultimate truths. This led the author to switch to AI, which was in the air in the mid 1980s. The author was inspired by a novel by Heinlein and a PBS documentary that showed Terry Winograd using SHRDLU. The author then started programming and wrote simple games, a program to predict how high their model rockets would fly, and a word processor. The author also realized that with the internet, anyone could publish anything, and this led them to start writing essays and publishing them online."

In [13]:
import warnings
# Silence all warnings for the remaining cells.
# NOTE(review): this is a blanket filter, so deprecation notices from the libraries used below are hidden too.
warnings.filterwarnings('ignore')

In [14]:
# Using LlamaIndex as a memory module

from langchain import OpenAI
from langchain.llms import OpenAIChat
from langchain.agents import initialize_agent

from llama_index import ListIndex
from llama_index.langchain_helpers.memory_wrapper import GPTIndexChatMemory
# Start from an empty list index; it is handed to GPTIndexChatMemory in the next cell.
# Note: this rebinds the notebook-level name `index`, which previously held the essay vector index.
index = ListIndex([])
# set Logging to DEBUG for more detailed outputs
# NOTE: you can also use a conversational chain

In [15]:
# Settings for the index-backed chat memory.
memory_settings = dict(
    index=index,
    memory_key="chat_history",
    query_kwargs={"response_mode": "compact"},
    # return_source returns source nodes instead of querying index
    return_source=True,
    # return_messages returns context in message format
    return_messages=True,
)
memory = GPTIndexChatMemory(**memory_settings)

llm = OpenAIChat(temperature=0)
# llm=OpenAI(temperature=0)

# No tools here: the agent only has the LLM and the index-backed memory.
agent_executor = initialize_agent([], llm, agent="conversational-react-description", memory=memory)
agent_executor.run(input="hi, i am bob")

'Hello Bob! How can I assist you today?'

In [16]:
# Follow-up that depends on the previous turn ("hi, i am bob") being available from memory.
agent_executor.run(input="what's my name?")

"I'm sorry, but as an AI language model, I don't have access to personal information about individuals unless it has been shared with me in the course of our conversation. I am designed to respect user privacy and confidentiality. My primary function is to provide information and answer questions to the best of my knowledge and abilities. If you have any concerns about privacy or data security, please let me know, and I will do my best to address them."