In [None]:
!pip install pypdf llama_index

In [None]:
!wget https://privatewealth.goldmansachs.com/outlook/2025-isg-outlook.pdf

In [None]:
import os
from google.colab import userdata

# Copy the OPENAI_API_KEY secret from Colab's `userdata` store into the process
# environment, so the key itself never appears in the notebook source.
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

In [None]:
# Import necessary classes from the llama_index package
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

In [None]:
# Load only the downloaded outlook PDF. `input_files` restricts the reader to the listed
# paths. (Passing the file name to `load_data()` does not select a file: that method's
# first positional parameter is `show_progress`, so the reader would load every file in
# the directory instead.)
documents = SimpleDirectoryReader(input_files=["2025-isg-outlook.pdf"]).load_data()

# Build a vector index over the loaded documents so they can be searched semantically.
index = VectorStoreIndex.from_documents(documents)

# Expose the index as a query engine that answers natural-language questions from the
# most relevant indexed content.
query_engine = index.as_query_engine()

# Ask a question about the report.
response = query_engine.query("What is the 2025 outlook for US GDP?")

# Show the answer produced by the query engine.
print(response)

# Adding memory

In [None]:
# Create a chat engine that both remembers the conversation and retrieves from the index.
# chat_mode="simple" talks to the LLM directly and never consults the indexed report, so
# the questions about the report in the following cells would be answered without it.
# "condense_plus_context" keeps the chat history and pulls relevant context from the
# index on every turn.
chat_engine = index.as_chat_engine(chat_mode="condense_plus_context", verbose=True)
response = chat_engine.chat("Hi there")
print(response)

In [None]:
# Second turn of the same conversation: a question about the report's content.
recession_question = "What is the liklihood of recession in 2025?"
response = chat_engine.chat(recession_question)
print(response)

In [None]:
# Follow-up that only makes sense with the previous turn in memory ("that" refers to
# the earlier answer).
follow_up = "Can you expand on that"
response = chat_engine.chat(follow_up)
print(response)

In [None]:
# Display the messages accumulated by the chat engine so far; as the last expression in
# the cell, its value is rendered as the cell output.
chat_engine.chat_history

# Chunking

In [None]:
from llama_index.core import Settings

# Override the chunking parameters used when documents are split for indexing.
# NOTE(review): these are set on the shared `Settings` object, so they presumably also
# apply to every index built in later cells — confirm that is intended.
Settings.chunk_size, Settings.chunk_overlap = 1000, 50

# Re-index the already loaded documents so the new chunking parameters take effect.
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

# Repeat the earlier GDP question against the re-chunked index.
gdp_question = "What is the 2025 outlook for US GDP?"
response = query_engine.query(gdp_question)
print(response)


# Vector DB

In [None]:
!pip install faiss-cpu

In [None]:
!pip install llama-index-vector-stores-faiss

In [None]:
import faiss

# Embedding dimensionality of OpenAI's text-embedding-ada-002 model.
# NOTE(review): this must match the embedding model the index actually uses — confirm
# if the embedding model is ever changed.
d = 1536
# Flat FAISS index using L2 distance, created for vectors of dimension `d`.
faiss_index = faiss.IndexFlatL2(d)

In [None]:
from llama_index.core import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.vector_stores.faiss import FaissVectorStore

In [None]:
# Load only the outlook PDF. `input_files` restricts the reader to the listed paths.
# (Passing the file name to `load_data()` does not select a file: that method's first
# positional parameter is `show_progress`, so the reader would load every file in the
# directory instead.)
documents = SimpleDirectoryReader(input_files=["2025-isg-outlook.pdf"]).load_data()

# Wrap the FAISS index created above in a LlamaIndex vector store and hand it to the
# index through a storage context, so embeddings are stored in FAISS.
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

# Ask the same GDP question against the FAISS-backed index.
query_engine = index.as_query_engine()
response = query_engine.query("What is the 2025 outlook for US GDP?")
print(response)

In [None]:
# Write the index's storage context to disk. "./storage" is the library's default
# persist directory, spelled out here because the load cell reads from the same path.
index.storage_context.persist(persist_dir="./storage")

In [None]:
# Rebuild the index from the files previously persisted to disk: first restore the FAISS
# vector store, then a storage context around it, then the index itself.
persist_dir = "./storage"
vector_store = FaissVectorStore.from_persist_dir(persist_dir)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    persist_dir=persist_dir,
)
index = load_index_from_storage(storage_context=storage_context)

# Wikipedia

Other readers: https://llamahub.ai/?tab=readers

In [None]:
!pip install llama-index-readers-wikipedia

In [None]:
!pip install wikipedia

In [None]:
from llama_index.readers.wikipedia import WikipediaReader

# Initialize WikipediaReader
reader = WikipediaReader()

# Load the listed Wikipedia pages as documents.
documents = reader.load_data(pages=["List of most-visited websites", "ChatGPT", "Freemium"])

# The index over these documents is built in the next cell; building it here as well
# would embed the same documents twice for no benefit.

In [None]:
# Index the Wikipedia documents loaded in the previous cell.
index = VectorStoreIndex.from_documents(documents)

# Query engine over the Wikipedia index.
query_engine = index.as_query_engine()

# Ask a question that the loaded Wikipedia pages should be able to answer.
websites_question = "Top 10 most visited websites in the Internet"
response = query_engine.query(websites_question)

# Show the answer about the most-visited websites.
print(response)