In [None]:
!pip install openai
!pip install sentence-transformers
!pip install langchain pypdf langchain-openai llama-index llama-index-question-gen-openai pypdf #tiktoken chromadb

In [None]:
!pip install nest-asyncio
import nest_asyncio
nest_asyncio.apply()

# RAG

In [None]:
!wget https://www.goldmansachs.com/pdfs/insights/podcasts/episodes/ai-tom-acemoglu-covello/transcript.pdf

In [None]:
import os
from google.colab import userdata

# The Colab secret and the environment variable share the same name, so spell it once.
OPENAI_KEY_NAME = "OPENAI_API_KEY"

# Copy the key from Colab's secret store into the process environment.
os.environ[OPENAI_KEY_NAME] = userdata.get(OPENAI_KEY_NAME)

# Routing

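Route each query to either a summarization engine or a vector-retrieval engine, depending on what the question asks for.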

In [None]:
# Import the index types, the directory reader and the global settings from llama_index
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, SummaryIndex
from llama_index.core import Settings

# Load only "transcript.pdf".
# The file is selected through `input_files`: `load_data()` has no filename parameter
# (its first positional argument is `show_progress`), so passing the name there would
# make the reader load every file found in "./" instead of just this one.
documents = SimpleDirectoryReader(input_files=["transcript.pdf"]).load_data()

# Initialize settings (set chunk size), then split the documents into nodes
Settings.chunk_size = 1024
nodes = Settings.node_parser.get_nodes_from_documents(documents)

# Build two indexes over the same nodes:
#   - a SummaryIndex, queried below in "tree_summarize" mode for summarization questions
#   - a VectorStoreIndex, which creates a vector representation of each node,
#     suitable for semantic searching
summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)

# One query engine per index; these are what the router chooses between
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
vector_query_engine = vector_index.as_query_engine()


In [None]:
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import PydanticSingleSelector
from llama_index.core.tools import QueryEngineTool


# Wrap each query engine as a tool; the description states which kind of
# question the tool is meant for.
summary_tool = QueryEngineTool.from_defaults(
    description="Useful for summarization questions related to the data source",
    query_engine=summary_query_engine,
)
vector_tool = QueryEngineTool.from_defaults(
    description="Useful for retrieving specific context related to the data source",
    query_engine=vector_query_engine,
)

# A single-choice selector: exactly one of the tools below handles each query.
tool_selector = PydanticSingleSelector.from_defaults()
router_tools = [summary_tool, vector_tool]

query_engine = RouterQueryEngine(
    selector=tool_selector,
    query_engine_tools=router_tools,
)

In [None]:
# Send a pointed question through the router and show the answer text
specific_question = "Is AI overhyped?"
response = query_engine.query(specific_question)
print(str(response))

In [None]:
# Send a summarization request through the router and show the answer text
summary_question = "Summarize the document in 4 bullet points"
response = query_engine.query(summary_question)
print(str(response))

# Sub Question Query Engine

In [None]:
# Import necessary classes and modules from llama_index.core and llama_index.core.tools
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core import Settings
from IPython.display import display, Markdown

# Load only "transcript.pdf" from the current directory.
# The file is selected through `input_files`: `load_data()` has no filename parameter
# (its first positional argument is `show_progress`), so passing the name there would
# make the reader load every file found in "./" instead of just this one.
documents = SimpleDirectoryReader(input_files=["transcript.pdf"]).load_data()

# Build a VectorStoreIndex from the loaded documents
# This index will allow efficient querying of the document content
vector_query_engine = VectorStoreIndex.from_documents(
    documents,
    use_async=True,  # Enable asynchronous processing for faster performance
).as_query_engine()  # Convert the index to a query engine

# Define a list of query engine tools, each with its own metadata
# This setup is necessary for the SubQuestionQueryEngine
query_engine_tools = [
    QueryEngineTool(
        query_engine=vector_query_engine,  # Use the vector_query_engine built above
        metadata=ToolMetadata(
            name="documents",  # Name of the tool
            # NOTE(review): the source file is a podcast transcript; a more specific
            # description may help sub-question generation - confirm before changing.
            description="Report",  # Description of the tool
        ),
    ),
]

# Create an instance of SubQuestionQueryEngine using the default settings
# This query engine can handle sub-questions and use the provided tools for querying
query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,  # Provide the list of query engine tools
    use_async=True,  # Enable asynchronous processing
)

# Query the SubQuestionQueryEngine with the question "Is AI overhyped?"
response = query_engine.query(
    "Is AI overhyped?"
)

# Print the response from the query
print(response.response)

In [None]:
# Render the answer text as formatted Markdown rather than plain text
answer_markdown = Markdown(response.response)
display(answer_markdown)

# Calling the OpenAI Assistants API (Code Interpreter)

In [None]:
from llama_index.agent.openai import OpenAIAssistantAgent

# Create a new assistant with the code-interpreter tool enabled.
# verbose=True is passed so the agent reports its intermediate steps.
agent = OpenAIAssistantAgent.from_new(
    name="Python agent",
    openai_tools=[{"type": "code_interpreter"}],
    instructions="You are an expert at writing python code to solve problems.",
    verbose=True
)

# Ask for both an answer and the code that produced it.
# (Prompt typo fixed: "Return and answer" -> "Return an answer".)
response = agent.chat(
    """Generate code to answer the following question:
    How much is the us population likely to grow to by 2030?
    Return an answer and the code used."""
)

In [None]:
# Show the agent's reply as plain text (print applies str() to the response)
print(response)

In [None]:
# Render the agent's reply as formatted Markdown rather than plain text
agent_answer_markdown = Markdown(response.response)
display(agent_answer_markdown)

In [None]:
# Compound-growth projection: population_2030 = population_2023 * (1 + rate) ** years

# Starting point: 332 million people in 2023
population_2023 = 332_000_000

# Assumed average annual growth rate of 0.7%, expressed as a fraction
annual_growth_rate = 0.7 / 100

# Projection horizon in years
base_year = 2023
target_year = 2030
years = target_year - base_year

# Compound the yearly growth over the horizon, then scale the starting population
growth_factor = (1 + annual_growth_rate) ** years
population_2030 = population_2023 * growth_factor
population_2030