In [None]:
# Installing required libraries
!pip install llama_index
!pip install huggingface
!pip install llama-index-embeddings-huggingface
!pip install llama-index-llms-huggingface
!pip install "transformers[torch]" "huggingface_hub[inference]"
!pip install llama-index-llms-cohere

Collecting llama_index
  Downloading llama_index-0.10.39-py3-none-any.whl (6.8 kB)
Collecting llama-index-agent-openai<0.3.0,>=0.1.4 (from llama_index)
  Downloading llama_index_agent_openai-0.2.5-py3-none-any.whl (13 kB)
Collecting llama-index-cli<0.2.0,>=0.1.2 (from llama_index)
  Downloading llama_index_cli-0.1.12-py3-none-any.whl (26 kB)
Collecting llama-index-core<0.11.0,>=0.10.39 (from llama_index)
  Downloading llama_index_core-0.10.39.post1-py3-none-any.whl (15.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting llama-index-embeddings-openai<0.2.0,>=0.1.5 (from llama_index)
  Downloading llama_index_embeddings_openai-0.1.10-py3-none-any.whl (6.2 kB)
Collecting llama-index-indices-managed-llama-cloud<0.2.0,>=0.1.2 (from llama_index)
  Downloading llama_index_indices_managed_llama_cloud-0.1.6-py3-none-any.whl (6.7 kB)
Collecting llama-index-legacy<0.10.0,>=0.9.48 (from llama_index)
  Downl

In [None]:
# Importing
import glob
import json
import os
from getpass import getpass

from llama_index.core import SimpleDirectoryReader, ServiceContext
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex, SummaryIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.vector_stores import MetadataFilters, FilterCondition
from llama_index.core.tools import FunctionTool, QueryEngineTool
from llama_index.core.objects import ObjectIndex
from llama_index.core.agent import AgentRunner, ReActAgent
from llama_index.llms.cohere import Cohere
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.bridge.pydantic import Field, PrivateAttr


from transformers import AutoTokenizer
# from llama_index.llms.huggingface import HuggingFaceLLM

# Runtime setup: bring in both helpers first, then apply the one-off global configuration.
import nest_asyncio
import torch

# Patch asyncio so that event loops may be nested; the async query engines used later
# would otherwise clash with an already-running loop (as is typical inside a notebook).
nest_asyncio.apply()

# Make the CPU the default device for tensors created by torch.
torch.set_default_device("cpu")

In [None]:
# Setting up the LLM

# Credentials must not live in the notebook source: take the key from the
# COHERE_API_KEY environment variable, and fall back to a hidden interactive
# prompt so the notebook still works when the variable is not set.
cohere_api_key = os.environ.get("COHERE_API_KEY") or getpass("Cohere API key: ")

llm = Cohere(api_key=cohere_api_key)

In [None]:
# Using a local HuggingFace model to create the embeddings. Cohere embeddings would also
# work, but access to the Cohere API is limited, so embedding is done on the CPU instead.

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5", device='cpu')

# `ServiceContext` is deprecated in llama-index 0.10 (it emits a deprecation warning here)
# and is rejected by later releases. The global `Settings` object is its replacement:
# components read these defaults on their own, so no `service_context=` argument has to
# be threaded through the calls below.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 1024

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  service_context = ServiceContext.from_defaults(


In [None]:
# Reading all the PDFs in the directory

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes the
# document list (and therefore the order of the tools built from it) reproducible.
documents = sorted(glob.glob('*.pdf'))
documents

['Boeing_787_Dreamliner.pdf',
 'Antonov_An-225_Mriya.pdf',
 'Boeing_777.pdf',
 'Airbus_A350.pdf',
 'Airbus_A320_family.pdf',
 'Boeing_747.pdf',
 'Airbus_A380.pdf',
 'Concorde.pdf',
 'Airbus_A321.pdf']

In [None]:
# Creating the tools for each document
#
# Every PDF gets two query tools over the same chunks:
#   * vector_tool_<file>  - similarity search (top 2 chunks) for specific questions
#   * summary_tool_<file> - tree-summarize over all chunks for overview questions
# Each tool carries its own description: without one, every tool gets the same generic
# default text and the sub-question generator can only route on the tool name.

# Same configuration for every document, so build the splitter once outside the loop.
splitter = SentenceSplitter(chunk_size=1024)

tools = []

for document in documents:
    # e.g. "Boeing_747.pdf" -> "Boeing 747"; used only inside the tool descriptions.
    subject = document.rsplit(".", 1)[0].replace("_", " ")

    documents_data = SimpleDirectoryReader(input_files=[document]).load_data()
    nodes = splitter.get_nodes_from_documents(documents_data)

    vector_index = VectorStoreIndex(nodes, embed_model=embed_model)
    vector_query_engine = vector_index.as_query_engine(llm=llm, similarity_top_k=2)
    vector_query_tool = QueryEngineTool.from_defaults(
        name=f"vector_tool_{document}",
        description=(
            f"Answers specific, fact-based questions about the {subject} by retrieving "
            f"the most relevant passages of {document}."
        ),
        query_engine=vector_query_engine,
    )

    summary_index = SummaryIndex(nodes)
    summary_query_engine = summary_index.as_query_engine(
        llm=llm, response_mode='tree_summarize', use_async=True
    )
    summary_query_tool = QueryEngineTool.from_defaults(
        name=f"summary_tool_{document}",
        description=(
            f"Answers broad or overview questions about the {subject} by summarizing "
            f"the whole of {document}."
        ),
        query_engine=summary_query_engine,
    )

    tools.append(vector_query_tool)
    tools.append(summary_query_tool)

In [None]:
# obj_index = ObjectIndex.from_objects(
#     tools,
#     index_cls= VectorStoreIndex, service_context=service_context
# )

# obj_retriever = obj_index.as_retriever(similarity_top_k=1)

In [None]:
# Build the sub-question query engine: it is handed every per-document tool and uses
# the same LLM that backs the tools themselves.
query_engine = SubQuestionQueryEngine.from_defaults(llm=llm, query_engine_tools=tools)

In [None]:
# Ask an open-ended question about a single aircraft.
question = 'Tell me about Boeing 747?'
response = query_engine.query(question)

Generated 5 sub questions.
[1;3;38;2;237;90;200m[vector_tool_Boeing_747.pdf] Q: What is the maximum speed of Boeing 747?
[0m[1;3;38;2;90;149;237m[summary_tool_Boeing_747.pdf] Q: What is the wingspan of Boeing 747?
[0m[1;3;38;2;11;159;203m[summary_tool_Airbus_A380.pdf] Q: Does Boeing 747 have a larger wingspan than Airbus A380?
[0m[1;3;38;2;155;135;227m[vector_tool_Boeing_747.pdf] Q: What are the different models of Boeing 747?
[0m[1;3;38;2;237;90;200m[summary_tool_Concorde.pdf] Q: How does the fuel efficiency of Boeing 747 compare to Concorde?
[0m[1;3;38;2;237;90;200m[vector_tool_Boeing_747.pdf] A: The maximum speed of the Boeing 747 is Mach 0.855 or 583 mph (939 km/h).
[0m[1;3;38;2;155;135;227m[vector_tool_Boeing_747.pdf] A: There are several models of the Boeing 747, a wide-body jet airliner manufactured by Boeing. Here are the different models:
- Boeing 747-100: The initial model, introduced in 1970.
- Boeing 747-200: An upgraded version of the -100, introduced in 1971,

In [None]:
# Show only the final synthesized answer text of the last query.
answer_text = response.response
print(answer_text)

The Boeing 747 is a wide-body jet airliner boasting a maximum speed of Mach 0.855, or 583 mph (939 km/h). Its wingspan spans 195 feet 8 inches (59.6 metres).

Several models of the Boeing 747 are available:

- Boeing 747-100: The original version introduced in 1970.
- Boeing 747-200: An upgraded model launched in 1971.
- Boeing 747SP: A shorter version introduced in 1976.
- Boeing 747-300: A model launched in 1983 with a stretched upper deck.
- Boeing 747-400: A heavier variant introduced in 1989.
- Boeing 747-8: A stretched version of the 747, launched in 2005.

The fuel efficiency of the Boeing 747 is significantly better than that of the Concorde.


In [None]:
# Ask about known problems of one specific aircraft.
question = 'What are some of the issues with concorde?'
response = query_engine.query(question)

Generated 2 sub questions.
[1;3;38;2;237;90;200m[vector_tool_Concorde.pdf] Q: Search vector_tool_Concorde.pdf for issues with concorde
[0m[1;3;38;2;90;149;237m[summary_tool_Concorde.pdf] Q: Search summary_tool_Concorde.pdf for issues with concorde
[0m[1;3;38;2;237;90;200m[vector_tool_Concorde.pdf] A: I have searched the PDF for the term 'issues' and found some information that might be relevant. 

There is a section titled 'Human Factor Issues Emerge from Concorde Crash Investigation'. It discusses how an investigation into a Concorde crash revealed issues arising from human factors, including a series of errors made by the flight crew, and design features of the aircraft that increased the risks of such accidents. 

Another problem discussed in the PDF is the noise pollution caused by Concorde flights, which led to complaints from residents living near airports. This issue apparently impacted the environmental decision-making process regarding the Concorde.
[0m[1;3;38;2;90;149;

In [None]:
# Show only the final synthesized answer text of the last query.
answer_text = response.response
print(answer_text)

Here are some of the issues encountered by the Concorde:

- Human error from the flight crew and design features that increased the risk of accidents emerged as problems following an investigation into a Concorde crash
- It had a high angle of attack at low speeds, which caused issues during development and required a major redesign
- Considerable sales difficulties due to poor sales performance and high per-unit costs
- The sonic boom it created limited its ability to operate over land, restricting it to transoceanic flights
- High maintenance and operational costs, partly due to extensive modifications required to fix safety issues
- The 1973–74 stock market crash and the oil crisis reduced the prospects of Concorde sales due to its high fuel consumption
- Noise pollution from flights caused complaints from residents living near airports.


In [None]:
# Ask about events involving the Antonov aircraft (typo "WHat" in the prompt fixed).
response = query_engine.query('What happened to Antonov?')

Generated 2 sub questions.
[1;3;38;2;237;90;200m[vector_tool_Antonov_An-225_Mriya.pdf] Q: Search Antonov files for information on recent developments
[0m[1;3;38;2;90;149;237m[summary_tool_Antonov_An-225_Mriya.pdf] Q: Search Antonov files for any mentions of issues or incidents
[0m[1;3;38;2;237;90;200m[vector_tool_Antonov_An-225_Mriya.pdf] A: Recent developments regarding Antonov include: 

- On February 3, 2021, Rytis Beresnevicius reported that Ukraine was considering completing a second Antonov An-225 Mriya. The article, published by Aerotime, mentioned that the fate of the second An-225 was undecided, despite the country's intention to resume the project.
- On March 26, 2020, David Kaminski-Morrow wrote about the An-225's return to flight after modernisation. The aircraft's return to service was marked by its participation in a training flight over Ukraine.
- According to a 2020 article published by Military Factory, the An-225 was classified as a heavy lift strategic long-rang

In [None]:
# Show only the final synthesized answer text of the last query.
answer_text = response.response
print(answer_text)

I found several events involving Antonov, a Ukrainian aircraft company:

- The Antonov An-225 aircraft was destroyed on February 27, 2022, during the Battle of Antonov Airport amidst the Russian invasion of Ukraine. The aircraft was reportedly intact two days prior, but it was ultimately destroyed in its hangar, reportedly intact. Antonov staff faced scrutiny for their handling of the situation, with some alleging that they did not do enough to prevent the loss of the aircraft. 
- Two of three Antonov officials were detained by Ukrainian authorities for allegedly preventing the Ukrainian National Guard from setting up defences at Hostomel Airport before the Russian invasion. The former head of Antonov was also charged with "official negligence".
- Ukraine considered resuming a project to complete a second Antonov An-225 Mriya, which was partially constructed during the late 1980s. The fate of this second aircraft remains undecided.
- The An-225 aircraft returned to flight after moderni

In [None]:
# Ask a comparison question that has to draw on several documents.
question = 'Which is the biggest aircraft among all?'
response = query_engine.query(question)

Generated 6 sub questions.
[1;3;38;2;237;90;200m[summary_tool_Airbus_A380.pdf] Q: Can we consider the Airbus A380 as one of the biggest aircraft?
[0m[1;3;38;2;90;149;237m[vector_tool_Airbus_A380.pdf] Q: What are the key characteristics of Airbus A380 that make it one of the biggest aircraft?
[0m[1;3;38;2;11;159;203m[summary_tool_Boeing_747.pdf] Q: Are there any other aircraft that rival the Airbus A380 in terms of size?
[0m[1;3;38;2;155;135;227m[vector_tool_Boeing_747.pdf] Q: How do the dimensions of Boeing 747 compare to Airbus A380?
[0m[1;3;38;2;237;90;200m[summary_tool_Antonov_An-225_Mriya.pdf] Q: Is there any other aircraft that is bigger than Airbus A380?
[0m[1;3;38;2;90;149;237m[vector_tool_Antonov_An-225_Mriya.pdf] Q: What are some key specifications of Antonov An-225 that showcase its size?
[0m[1;3;38;2;155;135;227m[vector_tool_Boeing_747.pdf] A: The Boeing 747 and Airbus A380 are both wide-body jet airliners. Here's a comparison of their dimensions:

Boeing 747:
-

In [None]:
# Show only the final synthesized answer text of the last query.
answer_text = response.response
print(answer_text)

The Antonov An-225 is bigger than the Airbus A380, which is already considered one of the biggest aircraft. The Antonov An-225 holds the title of being the heaviest aircraft ever built, with a maximum takeoff weight of 640 tonnes. It also has the largest wingspan of any operational aircraft, measuring an impressive 88.4 metres. 

Some key specifications of the An-225 include:
- Wingspan: 88.4 metres (290 feet)
- Length: 84 metres (275 feet)
- Height: 18.1 metres (59 feet)
- Cargo Hold: 1,300 cubic metres (46,000 cubic feet)
- Maximum Takeoff Weight: 640,000 kilograms
- Fuel Capacity: 375,000 litres

The Airbus A380, which is the world's largest passenger airliner, is quite a bit smaller in comparison:
- Length: 72.72 metres 
- Wingspan: 79.75 metres
- Height: 24.09 metres


In [None]:
# Ask a yes/no question (typo "aicraft" and the missing question mark in the prompt fixed).
response = query_engine.query('Is Concorde the fastest aircraft?')

Generated 2 sub questions.
[1;3;38;2;237;90;200m[vector_tool_Concorde.pdf] Q: What is the fastest aircraft?
[0m[1;3;38;2;90;149;237m[summary_tool_Concorde.pdf] Q: Does the Concorde hold the record for fastest aircraft?
[0m[1;3;38;2;237;90;200m[vector_tool_Concorde.pdf] A: The fastest aircraft mentioned in the text is the Concorde. It could maintain a supercruise up to Mach 2.04, which equates to 2,170 km/h or 1,350 mph. The Concorde's top cruising speed was Mach 2.02 (~2,154 km/h or 1,338 mph) for optimum fuel consumption.
[0m[1;3;38;2;90;149;237m[summary_tool_Concorde.pdf] A: The Concorde does hold the record for being the fastest airliner or commercial passenger aircraft, with a maximum recorded speed of Mach 2.04, or around 1,354 mph. However, it is not the fastest aircraft ever built. That title goes to either the North American XB-70 Valkyrie, a prototype bomber which had a top speed of Mach 3.0+ or the SR-71 Blackbird, a reconnaissance aircraft which could reach a speed of

In [None]:
# Print the answer like the other result cells do; a bare expression would display the
# quoted string repr instead of the readable text.
print(response.response)

"Concorde is the fastest airliner or commercial passenger aircraft, with a maximum recorded speed of Mach 2.04 or around 1,354 mph. It can maintain a supercruise up to Mach 2.04 and has a top cruising speed of Mach 2.02 for optimal fuel consumption. However, it's not the fastest aircraft ever built. That distinction goes to either the North American XB-70 Valkyrie, a prototype bomber, or the SR-71 Blackbird, a reconnaissance aircraft."