In [18]:
! pip install llama-index nltk milvus pymilvus langchain openai python-dotenv requests

In [19]:
import nltk
import ssl

# If this Python build exposes the private unverified-context factory, install it
# as the default HTTPS context factory before downloading the NLTK corpus.
# NOTE(review): presumably a workaround for certificate errors during the NLTK
# download; it turns off certificate verification for code that relies on
# ssl._create_default_https_context -- confirm this is acceptable.
if hasattr(ssl, "_create_unverified_context"):
    _create_unverified_https_context = ssl._create_unverified_context
    ssl._create_default_https_context = _create_unverified_https_context

# Fetch the "stopwords" corpus; the call's return value is the cell output.
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/yujiantang/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [13]:
from llama_index import (
    GPTVectorStoreIndex, 
    GPTSimpleKeywordTableIndex, 
    SimpleDirectoryReader,
    LLMPredictor,
    ServiceContext,
    StorageContext
)
from langchain.llms.openai import OpenAIChat

In [12]:
import os
from dotenv import load_dotenv
import openai
# Load variables from a .env file, then pass the OpenAI key from the environment
# to the openai module (None when OPENAI_API_KEY is not set).
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

from llama_index.vector_stores import MilvusVectorStore
from milvus import default_server

# Start the default Milvus server shipped with the `milvus` package and point a
# Milvus-backed vector store at it on localhost.
# NOTE(review): nothing visible in this notebook stops the server afterwards.
default_server.start()
vector_store = MilvusVectorStore(host="127.0.0.1", port=default_server.listen_port)




    __  _________ _   ____  ______
   /  |/  /  _/ /| | / / / / / __/
  / /|_/ // // /_| |/ / /_/ /\ \
 /_/  /_/___/____/___/\____/___/ {Lite}

 Welcome to use Milvus!

 Version:   v2.2.8-lite
 Process:   49630
 Started:   2023-05-18 16:00:02
 Config:    /Users/yujiantang/.milvus.io/milvus-server/2.2.8/configs/milvus.yaml
 Logs:      /Users/yujiantang/.milvus.io/milvus-server/2.2.8/logs

 Ctrl+C to exit ...


[93m[has_collection] retry:4, cost: 0.27s, reason: <_MultiThreadedRendezvous: StatusCode.UNAVAILABLE, internal: Milvus Proxy is not ready yet. please wait>[0m
[93m[has_collection] retry:5, cost: 0.81s, reason: <_MultiThreadedRendezvous: StatusCode.UNAVAILABLE, internal: Milvus Proxy is not ready yet. please wait>[0m


In [5]:
# Wikipedia page titles of the cities used throughout the notebook.
wiki_titles = [
    "Toronto",
    "Seattle",
    "San Francisco",
    "Chicago",
    "Boston",
    "Washington, D.C.",
    "Cambridge, Massachusetts",
    "Houston",
]

In [6]:
from pathlib import Path

import requests
# Download the plain-text extract of every Wikipedia page in `wiki_titles`
# and store it as data/<title>.txt.

# The output directory does not depend on the title, so create it once up front;
# exist_ok=True keeps the cell safe to re-run.
data_path = Path('data')
data_path.mkdir(exist_ok=True)

for title in wiki_titles:
    http_response = requests.get(
        'https://en.wikipedia.org/w/api.php',
        params={
            'action': 'query',
            'format': 'json',
            'titles': title,
            'prop': 'extracts',
            # 'exintro': True,
            'explaintext': True,
        },
        # Do not let a stalled connection hang the notebook indefinitely.
        timeout=30,
    )
    # Surface HTTP errors here rather than as a confusing JSON/KeyError below.
    http_response.raise_for_status()
    response = http_response.json()

    # The 'pages' mapping is keyed by page id; a single title was requested,
    # so take the first (only) entry.
    page = next(iter(response['query']['pages'].values()))
    wiki_text = page['extract']

    # Write as UTF-8 explicitly: article text contains non-ASCII characters and
    # the platform default encoding is not UTF-8 everywhere.
    with open(data_path / f"{title}.txt", 'w', encoding='utf-8') as fp:
        fp.write(wiki_text)

In [7]:
# Read each downloaded article back from data/<title>.txt, keyed by city title.
city_docs = {
    city: SimpleDirectoryReader(input_files=[f"data/{city}.txt"]).load_data()
    for city in wiki_titles
}

In [10]:
# Wrap a temperature-0 gpt-3.5-turbo chat model in an LLMPredictor and build the
# service context that the indices and query engines below are given.
llm_predictor_chatgpt = LLMPredictor(
    llm=OpenAIChat(model_name="gpt-3.5-turbo", temperature=0),
)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor_chatgpt,
)



In [14]:
# Storage context backed by the Milvus vector store created earlier.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [15]:
# Build one vector index per city from its loaded documents.
# NOTE(review): every index receives the same storage_context -- confirm that
# sharing a single vector store across all cities is intended.
city_indices = {
    city: GPTVectorStoreIndex.from_documents(
        city_docs[city],
        service_context=service_context,
        storage_context=storage_context,
    )
    for city in wiki_titles
}

# One-line summary text per city, passed to the graph alongside the indices.
index_summaries = {
    city: f"Wikipedia articles about {city}" for city in wiki_titles
}

In [16]:
from llama_index.indices.composability import ComposableGraph

In [20]:
# Compose the per-city indices under a keyword-table root index. Both dicts were
# filled in wiki_titles order, so indices and summaries line up by position.
graph = ComposableGraph.from_indices(
    GPTSimpleKeywordTableIndex,
    list(city_indices.values()),
    list(index_summaries.values()),
    max_keywords_per_chunk=50,
)

In [21]:
from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
# Query transform driven by the ChatGPT predictor; verbose=True produces the
# "Current query" / "New query" trace seen in the outputs below.
decompose_transform = DecomposeQueryTransform(llm_predictor_chatgpt, verbose=True)

In [23]:
from llama_index.query_engine.transform_query_engine import TransformQueryEngine
# Registry of query engines keyed by index id; filled in by the next cell.
custom_query_engines = dict()

In [25]:
# Give every city index a query engine wrapped in the decompose transform; the
# index's own summary is handed to the transform as extra info.
for city_index in city_indices.values():
    base_engine = city_index.as_query_engine(service_context=service_context)
    custom_query_engines[city_index.index_id] = TransformQueryEngine(
        base_engine,
        decompose_transform,
        transform_extra_info={'index_summary': city_index.index_struct.summary},
    )

# The root index gets a plain engine that tree-summarizes the results.
root_index = graph.root_index
custom_query_engines[root_index.index_id] = root_index.as_query_engine(
    retriever_mode='simple',
    response_mode='tree_summarize',
    service_context=service_context,
)

# Graph-level engine that dispatches to the engines registered above.
query_engine_decompose = graph.as_query_engine(custom_query_engines=custom_query_engines)

In [26]:
# Ask a question spanning three cities through the decomposing engine.
response_chatgpt = query_engine_decompose.query("Compare and contrast the airports in Seattle, Houston, and Toronto. ")

[33;1m[1;3m> Current query: Compare and contrast the airports in Seattle, Houston, and Toronto. 
[0m[38;5;200m[1;3m> New query: What is the name of the airport in Seattle?
[0m[33;1m[1;3m> Current query: Compare and contrast the airports in Seattle, Houston, and Toronto. 
[0m[38;5;200m[1;3m> New query: What is the name of the airport in Seattle?
[0m[33;1m[1;3m> Current query: Compare and contrast the airports in Seattle, Houston, and Toronto. 
[0m[38;5;200m[1;3m> New query: What are the major airports in Houston?
[0m[33;1m[1;3m> Current query: Compare and contrast the airports in Seattle, Houston, and Toronto. 
[0m[38;5;200m[1;3m> New query: What are the major airports in Houston?
[0m[33;1m[1;3m> Current query: Compare and contrast the airports in Seattle, Houston, and Toronto. 
[0m[38;5;200m[1;3m> New query: What are some notable features of the Toronto airport?
[0m[33;1m[1;3m> Current query: Compare and contrast the airports in Seattle, Houston, and Tor

In [None]:
# print() applies str() to its argument, so no explicit conversion is needed.
print(response_chatgpt)

Seattle has one major airport called Seattle-Tacoma International Airport, while Houston has two major airports called George Bush Intercontinental Airport and William P. Hobby Airport, as well as a third municipal airport called Ellington Airport. Toronto's busiest airport is called Toronto Pearson International Airport and is located on the city's western boundary with Mississauga. It offers limited commercial and passenger service to nearby destinations in Canada and the United States. Seattle-Tacoma International Airport and George Bush Intercontinental Airport are both major international airports, while William P. Hobby Airport and Ellington Airport are smaller and serve more regional destinations. Toronto Pearson International Airport is Canada's busiest airport and offers a direct link to Union Station through the Union Pearson Express train service.


In [28]:
# Baseline for comparison: the same graph, but each city index is queried
# directly, without the decompose transform.
custom_query_engines = {
    city_index.index_id: city_index.as_query_engine(service_context=service_context)
    for city_index in city_indices.values()
}

# Root index engine, configured the same way as in the decomposing setup.
root_index = graph.root_index
custom_query_engines[root_index.index_id] = root_index.as_query_engine(
    retriever_mode='simple',
    response_mode='tree_summarize',
    service_context=service_context,
)

query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines)

In [29]:
# Same airport question, this time through the engine without query decomposition.
response_chatgpt = query_engine.query("Compare and contrast the airports in Seattle, Houston, and Toronto. ")
str(response_chatgpt)

'The context information provided does not contain enough information to answer the question.'

In [30]:
# A second cross-city question through the decomposing engine.
response_chatgpt = query_engine_decompose.query("Compare and contrast the sports environment of Houston and Boston. ")

[33;1m[1;3m> Current query: Compare and contrast the sports environment of Houston and Boston. 
[0m[38;5;200m[1;3m> New query: What sports teams are based in Houston?
[0m[33;1m[1;3m> Current query: Compare and contrast the sports environment of Houston and Boston. 
[0m[38;5;200m[1;3m> New query: What sports teams are based in Houston?
[0m[33;1m[1;3m> Current query: Compare and contrast the sports environment of Houston and Boston. 
[0m[38;5;200m[1;3m> New query: What are some notable sports teams based in Boston?
[0m[33;1m[1;3m> Current query: Compare and contrast the sports environment of Houston and Boston. 
[0m[38;5;200m[1;3m> New query: What are some notable sports teams based in Boston?
[0m

In [31]:
# Show the answer to the sports question as the cell's output.
str(response_chatgpt)

"Houston has sports teams for every major professional league except the National Hockey League, while Boston has teams for the NFL, MLB, NBA, and NHL. Both cities have professional soccer teams, with Houston having a Major League Soccer franchise and Boston having a National Women's Soccer League team. Boston also has several college sports teams and Esports teams, while Houston does not have any notable college sports teams or Esports teams. Both cities are known for hosting major sporting events, with Boston hosting the Boston Marathon and the Head of the Charles Regatta, while Houston does not have any comparable events. Overall, Boston has a more diverse sports environment with teams in all major professional leagues and a strong presence in college sports and Esports."