In [None]:
import os
import json
import openai
from langchain.llms import AzureOpenAI
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding
from llama_index import (
    GPTVectorStoreIndex,
    SimpleDirectoryReader, 
    LLMPredictor,
    PromptHelper,
    ServiceContext
)
from llama_index.indices.knowledge_graph.base import GPTKnowledgeGraphIndex
import logging
import sys
import nest_asyncio
# NOTE(review): presumably needed so that async code can run inside the
# notebook kernel's already-running event loop (a later cell builds a
# ResponseSynthesizer with use_async=True) — confirm.
nest_asyncio.apply()

# Route log records at INFO level and above to stdout so they show up in the
# cell output.
# A single basicConfig call installs exactly one stdout handler; adding a second
# StreamHandler on the same stream would print every record twice.
# force=True replaces any handlers already attached to the root logger, so the
# level and stream take effect even when logging was configured earlier.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True) # logging.DEBUG for more verbose output

In [None]:
from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext
from llama_index.indices.knowledge_graph.base import GPTKnowledgeGraphIndex
from langchain import OpenAI
from IPython.display import Markdown, display

In [None]:
# Azure OpenAI connection settings, stored on the `openai` module and reused by
# the model constructors in later cells.
# The endpoint and key are secrets, so they are read from the environment
# rather than being written into the notebook:
#   OPENAI_API_BASE - endpoint URL of the Azure OpenAI resource
#   OPENAI_API_KEY  - key for that resource
openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = "2023-03-15-preview"
openai.api_key = os.getenv("OPENAI_API_KEY")

# Fail here with a readable message instead of deep inside a later API call.
_missing_settings = [
    name
    for name, value in (
        ("OPENAI_API_BASE", openai.api_base),
        ("OPENAI_API_KEY", openai.api_key),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

In [None]:
# Completion model backed by the "gpt-35-turbo" deployment. The connection
# settings held on the `openai` module are forwarded through model_kwargs.
llm = AzureOpenAI(
    deployment_name="gpt-35-turbo",
    model_kwargs=dict(
        api_key=openai.api_key,
        api_base=openai.api_base,
        api_type=openai.api_type,
        api_version=openai.api_version,
    ),
)
llm_predictor = LLMPredictor(llm=llm)

# Embedding model backed by the "text-embedding-ada-002" deployment, wrapped
# so that llama_index can consume the LangChain embeddings object.
azure_embeddings = OpenAIEmbeddings(
    deployment="text-embedding-ada-002",
    model="text-embedding-ada-002",
    openai_api_type=openai.api_type,
    openai_api_base=openai.api_base,
    openai_api_version=openai.api_version,
    openai_api_key=openai.api_key,
)
# Embeddings are requested one text at a time (batch size of 1).
embedding_llm = LangchainEmbedding(azure_embeddings, embed_batch_size=1)

In [None]:
from pathlib import Path

import requests

# Wikipedia page titles to download; each title is also used as the name of
# the text file written under data/.
wiki_titles = ["Toronto", "Seattle", "Chicago", "Boston", "Houston"]

# Create the output directory once, before the loop (it does not depend on the
# title being processed). exist_ok makes the cell safe to re-run.
data_path = Path('data')
data_path.mkdir(exist_ok=True)

for title in wiki_titles:
    http_response = requests.get(
        'https://en.wikipedia.org/w/api.php',
        params={
            'action': 'query',
            'format': 'json',
            'titles': title,
            'prop': 'extracts',
            # 'exintro': True,
            'explaintext': True,
        },
        # Without a timeout a stalled connection would hang the cell forever.
        timeout=30,
    )
    # Surface HTTP errors directly instead of failing later while decoding or
    # indexing into an error payload.
    http_response.raise_for_status()
    response = http_response.json()

    # The 'pages' mapping is keyed by page id; only one title was requested,
    # so take its single entry.
    page = next(iter(response['query']['pages'].values()))
    if 'extract' not in page:
        raise ValueError(f"Wikipedia returned no text extract for {title!r}")
    wiki_text = page['extract']

    # One UTF-8 text file per city: data/<title>.txt
    (data_path / f"{title}.txt").write_text(wiki_text, encoding="utf-8")

In [None]:
# Read each downloaded city file back in and collect the resulting documents
# in one flat list, labelling the first document of every file with its city.
city_docs = []
for wiki_title in wiki_titles:
    reader = SimpleDirectoryReader(input_files=[f"data/{wiki_title}.txt"])
    docs = reader.load_data()
    # Use the city name as the document id instead of the generated one.
    docs[0].doc_id = wiki_title
    city_docs += docs

In [None]:
# Service context for the knowledge-graph index: wraps the current `llm` in a
# predictor and limits chunks to 1024.
llm_predictor_chatgpt = LLMPredictor(llm=llm)
service_context = ServiceContext.from_defaults(
    chunk_size_limit=1024,
    llm_predictor=llm_predictor_chatgpt,
)

In [None]:
from llama_index.storage.index_store import MongoIndexStore
from llama_index import StorageContext
from llama_index.storage.docstore import MongoDocumentStore

In [None]:
# MongoDB connection string. It usually embeds credentials, so it is read from
# the MONGODB_URI environment variable rather than being stored in the notebook.
connection_string = os.environ.get("MONGODB_URI", "")
if not connection_string:
    # An empty URI is only a placeholder; stop here with a clear message
    # instead of failing inside the Mongo client.
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB connection string."
    )

# Index metadata and documents are stored in separate databases/namespaces on
# the same MongoDB deployment.
index_store = MongoIndexStore.from_uri(uri=connection_string, db_name="kg_index", namespace="index")
doc_store = MongoDocumentStore.from_uri(uri=connection_string, db_name="doc_store", namespace="doc")
storage_context = StorageContext.from_defaults(
    docstore=doc_store,
    index_store=index_store,
)

In [None]:
# Build the knowledge-graph index over ALL city documents and store it through
# the Mongo-backed storage context.
# `city_docs` is the full list collected by the loading loop. The loop's own
# `docs` variable only holds the documents of the last city processed, so
# passing it here would leave the other cities out of the index.
# NOTE: can take a while! 
new_index = GPTKnowledgeGraphIndex.from_documents(
    city_docs,
    max_triplets_per_chunk=5,  # at most 5 triplets per chunk
    service_context=service_context,
    storage_context=storage_context
)

In [None]:
# Query the knowledge graph with include_text=False, using the
# "tree_summarize" response mode.
# NOTE(review): "Interleaf" does not look related to the city pages downloaded
# earlier — confirm this query is intended.
query_engine = new_index.as_query_engine(response_mode="tree_summarize", include_text=False)
response = query_engine.query("Tell me more about Interleaf")

In [None]:
# Render the most recent answer in bold in the cell output.
answer_markup = f"<b>{response}</b>"
display(Markdown(answer_markup))

In [None]:
# Same query setup as before, but with include_text=True.
# The question is written in Chinese: "What are the populations of Boston and
# Chicago?"
query_engine = new_index.as_query_engine(response_mode="tree_summarize", include_text=True)
response = query_engine.query("Boston和Chicago有多少人口")

In [None]:
# Render the most recent answer in bold in the cell output.
answer_markup = f"<b>{response}</b>"
display(Markdown(answer_markup))

In [None]:
from llama_index import (
    SimpleDirectoryReader,
    LLMPredictor,
    ServiceContext,
    ResponseSynthesizer
)
from llama_index.indices.document_summary import GPTDocumentSummaryIndex
from langchain.chat_models import AzureChatOpenAI

In [None]:
# Switch to the chat-model client for the document-summary index. The
# connection settings held on the `openai` module are forwarded through
# model_kwargs, as for the completion model created earlier.
llm = AzureChatOpenAI(deployment_name="gpt-35-turbo", model_kwargs={
    "api_key": openai.api_key,
    "api_base": openai.api_base,
    "api_type": openai.api_type,
    "api_version": openai.api_version,
})
llm_predictor_chatgpt = LLMPredictor(llm=llm)

# Use the predictor built just above; the older `llm_predictor` still wraps the
# previous completion model. The embedding model is passed as `embed_model`,
# which is the keyword ServiceContext.from_defaults accepts.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor_chatgpt,
    chunk_size_limit=1024,
    embed_model=embedding_llm,
)

In [None]:
# Build a document-summary index over every city document, using a
# "tree_summarize" synthesizer configured with use_async=True.
response_synthesizer = ResponseSynthesizer.from_args(
    use_async=True,
    response_mode="tree_summarize",
)
doc_summary_index = GPTDocumentSummaryIndex.from_documents(
    city_docs,
    response_synthesizer=response_synthesizer,
    service_context=service_context,
)

In [None]:
from pyvis.network import Network

# Visualise the knowledge graph with pyvis: a directed network configured for
# notebook use with "in_line" CDN resources.
net = Network(directed=True, notebook=True, cdn_resources="in_line")

# Export the index's graph as a networkx graph and load it into the network.
g = new_index.get_networkx_graph()
net.from_nx(g)

# NOTE(review): presumably writes example.html and renders it in the cell — confirm.
net.show("example.html")