In [None]:
# Scratch cell: prints a fixed string; nothing later in the notebook reads its result.
print("test")

test


# **Run prior to starting the server and chatbot**

In [None]:
#Installations for constructing knowledge graph
!pip install llama-index-llms-openai
!pip install llama-index-readers-file
!pip install llama-index-embeddings-openai



In [None]:
#installations for Fast API
!pip install fastapi nest-asyncio pyngrok uvicorn
!ngrok config add-authtoken $NGROK_AUTHTOKEN

add-authtoken - save authtoken to configuration file

USAGE:
  ngrok config add-authtoken TOKEN [flags]

AUTHOR:
  ngrok - <support@ngrok.com>

COMMANDS: 
  config          update or migrate ngrok's configuration file
  http            start an HTTP tunnel
  tcp             start a TCP tunnel
  tunnel          start a tunnel for use with a tunnel-group backend

EXAMPLES: 
  ngrok http 80                                                 # secure public URL for port 80 web server
  ngrok http --domain baz.ngrok.dev 8080                        # port 8080 available at baz.ngrok.dev
  ngrok tcp 22                                                  # tunnel arbitrary TCP traffic to port 22
  ngrok http 80 --oauth=google --oauth-allow-email=foo@foo.com  # secure your app with oauth

Paid Features: 
  ngrok http 80 --domain mydomain.com                           # run ngrok with your own custom domain
  ngrok http 80 --allow-cidr 2600:8c00::a03c:91ee:fe69:9695/32  # run ngrok with IP policy rest

In [None]:
# Needed for information extraction
import requests
from bs4 import BeautifulSoup
import networkx as nx
import spacy
from prettytable import PrettyTable
import textwrap
from google.colab import userdata

# For Google custom search engine: both values are read from Colab's user secrets
# (secret names 'api_key' and 'cse_id') and are passed to google_search().
api_key = userdata.get('api_key')
cse_id = userdata.get('cse_id')
# Load spaCy's "en_core_web_sm" pipeline.
# NOTE(review): `nlp` is not referenced by any code visible in this part of the
# notebook — confirm it is still needed.
nlp = spacy.load("en_core_web_sm")

In [None]:
# To store the extracted info in GDrive
from google.colab import drive
# Mount Google Drive at /content/drive; the KG folder used by later cells lives under it.
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
# Export the Colab secret named 'openai_api-key' as the OPENAI_API_KEY environment variable.
# NOTE(review): presumably the llama-index OpenAI client reads the key from this variable — confirm.
os.environ["OPENAI_API_KEY"] = userdata.get('openai_api-key')

In [None]:
import logging
import sys
# Route log records of level INFO and above to stdout so they show up in the cell output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [None]:
# For constructing knowledge graph and displaying results
from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex
from llama_index.core.graph_stores import SimpleGraphStore
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from IPython.display import Markdown, display
from llama_index.core import StorageContext

In [None]:
import os
# Folder in Google Drive that receives the extracted text and is later read
# back to build the knowledge graph
folder_path = '/content/drive/My Drive/KG'

# Create the directory (and any missing parents). exist_ok=True replaces the
# separate existence check, so there is no gap between checking and creating.
os.makedirs(folder_path, exist_ok=True)

# Text file inside that folder that the extracted results are written to
file_path = os.path.join(folder_path, 'results.txt')

In [None]:
# Default search topic.
# NOTE(review): user_input_kg() assigns its own local `curr_query`, so this
# module-level value does not appear to be read by the code visible here — confirm.
curr_query = 'use cases of transformers in machine learning'

In [None]:
# Export the Colab secret 'ngrok_auth_token' as the NGROK_AUTHTOKEN environment variable.
# Shell commands that expand $NGROK_AUTHTOKEN only see a value once this assignment has run.
os.environ['NGROK_AUTHTOKEN'] = userdata.get('ngrok_auth_token')

# **Chatbot calls these functions**

In [None]:
# Extract information using the Google API key and Custom search engine id
def google_search(query, api_key, cse_id, num_results=5, timeout=10):
    """Return the result links for ``query`` from the Google Custom Search API.

    Args:
        query: Free-text search string. It is sent as a request parameter, so
            characters such as ``&``, ``#`` or spaces are URL-encoded instead of
            corrupting the query string.
        api_key: Google API key.
        cse_id: Custom search engine id (the ``cx`` parameter).
        num_results: Number of results to request.
        timeout: Seconds to wait for the HTTP response before ``requests``
            raises a timeout error.

    Returns:
        A list with the ``link`` of every returned item; an empty list when the
        response carries no ``items``.
    """
    response = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={"q": query, "key": api_key, "cx": cse_id, "num": num_results},
        timeout=timeout,
    )
    items = response.json().get('items', [])
    # Skip items without a link rather than failing the whole search.
    return [item['link'] for item in items if 'link' in item]

def extract_relevant_info(url, timeout=10):
    """Download ``url`` and return its title and paragraph text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        ``{"title": ..., "text": ...}`` on success, where ``text`` is the text
        of every ``<p>`` element joined by newlines and ``title`` is an empty
        string when the page has no ``<title>`` element.
        ``{"error": message}`` when the request fails or returns an HTTP error
        status.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return {"error": str(e)}

    soup = BeautifulSoup(response.content, 'html.parser')
    # find() returns None for pages without a <title>; calling get_text() on it
    # would raise AttributeError, which the handler above does not catch.
    title_tag = soup.find('title')
    title = title_tag.get_text() if title_tag is not None else ""
    text = "\n".join(p.get_text() for p in soup.find_all('p'))
    return {"title": title, "text": text}

# Function to wrap text for table display
def wrap_text(text, width):
    """Re-flow ``text`` so that no line is longer than ``width`` characters.

    Returns the wrapped lines joined by newlines (an empty string for empty input).
    """
    # textwrap.fill is the stdlib shorthand for "\n".join(textwrap.wrap(...)).
    return textwrap.fill(text, width=width)

# Function to format and print the output using PrettyTable
def print_formatted_output(results):
    """Print ``results`` as a three-column table (Title, URL, Text).

    Each entry in ``results`` must provide the keys 'title', 'url' and 'text';
    every value is wrapped to its column width before being added as a row.
    """
    # (result key, wrap width) per column, in the same order as the header.
    column_specs = (('title', 30), ('url', 30), ('text', 50))

    table = PrettyTable()
    table.field_names = ["Title", "URL", "Text"]
    for entry in results:
        table.add_row([wrap_text(entry[key], width=limit) for key, limit in column_specs])

    print(table)



def user_input_kg(query):
    """Search the web for ``query`` and collect the text of each result page.

    A missing, empty or whitespace-only ``query`` falls back to the default
    topic. Each URL returned by ``google_search`` is passed to
    ``extract_relevant_info``; pages that produced an ``"error"`` entry are
    skipped. The collected results are printed as a table and returned.

    Returns:
        A list of dicts with the keys 'title', 'text' and 'url'.
    """
    default_query = 'use cases of transformers in machine learning'
    # `query and query.strip()` also covers None, which len(query) would reject.
    search_query = query if (query and query.strip()) else default_query

    results = []
    for url in google_search(search_query, api_key, cse_id):
        info = extract_relevant_info(url)
        if "error" in info:
            continue
        info['url'] = url
        results.append(info)

    print_formatted_output(results)
    return results


In [None]:

# Function to save results to text file in Google Drive
def save_results_to_text(results, filename=file_path):
    """Write the 'text' of every result to ``filename``, one "Text: ..." line each.

    The file is opened in write mode, so earlier content is replaced. Returns
    True when the file exists and is non-empty afterwards, otherwise False.
    Any exception raised while writing or checking is printed and reported as
    False.
    """
    try:
        with open(filename, mode='w', encoding='utf-8') as out:
            out.writelines(f"Text: {entry['text']}\n" for entry in results)
        # Check if the file was created and is not empty
        written = os.path.exists(filename) and os.path.getsize(filename) > 0
    except Exception as e:
        print(f"An error occurred: {e}")
        return False

    if written:
        print(f"File '{filename}' created successfully.")
    else:
        print(f"File '{filename}' not created or is empty.")
    return written


In [None]:
# Stores the results in results.txt file in a GDrive folder titled 'KG'
def store_extracted_info(results):
  """Save ``results`` to Drive, start the knowledge-graph build, and report a status.

  Returns 200 when save_results_to_text() reported success, otherwise 400.
  The graph build is started in either case.
  """
  saved = save_results_to_text(results)
  # Print the folder listing so the cell output shows what is currently stored.
  print(os.listdir('/content/drive/My Drive/KG'))
  # construct_knowledge_graph() is not called directly here; it runs on the
  # thread started by run_async_function().
  run_async_function()
  return 200 if saved else 400

In [None]:
# Load all the text files in the folder to construct knowledge graph
def construct_knowledge_graph():
    """Build the knowledge-graph index from the documents in the KG Drive folder.

    Reads every document from ``/content/drive/My Drive/KG``, configures the
    LLM (gpt-4, temperature 0) and a chunk size of 512, and creates a
    ``KnowledgeGraphIndex`` with up to two triplets per chunk plus embeddings.
    The triplets are stored in a ``SimpleGraphStore`` supplied through the
    storage context.

    The finished index and a ``tree_summarize`` query engine are published
    through the module-level globals ``new_index`` and ``query_engine``.
    """
    global new_index, query_engine

    documents = SimpleDirectoryReader("/content/drive/My Drive/KG").load_data()

    Settings.llm = OpenAI(temperature=0, model="gpt-4")
    Settings.chunk_size = 512

    graph_store = SimpleGraphStore()
    storage_context = StorageContext.from_defaults(graph_store=graph_store)

    # Considering embeddings. The storage context must be handed over here,
    # otherwise the SimpleGraphStore created above is never used by the index.
    index = KnowledgeGraphIndex.from_documents(
        documents,
        max_triplets_per_chunk=2,
        storage_context=storage_context,
        include_embeddings=True,
    )
    engine = index.as_query_engine(
        include_text=True, response_mode="tree_summarize"
    )

    # Publish both globals only after the whole build succeeded, so a failure
    # part-way through leaves any previously built index/engine pair in place.
    new_index = index
    query_engine = engine
    print("Knowledge graph constructed")


In [None]:
# Knowledge-graph construction is slow and HTTP requests time out quickly, so the build is started on a background thread instead of being run inside the request
import threading
import time

def run_async_function():
    """Start construct_knowledge_graph on a new thread and return without waiting for it."""
    threading.Thread(target=construct_knowledge_graph).start()
    print("Sync function has been called")

In [None]:
# Do not run now. Note: this cell is a manual check that responses come back as expected.
# It reads `new_index`, so it can only run after construct_knowledge_graph() has finished.
# Expecting a correct response for this question
query_engine = new_index.as_query_engine(
    include_text=False, response_mode="tree_summarize"
)
response = query_engine.query(
    "What is machine learning",
)
display(Markdown(f"{response}"))
# No information is expected for this query because the documents do not contain it
response = query_engine.query(
    "What is Jyothi studying",
)
display(Markdown(f"{response}"))

In [None]:
def fn_query_engine(question):
  """Answer ``question`` against the knowledge-graph index and return the response.

  Rebuilds the module-level ``query_engine`` from ``new_index``
  (``include_text=False``, ``tree_summarize``) and runs the query on it.
  Requires ``new_index`` to exist, i.e. construct_knowledge_graph() must have
  completed beforehand.
  """
  global query_engine
  query_engine = new_index.as_query_engine(
      include_text=False, response_mode="tree_summarize"
  )
  # Previously the engine was created but `question` was never used.
  return query_engine.query(question)



In [None]:
# The chatbot accesses these APIs
# Creating a FastAPI application
from fastapi import FastAPI,Query
from fastapi.middleware.cors import CORSMiddleware

# Application object that the route decorators below register their handlers on.
app = FastAPI()

# CORS: accept requests from any origin, with any method and any header.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended for the demo only.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)

@app.get('/')
async def root():
    # Fixed payload returned for GET /.
    greeting = {'hello': 'world'}
    return greeting

# Called when the user logs in to the application and wants to talk to the chatbot
@app.get('/answer_questions')
async def answer_questions(question: str = Query(..., description="The question to be answered")):
    """Answer ``question`` from the knowledge-graph index.

    Returns a JSON object with the original ``question`` and the textual
    ``response``. Responds with HTTP 503 while no knowledge graph has been
    built yet.
    """
    # Local import: only this handler needs HTTPException.
    from fastapi import HTTPException

    # `new_index` only exists once construct_knowledge_graph() has completed;
    # reading it directly before that raises NameError and yields a bare 500.
    index = globals().get("new_index")
    if index is None:
        raise HTTPException(
            status_code=503,
            detail="Knowledge graph has not been constructed yet.",
        )

    query_engine = index.as_query_engine(
      include_text=False, response_mode="tree_summarize"
    )
    response = query_engine.query(
        question,
    )
    print(str(response))
    return {'question': question, 'response': str(response)}

# Get info from the user for the topic to generate a knowledge graph about
@app.get('/generate_knowledge_graph')
async def generate_knowledge_graph(kg_query: str = Query(..., description="Topic to generate the knowledge graph on")):
    # Flow: search and scrape pages for the topic, then store the text. The
    # stored status (200 or 400) is returned as a string in the response body.
    print("generate knowledge graph")
    scraped = user_input_kg(kg_query)
    print(scraped)
    status_code = store_extracted_info(scraped)
    print(status_code)
    return dict(question=kg_query, response=str(status_code))

In [None]:
# Set up a local web server to make it accessible over the internet, the APIs will be called by the React.js application
import nest_asyncio
from pyngrok import ngrok
import uvicorn

# Open an ngrok tunnel to local port 8000 and print its public URL.
ngrok_tunnel = ngrok.connect(8000)
print('URL: ', ngrok_tunnel.public_url)
# Applied before uvicorn.run().
# NOTE(review): presumably needed because the notebook kernel already runs an event loop — confirm.
nest_asyncio.apply()
# Serve the FastAPI app on the same port the tunnel forwards to.
uvicorn.run(app, port=8000)

INFO:     Started server process [33149]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


URL:  https://07a9-34-75-24-62.ngrok-free.app
generate knowledge graph




INFO:     73.162.97.244:0 - "GET /generate_knowledge_graph?kg_query=AI%20powered%20Agilex%205%20SoC%20FPGAs HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/http/h11_impl.py", line 398, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/middleware/proxy_headers.py", line 70, in __call__
    return await self.app(scope, receive, send)
  File "/usr/local/lib/python3.10/dist-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.10/dist-packages/starlette/applications.py", line 123, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/errors.py", line 164, in __call__
    await self.app(scope, receive, _send)
  File "/usr/local/lib/python3.10/dist-pack

generate knowledge graph
+--------------------------------+--------------------------------+----------------------------------------------------+
|             Title              |              URL               |                        Text                        |
+--------------------------------+--------------------------------+----------------------------------------------------+
| Intel and Altera Announce Edge | https://www.intel.com/content/ | You can easily search the entire Intel.com site in |
|  and FPGA Offerings for AI at  | www/us/en/newsroom/news/intel- |   several ways. You can also try the quick links   |
|          Embedded...           |  altera-bring-ai-to-embedded-  |  below to see results for most popular searches.   |
|                                |           world.html           |      The browser version you are using is not      |
|                                |                                |     recommended for this site.Please consider      |
|      