In [None]:
# $:> conda env create --file=env.yaml
# $:> conda activate discoverai_env
# $:> python --version

- Online Document
- Search 
- OCR
- OpenAI

# Imports

In [4]:
import sys
import os
import uuid
import re
import time
import requests
import random
import re
import ntpath
import json
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
get_ipython().run_line_magic('matplotlib', 'inline')
from PIL import Image
from io import BytesIO
import http.client, urllib.request, urllib.parse, urllib.error, base64
from pdf2image import convert_from_path

# AZURE - OCR
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__
from ocrlayout.bboxhelper import BBOXOCRResponse,BBoxHelper

# AZURE - Search
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient

# OpenAI
import openai

import logging
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

## :: config ::

In [8]:
def load_config(config_fp):
    """
    Load service settings from a JSON file and export them to ``os.environ``.

    :param: config_fp - str: path to a JSON file holding every key in ``required_keys``
    :return: None
    :raises KeyError: when one or more required keys are absent; the message lists
                      all of them and the environment is left untouched
    """
    required_keys = (
        "STORAGE_NAME",
        "STORAGE_CONN",
        "OCR_KEY",
        "OCR_ENDPOINT",
        "OPENAI_KEY",
        "SEARCH_ENDPOINT",
        "INDEX_NAME",
        "SEARCH_KEY",
    )

    with open(config_fp, "r") as f:
        config_data = json.load(fp=f)

    # Validate before exporting anything, so a bad file cannot leave the
    # environment half-populated.
    missing = [key for key in required_keys if key not in config_data]
    if missing:
        raise KeyError("Missing config key(s) in %s: %s" % (config_fp, ", ".join(missing)))

    for key in required_keys:
        os.environ[key] = config_data[key]
    return

# Export the settings from azure_config.json to os.environ; the cells below read them from there.
load_config("azure_config.json")

# Data

In [None]:
# Load data from the Azure Storage container: build a {blob name: blob URL} lookup.
# The connection string is read from the STORAGE_CONN environment variable.

blob_service_client = BlobServiceClient.from_connection_string(os.environ.get("STORAGE_CONN"))
container_client = blob_service_client.get_container_client("documents")
data={}
blobs_list = container_client.list_blobs()
for blob in blobs_list:
    print(blob.name)
    # keyed by blob name; later cells look documents up in `data` by file name
    data[blob.name] = container_client.get_blob_client(blob.name).url

# OCR

#### Using READ API v3.2

API Readme:
 
https://centraluseuap.dev.cognitive.microsoft.com/docs/services/computer-vision-v3-2/operations/5d986960601faab4bf452005

In [12]:
def run_azure_ocr_api(fp, poll_interval=1, max_wait=300):
    """
    Submit a hosted document to the Read API v3.2 over REST and poll until it finishes.

    :param: fp - str: URL of the document (sent as the "url" field of the request body)
    :param: poll_interval - float: seconds to sleep between two status polls
    :param: max_wait - float|None: stop polling after this many seconds; None polls forever
    :return: the ``requests`` response of the last poll, or None when the request
             could not be submitted or the result could not be fetched
    :raises TimeoutError: when the operation is still pending after ``max_wait`` seconds
    """
    key = os.environ.get("OCR_KEY")
    endpoint = os.environ.get("OCR_ENDPOINT")
    if not key or not endpoint:
        print("Failed to submit request! ERROR :: ", "OCR_KEY / OCR_ENDPOINT is not set")
        return None

    # Accept the endpoint with or without a trailing slash.
    base_url = endpoint.rstrip("/") + "/"

    headers = {
        'Content-Type': 'application/json',
        'Ocp-Apim-Subscription-Key': key
    }
    params = {
        'language': 'en'
    }
    body = {"url": fp}

    # "https://{endpoint}/vision/v3.2/read/analyze[?language][&pages][&readingOrder][&model-version]"

    # Submit a request
    url_request = base_url + "vision/v3.2/read/analyze"
    response = requests.post(url_request, headers=headers, params=params, json=body)
    if response.status_code != 202:
        print("Failed to submit request! ERROR :: ", str(response.text))
        return None

    # The request was accepted; the operation id is the last path segment
    # of the Operation-Location header.
    operation_location = response.headers["Operation-Location"]
    operation_id = operation_location.rstrip("/").split("/")[-1]
    print("Success - Submitted a request | Operation ID: %s |" % operation_id)

    # get results
    url_readResults = base_url + "vision/v3.2/read/analyzeResults/" + operation_id
    deadline = None if max_wait is None else time.time() + max_wait
    while True:
        response = requests.get(url_readResults, headers=headers)
        if response.status_code != 200:
            # An error payload carries no 'status' field; report it instead of crashing.
            print("Failed to fetch results! ERROR :: ", str(response.text))
            return None
        status = str(response.json().get('status', '')).strip().lower()
        if status not in ['notstarted', 'running']:
            break
        if deadline is not None and time.time() >= deadline:
            raise TimeoutError("Read operation %s still '%s' after %s seconds" % (operation_id, status, max_wait))
        print("Waiting for results...")
        time.sleep(poll_interval)
    print("Compiled results!")
    return response

In [None]:
# read-api-v3.2 (REST variant): OCR one hosted PDF and show its per-page read results.
# NOTE: run_azure_ocr_api returns None when the request is rejected, in which case
# the `.json()` call below raises AttributeError.

api_output = run_azure_ocr_api(data["The Prospect of a Continued Correction.pdf"])
api_output.json()["analyzeResult"]["readResults"]

### OCR Module

In [250]:
class run_ocr_process:
    """
    Runs Azure's Read API v3.2 (client library) on one document and reshapes the
    response into a line-level DataFrame plus a summary dict.

    Usage: ``ocr_output, output_df = run_ocr_process(path_or_url).main()``
    """

    # FILE_PATH values starting with one of these are treated as hosted documents
    REMOTE_PREFIXES = ('https', 'http', 'www', 'ftp', 'localhost')
    # formats that are sent to the Read API
    OCR_FORMATS = ['png', 'jpg', 'jpeg', 'heic', 'gif', 'bmp', 'tiff', 'pdf']
    # plain-text formats: accepted by __init__, not yet handled by run_ocr
    TEXT_FORMATS = ['txt', 'md']
    # column order of the line-level DataFrame
    LINE_COLUMNS = ['lines', 'top_left_XY', 'top_right_XY', 'bottom_right_XY', 'bottom_left_XY']

    def __init__(self, FILE_PATH):
        """
        Validates the path, derives name/extension and creates the Computer Vision client.
        :param:  FILE_PATH - str: local file path or URL of the document
        :raises: Exception - local file does not exist, or the extension is unsupported
        """
        FILE_PATH = str(FILE_PATH)
        self.file_path = FILE_PATH

        # Set here (not only in run_ocr) so main() can always read it.
        self.file_type = 'online' if FILE_PATH.startswith(self.REMOTE_PREFIXES) else 'local'
        if self.file_type != 'online' and not os.path.exists(FILE_PATH):
            raise Exception("ERROR: File not found!")

        self.supported_formats = self.OCR_FORMATS + self.TEXT_FORMATS

        # URLs may carry a query string / fragment (e.g. a SAS token) after the file name.
        name_source = FILE_PATH
        if self.file_type == 'online':
            name_source = name_source.split("?", 1)[0].split("#", 1)[0]

        # splitext keeps multi-dot names intact ("a.b.pdf" -> "a.b", ".pdf") and
        # yields an empty extension when there is no dot at all.
        stem, ext = os.path.splitext(ntpath.basename(name_source))
        self.file_basename = stem.lower().strip()
        self.file_ext = ext.lstrip(".").lower().strip()

        if self.file_ext not in self.supported_formats:
            raise Exception("ERROR: Unsupported file format!")

        key = os.environ.get("OCR_KEY")
        self.endpoint = os.environ.get("OCR_ENDPOINT")

        # Set credentials & create client
        self.credentials = CognitiveServicesCredentials(key)
        self.client = ComputerVisionClient(self.endpoint, self.credentials)

        return


    def run_ocr(self, poll_interval=1, max_wait=300):
        """
        Runs OCR READ API V3.2 using client libraries.
        :param:  poll_interval - float: seconds to sleep between two status polls
        :param:  max_wait - float|None: stop polling after this many seconds; None polls forever
        :return: result - obj: Azure ReadAPI v3.2 response
        :raises: NotImplementedError - plain-text and Word inputs (no Read API call is made for them)
        :raises: TimeoutError - operation still pending after ``max_wait`` seconds
        """
        start = time.time()

        if self.file_type == 'online':
            # > hosted doc url (blob)
            response = self.client.read(self.file_path, raw=True)

        elif self.file_ext in self.OCR_FORMATS:
            # > image/pdf; the context manager closes the file even if the call raises
            with open(self.file_path, 'rb') as fp:
                response = self.client.read_in_stream(fp, raw=True)

        elif self.file_ext in self.TEXT_FORMATS:
            # > local plain text files: there is no Read API operation to poll for these
            raise NotImplementedError("Plain-text files ('txt', 'md') are not processed by run_ocr yet")

        elif self.file_ext in ['doc', 'docs', 'docx']:
            # > Word file - coming soon (raise instead of exiting the interpreter)
            raise NotImplementedError("Word files are not supported yet")

        else:
            raise Exception("ERROR: Unsupported file format!")

        # Get the operation location and ID from the response:
        operation_location = response.headers["Operation-Location"]
        operation_id = operation_location.split("/")[-1]
        print("Success - Submitted a request | Operation ID: %s |" % operation_id)

        # read 'results' using operation-id:
        deadline = None if max_wait is None else time.time() + max_wait
        while True:
            result = self.client.get_read_result(operation_id)
            if result.status.lower() not in ['notstarted', 'running']:
                break
            if deadline is not None and time.time() >= deadline:
                raise TimeoutError("Read operation %s did not finish within %s seconds" % (operation_id, max_wait))
            print ('Waiting for result...')
            time.sleep(poll_interval)

        print("%%time taken(s): ", (time.time() - start) )
        return result


    def format_boundingBox(self, ocr_result):
        """
        Cleans and formats (sorting) on BB coordinates/font-size.
        :param:  ocr_result - obj: original Azure ReadAPI v3.2 response obj
        :return: bboxresponse.text - str: cleaned corpus (with delimiters & context info)

        ref - https://puthurr.github.io/

        """
        ocr_str = json.dumps(ocr_result.serialize())

        # Create BoundingBox OCR Response (intersect with coordinates)
        bboxresponse = BBoxHelper().processAzureOCRResponse(ocr_str)
        return bboxresponse.text


    def process_ocr_results(self, ocr_result):
        """
        Extracts meaningful info from Azure's ReadAPI v3.2 response object.
        :param:  ocr_result - obj: original Azure ReadAPI v3.2 response obj
        :return: output_df - pd.DataFrame: line-by-line text with BB coords; corpus_json - dict; ocr_output - dict;
        """
        result = ocr_result
        records = []

        # Collect the detected text, line by line
        if result.status == OperationStatusCodes.succeeded:

            for readResult in result.analyze_result.read_results:
                # the first line of every page is prefixed with a blank-line marker
                page_change_marker = "\n\n"
                for line in readResult.lines:
                    # bounding box:
                    #   X top left, Y top left,
                    #   X top right, Y top right,
                    #   X bottom right, Y bottom right,
                    #   X bottom left, Y bottom left
                    box = line.bounding_box
                    records.append({
                        "lines": page_change_marker + line.text,
                        "top_left_XY": (box[0], box[1]),
                        "top_right_XY": (box[2], box[3]),
                        "bottom_right_XY": (box[4], box[5]),
                        "bottom_left_XY": (box[6], box[7])
                    })
                    page_change_marker = ""

        # Build the frame once; explicit columns keep 'lines' present even
        # when nothing was recognised.
        output_df = pd.DataFrame(records, columns=self.LINE_COLUMNS)

        # :: output ::
        #  'output_df'    : dataframe - stores line-by-line info with BB info XY (top-l, top-r, bottom-r, bottom-l)
        #  'corpus_json'  : dict - raw original response
        #  'ocr_output'   : dict - usable summarized-response
        #
        corpus_json = result.as_dict()
        if self.file_ext=='pdf':
            pages = (corpus_json.get('analyze_result') or {}).get('read_results') or []
            print("\nPDF file found! >> No. of pages= ", len(pages) )

        ocr_output = {
            'title': self.file_basename + "." + self.file_ext,
            'format': self.file_ext,
            'isPdf': self.file_ext=='pdf',
            'lines': output_df.to_dict(orient='records'),
            'corpus': "\n".join(output_df['lines'].tolist())
        }

        # CLEAN TEXT [ALTERNATE] - formatting Bounding Box using coordinates for sorting and positioning
        #   'corpus': self.format_boundingBox(ocr_result),

        return output_df, corpus_json, ocr_output


    def display_results(self, ocr_result):
        """
        Display BB overlapped on text. (**pdf needs poppler installed!**)
        :param:  ocr_result - obj: original Azure ReadAPI v3.2 response obj
        :return: None
        """
        # Extract the word bounding boxes and text.
        word_infos = [
            word_info
            for readResult in ocr_result.analyze_result.read_results
            for line in readResult.lines
            for word_info in line.words
        ]

        # Display the image and overlay it with the extracted text.
        fig, ax = plt.subplots(figsize=(10, 8))
        image = Image.open(self.file_path)
        ax.imshow(image, alpha=0.5)
        for word in word_infos:
            word = word.as_dict()
            bbox = [int(num) for num in word["bounding_box"]]
            # bbox holds four (x, y) corner points; Rectangle wants an origin plus
            # width/height, so draw the axis-aligned envelope of the corners.
            xs, ys = bbox[0::2], bbox[1::2]
            origin = (min(xs), min(ys))
            patch = Rectangle(origin, max(xs) - min(xs), max(ys) - min(ys),
                              fill=False, linewidth=2, color='y')
            ax.add_patch(patch)
            ax.text(origin[0], origin[1], word["text"], fontsize=20, weight="bold", va="top")
        # hide the axes before rendering; after plt.show() it has no effect on this figure
        ax.axis("off")
        plt.show()
        return


    def main(self):
        """
        Runs OCR, extracts the line-level info and, for local non-PDF files, shows the overlay.
        :return: ocr_output - dict: summarized response; output_df - pd.DataFrame: line-by-line text with BB coords
        """
        # run Azure's OCR
        ocr_result = self.run_ocr()

        # extract info
        output_df, corpus_json, ocr_output = self.process_ocr_results(ocr_result)

        # the overlay opens self.file_path as an image, so skip it for PDFs and hosted documents
        if self.file_ext not in ['pdf'] and self.file_type!='online':
            self.display_results(ocr_result)

        return ocr_output, output_df

In [251]:
# demo: run the client-library OCR pipeline on one hosted PDF.
# `ocr_output` is the summary dict (title/format/isPdf/lines/corpus); `output_df` has one row per text line.

execute = run_ocr_process(data["The Prospect of a Continued Correction.pdf"])
ocr_output, output_df = execute.main()

Success - Submitted a request | Operation ID: 9507d8e2-831f-4892-a5aa-4c9db1ec3b31 |
Waiting for result...
%%time taken(s):  2.189833402633667

PDF file found! >> No. of pages=  1


In [244]:
output_df

Unnamed: 0,lines,top_left_XY,top_right_XY,bottom_right_XY,bottom_left_XY
0,\n\nThe Prospect of a Continued Correction,"(0.4918, 0.5676)","(3.3007, 0.5676)","(3.3007, 0.6901)","(0.4918, 0.6901)"
1,Mike Wilson,"(0.4894, 1.0363)","(1.2935, 1.0363)","(1.2935, 1.133)","(0.4894, 1.133)"
2,Welcome to Thoughts on the Market. I'm Mike Wi...,"(0.4891, 1.5051)","(7.7671, 1.5051)","(7.7671, 1.6283)","(0.4891, 1.6283)"
3,Strategist for Morgan Stanley. Along with my c...,"(0.4934, 1.6613)","(7.9107, 1.6613)","(7.9107, 1.7846)","(0.4934, 1.7846)"
4,be talking about the latest trends in the fina...,"(0.4967, 1.8176)","(7.9121, 1.8176)","(7.9121, 1.9408)","(0.4967, 1.9408)"
5,a.m.,"(0.4949, 2.0009)","(0.7568, 2.0009)","(0.7568, 2.0705)","(0.4949, 2.0705)"
6,in New York. So let's get after it.,"(0.8692, 1.9738)","(3.4332, 1.9738)","(3.4332, 2.0971)","(0.8692, 2.0971)"
7,This past week tensions around Russia/Ukraine ...,"(0.4918, 2.2863)","(7.9871, 2.2863)","(7.9871, 2.4089)","(0.4918, 2.4089)"
8,"this occur, it's easy to simply throw up one's...","(0.4903, 2.4426)","(7.9128, 2.4426)","(7.9128, 2.5655)","(0.4903, 2.5655)"
9,"not so sure that's a good idea, particularly i...","(0.4967, 2.5988)","(7.9896, 2.5988)","(7.9896, 2.7221)","(0.4967, 2.7221)"


In [253]:
ocr_output

{'title': 'the%20prospect%20of%20a%20continued%20correction.pdf',
 'format': 'pdf',
 'isPdf': True,
 'lines': [{'lines': '\n\nThe Prospect of a Continued Correction',
   'top_left_XY': (0.4918, 0.5676),
   'top_right_XY': (3.3007, 0.5676),
   'bottom_right_XY': (3.3007, 0.6901),
   'bottom_left_XY': (0.4918, 0.6901)},
  {'lines': 'Mike Wilson',
   'top_left_XY': (0.4894, 1.0363),
   'top_right_XY': (1.2935, 1.0363),
   'bottom_right_XY': (1.2935, 1.133),
   'bottom_left_XY': (0.4894, 1.133)},
  {'lines': "Welcome to Thoughts on the Market. I'm Mike Wilson, Chief Investment Officer and Chief U.S. Equity",
   'top_left_XY': (0.4891, 1.5051),
   'top_right_XY': (7.7671, 1.5051),
   'bottom_right_XY': (7.7671, 1.6283),
   'bottom_left_XY': (0.4891, 1.6283)},
  {'lines': "Strategist for Morgan Stanley. Along with my colleagues bringing you a variety of perspectives, I'll",
   'top_left_XY': (0.4934, 1.6613),
   'top_right_XY': (7.9107, 1.6613),
   'bottom_right_XY': (7.9107, 1.7846),
   'bo

---

# OpenAI NLG GPT-3

In [18]:
# License key, read from the OPENAI_KEY environment variable
openai.api_key = os.environ.get("OPENAI_KEY")

# list the ids of the engines returned for this key
engines = openai.Engine.list()
print([i.id for i in engines.data])

In [19]:
def play(prompt):
    """
    Send a prompt to the completion API and print the question together with the answer.

    The prompt is expected to look like ``TEXT: ###<document>###<question>`` (the
    sections separated by a blank-line-padded ``###``); only the part after the last
    delimiter is echoed. A prompt without delimiters is echoed whole.

    :param: prompt - str: full prompt text
    :return: None
    :raises ValueError: when the prompt is None or blank
    """
    if prompt is None or prompt.strip()=="":
        raise ValueError("Incorrect prompt")

    # Work out what to echo before calling the API, so a malformed prompt
    # cannot fail after the request has already been made.
    question = prompt.rsplit("\n\n###\n\n", 1)[-1]

    response = openai.Completion.create(
        engine="text-davinci-001",
        prompt=prompt,
        temperature=0.01,   # sampling temp, higher values means the model will take more risks, 0.9-creative, 0-well-defined.
        max_tokens=250,     # max number of tokens to generate in completion API.
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0)

    # >>> RESPONSE JSON <<<
    # print(response, "\n")

    # print the text generated by the model
    print("-"*100, "\nprompt: ", question)
    print("\nResponse\n###\n", response.choices[0].text.strip(), "\n###")
    return

#### Examples

In [21]:
# The Prospect of a Continued Correction by Mike Wilson Podcast Transcript

demo_doc = """YOUR DOCUMENT GOES HERE"""

# prepare this as 'ocr-result': wrap the document between two "###" delimiters so a
# question can be appended after the second one (play() echoes the third section).
demo_doc = "TEXT: \n\n###\n\n"  + demo_doc.strip("\n").strip() + "\n\n###\n\n" 

In [None]:
# question answering over the document
play(demo_doc + "Who is the author in this?")

In [None]:
# summarization (play() caps the completion at max_tokens=250)
play(demo_doc + "Summarize TEXT in 500 words.")

In [None]:
# FAQ generation
play(demo_doc + "Generate 5 FAQs from the TEXT.")

In [None]:
# keyword extraction
play(demo_doc + "Find important words in TEXT")

In [None]:
# key-phrase extraction
play(demo_doc + "Find important phrases in the TEXT.")

In [None]:
# sentiment-word extraction
play(demo_doc + "Find sentiment words in the TEXT.")

In [None]:
# abbreviation extraction
# 50% confidence (author's note — presumably this prompt is unreliable; TODO confirm what was measured)
play(demo_doc + "Find abbreviation in the TEXT.")

### OpenAI Module

In [200]:
class extract_metadata:
    """
    Asks the OpenAI completion API questions about one OCR'ed document.

    The document corpus is truncated and wrapped as ``TEXT: ### <corpus> ###`` once in
    ``__init__``; every question is appended after the second delimiter.
    """

    # canned prompts selectable through execute(insight=...)
    prompt_mapping = {
        "default-summary": "Summarize TEXT in 300 words.",
        "default-faq": "Generate 5 FAQs from the TEXT.",
        "default-keypoints": "Tl;dr",
        "default-keywords": "Find important words in the TEXT.",
        "default-phrases": "Find important phrases in the TEXT.",
        "default-sentiment": "Find sentiment words in the TEXT.",
        "default-abbrv": "Find abbreviations in the TEXT."
    }

    def __init__(self, OPENAI_INSTANCE, OCR_RESULT_JSON):
        """
        :param:  OPENAI_INSTANCE - obj: the ``openai`` module (or an object exposing ``api_key`` and ``Completion.create``)
        :param:  OCR_RESULT_JSON - dict: OCR summary with at least the 'title' and 'corpus' keys
        :raises: Exception - OCR_RESULT_JSON is empty or None
        """
        if not OCR_RESULT_JSON:
            raise Exception("ERROR: Please pass a 'OCR Ouptut'")

        # License key (note: this sets api_key on the object that was passed in)
        self.openai = OPENAI_INSTANCE
        self.openai.api_key = os.environ.get("OPENAI_KEY")

        # :: config ::
        self.engine = "text-davinci-001"   # engine choose gpt3 from list, older - gpt2.1
        self.temp = 0.01          # higher values means the model will take more risks, 0.9 means creative, 0 is well-defined.
        self.max_tokens = 250     # max number of tokens to generate in completion API.
        self.doc_token_size = 1450  # free license supports upto 2049 tokens

        # ocr'ed document
        self.doc_title = OCR_RESULT_JSON['title']
        corpus = OCR_RESULT_JSON['corpus'].strip("\n")
        # the limit is applied to space-separated words, as a rough stand-in for tokens
        corpus = " ".join(corpus.split(" ")[:self.doc_token_size])
        self.doc = "TEXT:\n\n###\n\n" + corpus + "\n\n###\n\n"

        return


    def run_on_doc(self, prompt, insight_type=None):
        """
        Sends one prompt to the completion API and returns the cleaned answer text.
        :param:  prompt - str: full prompt (document wrapper + question)
        :param:  insight_type - unused, kept for backward compatibility
        :return: output - str: text of the selected choice, whitespace-stripped
        """
        # run openAI api
        response = self.openai.Completion.create(
            engine=self.engine,
            prompt=prompt,
            temperature=self.temp,
            max_tokens=self.max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0)

        # pick one choice at random when the API returned several
        if len(response.choices) > 1:
            selected_choice = random.choice(response.choices)
        else:
            selected_choice = response.choices[0]

        # Final output
        return selected_choice.text.strip()


    def execute(self, prompt=None, insight=None):
        """
        Answers either a canned insight or a free-form question about the document.
        :param:  prompt - str: user question; used when ``insight`` is missing or not a known key
        :param:  insight - str: one of the ``prompt_mapping`` keys (case/whitespace-insensitive)
        :return: result - str: the model's answer
        :raises: ValueError - no usable prompt and no known insight was passed
        """
        insight_key = insight.lower().strip() if isinstance(insight, str) else None

        if insight_key in self.prompt_mapping:
            question = self.prompt_mapping[insight_key]

        else:
            # USER INPUT (FAQ)
            if prompt is None or str(prompt).strip()=="" or str(prompt)=="nan":
                raise ValueError("ERROR: Incorrect or no prompt passed!")
            # drop any mix of trailing blanks/punctuation before adding the suffix
            user_input = str(prompt).rstrip(" .:-,?!")
            if user_input.strip()=="":
                raise ValueError("ERROR: Incorrect or no prompt passed!")
            question = user_input + " in the TEXT."

        input_text = self.doc + question
        result = self.run_on_doc(input_text).strip("\n").strip()

        # echo the question (not the whole document) followed by the answer
        print("\n\n" + question, ":\n\n", result, "\n", "--"*50)
        return result

In [201]:
# demo: free-form questions against the OCR'ed document.
# Each call prints the question and the answer; the returned strings are not kept here.

run_docAI = extract_metadata(openai, ocr_output)

run_docAI.execute("Who is the author in this?")
run_docAI.execute("Summarize TEXT in 500 words.")
run_docAI.execute("What is Mike Wilson's view on direction of market?")
run_docAI.execute("Generate 5 FAQs in this.")
run_docAI.execute("\n\nTl;dr")
run_docAI.execute("Find important words..")
run_docAI.execute("Find important phrases?")
run_docAI.execute("Find sentiment words")
run_docAI.execute("Find abbreviation.")



Who is the author in this in the TEXT. :

 Mike Wilson 
 ----------------------------------------------------------------------------------------------------


Summarize TEXT in 500 words in the TEXT. :

 The market is correcting from its high in January, with the main drivers being slowing growth and rising supply. The depth and duration of the correction will be determined by the magnitude of the slowdown in the first half of 2022, which will be exacerbated by the Russia/Ukraine situation. However, there are also preexisting fundamental risks that will continue to drive the market, such as slowing consumer demand and rising supply. The technical picture is mixed, with the S&P 500 down less than 10% but a divergence between the index and the average stock. Relief from the Russia/Ukraine situation could lead to a tactical rally, but uncertainty remains high. 
 ----------------------------------------------------------------------------------------------------


What is Mike Wilson's 

'N/A'

----

# Search (Lucene-based keyword search service)

In [None]:
def initiate_search(config_fp="azure_search_config.json"):
    """
    Load the search settings and create the module-level search client.

    Sets the globals ``search_endpoint``, ``search_index_name``, ``search_key`` and
    ``search_client``; ``keyword_query`` reads ``search_client``.

    :param: config_fp - str: config file passed to ``load_config``
    :return: None
    """
    global search_endpoint, search_index_name, search_key, search_client
    load_config(config_fp)
    search_endpoint = os.environ.get("SEARCH_ENDPOINT")
    search_index_name = os.environ.get("INDEX_NAME")
    search_key = os.environ.get("SEARCH_KEY")
    # build the client from the values read above
    search_client = SearchClient(search_endpoint, search_index_name, AzureKeyCredential(search_key))


def keyword_query(query):
    """
    Run a keyword search and keep a fixed set of fields from every hit.

    Uses the module-level ``search_client``.

    :param: query - str: search text
    :return: list of dicts, one per hit, each holding the content type, storage
             name, storage path and search score
    """
    kept_fields = (
        "metadata_content_type",
        "metadata_storage_name",
        "metadata_storage_path",
        "@search.score",
    )
    hits = search_client.search(search_text=query)
    return [{field: hit[field] for field in kept_fields} for hit in hits]

In [None]:
# search a 'query': create the module-level search client, then run one keyword search

initiate_search()    
keyword_query("australia")

----
----

# API TESTING

#### OCR & OpenAI API

In [None]:
# Smoke test: request the service's version route.
# NOTE(review): 0.0.0.0 is a bind-all address; as a client target it is not portable
# (it typically fails on Windows) — consider 127.0.0.1 if this cell cannot connect.
url = "http://0.0.0.0:5002/docai/version"
r = requests.get(url)

print(r.text)

In [None]:
# Run OCR through the service on a hosted document (the blob URL below is a placeholder).
url = "http://0.0.0.0:5002/docai/run_ocr"
payload = {
    "file_path": "https://azure-container-name.blob.core.windows.net/documents/sample.pdf"
}

r = requests.post(url, json=payload)
print(r.status_code)

# kept for the extract_insights cells below, which send it as "document"
ocr_response_json = r.json()   # upload this in azure-blob

In [None]:
# Option 1 - When user types a input query
# Sends the OCR response from the run_ocr cell together with a free-form "query".

url = "http://0.0.0.0:5002/docai/extract_insights"
payload = {
    "document": ocr_response_json,
    "query": "Generate summary for this in 100 words!"
}

r = requests.post(url, json=payload)
print(r.text)

In [None]:
# Option 2 - When user clicks on "SUMMARY" button ---
# "query" is left empty; "default_insight" carries one of the `default-*` keys
# that extract_metadata.execute recognises.

url = "http://0.0.0.0:5002/docai/extract_insights"
payload = {
    "document": ocr_response_json,
    "query": "",
    "default_insight": "default-summary"
}

r = requests.post(url, json=payload)
print(r.json())

In [None]:
# Option 2.2 - When user clicks on "GENERATE FAQs" button ---
# Same request shape as the SUMMARY option; only "default_insight" differs.

url = "http://0.0.0.0:5002/docai/extract_insights"
payload = {
    "document": ocr_response_json,
    "query": "",
    "default_insight": "default-faq"
}

r = requests.post(url, json=payload)
print(r.json())

In [None]:
# Option 2.3 - When user clicks on "GENERATE MAIN KEYPOINTS" button ---
# Same request shape as the SUMMARY option; only "default_insight" differs.

url = "http://0.0.0.0:5002/docai/extract_insights"
payload = {
    "document": ocr_response_json,
    "query": "",
    "default_insight": "default-keypoints"
}

r = requests.post(url, json=payload)
print(r.json())

In [None]:
# Option 2.4 - When user clicks on "GENERATE KEYWORDS" button ---
# Same request shape as the SUMMARY option; only "default_insight" differs.

url = "http://0.0.0.0:5002/docai/extract_insights"
payload = {
    "document": ocr_response_json,
    "query": "",
    "default_insight": "default-keywords"
}

r = requests.post(url, json=payload)
print(r.json())

In [None]:
# Option 2.5 - When user clicks on "GENERATE SENTIMENT WORDS" button ---
# Same request shape as the SUMMARY option; only "default_insight" differs.

url = "http://0.0.0.0:5002/docai/extract_insights"
payload = {
    "document": ocr_response_json,
    "query": "",
    "default_insight": "default-sentiment"
}

r = requests.post(url, json=payload)
print(r.json())

In [None]:
# Option 2.6 - When user clicks on "GENERATE ABBREVIATIONS" button ---
# Same request shape as the SUMMARY option; only "default_insight" differs.

url = "http://0.0.0.0:5002/docai/extract_insights"
payload = {
    "document": ocr_response_json,
    "query": "",
    "default_insight": "default-abbrv"
}

r = requests.post(url, json=payload)
print(r.json())

#### Search API

In [None]:
# Testing API
# The search text is the last path segment of the URL. This targets port 5000,
# whereas the docai cells above use port 5002.

response = requests.get("http://127.0.0.1:5000/search/australia finance")
response.json()