# ESG Question Answering Assistant: 

### Handle Imports:

In [1]:
# Move to root directory
import os

notebooks_dir = 'notebooks'
if notebooks_dir in os.path.abspath(os.curdir):
    while not os.path.abspath(os.curdir).endswith('notebooks'):
        print(os.path.abspath(os.curdir))
        os.chdir('..')
    os.chdir('..')  # to get to root

print(os.path.abspath(os.curdir))

C:\Users\MD726YR\PycharmProjects\eyalytics


In [2]:
# Supress SSL verification (EY problem):
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Suppress only the single warning from urllib3 needed.
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

old_send = requests.Session.send

def new_send(*args, **kwargs):
    kwargs['verify'] = False
    return old_send(*args, **kwargs)

requests.Session.send = new_send

In [63]:
# Import relevant libraries for llm ochestration:
from langchain import OpenAI, PromptTemplate
from langchain.agents.tools import Tool
from langchain.agents import AgentType, initialize_agent
from langchain.tools import DuckDuckGoSearchRun, BaseTool
from langchain.utilities import SerpAPIWrapper
from langchain.callbacks import get_openai_callback
from langchain.memory import ConversationBufferMemory

In [4]:
# Import libraries for web scraping:
import requests
import mimetypes
import urllib.parse
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options 
from selenium.webdriver.common.by import By

In [None]:
# Other imports:
from typing import List, Dict, Optional

In [64]:
# Get API Key:
from dotenv import load_dotenv
load_dotenv()
if not os.getenv("OPENAI_API_KEY"):
    raise KeyError(
        "You will need an OPENAI_API_KEY to use the LLM models in this notebook."
    )

### Task 0: Instantiate your LLM.
Use a deterministic model - setting the temperature (measure of randomness) to 0.

In [6]:
llm = OpenAI(
#     openai_api_key=OPENAI_API_KEY,  # not needed as load_dotenv used instead 
    temperature=0,
    model_name='gpt-3.5-turbo',
)



Set up token counting to monitor your costs:

In [None]:
def count_tokens(agent, query):
    with get_openai_callback() as cb:
        result = agent(query)
        print(f'Spent a total of {cb.total_tokens} tokens')

    return result

### Task 1: Use SerpAPI to try to find publickly available ESG reports

In [7]:
# Define APIError:
class APIError(Exception):
    """An API Error Exception"""

    def __init__(self, status):
        self.status = status

    def __str__(self):
        return "APIError: status={}".format(self.status)

In [8]:
# Create tools: 
tools = []

# Add paid search tool - note 100 calls per month are free:
if os.getenv("SERPAPI_API_KEY"):
    paid_search = SerpAPIWrapper()
    tools.append(Tool(
        name='Paid Google Search',
        func= paid_search.run,
        description="""
            A useful tool for searching the internet. Be specific with your input.
        """
    ))
else:
    raise APIError(
        "No API key found. Search functionality not supported."
    )

In [7]:
# Check tools properly instantiated: 
tools[0].run("Tesla stock price?")

'Tesla, Inc. is an American multinational automotive and energy company headquartered in Austin, Texas. Tesla designs and manufactures electric vehicles, stationary battery energy storage devices from home to grid-scale, solar panels and solar roof tiles, and related products and services.'

Add custom URL checker tool. For more information about creating custom tooling check out: 
    
https://github.com/pinecone-io/examples/blob/master/generation/langchain/handbook/07-langchain-tools.ipynb

In [9]:
# First create function:
def is_downloadable(url: str) -> bool:
    try:
        h = requests.head(url, allow_redirects=True)
        header = h.headers
        content_type = header.get('content-type')
        if 'application' in content_type.lower():
            return True

        return False
    except Exception as e:
        print(f"An error occurred: {e}")
        return False

# Test is_downloadable: 
assert is_downloadable(
    'https://www.coca-colacompany.com/content/dam/journey/us/en/reports/coca-cola-business-environmental-social-governance-report-2021.pdf'
), 'is_downloadable failed to identify a pdf.'
assert not is_downloadable(
    "https://www.coca-colacompany.com/reports/business-and-sustainability-report"
), 'is_downloadable wrongly assumed that a webpage was downloadable.' 

In [11]:
# Next, create custom tool from is_downloadable
class URLCheck(BaseTool):
    name = "URL Check"
    description = """
    Checks if a url is a downloadable resource, returning True if 
    the url is downloadable and False otherwise.
    
    Note: The function checks the 'content-type' header to determine 
    if the resource is a file. If the content type contains the word 
    'application', it's likely the URL is a downloadable file (like a PDF or a Word doc).
    This isn't foolproof - 'application' can appear in other content types as well, and
    some servers might return non-standard content types for files. You might need
    to adjust the logic based on your specific requirements.
    """
    def _run(self, url: str) -> bool:
        return is_downloadable(url)
    
    def _arun(self, url: str):
        raise NotImplementedError("This tool does not currently support async run.")

tools.append(URLCheck())    

**Initialise a Zero-shot ReAct Agent**:

This agent uses the ReAct framework to determine which tool to use based solely on the tool's description. Any number of tools can be provided. This agent requires that a description is provided for each tool.

In [12]:
zero_shot_agent = initialize_agent(
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, 
    tools=tools, 
    llm=llm,
    verbose=True,
    max_iterations=3,
)

**Create a template for URL retrieval**

In [98]:
search_template = """
    You are tasked with finding a downloadable URL link to the most recent sustainability 
    report of {company}. Please search the web for this link; checking years 2023, 2022, and
    2021 (in that order) until you find one. You may need the scrape {company}'s official site
    to find a downloadable link to a report.
    
    Note: The final output should (where available) be a downloadable url: specifying a path
    to the sustainability report.
"""

In [14]:
# Start agent
example_company = "Coca-Cola"
zero_shot_agent.run(search_template.format(company=example_company))



[1m> Entering new  chain...[0m
[32;1m[1;3mI should start by searching for the most recent sustainability report of Coca-Cola. I will search for the year 2023 first, then 2022, and finally 2021. I will use the Paid Google Search tool to search for the report.

Action: Paid Google Search
Action Input: "Coca-Cola sustainability report 2023"[0m
Observation: [36;1m[1;3mThe Coca-Cola Company's Business & Sustainability Report covers multiple aspects of the business, including: our sustainability priorities and progress; ...[0m
Thought:[32;1m[1;3mI didn't find a specific URL for the sustainability report in the search results. I should try a different year.

Action: Paid Google Search
Action Input: "Coca-Cola sustainability report 2022"[0m
Observation: [36;1m[1;3mThis 2022 Business & Sustainability Report is The Coca-Cola Company's fifth report to integrate overall business and sustainability ...[0m
Thought:[32;1m[1;3mI still didn't find a specific URL for the sustainability

'Agent stopped due to iteration limit or time limit.'

**Failed** Note that the above failed probably because the SERP API is supposed to be used to obtain information about topics, and not urls. Additionally it probably failed becuase the ZERO-SHOT agent does not retain contextual memory. 
A conversation ReAct agent may be a better option for the task at hand. 

**BELOW** we try and fix these issues by developing custom tools to replace SERP API using selenium, request, and beautiful soup. Additionally we modify the agent type/develop a custom chain to perform the task. 

</br>

#### Develop Custom Tooling:

Start by developing tools for fetching urls and descriptions from duckduckgo. Once you have this, also develop a url scraper that  

In [100]:
import requests
import mimetypes
import urllib.parse
from bs4 import BeautifulSoup
from selenium import webdriver
from typing import List, Dict, Optional


def is_downloadable(url: str) -> bool:
    """
    Check if a url is a downloadable link.
    
    Example:
        is_downloadable(
            'https://www.coca-colacompany.com/content/dam/journey/us/en/reports/coca-cola-business-and-sustainability-report-2022.pdf'
        ) -> True
    """
    mimetype, encoding = mimetypes.guess_type(url)
    if mimetype and mimetype.startswith('application'):
        return True

    try:
        h = requests.head(url, allow_redirects=True)
        header = h.headers
        content_type = header.get('content-type')
        if content_type and 'application' in content_type.lower():
            return True
    except Exception as e:
        print(f"An error occurred: {e}")

    return False


def fetch_duckduckgo_results(query: str, num_results: int = 5) -> List[Dict[str, Union[str, bool, int]]]:
    """
    Fetch search results from DuckDuckGo using Selenium and BeautifulSoup.
    
    Parameters:
    - query (str): The search query string.
    - num_results (int, optional): Maximum number of results to fetch. Defaults to 5.

    Returns:
    - list[dict]: Returns a list of dictionaries containing the following details about the search results:
        * title (str): Title of the search result.
        * description (str, optional): Description or snippet of the result.
        * link (str, optional): Direct URL of the search result.
        * downloadable (bool): Whether the link points to downloadable content.
        * rank (int): Rank or position of the search result on the page.
    """
    
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    url = f"https://duckduckgo.com/html/?q={query}"
    driver.get(url)
    
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    search_results = soup.find_all('div', class_='result__body')
    
    results = []
    for rank, result in enumerate(search_results):
        title = result.find('a', class_='result__a').get_text(strip=True) if result.find('a', class_='result__a') else None
        description = result.find('a', class_='result__snippet').get_text(strip=True) if result.find('a', class_='result__snippet') else None
        link = result.find('a', class_='result__url').get('href') if result.find('a', class_='result__url') else None
        
        if link:
            actual_url_encoded = urllib.parse.parse_qs(
                urllib.parse.urlparse(link).query
            ).get("uddg", [None])[0]
            if actual_url_encoded:
                final_link = urllib.parse.unquote(actual_url_encoded)
            else:
                final_link = None
        else:
            final_link = None
        
        results.append({
            'title': title,
            'description': description,
            'link': final_link,
            'downloadable': is_downloadable(final_link) if final_link else False,
            'rank': rank,
        })
        
        if len(results) >= num_results:
            break
    
    driver.quit()
    return results


def extract_link_description(link: BeautifulSoup) -> str:
    """
    Extract the most relevant description for a BeautifulSoup link element.
    
    The function uses a series of heuristics to determine the best description:
    1. The link's text.
    2. The 'alt' attribute of any child <img> tag.
    3. The 'title' attribute of the link.
    4. The link's href itself.
    
    Parameters:
    - link (BeautifulSoup): The BeautifulSoup element representing the link (a-tag).
    
    Returns:
    - str: A description extracted based on the heuristics.
    """
    
    description = link.string.strip() if link.string else None

    if not description:
        img_tag = link.find('img')
        if img_tag and 'alt' in img_tag.attrs:
            description = img_tag['alt'].strip()

    if not description and 'title' in link.attrs:
        description = link['title'].strip()

    if not description:
        description = link['href']

    return description


def extract_download_links_from_url(url: str) -> List[Dict[str, str]]:
    """
    Scrape a given URL and extract links that are deemed downloadable.
    
    Parameters:
    - url (str): The URL to scrape.
    
    Returns:
    - list[dict]: A list of dictionaries with two keys:
        * description (str): A description or title for the link.
        * link (str): The actual URL of the link.
    """
    
    if is_downloadable(url):
        return [{'description': None, 'link': None}]
        
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    anchors = soup.find_all('a', href=True)

    link_dict = {}

    for anchor in anchors:
        # Step 1: Resolve to absolute URL
        relative_href = anchor['href']
        absolute_href = urllib.parse.urljoin(url, relative_href)
        
        # Step 2: Strip fragments
        main_url = urllib.parse.urldefrag(absolute_href).url

        # Step 3: Check if downloadable
        if is_downloadable(main_url):
            description = extract_link_description(anchor)

            # Step 4: Decide on description update
            if main_url not in link_dict or len(description) > len(link_dict[main_url]['description']):
                link_dict[main_url] = {'description': description, 'link': main_url}

    return list(link_dict.values())


In [51]:
# Test the tool.
query = "Coca-Cola sustainability report 2023"
results = fetch_duckduckgo_results(query, 5)
for _result in results:
    print(_result)

{'title': 'BUSINESS & SUSTAINABILITY REPORT - The Coca-Cola Company', 'description': 'Publication date: April2023Language: English Number of pages: 88 Below is a list of quick links from thereport: 3. Chairman & CEO Message 5. Board of Directors 6. Executive Summary 12.', 'link': 'https://www.coca-colacompany.com/reports/business-and-sustainability-report', 'downloadable': False, 'rank': 0}
{'title': 'Sustainability - The Coca-Cola Company', 'description': "Water Stewardship Water balance is giving back to nature and communities. We go beyond to improve water security where it's needed most. Explore In Our Products We provide high-quality products and cutting edge ingredients sourced sustainably and ethically. Explore Sustainable Agriculture", 'link': 'https://www.coca-colacompany.com/sustainability', 'downloadable': False, 'rank': 1}
{'title': 'Sustainability - The Coca-Cola Company', 'description': "SustainabilityOur planet matters. We act in ways to create a more sustainable and bet

In [62]:
for _result in results[:3]:
    link = _result['link']
    _results = extract_download_links_from_url(link)
    print(f"Scraping: {link}")
    print(_results)

Scraping: https://www.coca-colacompany.com/reports/business-and-sustainability-report
[{'description': 'Advertising History', 'link': 'https://www.coca-colacompany.com/content/dam/company/us/en/about-us/history/coca-cola-a-short-history-125-years-booklet.pdf'}, {'description': '/content/dam/company/us/en/reports/coca-cola-business-and-sustainability-report-2022.pdf', 'link': 'https://www.coca-colacompany.com/content/dam/company/us/en/reports/coca-cola-business-and-sustainability-report-2022.pdf'}, {'description': 'View Highlights', 'link': 'https://www.coca-colacompany.com/content/dam/company/us/en/reports/2022-business-report/coca-cola-business-and-sustainability-report-2022-highlights.pdf'}]
Scraping: https://www.coca-colacompany.com/sustainability
[{'description': 'Advertising History', 'link': 'https://www.coca-colacompany.com/content/dam/company/us/en/about-us/history/coca-cola-a-short-history-125-years-booklet.pdf'}]
Scraping: https://investors.coca-colacompany.com/sustainability

Create custom langchain tools from the functions:
- fetch_duckduckgo_results
- extract_download_links_from_url

In [101]:
def format_search_results(search_results):
    search_results.sort(key=lambda x: x['rank'])  # Sort by rank
    formatted_results = [
        "Search Results (sorted based on page rank):\n"
    ]

    for entry in search_results:
        formatted_results.append("URL: '{}'\n".format(entry['link']))
        formatted_results.append("Downloadable: {}\n".format(
            entry['downloadable']
        ))
        formatted_results.append("Description: {}; {}\n".format(
            entry['title'], entry['description']))
        formatted_results.append("\n\n")

    formatted_results.append(
        "\nNote that when Downloadable is True, the url " +
        "is a downloadable link and should not be scraped."
    )
    return "".join(formatted_results)


def format_url_scrape_results(url, scrape_results):
    formatted_results = [
        f"Downloadable links from {url} (in no particular order):\n"]

    for entry in scrape_results:
        formatted_results.append("URL: {}".format(entry['link']))
        formatted_results.append("Description: {}\n".format(entry['description']))
        formatted_results.append("\n")
        
    return "".join(formatted_results)
    
class DDGoSearch(BaseTool):
    
    name = fetch_duckduckgo_results.__name__
    description = """
        Fetch search results from DuckDuckGo.
        
        Elaborating, this tool will return a list of urls and their 
        accompanying duckduckgo descriptions. For each url a flag
        indicating if the url is downloadable is outputed as well.
    """
    
    def _run(self, query: str) -> bool:
        ddgo_results = fetch_duckduckgo_results(
            query=query, 
            num_results=5,
        )
        return format_search_results(ddgo_results)
        
    
    def _arun(self, query: str):
        raise NotImplementedError(
            f"{self.__class__.__name__} does not currently support async run."
        )
        
class URLScraper(BaseTool):
    
    name = extract_download_links_from_url.__name__
    description = """
        URL Scraper. Given a non-downloadable URL this tool 
        scrapes a webpage and returns all downloadable links, 
        along with their descriptions, appearing on that webpage.
    """
    
    def _run(self, url: str) -> bool:
        scrape_results = extract_download_links_from_url(url)
        return format_url_scrape_results(url, scrape_results)
    
    def _arun(self, url: str):
        raise NotImplementedError(
            f"{self.__class__.__name__} does not currently support async run."
        )
        
tools = [DDGoSearch(), URLScraper()] 

Now that we have functioning tools for URL retrival, let's deploy the tools alongside an LLM. Below we'll define the LLM as well as the agent that we'll be using. 

*For details about agents see: https://www.pinecone.io/learn/series/langchain/langchain-agents/*

In [67]:
llm = OpenAI(
    temperature=0,  # measure of randomness/creativity
    model_name='gpt-3.5-turbo'
)



We can use the **conversational-react-description agent** to remember interactions - which will be neccessary if we want the agent to be able to check urls for downloadable files. We can think of this agent as the same as our previous zero-shot-react agent, but with conversational memory.

In [95]:
memory = ConversationBufferMemory(
    memory_key="chat_history"
)
conversational_agent  = initialize_agent(
#     agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION, # need to get the output parser to work!
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=5,
#     memory=memory,  # to be used with a CONVERSATIONAL agent.
#     handle_parsing_errors=True,
)

In [96]:
# Start the agent - you can use the search template we devised before. 
example_company = "Coca-Cola"
print(f"{search_template.format(company=example_company)}\n\n")
result = conversational_agent.run(
    search_template.format(company=example_company)
)


    You are tasked with finding a downloadable URL link to the most recent sustainability 
    report of Coca-Cola. Please search the web for this link; checking years 2023, 2022, and
    2021 (in that order) until you find one. You may need the scrape Coca-Cola's official site
    to find a downloadable link to a report.
    
    Note: The final output should (where available) be a downloadable url: specifying a path
    to the sustainability report.





[1m> Entering new  chain...[0m
[32;1m[1;3mI need to search for the most recent sustainability report of Coca-Cola. I should start by searching for the report on DuckDuckGo and then scrape the official Coca-Cola website if necessary. I will check the years 2023, 2022, and 2021 in that order until I find a downloadable link.

Action: fetch_duckduckgo_results
Action Input: "Coca-Cola sustainability report 2023"[0m
Observation: [36;1m[1;3mSearch Results (sorted based on page rank):
URL: 'https://www.coca-colacompany.com/reports/b

In [102]:
# lets repeat the experiment but with a different prompt:
search_template = """
    You are tasked with finding a downloadable URL link to the most recent FULL sustainability 
    report of {company}. Please search the web for this link; checking years 2023, 2022, and
    2021 (in that order) until you find one. You may need the scrape {company}'s official site
    to find a downloadable link to a report.
    
    Note, the final output should (where available) be a downloadable url, specifying a path
    to the sustainability report. Note that as soon as you find a downloadable report you 
    may stop the search. For instance if you search for the 2023 report, and find a 
    downloadable link for the company's FULL 2023 report, you should return this url. If 
    based on a url's description, you think that a longer report may be found on the 
    web-page, you can check that web-page for downloadable links. This may happen when 
    a recent executive summary of report is found from a search, but you believe that a FULL report 
    may be available on a company's web-page. If neither the search not url scraping finds anything 
    please continue your search with less recent dates.
"""

example_company = "Coca-Cola"
print(f"{search_template.format(company=example_company)}\n\n")
result = conversational_agent.run(
    search_template.format(company=example_company)
)


    You are tasked with finding a downloadable URL link to the most recent FULL sustainability 
    report of Coca-Cola. Please search the web for this link; checking years 2023, 2022, and
    2021 (in that order) until you find one. You may need the scrape Coca-Cola's official site
    to find a downloadable link to a report.
    
    Note, the final output should (where available) be a downloadable url, specifying a path
    to the sustainability report. Note that as soon as you find a downloadable report you 
    may stop the search. For instance if you search for the 2023 report, and find a 
    downloadable link for the company's FULL 2023 report, you should return this url. If 
    based on a url's description, you think that a longer report may be found on the 
    web-page, you can check that web-page for downloadable links. This may happen when 
    a recent executive summary of report is found from a search, but you believe that a FULL report 
    may be available on a compa

Error getting version of chromedriver 115. Retrying with chromedriver 114 (attempt 1/5)



Observation: [36;1m[1;3mSearch Results (sorted based on page rank):
URL: 'https://www.coca-colacompany.com/reports/business-and-sustainability-report'
Downloadable: False
Description: 2022 Business & Sustainability Report - The Coca-Cola Company; Publication Details Publication date: April2023Language: English Number of pages: 88 Below is a list of quick links from thereport: 3. Chairman & CEO Message 5. Board of Directors 6. Executive Summary 12.


URL: 'https://www.coca-colacompany.com/sustainability'
Downloadable: False
Description: Sustainability - The Coca-Cola Company; Water Stewardship Water balance is giving back to nature and communities. We go beyond to improve water security where it's needed most. Explore In Our Products We provide high-quality products and cutting edge ingredients sourced sustainably and ethically. Explore Sustainable Agriculture


URL: 'https://www.coca-colacompany.com/content/dam/journey/us/en/reports/coca-cola-business-and-sustainability-report-2022.

In [103]:
# final try with a prompt designed by chatgpt:
search_template = """
    Find the most recent FULL sustainability report for {company} from years 2023, 2022, or 2021. 
    Prioritize downloadable URL links, searching in the given yearly order. 
    If you encounter a summary, consider checking the company's official website for the full report.
    Return the first full report URL you find.
"""

example_company = "Coca-Cola"
print(f"{search_template.format(company=example_company)}\n\n")
result = conversational_agent.run(
    search_template.format(company=example_company)
)


    Find the most recent FULL sustainability report for Coca-Cola from years 2023, 2022, or 2021. 
    Prioritize downloadable URL links, searching in the given yearly order. 
    If you encounter a summary, consider checking the company's official website for the full report.
    Return the first full report URL you find.





[1m> Entering new  chain...[0m
[32;1m[1;3mI need to search for the sustainability reports for Coca-Cola from the years 2023, 2022, and 2021. I should prioritize downloadable URLs and search in the given yearly order. If I encounter a summary, I should check the company's official website for the full report. I need to return the first full report URL I find.

Action: fetch_duckduckgo_results
Action Input: "Coca-Cola sustainability report 2023"[0m
Observation: [36;1m[1;3mSearch Results (sorted based on page rank):
URL: 'https://www.coca-colacompany.com/reports/business-and-sustainability-report'
Downloadable: False
Description: 2022 Business & Sustainabil

InvalidSchema: No connection adapters were found for "'https://www.coca-colacompany.com/content/dam/journey/us/en/reports/coca-cola-business-and-sustainability-report-2022.pdf'"

## ESG KPI Extraction:

In [None]:
#### PDF Loader:
Start by loading a pdf into memory. 

In [None]:
# Import os to set API key
import os
# Import OpenAI as main LLM service
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
# Bring in streamlit for UI/app interface
import streamlit as st

# Import PDF document loaders...there's other ones as well!
from langchain.document_loaders import PyPDFLoader
# Import chroma as the vector store 
from langchain.vectorstores import Chroma

# Import vector store stuff
from langchain.agents.agent_toolkits import (
    create_vectorstore_agent,
    VectorStoreToolkit,
    VectorStoreInfo
)

# Set APIkey for OpenAI Service
# Can sub this out for other LLM providers
os.environ['OPENAI_API_KEY'] = 'youropenaiapikeyhere'

# Create instance of OpenAI LLM
llm = OpenAI(temperature=0.1, verbose=True)
embeddings = OpenAIEmbeddings()

# Create and load PDF Loader
loader = PyPDFLoader('annualreport.pdf')
# Split pages from pdf 
pages = loader.load_and_split()
# Load documents into vector database aka ChromaDB
store = Chroma.from_documents(pages, embeddings, collection_name='annualreport')

# Create vectorstore info object - metadata repo?
vectorstore_info = VectorStoreInfo(
    name="annual_report",
    description="a banking annual report as a pdf",
    vectorstore=store
)
# Convert the document store into a langchain toolkit
toolkit = VectorStoreToolkit(vectorstore_info=vectorstore_info)

# Add the toolkit to an end-to-end LC
agent_executor = create_vectorstore_agent(
    llm=llm,
    toolkit=toolkit,
    verbose=True
)
st.title('🦜🔗 GPT Investment Banker')
# Create a text input box for the user
prompt = st.text_input('Input your prompt here')

# If the user hits enter
if prompt:
    # Then pass the prompt to the LLM
    response = agent_executor.run(prompt)
    # ...and write it out to the screen
    st.write(response)

    # With a streamlit expander  
    with st.expander('Document Similarity Search'):
        # Find the relevant pages
        search = store.similarity_search_with_score(prompt) 
        # Write out the firs
t 
        st.write(search[0][0].page_content) 
Footer
