# PDF files

In [1]:
from langchain.document_loaders import PyPDFLoader
files = [{'name':'Mark-and-Spencer-2023-annual-report','path':'pdf files/m&s.pdf'},{'name':'Sainsbury-2023-annual-report','path':'pdf files/sainsbury.pdf'},
        {'name':'Tesco-2023-annual-report','path':'pdf files/tesco.pdf'}]
for file in files:
    loader = PyPDFLoader(file['path'])
    pages = loader.load()

In [2]:
pages

[Document(page_content='Serving our customers,  \ncommunities and planet  \na little better every day.\nAnnual Report and \nFinancial Statements  \n2023.', metadata={'source': 'pdf files/tesco.pdf', 'page': 0}),
 Document(page_content='Contents 2023 highlights\nStrategic report\n2023 highlights ............................................................................... IFC\nIntroduction ....................................................................................... 1\nTesco at a glance ............................................................................... 2\nSupporting stakeholders through the cost-of-living crisis ............... 4 \nPurpose and values ........................................................................... 6\nChairman’s statement ...................................................................... 7\nGroup Chief Executive’s review ........................................................ 8\nOur market context .................................

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
chunk_size = 500
chunk_overlap = 100

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
splits = text_splitter.split_documents(pages)

In [8]:
print(f'Split {len(pages)} documents into {len(splits)} chunks.')

Split 220 documents into 2536 chunks.


# Vector database

In [3]:
import getpass
import os

os.environ["OPENAI_API_KEY"] = getpass.getpass("Provide your OPENAI API KEY key here")

Provide your OPENAI API KEY key here ········


In [4]:
# !pip install pysqlite3-binary
# !pip install langchain-chroma
# !pip install langchain-openai
import pysqlite3
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
import shutil

persist_directory = 'pdf files/chroma/'

vectordb = Chroma.from_documents(splits, OpenAIEmbeddings(), persist_directory=persist_directory)

print(f"Saved {len(splits)} chunks to {persist_directory}.")

Saved 2536 chunks to pdf files/chroma/.


In [7]:
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""
results = vectordb.similarity_search_with_relevance_scores('profit performance', k=3)
from langchain.prompts import ChatPromptTemplate
if len(results) == 0 or results[0][1] < 0.7:
    print(f"Unable to find matching results.")
else:
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question='profit performance')
    print(prompt)

Human: 
Answer the question based only on the following context:

performance framework sets out the objectives of the business: 
to drive top-line growth; to grow absolute profits while maintaining 
sector leading margins; and to generate stable retail free cash flow 
each year. The delivery of these objectives will enable the Group 
to maintain a strong balance sheet, invest for growth and deliver 
improved returns for shareholders.Management recognise that customers across the Group currently

---

available to shareholders.  
– This is a key manageme nt incentive metric. 
(a) Operating profit is presented on the Group income statement. It is not defined per IFRS, however, is a generally accepted profi t measure. 
(b) Refer to Note 1 and Note 4.  
Other information
209 Tesco PLC Annual Report and Financial Statements 2023

---

The chart below demonstrates the performance outcomes of our 
2022/23 bonus and 2020 PSP. Full details of performance against 
the 2022/23 individual objecti

# Custom Tools & Agent

In [8]:
retriever = vectordb.as_retriever()

In [10]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0,model='gpt-3.5-turbo-1106',verbose=False)

In [14]:
# !pip install langchainhub
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.tools import tool
from langchain import hub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.agents import Tool, AgentExecutor, create_openai_tools_agent
# create input schema
class PromptInput(BaseModel):
    question: str
tools = [Tool(args_schema=PromptInput, name=files[0]["name"], 
                  description=f"useful for when you need to answer questions about {files[0]['name']}",
                  func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever)),
        Tool(args_schema=PromptInput, name=files[1]["name"], 
                  description=f"useful for when you need to answer questions about {files[1]['name']}",
                  func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever)),
        Tool(args_schema=PromptInput, name=files[2]["name"], 
                  description=f"useful for when you need to answer questions about {files[2]['name']}",
                  func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever))]

prompt = hub.pull("hwchase17/openai-tools-agent")

agent = create_openai_tools_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

response = agent_executor.invoke({"input": "Can you compare the performance between Sainsbury and Tesco?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Sainsbury-2023-annual-report` with `{'question': 'What is the revenue and profit of Sainsbury in 2023?'}`


[0m[33;1m[1;3m{'query': 'What is the revenue and profit of Sainsbury in 2023?', 'result': "I'm sorry, but I don't have that information. It seems like you're asking about Sainsbury, but the provided context only includes information about Tesco PLC. If you have a specific question about Tesco PLC, I'd be happy to help with that."}[0m[32;1m[1;3m
Invoking: `Tesco-2023-annual-report` with `{'question': 'What is the revenue and profit of Tesco in 2023?'}`


[0m[38;5;200m[1;3m{'query': 'What is the revenue and profit of Tesco in 2023?', 'result': "I'm sorry, but I don't have access to the specific revenue and profit figures for Tesco in 2023 based on the provided context. I recommend checking Tesco's official website or financial reports for the most accurate and up-to-date information."}[0m[32;1m[1;3

In [21]:
response = agent_executor.invoke({"input": "What is the financial performance of Sainsbury's and Tesco PLC?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Sainsbury-2023-annual-report` with `{'question': "What is the strategy of Sainsbury's?"}`


[0m[33;1m[1;3m{'query': "What is the strategy of Sainsbury's?", 'result': "I don't know, I don't have that information."}[0m[32;1m[1;3m
Invoking: `Tesco-2023-annual-report` with `{'question': 'What is the strategy of Tesco PLC?'}`


[0m[38;5;200m[1;3m{'query': 'What is the strategy of Tesco PLC?', 'result': 'The strategy of Tesco PLC focuses on four priorities: serving customers better, being competitive, remaining agile and efficient as a business, and doing the basics brilliantly while leveraging unique strengths to accelerate growth. This strategy aims to deliver great value and quality to customers while maintaining a competitive edge in the market.'}[0m[32;1m[1;3mThe strategy of Tesco PLC focuses on four priorities: serving customers better, being competitive, remaining agile and efficient as a business, an

In [26]:
response = agent_executor.invoke({"input": "How many supermarkets does Sainsbury and Tesco have?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Sainsbury-2023-annual-report` with `{'question': 'How many supermarkets does Sainsbury have?'}`


[0m[33;1m[1;3m{'query': 'How many supermarkets does Sainsbury have?', 'result': "I don't have that information."}[0m[32;1m[1;3m
Invoking: `Tesco-2023-annual-report` with `{'question': 'How many supermarkets does Tesco have?'}`


[0m[38;5;200m[1;3m{'query': 'How many supermarkets does Tesco have?', 'result': 'Tesco operates 4,859 stores in five markets: the UK, the Republic of Ireland, the Czech Republic, Slovakia, and Hungary. This includes superstores, Tesco Express stores, and other formats.'}[0m[32;1m[1;3mSainsbury's exact number of supermarkets is not available. However, Tesco operates 4,859 stores in five markets, including the UK, the Republic of Ireland, the Czech Republic, Slovakia, and Hungary. This includes superstores, Tesco Express stores, and other formats.[0m

[1m> Finished chain.[0m


In [31]:
response = agent_executor.invoke({"input": "What does a Sainsbury stakeholder need to know?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Sainsbury-2023-annual-report` with `{'question': 'stakeholder information'}`


[0m[33;1m[1;3m{'query': 'stakeholder information', 'result': 'Based on the provided context, it is clear that the company places significant importance on stakeholder engagement and regularly seeks to understand the views of its major investors and other stakeholders. The Board values the insight gained from stakeholder engagement and places significant importance on maintaining close relationships with stakeholders, taking account of and responding to their views. This demonstrates a commitment to understanding and addressing the needs and perspectives of various stakeholders.'}[0m[32;1m[1;3mA Sainsbury stakeholder needs to know that the company places significant importance on stakeholder engagement and regularly seeks to understand the views of its major investors and other stakeholders. The Board values the insight gained from

In [34]:
response = agent_executor.invoke({"input": "can you compare the strategy of mark & spencer and tesco?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Mark-and-Spencer-2023-annual-report` with `{'question': 'What is the current strategy of Mark & Spencer?'}`


[0m[36;1m[1;3m{'query': 'What is the current strategy of Mark & Spencer?', 'result': 'The current strategy of Mark & Spencer is focused on helping customers spend less by offering affordable prices through initiatives like Aldi Price Match, Low Everyday Prices, and Clubcard Prices. The company aims to provide brilliant, helpful service and sustainable products while also investing in their colleagues. They have also invested in their value offer to help customers save money on groceries and banking. Additionally, their strategic priority includes driving top-line growth by increasing customer satisfaction and maintaining their core UK market share.'}[0m[32;1m[1;3m
Invoking: `Tesco-2023-annual-report` with `{'question': 'What is the current strategy of Tesco?'}`


[0m[38;5;200m[1;3m{'query': 'What 

# Prompt

In [None]:
# not neccessary to run, this is for recall the database
persist_directory = 'pdf files/chroma/'
embedding_function = OpenAIEmbeddings()
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)

In [None]:
# !pip install dash-bootstrap-components
import dash_bootstrap_components as dbc
from dash import Dash, html, Input, Output, State, callback
app = Dash(__name__,
           external_stylesheets=[dbc.themes.FLATLY])

app.layout = html.Div(
    [
        dbc.NavbarSimple(
            brand="Financial help",
            brand_href="/",
            color="#165AA7",
            sticky='top',
            links_left=True,
            dark=True,
            expand=True
        ),

        html.Br(),

        dbc.Container(
                    fluid=True,
                    children=[
                        dbc.Row(
                            [
                                dbc.Col(
                                    width=1,
                                ),
                                dbc.Col(
                                    width=10,
                                    children=dbc.Card(
                                        [
                                            dbc.CardHeader("ChatGPT Agent"),
                                            dbc.CardBody([
                                                html.Br(),
                                                dbc.InputGroup([
                                                    dbc.Input(id='prompt', value="", placeholder='Your prompt ...', type='text'),
                                                    dbc.Button(id='sendPrompt', children=">", color="success", n_clicks=0),
                                                    ],
                                                ),
                                                html.Br(),
                                                html.P(id='outputHuman', children=""),
                                                html.P(id='outputChatBot', children=""),
                                            ])
                                        ],
                                    )
                                ),
                                dbc.Col(
                                    width=1,
                                ),
                            ]
                        )
                    ]
                ),

    ]
)

@callback(
    Output(component_id='outputHuman', component_property='children'),
    Output(component_id='outputChatBot', component_property='children'),
    Output(component_id='prompt', component_property='value'),
    Input(component_id='sendPrompt', component_property='n_clicks'),
    State(component_id='prompt', component_property='value')
)
def call_openai_api(n, human_prompt):
    if n==0:
        return "", "", ""
    else:
        result_ai = agent_executor.invoke(human_prompt)
        human_output = f"You: {human_prompt}"
        chatbot_output = f"ChatBot: {result_ai}"

        return human_output, chatbot_output, ""

if __name__ == '__main__':
    app.run(jupyter_mode="tab",port=8060)