In [1]:
from pathlib import Path

In [2]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
from llama_index.readers.file import PDFReader, PyMuPDFReader


In [3]:
# Short description of each source PDF, keyed by the file's base name.
file_details = {
    "2Q24 Earnings Release_Final.pdf": "2nd quarter 2024 earnings release Final of PNC Bank with detailed financial information for first two quarters of year 2024.",
    "2Q24 Financial Supplement_Final.pdf": "Supplemental data for 2nd quarter 2024 earnings release Final of PNC Bank with financial tables.",
    "Board of Directors  PNC.pdf": "PNC Bank Board of Directors member information.",
    "PNC 2023 10-K.pdf": "PNC Bank Form 10-K report for year 2023.",
    "PNC 2023 Annual Report.pdf": "PNC Bank detailed Annual Report for year 2023.",
    "pnc_privacy_notice.pdf": "Information on what kind of customer personal information PNC Bank share and not.",
}


def get_meta(filename):
    """Return the metadata dict for one file.

    Args:
        filename: Path of the file (string or path-like).

    Returns:
        A dict with two keys: ``file_path`` holds ``filename`` unchanged, and
        ``file_details`` holds the description registered in ``file_details``
        for the file's base name, or ``""`` when there is no entry.
    """
    # Look up by base name so any directory prefix is ignored.
    base_name = Path(filename).name
    description = file_details.get(base_name, "")
    return {"file_path": filename, "file_details": description}

In [4]:
# PDF reader handed to `SimpleDirectoryReader` for every ".pdf" file.
# Only the extractor mapping is built here; the documents themselves are
# loaded from "./pnc" in the next cell. Loading another directory into
# `documents` at this point would be discarded by that later assignment.
parser = PyMuPDFReader()
file_extractor = {".pdf": parser}

In [5]:
# Read the "./pnc" directory: ".pdf" files are parsed via `file_extractor`,
# and `get_meta` is passed as the per-file metadata callback.
pnc_reader = SimpleDirectoryReader(
    "./pnc",
    file_extractor=file_extractor,
    file_metadata=get_meta,
)
documents = pnc_reader.load_data()

In [6]:
# Register the bge-base (English, v1.5) HuggingFace embedding model on the
# global `Settings` object.
embedding_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
Settings.embed_model = embedding_model

In [7]:
# Register the Groq-hosted LLM on the global `Settings` object.
# NOTE(review): no API key is passed here; presumably it is picked up from
# the environment. Confirm before running on a fresh kernel.
groq_llm = Groq(model="llama-3.1-8b-instant")
Settings.llm = groq_llm

In [8]:
# Build the vector store index from the loaded documents.
index = VectorStoreIndex.from_documents(documents)

In [9]:
# Query engine over `index`, created with default arguments; used by every
# question cell below.
query_engine = index.as_query_engine()

In [10]:
# Ask who chairs PNC and request supporting evidence from the documents.
question = "Who is PNC Chairman? Provide evidence from documents to support your answer."
response = query_engine.query(question)
print(response)

William S. Demchak is the Chairman & Chief Executive Officer of The PNC Financial Services Group. This is supported by the document, which states: "William S. Demchak is chairman and chief executive officer of The PNC Financial Services Group, one of the largest diversified financial services companies in the United States."


In [11]:
# Ask about one board member's background.
# NOTE(review): the query text misspells "experience" as "experiance"; it is
# kept verbatim so the recorded output still corresponds to this exact query.
question = "Tell me about Bryan Salesky work experiance."
response = query_engine.query(question)
print(response)

Bryan Salesky has a decade of experience in roles of increasing responsibility across leading technology companies, including Google and Carnegie Mellon University's National Robotics Engineering Center (NREC). Prior to that, he co-founded and served as CEO of Argo AI, LLC, a self-driving technology platform company. He also co-founded and serves as the CEO of Stack AV Co., a developer and builder of autonomous trucking solutions.


In [12]:
# Ask how PNC handles customer personal information.
question = "What does PNC do with customer personal information?"
response = query_engine.query(question)
print(response)

PNC collects and shares customer personal information for various purposes, including everyday business operations, marketing, and joint marketing with other financial companies. This information can include social security numbers, income, account balances, credit scores, and payment history. PNC shares this information with its affiliates and non-affiliates, but customers have the right to limit some of this sharing.


In [13]:
# Ask for 2023 diluted earnings per common share, with supporting evidence.
question = "What was PNC diluted earnings per common share in 2023? Provide evidence from documents to support your answer."
response = query_engine.query(question)
print(response)

$12.79


In [14]:
# Ask how 2023 revenue compares with earlier years.
question = "How good PNC revenue was in 2023 compared to previous years?"
response = query_engine.query(question)
print(response)

PNC generated record revenue of $21.5 billion in 2023, which is a significant improvement compared to previous years.


In [15]:
# Ask for a table of revenue, net income and non-interest expenses for the
# first two quarters of 2024, with supporting evidence.
# NOTE(review): the query text misspells "Non-interest" as "Non-interst"; it
# is kept verbatim so the recorded output still corresponds to this exact query.
question = "Can you print a table showing PNC Revenue, Net Income and Total Non-interst Expenses for first two quarters of 2024? Provide evidence from documents to support your answer."
response = query_engine.query(question)
print(response)

Here is the table showing PNC Revenue, Net Income and Total Non-interest Expenses for the first two quarters of 2024:

|  | Three months ended June 30, 2024 | Six months ended June 30, 2024 |
| --- | --- | --- |
| Revenue | $5,411 million | $10,556 million |
| Net Income | $1,477 million | $2,821 million |
| Total Non-interest Expenses | $3,357 million | $6,691 million |

Evidence from documents:

* Revenue: "Total revenue $5,411 $5,145 $5,293 $10,556 $10,896" (Source: 2Q24 Earnings Release_Final.pdf, page 2)
* Net Income: "Net income $1,477 $1,344 $1,500 $2,821 $3,194" (Source: 2Q24 Earnings Release_Final.pdf, page 2)
* Total Non-interest Expenses: "Noninterest expense $3,357 $3,334 $3,372 $6,691 $6,693" (Source: 2Q24 Earnings Release_Final.pdf, page 2)
