In [None]:
# !pip install pdfplumber

In [3]:
import pdfplumber
import re

def extract_income_data(text):
    def find_number(pattern):
        match = re.search(pattern, text)
        return float(match.group(1).replace(',', '')) if match else None

    data = {
        "Net Sales - Products": find_number(r"Products\s+!?\s*([\d,]+)"),
        "Net Sales - Services": find_number(r"Services\s+([\d,]+)"),
        "Total Net Sales": find_number(r"Total net sales.*?([\d,]+)"),
        "Cost of Sales - Products": find_number(r"Cost of sales:\s+Products\s+([\d,]+)"),
        "Cost of Sales - Services": find_number(r"Services\s+([\d,]+)"),
        "Total Cost of Sales": find_number(r"Total cost of sales\s+([\d,]+)"),
        "Gross Margin": find_number(r"Gross margin\s+([\d,]+)"),
        "R&D Expense": find_number(r"Research and development\s+([\d,]+)"),
        "SG&A Expense": find_number(r"Selling, general and administrative\s+([\d,]+)"),
        "Total Operating Expense": find_number(r"Total operating expenses\s+([\d,]+)"),
        "Operating Income": find_number(r"Operating income\s+([\d,]+)"),
        "Other Income": find_number(r"Other income/\(expense\), net\s+\(?-?([\d,]+)"),
        "Income Before Tax": find_number(r"Income before provision for income taxes\s+([\d,]+)"),
        "Income Tax": find_number(r"Provision for income taxes\s+([\d,]+)"),
        "Net Income": find_number(r"Net income\s+!?([\d,]+)"),
        "EPS Basic": find_number(r"Earnings per share:.*?Basic\s+!?([\d\.]+)"),
        "EPS Diluted": find_number(r"Diluted\s+!?([\d\.]+)"),
        "Shares Basic": find_number(r"Shares used in computing earnings per share:\s+Basic\s+([\d,]+)"),
        "Shares Diluted": find_number(r"Diluted\s+([\d,]+)")
    }

    return data

def main():
    with pdfplumber.open("./data/finacial statements/FY24_Q1_Consolidated_Financial_Statements.pdf") as pdf:
        text = ""
        for page in pdf.pages:
            text += page.extract_text() + "\n"

    income_data = extract_income_data(text)

    print("\n📊 Báo Cáo Kết Quả Kinh Doanh - Apple (FY24 Q1):\n")
    for k, v in income_data.items():
        print(f"{k}: {v:,.0f}" if isinstance(v, float) else f"{k}: {v}")

if __name__ == "__main__":
    main()



📊 Báo Cáo Kết Quả Kinh Doanh - Apple (FY24 Q1):

Net Sales - Products: 96,458
Net Sales - Services: 23,117
Total Net Sales: 1
Cost of Sales - Products: 58,440
Cost of Sales - Services: 23,117
Total Cost of Sales: 64,720
Gross Margin: 54,855
R&D Expense: 7,696
SG&A Expense: 6,786
Total Operating Expense: 14,482
Operating Income: 40,373
Other Income: 50
Income Before Tax: 40,323
Income Tax: 6,407
Net Income: 33,916
EPS Basic: None
EPS Diluted: 15
Shares Basic: 15,509,763
Shares Diluted: 15,576,641
