### download 10K https://github.com/sec-edgar/sec-edgar
### call OpenAI and get data
### create sankey charts

In [None]:
import requests
from bs4 import BeautifulSoup

# URL of the SEC EDGAR filing document to download
url = 'https://www.sec.gov/Archives/edgar/data/789019/000095017024087843/msft-20240630.htm'

# Custom headers to declare user agent and comply with SEC guidelines
headers = {
    'User-Agent': 'Sample Company Name AdminContact@samplecompanydomain.com',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'www.sec.gov',
    'Referer': 'https://www.sec.gov'
}

# Seconds to wait for the server. requests has no default timeout, so without
# this a stalled connection would block the notebook cell indefinitely.
REQUEST_TIMEOUT = 30

# Use a session so the headers above are sent with the request and cookies are kept
with requests.Session() as session:
    session.headers.update(headers)

    try:
        # Only the network call lives in the try body: connection errors,
        # timeouts and HTTP error statuses are all RequestException subclasses.
        response = session.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the filing: {e}")
    else:
        # Save the raw HTML content to a file
        with open('sec_filing_raw.html', 'w', encoding='utf-8') as raw_file:
            raw_file.write(response.text)
        print("Raw HTML content saved to sec_filing_raw.html")

        # Parse the HTML content and collect every <table> element
        soup = BeautifulSoup(response.text, 'html.parser')
        tables = soup.find_all('table')

        # Write each table as tab-separated rows, one numbered section per table
        separator = '\n' + '-' * 80 + '\n\n'
        with open('sec_filing_tables.txt', 'w', encoding='utf-8') as tables_file:
            for number, table in enumerate(tables, start=1):
                tables_file.write(f"Table {number}:\n")
                for row in table.find_all('tr'):
                    cells = [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
                    tables_file.write('\t'.join(cells) + '\n')
                tables_file.write(separator)

        print("All tables extracted and saved to sec_filing_tables.txt")

In [None]:
# Import requests to retrieve web URLs (e.g. HTML or TXT documents)
import requests

# Import BeautifulSoup
from bs4 import BeautifulSoup

# import re module for REGEXes
import re

# import pandas
import pandas as pd

# Get the HTML of the Microsoft 10-K filing document (msft-20240630.htm) from SEC EDGAR.
# SEC guidelines ask automated clients to declare a User-Agent, so send one explicitly;
# the timeout keeps a stalled connection from hanging the cell.
r = requests.get(
    'https://www.sec.gov/Archives/edgar/data/789019/000095017024087843/msft-20240630.htm',
    headers={'User-Agent': 'Sample Company Name AdminContact@samplecompanydomain.com'},
    timeout=30,
)
# Fail loudly on an HTTP error instead of storing an error page as the filing text
r.raise_for_status()
raw_10k = r.text

# Preview the beginning of the document
print(raw_10k[0:1300])


In [None]:
!pip install plotly

In [None]:
import yfinance as yf

# yfinance handle for the MSFT ticker; the following cells read its statements
ticker_symbol = "MSFT"
msft = yf.Ticker(ticker_symbol)

In [None]:
# Gather every report under a readable title. The `get_*` entries are methods
# and must be called; referencing them without parentheses only yields the
# bound method object, not the data.
reports = {
    "quarterly_incomestmt": msft.quarterly_incomestmt,
    "incomestmt": msft.incomestmt,
    "financials": msft.financials,
    "get_cashflow()": msft.get_cashflow(),
    "get_dividends()": msft.get_dividends(),
    "get_income_stmt()": msft.get_income_stmt(),
    "get_splits()": msft.get_splits(),
    "quarterly_financials": msft.quarterly_financials,
    "quarterly_cash_flow": msft.quarterly_cash_flow,
}

# A notebook cell only displays its last bare expression, so print each report
# explicitly to make all of them visible.
for title, report in reports.items():
    print(f"=== {title} ===")
    print(report)
    print()

In [None]:
import plotly.graph_objects as go

# Node captions; a node's position in this list is the index the Sankey links use.
# NOTE(review): 'Sales & Marketing' and 'G&A' are declared here but no link
# below references them — confirm whether links are missing.
labels = [
    'Total Revenue', 'Server', 'Office', 'Gaming', 'Windows', 'LinkedIn', 'Search', 'Other',
    'Cost of Revenue', 'Gross Profit', 'Operating Expenses', 'Operating Profit', 'Net Profit',
    'Tax', 'R&D', 'Sales & Marketing', 'G&A'
]

# Amount flowing from 'Total Revenue' into each segment node
segment_amounts = [
    ('Server', 24_800_000_000),
    ('Office', 13_980_000_000),
    ('Gaming', 5_500_000_000),
    ('Windows', 5_900_000_000),
    ('LinkedIn', 4_000_000_000),
    ('Search', 3_100_000_000),
    ('Other', 4_600_000_000),
]

# Every link of the chart as (from node, to node, amount), in drawing order.
flows = [('Total Revenue', segment, amount) for segment, amount in segment_amounts]

# NOTE(review): each segment sends the same amount to 'Cost of Revenue', which
# looks like a placeholder rather than a per-segment figure — confirm.
flows += [(segment, 'Cost of Revenue', 19_684_000_000) for segment, _ in segment_amounts]

# NOTE(review): several of the remaining links (e.g. 'Gross Profit' -> 'Tax',
# 'Operating Profit' -> 'R&D') look inconsistent with an income-statement
# breakdown — verify the intended topology and figures.
flows += [
    ('Cost of Revenue', 'Gross Profit', 45_043_000_000),
    ('Cost of Revenue', 'Operating Expenses', 17_118_000_000),
    ('Gross Profit', 'Net Profit', 9_062_000_000),
    ('Gross Profit', 'Tax', 8_056_000_000),
    ('Gross Profit', 'R&D', 2_246_000_000),
    ('Operating Expenses', 'Operating Profit', 27_951_000_000),
    ('Operating Profit', 'Net Profit', 22_036_000_000),
    ('Operating Profit', 'Tax', 4_788_000_000),
    ('Operating Profit', 'R&D', 7_653_000_000),
]

# Translate node names into the positional indices the Sankey trace expects
node_index = {name: position for position, name in enumerate(labels)}
sources = [node_index[origin] for origin, _, _ in flows]
targets = [node_index[destination] for _, destination, _ in flows]
values = [amount for _, _, amount in flows]

# Build the Sankey trace and wrap it in a figure
sankey_trace = go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
    ),
    link=dict(source=sources, target=targets, value=values),
)
fig = go.Figure(sankey_trace)

fig.update_layout(title_text="Microsoft Earnings Sankey Chart", font_size=10)
fig.show()