## Company Analysis for Investments

We'll require the following Python packages
- numpy
- pandas
- requests
- yfinance
- python-edgar (for SEC filings)
- bs4 (BeautifulSoup for webscraping)
- matplotlib (for plotting)
- seaborn (for plotting)

In [58]:
import yfinance as yf
import edgar
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from bs4 import BeautifulSoup

In [2]:
# here is the stock we will analyze
STOCK_SYMBOL = "MSFT"

In [7]:
# download stock data for past 5 years
ticker = yf.Ticker(STOCK_SYMBOL)

In [13]:
# get financials
income_statement = ticker.financials
balance_sheet = ticker.balance_sheet
cash_flows = ticker.cash_flow

# who are major stock holders
holders = ticker.major_holders

In [9]:
income_statement

Unnamed: 0,2024-06-30,2023-06-30,2022-06-30,2021-06-30
Tax Effect Of Unusual Items,-99918000.0,-2850000.0,43754000.0,180160797.164637
Tax Rate For Calcs,0.182,0.19,0.131,0.138266
Normalized EBITDA,133558000000.0,105155000000.0,99905000000.0,83831000000.0
Total Unusual Items,-549000000.0,-15000000.0,334000000.0,1303000000.0
Total Unusual Items Excluding Goodwill,-549000000.0,-15000000.0,334000000.0,1303000000.0
Net Income From Continuing Operation Net Minority Interest,88136000000.0,72361000000.0,72738000000.0,61271000000.0
Reconciled Depreciation,22287000000.0,13861000000.0,14460000000.0,11686000000.0
Reconciled Cost Of Revenue,74114000000.0,65863000000.0,62650000000.0,52232000000.0
EBITDA,133009000000.0,105140000000.0,100239000000.0,85134000000.0
EBIT,110722000000.0,91279000000.0,85779000000.0,73448000000.0


In [11]:
balance_sheet

Unnamed: 0,2024-06-30,2023-06-30,2022-06-30,2021-06-30
Ordinary Shares Number,7434000000.0,7432000000.0,7464000000.0,7519000000.0
Share Issued,7434000000.0,7432000000.0,7464000000.0,7519000000.0
Net Debt,33315000000.0,12533000000.0,35850000000.0,43922000000.0
Total Debt,67127000000.0,59965000000.0,61270000000.0,67775000000.0
Tangible Book Value,121660000000.0,128971000000.0,87720000000.0,84477000000.0
...,...,...,...,...
Cash Cash Equivalents And Short Term Investments,75531000000.0,111256000000.0,104749000000.0,130256000000.0
Other Short Term Investments,57216000000.0,76552000000.0,90818000000.0,116032000000.0
Cash And Cash Equivalents,18315000000.0,34704000000.0,13931000000.0,14224000000.0
Cash Equivalents,6744000000.0,26226000000.0,5673000000.0,6952000000.0


In [12]:
cash_flows

Unnamed: 0,2024-06-30,2023-06-30,2022-06-30,2021-06-30,2020-06-30
Free Cash Flow,74071000000.0,59475000000.0,65149000000.0,56118000000.0,
Repurchase Of Capital Stock,-17254000000.0,-22245000000.0,-32696000000.0,-27385000000.0,
Repayment Of Debt,-29070000000.0,-2750000000.0,-9023000000.0,-3750000000.0,
Issuance Of Debt,29645000000.0,0.0,0.0,,0.0
Issuance Of Capital Stock,2002000000.0,1866000000.0,1841000000.0,1693000000.0,
Capital Expenditure,-44477000000.0,-28107000000.0,-23886000000.0,-20622000000.0,
End Cash Position,18315000000.0,34704000000.0,13931000000.0,14224000000.0,
Beginning Cash Position,34704000000.0,13931000000.0,14224000000.0,13576000000.0,
Effect Of Exchange Rate Changes,-210000000.0,-194000000.0,-141000000.0,-29000000.0,
Changes In Cash,-16179000000.0,20967000000.0,-152000000.0,677000000.0,


In [16]:
ticker.major_holders

Breakdown,Value
insidersPercentHeld,0.00053
institutionsPercentHeld,0.73735
institutionsFloatPercentHeld,0.73774
institutionsCount,6916.0


### Web Scraping SEC Filings & News Articles

In [65]:
help(edgar.download_index)

Help on function download_index in module edgar.main:

download_index(dest, since_year, user_agent, skip_all_present_except_last=False)
    Convenient method to download all files at once



In [66]:
import os
import pathlib
from datetime import datetime

edgar_index_path = pathlib.Path(os.getcwd()) / "edgar"
if not edgar_index_path.exists():
    edgar_index_path.mkdir()
    print(edgar_index_path)

this_year = datetime.now().year
since_year = this_year - 5
print(f"This year: {this_year} - since_year: {since_year}")

edgar.download_index(
    str(edgar_index_path),
    since_year=since_year,
    user_agent="Mozilla/5.0",
    skip_all_present_except_last=False,
)

This year: 2024 - since_year: 2019


HTTPError: HTTP Error 403: Forbidden

In [55]:
def get_edgar_url(ticker):
    sec_url = f"https://www.sec.gov/cgi-bin/browse-edgar?CIK={ticker}&owner=exclude&action=getcompany"
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(sec_url, headers=headers)

    # extract the company's central index key (CIK)
    content = response.text
    # print(content[:500])
    results_start = content.find("CIK</acronym>#:")
    cik_start = content.find(">", results_start) + 5
    cik_end = content.find("(", cik_start)
    # ensure that CIK is a 10 digit zero padded value
    cik = content[cik_start:cik_end].zfill(10)
    print(f"CIK for {ticker}: {cik}")

    # now that we have the CIK, construct the edgar URK to download 10-K filings
    edgar_10k_url = f"https://sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type=10-K&dateb=&owner=exclude&count=40"
    return edgar_10k_url

In [56]:
edgar_10k_url = get_edgar_url(STOCK_SYMBOL)
print(str(edgar_10k_url))

# response = requests.get(edgar_10k_url)
# soup = BeautifulSoup(response.content, "html.parser")

# # find all filings
# filings = soup.find_all("a", {"id": "documentsbutton"})
# # get the latest filing
# filings_urls = ["https://www.sec.gov" + filings["href"] for filing in filings[:1]]
# # display the scraped URLs
# print("Recent 10-k filings URLs:", filings_urls)

CIK for MSFT: CTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>SEC.gov | Request Rate Threshold Exceeded</title>
<style>
html {height: 100%}
body {height: 100%; margin:0; padding:0;}
#header {background-color:#003968; color:#fff; padding:15px 20px 10px 20px;font-family:Arial, Helvetica, sans-serif; font-size:20px; border-bottom:solid 5px #000;}
#footer {background-color:#003968; color:#fff; padding:15px 20px;font-family:Arial, Helvetica, sans-serif; font-size:20px;}
#content {max-width:650px;margin:60px auto; padding:0 20px 100px 20px; background-image:url
https://sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=CTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-e