# Use SEC EDGAR API to generate visualizations


In [31]:
import requests
import pandas as pd
import json # For potential debugging if JSON response is malformed

# --- Configuration ---
# Microsoft's CIK (Central Index Key) - needs to be 10 digits for the API URL
# Microsoft's CIK is 789019. Padded to 10 digits: 0000789019
COMPANY_CIK = "0000789019"

# **IMPORTANT**: Replace with your actual name/company and email for the User-Agent
HEADERS = {'User-Agent': "FirstName LastName youremail@example.com"}

# Financial metrics and common US-GAAP XBRL tags to search for (ordered by likely preference)
METRICS_TO_EXTRACT = {
    "Revenue": ["RevenueFromContractWithCustomerExcludingAssessedTax", "Revenues", "SalesRevenueNet"],
    "GrossProfit": ["GrossProfit"],
    "NetIncome": ["NetIncomeLoss", "ProfitLoss"] # NetIncomeLoss is common for US GAAP
}
YEARS_OF_INTEREST = range(2018, 2024) # For fiscal years 2018 through 2023

# --- Helper Functions ---
def get_company_facts(cik, headers):
    """Fetches all company facts from the SEC EDGAR API for a given CIK."""
    url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json"
    print(f"Fetching data from: {url}")
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Will raise an HTTPError for bad status codes (4XX or 5XX)
        return response.json()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err} - Status Code: {response.status_code}")
        print(f"Response Text: {response.text[:500]}") # Print first 500 chars of response
    except requests.exceptions.RequestException as req_err:
        print(f"Request error occurred: {req_err}")
    except json.JSONDecodeError:
        print(f"Error decoding JSON response. URL: {url}")
        print(f"Response Text: {response.text[:500]}...")
    return None

def extract_specific_financial_data(company_facts, metric_possible_tags, target_fiscal_year):
    """
    Extracts a specific financial data point for a given metric, fiscal year, and form type (10-K).
    """
    if not company_facts or 'facts' not in company_facts or 'us-gaap' not in company_facts['facts']:
        # print(f"Warning: 'facts' or 'us-gaap' not found in company_facts for FY{target_fiscal_year}.")
        return None

    for tag in metric_possible_tags:
        if tag in company_facts['facts']['us-gaap']:
            # Data is usually reported in USD
            fact_data_list = company_facts['facts']['us-gaap'][tag]['units'].get('USD')
            if fact_data_list:
                for fact_item in fact_data_list:
                    # We want annual data from 10-K filings for the specific fiscal year
                    if fact_item.get('form') == '10-K' and \
                       fact_item.get('fy') == target_fiscal_year and \
                       fact_item.get('fp') == 'FY': # FY indicates full fiscal year
                        return fact_item.get('val')
        # else:
            # print(f"Warning: Tag '{tag}' not found in us-gaap facts for FY{target_fiscal_year}.")
            
    # print(f"Warning: None of the tags {metric_possible_tags} found or matched criteria for FY{target_fiscal_year}.")
    return None

# --- Main Script Logic ---
print(f"Attempting to retrieve financial data for Microsoft (CIK: {COMPANY_CIK.lstrip('0')})...")
if HEADERS['User-Agent'] == "FirstName LastName youremail@example.com":
    print("\n!!! WARNING: Please update the 'HEADERS' variable with your actual User-Agent information. !!!\n")

all_company_facts = get_company_facts(COMPANY_CIK, HEADERS)

extracted_financials = []

if all_company_facts:
    print("\nCompany facts fetched successfully. Extracting specific data points...")
    for year in YEARS_OF_INTEREST:
        data_for_year = {"Fiscal Year": year}
        for metric_display_name, possible_tags in METRICS_TO_EXTRACT.items():
            value = extract_specific_financial_data(all_company_facts, possible_tags, year)
            data_for_year[metric_display_name] = value if value is not None else "N/A"
        extracted_financials.append(data_for_year)

    # --- Display Results using Pandas DataFrame ---
    if extracted_financials:
        df = pd.DataFrame(extracted_financials)
        df = df.set_index("Fiscal Year")
        print("\n--- Microsoft Financial Data (from 10-K filings) ---")
        print(df)

        # Check for any "N/A" values to alert the user
        if df.applymap(lambda x: x == "N/A").any().any():
            print("\nNote: 'N/A' indicates data was not found for that metric/year with the specified tags.")
            print("This could be due to variations in XBRL tagging or data not being reported under those exact tags in a given year.")
    else:
        print("Could not extract the required financial data points. Check XBRL tags and data availability.")
else:
    print("Failed to retrieve company facts. Check CIK, User-Agent, and network connection.")

Attempting to retrieve financial data for Microsoft (CIK: 789019)...


Fetching data from: https://data.sec.gov/api/xbrl/companyfacts/CIK0000789019.json

Company facts fetched successfully. Extracting specific data points...

--- Microsoft Financial Data (from 10-K filings) ---
                  Revenue   GrossProfit    NetIncome
Fiscal Year                                         
2018          91154000000   58374000000  20539000000
2019          96571000000   62310000000  25489000000
2020         110360000000   72007000000  16571000000
2021         125843000000   82933000000  39240000000
2022         143015000000   96937000000  44281000000
2023         168088000000  115856000000  61271000000


  if df.applymap(lambda x: x == "N/A").any().any():
