# SEC API 10K Extractor

### Here are three links that explain the task in more detail

In [None]:
#https://www.sec.gov/files/reada10k.pdf
#https://sec-api.io/docs/sec-filings-item-extraction-api#request-parameters
#https://sec-api.io/resources/extract-textual-data-from-edgar-10-k-filings-using-python

### If you do not have sec_api installed, install it here

In [None]:
#%pip install sec_api

In [1]:
from sec_api import ExtractorApi, QueryApi
import requests
import os,json, time, re
import shutil
from datetime import datetime
import pandas as pd

### Set the parameters for the script: the ticker, the filing type and the date range you want to search filings for

In [2]:
# Search parameters: which company and which form type to look for,
# and the filing-date window (both dates in YYYY-MM-DD format).
ticker = "APLD"
filing_type = "10-K"
start_date = "2024-06-01"
end_date = "2024-09-30"

# Read the API key from the SEC_API_KEY environment variable so the real key
# never has to be saved inside the notebook. When the variable is not set, the
# placeholder below is used and must be replaced by hand, as before.
api_key = os.environ.get("SEC_API_KEY", '<YOUR API KEY>')


In [3]:
# Search request: filings for `ticker` with form type `filing_type` whose
# filedAt falls between `start_date` and `end_date`, sorted newest first.
query = {
  "query": f"ticker:{ticker} AND filedAt:[{start_date} TO {end_date}] AND formType:\"{filing_type}\"",
  "from": "0",
  "size": "10",
  "sort": [{ "filedAt": { "order": "desc" } }]
}

# Query API client, authenticated with the configured API key
queryApi = QueryApi(api_key=api_key)

# Run the search; later cells read the matches from response["filings"]
response = queryApi.get_filings(query)



### Extract is a function that takes a filing URL as input and extracts and maps all sections into a python dict

In [15]:
def extract(filing_url):
    """Extract every 10-K item of one filing as text and map it into a dict.

    One `ExtractorApi.get_section(filing_url, <item>, "text")` call is made per
    item, in the order 1, 1A, 1B, 1C, 2, ..., 9, 9A, 9B, 10, ..., 15.

    Args:
        filing_url: URL of the filing document to extract the sections from.

    Returns:
        dict with one entry per item, keyed "section_1", "section_1a", ...,
        "section_15". Each value is a single-entry dict of the form
        {<item title>: <value returned by get_section>}.

    The API key is read from the module-level `api_key` variable.
    """
    extractor = ExtractorApi(api_key)

    # (key in the returned dict, item code sent to the API, item title)
    # NOTE(review): the titles are kept exactly as they were because they are
    # keys of the returned (and saved) structure. Two of them look like typos --
    # "For Future USe" for item 4 and the stray leading quote on the 7A title --
    # confirm with downstream consumers before changing them.
    sections = [
        ("section_1", "1", "Business"),
        ("section_1a", "1A", "Risk Factors"),
        ("section_1b", "1B", "Unresolved Staff Comments"),
        ("section_1c", "1C", "Cybersecurity"),
        ("section_2", "2", "Properties"),
        ("section_3", "3", "Legal Proceedings"),
        ("section_4", "4", "For Future USe"),
        ("section_5", "5", "Market for Registrant’s Common Equity, Related Stockholder Matters and Issuer Purchases of Equity Securities"),
        ("section_6", "6", "Selected Financial Data"),
        ("section_7", "7", "Management’s Discussion and Analysis of Financial Condition and Results of Operations"),
        ("section_7a", "7A", "“Quantitative and Qualitative Disclosures about Market Risk"),
        ("section_8", "8", "Financial Statements and Supplementary Data"),
        ("section_9", "9", "Changes in and Disagreements with Accountants on Accounting and Financial Disclosure"),
        ("section_9a", "9A", "Controls and Procedures"),
        # Fixed: this entry used to request item "9A" a second time, so
        # "Other Information" held a copy of the Controls and Procedures text.
        ("section_9b", "9B", "Other Information"),
        ("section_10", "10", "Directors, Executive Officers and Corporate Governance"),
        ("section_11", "11", "Executive Compensation"),
        ("section_12", "12", "Security Ownership of Certain Beneficial Owners and Management and Related Stockholder Matters"),
        ("section_13", "13", "Certain Relationships and Related Transactions, and Director Independence"),
        ("section_14", "14", "Principal Accountant Fees and Services"),
        ("section_15", "15", "Exhibits, Financial Statement Schedules"),
    ]

    # Dict comprehensions evaluate in list order, so the API is called once per
    # item in the order listed above.
    return {
        key: {title: extractor.get_section(filing_url, item, "text")}
        for key, item, title in sections
    }

### Once the filing information has been retrieved, collect the URLs of the filings whose sections you want to extract

In [None]:
# Tabular view of the returned filing records (not read by the cells shown below)
x = pd.DataFrame.from_records(response['filings'])

# Collect the "linkToFilingDetails" URL of every returned filing
urls_list = [filing["linkToFilingDetails"] for filing in response["filings"]]

In [20]:
# Show the filing URLs collected in urls_list
print(f" If found these URLs : {urls_list}")

 If found these URLs : ['https://www.sec.gov/Archives/edgar/data/1144879/000114487924000216/apld-20240531.htm', 'https://www.sec.gov/Archives/edgar/data/1144879/000114487924000213/apld-form10xkx12bx25ntdraf.htm']


### Now use the first URL (for example) to extract the different sections

In [16]:
# Run the section extraction on the first URL in urls_list
filing_dict=extract(urls_list[0])

### You can print out a sample section to verify the data

In [17]:
# Print the entry stored under 'section_3' as a quick check of the extracted data
print(filing_dict['section_3'])


{'Legal Proceedings': " Item 3. Legal Proceedings \n\nFrom time to time, we may become involved in legal proceedings. \n\nThe Company, Wes Cummins, the Company's Chief Executive Officer, and David Rench, the Company's Chief Financial Officer, have been named as defendants in a putative securities class action lawsuit in the matter styled, McConnell v. Applied Digital Corporation, et al., Case No. 3:23-cv-1805, filed in August 2023 in the U.S. District Court for the Northern District of Texas (the &#8220;Securities Lawsuit&#8221;). Specifically, the complaint asserts claims pursuant to Section 10(b) and 20(a) of the Securities Exchange Act of 1934, as amended, based on allegedly false or misleading statements regarding the company&#8217;s business, operations, and compliance policies, including claims that the Company overstated the profitability of its data center hosting business and its ability to successfully transition into a low-cost cloud services provider and that the Company&#8

### Simply store the dict as a JSON file. We could also use a vector DB, but let's do that at a later stage

In [18]:
# The output file name is built from the ticker and the filing type
json_file=f"{ticker}-{filing_type}.json"

# Serialise the extracted sections with 4-space indentation and write them out
with open(json_file, 'w') as file:
    file.write(json.dumps(filing_dict, indent=4))



### That's it.