In [1]:
import requests
from bs4 import BeautifulSoup

def get_links(ticker, output_type = "xml", count = 100, pages = 19, date_lim = 2010):
    """Collect filing links for one company from the EDGAR ``browse-edgar`` listing.

    Requests up to ``pages`` result pages of ``count`` entries each and records
    one tuple per ``<filing>`` element found in the response.

    Paging stops as soon as either
      * a filing whose year is earlier than ``date_lim`` is seen, or
      * a page contains no ``<filing>`` elements at all.
    Stopping at the first too-old filing assumes the listing is returned
    newest first -- TODO confirm against the EDGAR output.

    Parameters
    ----------
    ticker : str
        Value sent as the ``CIK`` query parameter.
    output_type : str
        Value sent as the ``output`` query parameter.
    count : int
        Entries requested per page; also the step between page offsets.
    pages : int
        Maximum number of result pages to request.
    date_lim : int
        Earliest filing year to keep. The first four characters of each
        filing date are read as the year.

    Returns
    -------
    list of tuple
        ``(link, date, form_type, form_name)`` for every kept filing, in the
        order encountered. ``form_type`` falls back to ``"NoneType"`` and
        ``form_name`` to ``"NoneName"`` when the corresponding tag is missing.
    """
    links = []

    # loop through pages in the increment specified above
    for start_index in range(0, pages*count, count):

        url = f"http://www.sec.gov/cgi-bin/browse-edgar?"\
              f"action=getcompany&CIK={ticker}"\
              f"&type=&dateb=&owner=&start={start_index}"\
              f"&count={count}&output={output_type}"

        # a timeout keeps a stalled connection from hanging the notebook forever
        request = requests.get(url, timeout=30)
        soup = BeautifulSoup(request.text)

        filings = soup.find_all('filing')
        if not filings:
            # nothing on this page, so later offsets have nothing to offer either
            break

        reached_date_limit = False
        for filing in filings:
            link = filing.filinghref.text
            date = filing.datefiled.text

            if int(date[:4]) < date_lim:
                reached_date_limit = True
                break

            # a missing tag is returned as None, hence the AttributeError fallbacks
            try:
                form_type = filing.type.text
            except AttributeError:
                form_type = "NoneType"

            try:
                form_name = filing.formname.text
            except AttributeError:
                form_name = "NoneName"

            links.append((link, date, form_type, form_name))

        if reached_date_limit:
            # leave the page loop too; a bare inner break would keep on
            # requesting every remaining page for no benefit
            break

    return links

In [2]:
import os

def get_files(folder_name, link_list, marker = "Complete submission text file"):
    """Download one text file per filing into ``folder_name``.

    For each entry of ``link_list`` the filing index page is fetched, the
    ``tableFile`` table is searched for the cell whose text equals ``marker``,
    and the first link following that cell is downloaded and saved as
    ``<date>_index_<position>.txt`` (UTF-8 encoded) inside ``folder_name``.

    The folder is created if needed and may already exist, so the cell can be
    re-run. The process working directory is never changed; files are written
    through an explicit path instead.

    Parameters
    ----------
    folder_name : str
        Directory that receives the downloaded files.
    link_list : iterable of tuple
        ``(link, date, form_type, form_name)`` tuples; only ``link`` and
        ``date`` are used here.
    marker : str
        Exact cell text that identifies the row of the document to download.

    Raises
    ------
    AttributeError
        If an index page has no ``tableFile`` table or no cell matching
        ``marker`` (the lookup is not guarded).
    """
    # exist_ok: re-running the cell must not fail on an already created folder
    os.makedirs(folder_name, exist_ok=True)

    for index, tup in enumerate(link_list):

        # file naming; the form fields are unpacked but not used yet
        link, date, _form_type, _form_name = tup
        filename = f"{date}_index_{index}"

        # fetch the filing index page
        index_page = requests.get(link, timeout=30)
        soup = BeautifulSoup(index_page.text)
        table = soup.find("table",{"class":"tableFile"})

        # find the link that follows the marker cell
        text_sub = 'http://sec.gov' + table.find(text = marker).find_next('a')['href']

        # NOTE: parsing of the downloaded submission is intended to go here;
        # for now the raw text is stored unchanged.
        submission = requests.get(text_sub, timeout=30)
        text = submission.text

        # explicit path instead of os.chdir, so a failure part-way through
        # cannot leave the kernel stranded inside folder_name
        target_path = os.path.join(folder_name, filename + ".txt")
        with open(target_path, "wb") as text_file:
            text_file.write(text.encode("utf-8"))

In [7]:
tickers = ["WMT","TGT","AMZN"]
# cap on the number of filings downloaded per ticker, for demonstration purposes
limit = 20

for ticker in tickers:
    # pages = 1 requests only the first results page for each ticker
    link_list = get_links(ticker, pages = 1)
    # slice with `limit` so the cap above is the single place to tune
    get_files(f"{ticker}_XML", link_list[:limit])