# SEC EDGAR Scraping Notebook

@author: Rowan Pan

## Import BeautifulSoup and Process Data

In [1]:
# Silence all warnings for the rest of the session: with no category or
# module given, the 'ignore' filter applies to every warning raised
# (this also hides deprecation notices from the libraries used below).
import warnings
warnings.filterwarnings('ignore')

In [2]:
from bs4 import BeautifulSoup
import requests

In [3]:
import pandas as pd
import numpy as np

In [4]:
# Root of the EDGAR archive tree; CIK and filing path segments are appended to it.
base_url = "https://www.sec.gov/Archives/edgar/data"

**Load CIK Table From Excel**

In [5]:
# Read the CIK table from the first worksheet of the Excel workbook.
cik_table = pd.read_excel(
    "cik_list.xlsx",
    sheet_name='Sheet1',
)

In [6]:
# Distinct values of the 'cik' column, in order of first appearance.
cik_list = pd.unique(cik_table['cik'])

In [7]:
# Wrap every CIK in slashes so it can be spliced directly into an EDGAR URL.
cik_str = [f"/{cik}/" for cik in cik_list]

**`cik_str` is the list of CIKs, each formatted as a `/<cik>/` URL path segment**

In [8]:
# Preview the first five formatted CIK path segments.
cik_str[:5]

['/901430/', '/1020720/', '/903425/', '/1111564/', '/1056513/']

In [9]:
# Number of CIK path segments built above.
len(cik_str)

528

In [10]:
# Keep only the text before the first ':' of each entry in the 'raw' column.
names = [raw_entry.partition(':')[0] for raw_entry in cik_table['raw']]

In [11]:
# De-duplicate the names (keeping first-seen order), then keep only the
# text before the first '/' of each one.
unique_names = pd.DataFrame(names)[0].unique()
names_final = [full_name.partition('/')[0] for full_name in unique_names]

In [12]:
# Number of entries in names_final.
len(names_final)

528

**names_final is the list of upper case names**

In [13]:
# Spot-check three entries of names_final (positions 0, 300 and 500).
names_final[0], names_final[300], names_final[500]

('ABELE JOHN E', 'MCCAW KEITH W', 'WENDT RICHARD L')

`bad_cik` is a list of "bad" or unusable CIK numbers that have no information; these are removed from `cik_str`.

In [14]:
# CIK path segments to skip, written in the same '/<cik>/' form as cik_str.
bad_cik = ['/1705622/']
# Rebuild cik_str without the entries listed in bad_cik.
cik_str = list(filter(lambda segment: segment not in bad_cik, cik_str))

## Enter 'year' to extract all records from 'year' and after

**This is the main ETL loop. To process only a subset of CIKs, slice `cik_str` in the loop header (for example `cik_str[0:50]`); iterate over the full list to extract every CIK. The `year` variable can also be adjusted to control which entries are extracted.**

The output will be a long list of URLs for the CIKs. Scroll down to the bottom to see the rest of the code!

In [34]:
# Collect the URL of every wanted XML document filed under each CIK.
#
# For each CIK path segment in cik_str this cell:
#   1. downloads the CIK's directory listing (index.json),
#   2. keeps the filings whose 'last-modified' year is >= `year`,
#   3. downloads each kept filing's own index.json,
#   4. prints and appends to url_list every document whose name contains
#      'xml' and none of the excluded substrings.
# Any HTTP error or timeout stops the loop with a requests exception.
url_list = []

# enter year to get all transactions from this year and after
year = 2018

# Seconds to wait on each HTTP request before giving up, so that a stalled
# connection cannot hang the whole loop.
request_timeout = 30

# Substrings identifying the unnecessary xml file names that are left out.
excluded_name_parts = (
    'primary_doc', 'form13', 'informationtable', 'avenue-', 'form3',
    'doc5', 'doc3', 'a3', 'form5',
)

# NOTE(review): SEC's EDGAR access guidance asks automated clients to declare
# a User-Agent and to limit their request rate. If requests start failing with
# HTTP 403, set session.headers['User-Agent'] to your own contact string --
# confirm against the current SEC guidance.

# One session for the whole crawl so the connection to sec.gov is reused.
with requests.Session() as session:

    # to process only a subset of CIK's, slice the list, e.g. cik_str[0:50]
    for item in cik_str:
        filings_url = base_url + item + "/index.json"

        content = session.get(filings_url, timeout=request_timeout)
        # fail with a clear HTTP error instead of an opaque JSON decoding error
        content.raise_for_status()
        decoded_content = content.json()

        # NOTE(review): not read again in this cell; kept in case a later cell uses it
        back_up = decoded_content['directory']['item']

        # looping through the description and directory content
        for filing in decoded_content['directory']['item']:

            # the first four characters of 'last-modified' are compared as the year
            if int(filing['last-modified'][0:4]) < year:
                continue

            filing_num = filing['name']

            filing_url = base_url + item + filing_num + "/index.json"

            content = session.get(filing_url, timeout=request_timeout)
            content.raise_for_status()
            document_content = content.json()

            # looping through url names and document content
            for document in document_content['directory']['item']:

                doc_name = document['name']

                # this check is used for leaving out all the unnecessary xml file names
                if 'xml' not in doc_name:
                    continue
                if any(part in doc_name for part in excluded_name_parts):
                    continue

                document_url = base_url + item + filing_num + '/' + doc_name

                # print out url's and append all url's to a list
                print(document_url)
                url_list.append(document_url)

https://www.sec.gov/Archives/edgar/data/1056513/000156761920013603/doc1.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924320016159/doc4a.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924320015508/doc4.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924320009732/doc4.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924320005909/doc4.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924320003693/doc4.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924319029442/doc4.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924319028477/doc4.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924319024343/doc4.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924319018542/doc4.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924319009562/doc4.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924319009560/doc4.xml
https://www.sec.gov/Archives/edgar/data/1056513/000089924319003601/doc4.xml
https://www

https://www.sec.gov/Archives/edgar/data/1294693/000112760220022345/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220022338/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220022269/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220022186/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220022144/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220022058/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220022009/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220021912/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220021853/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220021657/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220021575/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220021508/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760220021446/form4.xml

https://www.sec.gov/Archives/edgar/data/1294693/000112760219033564/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219033294/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219033104/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219032786/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219032575/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219032262/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219032074/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219031748/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219031363/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219031217/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219031184/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219031004/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219030936/form4.xml

https://www.sec.gov/Archives/edgar/data/1294693/000112760219010026/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000089924319005588/doc4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219008564/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219007823/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219007338/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219006452/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219005791/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219005180/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219004643/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219003912/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219003263/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219002833/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760219002685/form4.xml


https://www.sec.gov/Archives/edgar/data/1294693/000112760218023160/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218023047/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218022998/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218022893/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218022353/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218022042/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218021997/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218021927/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218021871/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218021711/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218021460/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218021079/form4.xml
https://www.sec.gov/Archives/edgar/data/1294693/000112760218020992/form4.xml

https://www.sec.gov/Archives/edgar/data/1196579/000132781120000012/wf-form4_157926208317687.xml
https://www.sec.gov/Archives/edgar/data/1196579/000132781119000207/wf-form4_157594178236603.xml
https://www.sec.gov/Archives/edgar/data/1196579/000132781119000183/wf-form4_157135003638133.xml
https://www.sec.gov/Archives/edgar/data/1196579/000132781119000160/wf-form4_156772647423578.xml
https://www.sec.gov/Archives/edgar/data/1196579/000132781119000147/wf-form4_156340212608880.xml
https://www.sec.gov/Archives/edgar/data/1196579/000132781119000100/wf-form4_155986870020697.xml
https://www.sec.gov/Archives/edgar/data/1196579/000112760219019211/form4.xml
https://www.sec.gov/Archives/edgar/data/1196579/000132781119000073/wf-form4_155614275423333.xml
https://www.sec.gov/Archives/edgar/data/1196579/000132781119000069/wf-form4_155554336128182.xml
https://www.sec.gov/Archives/edgar/data/1196579/000132781119000031/wf-form4_155191987011698.xml
https://www.sec.gov/Archives/edgar/data/1196579/00011276021

https://www.sec.gov/Archives/edgar/data/860866/000090342319000137/tpgsbsnexeo4.xml
https://www.sec.gov/Archives/edgar/data/860866/000090342319000119/tpgvii.xml
https://www.sec.gov/Archives/edgar/data/860866/000090342319000055/advii3.xml
https://www.sec.gov/Archives/edgar/data/860866/000090342319000047/tpgvii.xml
https://www.sec.gov/Archives/edgar/data/860866/000089924318031632/doc4.xml
https://www.sec.gov/Archives/edgar/data/860866/000089924318031628/doc4a.xml
https://www.sec.gov/Archives/edgar/data/860866/000090342318000648/magnolia4.xml
https://www.sec.gov/Archives/edgar/data/860866/000090342318000631/iqvia.xml
https://www.sec.gov/Archives/edgar/data/860866/000090342318000614/tpgvii.xml
https://www.sec.gov/Archives/edgar/data/860866/000090342318000561/allogene.xml
https://www.sec.gov/Archives/edgar/data/860866/000090342318000546/allogene.xml
https://www.sec.gov/Archives/edgar/data/860866/000090342318000531/advisorsviavhomes.xml
https://www.sec.gov/Archives/edgar/data/860866/000090342

https://www.sec.gov/Archives/edgar/data/1173893/000117911018002218/edgar.xml
https://www.sec.gov/Archives/edgar/data/1066154/000089924320010610/doc4.xml
https://www.sec.gov/Archives/edgar/data/1558613/000142887518000042/certent-form4a.xml
https://www.sec.gov/Archives/edgar/data/1558613/000142887518000035/certent-form4.xml
https://www.sec.gov/Archives/edgar/data/1079719/000114420418002948/tv483672_3.xml
https://www.sec.gov/Archives/edgar/data/908724/000112329220000863/edgar.xml
https://www.sec.gov/Archives/edgar/data/908724/000112329220000657/edgar.xml
https://www.sec.gov/Archives/edgar/data/908724/000112329220000602/edgar.xml
https://www.sec.gov/Archives/edgar/data/908724/000112329220000472/edgar.xml
https://www.sec.gov/Archives/edgar/data/908724/000112329220000429/edgar.xml
https://www.sec.gov/Archives/edgar/data/908724/000112329220000337/edgar.xml
https://www.sec.gov/Archives/edgar/data/908724/000112329220000138/edgar.xml
https://www.sec.gov/Archives/edgar/data/908724/000112329220000

https://www.sec.gov/Archives/edgar/data/1537372/000120919119030655/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119028482/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119027955/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119027128/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119026207/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119025314/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119024968/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119024588/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119021278/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119019675/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119019162/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119017331/doc4.xml
https://www.sec.gov/Archives/edgar/data/1537372/000120919119015565/doc4.xml
https://www.

https://www.sec.gov/Archives/edgar/data/938071/000132781120000136/wf-form4_159916728606703.xml
https://www.sec.gov/Archives/edgar/data/938071/000132781120000112/wf-form4_159406688790693.xml
https://www.sec.gov/Archives/edgar/data/938071/000132781120000108/wf-form4_159242631120531.xml
https://www.sec.gov/Archives/edgar/data/938071/000132781120000105/wf-form4_159191281854296.xml
https://www.sec.gov/Archives/edgar/data/938071/000132781120000075/wf-form4_159113177669673.xml
https://www.sec.gov/Archives/edgar/data/938071/000132781120000056/wf-form4_158716163638892.xml
https://www.sec.gov/Archives/edgar/data/938071/000132781120000040/wf-form4_158595679480646.xml
https://www.sec.gov/Archives/edgar/data/938071/000132781120000034/wf-form4_158456353660872.xml
https://www.sec.gov/Archives/edgar/data/938071/000132781120000026/wf-form4_158336088100446.xml
https://www.sec.gov/Archives/edgar/data/938071/000132781120000010/wf-form4_157926198051042.xml
https://www.sec.gov/Archives/edgar/data/938071/000

https://www.sec.gov/Archives/edgar/data/904548/000090454819000002/edgar.xml
https://www.sec.gov/Archives/edgar/data/904548/000109053719000004/edgar.xml
https://www.sec.gov/Archives/edgar/data/904548/000090454818000047/edgar.xml
https://www.sec.gov/Archives/edgar/data/904548/000090454818000046/wf-form4_154395795456892.xml
https://www.sec.gov/Archives/edgar/data/904548/000090454818000040/edgar.xml
https://www.sec.gov/Archives/edgar/data/904548/000090454818000028/edgar.xml
https://www.sec.gov/Archives/edgar/data/904548/000090454818000026/wf-form4_153237739298828.xml
https://www.sec.gov/Archives/edgar/data/904548/000090454818000021/edgar.xml
https://www.sec.gov/Archives/edgar/data/904548/000090454818000017/edgar.xml
https://www.sec.gov/Archives/edgar/data/904548/000090454818000015/wf-form4_152788576882874.xml
https://www.sec.gov/Archives/edgar/data/904548/000090454818000006/edgar.xml
https://www.sec.gov/Archives/edgar/data/904548/000090454818000002/edgar.xml
https://www.sec.gov/Archives/ed

https://www.sec.gov/Archives/edgar/data/901185/000141588918000017/form4-01082018_010102.xml
https://www.sec.gov/Archives/edgar/data/898860/000149315220011716/ownership.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120034967/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120034342/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120033802/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120032828/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120032220/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120027825/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120025033/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120024699/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120024601/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120023893/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919120023542/doc4.xml
htt

https://www.sec.gov/Archives/edgar/data/898860/000120919119003503/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919119003367/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919119003185/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919119002966/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919119002677/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919119002392/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919119001632/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919119000617/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919119000003/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919118064358/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919118064231/doc4.xml
https://www.sec.gov/Archives/edgar/data/898860/000149315218018019/ownership.xml
https://www.sec.gov/Archives/edgar/data/898860/000120919118064190/doc4.xml
https://www.sec.gov/

https://www.sec.gov/Archives/edgar/data/1185533/000080724920000178/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000176/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000174/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000170/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000167/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000156/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000133/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000103/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000091/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000087/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000077/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000071/form4.xml
https://www.sec.gov/Archives/edgar/data/1185533/000080724920000063/form4.xml

https://www.sec.gov/Archives/edgar/data/1322750/000156240118000131/wf-form4_154327202225983.xml
https://www.sec.gov/Archives/edgar/data/1322750/000156240118000129/wf-form4_154275944149326.xml
https://www.sec.gov/Archives/edgar/data/1322750/000156240118000123/wf-form4_154240963337884.xml
https://www.sec.gov/Archives/edgar/data/1322750/000156240118000121/wf-form4_154215141721440.xml
https://www.sec.gov/Archives/edgar/data/1322750/000156240118000116/wf-form4_154172084753134.xml
https://www.sec.gov/Archives/edgar/data/1322750/000156761918003408/doc1.xml
https://www.sec.gov/Archives/edgar/data/1322750/000156240118000098/wf-form4_153627028648908.xml
https://www.sec.gov/Archives/edgar/data/1322750/000156240118000074/wf-form4_152668426346655.xml
https://www.sec.gov/Archives/edgar/data/1322750/000114036118020249/doc1.xml
https://www.sec.gov/Archives/edgar/data/1322750/000156240118000060/wf-form4a_152469241895961.xml
https://www.sec.gov/Archives/edgar/data/1322750/000156240118000052/wf-form4_152

https://www.sec.gov/Archives/edgar/data/1033331/000106528020000157/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528020000145/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528020000139/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528020000111/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528020000109/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528020000104/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528020000076/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528020000074/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528020000066/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528020000037/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528020000021/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1033331/000106528019000445/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/

https://www.sec.gov/Archives/edgar/data/1302110/000156761920000109/doc1.xml
https://www.sec.gov/Archives/edgar/data/1302110/000156761919018962/doc1.xml
https://www.sec.gov/Archives/edgar/data/1302110/000156761919017217/doc1.xml
https://www.sec.gov/Archives/edgar/data/1302110/000156761919013895/doc1.xml
https://www.sec.gov/Archives/edgar/data/1302110/000120919119017877/doc4.xml
https://www.sec.gov/Archives/edgar/data/1302110/000120919118058710/doc4.xml
https://www.sec.gov/Archives/edgar/data/1302110/000120919118058265/doc4.xml
https://www.sec.gov/Archives/edgar/data/1302110/000156761918002373/doc1.xml
https://www.sec.gov/Archives/edgar/data/1302110/000156761918001321/doc1.xml
https://www.sec.gov/Archives/edgar/data/1302110/000156761918000400/doc1.xml
https://www.sec.gov/Archives/edgar/data/1302110/000114036118035628/doc1.xml
https://www.sec.gov/Archives/edgar/data/1302110/000114036118022281/doc1.xml
https://www.sec.gov/Archives/edgar/data/1302110/000114036118005842/doc1.xml
https://www.

https://www.sec.gov/Archives/edgar/data/1134100/000112760219024787/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760219024545/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760219024362/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760219024063/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760219024040/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760219023313/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760218032764/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760218032340/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760218031887/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760218031315/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760218030878/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760218030725/form4.xml
https://www.sec.gov/Archives/edgar/data/1134100/000112760218030572/form4.xml

https://www.sec.gov/Archives/edgar/data/1031190/000150630719000040/wf-form4_155058576299105.xml
https://www.sec.gov/Archives/edgar/data/1031190/000150630719000038/wf-form4_155001082107128.xml
https://www.sec.gov/Archives/edgar/data/1031190/000150630719000037/wf-form4_154989783640783.xml
https://www.sec.gov/Archives/edgar/data/1031190/000150630719000029/wf-form4_154963842909595.xml
https://www.sec.gov/Archives/edgar/data/1031190/000150630719000026/wf-form4_154946203801714.xml
https://www.sec.gov/Archives/edgar/data/1031190/000150630719000024/wf-form4_154937869944264.xml
https://www.sec.gov/Archives/edgar/data/1031190/000150630719000022/wf-form4_154903346219518.xml
https://www.sec.gov/Archives/edgar/data/1031190/000150630719000020/wf-form4_154894890519028.xml
https://www.sec.gov/Archives/edgar/data/1031190/000150630719000018/wf-form4_154879447449393.xml
https://www.sec.gov/Archives/edgar/data/1031190/000150630718000084/wf-form4_154091512217055.xml
https://www.sec.gov/Archives/edgar/data/

https://www.sec.gov/Archives/edgar/data/1325713/000091957418004815/ownership.xml
https://www.sec.gov/Archives/edgar/data/1325713/000091957418000224/p7786449.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120019775/form4.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120018888/form4.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120017082/form4.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120014580/form4.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120014193/form4.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120014068/form4.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120012852/form4.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120012768/form4.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120005779/form4.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120003799/form4.xml
https://www.sec.gov/Archives/edgar/data/1081714/000114036120001758/fo

https://www.sec.gov/Archives/edgar/data/1259927/000114036120013703/form4.xml
https://www.sec.gov/Archives/edgar/data/1259927/000114036120013620/form4.xml
https://www.sec.gov/Archives/edgar/data/1259927/000114036120013555/form4.xml
https://www.sec.gov/Archives/edgar/data/1259927/000114036120011991/form4.xml
https://www.sec.gov/Archives/edgar/data/1259927/000110465919010390/a4a.xml
https://www.sec.gov/Archives/edgar/data/1259927/000110465919009089/a4.xml
https://www.sec.gov/Archives/edgar/data/1259927/000149830118000100/edgar.xml
https://www.sec.gov/Archives/edgar/data/1259927/000149830118000069/edgar.xml
https://www.sec.gov/Archives/edgar/data/1259927/000149830118000037/edgar.xml
https://www.sec.gov/Archives/edgar/data/1259927/000110465918020985/a4.xml
https://www.sec.gov/Archives/edgar/data/1008090/000100125019000046/edgar.xml
https://www.sec.gov/Archives/edgar/data/1008090/000100125019000045/edgar.xml
https://www.sec.gov/Archives/edgar/data/1006352/000100125020000011/edgar.xml
https:/

https://www.sec.gov/Archives/edgar/data/1195580/000089924319019694/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319019498/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319019346/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319018996/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319018490/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319018019/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319017521/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319017110/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319016554/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319015919/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319015438/doc4.xml
https://www.sec.gov/Archives/edgar/data/1195580/000089924319015119/doc4.xml
https://www.sec.gov/Archives/edgar/data/1041175/000143472820000284/wf-form4_159908599983

https://www.sec.gov/Archives/edgar/data/1102753/000143774919020636/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919020466/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919020309/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919020209/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919019980/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919019797/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919019746/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919019680/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919019275/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919019107/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919019021/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/000143774919018785/rdgdoc.xml
https://www.sec.gov/Archives/edgar/data/1102753/0001437749190186

https://www.sec.gov/Archives/edgar/data/925177/000085196818000062/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/925177/000085196818000057/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/925177/000085196818000049/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/925177/000085196818000041/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1379454/000089183920000203/edgar.xml
https://www.sec.gov/Archives/edgar/data/1379454/000089183920000179/edgar.xml
https://www.sec.gov/Archives/edgar/data/1379454/000089183920000098/edgar.xml
https://www.sec.gov/Archives/edgar/data/1379454/000089183920000071/edgar.xml
https://www.sec.gov/Archives/edgar/data/1379454/000114036120004675/form4.xml
https://www.sec.gov/Archives/edgar/data/1379454/000120919120001456/doc4.xml
https://www.sec.gov/Archives/edgar/data/1379454/000114036119021156/form4.xml
https://www.sec.gov/Archives/edgar/data/1379454/000120919119057677/doc4.xml
https://www.sec.gov/Archives/edgar/data/1379454/000120919119056521/doc

https://www.sec.gov/Archives/edgar/data/928264/000120919120044550/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919120039771/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919120032627/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919120029387/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919120022227/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919120007181/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919120000426/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919119059912/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919119051366/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919119046370/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919119040286/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919119035124/doc4.xml
https://www.sec.gov/Archives/edgar/data/928264/000120919119031143/doc4.xml
https://www.sec.gov/Archi

https://www.sec.gov/Archives/edgar/data/1094541/000112760218020328/form4.xml
https://www.sec.gov/Archives/edgar/data/1094541/000091645718000125/wf-form4_152088741870319.xml
https://www.sec.gov/Archives/edgar/data/1078511/000120919120037283/doc4.xml
https://www.sec.gov/Archives/edgar/data/1078511/000120919119053898/doc4.xml
https://www.sec.gov/Archives/edgar/data/1078511/000120919118056095/doc4.xml
https://www.sec.gov/Archives/edgar/data/1078511/000120919118052542/doc4.xml
https://www.sec.gov/Archives/edgar/data/1078511/000120919118038259/doc4.xml
https://www.sec.gov/Archives/edgar/data/1078511/000120919118014397/doc4.xml
https://www.sec.gov/Archives/edgar/data/1494730/000149473020000001/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1494730/000149473019000003/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1494730/000149473019000002/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1494730/000149473019000001/edgardoc.xml
https://www.sec.gov/Archives/edgar/data/1494730/000

https://www.sec.gov/Archives/edgar/data/1186472/000114036119022838/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000114036119021508/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000114036119020346/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000114036119019008/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000095015719001131/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000095015719001106/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000114036119017447/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000095015719001057/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000095015719001047/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000114036119016918/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000114036119016526/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000114036119016100/form4.xml
https://www.sec.gov/Archives/edgar/data/1186472/000114036119015481/form4.xml

https://www.sec.gov/Archives/edgar/data/1087398/000162643119000031/edgar.xml
https://www.sec.gov/Archives/edgar/data/1087398/000162643119000008/edgar.xml
https://www.sec.gov/Archives/edgar/data/1087398/000162643118000151/edgar.xml
https://www.sec.gov/Archives/edgar/data/1087398/000162643118000136/edgar.xml
https://www.sec.gov/Archives/edgar/data/1087398/000162643118000113/edgar.xml
https://www.sec.gov/Archives/edgar/data/1087398/000162643118000090/edgar.xml
https://www.sec.gov/Archives/edgar/data/1087398/000162643118000063/edgar.xml
https://www.sec.gov/Archives/edgar/data/1087398/000162643118000050/edgar.xml
https://www.sec.gov/Archives/edgar/data/1087398/000162643118000034/edgar.xml
https://www.sec.gov/Archives/edgar/data/1087398/000162643118000012/edgar.xml
https://www.sec.gov/Archives/edgar/data/1317547/000146817420000068/wf-form4_158931643302028.xml
https://www.sec.gov/Archives/edgar/data/1317547/000146817420000051/wf-form4_158526286514527.xml
https://www.sec.gov/Archives/edgar/dat

https://www.sec.gov/Archives/edgar/data/1032681/000089924320013169/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320012894/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320012340/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320011938/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320011615/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320010675/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320007262/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320006645/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320005975/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320005538/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320004521/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320003717/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032681/000089924320002946/doc4.xml
https://www.

https://www.sec.gov/Archives/edgar/data/1070844/000089924319024261/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319024060/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022871/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022764/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022630/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022598/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022538/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022537/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022475/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022337/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022236/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022066/doc4.xml
https://www.sec.gov/Archives/edgar/data/1070844/000089924319022000/doc4.xml
https://www.

https://www.sec.gov/Archives/edgar/data/1620093/000161670718000388/wf-form4a_153740619560166.xml
https://www.sec.gov/Archives/edgar/data/1620093/000161670718000378/wf-form4_153731892619375.xml
https://www.sec.gov/Archives/edgar/data/1620093/000161670718000366/wf-form4_153678439970156.xml
https://www.sec.gov/Archives/edgar/data/1620093/000161670718000362/wf-form4_153618177709643.xml
https://www.sec.gov/Archives/edgar/data/1620093/000161670718000353/wf-form4_153550121645691.xml
https://www.sec.gov/Archives/edgar/data/1620093/000161670718000352/wf-form4_153541953003062.xml
https://www.sec.gov/Archives/edgar/data/1620093/000161670718000345/wf-form4_153488612391691.xml
https://www.sec.gov/Archives/edgar/data/1620093/000161670718000339/wf-form4_153445741242936.xml
https://www.sec.gov/Archives/edgar/data/1620093/000161670718000333/wf-form4_153429374962964.xml
https://www.sec.gov/Archives/edgar/data/1620093/000161670718000319/wf-form4_153376528967226.xml
https://www.sec.gov/Archives/edgar/data

https://www.sec.gov/Archives/edgar/data/1226526/000120919118022256/doc4.xml
https://www.sec.gov/Archives/edgar/data/1226526/000120919118020764/doc4.xml
https://www.sec.gov/Archives/edgar/data/1226526/000120919118019692/doc4.xml
https://www.sec.gov/Archives/edgar/data/924255/000156215120000062/wf-form4_159846903100347.xml
https://www.sec.gov/Archives/edgar/data/924255/000156215120000060/wf-form4_159806102783876.xml
https://www.sec.gov/Archives/edgar/data/924255/000156215120000056/wf-form4_159779301897807.xml
https://www.sec.gov/Archives/edgar/data/924255/000156761920013592/doc1.xml
https://www.sec.gov/Archives/edgar/data/924255/000156215120000021/wf-form4_158032795665609.xml
https://www.sec.gov/Archives/edgar/data/924255/000156215120000020/wf-form4_157991235829684.xml
https://www.sec.gov/Archives/edgar/data/924255/000156215120000018/wf-form4_157983172832812.xml
https://www.sec.gov/Archives/edgar/data/924255/000156215120000016/wf-form4_157930572127317.xml
https://www.sec.gov/Archives/edg

https://www.sec.gov/Archives/edgar/data/1179515/000112760220019580/form4.xml
https://www.sec.gov/Archives/edgar/data/1179515/000112760219021648/form4.xml
https://www.sec.gov/Archives/edgar/data/1179515/000112760218020905/form4.xml
https://www.sec.gov/Archives/edgar/data/1032235/000120919120035513/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032235/000120919120017864/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032235/000120919120017859/doc4a.xml
https://www.sec.gov/Archives/edgar/data/1032235/000120919120016728/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032235/000120919119046383/doc4a.xml
https://www.sec.gov/Archives/edgar/data/1032235/000120919119036427/doc4.xml
https://www.sec.gov/Archives/edgar/data/1032235/000120919119018206/doc4.xml
https://www.sec.gov/Archives/edgar/data/1019037/000010049320000028/wf-form4_158137139407575.xml
https://www.sec.gov/Archives/edgar/data/1019037/000010049319000026/wf-form4_154990254817046.xml
https://www.sec.gov/Archives/edgar/data/101

https://www.sec.gov/Archives/edgar/data/1219106/000112760218020364/form4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919120014868/doc4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919120014282/doc4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919120012054/doc4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919119057427/doc4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919119047401/doc4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000110465919043027/a4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919119032583/doc4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919118059375/doc4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919118058539/doc4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919118055976/doc4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919118055953/doc4.xml
https://www.sec.gov/Archives/edgar/data/1276191/000120919118050352/doc4.xml
https://www.s

https://www.sec.gov/Archives/edgar/data/1716837/000171683718000055/edgar.xml
https://www.sec.gov/Archives/edgar/data/1716837/000171683718000046/edgar.xml
https://www.sec.gov/Archives/edgar/data/1716837/000171683718000044/edgar.xml
https://www.sec.gov/Archives/edgar/data/1716837/000120919118037136/doc4.xml
https://www.sec.gov/Archives/edgar/data/1610500/000120919120047471/doc4.xml
https://www.sec.gov/Archives/edgar/data/1610500/000120919120030431/doc4.xml
https://www.sec.gov/Archives/edgar/data/1610500/000120919120011170/doc4.xml
https://www.sec.gov/Archives/edgar/data/1610500/000120919119062152/doc4.xml
https://www.sec.gov/Archives/edgar/data/1610500/000120919119040259/doc4.xml
https://www.sec.gov/Archives/edgar/data/1610500/000120919119033436/doc4.xml
https://www.sec.gov/Archives/edgar/data/1610500/000120919119026874/doc4.xml
https://www.sec.gov/Archives/edgar/data/1610500/000120919119022045/doc4.xml
https://www.sec.gov/Archives/edgar/data/1610500/000120919119016014/doc4.xml
https://w

https://www.sec.gov/Archives/edgar/data/1548760/000112760219029122/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219029121/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219028970/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219028969/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219028772/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219028771/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219028487/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219028214/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219028213/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219028212/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219028083/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219028082/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760219027941/form4.xml

https://www.sec.gov/Archives/edgar/data/1548760/000112760218012706/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218012705/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218012704/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218012401/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218012400/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218012164/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218011629/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218011628/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218011275/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218010622/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218010614/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218010260/form4.xml
https://www.sec.gov/Archives/edgar/data/1548760/000112760218010259/form4.xml

In [35]:
# Number of filing URLs currently held in url_list.
len(url_list)

3766

In [36]:
# soup holds one parsed XML document per entry in url_list, in the same order.
soup = []

# Upper bound (seconds) on each download so that a single stalled response
# cannot hang the whole cell indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# A single Session reuses the underlying connection across the many requests
# that all go to the same host.
# NOTE(review): SEC asks automated clients to identify themselves with a
# descriptive User-Agent -- consider setting session.headers; confirm against
# the current EDGAR access policy.
with requests.Session() as session:
    for url in url_list:
        xml_data = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS).content
        soup.append(BeautifulSoup(xml_data, 'xml'))

In [37]:
# One parsed document is appended per URL, so this should equal len(url_list).
len(soup)

3766

In [210]:
# One entry per non-derivative transaction row. All ten lists must stay the
# same length because they later become the columns of a single DataFrame.
shares = []
form = []
dates = []
price = []
shares_after = []
ad_code = []
title = []
city = []
name = []
stock_names = []

# Walk every parsed filing and pull the Form 4 non-derivative transaction
# fields: transaction shares, form type, date, price per share,
# acquired/disposed code, amount owned after the transaction, security title,
# issuer name, and reporting-owner name/city.
for filing in soup:
    table = filing.find('nonDerivativeTable')

    # Only documents of type '4' that have a nonDerivativeTable are handled
    # here; filings containing 'nonDerivativeSecurity' nodes are skipped.
    if (filing.find('documentType').text != '4' or table is None
            or filing.find('nonDerivativeSecurity') is not None):
        continue

    share_tags = table.find_all('transactionShares')
    shares.extend(tag.text.strip() for tag in share_tags)

    for tag in table.find_all('transactionFormType'):
        form.append(tag.text.strip())
        stock_names.append(filing.find('issuerName').text)
        city.append(filing.find('rptOwnerCity').text.strip())
        # A filing can list several reporting owners; keep (as a list) only
        # those whose upper-cased name appears in names_final.
        name.append([owner.text.strip() for owner in filing.find_all('rptOwnerName')
                     if owner.text.strip().upper() in names_final])

    dates.extend(tag.text.strip() for tag in table.find_all('transactionDate'))
    price.extend(tag.text.strip() for tag in table.find_all('transactionPricePerShare'))
    ad_code.extend(tag.text.strip()
                   for tag in table.find_all('transactionAcquiredDisposedCode'))

    # Emit exactly one "owned after" value and one title per transaction so the
    # lists stay aligned with `shares`.
    n_rows = len(share_tags)
    titles = filing.find_all('securityTitle')
    if n_rows <= len(table.find_all('sharesOwnedFollowingTransaction')):
        after_tags = filing.find_all('sharesOwnedFollowingTransaction')
    else:
        # Fewer share counts than transactions: presumably some rows report
        # 'valueOwnedFollowingTransaction' instead (TODO confirm against the
        # filings that hit this branch). Take whichever tag each row carries,
        # in document order. Previously this branch reused a stale loop index
        # and appended a single value regardless of the number of rows.
        after_tags = table.find_all(['sharesOwnedFollowingTransaction',
                                     'valueOwnedFollowingTransaction'])

    for j in range(n_rows):
        # Pad with NaN if a row has neither tag, rather than breaking alignment.
        shares_after.append(after_tags[j].text.strip() if j < len(after_tags) else np.nan)
        title.append(titles[j].text.strip())
            
            
    

In [211]:
# Sanity check: these lists become DataFrame columns, so their lengths must match.
len(shares_after), len(form), len(dates), len(ad_code), len(price), len(title), len(shares), len(stock_names)

(15812, 15812, 15812, 15812, 15812, 15812, 15812, 15812)

In [212]:
# The owner-level lists must have the same length as the transaction lists above.
len(city), len(name)

(15812, 15812)

**This loop's `if` condition differs from the one above: it checks for child nodes named 'nonDerivativeSecurity' instead of 'nonDerivativeTable'. This deviation occurs in rare instances for certain CIKs.**

In [213]:
# Handle filings whose non-derivative rows are stored in 'nonDerivativeSecurity'
# nodes. Every row contributes exactly one value to each list, all in a single
# pass, so the lists stay aligned.
for filing in soup:
    if filing.find('nonDerivativeSecurity') is not None:
        for row in filing.find_all('nonDerivativeSecurity'):
            shares.append(row.find('transactionShares').text.strip())
            form.append(row.find('transactionFormType').text.strip())
            stock_names.append(filing.find('issuerName').text)
            dates.append(row.find('transactionDate').text.strip())
            # These rows are read from 'transactionValue', not
            # 'transactionPricePerShare'.
            price.append(row.find('transactionValue').text.strip())
            ad_code.append(row.find('transactionAcquiredDisposedCode').text.strip())
            title.append(row.find('securityTitle').text.strip())
            shares_after.append(row.find('sharesOwnedFollowingTransaction').text.strip())
            city.append(filing.find('rptOwnerCity').text.strip())
            # Store owners as a list filtered by names_final, exactly like the
            # nonDerivativeTable loop. A bare string here would make the later
            # result[0] / result[1] owner split return single characters.
            name.append([owner.text.strip() for owner in filing.find_all('rptOwnerName')
                         if owner.text.strip().upper() in names_final])
        
    
    

In [214]:
# Re-check after the 'nonDerivativeSecurity' pass: all lengths must still match.
len(shares_after), len(form), len(dates), len(ad_code), len(price), len(title), len(shares), len(stock_names)

(15812, 15812, 15812, 15812, 15812, 15812, 15812, 15812)

In [215]:
# The owner-level lists must still line up with the transaction lists.
len(city), len(name)

(15812, 15812)

In [503]:
# Assemble the per-transaction lists into one table: each label below is paired
# with the list at the same position, and the column order follows the labels.
column_labels = ["Dates", "Form", "Name", "City", "Shares",
                 "Shares After", "Price", "A/D Code", "Security Title", "Stock Name"]
column_values = [dates, form, name, city, shares,
                 shares_after, price, ad_code, title, stock_names]

df = pd.DataFrame(dict(zip(column_labels, column_values)))

In [504]:
df.head()

Unnamed: 0,Dates,Form,Name,City,Shares,Shares After,Price,A/D Code,Security Title,Stock Name
0,2020-06-03,4,[ACKMAN WILLIAM A],NEW YORK,1749072,10448317,58.66,D,"Common stock, par value $0.01 per share",Howard Hughes Corp
1,2020-06-03,4,[ACKMAN WILLIAM A],NEW YORK,469691,10918008,115.0,A,"Common stock, par value $0.01 per share",Howard Hughes Corp
2,2020-03-27,4,[ACKMAN WILLIAM A],NEW YORK,10000000,12197389,50.0,A,"Common stock, par value $0.01 per share",Howard Hughes Corp
3,2020-02-24,4,[ACKMAN WILLIAM A],NEW YORK,17700,1491232,893.42,D,Common Stock,CHIPOTLE MEXICAN GRILL INC
4,2020-02-24,4,[ACKMAN WILLIAM A],NEW YORK,330000,1161232,880.5,D,Common Stock,CHIPOTLE MEXICAN GRILL INC


In [506]:
def _owner_at(owners, position):
    """Return owners[position], or NaN when the row has too few owners."""
    return owners[position] if len(owners) > position else np.nan


# Split each row's "Name" entry into its first and second reporting owner.
owner_1 = [_owner_at(owners, 0) for owners in df['Name']]
owner_2 = [_owner_at(owners, 1) for owners in df['Name']]

In [507]:
# The ABCs live in collections.abc; the old `collections.Iterable` alias was
# removed in Python 3.10.
from collections.abc import Iterable


def flatten(lis):
    """Lazily yield the leaf items of an arbitrarily nested iterable.

    Items are produced depth-first, in their original order. Strings are
    treated as leaves (never split into characters), and non-iterable items
    such as numbers or NaN are yielded unchanged.
    """
    for item in lis:
        if isinstance(item, Iterable) and not isinstance(item, str):
            yield from flatten(item)
        else:
            yield item

In [508]:
# Both flattened owner lists must match the DataFrame's row count before insertion.
len(list(flatten(owner_1))), len(list(flatten(owner_2)))

(15812, 15812)

In [509]:
# Add the two owner columns at positions 2 and 3 of df.
owner_columns = [("Owner 1", owner_1), ("Owner 2", owner_2)]
for position, (label, owners) in enumerate(owner_columns, start=2):
    df.insert(position, label, list(flatten(owners)))

In [510]:
df

Unnamed: 0,Dates,Form,Owner 1,Owner 2,Name,City,Shares,Shares After,Price,A/D Code,Security Title,Stock Name
0,2020-06-03,4,ACKMAN WILLIAM A,,[ACKMAN WILLIAM A],NEW YORK,1749072,10448317,58.66,D,"Common stock, par value $0.01 per share",Howard Hughes Corp
1,2020-06-03,4,ACKMAN WILLIAM A,,[ACKMAN WILLIAM A],NEW YORK,469691,10918008,115.00,A,"Common stock, par value $0.01 per share",Howard Hughes Corp
2,2020-03-27,4,ACKMAN WILLIAM A,,[ACKMAN WILLIAM A],NEW YORK,10000000,12197389,50.00,A,"Common stock, par value $0.01 per share",Howard Hughes Corp
3,2020-02-24,4,ACKMAN WILLIAM A,,[ACKMAN WILLIAM A],NEW YORK,17700,1491232,893.42,D,Common Stock,CHIPOTLE MEXICAN GRILL INC
4,2020-02-24,4,ACKMAN WILLIAM A,,[ACKMAN WILLIAM A],NEW YORK,330000,1161232,880.50,D,Common Stock,CHIPOTLE MEXICAN GRILL INC
5,2020-02-06,4,ACKMAN WILLIAM A,,[ACKMAN WILLIAM A],NEW YORK,73178,1651132,859.62,D,Common Stock,CHIPOTLE MEXICAN GRILL INC
6,2020-02-07,4,ACKMAN WILLIAM A,,[ACKMAN WILLIAM A],NEW YORK,142200,1508932,860.61,D,Common Stock,CHIPOTLE MEXICAN GRILL INC
7,2019-12-13,4,ACKMAN WILLIAM A,,[ACKMAN WILLIAM A],NEW YORK,2596,2197389,116.63,A,"Common stock, par value $0.01 per share",Howard Hughes Corp
8,2019-12-02,4,ACKMAN WILLIAM A,,[ACKMAN WILLIAM A],NEW YORK,746476,448317,115.00,D,"Common stock, par value $0.01 per share",Howard Hughes Corp
9,2019-12-02,4,ACKMAN WILLIAM A,,[ACKMAN WILLIAM A],NEW YORK,746476,1194793,115.00,A,"Common stock, par value $0.01 per share",Howard Hughes Corp


## Export to Excel

In [513]:
#df.to_excel("full_cid_2018_nameFix.xlsx",sheet_name='Transactions', index = False)