In [None]:
import re
import typer
import pandas as pd
from typing import Dict
from pathlib import Path
from rich import print
import edgar

# ```edgar2data```: Extracting Information from EDGAR Documents

<br>

<center><img src="figures/library.png" width="30%" style='border:5px solid #000000'/></center>


# Example: Insider Trading Data: SEC Forms 3, 4, and 5

Form 3, 4, and 5 filings are reports submitted to the SEC by investors who may buy or sell shares in companies where they are deemed insiders. The SEC defines an insider as any officer or director of a publicly traded company, or any shareholder who owns more than 10% of it.

* https://www.sec.gov/files/forms-3-4-5.pdf
* https://www.sec.gov/Archives/edgar/data/1326190/000101297517000759/xslF345X03/edgar.xml
* https://whalewisdomalpha.com/form-4-insider-trading-analysis/
* https://www.sec.gov/Archives/edgar/data/1318605/000149473018000006/xslF345X03/edgardoc.xml

These filings are publicly available through the [SEC EDGAR website](https://www.sec.gov/edgar/search/).

For example, here is a [Form 4](https://www.sec.gov/Archives/edgar/data/1326190/000101297517000759/).

# ```edgar2data``` uses XML parsing and RegEx (and, in the future, NLP) to extract information

<br>
<br>
<br>

<center><img src="figures/unstrctured-data-types.png" width="80%" style='border:5px solid #000000'/></center>

# <span style="color:purple">Working with XML</span>

https://www.xmlviewer.org/

In [None]:
from edgar.forms.secdoc import Document

# IPython introspection: `??` displays the object's source code (where available),
# not just its docstring. This is IPython-only syntax and will not run as plain Python.
??Document

In [None]:
from edgar.utils import create_doc
from pathlib import Path

# Build a document object from a single sample filing kept with the test data.
sample_filing = Path("../tests/data/form-3/37996_4_0001209191-20-054135.txt")
doc = create_doc(sample_filing)

# Show the whole document first.
print(doc)

# Uncomment any of the lines below to look at one attribute of the document at a time.
# print(doc.doc_info)
# print(doc.report_owners)
# print(doc.nonderivatives)
# print(doc.derivatives)
# print(doc.signatures)
# print(doc.footnotes)

In [None]:
from edgar.cli import process

# IPython introspection: `??` displays the source code of `process` (where available),
# not just its docstring. This is IPython-only syntax and will not run as plain Python.
??process

In [None]:
from edgar.cli import process
from pathlib import Path

# First path: the sample 2020 directory from the test data.
# Second path: passed to `process` as `out_dir` (a local ./temp folder).
in_dir, out_dir = (
    Path("../tests/data/form-3/sample/2020"),
    Path("./temp"),
)

process(in_dir, out_dir)

# <span style="color:purple">Scaling up to Multiple Documents</span>

<br>
<br>
<br>

<center><img src="figures/information_extraction.png" width="100%" style='border:5px solid #000000'/></center>

In [None]:
# `!` runs the rest of the line as a shell command; this asks the edgar2data CLI for its top-level help.
!edgar2data --help

In [None]:
# Shell command: ask the edgar2data CLI for the help of its `process` subcommand.
!edgar2data process --help

In [None]:
# Shell command: run the `process` subcommand on the sample 2020 directory,
# passing ./temp through the --out_dir option.
!edgar2data process ../tests/data/form-3/sample/2020 --out_dir ./temp

In [None]:
import pandas as pd

# Read the six CSV tables from ./temp in one pass; the names on the left line up
# one-to-one, in order, with the file paths listed on the right.
(
    doc_info,
    footnotes,
    derivatives,
    nonderivatives,
    report_owners,
    signatures,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./temp/document_info.csv",
        "./temp/footnotes.csv",
        "./temp/derivatives.csv",
        "./temp/nonderivatives.csv",
        "./temp/report_owners.csv",
        "./temp/signatures.csv",
    )
)

# Preview the first 20 rows of the document-level table.
doc_info.head(20)

In [None]:
# Shell command: start uvicorn with the `app` object from the `edgar.api` module.
# NOTE(review): the server presumably runs in the foreground, so this cell will likely not
# finish until the kernel is interrupted — confirm, and keep it as the last cell.
!uvicorn edgar.api:app 