In [107]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup

In [108]:
# URL of the SEC's ticker to CIK mapping file
url = 'https://www.sec.gov/files/company_tickers.json'

# Custom headers with a User-Agent
headers = {
    'User-Agent': 'LewisHartley/1.0 (lewisdhartley@icloud.com)'
}

# Function to get CIK for a given ticker
def get_cik_for_ticker(ticker):
    try:
        # Make the request to get the mapping file
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        
        # Parse the JSON data
        data = response.json()
        
        # Loop through the data to find the matching ticker
        for entry in data.values():
            if entry['ticker'].upper() == ticker.upper():
                return entry['cik_str']
                
        
        return None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

In [109]:
# Example usage
ticker = 'AAPL'
cik = '0000' + str(get_cik_for_ticker(ticker))

if cik:
    print(f"The CIK for ticker symbol {ticker} is {cik}.")
else:
    print(f"No CIK found for ticker symbol {ticker}.")


The CIK for ticker symbol AAPL is 0000320193.


In [110]:
def fetch_filings(cik):
    try:
        # Make the request to get the filings data
        response = requests.get(f'https://data.sec.gov/submissions/CIK{cik}.json', headers=headers)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
    

In [111]:
def filter_form4_filings(filings, start_date, end_date):
    form4_filings = []
    for i in range(len(filings['form'])):
        if filings['form'][i] == "4":
            filing_date = datetime.strptime(filings['filingDate'][i], '%Y-%m-%d')
            if start_date <= filing_date <= end_date:
                form4_filings.append({
                    "accessionNumber": filings['accessionNumber'][i],
                    "filingDate": filings['filingDate'][i],
                    "primaryDocument": filings['primaryDocument'][i],
                    "primaryDocDescription": filings['primaryDocDescription'][i]
                })
    return form4_filings

In [112]:
# Define the date range
start_date = datetime(2024, 3, 1)
end_date = datetime(2024, 6, 1)

# Fetch the filings data
filings_data = fetch_filings(cik)
# Check if filings data is fetched successfully
if filings_data:
    # Filter the Form 4 filings within the specified date range
    recent_filings = filings_data.get('filings', {}).get('recent', {})
    form4_filings = filter_form4_filings(recent_filings, start_date, end_date)

    # Print the Form 4 filings
    if form4_filings:
        print("Form 4 Filings:")
        for filing in form4_filings:
            print(f"Accession Number: {filing['accessionNumber']}")
            print(f"Filing Date: {filing['filingDate']}")
            print(f"Primary Document: {filing['primaryDocument']}")
            print(f"Primary Document Description: {filing['primaryDocDescription']}")
            filing['form4url'] = f"https://www.sec.gov/Archives/edgar/data/{cik}/{filing['accessionNumber'].replace('-', '')}/{filing['primaryDocument']}"
            print((f"Document URL: {filing['form4url']}"))
            print("---")
    else:
        print("No Form 4 filings found within the specified date range.")
else:
    print("Failed to fetch filings data.")

Form 4 Filings:
Accession Number: 0000320193-24-000073
Filing Date: 2024-05-17
Primary Document: xslF345X05/wk-form4_1715985021.xml
Primary Document Description: FORM 4
Document URL: https://www.sec.gov/Archives/edgar/data/0000320193/000032019324000073/xslF345X05/wk-form4_1715985021.xml
---
Accession Number: 0000320193-24-000071
Filing Date: 2024-05-14
Primary Document: xslF345X05/wk-form4_1715725806.xml
Primary Document Description: FORM 4
Document URL: https://www.sec.gov/Archives/edgar/data/0000320193/000032019324000071/xslF345X05/wk-form4_1715725806.xml
---
Accession Number: 0000320193-24-000058
Filing Date: 2024-04-17
Primary Document: xslF345X05/wk-form4_1713393040.xml
Primary Document Description: FORM 4
Document URL: https://www.sec.gov/Archives/edgar/data/0000320193/000032019324000058/xslF345X05/wk-form4_1713393040.xml
---
Accession Number: 0000320193-24-000056
Filing Date: 2024-04-15
Primary Document: xslF345X05/wk-form4_1713220262.xml
Primary Document Description: FORM 4
Doc

In [113]:
filings_df = pd.DataFrame(form4_filings)
filings_df

Unnamed: 0,accessionNumber,filingDate,primaryDocument,primaryDocDescription,form4url
0,0000320193-24-000073,2024-05-17,xslF345X05/wk-form4_1715985021.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
1,0000320193-24-000071,2024-05-14,xslF345X05/wk-form4_1715725806.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
2,0000320193-24-000058,2024-04-17,xslF345X05/wk-form4_1713393040.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
3,0000320193-24-000056,2024-04-15,xslF345X05/wk-form4_1713220262.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
4,0000320193-24-000055,2024-04-15,xslF345X05/wk-form4_1713220215.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
5,0000320193-24-000052,2024-04-03,xslF345X05/wk-form4_1712183631.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
6,0000320193-24-000051,2024-04-03,xslF345X05/wk-form4_1712183580.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
7,0000320193-24-000050,2024-04-03,xslF345X05/wk-form4_1712183535.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
8,0000320193-24-000049,2024-04-03,xslF345X05/wk-form4_1712183493.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
9,0000320193-24-000048,2024-04-03,xslF345X05/wk-form4_1712183434.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...


In [206]:
# Function to fetch and parse HTML
def fetch_and_parse_html(document_url):
    response = requests.get(document_url, headers=headers)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')

# Function to extract relevant data from the HTML soup
def extract_form4_data(soup):
    data = {}
    
    non_derivative_table = soup.find_all('table')[13]
    non_derivative_rows = non_derivative_table.find_all('tr')
    reporting_owner = soup.find_all('table')[5]
    if reporting_owner:
        findtr = reporting_owner.find('tr')
        findtd = findtr.find('td')
        repperson = findtd.find('a').get_text(strip=True)

    if non_derivative_rows:
        for rows in non_derivative_rows[3:]:
            findtd = rows.find_all('td')
            data['Reporting Person'] = repperson 
            data['Transaction Date'] = findtd[1].get_text(strip=True)
            data['Transaction Code'] = findtd[3].get_text(strip=True)
            data['Transaction Amount'] = findtd[5].get_text(strip=True)
            data['Transaction Price'] = findtd[7].get_text(strip=True)
            #print(data)

    # Extract relevant fields (example fields, adjust according to your needs)
    
        
    
    #non_derivative_table = soup.find_all('table')[13]
   # if non_derivative_table:
        #tablerows = non_derivative_table.find_all('tr')     
        #non_derivative_transaction = tablerows[3].find_all('td')
        
        #if non_derivative_transaction:
            #data['Transaction Date'] = non_derivative_transaction[1].get_text(strip=True)
            #data['Transaction Code'] = non_derivative_transaction[3].get_text(strip=True)
            #data['Transaction Amount'] = non_derivative_transaction[5].get_text(strip=True)
            #data['Transaction Price'] = non_derivative_transaction[7].get_text(strip=True)
    
    #return data

In [209]:
# Initialize list to store extracted data
extracted_data = []
# Iterate over Form 4 filings, fetch and parse HTML data
for document_url in filings_df['form4url']:
    try:
        soup = fetch_and_parse_html(document_url)
        filing_data = extract_form4_data(soup)
        #filing_data['accessionNumber'] = filing['accessionNumber']
        #filing_data['filingDate'] = filing['filingDate']
        extracted_data.append(filing_data)
        print(extracted_data)
    except Exception as e:
        print(f"Error processing {document_url}: {e}")

# Convert extracted data to DataFrame
extracted_data = pd.DataFrame(extracted_data)
# Display the DataFrame
extracted_data

{'Reporting Person': 'KONDO CHRIS', 'Transaction Date': '05/15/2024', 'Transaction Code': 'S', 'Transaction Amount': '4,999', 'Transaction Price': '$190.395'}
[None]
{'Reporting Person': 'Adams Katherine L.', 'Transaction Date': '05/10/2024', 'Transaction Code': 'G', 'Transaction Amount': '1,850', 'Transaction Price': '$0'}
[None, None]
{'Reporting Person': 'KONDO CHRIS', 'Transaction Date': '04/15/2024', 'Transaction Code': 'M', 'Transaction Amount': '8,119', 'Transaction Price': '(1)'}
{'Reporting Person': 'KONDO CHRIS', 'Transaction Date': '04/15/2024', 'Transaction Code': 'F', 'Transaction Amount': '3,120', 'Transaction Price': '$172.69'}
[None, None, None]
{'Reporting Person': 'WILLIAMS JEFFREY E', 'Transaction Date': '04/11/2024', 'Transaction Code': 'S', 'Transaction Amount': '59,162', 'Transaction Price': '$172.22(2)'}
[None, None, None, None]
{'Reporting Person': 'Maestri Luca', 'Transaction Date': '04/11/2024', 'Transaction Code': 'S', 'Transaction Amount': '12,700', 'Transac

Unnamed: 0,0
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,


In [125]:
extracted_data = extracted_data.dropna()

In [148]:
print(extracted_data['form4url'][9])

https://www.sec.gov/Archives/edgar/data/0000320193/000032019324000048/xslF345X05/wk-form4_1712183434.xml


In [151]:
soup = fetch_and_parse_html('https://www.sec.gov/Archives/edgar/data/0000320193/000032019324000048/xslF345X05/wk-form4_1712183434.xml')
thingtable = soup.find_all('table')[13]
thingtable

<table border="1" cellpadding="4" cellspacing="0" width="100%">
<thead>
<tr><th align="center" class="FormTextC" colspan="11" valign="top" width="100%"><b>Table I - Non-Derivative Securities Acquired, Disposed of, or Beneficially Owned</b></th></tr>
<tr>
<th align="left" class="MedSmallFormText" rowspan="2" valign="top" width="36%">1. Title of Security (Instr. 
      3)
   </th>
<th align="left" class="SmallFormText" rowspan="2" valign="top" width="6%">2. Transaction Date
      (Month/Day/Year)</th>
<th align="left" class="SmallFormText" rowspan="2" valign="top" width="5%">2A. Deemed Execution Date, if any
      (Month/Day/Year)</th>
<th align="left" class="SmallFormText" colspan="2" valign="top" width="7%">3. Transaction Code (Instr. 
      8)
   </th>
<th align="left" class="SmallFormText" colspan="3" valign="top" width="19%">4. Securities Acquired (A) or Disposed Of (D) (Instr. 
      3, 4 and 5)
   </th>
<th align="left" class="SmallFormText" rowspan="2" valign="top" width="11%">5.

In [198]:
rows = thingtable.find_all('tr')
third = rows[3]
thirdfind = third.find_all('td')
thirdfind

[<td align="left"><span class="FormData">Common Stock</span></td>,
 <td align="center"><span class="FormData">04/01/2024</span></td>,
 <td align="center"></td>,
 <td align="center"><span class="SmallFormData">M</span></td>,
 <td align="center"></td>,
 <td align="center"><span class="FormData">113,309</span></td>,
 <td align="center"><span class="FormData">A</span></td>,
 <td align="center"><span class="FootnoteData"><sup>(1)</sup></span></td>,
 <td align="center">
 <span class="FormData">406,702</span><span class="FootnoteData"><sup>(2)</sup></span>
 </td>,
 <td align="center"><span class="FormData">D</span></td>,
 <td align="left"></td>]

In [190]:
for r in rows[3:]:
    print(r[1].get_text(strip=True))
    print(r[3].get_text(strip=True))
    print(r[5].get_text(strip=True))
    print(r[7].get_text(strip=True))

KeyError: 1

In [145]:
#non_derivative_transaction = rows[3].find_all('td')

#non_derivative_transaction[5].get_text(strip=True)

'100,000'