In [2]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup

In [3]:
# URL of the SEC's ticker to CIK mapping file
url = 'https://www.sec.gov/files/company_tickers.json'

# Custom headers with a User-Agent
headers = {
    'User-Agent': 'LewisHartley/1.0 (lewisdhartley@icloud.com)'
}

# Function to get CIK for a given ticker
def get_cik_for_ticker(ticker):
    try:
        # Make the request to get the mapping file
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        
        # Parse the JSON data
        data = response.json()
        
        # Loop through the data to find the matching ticker
        for entry in data.values():
            if entry['ticker'].upper() == ticker.upper():
                return entry['cik_str']
                
        
        return None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

In [80]:
# Example usage
ticker = 'AMZN'
cik = '0000' + str(get_cik_for_ticker(ticker))

if cik:
    print(f"The CIK for ticker symbol {ticker} is {cik}.")
else:
    print(f"No CIK found for ticker symbol {ticker}.")


The CIK for ticker symbol AMZN is 00001018724.


In [79]:
def fetch_filings(cik):
    try:
        # Make the request to get the filings data
        response = requests.get(f'https://data.sec.gov/submissions/CIK{cik}.json', headers=headers)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
    

In [78]:
def filter_form4_filings(filings, start_date, end_date):
    form4_filings = []
    for i in range(len(filings['form'])):
        if filings['form'][i] == "4":
            filing_date = datetime.strptime(filings['filingDate'][i], '%Y-%m-%d')
            if start_date <= filing_date <= end_date:
                form4_filings.append({
                    "accessionNumber": filings['accessionNumber'][i],
                    "filingDate": filings['filingDate'][i],
                    "primaryDocument": filings['primaryDocument'][i],
                    "primaryDocDescription": filings['primaryDocDescription'][i]
                })
    return form4_filings

In [77]:
# Define the date range
start_date = datetime(2024, 3, 1)
end_date = datetime(2024, 6, 1)

# Fetch the filings data
filings_data = fetch_filings(cik)
# Check if filings data is fetched successfully
if filings_data:
    # Filter the Form 4 filings within the specified date range
    recent_filings = filings_data.get('filings', {}).get('recent', {})
    form4_filings = filter_form4_filings(recent_filings, start_date, end_date)

    # Print the Form 4 filings
    if form4_filings:
        print("Form 4 Filings:")
        for filing in form4_filings:
            print(f"Accession Number: {filing['accessionNumber']}")
            print(f"Filing Date: {filing['filingDate']}")
            print(f"Primary Document: {filing['primaryDocument']}")
            print(f"Primary Document Description: {filing['primaryDocDescription']}")
            filing['form4url'] = f"https://www.sec.gov/Archives/edgar/data/{cik}/{filing['accessionNumber'].replace('-', '')}/{filing['primaryDocument']}"
            print((f"Document URL: {filing['form4url']}"))
            print("---")
    else:
        print("No Form 4 filings found within the specified date range.")
else:
    print("Failed to fetch filings data.")

An error occurred: 404 Client Error: Not Found for url: https://data.sec.gov/submissions/CIK00001018724.json
Failed to fetch filings data.


In [8]:
filings_df = pd.DataFrame(form4_filings)
filings_df

Unnamed: 0,accessionNumber,filingDate,primaryDocument,primaryDocDescription,form4url
0,0000320193-24-000073,2024-05-17,xslF345X05/wk-form4_1715985021.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
1,0000320193-24-000071,2024-05-14,xslF345X05/wk-form4_1715725806.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
2,0000320193-24-000058,2024-04-17,xslF345X05/wk-form4_1713393040.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
3,0000320193-24-000056,2024-04-15,xslF345X05/wk-form4_1713220262.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
4,0000320193-24-000055,2024-04-15,xslF345X05/wk-form4_1713220215.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
5,0000320193-24-000052,2024-04-03,xslF345X05/wk-form4_1712183631.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
6,0000320193-24-000051,2024-04-03,xslF345X05/wk-form4_1712183580.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
7,0000320193-24-000050,2024-04-03,xslF345X05/wk-form4_1712183535.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
8,0000320193-24-000049,2024-04-03,xslF345X05/wk-form4_1712183493.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...
9,0000320193-24-000048,2024-04-03,xslF345X05/wk-form4_1712183434.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000032...


In [72]:
# Function to fetch and parse HTML
def fetch_and_parse_html(document_url):
    response = requests.get(document_url, headers=headers)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')

# Function to extract relevant data from the HTML soup
def extract_form4_data(soup):
    datalist = []
    
    non_derivative_table = soup.find_all('table')[13]
    non_derivative_rows = non_derivative_table.find_all('tr')
    reporting_owner = soup.find_all('table')[5]
    if reporting_owner:
        findtr = reporting_owner.find('tr')
        findtd = findtr.find('td')
        repperson = findtd.find('a').get_text(strip=True)

    if non_derivative_rows:
        for row in non_derivative_rows[3:]:
            data = {}
            findtd = row.find_all('td')
            data['Reporting Person'] = repperson 
            data['Transaction Date'] = findtd[1].get_text(strip=True)
            data['Transaction Code'] = findtd[3].get_text(strip=True)
            data['Transaction Amount'] = findtd[5].get_text(strip=True)
            data['Transaction Price'] = findtd[7].get_text(strip=True)
            #print(data)
            datalist.append(data)
            #print(datalist)
    
    #print(datalist)
            

    # Extract relevant fields (example fields, adjust according to your needs)
    
        
    
    #non_derivative_table = soup.find_all('table')[13]
   # if non_derivative_table:
        #tablerows = non_derivative_table.find_all('tr')     
        #non_derivative_transaction = tablerows[3].find_all('td')
        
        #if non_derivative_transaction:
            #data['Transaction Date'] = non_derivative_transaction[1].get_text(strip=True)
            #data['Transaction Code'] = non_derivative_transaction[3].get_text(strip=True)
            #data['Transaction Amount'] = non_derivative_transaction[5].get_text(strip=True)
            #data['Transaction Price'] = non_derivative_transaction[7].get_text(strip=True)
    
    return datalist

In [73]:
# Initialize list to store extracted data
extracted_data = []
# Iterate over Form 4 filings, fetch and parse HTML data
for document_url in filings_df['form4url']:
    try:
        soup = fetch_and_parse_html(document_url)
        filing_data = extract_form4_data(soup)
        #print(filing_data)
        #print(type(filing_data))
        extracted_data = extracted_data + filing_data
        #print(filing_data)
        #filing_data['accessionNumber'] = filing['accessionNumber']
        #filing_data['filingDate'] = filing['filingDate']
        #extracted_data.append(filing_data)
    except Exception as e:
        print(f"Error processing {document_url}: {e}")

# Convert extracted data to DataFrame
extracted_data = pd.DataFrame(extracted_data)
# Display the DataFrame
extracted_data

[{'Reporting Person': 'KONDO CHRIS', 'Transaction Date': '05/15/2024', 'Transaction Code': 'S', 'Transaction Amount': '4,999', 'Transaction Price': '$190.395'}]
[{'Reporting Person': 'Adams Katherine L.', 'Transaction Date': '05/10/2024', 'Transaction Code': 'G', 'Transaction Amount': '1,850', 'Transaction Price': '$0'}]
[{'Reporting Person': 'KONDO CHRIS', 'Transaction Date': '04/15/2024', 'Transaction Code': 'M', 'Transaction Amount': '8,119', 'Transaction Price': '(1)'}]
[{'Reporting Person': 'KONDO CHRIS', 'Transaction Date': '04/15/2024', 'Transaction Code': 'M', 'Transaction Amount': '8,119', 'Transaction Price': '(1)'}, {'Reporting Person': 'KONDO CHRIS', 'Transaction Date': '04/15/2024', 'Transaction Code': 'F', 'Transaction Amount': '3,120', 'Transaction Price': '$172.69'}]
[{'Reporting Person': 'WILLIAMS JEFFREY E', 'Transaction Date': '04/11/2024', 'Transaction Code': 'S', 'Transaction Amount': '59,162', 'Transaction Price': '$172.22(2)'}]
[{'Reporting Person': 'Maestri Luca

Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price
0,KONDO CHRIS,05/15/2024,S,4999.0,$190.395
1,Adams Katherine L.,05/10/2024,G,1850.0,$0
2,KONDO CHRIS,04/15/2024,M,8119.0,(1)
3,KONDO CHRIS,04/15/2024,F,3120.0,$172.69
4,WILLIAMS JEFFREY E,04/11/2024,S,59162.0,$172.22(2)
5,Maestri Luca,04/11/2024,S,12700.0,$173.19(2)
6,Maestri Luca,04/11/2024,S,27600.0,$174.12(3)
7,Maestri Luca,04/11/2024,S,12894.0,$175.02(4)
8,WILLIAMS JEFFREY E,04/01/2024,M,113309.0,(1)
9,WILLIAMS JEFFREY E,04/01/2024,F,54147.0,$170.03
