In [399]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import plotly.graph_objs as go


In [539]:
ticker = 'AAPL'

headers = {
    'User-Agent': 'LewisHartley/1.0 (lewisdhartley@icloud.com)'
}

cik_mapping_url = 'https://www.sec.gov/files/company_tickers.json'

start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 3, 1)

In [540]:
# Function to get CIK for a given ticker

def get_cik_for_ticker(ticker):
    try:
        
        response = requests.get(cik_mapping_url, headers=headers)
        response.raise_for_status()
        data = response.json()
        
        for entry in data.values():
            if entry['ticker'].upper() == ticker.upper():
                return entry['cik_str']
                
        return None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

In [541]:
#Function to check whether a cik is valid

def is_valid_cik(cik):
    response = requests.get(f'https://data.sec.gov/submissions/CIK{cik}.json', headers=headers)
    
    if response.status_code == 200:
        data = response.json()
        if ticker in data['tickers']:
            return True
        else:
            return False
    else:
        return False

In [542]:
#Function that extracts filings from sec for a given cik

def fetch_filings(cik):
    try:
        # Make the request to get the filings data
        response = requests.get(f'https://data.sec.gov/submissions/CIK{cik}.json', headers=headers)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None  

In [543]:
#Function that filters extracted filings for form 4s

def filter_form4_filings(filings, start_date, end_date):
    form4_filings = []
    for i in range(len(filings['form'])):
        if filings['form'][i] == "4":
            filing_date = datetime.strptime(filings['filingDate'][i], '%Y-%m-%d')
            if start_date <= filing_date <= end_date:
                form4_filings.append({
                    "accessionNumber": filings['accessionNumber'][i],
                    "filingDate": filings['filingDate'][i],
                    "primaryDocument": filings['primaryDocument'][i],
                    "primaryDocDescription": filings['primaryDocDescription'][i]
                })
    return form4_filings

In [544]:
# Function to fetch a filing from their url and parse HTML

def fetch_and_parse_html(document_url):
    response = requests.get(document_url, headers=headers)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')

In [545]:
# Function to extract relevant data from a filing's html

def extract_form4_data(soup):
    datalist = []
    
    non_derivative_table = soup.find_all('table')[13]
    non_derivative_rows = non_derivative_table.find_all('tr')
    reporting_owner = soup.find_all('table')[5]
    if reporting_owner:
        findtr = reporting_owner.find('tr')
        findtd = findtr.find('td')
        repperson = findtd.find('a').get_text(strip=True)

    if non_derivative_rows:
        for row in non_derivative_rows[3:]:
            data = {}
            findtd = row.find_all('td')
            data['Reporting Person'] = repperson 
            data['Transaction Date'] = findtd[1].get_text(strip=True)
            data['Transaction Code'] = findtd[3].get_text(strip=True)
            data['Transaction Amount'] = findtd[5].get_text(strip=True)
            data['Transaction Price'] = findtd[7].get_text(strip=True)
            datalist.append(data)
    
    return datalist

In [546]:
cik = str(get_cik_for_ticker(ticker))

In [547]:
is_valid_cik(cik)

False

In [548]:
if is_valid_cik(cik):
    cik=cik
elif is_valid_cik('0' + cik):
    cik = '0'+cik
elif is_valid_cik('00' + cik):
    cik = '00'+cik
elif is_valid_cik('000' + cik):
    cik = '000'+cik
elif is_valid_cik('0000' + cik):
    cik = '0000'+cik


In [549]:
print(cik)
is_valid_cik(cik)

0000320193


True

In [550]:


# Fetch the filings data
filings_data = fetch_filings(cik)
# Check if filings data is fetched successfully
if filings_data:
    # Filter the Form 4 filings within the specified date range
    recent_filings = filings_data.get('filings', {}).get('recent', {})
    form4_filings = filter_form4_filings(recent_filings, start_date, end_date)
    if form4_filings:
        for filing in form4_filings:
            filing['form4url'] = f"https://www.sec.gov/Archives/edgar/data/{cik}/{filing['accessionNumber'].replace('-', '')}/{filing['primaryDocument']}"
    else:
        print("No Form 4 filings found within the specified date range.")
else:
    print("Failed to fetch filings data.")

In [551]:
filings_df = pd.DataFrame(form4_filings)
filings_df = filings_df[filings_df['primaryDocDescription'] == 'FORM 4'].reset_index(drop=True)

In [554]:
extracted_data = []
for document_url in filings_df['form4url']:
    try:
        soup = fetch_and_parse_html(document_url)
        filing_data = extract_form4_data(soup)
        extracted_data = extracted_data + filing_data
    except Exception as e:
        print(f"Error processing {document_url}: {e}")

extracted_data = pd.DataFrame(extracted_data)

extracted_data

Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price
0,LEVINSON ARTHUR D,02/29/2024,S,100000.0,$180.94(1)
1,LEVINSON ARTHUR D,,,,
2,WAGNER SUSAN,02/01/2024,M,1852.0,(1)
3,WAGNER SUSAN,,,,
4,WAGNER SUSAN,,,,
5,SUGAR RONALD D,02/01/2024,M,1852.0,(1)
6,LOZANO MONICA C,02/01/2024,M,1852.0,(1)
7,LEVINSON ARTHUR D,02/01/2024,M,1852.0,(1)
8,LEVINSON ARTHUR D,,,,
9,JUNG ANDREA,02/01/2024,M,1852.0,(1)


In [555]:
extracted_data.replace('', np.nan, inplace=True)
extracted_data.dropna(inplace = True)
extracted_data.reset_index(drop=True, inplace=True)
extracted_data

Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price
0,LEVINSON ARTHUR D,02/29/2024,S,100000,$180.94(1)
1,WAGNER SUSAN,02/01/2024,M,1852,(1)
2,SUGAR RONALD D,02/01/2024,M,1852,(1)
3,LOZANO MONICA C,02/01/2024,M,1852,(1)
4,LEVINSON ARTHUR D,02/01/2024,M,1852,(1)
5,JUNG ANDREA,02/01/2024,M,1852,(1)
6,Gorsky Alex,02/01/2024,M,1852,(1)
7,GORE ALBERT JR,02/01/2024,M,1852,(1)
8,BELL JAMES A,02/01/2024,M,1852,(1)


In [556]:
sells = extracted_data[extracted_data['Transaction Code'].str.contains('S', case=False, na=False)]
sells['Transaction Type'] = "Sell"
sells



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type
0,LEVINSON ARTHUR D,02/29/2024,S,100000,$180.94(1),Sell


In [534]:
buys = extracted_data[extracted_data['Transaction Code'].str.contains('P', case=False, na=False)]
buys['Transaction Type'] = "Buy"
buys

Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type


In [535]:
transactions = pd.concat([buys, sells], axis=0).reset_index(drop=True)

transactions['Transaction Amount'] = transactions['Transaction Amount'].str.replace(',', '')
transactions['Transaction Amount'] = pd.to_numeric(transactions['Transaction Amount'])
transactions['Transaction Price'] = transactions['Transaction Price'].str.replace('$','', regex=True)
transactions['Transaction Price'] = transactions['Transaction Price'].str.replace(r'\([^)]*\)', '', regex=True)
transactions['Transaction Price'] = pd.to_numeric(transactions['Transaction Price'])
transactions['Transaction Date'] = pd.to_datetime(transactions['Transaction Date'])
transactions.sort_values(by='Transaction Date', ascending = True, inplace=True)
transactions.reset_index(drop=True, inplace=True)
transactions

Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type
0,LEVINSON ARTHUR D,2024-02-29,S,100000,180.94,Sell
1,COOK TIMOTHY D,2024-04-01,S,99183,170.03,Sell
2,O'BRIEN DEIRDRE,2024-04-02,S,54732,168.91,Sell
3,COOK TIMOTHY D,2024-04-02,S,97062,168.62,Sell
4,COOK TIMOTHY D,2024-04-02,S,165,169.3,Sell
5,Adams Katherine L.,2024-04-02,S,54732,168.9,Sell
6,WILLIAMS JEFFREY E,2024-04-11,S,59162,172.22,Sell
7,Maestri Luca,2024-04-11,S,12700,173.19,Sell
8,Maestri Luca,2024-04-11,S,27600,174.12,Sell
9,Maestri Luca,2024-04-11,S,12894,175.02,Sell


In [536]:
transactions.loc[transactions['Transaction Type'] == 'Buy', 'Transaction Value'] = transactions['Transaction Amount'] * transactions['Transaction Price']
transactions.loc[transactions['Transaction Type'] == 'Sell', 'Transaction Value'] = -transactions['Transaction Amount'] * transactions['Transaction Price']

transactions


Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type,Transaction Value
0,LEVINSON ARTHUR D,2024-02-29,S,100000,180.94,Sell,-18094000.0
1,COOK TIMOTHY D,2024-04-01,S,99183,170.03,Sell,-16864090.0
2,O'BRIEN DEIRDRE,2024-04-02,S,54732,168.91,Sell,-9244782.0
3,COOK TIMOTHY D,2024-04-02,S,97062,168.62,Sell,-16366590.0
4,COOK TIMOTHY D,2024-04-02,S,165,169.3,Sell,-27934.5
5,Adams Katherine L.,2024-04-02,S,54732,168.9,Sell,-9244235.0
6,WILLIAMS JEFFREY E,2024-04-11,S,59162,172.22,Sell,-10188880.0
7,Maestri Luca,2024-04-11,S,12700,173.19,Sell,-2199513.0
8,Maestri Luca,2024-04-11,S,27600,174.12,Sell,-4805712.0
9,Maestri Luca,2024-04-11,S,12894,175.02,Sell,-2256708.0


In [537]:
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

dailydata = pd.DataFrame(date_range, columns=['Transaction Date'])

dailydata = pd.merge(dailydata, transactions[['Transaction Date', 'Transaction Value', 'Transaction Type']], 
                    on='Transaction Date', how='left')

dailydata['Transaction Value'].fillna(0, inplace=True)
dailydata['Transaction Type'].fillna('', inplace=True)

dailydata['Total Buys'] = 0

cumulative_buys = 0
for index, row in dailydata.iterrows():
    if row['Transaction Type'] == 'Buy' and row['Transaction Value'] != 0:
        cumulative_buys += row['Transaction Value']
    dailydata.at[index, 'Total Buys'] = cumulative_buys

dailydata['Total Sells'] = 0

cumulative_sells = 0
for index, row in dailydata.iterrows():
    if row['Transaction Type'] == 'Sell' and row['Transaction Value'] != 0:
        cumulative_sells += row['Transaction Value']
    dailydata.at[index, 'Total Sells'] = cumulative_sells


dailydata['Net Total'] = dailydata['Transaction Value'].cumsum()

dailydata

Unnamed: 0,Transaction Date,Transaction Value,Transaction Type,Total Buys,Total Sells,Net Total
0,2024-01-01,0.0,,0,0.000000e+00,0.000000e+00
1,2024-01-02,0.0,,0,0.000000e+00,0.000000e+00
2,2024-01-03,0.0,,0,0.000000e+00,0.000000e+00
3,2024-01-04,0.0,,0,0.000000e+00,0.000000e+00
4,2024-01-05,0.0,,0,0.000000e+00,0.000000e+00
...,...,...,...,...,...,...
165,2024-06-08,0.0,,0,-1.046127e+08,-1.046127e+08
166,2024-06-09,0.0,,0,-1.046127e+08,-1.046127e+08
167,2024-06-10,0.0,,0,-1.046127e+08,-1.046127e+08
168,2024-06-11,0.0,,0,-1.046127e+08,-1.046127e+08


In [538]:
fig = go.Figure()
tracenet = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Net Total'].values, mode='lines', name='Net Total', line=dict(color='Blue'), showlegend=True)
tracebuys = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Total Buys'].values, mode='lines', name='Total Buys', line=dict(color='Green'), showlegend=True)
tracesells = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Total Sells'].values, mode='lines', name='Total Sells', line=dict(color='Red'), showlegend=True)

fig.add_trace(tracenet)
fig.add_trace(tracebuys)
fig.add_trace(tracesells)

fig.update_layout(
    title= ticker +' Insider Transactions',
    xaxis_title='Date',
    yaxis_title='Value Traded',
    template='plotly_dark',
    title_x=0.5,
)

fig.show()