In [2]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import plotly.graph_objs as go

In [52]:
ticker = 'LMT'

headers = {
    'User-Agent': 'LewisHartley/1.0 (lewisdhartley@icloud.com)'
}

polygon_api_key = "KkfCQ7fsZnx0yK4bhX9fD81QplTh0Pf3" #This is not my API key, I have borrowed it from https://github.com/quantgalore as it seems to be a premium one

cik_mapping_url = 'https://www.sec.gov/files/company_tickers.json'

start_date = datetime(2022, 1, 1)
end_date = datetime(2022, 3, 17)

In [104]:
ticker_data = pd.json_normalize(requests.get(f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/day/{start_date.strftime('%Y-%m-%d')}/{end_date.strftime('%Y-%m-%d')}?adjusted=true&sort=asc&limit=50000&apiKey={polygon_api_key}").json()["results"]).set_index("t")
ticker_data.index = pd.to_datetime(ticker_data.index, unit="ms", utc=True).tz_convert("America/New_York")
ticker_data.index = pd.to_datetime(ticker_data.index.date)
ticker_data = ticker_data[["c"]].dropna()
ticker_data.reset_index(inplace=True)
ticker_data = ticker_data.rename(columns={'c' : 'Price', 'index': 'Transaction Date'})

In [54]:
# Function to get CIK for a given ticker

def get_cik_for_ticker(ticker):
    try:
        
        response = requests.get(cik_mapping_url, headers=headers)
        response.raise_for_status()
        data = response.json()
        
        for entry in data.values():
            if entry['ticker'].upper() == ticker.upper():
                return entry['cik_str']
                
        return None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

In [55]:
#Function to check whether a cik is valid

def is_valid_cik(cik):
    response = requests.get(f'https://data.sec.gov/submissions/CIK{cik}.json', headers=headers)
    
    if response.status_code == 200:
        data = response.json()
        if ticker in data['tickers']:
            return True
        else:
            return False
    else:
        return False

In [105]:
#Function that extracts filings from sec for a given cik

def fetch_filings(cik):
    try:
        # Make the request to get the filings data
        response = requests.get(f'https://data.sec.gov/submissions/CIK{cik}.json', headers=headers)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None  

In [119]:
#Function that filters extracted filings for form 4s

def filter_form4_filings(filings, start_date, end_date):
    form4_urls = []
    for i in range(len(filings['form'])):
        if filings['form'][i] == "4":
            filing_date = datetime.strptime(filings['filingDate'][i], '%Y-%m-%d')
            if start_date <= filing_date <= end_date:
                form4_urls.append(f"https://www.sec.gov/Archives/edgar/data/{cik}/{filings['accessionNumber'][i].replace('-', '')}/{filings['primaryDocument'][i]}",)
    return form4_urls


In [107]:
# Function to fetch a filing from their url and parse HTML

def fetch_and_parse_html(document_url):
    response = requests.get(document_url, headers=headers)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')

In [109]:
# Function to extract relevant data from a filing's html

def extract_form4_data(soup):
    datalist = []
    
    ndtable = soup.find_all('table')
    for i in range(len(ndtable)):
        if ndtable[i].find('tr').get_text(strip=True) == 'Table I - Non-Derivative Securities Acquired, Disposed of, or Beneficially Owned':
            non_derivative_table = ndtable[i]
    non_derivative_rows = non_derivative_table.find_all('tr')
    reporting_owner = soup.find_all('table')[5]
    if reporting_owner:
        findtr = reporting_owner.find('tr')
        findtd = findtr.find('td')
        repperson = findtd.find('a').get_text(strip=True)
    
    datetable = soup.find_all('table')[-1]
    if datetable:
        daterow = datetable.find('tr')
        filing_date = daterow.find_all('td')[2].get_text(strip=True)

    if non_derivative_rows:
        for row in non_derivative_rows[3:]:
            data = {}
            findtd = row.find_all('td')
            data['Reporting Person'] = repperson
            data['Filing Date'] = filing_date
            data['Transaction Date'] = findtd[1].get_text(strip=True)
            data['Transaction Code'] = findtd[3].get_text(strip=True)
            data['Transaction Amount'] = findtd[5].get_text(strip=True)
            data['Transaction Price'] = findtd[7].get_text(strip=True)
            print(data)
            datalist.append(data)
    
    return datalist

In [110]:
cik = str(get_cik_for_ticker(ticker))

In [111]:
if is_valid_cik(cik):
    cik=cik
elif is_valid_cik('0' + cik):
    cik = '0'+cik
elif is_valid_cik('00' + cik):
    cik = '00'+cik
elif is_valid_cik('000' + cik):
    cik = '000'+cik
elif is_valid_cik('0000' + cik):
    cik = '0000'+cik
elif is_valid_cik('00000' + cik):
    cik = '00000'+cik
else:
    print("Error fetching CIK")


In [122]:
# Fetch the filings data
filings_data = fetch_filings(cik)
# Check if filings data is fetched successfully
if filings_data:
    # Filter the Form 4 filings within the specified date range
    recent_filings = filings_data.get('filings', {}).get('recent', {})
    try:
        urls = filter_form4_filings(recent_filings, start_date, end_date)
    except:
        print("No Form 4 filings found within the specified date range.")
else:
    print("Failed to fetch filings data.")

In [123]:
extracted_data = []
for document_url in urls:
    try:
        soup = fetch_and_parse_html(document_url)
        filing_data = extract_form4_data(soup)
        extracted_data = extracted_data + filing_data
    except Exception as e:
        print(f"Error processing {document_url}: {e}")

extracted_data = pd.DataFrame(extracted_data)

extracted_data

{'Reporting Person': 'Hill Stephanie C.', 'Filing Date': '03/04/2022', 'Transaction Date': '03/03/2022', 'Transaction Code': 'S', 'Transaction Amount': '170', 'Transaction Price': '$448.0753(1)'}
{'Reporting Person': 'Hill Stephanie C.', 'Filing Date': '03/04/2022', 'Transaction Date': '03/03/2022', 'Transaction Code': 'S', 'Transaction Amount': '1,248', 'Transaction Price': '$450.0373(2)'}
{'Reporting Person': 'Hill Stephanie C.', 'Filing Date': '03/04/2022', 'Transaction Date': '', 'Transaction Code': '', 'Transaction Amount': '', 'Transaction Price': ''}
{'Reporting Person': 'Mollard John W', 'Filing Date': '03/03/2022', 'Transaction Date': '03/01/2022', 'Transaction Code': 'S', 'Transaction Amount': '1,300', 'Transaction Price': '$451.9441(1)'}
{'Reporting Person': 'Mollard John W', 'Filing Date': '03/03/2022', 'Transaction Date': '03/01/2022', 'Transaction Code': 'S', 'Transaction Amount': '3,700', 'Transaction Price': '$451.2056(2)'}
{'Reporting Person': 'Mollard John W', 'Filing

Unnamed: 0,Reporting Person,Filing Date,Transaction Date,Transaction Code,Transaction Amount,Transaction Price
0,Hill Stephanie C.,03/04/2022,03/03/2022,S,170,$448.0753(1)
1,Hill Stephanie C.,03/04/2022,03/03/2022,S,1248,$450.0373(2)
2,Hill Stephanie C.,03/04/2022,,,,
3,Mollard John W,03/03/2022,03/01/2022,S,1300,$451.9441(1)
4,Mollard John W,03/03/2022,03/01/2022,S,3700,$451.2056(2)
5,Mollard John W,03/03/2022,,,,
6,St John Frank A,03/03/2022,03/01/2022,S,230.411,$438.0426(1)
7,St John Frank A,03/03/2022,03/01/2022,S,500,$436.786(2)
8,St John Frank A,03/03/2022,03/01/2022,S,7333,$435.7834(3)
9,St John Frank A,03/03/2022,03/02/2022,I,8.8246,$0(4)


In [124]:
extracted_data.replace('', np.nan, inplace=True)
extracted_data.dropna(inplace = True)
extracted_data.reset_index(drop=True, inplace=True)
extracted_data

Unnamed: 0,Reporting Person,Filing Date,Transaction Date,Transaction Code,Transaction Amount,Transaction Price
0,Hill Stephanie C.,03/04/2022,03/03/2022,S,170,$448.0753(1)
1,Hill Stephanie C.,03/04/2022,03/03/2022,S,1248,$450.0373(2)
2,Mollard John W,03/03/2022,03/01/2022,S,1300,$451.9441(1)
3,Mollard John W,03/03/2022,03/01/2022,S,3700,$451.2056(2)
4,St John Frank A,03/03/2022,03/01/2022,S,230.411,$438.0426(1)
5,St John Frank A,03/03/2022,03/01/2022,S,500,$436.786(2)
6,St John Frank A,03/03/2022,03/01/2022,S,7333,$435.7834(3)
7,St John Frank A,03/03/2022,03/02/2022,I,8.8246,$0(4)
8,Lavan Maryanne,03/02/2022,02/28/2022,S,300.179,$428.6617(1)
9,Lavan Maryanne,03/02/2022,02/28/2022,S,1230,$427.1893(2)


In [126]:
sells = extracted_data[extracted_data['Transaction Code'].str.contains('S', case=False, na=False)]
sells['Transaction Type'] = "Sell"
buys = extracted_data[extracted_data['Transaction Code'].str.contains('P', case=False, na=False)]
buys['Transaction Type'] = "Buy"
transactions = pd.concat([buys, sells], axis=0).reset_index(drop=True)
transactions



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Reporting Person,Filing Date,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type
0,Hill Stephanie C.,03/04/2022,03/03/2022,S,170.0,$448.0753(1),Sell
1,Hill Stephanie C.,03/04/2022,03/03/2022,S,1248.0,$450.0373(2),Sell
2,Mollard John W,03/03/2022,03/01/2022,S,1300.0,$451.9441(1),Sell
3,Mollard John W,03/03/2022,03/01/2022,S,3700.0,$451.2056(2),Sell
4,St John Frank A,03/03/2022,03/01/2022,S,230.411,$438.0426(1),Sell
5,St John Frank A,03/03/2022,03/01/2022,S,500.0,$436.786(2),Sell
6,St John Frank A,03/03/2022,03/01/2022,S,7333.0,$435.7834(3),Sell
7,Lavan Maryanne,03/02/2022,02/28/2022,S,300.179,$428.6617(1),Sell
8,Lavan Maryanne,03/02/2022,02/28/2022,S,1230.0,$427.1893(2),Sell
9,Lavan Maryanne,03/02/2022,02/28/2022,S,4302.0,$425.8347(3),Sell


In [127]:
if transactions.empty == False:
    transactions['Transaction Amount'] = transactions['Transaction Amount'].str.replace(',', '', regex=True)
    transactions['Transaction Amount'] = pd.to_numeric(transactions['Transaction Amount'])
    transactions['Transaction Price'] = transactions['Transaction Price'].str.replace(r'\([^)]*\)', '', regex=True)
    transactions['Transaction Price'] = transactions['Transaction Price'][0][1:]
    transactions['Transaction Price'] = pd.to_numeric(transactions['Transaction Price'])
transactions

Unnamed: 0,Reporting Person,Filing Date,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type
0,Hill Stephanie C.,03/04/2022,03/03/2022,S,170.0,448.0753,Sell
1,Hill Stephanie C.,03/04/2022,03/03/2022,S,1248.0,448.0753,Sell
2,Mollard John W,03/03/2022,03/01/2022,S,1300.0,448.0753,Sell
3,Mollard John W,03/03/2022,03/01/2022,S,3700.0,448.0753,Sell
4,St John Frank A,03/03/2022,03/01/2022,S,230.411,448.0753,Sell
5,St John Frank A,03/03/2022,03/01/2022,S,500.0,448.0753,Sell
6,St John Frank A,03/03/2022,03/01/2022,S,7333.0,448.0753,Sell
7,Lavan Maryanne,03/02/2022,02/28/2022,S,300.179,448.0753,Sell
8,Lavan Maryanne,03/02/2022,02/28/2022,S,1230.0,448.0753,Sell
9,Lavan Maryanne,03/02/2022,02/28/2022,S,4302.0,448.0753,Sell


In [128]:
transactions['Transaction Date'] = pd.to_datetime(transactions['Transaction Date'])
transactions['Filing Date'] = pd.to_datetime(transactions['Filing Date'])
transactions.sort_values(by='Transaction Date', ascending = True, inplace=True)
transactions.reset_index(drop=True, inplace=True)
transactions

Unnamed: 0,Reporting Person,Filing Date,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type
0,Ulmer Gregory M,2022-02-25,2022-02-25,S,1204.0,448.0753,Sell
1,Lavan Maryanne,2022-03-02,2022-02-28,S,300.179,448.0753,Sell
2,Lavan Maryanne,2022-03-02,2022-02-28,S,1230.0,448.0753,Sell
3,Lavan Maryanne,2022-03-02,2022-02-28,S,4302.0,448.0753,Sell
4,Colan Brian P,2022-03-02,2022-02-28,S,1937.331,448.0753,Sell
5,Mollard John W,2022-03-03,2022-03-01,S,1300.0,448.0753,Sell
6,Mollard John W,2022-03-03,2022-03-01,S,3700.0,448.0753,Sell
7,St John Frank A,2022-03-03,2022-03-01,S,230.411,448.0753,Sell
8,St John Frank A,2022-03-03,2022-03-01,S,500.0,448.0753,Sell
9,St John Frank A,2022-03-03,2022-03-01,S,7333.0,448.0753,Sell


In [129]:
transactions.loc[transactions['Transaction Type'] == 'Buy', 'Transaction Value'] = transactions['Transaction Amount'] * transactions['Transaction Price']
transactions.loc[transactions['Transaction Type'] == 'Sell', 'Transaction Value'] = -transactions['Transaction Amount'] * transactions['Transaction Price']
transactions['Transaction Value'] = transactions['Transaction Value'].astype(int)


In [130]:
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

dailydata = pd.DataFrame(date_range, columns=['Transaction Date'])

dailydata = pd.merge(dailydata, transactions[['Transaction Date', 'Transaction Value', 'Transaction Type']], 
                    on='Transaction Date', how='left')

dailydata['Transaction Value'] = dailydata['Transaction Value'].fillna(0).astype(int)
dailydata['Transaction Type'] = dailydata['Transaction Type'].fillna('')

dailydata['Total Buys'] = 0

cumulative_buys = 0
for index, row in dailydata.iterrows():
    if row['Transaction Type'] == 'Buy' and row['Transaction Value'] != 0:
        cumulative_buys += row['Transaction Value']
    dailydata.at[index, 'Total Buys'] = cumulative_buys

dailydata['Total Sells'] = 0

cumulative_sells = 0
for index, row in dailydata.iterrows():
    if row['Transaction Type'] == 'Sell' and row['Transaction Value'] != 0:
        cumulative_sells += row['Transaction Value']
    dailydata.at[index, 'Total Sells'] = cumulative_sells


dailydata['Net Total'] = dailydata['Transaction Value'].cumsum().astype(int)
dailydata

Unnamed: 0,Transaction Date,Transaction Value,Transaction Type,Total Buys,Total Sells,Net Total
0,2022-01-01,0,,0,0,0
1,2022-01-02,0,,0,0,0
2,2022-01-03,0,,0,0,0
3,2022-01-04,0,,0,0,0
4,2022-01-05,0,,0,0,0
...,...,...,...,...,...,...
79,2022-03-13,0,,0,-10509563,-10509563
80,2022-03-14,0,,0,-10509563,-10509563
81,2022-03-15,0,,0,-10509563,-10509563
82,2022-03-16,0,,0,-10509563,-10509563


In [131]:
dailydata = pd.merge(dailydata, ticker_data, on = 'Transaction Date', how = 'left')
dailydata['Price'] = dailydata['Price'].ffill()
dailydata['Price'] = dailydata['Price'].bfill()
dailydata

Unnamed: 0,Transaction Date,Transaction Value,Transaction Type,Total Buys,Total Sells,Net Total,Price
0,2022-01-01,0,,0,0,0,354.36
1,2022-01-02,0,,0,0,0,354.36
2,2022-01-03,0,,0,0,0,354.36
3,2022-01-04,0,,0,0,0,361.99
4,2022-01-05,0,,0,0,0,358.14
...,...,...,...,...,...,...,...
79,2022-03-13,0,,0,-10509563,-10509563,439.04
80,2022-03-14,0,,0,-10509563,-10509563,444.45
81,2022-03-15,0,,0,-10509563,-10509563,448.67
82,2022-03-16,0,,0,-10509563,-10509563,421.34


In [132]:
fig = go.Figure()
tracenet = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Net Total'].values, mode='lines', name='Net Total', line=dict(color='Blue'), showlegend=True, yaxis='y1')
tracebuys = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Total Buys'].values, mode='lines', name='Total Buys', line=dict(color='Green'), showlegend=True, yaxis='y1')
tracesells = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Total Sells'].values, mode='lines', name='Total Sells', line=dict(color='Red'), showlegend=True, yaxis='y1')
tracestock = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Price'].values, mode='lines', name='Price', line=dict(color='Orange'), showlegend=True, yaxis='y2', )

fig.add_trace(tracenet)
fig.add_trace(tracebuys)
fig.add_trace(tracesells)
fig.add_trace(tracestock)

fig.update_layout(
    title= ticker +' Insider Transactions',
    xaxis_title='Date',
    yaxis_title='Notional Value Traded',
    template='plotly_dark',
    title_x=0.5,
    yaxis=dict(
        title='Value Traded',
        titlefont=dict(color='White'),
        tickfont=dict(color='White'),
    ),
    yaxis2=dict(
        title='Stock Price',
        titlefont=dict(color='White'),
        tickfont=dict(color='White'),
        overlaying='y',
        side='right',
        showgrid=False
    )
)

fig.show()