In [2]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import plotly.graph_objs as go

In [367]:
ticker = 'NVDA'

headers = {
    'User-Agent': 'LewisHartley/1.0 (lewisdhartley@icloud.com)'
}

polygon_api_key = "KkfCQ7fsZnx0yK4bhX9fD81QplTh0Pf3" #This is not my API key, I have borrowed it from https://github.com/quantgalore as it seems to be a premium one

cik_mapping_url = 'https://www.sec.gov/files/company_tickers.json'

start_date = datetime(2024, 1, 1)
#start_date = start_date.strftime('%Y-%m-%d')
end_date = datetime(2024, 6, 17)
#end_date = end_date.strftime('%Y-%m-%d')

In [368]:
ticker_data = pd.json_normalize(requests.get(f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/day/{start_date.strftime('%Y-%m-%d')}/{end_date.strftime('%Y-%m-%d')}?adjusted=true&sort=asc&limit=50000&apiKey={polygon_api_key}").json()["results"]).set_index("t")
ticker_data.index = pd.to_datetime(ticker_data.index, unit="ms", utc=True).tz_convert("America/New_York")
ticker_data.index = pd.to_datetime(ticker_data.index.date)
ticker_data = ticker_data[["c"]].dropna()
ticker_data.reset_index(inplace=True)
ticker_data = ticker_data.rename(columns={'c' : 'Price', 'index': 'Transaction Date'})
ticker_data

Unnamed: 0,Transaction Date,Price
0,2024-01-02,48.168
1,2024-01-03,47.569
2,2024-01-04,47.998
3,2024-01-05,49.097
4,2024-01-08,52.253
...,...,...
111,2024-06-11,120.910
112,2024-06-12,125.200
113,2024-06-13,129.610
114,2024-06-14,131.880


In [369]:
# Function to get CIK for a given ticker

def get_cik_for_ticker(ticker):
    try:
        
        response = requests.get(cik_mapping_url, headers=headers)
        response.raise_for_status()
        data = response.json()
        
        for entry in data.values():
            if entry['ticker'].upper() == ticker.upper():
                return entry['cik_str']
                
        return None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

In [370]:
#Function to check whether a cik is valid

def is_valid_cik(cik):
    response = requests.get(f'https://data.sec.gov/submissions/CIK{cik}.json', headers=headers)
    
    if response.status_code == 200:
        data = response.json()
        if ticker in data['tickers']:
            return True
        else:
            return False
    else:
        return False

In [371]:
#Function that extracts filings from sec for a given cik

def fetch_filings(cik):
    try:
        # Make the request to get the filings data
        response = requests.get(f'https://data.sec.gov/submissions/CIK{cik}.json', headers=headers)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None  

In [372]:
#Function that filters extracted filings for form 4s

def filter_form4_filings(filings, start_date, end_date):
    form4_filings = []
    for i in range(len(filings['form'])):
        if filings['form'][i] == "4":
            filing_date = datetime.strptime(filings['filingDate'][i], '%Y-%m-%d')
            if start_date <= filing_date <= end_date:
                form4_filings.append({
                    "accessionNumber": filings['accessionNumber'][i],
                    "filingDate": filings['filingDate'][i],
                    "primaryDocument": filings['primaryDocument'][i],
                    "primaryDocDescription": filings['primaryDocDescription'][i]
                })
    return form4_filings

In [373]:
# Function to fetch a filing from their url and parse HTML

def fetch_and_parse_html(document_url):
    response = requests.get(document_url, headers=headers)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')

In [374]:
# Function to extract relevant data from a filing's html

def extract_form4_data(soup):
    datalist = []
    
    ndtable = soup.find_all('table')
    for i in range(len(ndtable)):
        if ndtable[i].find('tr').get_text(strip=True) == 'Table I - Non-Derivative Securities Acquired, Disposed of, or Beneficially Owned':
            non_derivative_table = ndtable[i]
    non_derivative_rows = non_derivative_table.find_all('tr')
    reporting_owner = soup.find_all('table')[5]
    if reporting_owner:
        findtr = reporting_owner.find('tr')
        findtd = findtr.find('td')
        repperson = findtd.find('a').get_text(strip=True)

    if non_derivative_rows:
        for row in non_derivative_rows[3:]:
            data = {}
            findtd = row.find_all('td')
            data['Reporting Person'] = repperson 
            data['Transaction Date'] = findtd[1].get_text(strip=True)
            data['Transaction Code'] = findtd[3].get_text(strip=True)
            data['Transaction Amount'] = findtd[5].get_text(strip=True)
            data['Transaction Price'] = findtd[7].get_text(strip=True)
            print(data)
            datalist.append(data)
    
    return datalist

In [375]:
cik = str(get_cik_for_ticker(ticker))
print(cik)

1045810


In [376]:
is_valid_cik(cik)

False

In [377]:
if is_valid_cik(cik):
    cik=cik
elif is_valid_cik('0' + cik):
    cik = '0'+cik
elif is_valid_cik('00' + cik):
    cik = '00'+cik
elif is_valid_cik('000' + cik):
    cik = '000'+cik
elif is_valid_cik('0000' + cik):
    cik = '0000'+cik
elif is_valid_cik('00000' + cik):
    cik = '00000'+cik


In [378]:
print(cik)
is_valid_cik(cik)

0001045810


True

In [379]:
# Fetch the filings data
filings_data = fetch_filings(cik)
# Check if filings data is fetched successfully
if filings_data:
    # Filter the Form 4 filings within the specified date range
    recent_filings = filings_data.get('filings', {}).get('recent', {})
    form4_filings = filter_form4_filings(recent_filings, start_date, end_date)
    if form4_filings:
        for filing in form4_filings:
            print(filing)
            filing['form4url'] = f"https://www.sec.gov/Archives/edgar/data/{cik}/{filing['accessionNumber'].replace('-', '')}/{filing['primaryDocument']}"
    else:
        print("No Form 4 filings found within the specified date range.")
else:
    print("Failed to fetch filings data.")

{'accessionNumber': '0001045810-24-000151', 'filingDate': '2024-06-13', 'primaryDocument': 'xslF345X05/wk-form4_1718315527.xml', 'primaryDocDescription': 'FORM 4'}
{'accessionNumber': '0001415889-24-016244', 'filingDate': '2024-06-10', 'primaryDocument': 'xslF345X05/form4-06102024_080653.xml', 'primaryDocDescription': ''}
{'accessionNumber': '0001045810-24-000149', 'filingDate': '2024-06-07', 'primaryDocument': 'xslF345X05/wk-form4_1717794910.xml', 'primaryDocDescription': 'FORM 4'}
{'accessionNumber': '0001045810-24-000148', 'filingDate': '2024-06-07', 'primaryDocument': 'xslF345X05/wk-form4_1717794853.xml', 'primaryDocDescription': 'FORM 4'}
{'accessionNumber': '0001045810-24-000147', 'filingDate': '2024-06-07', 'primaryDocument': 'xslF345X05/wk-form4_1717794757.xml', 'primaryDocDescription': 'FORM 4'}
{'accessionNumber': '0001045810-24-000140', 'filingDate': '2024-06-05', 'primaryDocument': 'xslF345X05/wk-form4_1717621363.xml', 'primaryDocDescription': 'FORM 4'}
{'accessionNumber': 

In [380]:
filings_df = pd.DataFrame(form4_filings)
#filings_df = filings_df[filings_df['primaryDocDescription'] == 'FORM 4'].reset_index(drop=True)
filings_df

Unnamed: 0,accessionNumber,filingDate,primaryDocument,primaryDocDescription,form4url
0,0001045810-24-000151,2024-06-13,xslF345X05/wk-form4_1718315527.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000104...
1,0001415889-24-016244,2024-06-10,xslF345X05/form4-06102024_080653.xml,,https://www.sec.gov/Archives/edgar/data/000104...
2,0001045810-24-000149,2024-06-07,xslF345X05/wk-form4_1717794910.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000104...
3,0001045810-24-000148,2024-06-07,xslF345X05/wk-form4_1717794853.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000104...
4,0001045810-24-000147,2024-06-07,xslF345X05/wk-form4_1717794757.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000104...
5,0001045810-24-000140,2024-06-05,xslF345X05/wk-form4_1717621363.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000104...
6,0001045810-24-000139,2024-06-05,xslF345X05/wk-form4_1717621332.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000104...
7,0001045810-24-000138,2024-06-05,xslF345X05/wk-form4_1717621281.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000104...
8,0001045810-24-000134,2024-05-31,xslF345X05/wk-form4_1717185896.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000104...
9,0001045810-24-000132,2024-05-31,xslF345X05/wk-form4_1717185818.xml,FORM 4,https://www.sec.gov/Archives/edgar/data/000104...


In [381]:
extracted_data = []
for document_url in filings_df['form4url']:
    try:
        soup = fetch_and_parse_html(document_url)
        filing_data = extract_form4_data(soup)
        extracted_data = extracted_data + filing_data
    except Exception as e:
        print(f"Error processing {document_url}: {e}")

extracted_data = pd.DataFrame(extracted_data)

extracted_data

{'Reporting Person': 'STEVENS MARK A', 'Transaction Date': '06/11/2024', 'Transaction Code': 'S', 'Transaction Amount': '150,000', 'Transaction Price': '$120.4219(1)'}
{'Reporting Person': 'STEVENS MARK A', 'Transaction Date': '06/12/2024', 'Transaction Code': 'S', 'Transaction Amount': '320,000', 'Transaction Price': '$125.8773(4)'}
{'Reporting Person': 'STEVENS MARK A', 'Transaction Date': '', 'Transaction Code': '', 'Transaction Amount': '', 'Transaction Price': ''}
{'Reporting Person': 'STEVENS MARK A', 'Transaction Date': '', 'Transaction Code': '', 'Transaction Amount': '', 'Transaction Price': ''}
{'Reporting Person': 'COXE TENCH', 'Transaction Date': '06/07/2024', 'Transaction Code': 'S', 'Transaction Amount': '1,409', 'Transaction Price': '$1,197.3594(1)'}
{'Reporting Person': 'COXE TENCH', 'Transaction Date': '06/07/2024', 'Transaction Code': 'S', 'Transaction Amount': '4,008', 'Transaction Price': '$1,196.4958(4)'}
{'Reporting Person': 'COXE TENCH', 'Transaction Date': '06/0

Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price
0,STEVENS MARK A,06/11/2024,S,150000,$120.4219(1)
1,STEVENS MARK A,06/12/2024,S,320000,$125.8773(4)
2,STEVENS MARK A,,,,
3,STEVENS MARK A,,,,
4,COXE TENCH,06/07/2024,S,1409,"$1,197.3594(1)"
...,...,...,...,...,...
276,STEVENS MARK A,,,,
277,STEVENS MARK A,,,,
278,Shoquist Debora,01/08/2024,S(1),5676,$500
279,Shoquist Debora,,,,


In [382]:
extracted_data.replace('', np.nan, inplace=True)
extracted_data.dropna(inplace = True)
extracted_data.reset_index(drop=True, inplace=True)
extracted_data

Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price
0,STEVENS MARK A,06/11/2024,S,150000,$120.4219(1)
1,STEVENS MARK A,06/12/2024,S,320000,$125.8773(4)
2,COXE TENCH,06/07/2024,S,1409,"$1,197.3594(1)"
3,COXE TENCH,06/07/2024,S,4008,"$1,196.4958(4)"
4,COXE TENCH,06/07/2024,S,20969,"$1,195.543(5)"
...,...,...,...,...,...
206,STEVENS MARK A,01/10/2024,S,5000,$543.5
207,STEVENS MARK A,01/10/2024,S,8848,$545.5
208,STEVENS MARK A,01/11/2024,S,18500,$549.8795(3)
209,Shoquist Debora,01/08/2024,S(1),5676,$500


In [383]:
sells = extracted_data[extracted_data['Transaction Code'].str.contains('S', case=False, na=False)]
sells['Transaction Type'] = "Sell"
sells



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type
0,STEVENS MARK A,06/11/2024,S,150000,$120.4219(1),Sell
1,STEVENS MARK A,06/12/2024,S,320000,$125.8773(4),Sell
2,COXE TENCH,06/07/2024,S,1409,"$1,197.3594(1)",Sell
3,COXE TENCH,06/07/2024,S,4008,"$1,196.4958(4)",Sell
4,COXE TENCH,06/07/2024,S,20969,"$1,195.543(5)",Sell
...,...,...,...,...,...,...
206,STEVENS MARK A,01/10/2024,S,5000,$543.5,Sell
207,STEVENS MARK A,01/10/2024,S,8848,$545.5,Sell
208,STEVENS MARK A,01/11/2024,S,18500,$549.8795(3),Sell
209,Shoquist Debora,01/08/2024,S(1),5676,$500,Sell


In [384]:
buys = extracted_data[extracted_data['Transaction Code'].str.contains('P', case=False, na=False)]
buys['Transaction Type'] = "Buy"
buys

Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type


In [406]:
transactions = pd.concat([buys, sells], axis=0).reset_index(drop=True)
if transactions.empty == False:
    transactions['Transaction Amount'] = transactions['Transaction Amount'].str.replace(',', '', regex=True)
    transactions['Transaction Amount'] = pd.to_numeric(transactions['Transaction Amount'])
    transactions['Transaction Price'] = transactions['Transaction Price'].str.replace(r'\([^)]*\)', '', regex=True)
    transactions['Transaction Price'] = transactions['Transaction Price'][0][1:]
    transactions['Transaction Price'] = pd.to_numeric(transactions['Transaction Price'])
transactions

Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type
0,STEVENS MARK A,06/11/2024,S,150000,120.4219,Sell
1,STEVENS MARK A,06/12/2024,S,320000,120.4219,Sell
2,COXE TENCH,06/07/2024,S,1409,120.4219,Sell
3,COXE TENCH,06/07/2024,S,4008,120.4219,Sell
4,COXE TENCH,06/07/2024,S,20969,120.4219,Sell
...,...,...,...,...,...,...
172,STEVENS MARK A,01/10/2024,S,5000,120.4219,Sell
173,STEVENS MARK A,01/10/2024,S,8848,120.4219,Sell
174,STEVENS MARK A,01/11/2024,S,18500,120.4219,Sell
175,Shoquist Debora,01/08/2024,S(1),5676,120.4219,Sell


In [407]:
transactions['Transaction Date'] = pd.to_datetime(transactions['Transaction Date'])
transactions.sort_values(by='Transaction Date', ascending = True, inplace=True)
transactions.reset_index(drop=True, inplace=True)
transactions

Unnamed: 0,Reporting Person,Transaction Date,Transaction Code,Transaction Amount,Transaction Price,Transaction Type
0,MCCAFFERY MICHAEL G,2023-12-13,S,4250,120.4219,Sell
1,Robertson Donald F Jr,2024-01-05,S(1),450,120.4219,Sell
2,Shoquist Debora,2024-01-08,S(1),5676,120.4219,Sell
3,STEVENS MARK A,2024-01-10,S,5000,120.4219,Sell
4,STEVENS MARK A,2024-01-10,S,18000,120.4219,Sell
...,...,...,...,...,...,...
172,COXE TENCH,2024-06-07,S,20969,120.4219,Sell
173,COXE TENCH,2024-06-07,S,4008,120.4219,Sell
174,COXE TENCH,2024-06-07,S,1409,120.4219,Sell
175,STEVENS MARK A,2024-06-11,S,150000,120.4219,Sell


In [408]:
transactions.loc[transactions['Transaction Type'] == 'Buy', 'Transaction Value'] = transactions['Transaction Amount'] * transactions['Transaction Price']
transactions.loc[transactions['Transaction Type'] == 'Sell', 'Transaction Value'] = -transactions['Transaction Amount'] * transactions['Transaction Price']



In [413]:
transactions['Transaction Value'] = transactions['Transaction Value'].astype(int)

In [415]:
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

dailydata = pd.DataFrame(date_range, columns=['Transaction Date'])

dailydata = pd.merge(dailydata, transactions[['Transaction Date', 'Transaction Value', 'Transaction Type']], 
                    on='Transaction Date', how='left')

dailydata['Transaction Value'] = dailydata['Transaction Value'].fillna(0).astype(int)
dailydata['Transaction Type'] = dailydata['Transaction Type'].fillna('')

dailydata['Total Buys'] = 0

cumulative_buys = 0
for index, row in dailydata.iterrows():
    if row['Transaction Type'] == 'Buy' and row['Transaction Value'] != 0:
        cumulative_buys += row['Transaction Value']
    dailydata.at[index, 'Total Buys'] = cumulative_buys

dailydata['Total Sells'] = 0

cumulative_sells = 0
for index, row in dailydata.iterrows():
    if row['Transaction Type'] == 'Sell' and row['Transaction Value'] != 0:
        cumulative_sells += row['Transaction Value']
    dailydata.at[index, 'Total Sells'] = cumulative_sells


dailydata['Net Total'] = dailydata['Transaction Value'].cumsum().astype(int)
dailydata

Unnamed: 0,Transaction Date,Transaction Value,Transaction Type,Total Buys,Total Sells,Net Total
0,2024-01-01,0,,0,0,0
1,2024-01-02,0,,0,0,0
2,2024-01-03,0,,0,0,0
3,2024-01-04,0,,0,0,0
4,2024-01-05,-54189,Sell,0,-54189,-54189
...,...,...,...,...,...,...
315,2024-06-13,0,,0,-140836091,-140836091
316,2024-06-14,0,,0,-140836091,-140836091
317,2024-06-15,0,,0,-140836091,-140836091
318,2024-06-16,0,,0,-140836091,-140836091


In [416]:
dailydata = pd.merge(dailydata, ticker_data, on = 'Transaction Date', how = 'left')
dailydata['Price'] = dailydata['Price'].ffill()
dailydata['Price'] = dailydata['Price'].bfill()
dailydata

Unnamed: 0,Transaction Date,Transaction Value,Transaction Type,Total Buys,Total Sells,Net Total,Price
0,2024-01-01,0,,0,0,0,48.168
1,2024-01-02,0,,0,0,0,48.168
2,2024-01-03,0,,0,0,0,47.569
3,2024-01-04,0,,0,0,0,47.998
4,2024-01-05,-54189,Sell,0,-54189,-54189,49.097
...,...,...,...,...,...,...,...
315,2024-06-13,0,,0,-140836091,-140836091,129.610
316,2024-06-14,0,,0,-140836091,-140836091,131.880
317,2024-06-15,0,,0,-140836091,-140836091,131.880
318,2024-06-16,0,,0,-140836091,-140836091,131.880


In [417]:
fig = go.Figure()
tracenet = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Net Total'].values, mode='lines', name='Net Total', line=dict(color='Blue'), showlegend=True, yaxis='y1')
tracebuys = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Total Buys'].values, mode='lines', name='Total Buys', line=dict(color='Green'), showlegend=True, yaxis='y1')
tracesells = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Total Sells'].values, mode='lines', name='Total Sells', line=dict(color='Red'), showlegend=True, yaxis='y1')
tracestock = go.Scatter(x=dailydata['Transaction Date'], y=dailydata['Price'].values, mode='lines', name='Price', line=dict(color='Orange'), showlegend=True, yaxis='y2', )

fig.add_trace(tracenet)
fig.add_trace(tracebuys)
fig.add_trace(tracesells)
fig.add_trace(tracestock)

fig.update_layout(
    title= ticker +' Insider Transactions',
    xaxis_title='Date',
    yaxis_title='Notional Value Traded',
    template='plotly_dark',
    title_x=0.5,
    yaxis=dict(
        title='Value Traded',
        titlefont=dict(color='White'),
        tickfont=dict(color='White'),
    ),
    yaxis2=dict(
        title='Stock Price',
        titlefont=dict(color='White'),
        tickfont=dict(color='White'),
        overlaying='y',
        side='right',
        showgrid=False
    )
)

fig.show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

