In [5]:
import requests
import time
from IPython.display import clear_output

# Keys read from every filing record returned by the API. The "insiders"
# entry is not copied as-is: scrape() replaces it with three boolean flags
# taken from the first element of the filing's insider list.
columns = [
    "company_ticker", "company_name", "has_trades",
    "insider_names", "insider_titles_unique", "insiders",
    "last_filing_date", "first_trade_date", "last_trade_date",
    "state_of_incorporation",
    "industry_group_name", "industry_name", "sector_name", "subsector_name",
    "previous_shares", "remaining_shares",
    "trade_dates", "trade_status", "trade_types",
    "traded_percentage", "traded_share_price", "traded_shares", "traded_value",
]

# Accumulator: scrape() appends one flattened dict per filing.
all_trades = []

# Text shown after "Errors:" in the progress display. The literal string
# "None" is the initial value that scrape() compares against.
errors = "None"

# Request headers sent with every API call.
headers = {"Accept": "application/json"}

def print_progress(page, page_count, tries, errors, max_tries=10):
    """Redraw the two-line progress display in the notebook cell output.

    Line 1 shows ``page/page_count`` and the percentage ``page / page_count``.
    Line 2 shows a retry bar (one ``O`` per failed attempt, padded with ``.``
    up to ``max_tries``) followed by the ``errors`` text.

    Args:
        page: Number of the page currently being requested.
        page_count: Total number of pages. May be 0, in which case the
            percentage is reported as 0.00 instead of dividing by zero.
        tries: Number of failed attempts so far for the current page.
        errors: Value rendered after ``Errors:`` (formatted with ``str``).
        max_tries: Length of the retry bar; defaults to 10.
    """
    clear_output(wait=False)

    # Both numbers are right-aligned to the same width so the line does not
    # jitter as the page number gains digits.
    width = len(str(page_count + 1))
    percent = (page / page_count) * 100 if page_count else 0.0
    retry_bar = (tries * "O") + ((max_tries - tries) * ".")

    print(f"Current Page : {str(page):>{width}}/{str(page_count):>{width}}", end="", flush=True)
    print(f"  [ {percent: 0.2f}%  ]", flush=True)
    print("{ " + retry_bar + " }" + f"   Errors: {errors}", flush=True)

_API_URL = 'https://www.benzinga.com/sec/insider-trades/api/insider-trades?'
_MAX_TRIES = 10            # attempts per page before the page is recorded as failed
_REQUEST_TIMEOUT = 30      # seconds; requests has no timeout unless one is passed
_RETRY_DELAY = 1           # seconds to wait between failed attempts
_INSIDER_FLAGS = ('is_director', 'is_officer', 'is_ten_percent_owner')


def _build_payload(page, query_limit):
    """Return the JSON request body for one page of the insider-trades query."""
    return {
        "insider_titles": [],
        "trade_types": ["p", "s"],
        "filing_date_preset": "custom",
        "trade_date_preset": "custom",
        "group_by": "filing",
        "sort_by": "last_filing_date",
        "page": page,
        "page_limit": query_limit,
    }


def _flatten_filing(filing):
    """Copy the fields named in ``columns`` from one filing into a flat dict.

    Missing fields become ``None``. The ``insiders`` field is replaced by the
    three flags in ``_INSIDER_FLAGS`` read from its first element; if any of
    them cannot be read, all three are set to ``None``.
    """
    trade = {}
    for column in columns:
        if column != "insiders":
            try:
                trade[column] = filing[column]
            except (KeyError, TypeError):
                trade[column] = None
        else:
            try:
                first_insider = filing['insiders'][0]
                flags = {flag: first_insider[flag] for flag in _INSIDER_FLAGS}
            except (KeyError, IndexError, TypeError):
                flags = dict.fromkeys(_INSIDER_FLAGS)
            trade.update(flags)
    return trade


def scrape(query_limit):
    """Download every page of insider-trade filings and append them to ``all_trades``.

    The first request is used only to read the total page count. Each page is
    then requested up to ``_MAX_TRIES`` times; a page that never succeeds is
    skipped and its number is appended to the module-level ``errors`` string
    shown in the progress display.

    Args:
        query_limit: Value sent as ``page_limit`` in every request.

    Raises:
        requests.RequestException: If the initial page-count request fails.
    """
    # `errors` is rebound below, so without this declaration Python would
    # treat it as a local and raise UnboundLocalError on the first read.
    global errors

    first = requests.post(_API_URL, json=_build_payload(1, query_limit),
                          headers=headers, timeout=_REQUEST_TIMEOUT)
    first.raise_for_status()
    page_count = first.json()['filings']['total_pages']

    for page in range(1, page_count + 1):
        payload = _build_payload(page, query_limit)

        for tries in range(_MAX_TRIES):
            print_progress(page, page_count, tries, errors)
            try:
                req = requests.post(_API_URL, json=payload, headers=headers,
                                    timeout=_REQUEST_TIMEOUT)
                req.raise_for_status()
                # The list of filings is nested under 'filings' -> 'filings'.
                filings = req.json()['filings']['filings']
                break
            except (requests.RequestException, ValueError, KeyError, TypeError):
                # Network failure, non-JSON body, or unexpected response
                # shape: pause briefly, then retry (unless out of attempts).
                if tries < _MAX_TRIES - 1:
                    time.sleep(_RETRY_DELAY)
        else:
            # Every attempt failed: remember the page number and move on.
            if errors == "None":
                errors = str(page)
            else:
                errors += ", " + str(page)
            continue

        for filing in filings:
            all_trades.append(_flatten_filing(filing))

# Run the scrape with 500 as the per-request page_limit; results accumulate
# in the module-level all_trades list.
scrape(500)

Current Page : 334/333  [  99.70%  ]
{ .......... }   Errors: []


In [6]:
import pandas as pd

# Build the DataFrame in one step from the list of per-filing dicts.
trade_data = pd.DataFrame(all_trades)

In [7]:
# trade_data is already a DataFrame; show it directly rather than wrapping
# it in a second DataFrame just for display.
trade_data

Unnamed: 0,company_ticker,company_name,has_trades,insider_names,insider_titles_unique,insiders,last_filing_date,first_trade_date,last_trade_date,state_of_incorporation,...,subsector_name,previous_shares,remaining_shares,trade_dates,trade_status,trade_types,traded_percentage,traded_share_price,traded_shares,traded_value
0,esmt,"EngageSmart, Inc.",True,Robert P. Bennett,CHIEF EXECUTIVE OFFICER,"[{'cik': '0001881869', 'is_director': False, '...",2023-07-14T20:47:13.000Z,2023-07-05T00:00:00.000Z,2023-07-12T00:00:00.000Z,de,...,Business Services,894491.000,832580.000,"[2023-07-12T00:00:00.000Z, 2023-07-05T00:00:00...",SELL,"[m, s]",-6.921366,12.100000,-61911.000,-1.431275e+06
1,crwd,"CrowdStrike Holdings, Inc.",True,George Kurtz,PRESIDENT AND CEO,"[{'cik': '0001778564', 'is_director': False, '...",2023-07-14T20:45:04.000Z,2023-07-13T00:00:00.000Z,2023-07-13T00:00:00.000Z,de,...,Business Services,1119971.000,1119971.000,"[2023-07-13T00:00:00.000Z, 2023-07-13T00:00:00...",SELL-OPTIONS,"[c, s]",0.000000,151.038553,0.000,-2.688486e+07
2,cacc,CREDIT ACCEPTANCE CORP,True,Donald A Foss Irrevocable Trust Dated August,,"[{'cik': '0001946067', 'is_director': False, '...",2023-07-14T20:36:46.000Z,2023-07-12T00:00:00.000Z,2023-07-14T00:00:00.000Z,mi,...,Non-depository Credit Institutions,1436327.000,1420312.000,"[2023-07-14T00:00:00.000Z, 2023-07-14T00:00:00...",SELL,[s],-1.114997,545.226613,-16015.000,-8.731804e+06
3,aur,"Aurora Innovation, Inc.",True,Sterling Anderson,DIRECTOR,"[{'cik': '0001878943', 'is_director': True, 'i...",2023-07-14T20:10:26.000Z,2023-07-13T00:00:00.000Z,2023-07-14T00:00:00.000Z,,...,Business Services,92899.000,92899.000,"[2023-07-14T00:00:00.000Z, 2023-07-14T00:00:00...",SELL-OPTIONS,"[c, s]",0.000000,3.324482,0.000,-1.248965e+06
4,dkng,DraftKings Inc.,True,Stanton Dodge,CHIEF LEGAL OFFICER,"[{'cik': '0001404430', 'is_director': False, '...",2023-07-14T20:01:27.000Z,2023-07-12T00:00:00.000Z,2023-07-12T00:00:00.000Z,nv,...,Amusement And Recreation Services,629023.000,629023.000,"[2023-07-12T00:00:00.000Z, 2023-07-12T00:00:00...",SELL-OPTIONS,"[m, s]",0.000000,16.535000,0.000,-5.735804e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166246,cwst,CASELLA WASTE SYSTEMS INC,True,Edwin D Johnson,,"[{'cik': '0001392991', 'is_director': False, '...",2020-07-16T15:43:41.000Z,2020-07-14T00:00:00.000Z,2020-07-16T00:00:00.000Z,de,...,"Electric, Gas, And Sanitary Services",227947.000,202466.000,"[2020-07-16T00:00:00.000Z, 2020-07-15T00:00:00...",,[s],-11.178476,52.816903,-25481.000,-1.345828e+06
166247,yorw,YORK WATER CO,True,Natalee Colon,,"[{'cik': '0001766609', 'is_director': False, '...",2020-07-16T15:22:43.000Z,2020-07-15T00:00:00.000Z,2020-07-16T00:00:00.000Z,pa,...,"Electric, Gas, And Sanitary Services",655.672,669.524,"[2020-07-16T00:00:00.000Z, 2020-07-15T00:00:00...",,[p],2.112642,45.135565,13.852,6.252178e+02
166248,yorw,YORK WATER CO,True,VERNON BRACEY,,"[{'cik': '0001238869', 'is_director': False, '...",2020-07-16T15:15:17.000Z,2020-07-15T00:00:00.000Z,2020-07-16T00:00:00.000Z,pa,...,"Electric, Gas, And Sanitary Services",823.520,838.338,"[2020-07-16T00:00:00.000Z, 2020-07-15T00:00:00...",,[p],1.799349,45.111752,14.818,6.684659e+02
166249,qcrh,QCR HOLDINGS INC,True,Marie Ziegler,,"[{'cik': '0001434721', 'is_director': True, 'i...",2020-07-16T13:40:10.000Z,2020-07-16T00:00:00.000Z,2020-07-16T00:00:00.000Z,de,...,Depository Institutions,45443.560,45673.560,[2020-07-16T00:00:00.000Z],,[p],0.506122,28.544000,230.000,6.565120e+03


In [9]:
# Write the scraped table to a CSV file in the working directory, without the
# row index. NOTE(review): any list-valued cells are presumably written as
# their string representation — confirm this is acceptable for downstream use.
trade_data.to_csv("all_insider_trade_data.csv", encoding='utf-8', index=False)