In [19]:
import requests
import time
from IPython.display import clear_output

# Fields copied out of every filing record, in the order they appear as
# DataFrame columns. A field missing from a record is stored as None.
columns = [
    # company / filing identity
    "company_ticker", "company_name", "has_trades",
    # insiders involved in the filing
    "insider_names", "insider_titles_unique", "insiders",
    # filing and trade dates
    "last_filing_date", "first_trade_date", "last_trade_date",
    # company classification
    "state_of_incorporation",
    "industry_group_name", "industry_name", "sector_name", "subsector_name",
    # share counts before / after the trade
    "previous_shares", "remaining_shares",
    # trade details
    "trade_dates", "trade_status", "trade_types",
    "traded_percentage", "traded_share_price", "traded_shares", "traded_value",
]

# Accumulators filled in by scrape(): one dict per filing, and the page
# numbers that could not be fetched after all retries.
all_trades, errors = [], []

# Ask the API for a JSON response.
headers = {"Accept": "application/json"}

def print_progress(page, page_count, tries, errors):
    """Redraw the two-line progress display for the scrape.

    Args:
        page: 1-based number of the page currently being fetched
            (scrape() iterates ``range(1, page_count + 1)``).
        page_count: total number of pages reported by the API.
        tries: failed attempts so far for the current page (0-10); drawn as
            ``O`` marks in a 10-slot bar.
        errors: collection of page numbers that failed permanently; printed
            as-is.
    """
    clear_output(wait=False)

    # `page` is already 1-based, so it is shown directly; adding 1 made the
    # last page read "335/334".
    width = len(str(page_count))
    # Fraction of pages reached; guard against an empty result set.
    percent = (page / page_count) * 100 if page_count else 0.0

    print(f"Current Page : {page:>{width}}\u002F{page_count:>{width}}", end="", flush=True)
    print(f"  [ {percent: 0.2f}%  ]", flush=True)
    print("{ " + (tries * "O") + ((10 - tries) * ".") + " }" + f"   Errors: {errors}", flush=True)

_API_URL = 'https://www.benzinga.com/sec/insider-trades/api/insider-trades?'
# print_progress() draws a fixed 10-slot retry bar, so the retry budget stays at 10.
_MAX_TRIES = 10


def _fetch_filings(page, query_limit, timeout):
    """POST the query for one page and return the response's top-level 'filings' object."""
    payload = {
        "insider_titles": [],
        "trade_types": ["p", "s"],
        "filing_date_preset": "custom",
        "trade_date_preset": "custom",
        "group_by": "filing",
        "sort_by": "last_filing_date",
        "page": page,
        "page_limit": query_limit,
    }
    response = requests.post(_API_URL, json=payload, headers=headers, timeout=timeout)
    # Surface HTTP errors (rate limiting, 5xx) as exceptions so the caller retries.
    response.raise_for_status()
    return response.json()['filings']


def _extract_trade(filing):
    """Copy the fields listed in `columns` out of one filing; missing fields become None."""
    if not isinstance(filing, dict):
        return dict.fromkeys(columns)
    return {column: filing.get(column) for column in columns}


def scrape(query_limit, timeout=30, retry_delay=1.0):
    """Download every page of insider-trade filings into `all_trades`.

    The first request is used only to learn the total page count. Each page is
    then fetched with up to 10 attempts; a page that still fails has its number
    appended to `errors` and is skipped.

    Args:
        query_limit: number of filings requested per page (sent as `page_limit`).
        timeout: seconds to wait for each HTTP response before treating the
            attempt as failed.
        retry_delay: seconds to pause between failed attempts on the same page.

    Returns:
        None. Results are appended to the module-level `all_trades` list (one
        dict per filing, keyed by `columns`) and failed page numbers to the
        module-level `errors` list.

    Raises:
        requests.RequestException, KeyError, ValueError: if the initial
            page-count request fails; that request is not retried.
    """
    page_count = _fetch_filings(1, query_limit, timeout)['total_pages']

    for page in range(1, page_count + 1):
        filings = None
        fetched = False

        for tries in range(_MAX_TRIES):
            print_progress(page, page_count, tries, errors)
            try:
                filings = _fetch_filings(page, query_limit, timeout)['filings']
                if not isinstance(filings, list):
                    raise TypeError("'filings' is not a list")
                fetched = True
                break
            # Only network / decoding / shape problems are retried, so that
            # KeyboardInterrupt and programming errors still propagate.
            except (requests.RequestException, ValueError, KeyError, TypeError):
                if tries + 1 < _MAX_TRIES:
                    time.sleep(retry_delay)

        if not fetched:
            errors.append(page)
            continue

        all_trades.extend(_extract_trade(filing) for filing in filings)

# Kick off the scrape, asking the API for 500 filings per page.
scrape(query_limit=500)

Current Page : 335/334  [  100.00%  ]
{ .......... }   Errors: []


In [20]:
import pandas as pd

# Build the frame once from the accumulated records. Passing `columns` pins the
# column order and keeps the schema even when no trades were collected.
trade_data = pd.DataFrame(all_trades, columns=columns)

In [23]:
# `trade_data` is already a DataFrame; display it directly instead of
# re-wrapping it in a new one.
trade_data

Unnamed: 0,company_ticker,company_name,has_trades,insider_names,insider_titles_unique,insiders,last_filing_date,first_trade_date,last_trade_date,state_of_incorporation,...,subsector_name,previous_shares,remaining_shares,trade_dates,trade_status,trade_types,traded_percentage,traded_share_price,traded_shares,traded_value
0,acnb,ACNB CORP,True,James Helt,PRESIDENT & CEO,"[{'cik': '0001225042', 'is_director': True, 'i...",2023-06-21T14:15:58.000Z,2023-06-15T00:00:00.000Z,2023-06-15T00:00:00.000Z,pa,...,Depository Institutions,2.665741e+04,2.687317e+04,[2023-06-15T00:00:00.000Z],BUY,[p],0.809380,33.375,215.7597,7200.979987
1,acnb,ACNB CORP,True,Frank Elsner III,DIRECTOR,"[{'cik': '0001271480', 'is_director': True, 'i...",2023-06-21T13:59:07.000Z,2023-06-15T00:00:00.000Z,2023-06-15T00:00:00.000Z,pa,...,Depository Institutions,2.583379e+04,2.599915e+04,"[2023-06-15T00:00:00.000Z, 2023-06-15T00:00:00...",BUY,"[a, p]",0.640101,33.375,165.3624,5518.970100
2,pwsc,"POWERSCHOOL HOLDINGS, INC.",True,HARDEEP GULATI,CHIEF EXECUTIVE OFFICER,"[{'cik': '0001858877', 'is_director': True, 'i...",2023-06-21T13:53:50.000Z,2023-06-20T00:00:00.000Z,2023-06-20T00:00:00.000Z,de,...,Business Services,6.209260e+05,5.906420e+05,[2023-06-20T00:00:00.000Z],SELL,[s],-4.877232,18.820,-30284.0000,-569944.880000
3,pwsc,"POWERSCHOOL HOLDINGS, INC.",True,MARCY DANIEL,CHIEF PRODUCT OFFICER,"[{'cik': '0001858856', 'is_director': False, '...",2023-06-21T13:53:31.000Z,2023-06-20T00:00:00.000Z,2023-06-20T00:00:00.000Z,de,...,Business Services,5.361800e+04,5.146800e+04,[2023-06-20T00:00:00.000Z],SELL,[s],-4.009847,18.820,-2150.0000,-40463.000000
4,pwsc,"POWERSCHOOL HOLDINGS, INC.",True,DEVENDRA SINGH,CHIEF TECHNOLOGY OFFICER,"[{'cik': '0001859065', 'is_director': False, '...",2023-06-21T13:53:17.000Z,2023-06-20T00:00:00.000Z,2023-06-20T00:00:00.000Z,de,...,Business Services,7.418300e+04,7.185700e+04,[2023-06-20T00:00:00.000Z],SELL,[s],-3.135489,18.820,-2326.0000,-43775.320000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166608,gcbc,GREENE COUNTY BANCORP INC,True,Donald E Gibson,,"[{'cik': '0001405640', 'is_director': True, 'i...",2020-06-22T16:55:24.000Z,2020-06-22T00:00:00.000Z,2020-06-22T00:00:00.000Z,de,...,Depository Institutions,1.026700e+04,1.062900e+04,[2020-06-22T00:00:00.000Z],,[p],3.525860,22.050,362.0000,7982.100000
166609,atnx,"Athenex, Inc.",True,NAM YIU JOHNSON LAU,,"[{'cik': '0001181165', 'is_director': True, 'i...",2020-06-22T16:08:45.000Z,2020-06-22T00:00:00.000Z,2020-06-22T00:00:00.000Z,de,...,Chemicals And Allied Products,3.220140e+06,3.222140e+06,[2020-06-22T00:00:00.000Z],,[p],0.062109,12.810,2000.0000,25620.000000
166610,yorw,YORK WATER CO,True,Steven Rasmussen,,"[{'cik': '0001511157', 'is_director': True, 'i...",2020-06-22T16:05:39.000Z,2020-06-22T00:00:00.000Z,2020-06-22T00:00:00.000Z,pa,...,"Electric, Gas, And Sanitary Services",2.290539e+03,2.303686e+03,[2020-06-22T00:00:00.000Z],,[p],0.573970,45.637,13.1470,599.989639
166611,juvf,JUNIATA VALLEY FINANCIAL CORP,True,TIMOTHY HAVICE,,"[{'cik': '0001240861', 'is_director': True, 'i...",2020-06-22T13:12:58.000Z,2020-06-22T00:00:00.000Z,2020-06-22T00:00:00.000Z,pa,...,Depository Institutions,2.235500e+04,2.335500e+04,[2020-06-22T00:00:00.000Z],,[p],4.473272,16.550,1000.0000,16550.000000
