In [97]:
import requests
import pandas as pd
from datetime import datetime
import pprint

pp = pprint.PrettyPrinter(indent=4)

In [98]:
# Base URL to retreive company facts from SEC Data API
base_url = "https://data.sec.gov/api/xbrl/companyfacts/"

# Headers to be set to receive appropriate respnse from SEC Data API
headers = {
    'User-Agent' : 'ramkumarpj@gmail.com',
    'Host' : 'data.sec.gov'
}


In [99]:
# CIK - Central Index Key - Unique key that identifies a company in SEC Database

# List of CIKs under analysis

cik_list = ['808362', '1652044', '1637459' ]

# Data elements to be explored for each CIK

data_elements = [ 'Revenues',
                      'SalesRevenueGoodsNet',
                      'SalesRevenueServicesNet',
                      'RevenueFromContractWithCustomerIncludingAssessedTax',
                      'GrossProfit',
                      'OperatingIncomeLoss',
                      'NetIncomeLoss',
                      'ResearchAndDevelopmentExpense',
                      'SellingAndMarketingExpense',
                      'ShareBasedCompensation',
                      'Depreciation',
                      'AllocatedShareBasedCompensationExpense',
                      'CostsAndExpenses',
                      'GeneralAndAdministrativeExpense',
                      'InterestExpense',
                      'LeaseAndRentalExpense',
                      'MarketingAndAdvertisingExpense',
                      'OtherAccruedLiabilitiesCurrent',
                      'EntityCommonStockSharesOutstanding',
                      'EntityPublicFloat']





In [100]:
def isQuaterlyOrAnnualFiling(start, end):

    start_date = datetime.strptime(start, '%Y-%m-%d')
    end_date = datetime.strptime(end, '%Y-%m-%d')
    
    return end_date.month - start_date.month <3 or end_date.month - start_date.month == 11

def isQuaterlyFiling(start, end):

    start_date = datetime.strptime(start, '%Y-%m-%d')
    end_date = datetime.strptime(end, '%Y-%m-%d')
    
    return end_date.month - start_date.month <3 

def isAnnualFiling(start, end):

    start_date = datetime.strptime(start, '%Y-%m-%d')
    end_date = datetime.strptime(end, '%Y-%m-%d')
    
    return end_date.month - start_date.month == 11
    

In [101]:
def extractData(tenQ_tenK_filings_list, items, key):
    
         
    if key in items.keys():
        i = 1
        for key2 in items[key]['units'].keys():

            fin_list = items[key]['units'][key2]
            tenQ_tenK_filings = [i for i in fin_list if i['form'] == '10-Q' or i['form'] == '10-K']

            #print(f"{i}. {key} {key2} 10Qs- {tenQCount}, 10Ks - {tenKCount}")

            i+=1

            tenQ_tenK_filings_list.append({
                'key' : key,
                'units' : key2,
                'filings' : tenQ_tenK_filings
            })
            

In [102]:
def transformData(tenQ_tenK_filings_list, key):
    
    if len(tenQ_tenK_filings_list) > 0 :
        filings = tenQ_tenK_filings_list[0]['filings']
    
        print(f' key={key},  Length of the list {len(filings)}')

    
        tenQ_tenK_filings_list_sorted = {}

        # Build a dictionary with a key using start and end fields 
        for filing in filings:
            #print(filing)
            if 'start' not in filing:
                start = '-'
            else:
                start = filing['start']
            
            index = start + ':' + filing['end']
            
    
            if index in tenQ_tenK_filings_list_sorted :
                tenQ_tenK_filings_list_sorted[index].append(filing)
            else :
                tenQ_tenK_filings_list_sorted[index] = [filing]
                
        # Identify multiple filings for same period
        tenQ_tenK_filings_list_sorted_multiple_filings = [filing for filing in tenQ_tenK_filings_list_sorted.values() if len(filing) > 1]
        
        # Identify single filings for same period
        tenQ_tenK_filings_list_single_filings = [filing[0] for filing in tenQ_tenK_filings_list_sorted.values() if len(filing) < 2]
        
        # Sort multiple filings in descending order of filed date 
        # Append the latest filing to single filings list
        for filing in tenQ_tenK_filings_list_sorted_multiple_filings:
            filing.sort(key = lambda x: datetime.strptime(x['filed'], '%Y-%m-%d'), reverse = True)
            tenQ_tenK_filings_list_single_filings.append(filing[0])
 
        # Sort single filings in ascending order of end date
        tenQ_tenK_filings_list_single_filings.sort(key = lambda x: datetime.strptime(x['end'], '%Y-%m-%d'))
        #pp.pprint(tenQ_tenK_filings_list_single_filings)
        
        print(f' key={key},  Length of the list single filings {len(tenQ_tenK_filings_list_single_filings)}')
        
        # Filter single filings to keep only quatery and annual filings (eliminate 6 months, 9 months filings)
        tenQ_tenK_single_filings__qtr_annnual_filtered = [filing for filing 
                                                          in tenQ_tenK_filings_list_single_filings 
                                                          if 'start' in filing and isQuaterlyOrAnnualFiling(filing['start'], 
                                                                                      filing['end'])]
        
        print(f' key={key},  Length of the list after filtering {len(tenQ_tenK_single_filings__qtr_annnual_filtered)}')
        
        if len(tenQ_tenK_single_filings__qtr_annnual_filtered) == 0:
            tenQ_tenK_single_filings__qtr_annnual_filtered = tenQ_tenK_filings_list_single_filings
        
        print(f' key={key},  Length of the list final {len(tenQ_tenK_single_filings__qtr_annnual_filtered)}')


In [103]:


for cik in cik_list:
    
    # Create the URL to retrieve data for specific CIK
    url = base_url + f'CIK{cik.zfill(10)}.json'

    print(url)
    
    # Fetch the data from SEC Data API
    response = requests.get(url, headers=headers).json()

    print(f"received data for company- {response['entityName']}, cik = {response['cik']}")
    
    # Get DEI Items from response
    dei = response['facts']['dei']

    # Get US-GAAP Items from response
    us_gaap = response['facts']['us-gaap']
    

    for key in data_elements: 
        tenQ_tenK_filings_list = []
        extractData(tenQ_tenK_filings_list, us_gaap, key)
        extractData(tenQ_tenK_filings_list, dei, key)
        transformData(tenQ_tenK_filings_list, key)
    


https://data.sec.gov/api/xbrl/companyfacts/CIK0000808362.json
received data for company- Baker Hughes Holdings LLC, cik = 808362
 key=SalesRevenueGoodsNet,  Length of the list 117
 key=SalesRevenueGoodsNet,  Length of the list single filings 61
 key=SalesRevenueGoodsNet,  Length of the list after filtering 41
 key=SalesRevenueGoodsNet,  Length of the list after filtering 41
 key=SalesRevenueServicesNet,  Length of the list 117
 key=SalesRevenueServicesNet,  Length of the list single filings 61
 key=SalesRevenueServicesNet,  Length of the list after filtering 41
 key=SalesRevenueServicesNet,  Length of the list after filtering 41
 key=RevenueFromContractWithCustomerIncludingAssessedTax,  Length of the list 8
 key=RevenueFromContractWithCustomerIncludingAssessedTax,  Length of the list single filings 8
 key=RevenueFromContractWithCustomerIncludingAssessedTax,  Length of the list after filtering 4
 key=RevenueFromContractWithCustomerIncludingAssessedTax,  Length of the list after filterin

received data for company- Kraft Heinz Co, cik = 1637459
 key=Revenues,  Length of the list 0
 key=Revenues,  Length of the list single filings 0
 key=Revenues,  Length of the list after filtering 0
 key=Revenues,  Length of the list after filtering 0
 key=SalesRevenueGoodsNet,  Length of the list 64
 key=SalesRevenueGoodsNet,  Length of the list single filings 31
 key=SalesRevenueGoodsNet,  Length of the list after filtering 17
 key=SalesRevenueGoodsNet,  Length of the list after filtering 17
 key=RevenueFromContractWithCustomerIncludingAssessedTax,  Length of the list 100
 key=RevenueFromContractWithCustomerIncludingAssessedTax,  Length of the list single filings 47
 key=RevenueFromContractWithCustomerIncludingAssessedTax,  Length of the list after filtering 28
 key=RevenueFromContractWithCustomerIncludingAssessedTax,  Length of the list after filtering 28
 key=GrossProfit,  Length of the list 164
 key=GrossProfit,  Length of the list single filings 67
 key=GrossProfit,  Length of th