In [None]:
import requests
import pandas as pd
from datetime import datetime

In [None]:
# SEC Data API endpoint prefix from which company facts are retrieved
base_url = "https://data.sec.gov/api/xbrl/companyfacts/"

# Request headers that must be set to receive an appropriate response from the SEC Data API
headers = {
    "User-Agent": "ramkumarpj@gmail.com",
    "Host": "data.sec.gov",
}


In [None]:
# CIK (Central Index Key): the unique key that identifies a company in the SEC database.
# The companies under analysis, listed by CIK.
cik_list = ['808362', '1652044', '1637459']

# Names of the data elements to explore for each CIK in cik_list.
data_elements = [
    'Revenues',
    'SalesRevenueGoodsNet',
    'SalesRevenueServicesNet',
    'RevenueFromContractWithCustomerIncludingAssessedTax',
    'GrossProfit',
    'OperatingIncomeLoss',
    'NetIncomeLoss',
    'ResearchAndDevelopmentExpense',
    'SellingAndMarketingExpense',
    'ShareBasedCompensation',
    'Depreciation',
    'AllocatedShareBasedCompensationExpense',
    'CostsAndExpenses',
    'GeneralAndAdministrativeExpense',
    'InterestExpense',
    'LeaseAndRentalExpense',
    'MarketingAndAdvertisingExpense',
    'OtherAccruedLiabilitiesCurrent',
    'EntityCommonStockSharesOutstanding',
    'EntityPublicFloat',
]





In [None]:
def extractData(tenQ_tenK_filings_list, items, key):
    """Collect the 10-Q / 10-K entries stored under ``key`` in ``items``.

    For every unit found in ``items[key]['units']`` one record of the form
    ``{'key': key, 'units': <unit name>, 'filings': [...]}`` is appended to
    ``tenQ_tenK_filings_list``. ``filings`` contains only the entries whose
    ``'form'`` is ``'10-Q'`` or ``'10-K'``, in their original order (it may
    be empty).

    Args:
        tenQ_tenK_filings_list: List that is extended in place with the
            records described above.
        items: Mapping from data-element name to a dict whose ``'units'``
            entry maps a unit name to a list of entry dicts, each having a
            ``'form'`` key.
        key: Data-element name to look up. Nothing is appended when ``key``
            is not present in ``items``.

    Returns:
        None. The result is delivered through ``tenQ_tenK_filings_list``.

    Raises:
        KeyError: If ``items[key]`` has no ``'units'`` entry or an entry has
            no ``'form'`` key.
    """
    if key not in items:
        return

    for unit_name, entries in items[key]['units'].items():
        tenQ_tenK_filings_list.append({
            'key': key,
            'units': unit_name,
            'filings': [entry for entry in entries if entry['form'] in ('10-Q', '10-K')],
        })

In [None]:
def transformData(tenQ_tenK_filings_list, key):
    """Report the most recent filing for every period that was filed more than once.

    Only the first record of ``tenQ_tenK_filings_list`` is examined. Its
    filings are grouped by a period index built as ``"<start>:<end>"``
    (``'-'`` stands in for a missing ``'start'``). For each period that has
    more than one filing, the filing with the latest ``'filed'`` date is
    kept; when several share that date, the one appearing first in the input
    wins. The filing count and the resulting groups are printed.

    Args:
        tenQ_tenK_filings_list: List of records, each a dict with a
            ``'filings'`` list of filing dicts. Every filing dict needs
            ``'end'`` and ``'filed'`` (``'%Y-%m-%d'``) keys; ``'start'`` is
            optional. The list and its filings are not modified.
        key: Label used only in the printed summary line.

    Returns:
        None. Nothing is printed when ``tenQ_tenK_filings_list`` is empty.

    Raises:
        KeyError: If a filing lacks ``'end'``, or a filing in a repeated
            period lacks ``'filed'``.
        ValueError: If a ``'filed'`` value in a repeated period does not
            match ``'%Y-%m-%d'``.
    """
    if not tenQ_tenK_filings_list:
        return

    filings = tenQ_tenK_filings_list[0]['filings']

    print(f' key={key},  Length of the list {len(filings)}')

    # Group filings by period. A dict gives constant-time lookup per filing
    # and keeps the periods in first-seen order.
    filings_by_period = {}
    for filing in filings:
        index = filing.get('start', '-') + ':' + filing['end']
        filings_by_period.setdefault(index, []).append(filing)

    def filed_on(filing):
        return datetime.strptime(filing['filed'], '%Y-%m-%d')

    # Keep only periods reported more than once, reduced to their latest filing.
    # max() returns the first maximal element, matching a stable descending sort.
    latest_of_repeated_periods = [
        {'index': index, 'filings': [max(period_filings, key=filed_on)]}
        for index, period_filings in filings_by_period.items()
        if len(period_filings) > 1
    ]

    print(latest_of_repeated_periods)
        

In [None]:


for cik in cik_list:

    # Build the URL for this CIK; the CIK is left-padded with zeros to 10 characters
    url = base_url + f'CIK{cik.zfill(10)}.json'

    print(url)

    # Fetch the data from SEC Data API.
    # The timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() reports an HTTP error directly instead of letting it
    # surface as a JSON decoding failure on an error page.
    http_response = requests.get(url, headers=headers, timeout=30)
    http_response.raise_for_status()
    response = http_response.json()

    print(f"received data for company- {response['entityName']}, cik = {response['cik']}")

    # Get DEI items from response (empty when the company reports none)
    dei = response['facts'].get('dei', {})

    # Get US-GAAP items from response (empty when the company reports none)
    us_gaap = response['facts'].get('us-gaap', {})

    # Extract and transform each data element separately, starting from a fresh list
    for key in data_elements:
        tenQ_tenK_filings_list = []
        extractData(tenQ_tenK_filings_list, us_gaap, key)
        extractData(tenQ_tenK_filings_list, dei, key)
        transformData(tenQ_tenK_filings_list, key)
    
