# Extract Financial Data From XBRL Instance Document

I have been manually copying financial data from companies' annual reports into Google Sheets and then downloading it as a CSV file for data analysis.

I will try to automate this step by extracting the financial data directly from XBRL instance documents.

# XBRL Instance Documents From SEC Website

The XBRL instance documents are data files of companies' annual reports. They can be downloaded from the [SEC](https://www.sec.gov/edgar/searchedgar/companysearch.html) website.

# Create Functions For Extracting Data From XBRL Instance Document

First, I will create the functions needed to extract data from an XBRL instance document. They will extract the following data that I need for analysis:
* Company name
* Filing type
* Period end date
* Reporting currency
* Cash and cash equivalents
* Short-term investments
* Current portion of debt/notes
* Revenue
* Cost of revenue
* Revenue from one fiscal year ago
* Operating cash flow
* Capital expenditure

In [1]:
import xml.etree.ElementTree as ET
from collections import Counter

import scipy.stats as stats

def _most_frequent(values):
    """Return the most frequent item in *values*.

    Ties are resolved in favour of the smallest item, which is the value
    ``scipy.stats.mode`` used to return for these lists.

    Raises:
        ValueError: If *values* is empty.
    """
    counts = Counter(values)

    if not counts:
        raise ValueError('Cannot take the most frequent item of an empty list.')

    highest = max(counts.values())
    return min(value for value, count in counts.items() if count == highest)


def _context_end_date(root, ns, context_id):
    """Return the <instant> text of a context, else its <endDate> text.

    Returns None when the context has neither element (or does not exist).
    """
    path = "xbrli:context[@id='{}']//xbrli:{}"
    node = root.find(path.format(context_id, 'instant'), ns)

    if node is None:
        node = root.find(path.format(context_id, 'endDate'), ns)

    return None if node is None else node.text


def get_context_id_and_currency(root, ns, tag_name, fy_ended):
    """Find the context ids of a fact for the current and previous period.

    Only facts whose ``contextRef`` has the shortest length among all
    ``tag_name`` facts are considered, because the shortest id should be the
    overall (non-segmented) context.

    Args:
        root: Root element of the parsed XBRL instance document.
        ns: Mapping of namespace prefix to URI; must contain ``xbrli``.
        tag_name: Prefixed tag name of the fact to inspect,
            e.g. ``'us-gaap:AssetsCurrent'``.
        fy_ended: Period end date of the current fiscal year as it appears
            in the document's context elements (``YYYY-MM-DD`` text).

    Returns:
        A tuple ``(context_id_current_fy, context_id_previous_fy, currency)``.
        ``currency`` is the local part of the unit's measure, e.g. ``'USD'``.

    Raises:
        ValueError: If no fact is found for ``tag_name``, none ends on
            ``fy_ended``, its unit has no measure, or no earlier period
            exists.
        KeyError: If a matching fact lacks ``contextRef`` or ``unitRef``.
    """
    tags = root.findall(tag_name, ns)

    if not tags:
        raise ValueError("No '{}' fact found.".format(tag_name))

    # the shortest context id should be the overall id
    shortest_length = min(len(tag.attrib['contextRef']) for tag in tags)

    # (context id, end date, tag) for every fact in an overall context;
    # the end date of each context is looked up only once
    end_date_by_context = {}
    facts = []

    for tag in tags:
        context_id = tag.attrib['contextRef']

        if len(context_id) != shortest_length:
            continue

        if context_id not in end_date_by_context:
            end_date_by_context[context_id] = _context_end_date(root, ns
                                                                , context_id)

        end_date = end_date_by_context[context_id]

        # a context without a resolvable date cannot be matched to a period
        if end_date is not None:
            facts.append((context_id, end_date, tag))

    current_facts = [(context_id, tag.attrib['unitRef'])
                     for context_id, end_date, tag in facts
                     if end_date == fy_ended]

    if not current_facts:
        raise ValueError("No '{}' fact found for period ended {}."
                         .format(tag_name, fy_ended))

    context_id_current_fy = _most_frequent(
        [context_id for context_id, _ in current_facts])

    # get currency
    unit_id = _most_frequent([unit for _, unit in current_facts])
    measure = root.find("xbrli:unit[@id='{}']//xbrli:measure".format(unit_id), ns)

    if measure is None or not measure.text:
        raise ValueError("Measure of unit '{}' not found.".format(unit_id))

    # 'iso4217:USD' -> 'USD'; a measure without prefix is returned as is
    currency = measure.text.split(':')[-1]

    # get context id for previous FY: the latest period that ended before
    # fy_ended (the dates are ISO formatted, so text order is date order)
    earlier_dates = {end_date for _, end_date, _ in facts if end_date < fy_ended}

    if not earlier_dates:
        raise ValueError("No '{}' fact found for a period before {}."
                         .format(tag_name, fy_ended))

    previous_fy_ended = max(earlier_dates)

    context_id_previous_fy = _most_frequent(
        [context_id for context_id, end_date, _ in facts
         if end_date == previous_fy_ended])

    return (context_id_current_fy, context_id_previous_fy, currency)



def get_value(root, ns, tag_name, context_id):
    """Return the text of the fact ``tag_name`` reported for ``context_id``.

    The same fact can appear more than once in a document, so the most
    frequent text is returned; ties resolve to the smallest text, which is
    the value ``scipy.stats.mode`` used to return here.

    Args:
        root: Root element of the parsed XBRL instance document.
        ns: Mapping of namespace prefix to URI used to resolve ``tag_name``.
        tag_name: Prefixed tag name of the fact, e.g. ``'us-gaap:Revenues'``.
        context_id: Value of the ``contextRef`` attribute to match.

    Returns:
        The fact's text as a string.

    Raises:
        ValueError: If no fact with text matches the tag and context.
    """
    path = "{tag_name}[@contextRef='{context_id}']" \
        .format(tag_name=tag_name, context_id=context_id)

    # facts without text (e.g. nil facts) carry no value to return
    counts = Counter(element.text
                     for element in root.findall(path, ns)
                     if element.text is not None)

    if not counts:
        raise ValueError("No value found for '{}' in context '{}'."
                         .format(tag_name, context_id))

    highest = max(counts.values())

    return min(text for text, count in counts.items() if count == highest)



import datetime as dt
import pandas as pd
import re

def extract_data_from_XBRL(file_path):
    """Extract key financial figures from one XBRL instance document.

    Reads the ``dei`` cover facts (central index key, registrant name,
    document type, period end date), then collects balance-sheet,
    income-statement and cash-flow values for the fiscal year that ends on
    the document period end date.

    Args:
        file_path: Path of the XBRL instance document (XML file) to parse.

    Returns:
        A one-row ``pandas.DataFrame`` indexed by the central index key, with
        the columns ``company_name``, ``filings``, ``fiscal_year_ended``,
        ``reporting_currency``, ``cash_and_equivalents``,
        ``short_term_investments``, ``current_debt``, ``revenue``,
        ``cost_of_revenue``, ``revenue_1_fy_ago``, ``operating_cash_flow``
        and ``capital_expenditure``.

    Raises:
        Exception: If a required cover fact, cash, revenue, cost of revenue
            or capital expenditure is not found, if the statement facts are
            not in the expected sequence, or if the currency of the cash
            flow facts differs from the balance sheet currency.
        Errors raised by ``get_context_id_and_currency`` and ``get_value``
        are not caught here and propagate to the caller.
    """
    company = {}
    tree = ET.parse(file_path)
    root = tree.getroot()
    
    # get namespaces
    # the file is read a second time; each 'start-ns' event carries a
    # (prefix, uri) pair, so the collected list converts directly to a dict
    namespaces = []
    for key, value in ET.iterparse(file_path, ['start-ns']):
        namespaces.append(value)

    ns = dict(namespaces)
    
    # if xbrl instance namespace has no prefix, then set a prefix
    # (the lookups in get_context_id_and_currency use the 'xbrli' prefix)
    for k in ns:
        if re.search(r'^http://www\.xbrl\.org/\d+/instance$', ns[k]) \
        and len(k) == 0:
            ns['xbrli'] = ns[k]
            # leave the loop right away: ns was just modified while iterating
            break;

    # get central index key
    central_index_key = root.find('dei:EntityCentralIndexKey', ns)
    
    if central_index_key is None:
        raise Exception('Central index key not found.')
    
    # get company name
    company_name = root.find('dei:EntityRegistrantName', ns)
    
    if company_name is None:
        raise Exception('Entity registrant name not found.')
        
    company['company_name'] = company_name.text.upper()
    
    # get document type
    document_type = root.find('dei:DocumentType', ns)
    
    if document_type is None:
        raise Exception('Document type not found.')
        
    company['filings'] = document_type.text
    
    # get period end date
    document_period_end_date = root.find('dei:DocumentPeriodEndDate', ns)
    
    if document_period_end_date is None:
        raise Exception('Document period end date not found.')
        
    company['fiscal_year_ended'] = dt.datetime.strptime(document_period_end_date.text
                                                        , '%Y-%m-%d')
    
    # get namespace of financial data because not all are 'us-gaap'
    # the prefix is taken from whichever namespace declares the current
    # assets total; that prefix is then used for every financial tag below
    ns_fd = ''
    current_assets_tag_name = ''
    
    for child in root:
        # child.tag is split as '{namespace-uri}LocalName'
        # NOTE(review): assumes every child tag is namespace-qualified;
        # a tag without '}' would raise IndexError here - confirm
        namespace = child.tag.split('}')[0][1:]
        tag_name = child.tag.split('}')[1]
        
        if tag_name == 'AssetsCurrent' or tag_name == 'CurrentAssets':
            for k in ns:
                if ns[k] == namespace:
                    ns_fd = k
                    current_assets_tag_name = tag_name

    # NOTE(review): when no such fact exists, ns_fd and
    # current_assets_tag_name stay '' and the lookup below is made with the
    # tag ':' - consider raising a clear error here instead

    # get context id of balance sheet and balance sheet currency
    temp = get_context_id_and_currency(root, ns
                                       , ns_fd + ':' + current_assets_tag_name
                                       , document_period_end_date.text)
    bs_id_for_fy = temp[0]
    currency = temp[2]
    company['reporting_currency'] = currency
    
    # get current assets and current liabilities
    elements = root.findall("*[@contextRef='{context_id}']" \
                            .format(context_id = bs_id_for_fy)
                            , ns)
    
    reached_cash = False
    reached_total_current_assets = False
    reached_total_current_liabilities = False
    current_liabilities_started = False
    # scratch containers (not part of the document) that hold the facts of
    # each balance sheet section so they can be searched with find()
    current_asset_list = ET.Element('CurrentAssetList')
    current_liability_list = ET.Element('CurrentLiabilityList')
    expected_bs_seqence = ['Cash', 'Total current assets', 'Total assets'
                           , 'Total current liabilities']
    actual_bs_sequence = []
    
    # walk the facts of the balance sheet context in the order findall()
    # returned them:
    # - facts from the first 'Cash...' tag up to, but not including, the
    #   current assets total go to current_asset_list
    # - facts after 'Assets' up to, but not including, 'LiabilitiesCurrent'
    #   go to current_liability_list
    # every marker tag met is recorded, so a second 'Cash...' tag before the
    # current assets total also makes the sequence check below fail
    for e in elements:
        tag_name = e.tag.split('}')[1]
        
        if tag_name == current_assets_tag_name:
            reached_total_current_assets = True
            actual_bs_sequence.append('Total current assets')
        elif tag_name.startswith('Cash') and \
        not reached_total_current_assets:
            reached_cash = True
            actual_bs_sequence.append('Cash')
        elif tag_name == 'Assets':
            current_liabilities_started = True
            actual_bs_sequence.append('Total assets')
            # the total assets fact itself belongs to neither list
            continue
        elif tag_name == 'LiabilitiesCurrent':
            reached_total_current_liabilities = True
            actual_bs_sequence.append('Total current liabilities')
            
        if reached_cash and not reached_total_current_assets:
            current_asset_list.append(e)
            
        if current_liabilities_started \
        and not reached_total_current_liabilities:
            current_liability_list.append(e)
            
        # nothing after the current liabilities total is needed
        if reached_total_current_liabilities:
            break;
    
    if actual_bs_sequence != expected_bs_seqence:
        raise Exception('The balance sheet fields in file are not in the expected sequence.')
    
    # get cash and cash equivalents value
    # tag name may be 'CashAndCashEquivalentsAtCarryingValue'
    # or 'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents'
    # only the first tag name that is found is used (note the break)
    cash_and_equivalents = []
    tag_names = [ns_fd + ':CashAndCashEquivalentsAtCarryingValue'
                 , ns_fd + ':CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents']
    
    if len(current_asset_list) > 0:
        for tag_name in tag_names:
            result = current_asset_list.find(tag_name, ns)

            if not(result is None):
                cash_and_equivalents.append(float(result.text))
                break
    else:
        # fall back to searching the whole document for the context
        for tag_name in tag_names:
            if len(root.findall("{}[@contextRef='{}']".format(tag_name
                                                              , bs_id_for_fy)
                                , ns)) > 0:
                value = get_value(root, ns, tag_name, bs_id_for_fy)
                cash_and_equivalents.append(float(value))
                break
        
    if len(cash_and_equivalents) == 0:
        raise Exception('Cash and cash equivalents not found.')
    
    company['cash_and_equivalents'] = sum(cash_and_equivalents)
    
    # get short-term investments value, some company do not have short-term investment
    # tag name may be 'MarketableSecuritiesCurrent'
    # or 'AvailableForSaleSecuritiesDebtSecuritiesCurrent'
    # or 'AvailableForSaleSecuritiesCurrent'
    # or 'ShortTermInvestments' or 'HeldToMaturitySecuritiesCurrent'
    # every tag name that is found is summed (no break); none found gives 0
    short_term_investments = []
    tag_names = [ns_fd + ':MarketableSecuritiesCurrent'
                 , ns_fd + ':AvailableForSaleSecuritiesDebtSecuritiesCurrent'
                 , ns_fd + ':AvailableForSaleSecuritiesCurrent'
                 , ns_fd + ':ShortTermInvestments'
                 , ns_fd + ':HeldToMaturitySecuritiesCurrent']

    if len(current_asset_list) > 0:
        for tag_name in tag_names:
            result = current_asset_list.find(tag_name, ns)

            if not(result is None):
                short_term_investments.append(float(result.text))
    else:
        for tag_name in tag_names:
            if len(root.findall("{}[@contextRef='{}']".format(tag_name
                                                              , bs_id_for_fy)
                                , ns)) > 0:
                value = get_value(root, ns, tag_name, bs_id_for_fy)
                short_term_investments.append(float(value))
                
    company['short_term_investments'] = sum(short_term_investments)
    
    # get current debt value, some company do not have current debt
    # tag name may be 'NotesPayableCurrent' or 'ConvertibleDebtCurrent'
    # or 'ConvertibleNotesPayableCurrent' or 'LongTermDebtCurrent'
    # or 'LongTermDebtAndCapitalLeaseObligationsCurrent'
    # or 'LoansPayableToBankCurrent' or 'DebtCurrent' or 'LinesOfCreditCurrent'
    # every tag name that is found is summed; none found gives 0
    current_debt_items = []
    tag_names = [ns_fd + ':NotesPayableCurrent'
                 , ns_fd + ':ConvertibleDebtCurrent'
                 , ns_fd + ':ConvertibleNotesPayableCurrent'
                 , ns_fd + ':LongTermDebtCurrent'
                 , ns_fd + ':LongTermDebtAndCapitalLeaseObligationsCurrent'
                 , ns_fd + ':LoansPayableToBankCurrent'
                 , ns_fd + ':DebtCurrent'
                 , ns_fd + ':LinesOfCreditCurrent']
    
    if len(current_liability_list) > 0:
        for tag_name in tag_names:
            result = current_liability_list.find(tag_name, ns)

            if not(result is None):
                current_debt_items.append(float(result.text))
    else:
        for tag_name in tag_names:
            if len(root.findall("{}[@contextRef='{}']".format(tag_name
                                                              , bs_id_for_fy)
                                , ns)) > 0:
                value = get_value(root, ns, tag_name, bs_id_for_fy)
                current_debt_items.append(float(value))

    company['current_debt'] = sum(current_debt_items)
    
    
    # get context id of cash flow and income statement for current and previous FY
    # the operating cash flow fact is used to locate the context; the income
    # statement facts are then read from that same context
    temp = get_context_id_and_currency(root, ns
                                       , ns_fd + ':NetCashProvidedByUsedInOperatingActivities'
                                       , document_period_end_date.text)

    cf_in_id_curr_fy = temp[0]
    cf_in_id_prev_fy = temp[1]
    currency = temp[2]
    
    # if currency from income statement is different from balance sheet
    if currency != company['reporting_currency']:
        raise Exception('Currency ' + currency +' from income statement is ' 
                        + 'different from currency ' + company['reporting_currency'] 
                        + ' from balance sheet.')

    # get cash flows from investing activities and income statement
    elements = root.findall("*[@contextRef='{context_id}']" \
                            .format(context_id = cf_in_id_curr_fy)
                            , ns)
    
    investing_cash_flow_started = False
    end_of_investing_cash_flow = False
    reached_revenue = False
    reached_operating_incomeloss = False
    expected_is_seqence = ['Total revenues', 'Operating income/loss']
    actual_is_sequence = []
    expected_cfs_seqence = ['Net cash provided by operating activities'
                            , 'Net cash used in investing activities']
    actual_cfs_sequence = []
    
    # scratch containers for the facts of each statement section
    cf_investing_activity_list = ET.Element('CashFlowInvestingActivities')
    income_statement_items = ET.Element('IncomeStatementItems')
    
    # walk the facts of the context in the order findall() returned them:
    # - facts after the operating cash flow total up to, but not including,
    #   the investing cash flow total go to cf_investing_activity_list
    # - facts from the first revenue tag up to, but not including,
    #   'OperatingIncomeLoss' go to income_statement_items
    # the loop stops at the investing cash flow total, so the income
    # statement markers must be met before it for the sequence check to pass
    for e in elements:
        tag_name = e.tag.split('}')[1]
        
        if tag_name == 'NetCashProvidedByUsedInOperatingActivities':
            investing_cash_flow_started = True
            actual_cfs_sequence.append('Net cash provided by operating activities')
            # the operating total itself is not an investing item
            continue
        elif tag_name == 'NetCashProvidedByUsedInInvestingActivities':
            end_of_investing_cash_flow = True
            actual_cfs_sequence.append('Net cash used in investing activities')
        elif (tag_name.startswith('Revenues') \
              or tag_name.startswith('RevenueFromContractWithCustomer')) \
        and not reached_operating_incomeloss:
            reached_revenue = True
            actual_is_sequence.append('Total revenues')
        elif tag_name == 'OperatingIncomeLoss':
            reached_operating_incomeloss = True
            actual_is_sequence.append('Operating income/loss')
            
        if investing_cash_flow_started \
        and not end_of_investing_cash_flow:
            cf_investing_activity_list.append(e)
        
        if reached_revenue and not reached_operating_incomeloss:
            income_statement_items.append(e)
        
        if end_of_investing_cash_flow:
            break;
    
    if actual_is_sequence != expected_is_seqence:
        raise Exception('The income statement fields in file are not in the expected sequence')
    
    if actual_cfs_sequence != expected_cfs_seqence:
        raise Exception('The cash flow statement fields in file are not in the expected sequence.')
    
    # get revenue value
    # tag name may be 'RevenueFromContractWithCustomerExcludingAssessedTax'
    # or 'RevenueFromContractWithCustomerIncludingAssessedTax'
    # or 'Revenues'
    # only the first tag name that is found is used; it is remembered in
    # rev_tag_name so the previous FY revenue is read from the same tag
    revenue_items = []
    tag_names = [ns_fd + ':RevenueFromContractWithCustomerExcludingAssessedTax'
                 , ns_fd + ':RevenueFromContractWithCustomerIncludingAssessedTax'
                 , ns_fd + ':Revenues']
    rev_tag_name = ''
    
    if len(income_statement_items) > 0:
        for tag_name in tag_names:
            result = income_statement_items.find(tag_name, ns)

            if not(result is None):
                rev_tag_name = tag_name
                revenue_items.append(float(result.text))
                break
    else:
        for tag_name in tag_names:
            if len(root.findall("{}[@contextRef='{}']".format(tag_name
                                                              , cf_in_id_curr_fy)
                                , ns)) > 0:
                value = get_value(root, ns, tag_name, cf_in_id_curr_fy)
                revenue_items.append(float(value))
                rev_tag_name = tag_name
                break
                
    if len(revenue_items) == 0:
        raise Exception('Revenue not found.')
        
    company['revenue'] = sum(revenue_items)
    
    
    # get cost of revenue value
    # tag name may be 'CostOfRevenue' or 'CostOfGoodsAndServicesSold'
    # only the first tag name that is found is used
    cost_of_revenue_items = []
    tag_names = [ns_fd + ':CostOfRevenue'
                 , ns_fd + ':CostOfGoodsAndServicesSold']
    
    if len(income_statement_items) > 0:
        for tag_name in tag_names:
            result = income_statement_items.find(tag_name, ns)

            if not(result is None):
                cost_of_revenue_items.append(float(result.text))
                break
    else:
        for tag_name in tag_names:
            if len(root.findall("{}[@contextRef='{}']".format(tag_name
                                                              , cf_in_id_curr_fy)
                                , ns)) > 0:
                value = get_value(root, ns, tag_name, cf_in_id_curr_fy)
                cost_of_revenue_items.append(float(value))
                break
                
    if len(cost_of_revenue_items) == 0:
        raise Exception('Cost of revenue not found.')
    
    company['cost_of_revenue'] = sum(cost_of_revenue_items)
    
    # get revenue value of previous FY
    value = get_value(root, ns, rev_tag_name, cf_in_id_prev_fy)
    
    company['revenue_1_fy_ago'] = float(value)
    
    # get operating cash flow value
    value = get_value(root, ns
                      , ns_fd + ':NetCashProvidedByUsedInOperatingActivities'
                      , cf_in_id_curr_fy)
    
    company['operating_cash_flow'] = float(value)
    
    # get capital expenditure value
    # tag name may be 'PaymentsForCapitalImprovements'
    # or 'PaymentsToAcquirePropertyPlantAndEquipment'
    # or 'PaymentsToDevelopSoftware' or 'PaymentsToAcquireProductiveAssets'
    # or 'PaymentsForSoftware' or 'PaymentsToAcquireIntangibleAssets'
    # or 'PaymentsToAcquireSoftware' or 'PaymentsToAcquireEquipmentOnLease'
    # every tag name that is found is summed; at least one is required
    tag_names = [ns_fd + ':PaymentsForCapitalImprovements'
                 , ns_fd + ':PaymentsToAcquirePropertyPlantAndEquipment'
                 , ns_fd + ':PaymentsToDevelopSoftware'
                 , ns_fd + ':PaymentsToAcquireProductiveAssets'
                 , ns_fd + ':PaymentsForSoftware'
                 , ns_fd + ':PaymentsToAcquireIntangibleAssets'
                 , ns_fd + ':PaymentsToAcquireSoftware'
                 , ns_fd + ':PaymentsToAcquireEquipmentOnLease']
    cap_ex_items = []
    
    if len(cf_investing_activity_list) > 0:
        for tag_name in tag_names:
            result = cf_investing_activity_list.find(tag_name, ns)

            if not(result is None):
                cap_ex_items.append(float(result.text))
    else:
        for tag_name in tag_names:
            if len(root.findall("{}[@contextRef='{}']".format(tag_name
                                                              , cf_in_id_curr_fy)
                                , ns)) > 0:
                value = get_value(root, ns, tag_name, cf_in_id_curr_fy)
                cap_ex_items.append(float(value))
                
    if len(cap_ex_items) == 0:
        raise Exception('Capital expenditure not found.')
    
    company['capital_expenditure'] = sum(cap_ex_items)
    
    # one row, labelled with the central index key
    return pd.DataFrame(company, index=[central_index_key.text])


# Process XBRL Instance Documents

Now that I have created the functions, I will process each XBRL instance document by passing its file path to the extraction function.

In [2]:
import glob
import pathlib as pl
import os
import configparser

# get configuration from config file
config = configparser.ConfigParser()
config.read('config.ini')
#config.read(os.path.dirname(__file__) + '/config.ini') # use this line when running from cron job
uid = config['Sql']['uid']
pwd = config['Sql']['pwd']
host = config['Sql']['host']
port = config['Sql']['port']
dir_data_file = config['Path']['dir_data_file']

company_df_list = []
error_count = 0
dir_completed = dir_data_file + '/completed'
dir_error = dir_data_file + '/error'

# create directory if not exist
pl.Path(dir_completed).mkdir(exist_ok=True)
pl.Path(dir_error).mkdir(exist_ok=True)

# process each file
file_paths = glob.glob(dir_data_file + '/*.xml')

for file_path in sorted(file_paths):
    # glob joins the directory and file name with the platform separator,
    # so take the name with basename() instead of splitting on '/'
    file_name = os.path.basename(file_path)
    print('Processing "{}" ... '.format(file_name), end='')

    try:
        df = extract_data_from_XBRL(file_path)
        company_df_list.append(df)
        # a processed file is moved out of the data directory so that it is
        # not picked up again by the next run
        os.replace(file_path, os.path.join(dir_completed, file_name))

        print('completed.')
    except Exception as e:
        # one bad document must not stop the batch: report it, count it and
        # park the file in the error directory for manual handling
        print('ERROR OCCURRED:',str(e))
        error_count += 1
        os.replace(file_path, os.path.join(dir_error, file_name))


print()
print('Out of the {} documents, {} encountered error.'.format(len(file_paths)
                                                              , error_count))

# combined the list of dataframes into a single dataframe
# (companies_financials is only defined when at least one file succeeded)
if company_df_list:
    companies_financials = pd.concat(company_df_list).sort_values('company_name')

Processing "adsk-20210131_htm.xml" ... completed.
Processing "ayx-20201231_htm.xml" ... completed.
Processing "bl-20201231_htm.xml" ... completed.
Processing "brhc10022673_20f_htm.xml" ... completed.
Processing "bynd-20201231_htm.xml" ... completed.
Processing "cdna-20201231_htm.xml" ... ERROR OCCURRED: Cost of revenue not found.
Processing "cloud-20201231_htm.xml" ... completed.
Processing "coup-20210131_htm.xml" ... completed.
Processing "crm-20210131_htm.xml" ... completed.
Processing "crwd-20210131_htm.xml" ... completed.
Processing "d105808d10k_htm.xml" ... completed.
Processing "ddog-20201231.xml" ... completed.
Processing "docu-20210131_htm.xml" ... completed.
Processing "exas-20201231_htm.xml" ... completed.
Processing "fivn-20201231_htm.xml" ... completed.
Processing "frpt20201231b_10k_htm.xml" ... completed.
Processing "fsly-20201231_htm.xml" ... completed.
Processing "gh-20201231_htm.xml" ... ERROR OCCURRED: Cost of revenue not found.
Processing "hubs-10k_20201231_htm.xml" .

Some of the documents were reported as `ERROR OCCURRED`. After investigating, I found the following causes:
* `Cost of revenue not found`: The income statement has no cost of revenue section.
* `The balance sheet fields in file are not in the expected sequence`: The balance sheet facts in the file do not appear in the order the code expects (cash, total current assets, total assets, total current liabilities).

I will have to manually note the financial data for these companies.

# Display Extracted Data For Verification

I will display the extracted data to verify that the correct data are extracted.

In [3]:
# show floats with thousands separators and two decimal places
pd.options.display.float_format = '{:,.2f}'.format
# cell output: the combined dataframe, or nothing when no file was extracted
companies_financials if company_df_list else None

Unnamed: 0,company_name,filings,fiscal_year_ended,reporting_currency,cash_and_equivalents,short_term_investments,current_debt,revenue,cost_of_revenue,revenue_1_fy_ago,operating_cash_flow,capital_expenditure
1689923,"ALTERYX, INC.",10-K,2020-12-31,USD,171891000.0,584445000.0,72619000.0,495308000.0,43839000.0,417910000.0,74782000.0,26358000.0
769397,"AUTODESK, INC.",10-K,2021-01-31,USD,1772200000.0,85000000.0,0.0,3790400000.0,337100000.0,3274300000.0,1437200000.0,95900000.0
1655210,"BEYOND MEAT, INC.",10-K,2020-12-31,USD,159127000.0,0.0,25000000.0,406785000.0,284510000.0,297897000.0,-39995000.0,57696000.0
1666134,"BLACKLINE, INC.",10-K,2020-12-31,USD,367413000.0,175206000.0,0.0,351737000.0,68972000.0,288976000.0,54735000.0,19424000.0
1477333,"CLOUDFLARE, INC.",10-K,2020-12-31,USD,108895000.0,923201000.0,0.0,431059000.0,101055000.0,287022000.0,-17129000.0,74962000.0
1385867,COUPA SOFTWARE INC,10-K,2021-01-31,USD,323284000.0,283036000.0,609068000.0,541643000.0,221701000.0,389719000.0,78202000.0,11492000.0
1535527,"CROWDSTRIKE HOLDINGS, INC.",10-K,2021-01-31,USD,1918608000.0,0.0,0.0,874438000.0,229545000.0,481413000.0,356566000.0,63843000.0
1561550,"DATADOG, INC.",10-K,2020-12-31,USD,224927000.0,1292532000.0,0.0,603466000.0,130197000.0,362780000.0,109091000.0,25883000.0
1261333,"DOCUSIGN, INC.",10-K,2021-01-31,USD,566055000.0,207450000.0,20469000.0,1453047000.0,364058000.0,973971000.0,296954000.0,82395000.0
1124140,EXACT SCIENCES CORPORATION,10-K,2020-12-31,USD,1491288000.0,348699000.0,255464000.0,1491391000.0,354324000.0,876293000.0,136482000.0,64352000.0


# Insert The Extracted Data Into Database

I will insert the extracted data into a database so that I can use it for analysis later.

In [4]:
import psycopg2

def insert_update_record(row):
    """Insert one company's financial data into the database, or update it.

    A record in table ``company`` is updated when one with the same central
    index key already exists; otherwise a new record is inserted. Database
    errors are printed, not raised, so that one failing company does not
    stop the others from being stored.

    Args:
        row: Mapping (e.g. a ``pandas.Series``) holding the key ``'index'``
            (the central index key) and one key per extracted field.

    Returns:
        None.

    Raises:
        KeyError: If ``row`` lacks one of the expected keys.
    """
    company = {
        'central_index_key': row['index']
        ,'company_name': row['company_name']
        ,'filings': row['filings']
        ,'fiscal_year_ended': row['fiscal_year_ended']
        ,'reporting_currency': row['reporting_currency']
        ,'cash_and_equivalents': row['cash_and_equivalents']
        ,'short_term_investments': row['short_term_investments']
        ,'current_debt': row['current_debt']
        ,'revenue': row['revenue']
        ,'cost_of_revenue': row['cost_of_revenue']
        ,'revenue_1_fy_ago': row['revenue_1_fy_ago']
        ,'operating_cash_flow': row['operating_cash_flow']
        ,'capital_expenditure': row['capital_expenditure']
    }
    
    # bound before the try block so that the finally clause can test it even
    # when connecting fails; otherwise the real error would be replaced by
    # an UnboundLocalError
    conn = None
    
    try:
        conn = psycopg2.connect(user=uid, password=pwd, host=host
                                , port=port, dbname='stock')
        cur = conn.cursor()
        
        # check whether the company exist in table
        query = 'SELECT central_index_key FROM company WHERE central_index_key = %s;'
        cur.execute(query, (company['central_index_key'],))
        is_company_exists = bool(cur.fetchall())

        # if exists, update existing record. Else, insert record
        if is_company_exists:
            upd_statement = 'UPDATE company \
                                SET \
                                    filings = %(filings)s \
                                    , company_name = %(company_name)s \
                                    , fiscal_year_ended = %(fiscal_year_ended)s \
                                    , reporting_currency = %(reporting_currency)s \
                                    , cash_and_equivalents = %(cash_and_equivalents)s \
                                    , short_term_investments = %(short_term_investments)s \
                                    , current_debt = %(current_debt)s \
                                    , revenue = %(revenue)s \
                                    , cost_of_revenue = %(cost_of_revenue)s \
                                    , revenue_1_fy_ago = %(revenue_1_fy_ago)s \
                                    , operating_cash_flow = %(operating_cash_flow)s \
                                    , capital_expenditure = %(capital_expenditure)s \
                                WHERE central_index_key = %(central_index_key)s;'
            cur.execute(upd_statement, company)
        else:
            ins_statement = 'INSERT INTO company \
                                (central_index_key, company_name \
                                , filings, fiscal_year_ended, reporting_currency \
                                , cash_and_equivalents, short_term_investments \
                                , current_debt, revenue, cost_of_revenue \
                                , revenue_1_fy_ago, operating_cash_flow \
                                , capital_expenditure) \
                                VALUES \
                                (%(central_index_key)s, %(company_name)s \
                                , %(filings)s, %(fiscal_year_ended)s, %(reporting_currency)s, \
                                 %(cash_and_equivalents)s, %(short_term_investments)s \
                                , %(current_debt)s, %(revenue)s, %(cost_of_revenue)s \
                                , %(revenue_1_fy_ago)s, %(operating_cash_flow)s \
                                , %(capital_expenditure)s);'
            cur.execute(ins_statement, company)

        conn.commit()
            
        print('Inserted/Updated "{}" into database.'.format(company['company_name']))
    except Exception as e:
        # best effort: report the failure and carry on with the next company
        print(company['company_name'] + ':', str(e))
    finally:
        if conn is not None:
            conn.close()
        
    
            
# store every extracted company; reset_index() exposes the central index key
# as the 'index' column that insert_update_record reads
if company_df_list:
    for index, row in companies_financials.reset_index().iterrows():
        insert_update_record(row)
    

Inserted/Updated "ALTERYX, INC." into database.
Inserted/Updated "AUTODESK, INC." into database.
Inserted/Updated "BEYOND MEAT, INC." into database.
Inserted/Updated "BLACKLINE, INC." into database.
Inserted/Updated "CLOUDFLARE, INC." into database.
Inserted/Updated "COUPA SOFTWARE INC" into database.
Inserted/Updated "CROWDSTRIKE HOLDINGS, INC." into database.
Inserted/Updated "DATADOG, INC." into database.
Inserted/Updated "DOCUSIGN, INC." into database.
Inserted/Updated "EXACT SCIENCES CORPORATION" into database.
Inserted/Updated "FASTLY, INC." into database.
Inserted/Updated "FIVE9, INC." into database.
Inserted/Updated "FRESHPET INC" into database.
Inserted/Updated "HUBSPOT, INC." into database.
Inserted/Updated "INSPIRE MEDICAL SYSTEMS, INC." into database.
Inserted/Updated "INTUITIVE SURGICAL, INC." into database.
Inserted/Updated "MERCADOLIBRE, INC." into database.
Inserted/Updated "MONGODB, INC." into database.
Inserted/Updated "NOVOCURE LIMITED" into database.
Inserted/Update