# FullTextSearchApi

In [10]:
import os, requests
import pandas as pd
from sec_api import FullTextSearchApi
from tqdm import tqdm
tqdm.pandas()

In [4]:
# Input locations. The data folder defaults to the original workstation path but can be
# redirected by setting the COMBERT_DATA_DIR environment variable before starting the
# kernel, so the notebook also runs on machines that do not have this drive mounted.
data_dir = os.environ.get('COMBERT_DATA_DIR', '/media/dmlab/My Passport/DATA/ComBERT/data')
data_filepath = os.path.join(data_dir, 'company_info_sec_cik_mapper_12057_20220802.csv')
# File holding the sec-api key; resolved relative to the current working directory.
api_key_filepath = 'API_Key.txt'

In [7]:
# Read the API key from a local file so the secret is never written into the notebook.
# strip() removes the trailing newline (and any stray surrounding whitespace) that text
# editors typically leave at the end of the file; without it the newline would be passed
# to the API clients as part of the key.
with open(api_key_filepath, "r") as f:
    api_key = f.read().strip()

In [23]:
# Load the company table and cast the CIK column to strings in one step.
df = pd.read_csv(data_filepath).astype({'CIK': 'str'})

# Count distinct CIK values (missing values, if any, count as one value).
n_distinct_ciks = df['CIK'].nunique(dropna=False)
print('Number of CIKs: {}'.format(n_distinct_ciks))
df.head()

Number of CIKs: 9170


Unnamed: 0,CIK,Ticker,Name,Exchange
0,1750,AIR,Aar Corp,NYSE
1,1800,ABT,Abbott Laboratories,NYSE
2,1961,WDDD,Worlds Inc,OTC
3,2098,ACU,Acme United Corp,NYSE
4,2178,AE,"Adams Resources & Energy, Inc.",NYSE


In [11]:
# Full-text search client from the sec_api package, authenticated with the key loaded from file.
fullTextSearchApi = FullTextSearchApi(api_key=api_key)

In [22]:
# Example lookup: 10-K filings of one company filed during calendar year 2016.
# The CIK below belongs to Alphabet Inc. (GOOG, GOOGL); any value from df['CIK']
# could be substituted.
cik = '0001652044'
start_date = '2016-01-01'
end_date = '2016-12-31'

# Search parameters passed to the API; an empty "query" string means no text filter
# is supplied here.
# NOTE(review): the recorded output also lists a 10-K/A amendment, so "formTypes"
# does not appear to restrict results to exact '10-K' matches -- confirm before relying on it.
query = dict(
    query='',
    ciks=[cik],
    formTypes=['10-K'],
    startDate=start_date,
    endDate=end_date,
)

filings = fullTextSearchApi.get_filings(query)
filings

{'total': {'value': 2, 'relation': 'eq'},
 'filings': [{'accessionNo': '0001193125-16-520367',
   'cik': '1652044',
   'companyNameLong': 'Alphabet Inc. (GOOG, GOOGL) (CIK 0001652044)',
   'ticker': 'GOOG',
   'description': 'AMENDMENT NO. 1 TO FORM 10-K',
   'formType': '10-K/A',
   'type': '10-K/A',
   'filingUrl': 'https://www.sec.gov/Archives/edgar/data/1652044/000119312516520367/d133613d10ka.htm',
   'filedAt': '2016-03-29'},
  {'accessionNo': '0001652044-16-000012',
   'cik': '1652044',
   'companyNameLong': 'Alphabet Inc. (GOOG, GOOGL) (CIK 0001652044)',
   'ticker': 'GOOG',
   'description': 'FORM 10-K',
   'formType': '10-K',
   'type': '10-K',
   'filingUrl': 'https://www.sec.gov/Archives/edgar/data/1652044/000165204416000012/goog10-k2015.htm',
   'filedAt': '2016-02-11'}]}

# ExtractorApi

In [25]:
from sec_api import ExtractorApi

In [27]:
# Folder containing the CSV files used in this section. Defaults to the original
# workstation path; set the COMBERT_DATA_DIR environment variable before starting the
# kernel to point the notebook at a different location.
root_dir = os.environ.get('COMBERT_DATA_DIR', '/media/dmlab/My Passport/DATA/ComBERT/data')
url_filepath = os.path.join(root_dir, 'urls_2016_by_sec_api_FullTextSearchApi_with_cik_of_sec_cik_mapper_3999.csv')

In [28]:
# Load the table of filing URLs. Note that this rebinds `df`, replacing the
# company table loaded earlier in the notebook.
df = pd.read_csv(url_filepath)

row_count = len(df)
print('Number of rows: {}'.format(row_count))
df.head()

Number of rows: 3999


Unnamed: 0,accessionNo,cik,companyNameLong,ticker,description,formType,type,filingUrl,filedAt
0,0001047469-16-014299,1750,AAR CORP (AIR) (CIK 0000001750),AIR,10-K,10-K,10-K,https://www.sec.gov/Archives/edgar/data/1750/0...,2016-07-13
1,0001047469-16-010246,1800,ABBOTT LABORATORIES (ABT) (CIK 0000001800),ABT,10-K,10-K,10-K,https://www.sec.gov/Archives/edgar/data/1800/0...,2016-02-19
2,0001264931-16-000339,1961,WORLDS INC (WDDD) (CIK 0000001961),WDDD,,10-K,10-K,https://www.sec.gov/Archives/edgar/data/1961/0...,2016-04-13
3,0001026608-16-000091,2098,ACME UNITED CORP (ACU) (CIK 0000002098),ACU,10-K,10-K,10-K,https://www.sec.gov/Archives/edgar/data/2098/0...,2016-03-11
4,0000002178-16-000064,2178,"ADAMS RESOURCES & ENERGY, INC. (AE) (CIK 00000...",AE,"FORM 10-K 151231 ADAMS RESOURCES & ENERGY, INC.",10-K,10-K,https://www.sec.gov/Archives/edgar/data/2178/0...,2016-03-11


In [38]:
# Extractor client from the sec_api package; used in the following cells via get_section().
extractorApi = ExtractorApi(api_key)

In [31]:
# Request section '1' of the first filing in the URL table, returned as text.
item_num = '1'
url = df['filingUrl'].iloc[0]
print(url)
extractorApi.get_section(url, item_num, "text")

https://www.sec.gov/Archives/edgar/data/1750/000104746916014299/a2228768z10-k.htm


' ITEM 1. BUSINESS (Dollars in millions) \n\nGeneral \n\nAAR CORP. and its subsidiaries are referred to herein collectively as "AAR," "Company," "we," "us," and "our" unless the context indicates otherwise. AAR was founded in 1951, organized in 1955 and reincorporated in Delaware in 1966. We are a diversified provider of products and services to the worldwide aviation and government and defense markets. \n\nDuring fiscal 2015, we executed on a comprehensive strategic plan that included: \n\n&#149; The sale of our Telair Cargo Group for cash of $714 million, resulting in pre-tax gains of $198.6 million in the fourth quarter of fiscal 2015 (and $27.7 million in the first quarter of fiscal 2016 from the receipt of contingent consideration); &#149; A decision to divest our Precision Systems Manufacturing business; &#149; The exit of certain product lines and inventories in our aviation services businesses that were underperforming or not part of our strategy going forward; &#149; The reduc

In [39]:
# Same request for the first filing whose ticker is 'C' (Citigroup).
# The recorded output for this filing is an empty string.
item_num = '1'
is_citigroup = df['ticker'] == 'C'
url = df.loc[is_citigroup, 'filingUrl'].iloc[0]
print(url)
extractorApi.get_section(url, item_num, "text")

https://www.sec.gov/Archives/edgar/data/831001/000083100116000235/c-12312015x10k.htm


''

In [41]:
# Re-issue the request with the current `url` and `item_num` and keep the response,
# then check whether the API returned exactly an empty string (displayed as True/False).
result = extractorApi.get_section(url, item_num, "text")
result == ''

True

### Check undefined results

In [42]:
# Path (under root_dir) of the CSV whose rows are printed in the next cell.
undefined_filepath = os.path.join(root_dir, 'Item1s_2016_by_sec_api_ExtractorApi_with_cik_of_sec_cik_mapper_undefined_9.csv')

In [45]:
# Print company name, ticker and filing URL for every row of the CSV, one row per line.
undefined_rows = pd.read_csv(undefined_filepath)
for name_long, ticker_symbol, filing_url in zip(
    undefined_rows['companyNameLong'],
    undefined_rows['ticker'],
    undefined_rows['filingUrl'],
):
    print(name_long, ticker_symbol, filing_url)

NORTH EUROPEAN OIL ROYALTY TRUST (NRT) (CIK 0000072633) NRT https://www.sec.gov/Archives/edgar/data/72633/000007263316000054/tenk16.txt
SCIENTIFIC INDUSTRIES INC (SCND) (CIK 0000087802) SCND https://www.sec.gov/Archives/edgar/data/87802/000008780216000019/k63016.txt
SEMPRA ENERGY (SRE) (CIK 0001032208) SRE https://www.sec.gov/Archives/edgar/data/1032208/000008652116000091/sre10k_12312015.htm
CITIGROUP INC (C) (CIK 0000831001) C https://www.sec.gov/Archives/edgar/data/831001/000083100116000235/c-12312015x10k.htm
BALTIC INTERNATIONAL USA INC (BISA) (CIK 0000918545) BISA https://www.sec.gov/Archives/edgar/data/918545/000091854516000010/r10k2015.txt
SEMPRA ENERGY (SRE) (CIK 0001032208) SRE https://www.sec.gov/Archives/edgar/data/1032208/000008652116000091/sre10k_12312015.htm
MARKEL CORP (MKL) (CIK 0001096343) MKL https://www.sec.gov/Archives/edgar/data/1096343/000109634316000185/mkl_12312015x10k.htm
MGE ENERGY INC (MGEE) (CIK 0001161728) MGEE https://www.sec.gov/Archives/edgar/data/1161728

# Check results to be used in the experiment

In [46]:
# Path (under root_dir) of the CSV displayed in the next cell; the recorded preview
# shows the filing metadata columns plus an `item_1` text column.
item1s_filepath = os.path.join(root_dir, 'Item1s_2016_by_sec_api_ExtractorApi_with_cik_of_sec_cik_mapper_3990.csv')

In [47]:
# Load and display the table; the frame is not bound to a name, and the rendered
# output abbreviates the middle rows with "...".
pd.read_csv(item1s_filepath)

Unnamed: 0,accessionNo,cik,companyNameLong,ticker,description,formType,type,filingUrl,filedAt,item_1
0,0001047469-16-014299,1750,AAR CORP (AIR) (CIK 0000001750),AIR,10-K,10-K,10-K,https://www.sec.gov/Archives/edgar/data/1750/0...,2016-07-13,ITEM 1. BUSINESS (Dollars in millions) \n\nGe...
1,0001047469-16-010246,1800,ABBOTT LABORATORIES (ABT) (CIK 0000001800),ABT,10-K,10-K,10-K,https://www.sec.gov/Archives/edgar/data/1800/0...,2016-02-19,ITEM 1. BUSINESS \n\nGENERAL DEVELOPMENT OF B...
2,0001264931-16-000339,1961,WORLDS INC (WDDD) (CIK 0000001961),WDDD,,10-K,10-K,https://www.sec.gov/Archives/edgar/data/1961/0...,2016-04-13,ITEM 1. BUSINESS. \n\n&#160;\n\nGeneral \n\n&...
3,0001026608-16-000091,2098,ACME UNITED CORP (ACU) (CIK 0000002098),ACU,10-K,10-K,10-K,https://www.sec.gov/Archives/edgar/data/2098/0...,2016-03-11,Item 1. Business \n\n&#160; \n\n&#160; \n\nOv...
4,0000002178-16-000064,2178,"ADAMS RESOURCES & ENERGY, INC. (AE) (CIK 00000...",AE,"FORM 10-K 151231 ADAMS RESOURCES & ENERGY, INC.",10-K,10-K,https://www.sec.gov/Archives/edgar/data/2178/0...,2016-03-11,Items 1 and 2. BUSINESS AND PROPERTIES \n\nBu...
...,...,...,...,...,...,...,...,...,...,...
3985,0001213900-16-017726,1667313,"Zedge, Inc. (ZDGE) (CIK 0001667313)",ZDGE,ANNUAL REPORT,10-K,10-K,https://www.sec.gov/Archives/edgar/data/166731...,2016-10-26,Item 1. Business \n\n&#160; \n\nCompany Overv...
3986,0001670541-16-000016,1670541,Adient Ltd (ADNT) (CIK 0001670541),ADNT,10-K,10-K,10-K,https://www.sec.gov/Archives/edgar/data/167054...,2016-11-29,Item 1. \n\nBusiness \n\n&#160; \n\n&#160; \n...
3987,0001663577-16-000330,1670869,"Rocky Mountain High Brands, Inc. (RMHB) (CIK 0...",RMHB,,10-K,10-K,https://www.sec.gov/Archives/edgar/data/167086...,2016-10-04,Item 1. Business \n\nOverview \n\n&#160; \n\n...
3988,0001674862-16-000008,1674862,ASHLAND GLOBAL HOLDINGS INC (ASH) (CIK 0001674...,ASH,10-K,10-K,10-K,https://www.sec.gov/Archives/edgar/data/167486...,2016-11-21,ITEM 1. BUSINESS \n\nGENERAL \n\nAshland Glob...
