In [1]:
# General packages
import requests
import json
import pandas as pd
import numpy as np
import time
import random
import re
import os
from ast import literal_eval

# Custom modules
from modules import sec_helper as sec

# Web scraping
from bs4 import BeautifulSoup as bs4


# Get CIK Numbers

In [10]:
# Resolve each company name to its CIK number and CIK link via sec.get_cik.
#
# Result: co_dict maps company name -> {'cik': ..., 'cik_link': ...}.
# The string 'none' is the "no CIK available" sentinel: the filings cell
# below tests for it, and the saved output shows it for companies that
# have no CIK.

# list storage initialization
# NOTE(review): not used in the visible cells; kept in case later cells
# rely on these names.
cik_col = []
link_col = []

# List of company names to search
co_list = ['Lockheed Martin', 'Boeing Co', 'Microsoft Corp', 
           'Mercury Systems','Abaco Systems', 'API Jet']
# Initialize dictionary (placeholders are overwritten for every company below)
co_dict = {co: {'cik': [], 'cik_link': []} for co in co_list} 

# loop through company names
for co in co_dict:
    
    # sleep timer for each loop - randomized
    # (presumably to space out requests to the remote service - TODO confirm)
    time.sleep(random.randint(2,4))
    
    try:
        
        # get_cik --> start with company name
        # Indexing mirrors the helper's (cik, cik_link) result order.
        cik_res = sec.get_cik(co)
        cik = cik_res[0]
        cik_link = cik_res[1]
        
    except Exception as err:
        
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt / SystemExit can still stop a long-running loop.
        print('error at ',co, 'pass for now:', repr(err))
        
        # Record the failure with the same sentinel a "not found" lookup
        # uses, instead of leaving the [] placeholder behind; downstream
        # code only recognises 'none' as "no CIK".
        cik = 'none'
        cik_link = 'none'
    
    # store results
    co_dict[co]['cik'] = cik
    co_dict[co]['cik_link'] = cik_link
        

# Get filings data

In [11]:
# Fetch filing metadata for every company that has a CIK.
#
# For each company in co_dict this adds the keys listed in file_features.
# Companies without a usable CIK, or whose lookup fails, get the string
# 'none' for all three keys so every entry ends up with the same set of
# keys (and the DataFrame built from co_dict has no missing cells).
file_features = ['f_type', 'f_date', 'f_num']

for co in co_dict:

    cik = co_dict[co]['cik']

    # No usable CIK: either the 'none' sentinel or an empty placeholder
    # left behind by a failed lookup. Nothing to request, so skip the
    # sleep as well.
    if not cik or cik == 'none':

        for feat in file_features:
            co_dict[co][feat] = 'none'
        continue

    # sleep timer for each request - randomized
    # (presumably to space out requests to the remote service - TODO confirm)
    time.sleep(random.randint(2,5))

    try:

        # get_filings --> uses cik number
        filing_res = sec.get_filings(cik)

        # unpack response variable; indexing mirrors the helper's
        # (f_type, f_date, f_num) result order
        f_type = filing_res[0]
        f_date = filing_res[1]
        f_num = filing_res[2]

    except Exception as err:

        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt / SystemExit can still stop the loop.
        print('error here at ', co, 'pass for now:', repr(err))

        # Fill the sentinel so a failed lookup does not leave the keys
        # missing (or stale from an earlier run of this cell).
        for feat in file_features:
            co_dict[co][feat] = 'none'

    else:

        # store result variables
        co_dict[co]['f_type'] = f_type
        co_dict[co]['f_date'] = f_date
        co_dict[co]['f_num'] = f_num
        

In [12]:
# Build one row per company: the outer keys of co_dict become the index
# and the inner keys (cik, cik_link, f_type, ...) become the columns.
filings_df = pd.DataFrame.from_dict(data=co_dict, orient='index')

# Preview the first five rows.
filings_df.head(n=5)

Unnamed: 0,cik,cik_link,f_type,f_date,f_num
Lockheed Martin,0000936468,https://www.sec.gov/cgi-bin/browse-edgar?actio...,"[S-3ASR, 10-Q, 8-K, 8-K, 8-K, S-8 POS, S-3ASR,...","[2014-10-24, 2014-10-24, 2014-10-21, 2014-08-1...","[333-199570141170749, 001-11437141170739, 001-..."
Boeing Co,0000012927,https://www.sec.gov/cgi-bin/browse-edgar?actio...,"[8-K, 8-K, 8-K, 424B2, FWP, 424B3, IRANNOTICE,...","[2014-12-19, 2014-12-15, 2014-10-31, 2014-10-2...","[001-00442141298791, 001-00442141287102, 001-0..."
Microsoft Corp,0000789019,https://www.sec.gov/cgi-bin/browse-edgar?actio...,"[UPLOAD, CORRESP, 8-K, UPLOAD, DEFA14A, ARS, 1...","[2014-12-22, 2014-12-12, 2014-12-04, 2014-12-0...","[, , 000-14278141266997, , 000-14278141225591,..."
Mercury Systems,0001049521,https://www.sec.gov/cgi-bin/browse-edgar?actio...,"[8-K, S-8 POS, S-8, 10-Q, 8-K, 8-K, ARS, EFFEC...","[2014-11-12, 2014-11-06, 2014-11-06, 2014-11-0...","[000-23599141211422, 333-101993141200008, 333-..."
Abaco Systems,none,none,none,none,none


# Check if Private

In [13]:
# Evaluate sec.likely_private once per company (row label) and store the
# answers in a new column; the saved output below shows 'Yes' / 'No' values.
# The whole list is built before the column is assigned.
filings_df['likely_private'] = list(
    map(lambda company: sec.likely_private(company, filings_df),
        filings_df.index)
)

In [14]:
# Display the full frame (all companies), including the likely_private column.
filings_df

Unnamed: 0,cik,cik_link,f_type,f_date,f_num,likely_private
Lockheed Martin,0000936468,https://www.sec.gov/cgi-bin/browse-edgar?actio...,"[S-3ASR, 10-Q, 8-K, 8-K, 8-K, S-8 POS, S-3ASR,...","[2014-10-24, 2014-10-24, 2014-10-21, 2014-08-1...","[333-199570141170749, 001-11437141170739, 001-...",No
Boeing Co,0000012927,https://www.sec.gov/cgi-bin/browse-edgar?actio...,"[8-K, 8-K, 8-K, 424B2, FWP, 424B3, IRANNOTICE,...","[2014-12-19, 2014-12-15, 2014-10-31, 2014-10-2...","[001-00442141298791, 001-00442141287102, 001-0...",No
Microsoft Corp,0000789019,https://www.sec.gov/cgi-bin/browse-edgar?actio...,"[UPLOAD, CORRESP, 8-K, UPLOAD, DEFA14A, ARS, 1...","[2014-12-22, 2014-12-12, 2014-12-04, 2014-12-0...","[, , 000-14278141266997, , 000-14278141225591,...",No
Mercury Systems,0001049521,https://www.sec.gov/cgi-bin/browse-edgar?actio...,"[8-K, S-8 POS, S-8, 10-Q, 8-K, 8-K, ARS, EFFEC...","[2014-11-12, 2014-11-06, 2014-11-06, 2014-11-0...","[000-23599141211422, 333-101993141200008, 333-...",No
Abaco Systems,none,none,none,none,none,Yes
API Jet,none,none,none,none,none,Yes
