In [25]:
import requests
from bs4 import BeautifulSoup
import csv

# SEC EDGAR search URL for company tickers
url = 'https://www.sec.gov/Archives/edgar/cik-lookup-data.txt'

# Define headers for the request
headers = {
    'User-Agent': 'junghae2017@gmail.com',
    'Accept-Encoding': 'gzip, deflate'
}

try:
    # Send GET request to EDGAR
    response = requests.get(url, headers=headers)
    response.raise_for_status()

    # Parse the plain text response
    text_data = response.text

    # Split text data into lines
    lines = text_data.splitlines()

    # Create a list to store company data
    company_data = []

    for line in lines:
        if line.strip():  # Skip empty lines
            # Split line by ':' to separate company name and CIK
            parts = line.split(':')
            company_name = parts[0].strip()
            cik = parts[1].strip()
            company_data.append({'Company Name': company_name, 'CIK': cik})

    # Optionally, write data to a CSV file
    with open('company_cik_codes.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['Company Name', 'CIK']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for company in company_data:
            writer.writerow(company)

    print(f"Total companies found: {len(company_data)}")
    print(f"Sample data: {company_data[:5]}")  # Print the first 5 entries

except requests.exceptions.HTTPError as e:
    print(f"HTTP error occurred: {e}")
except requests.exceptions.RequestException as e:
    print(f"Request error occurred: {e}")


Total companies found: 957947
Sample data: [{'Company Name': '!J INC', 'CIK': '0001438823'}, {'Company Name': '#1 A LIFESAFER HOLDINGS, INC.', 'CIK': '0001509607'}, {'Company Name': '#1 ARIZONA DISCOUNT PROPERTIES LLC', 'CIK': '0001457512'}, {'Company Name': '#1 PAINTBALL CORP', 'CIK': '0001433777'}, {'Company Name': '$ LLC', 'CIK': '0001427189'}]


In [24]:
# Import requests to retrive Web Urls example HTML. TXT
import requests

# Import BeautifulSoup
from bs4 import BeautifulSoup

# import re module for REGEXes
import re

# import pandas
import pandas as pd

r = requests.get('https://www.sec.gov/Archives/edgar/data/320193/000032019318000145/0000320193-18-000145.txt')
raw_10k = r.text
print(raw_10k[0:1300])

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>SEC.gov | Request Rate Threshold Exceeded</title>
<style>
html {height: 100%}
body {height: 100%; margin:0; padding:0;}
#header {background-color:#003968; color:#fff; padding:15px 20px 10px 20px;font-family:Arial, Helvetica, sans-serif; font-size:20px; border-bottom:solid 5px #000;}
#footer {background-color:#003968; color:#fff; padding:15px 20px;font-family:Arial, Helvetica, sans-serif; font-size:20px;}
#content {max-width:650px;margin:60px auto; padding:0 20px 100px 20px; background-image:url(seal_bw.png);background-repeat:no-repeat;background-position:50% 100%;}
h1 {font-family:Georgia, Times, serif; font-size:20px;}
h2 {text-align:center; font-family:Georgia, Times, serif; font-size:20px; width:100%; border-bottom:solid #999 1px;padding

In [None]:
import requests
import pandas as pd

url = 'https://www.sec.gov/files/company_tickers_exchange.json'
headers = {'User-Agent': 'junghae2017@gmail.com'}
res = requests.get(url, headers=headers)
cik_list = res.json()

cik_df = pd.DataFrame(cik_list['data'], columns=cik_list['fields'])
# cik_df['cik'] = str(cik_df['cik'])


print(cik_df)

tic = 'SLXNW'
cik = cik_df[cik_df['ticker']==tic]['cik'].iloc[0].astype(str)
cik = cik.zfill(10)

# API에 요청 보내기
url = f"https://data.sec.gov/submissions/CIK{cik}.json"
response = requests.get(url, headers=headers)
# 응답 데이터 확인
data = response.json()
df_filing = pd.DataFrame(data['filings']['recent'])
print(df_filing)