In [3]:
# Import necessary libraries
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Data source: the SEC's published company tickers JSON file. It is stable, free, and comprehensive.
# JSON file published by the SEC; each record carries 'cik_str', 'ticker' and 'title'.
URL = "https://www.sec.gov/files/company_tickers.json"

# SEC requires a User-Agent header with an email address to identify the requester.
# You can replace this with your actual email if running strictly locally,
# but this generic format usually works for personal scripts.
headers = {'User-Agent': "DataScientist (datascientist@example.com)"}

# Seconds to wait for the server before giving up. requests applies no timeout
# by default, so without this a stalled connection would hang the cell forever.
REQUEST_TIMEOUT = 30

response = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()  # Raise on 4xx/5xx instead of parsing an error page
data = response.json()

# The SEC JSON structure is a dictionary where keys are indices ('0', '1', ...)
# and each value is one record, so orient='index' makes every key a row.
df = pd.DataFrame.from_dict(data, orient='index')

# Rename columns for clarity (SEC uses 'cik_str', 'ticker', 'title')
df = df.rename(columns={'cik_str': 'CIK', 'ticker': 'Symbol', 'title': 'Company Name'})

print(f"Successfully retrieved {len(df)} tickers.")
print(df.head())

# --- Export ---
# Save to CSV for your modeling pipeline; index=False drops the '0', '1', ...
# row labels so only the three data columns are written.
df.to_csv("us_assets_list.csv", index=False)
print("List saved to 'us_assets_list.csv'")


Successfully retrieved 10283 tickers.
       CIK Symbol    Company Name
0  1045810   NVDA     NVIDIA CORP
1   320193   AAPL      Apple Inc.
2  1652044  GOOGL   Alphabet Inc.
3   789019   MSFT  MICROSOFT CORP
4  1018724   AMZN  AMAZON COM INC
List saved to 'us_assets_list.csv'
