import pandas as pd
import time

In [32]:
# Load S&P 500 tickers from Wikipedia
sp500_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
sp500_table = pd.read_html(sp500_url)[0]
tickers = sp500_table['Symbol'].tolist()
tickers[:10]

['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A']

In [7]:
# Collect info
##The **YFRateLimitError** will happen because Yahoo Finance limits the number
##of API requests you can make in a short period. This is common when looping 
##over many tickers, like the S&P 500, and fetching .info data (which hits 
##the API separately for each ticker).
data = []
for ticker_symbol in tickers:
    success = False
    retries = 10
    while not success and retries > 0:
        try:
            ticker = yf.Ticker(ticker_symbol)
            info = ticker.info
            data.append({
                'ticker': ticker_symbol,
                'company_name': info.get('longName', 'N/A'),
                'sector': info.get('sectorKey', 'N/A'),
                'industry': info.get('industry', 'N/A'),
                'market_cap': info.get('marketCap', 'N/A'),
                'country': info.get('country', 'N/A')
            })
            success = True
        except Exception as e:
            print(f"Error for {ticker_symbol}: {e}")
            retries -= 1
            time.sleep(5)

In [10]:
# Save to CSV
df = pd.DataFrame(data)
df.shape
# the index contains 503 stocks because it includes two share classes of stock from 3 of its component companies

(503, 6)

In [11]:
df.to_csv('sp500_stock_info.csv', index=False)


In [12]:
# Load Nasdaq-100 tickers from Wikipedia
nasdaq100_url = 'https://en.wikipedia.org/wiki/Nasdaq-100'
nasdaq100_table = pd.read_html(nasdaq100_url)[4]
nasdaq100_tickers = nasdaq100_table['Symbol'].tolist()

tickers[:10]

['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A']

In [13]:
# Collect info
data = []
for ticker_symbol in nasdaq100_tickers:
    ticker = yf.Ticker(ticker_symbol)
    info = ticker.info
    data.append({
        'ticker': ticker_symbol,
        'company_name': info.get('longName', 'N/A'),
        'sector': info.get('sectorKey', 'N/A'),
        'industry': info.get('industry', 'N/A'),
        'market_cap': info.get('marketCap', 'N/A'),
        'country': info.get('country', 'N/A')
    })


In [15]:
df_nasdaq = pd.DataFrame(data)
df_nasdaq.shape

(101, 6)

In [16]:
df_nasdaq.to_csv('nasdaq100_stock_info.csv', index=False)

In [18]:
# Load dowjones30 tickers from Wikipedia
dowjones30_url = 'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'
dowjones30_table = pd.read_html(dowjones30_url)[2]
dowjones30_tickers = dowjones30_table['Symbol'].tolist()


In [19]:
# Collect info
dowjones30_data = []
for ticker_symbol in dowjones30_tickers:
    ticker = yf.Ticker(ticker_symbol)
    info = ticker.info
    dowjones30_data.append({
        'ticker': ticker_symbol,
        'company_name': info.get('longName', 'N/A'),
        'sector': info.get('sectorKey', 'N/A'),
        'industry': info.get('industry', 'N/A'),
        'market_cap': info.get('marketCap', 'N/A'),
        'country': info.get('country', 'N/A')
    })

In [20]:
df_dowjones30 = pd.DataFrame(dowjones30_data)
df_dowjones30.shape

(30, 6)

In [21]:
df_dowjones30.to_csv('dowjones30_stock_info.csv', index=False)