In [2]:
import pandas as pd
import requests

In [5]:
# The Nasdaq Stock Screener is a convenient source for a (full) list of stocks:
# 7626 stocks as of June 22, 2023.
# https://www.nasdaq.com/market-activity/stocks/screener
# The CSV file can be downloaded manually from that page.
data = (
    pd.read_csv("nasdaq_screener_20230622.csv")
    .sort_values(by="Market Cap", ascending=False)  # largest market cap first
)
data.head()

Unnamed: 0,Symbol,Name,Last Sale,Net Change,% Change,Market Cap,Country,IPO Year,Volume,Sector,Industry
22,AAPL,Apple Inc. Common Stock,$185.465,1.505,0.818%,2917123000000.0,United States,1980.0,7281925,Technology,Computer Manufacturing
4540,MSFT,Microsoft Corporation Common Stock,$336.71,3.15,0.944%,2503603000000.0,United States,1986.0,3687698,Technology,Computer Software: Prepackaged Software
3050,GOOG,Alphabet Inc. Class C Capital Stock,$121.605,0.345,0.285%,1544019000000.0,United States,2004.0,3221308,Technology,Computer Software: Programming Data Processing
3051,GOOGL,Alphabet Inc. Class A Common Stock,$120.94,0.39,0.324%,1535575000000.0,United States,2004.0,5474863,Technology,Computer Software: Programming Data Processing
419,AMZN,Amazon.com Inc. Common Stock,$127.9599,3.1299,2.507%,1312914000000.0,United States,1997.0,15850686,Consumer Discretionary,Catalog/Specialty Distribution


In [6]:
# Or make an HTTP request and convert the JSON payload to a dataframe.
# The request URL can be found in the Network tab of Chrome's developer tools.
# The User-Agent header identifies the application, operating system, vendor, and/or version
# of the requesting client, so that servers can tailor their response to browser variants.
# Sending a suitable user agent is crucial when scraping data, to avoid being blocked:
# https://brightdata.com/blog/how-tos/user-agents-for-web-scraping-101
response = requests.get(
    'https://api.nasdaq.com/api/screener/stocks?tableonly=true&limit=25&offset=0&download=true',
    headers={'User-agent': 'Chrome/114.0.0.0'},
    timeout=30,  # requests never times out by default; without this a stalled server hangs the cell
)
# Fail here, with the HTTP status, on a 4xx/5xx answer (e.g. a rejected user agent)
# rather than later with an opaque JSON decoding error or KeyError.
response.raise_for_status()
payload = response.json()

# payload["data"]["rows"] holds the records; payload["data"]["headers"] is used as the
# mapping from the raw column keys to the display names (e.g. "Market Cap").
data = (
    pd.json_normalize(payload["data"]["rows"])
    .rename(columns=payload["data"]["headers"])
)
data["Market Cap"] = pd.to_numeric(data["Market Cap"])  # to_numeric handles the empty-string values
data = data.sort_values(by="Market Cap", ascending=False)  # largest market cap first
data.head()

Unnamed: 0,Symbol,Name,Last Sale,Net Change,% Change,Volume,Market Cap,Country,IPO Year,Industry,Sector,Url
22,AAPL,Apple Inc. Common Stock,$186.65,2.69,1.462%,34926044,2935762000000.0,United States,1980,Computer Manufacturing,Technology,/market-activity/stocks/aapl
4598,MSFT,Microsoft Corporation Common Stock,$338.84,5.28,1.583%,16790462,2519441000000.0,United States,1986,Computer Software: Prepackaged Software,Technology,/market-activity/stocks/msft
3091,GOOG,Alphabet Inc. Class C Capital Stock,$123.19,1.93,1.592%,15243220,1564143000000.0,United States,2004,Computer Software: Programming Data Processing,Technology,/market-activity/stocks/goog
3092,GOOGL,Alphabet Inc. Class A Common Stock,$122.48,1.93,1.601%,19387038,1555129000000.0,United States,2004,Computer Software: Programming Data Processing,Technology,/market-activity/stocks/googl
420,AMZN,Amazon.com Inc. Common Stock,$129.67,4.84,3.877%,70578986,1330460000000.0,United States,1997,Catalog/Specialty Distribution,Consumer Discretionary,/market-activity/stocks/amzn


In [7]:
# Real-world data: % Change, Market Cap, Country, IPO Year, Sector and Industry
# are left unpopulated for some stocks.
# Besides the 11 regular sectors (GICS, per the original note - TODO confirm the taxonomy),
# some stocks carry 'Miscellaneous' or '' (empty string) as their Sector.
pd.unique(data["Sector"])

array(['Technology', 'Consumer Discretionary', 'Utilities', '',
       'Health Care', 'Energy', 'Finance', 'Consumer Staples',
       'Telecommunications', 'Industrials', 'Basic Materials',
       'Real Estate', 'Miscellaneous'], dtype=object)

In [9]:
# Keep only the rows whose Country is exactly "United States"
# (rows with any other value, including a blank Country, are excluded).
us_data = data.loc[data["Country"].eq("United States")]
us_data.head(10)

Unnamed: 0,Symbol,Name,Last Sale,Net Change,% Change,Volume,Market Cap,Country,IPO Year,Industry,Sector,Url
22,AAPL,Apple Inc. Common Stock,$186.65,2.69,1.462%,34926044,2935762000000.0,United States,1980.0,Computer Manufacturing,Technology,/market-activity/stocks/aapl
4598,MSFT,Microsoft Corporation Common Stock,$338.84,5.28,1.583%,16790462,2519441000000.0,United States,1986.0,Computer Software: Prepackaged Software,Technology,/market-activity/stocks/msft
3091,GOOG,Alphabet Inc. Class C Capital Stock,$123.19,1.93,1.592%,15243220,1564143000000.0,United States,2004.0,Computer Software: Programming Data Processing,Technology,/market-activity/stocks/goog
3092,GOOGL,Alphabet Inc. Class A Common Stock,$122.48,1.93,1.601%,19387038,1555129000000.0,United States,2004.0,Computer Software: Programming Data Processing,Technology,/market-activity/stocks/googl
420,AMZN,Amazon.com Inc. Common Stock,$129.67,4.84,3.877%,70578986,1330460000000.0,United States,1997.0,Catalog/Specialty Distribution,Consumer Discretionary,/market-activity/stocks/amzn
4978,NVDA,NVIDIA Corporation Common Stock,$430.72,0.27,0.063%,32528818,1063878000000.0,United States,1999.0,Semiconductors,Technology,/market-activity/stocks/nvda
6941,TSLA,Tesla Inc. Common Stock,$263.975,4.515,1.74%,145217789,836669900000.0,United States,2010.0,Auto Manufacturing,Consumer Discretionary,/market-activity/stocks/tsla
5326,PCGU,Pacific Gas & Electric Co. Equity Unit,$150.22,0.93,0.623%,5938,820137100000.0,United States,,Power Generation,Utilities,/market-activity/stocks/pcgu
1133,BRK/A,Berkshire Hathaway Inc.,$511127.15000000002,-4387.85,-0.851%,6210,751803600000.0,United States,,,,/market-activity/stocks/brk/a
1134,BRK/B,Berkshire Hathaway Inc.,$336.54,-2.07,-0.611%,1741108,742512000000.0,United States,,,,/market-activity/stocks/brk/b
