In [2]:
import pandas as pd
import requests

In [3]:
# The Nasdaq Stock Screener is a convenient source for a (full) list of stocks:
# 7626 stocks as of June 22 2023.
# https://www.nasdaq.com/market-activity/stocks/screener
# The CSV file read below has to be downloaded manually from that page.
data = (
    pd.read_csv("nasdaq_screener_20230622.csv")
    .sort_values(by="Market Cap", ascending=False)  # largest companies first
)
data.head()

Unnamed: 0,Symbol,Name,Last Sale,Net Change,% Change,Market Cap,Country,IPO Year,Volume,Sector,Industry
22,AAPL,Apple Inc. Common Stock,$185.465,1.505,0.818%,2917123000000.0,United States,1980.0,7281925,Technology,Computer Manufacturing
4540,MSFT,Microsoft Corporation Common Stock,$336.71,3.15,0.944%,2503603000000.0,United States,1986.0,3687698,Technology,Computer Software: Prepackaged Software
3050,GOOG,Alphabet Inc. Class C Capital Stock,$121.605,0.345,0.285%,1544019000000.0,United States,2004.0,3221308,Technology,Computer Software: Programming Data Processing
3051,GOOGL,Alphabet Inc. Class A Common Stock,$120.94,0.39,0.324%,1535575000000.0,United States,2004.0,5474863,Technology,Computer Software: Programming Data Processing
419,AMZN,Amazon.com Inc. Common Stock,$127.9599,3.1299,2.507%,1312914000000.0,United States,1997.0,15850686,Consumer Discretionary,Catalog/Specialty Distribution


In [4]:
# Alternatively, make an HTTP request and convert the JSON payload to a dataframe.
# The request URL can be found in the Network tab of Chrome DevTools.
# The User-Agent header identifies the application, operating system, vendor, and/or version
# of the requesting client, so servers can tailor (or refuse) their response per browser variant.
# Sending a suitable User-Agent is crucial to avoid being blocked when scraping data.
# https://brightdata.com/blog/how-tos/user-agents-for-web-scraping-101
response = requests.get(
    'https://api.nasdaq.com/api/screener/stocks?tableonly=true&limit=25&offset=0&download=true',
    headers={'User-agent': 'Chrome/114.0.0.0'},
    timeout=30,  # requests has no default timeout; without one a stalled connection hangs the kernel
)
# Fail loudly on 4xx/5xx (e.g. when the request is blocked) instead of trying to parse an error body.
response.raise_for_status()
payload = response.json()

# payload["data"]["rows"] holds the records; payload["data"]["headers"] is used as the
# column-rename mapping for the normalized frame.
data = (
    pd.json_normalize(payload["data"]["rows"])
    .rename(columns=payload["data"]["headers"])
)
data["Market Cap"] = pd.to_numeric(data["Market Cap"])  # to_numeric copes with the empty strings used for missing values
data = data.sort_values(by="Market Cap", ascending=False)  # largest companies first
data.head()

Unnamed: 0,Symbol,Name,Last Sale,Net Change,% Change,Volume,Market Cap,Country,IPO Year,Industry,Sector,Url
21,AAPL,Apple Inc. Common Stock,$175.37,-0.12,-0.068%,37737956,2741775000000.0,United States,1980,Computer Manufacturing,Technology,/market-activity/stocks/aapl
4513,MSFT,Microsoft Corporation Common Stock,$321.61,0.84,0.262%,22188721,2389486000000.0,United States,1986,Computer Software: Prepackaged Software,Technology,/market-activity/stocks/msft
3029,GOOG,Alphabet Inc. Class C Capital Stock,$132.81,-1.78,-1.323%,12951287,1674601000000.0,United States,2004,Computer Software: Programming Data Processing,Technology,/market-activity/stocks/goog
3030,GOOGL,Alphabet Inc. Class A Common Stock,$131.85,-1.89,-1.413%,19467690,1662497000000.0,United States,2004,Computer Software: Programming Data Processing,Technology,/market-activity/stocks/googl
415,AMZN,Amazon.com Inc. Common Stock,$130.6801,-4.6099,-3.407%,41544342,1348325000000.0,United States,1997,Catalog/Specialty Distribution,Consumer Discretionary,/market-activity/stocks/amzn


In [5]:
# Real-world data: % Change, Market Cap, Country, IPO Year, Sector and Industry
# are left unpopulated for some stocks.
# Besides the 11 named sectors (described as GICS sectors in the original note -- TODO confirm
# the classification scheme), some stocks carry 'Miscellaneous' or an empty string '' as Sector.
data.loc[:, "Sector"].unique()

array(['Technology', 'Consumer Discretionary', '', 'Finance',
       'Health Care', 'Energy', 'Basic Materials', 'Consumer Staples',
       'Utilities', 'Telecommunications', 'Industrials', 'Real Estate',
       'Miscellaneous'], dtype=object)

In [6]:
# Keep only the rows whose Country is exactly "United States", then preview the first ten.
is_us_listing = data["Country"] == "United States"
us_data = data.loc[is_us_listing]
us_data.head(10)

Unnamed: 0,Symbol,Name,Last Sale,Net Change,% Change,Volume,Market Cap,Country,IPO Year,Industry,Sector,Url
21,AAPL,Apple Inc. Common Stock,$175.37,-0.12,-0.068%,37737956,2741775000000.0,United States,1980.0,Computer Manufacturing,Technology,/market-activity/stocks/aapl
4513,MSFT,Microsoft Corporation Common Stock,$321.61,0.84,0.262%,22188721,2389486000000.0,United States,1986.0,Computer Software: Prepackaged Software,Technology,/market-activity/stocks/msft
3029,GOOG,Alphabet Inc. Class C Capital Stock,$132.81,-1.78,-1.323%,12951287,1674601000000.0,United States,2004.0,Computer Software: Programming Data Processing,Technology,/market-activity/stocks/goog
3030,GOOGL,Alphabet Inc. Class A Common Stock,$131.85,-1.89,-1.413%,19467690,1662497000000.0,United States,2004.0,Computer Software: Programming Data Processing,Technology,/market-activity/stocks/googl
415,AMZN,Amazon.com Inc. Common Stock,$130.6801,-4.6099,-3.407%,41544342,1348325000000.0,United States,1997.0,Catalog/Specialty Distribution,Consumer Discretionary,/market-activity/stocks/amzn
4888,NVDA,NVIDIA Corporation Common Stock,$414.5401,-7.8499,-1.858%,31043686,1023914000000.0,United States,1999.0,Semiconductors,Technology,/market-activity/stocks/nvda
6806,TSLA,Tesla Inc. Common Stock,$258.31,-4.28,-1.63%,88009971,819874500000.0,United States,2010.0,Auto Manufacturing,Consumer Discretionary,/market-activity/stocks/tsla
1109,BRK/A,Berkshire Hathaway Inc.,$552258.00,-4322.0,-0.777%,7369,812301900000.0,United States,,,,/market-activity/stocks/brk/a
1110,BRK/B,Berkshire Hathaway Inc.,$364.23,-2.59,-0.706%,1513695,803604800000.0,United States,,,,/market-activity/stocks/brk/b
4297,META,Meta Platforms Inc. Class A Common Stock,$299.40,-0.27,-0.09%,13793883,767282000000.0,United States,2012.0,Computer Software: Programming Data Processing,Technology,/market-activity/stocks/meta
