# Getting Started with Polygon API

In [116]:
import os

import numpy as np
import pandas as pd
from polygon import RESTClient
from polygon.rest.models import Market

# Turn off pandas' chained-assignment warning for this notebook (pandas' default is 'warn').
pd.set_option("mode.chained_assignment", None)

In [2]:
# Read the Polygon API key from the environment so the secret never lives in the notebook.
# NOTE: a key was previously committed in this cell; treat it as leaked and rotate it.
polygon_api_key = os.environ.get("POLYGON_API_KEY")
if not polygon_api_key:
    raise RuntimeError(
        "Set the POLYGON_API_KEY environment variable before running this notebook."
    )

# Shared REST client used by every cell below.
client = RESTClient(polygon_api_key)

In [27]:
# Fetch the market-holiday calendar from Polygon.
# NOTE(review): presumably only upcoming closures, going by the variable name — confirm against the API docs.
upcoming_holidays = client.get_market_holidays()

In [None]:
# Pull the exchange list from Polygon (changes rarely, so refreshing about once a day is enough).
all_exchanges = client.get_exchanges()

# Keep only the exchanges whose asset class is "stocks" ...
stock_exchanges = list(filter(lambda exch: exch.asset_class == "stocks", all_exchanges))

# ... and build a lookup from exchange MIC code to its display name.
stock_exchanges_dict = dict((exch.mic, exch.name) for exch in stock_exchanges)

In [51]:
# Get all common stocks ('CS') that Polygon has information for. This does not need to be
# refreshed often, but a full copy should be re-pulled about once per day.
# `limit` is the page size of the paginated iterator, not a cap on the total; 1000 is the
# documented maximum for the tickers endpoint.
all_polygon_tickers = client.list_tickers(market=Market.STOCKS, type='CS', limit=1000)

# Materialise the iterator in a single pass, keeping only US-locale tickers.
valid_tickers = [ticker for ticker in all_polygon_tickers if ticker.locale == "us"]

In [65]:
# All Polygon tickers form our base table; its columns are used later to call other APIs
# for more information.
# Build the frame from one record per ticker (single pass over `valid_tickers`); the explicit
# `columns=` keeps the schema stable even when `valid_tickers` is empty.
stock_info = pd.DataFrame(
    [
        {
            'ticker': ticker.ticker,
            'name': ticker.name,
            'exchange': ticker.primary_exchange,
            'locale': ticker.locale,
        }
        for ticker in valid_tickers
    ],
    columns=['ticker', 'name', 'exchange', 'locale'],
)

# Translate the exchange MIC code to a readable name. `.map` leaves NaN for a ticker whose
# exchange is missing or not present in `stock_exchanges_dict`, instead of aborting the whole
# cell with a KeyError as a direct dict lookup would.
stock_info['exchange_name'] = stock_info['exchange'].map(stock_exchanges_dict)

In [133]:
# Index the table by ticker symbol. Guarded so that re-running this cell is a no-op rather
# than a KeyError (after the first run "ticker" is the index, no longer a column).
if "ticker" in stock_info.columns:
    stock_info = stock_info.set_index("ticker")

In [134]:
# Persist the ticker table as CSV. The path is relative to the notebook's working directory,
# so a `data/` folder is expected one level up (pandas does not create missing directories).
stock_info.to_csv("../data/all_stocks.csv")

In [131]:
# Preview the first rows of the ticker table.
# NOTE(review): the saved output comes from execution 131, i.e. before the set_index cell
# (execution 133) ran, which is why it still shows `ticker` as a regular column.
stock_info.head()

Unnamed: 0,ticker,name,exchange,locale,exchange_name
0,A,Agilent Technologies Inc.,XNYS,us,New York Stock Exchange
1,AA,Alcoa Corporation,XNYS,us,New York Stock Exchange
2,AAC,Ares Acquisition Corporation,XNYS,us,New York Stock Exchange
3,AACI,Armada Acquisition Corp. I Common Stock,XNAS,us,Nasdaq
4,AADI,"Aadi Bioscience, Inc. Common Stock",XNAS,us,Nasdaq


### Experimental

In [128]:
# Grouped daily aggregates for the single date 2022-05-23; entries carry a `ticker` attribute
# (used for filtering further down).
daily_example = client.get_grouped_daily_aggs("2022-05-23")

In [129]:
# Number of aggregate entries returned for that date.
len(daily_example)

11128

In [130]:
# Show only the grouped-daily entries for a handful of tickers of interest.
list(
    filter(
        lambda agg: agg.ticker in ["AMD", "AAPL", "NVDA", "MSFT", "GOOGL"],
        daily_example,
    )
)

[GroupedDailyAgg(ticker='AMD', open=91.66, high=95.19, low=90.92, close=95.07, volume=128773040.0, vwap=93.392, timestamp=1653336000000, transactions=662108),
 GroupedDailyAgg(ticker='MSFT', open=255.49, high=261.5, low=253.43, close=260.65, volume=33175379.0, vwap=258.9217, timestamp=1653336000000, transactions=386147),
 GroupedDailyAgg(ticker='AAPL', open=137.79, high=143.26, low=137.65, close=143.11, volume=117726265.0, vwap=141.4344, timestamp=1653336000000, transactions=851032),
 GroupedDailyAgg(ticker='NVDA', open=162.74, high=169.15, low=161.7901, close=168.98, volume=63988869.0, vwap=166.4595, timestamp=1653336000000, transactions=546192),
 GroupedDailyAgg(ticker='GOOGL', open=2191.75, high=2242.95, low=2174.82, close=2229.76, volume=1859247.0, vwap=2214.0921, timestamp=1653336000000, transactions=121005)]

### Get Ticker Price Dataset

For each aggregate window, this fetches the `open` and `close` prices, the `high` and `low` reached within that window (not all-time values), the total number of `transactions`, the UNIX `timestamp` (in milliseconds) marking the start of the window, the total trading `volume`, and the volume-weighted average price `vwap`. The base aggregates from Polygon are `minute` and `hour`; the example below requests `day` bars.

In [137]:
# Request aggregates for NVDA with multiplier 1 and timespan "day", from 2022-05-16 to 2022-05-21.
# NOTE(review): `limit=4999` presumably bounds the number of base aggregates queried — confirm in the API docs.
aggs = client.get_aggs("NVDA", 1, "day", "2022-05-16", "2022-05-21", limit=4999)

In [153]:
# One row per aggregate bar: every price/volume field becomes a column (in this order) and
# the bar's millisecond timestamp becomes the datetime index.
stock_price_history = pd.DataFrame(
    {
        field: [getattr(agg, field) for agg in aggs]
        for field in ('open', 'high', 'low', 'close', 'volume', 'vwap', 'transactions')
    },
    index=pd.to_datetime([agg.timestamp for agg in aggs], unit='ms'),
)

In [170]:
# Ticker-detail attributes that get their own column in the price table.
ticker_detail_keys = [
    'list_date',
    'market_cap',
    'share_class_shares_outstanding',
    'sic_code',
    'sic_description',
    'total_employees',
    'weighted_shares_outstanding',
]

# Add one NaN-filled placeholder column per attribute (an existing column is reset to NaN).
stock_price_history = stock_price_history.assign(
    **{detail_key: np.nan for detail_key in ticker_detail_keys}
)

In [176]:
# Ticker-detail attributes copied onto every price row.
ticker_detail_keys = {
    'list_date',
    'market_cap',
    'share_class_shares_outstanding',
    'sic_code',
    'sic_description',
    'total_employees',
    'weighted_shares_outstanding'
}

# Collect one record per trading day first and attach the columns in a single step afterwards.
# Writing strings (e.g. `list_date`, `sic_description`) cell-by-cell into float NaN placeholder
# columns relies on silent dtype upcasting, which pandas >= 2.1 deprecates; building the
# columns from records lets pandas infer a proper dtype for each attribute.
detail_records = []
for date in stock_price_history.index:
    # Named `date_str` (not `datetime`) so the stdlib module name is not shadowed.
    date_str = date.strftime('%Y-%m-%d')
    # Ticker details requested for this specific date.
    ticker_details = client.get_ticker_details("NVDA", date=date_str)
    detail_records.append({col: getattr(ticker_details, col) for col in ticker_detail_keys})

details_frame = pd.DataFrame(detail_records, index=stock_price_history.index)
for col in details_frame.columns:
    # Positional assignment: rows are already in the same order as the price table, and
    # existing placeholder columns keep their position in the frame.
    stock_price_history[col] = details_frame[col].to_numpy()

In [221]:
# Display the price table with the ticker-detail columns filled in.
stock_price_history

Unnamed: 0,open,high,low,close,volume,vwap,transactions,list_date,market_cap,share_class_shares_outstanding,sic_code,sic_description,total_employees,weighted_shares_outstanding
2022-05-16 04:00:00,175.09,177.88,171.06,172.64,52144598.0,174.449,483480,1999-01-22,404498500000.0,2504010000.0,3674,SEMICONDUCTORS & RELATED DEVICES,22473.0,2504014000.0
2022-05-17 04:00:00,180.74,183.71,176.34,181.77,58582971.0,180.0808,563506,1999-01-22,404498500000.0,2504010000.0,3674,SEMICONDUCTORS & RELATED DEVICES,22473.0,2504014000.0
2022-05-18 04:00:00,177.05,181.18,168.64,169.38,54516106.0,173.659,536759,1999-01-22,404498500000.0,2504010000.0,3674,SEMICONDUCTORS & RELATED DEVICES,22473.0,2504014000.0
2022-05-19 04:00:00,169.37,176.87,167.3405,171.24,62130959.0,172.781,549159,1999-01-22,404498500000.0,2504010000.0,3674,SEMICONDUCTORS & RELATED DEVICES,22473.0,2504014000.0
2022-05-20 04:00:00,173.32,174.1,157.55,166.94,73910526.0,163.8915,722854,1999-01-22,404498500000.0,2504010000.0,3674,SEMICONDUCTORS & RELATED DEVICES,22473.0,2504014000.0


In [281]:
# Field names extracted from each section of a financials filing. List order determines the
# column order in the price table. The variable names (including their inconsistent suffixes)
# are referenced by other cells, so they are kept exactly as they are.

# Balance sheet.
balance_sheet_key_value = [
    'liabilities_and_equity', 'current_assets', 'assets',
    'equity_attributable_to_parent', 'fixed_assets', 'noncurrent_liabilities',
    'other_than_fixed_noncurrent_assets', 'current_liabilities', 'equity',
    'equity_attributable_to_noncontrolling_interest', 'noncurrent_assets', 'liabilities',
]

# Cash-flow statement.
cash_flow_key_values = [
    'exchange_gains_losses', 'net_cash_flow', 'net_cash_flow_from_financing_activities',
]

# Comprehensive income.
comprehensive_income_info_key_vales = [
    'comprehensive_income_loss',
    'comprehensive_income_loss_attributable_to_parent',
    'other_comprehensive_income_loss',
]

# Income statement.
income_statement_info_key_values = [
    'basic_earnings_per_share', 'cost_of_revenue', 'gross_profit',
    'operating_expenses', 'revenues',
]

In [284]:
# Every financial field, in section order: balance sheet, cash flow, comprehensive income,
# income statement.
all_financial_cols = [
    *balance_sheet_key_value,
    *cash_flow_key_values,
    *comprehensive_income_info_key_vales,
    *income_statement_info_key_values,
]

# Add one NaN-filled placeholder column per financial field (an existing column is reset to NaN).
stock_price_history = stock_price_history.assign(
    **{financial_col: np.nan for financial_col in all_financial_cols}
)

In [270]:
# Keep an independent copy of the price table as a fallback before further mutation.
# NOTE(review): this cell's execution count (270) is lower than its neighbours' (284, 287), so
# the copy may not reflect the state a top-to-bottom run would produce — re-run in order.
stock_price_history_safe = stock_price_history.copy()

In [287]:
def _datapoint_value(section, key):
    """Return the ``value`` of data point ``key`` inside a financials ``section``.

    ``section`` may be a dict (as the balance sheet is accessed in this notebook) or an
    object exposing the data points as attributes. NaN is returned when the section, the
    data point, or its value is missing, so an incomplete filing cannot abort the loop.
    """
    if section is None:
        return np.nan
    if isinstance(section, dict):
        point = section.get(key)
    else:
        point = getattr(section, key, None)
    value = getattr(point, "value", None)
    return np.nan if value is None else value


# (attribute on `stock_financials.financials`, columns to extract from that section)
financial_sections = (
    ("balance_sheet", balance_sheet_key_value),
    ("cash_flow_statement", cash_flow_key_values),
    ("comprehensive_income", comprehensive_income_info_key_vales),
    ("income_statement", income_statement_info_key_values),
)

for date in stock_price_history.index:
    # Named `filing_day` (not `datetime`) so the stdlib module name is not shadowed.
    filing_day = date.strftime('%Y-%m-%d')
    # NOTE(review): `filing_date=` looks like an exact-match filter, so only filings made on
    # this very trading day are picked up and most rows stay NaN — confirm whether a
    # "latest filing on or before this date" query is what is actually wanted.
    financials = client.vx.list_stock_financials("NVDA", filing_date=filing_day)
    stock_financials = next(financials, None)
    if not stock_financials:
        continue

    # Compare as strings: `date.year` is an int, while the API is believed to report
    # `fiscal_year` as a string, in which case the raw `!=` was always true and every
    # filing was skipped.
    if str(date.year) != str(stock_financials.fiscal_year):
        continue

    # Every section is handled the same way: copy each data point's numeric `.value`.
    # (Previously the cash-flow columns stored the data-point object itself rather than its
    # value, and a missing balance-sheet key raised KeyError.)
    for section_name, columns in financial_sections:
        section = getattr(stock_financials.financials, section_name)
        for col in columns:
            stock_price_history.at[date, col] = _datapoint_value(section, col)
    

In [288]:
# Display the price table after the financials pass; in the saved output the financial
# columns shown are all empty.
stock_price_history

Unnamed: 0,open,high,low,close,volume,vwap,transactions,list_date,market_cap,share_class_shares_outstanding,...,net_cash_flow,net_cash_flow_from_financing_activities,comprehensive_income_loss,comprehensive_income_loss_attributable_to_parent,other_comprehensive_income_loss,basic_earnings_per_share,cost_of_revenue,gross_profit,operating_expenses,revenues
2022-05-16 04:00:00,175.09,177.88,171.06,172.64,52144598.0,174.449,483480,1999-01-22,404498500000.0,2504010000.0,...,,,,,,,,,,
2022-05-17 04:00:00,180.74,183.71,176.34,181.77,58582971.0,180.0808,563506,1999-01-22,404498500000.0,2504010000.0,...,,,,,,,,,,
2022-05-18 04:00:00,177.05,181.18,168.64,169.38,54516106.0,173.659,536759,1999-01-22,404498500000.0,2504010000.0,...,,,,,,,,,,
2022-05-19 04:00:00,169.37,176.87,167.3405,171.24,62130959.0,172.781,549159,1999-01-22,404498500000.0,2504010000.0,...,,,,,,,,,,
2022-05-20 04:00:00,173.32,174.1,157.55,166.94,73910526.0,163.8915,722854,1999-01-22,404498500000.0,2504010000.0,...,,,,,,,,,,


In [None]:
# # Don't know how to make use of splits in the dataset
# splits = client.list_splits("NVDA")
# splits_data = [split for split in splits]

# # Don't know how to make use of dividends in the dataset
# dividends = client.list_dividends("NVDA")
# dividends_data = [dividend for dividend in dividends]

### Get Ticker Natural Language Dataset

In [182]:
# Add a "news" column initialised to the empty string on every row.
stock_price_history["news"] = ""

In [188]:
# For every trading day, concatenate the title and description of each NVDA news item
# published on that day into one space-separated string.
for date in stock_price_history.index:
    # Named `published_on` (not `datetime`) so the stdlib module name is not shadowed.
    published_on = date.strftime('%Y-%m-%d')
    # `limit` is the page size of the paginated iterator, not a cap on the total. With
    # `limit=1` every single article cost one HTTP request, which is what tripped the
    # requests-per-minute quota; 1000 is the documented maximum for the news endpoint.
    stock_news = client.list_ticker_news("NVDA", published_utc=published_on, limit=1000)

    snippets = []
    for article in stock_news:
        # Title or description can be missing; skip empty parts instead of raising a
        # TypeError on `str + None`.
        snippets.extend(text for text in (article.title, article.description) if text)

    # Join once (linear) rather than growing a string inside the loop; this also drops the
    # stray leading space the old concatenation produced.
    stock_price_history.at[date, "news"] = " ".join(snippets)

2022-05-16 04:00:00


Exception: {"status":"ERROR","request_id":"01104b7bb631292b3a5d6e76a17ba759","error":"You've exceeded the maximum requests per minute, please wait or upgrade your subscription to continue. https://polygon.io/pricing"}

### Get Trades

In [9]:
# Drain the trade iterator for NVDA on 2022-05-20 into a list. `extend` appends items as they
# arrive, so anything fetched before an error is still kept in `trades`.
trades = []
trades.extend(client.list_trades("NVDA", "2022-05-20", limit=5))

Exception: {"status":"NOT_AUTHORIZED","request_id":"98eee780af6f09d6bb88fbdc8ed46674","message":"You are not entitled to this data. Please upgrade your plan at https://polygon.io/pricing"}