In [1]:
import os
import pandas_datareader.data as web
from datetime import datetime
from pprint import pprint
import pandas as pd   

## Download HTML table with S&P 500 constituents

In [2]:
# Wikipedia page that lists the S&P 500 member companies.
sp_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

# read_html returns a list of tables found on the page; the constituents
# table is the first one, with its first row used as the header.
sp500_tables = pd.read_html(sp_url, header=0)
sp500_constituents = sp500_tables[0]

In [3]:
# Print the column names, dtypes, and non-null counts of the constituents table.
sp500_constituents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 505 entries, 0 to 504
Data columns (total 9 columns):
Security                 505 non-null object
Symbol                   505 non-null object
SEC filings              505 non-null object
GICS Sector              505 non-null object
GICS Sub Industry        505 non-null object
Headquarters Location    505 non-null object
Date first added         402 non-null object
CIK                      505 non-null int64
Founded                  172 non-null object
dtypes: int64(1), object(8)
memory usage: 35.6+ KB


In [4]:
# Preview the first rows of the constituents table.
sp500_constituents.head()

Unnamed: 0,Security,Symbol,SEC filings,GICS Sector,GICS Sub Industry,Headquarters Location,Date first added,CIK,Founded
0,3M Company,MMM,reports,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",,66740,1902
1,Abbott Laboratories,ABT,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
2,AbbVie Inc.,ABBV,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
3,ABIOMED Inc,ABMD,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981
4,Accenture plc,ACN,reports,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


## pandas-datareader for Market Data

See [documentation](https://pandas-datareader.readthedocs.io/en/latest/); functionality frequently changes as underlying provider APIs evolve.

In [5]:
# Date range for the request; `start` is given as a plain year string.
start = '2014'
end = datetime(2017, 5, 24)

# Price history for FB from the 'yahoo' data source.
yahoo = web.DataReader(name='FB', data_source='yahoo', start=start, end=end)
yahoo.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 856 entries, 2014-01-02 to 2017-05-25
Data columns (total 6 columns):
High         856 non-null float64
Low          856 non-null float64
Open         856 non-null float64
Close        856 non-null float64
Volume       856 non-null int64
Adj Close    856 non-null float64
dtypes: float64(5), int64(1)
memory usage: 46.8 KB


### IEX

In [6]:
# Only a start date is passed here, so the reader's default end date applies.
start = datetime(2015, 2, 9)

# Price history for FB from the 'iex' data source.
iex = web.DataReader(name='FB', data_source='iex', start=start)
iex.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1044 entries, 2015-02-09 to 2019-04-02
Data columns (total 5 columns):
open      1044 non-null float64
high      1044 non-null float64
low       1044 non-null float64
close     1044 non-null float64
volume    1044 non-null int64
dtypes: float64(4), int64(1)
memory usage: 48.9+ KB


In [7]:
# Show the most recent rows returned by the IEX reader.
iex.tail()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-03-27,167.85,168.94,164.79,165.87,10620299
2019-03-28,164.57,166.72,163.33,165.55,10689171
2019-03-29,166.39,167.19,164.81,166.69,13455454
2019-04-01,167.83,168.9,167.2789,168.7,10381490
2019-04-02,170.14,174.9,169.55,174.2,23946529


#### Book Data

DEEP is used to receive real-time depth of book quotations direct from IEX. The depth of book quotations received via DEEP provide an aggregated size of resting displayed orders at a price and side, and do not indicate the size or number of individual orders at any price level. Non-displayed orders and non-displayed portions of reserve orders are not represented in DEEP.

DEEP also provides last trade price and size information. Trades resulting from either displayed or non-displayed orders matching on IEX will be reported. Routed executions will not be reported.

Only works on trading days; outside market hours the `bids` and `asks` lists can come back empty.

In [8]:
# Fetch the IEX DEEP book for AAPL; the result is indexed by field name in the cells below.
book = web.get_iex_book('AAPL')

In [9]:
# Field names present in the book response (iterating a mapping yields its keys).
list(book)

['symbol',
 'marketPercent',
 'volume',
 'lastSalePrice',
 'lastSaleSize',
 'lastSaleTime',
 'lastUpdated',
 'bids',
 'asks',
 'systemEvent',
 'tradingStatus',
 'opHaltStatus',
 'ssrStatus',
 'securityEvent',
 'trades',
 'tradeBreaks']

In [10]:
# Build one frame per side of the book, tag each row with its side,
# then stack both sides into a single frame.
sides = ['bids', 'asks']
orders = pd.concat(
    [pd.DataFrame(book[side]).assign(side=side) for side in sides]
)
orders.head()

Unnamed: 0,side


In [11]:
# Dump every field of the book response: fields that can be laid out as a
# table are printed as DataFrames, everything else is printed verbatim.
for key, value in book.items():
    print(f'\n{key}')
    try:
        print(pd.DataFrame(value))
    except (ValueError, TypeError):
        # Scalars and flat dicts of scalars are rejected by the DataFrame
        # constructor; fall back to the raw value. Catching only these errors
        # (instead of a bare `except`) keeps KeyboardInterrupt and genuine
        # bugs visible.
        print(value)


symbol
AAPL

marketPercent
0.02492

volume
576430

lastSalePrice
195.38

lastSaleSize
100

lastSaleTime
1554321598215

lastUpdated
1554323446927

bids
Empty DataFrame
Columns: []
Index: []

asks
Empty DataFrame
Columns: []
Index: []

systemEvent
{'systemEvent': 'C', 'timestamp': 1554325800006}

tradingStatus
{'status': 'T', 'reason': '    ', 'timestamp': 1554290551232}

opHaltStatus
{'isHalted': False, 'timestamp': 1554290551232}

ssrStatus
{'isSSR': False, 'detail': ' ', 'timestamp': 1554290551232}

securityEvent
{'securityEvent': 'MarketClose', 'timestamp': 1554321600000}

trades
    isISO  isOddLot  isOutsideRegularHours  isSinglePriceCross  \
0    True     False                  False               False   
1    True     False                  False               False   
2   False     False                  False               False   
3    True     False                  False               False   
4    True     False                  False               False   
5   False     

In [12]:
# Tabulate the 'trades' entries of the book and preview the first rows.
pd.DataFrame(book['trades']).head()

Unnamed: 0,isISO,isOddLot,isOutsideRegularHours,isSinglePriceCross,isTradeThroughExempt,price,size,timestamp,tradeId
0,True,False,False,False,True,195.38,100,1554321598215,909783074
1,True,False,False,False,False,195.38,100,1554321597300,909583265
2,False,False,False,False,False,195.36,100,1554321595721,909280140
3,True,False,False,False,False,195.32,100,1554321595020,909064229
4,True,False,False,False,False,195.32,600,1554321595020,909064195


### Quandl

In [13]:
symbol = 'FB.US'

# pandas-datareader does not pick up `quandl.ApiConfig.api_key`; the key must
# be passed via `api_key` or be present in the QUANDL_API_KEY environment
# variable. Read it from the environment so no credential is stored in the
# notebook (os.getenv returns None when unset, in which case the reader
# reports the missing key itself).
quandl = web.DataReader(symbol, 'quandl', '2015-01-01',
                        api_key=os.getenv('QUANDL_API_KEY'))
quandl.info()

ValueError: The Quandl API key must be provided either through the api_key variable or through the environmental variable QUANDL_API_KEY.

### FRED

In [None]:
# Date window shared by the FRED requests.
start = datetime(2010, 1, 1)
end = datetime(2013, 1, 27)

# 'GDP' series from the 'fred' data source.
gdp = web.DataReader(name='GDP', data_source='fred', start=start, end=end)
gdp.info()

In [None]:
# Several FRED series can be requested at once by passing a list of series IDs.
cpi_series = ['CPIAUCSL', 'CPILFESL']
inflation = web.DataReader(cpi_series, 'fred', start, end)
inflation.info()

### Fama/French

In [None]:
# Display the datasets the Fama/French reader reports as available.
from pandas_datareader.famafrench import get_available_datasets
get_available_datasets()

In [None]:
# The result is indexed by key; 'DESCR' is presumably the dataset's
# textual description (TODO confirm against the reader docs).
ds = web.DataReader(name='5_Industry_Portfolios', data_source='famafrench')
print(ds['DESCR'])

### World Bank

### OECD

### EuroStat



### Stooq

Notes on Stooq:
- S&P 500 data: possibly sourced from Shiller? (TODO: confirm)
- Lots of data, but hard to navigate; Eastern European focus.

In [None]:
# Parse every HTML table on the Stooq page and count how many were found.
index_url = 'https://stooq.com/t/'
ix = pd.read_html(io=index_url)
len(ix)

The pandas-datareader Stooq reader used below is currently broken, awaiting a [fix](https://github.com/pydata/pandas-datareader/issues/594).

In [None]:
# '^SPX' from the 'stooq' data source, starting 2000-01-01.
f = web.DataReader(name='^SPX', data_source='stooq', start='20000101')
f.info()

In [None]:
# Preview the first rows of the Stooq result.
f.head()

In [None]:
# Save the Stooq result to a CSV file in the current working directory.
f.to_csv('sp_test.csv')

In [None]:
%matplotlib inline
f.resample('M').Close.mean().plot()

### NASDAQ Symbols

In [None]:
# Fetch the NASDAQ symbol table via pandas-datareader and summarize its columns.
from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
symbols = get_nasdaq_symbols()
symbols.info()

In [None]:
# Parse every HTML table on the NASDAQ company-screener page and count them.
url = 'https://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=NASDAQ'
res = pd.read_html(io=url)
len(res)

In [None]:
# Summarize each table parsed from the page.
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" after every table).
for r in res:
    r.info()

### Tiingo

Requires [signing up](https://api.tiingo.com/) and storing the API key in the `TIINGO_API_KEY` environment variable.

In [None]:
# Fetch GOOG data from Tiingo; the API key is read from the TIINGO_API_KEY
# environment variable (os.getenv yields None if it is not set).
df = web.get_data_tiingo('GOOG', api_key=os.getenv('TIINGO_API_KEY'))

In [None]:
# Print a structural summary of the Tiingo result.
df.info()