From 2aaf9ecb2a76960c5676dad0d543925fd582f232 Mon Sep 17 00:00:00 2001 From: Noah Stoffman Date: Sun, 18 Feb 2018 11:42:51 -0500 Subject: [PATCH 1/4] Fix Yahoo! price data Adds back support for downloading price data from Yahoo! --- docs/source/whatsnew/v0.7.0.txt | 3 ++- pandas_datareader/yahoo/daily.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/source/whatsnew/v0.7.0.txt b/docs/source/whatsnew/v0.7.0.txt index ea3ce68a..5afd505a 100644 --- a/docs/source/whatsnew/v0.7.0.txt +++ b/docs/source/whatsnew/v0.7.0.txt @@ -54,4 +54,5 @@ Bug Fixes - Handle Morningstar index volume data properly (:issue:`486`). - Added support for optionally passing a custom base_url to the EnigmaReader (:issue:`499`). - Fixed Morningstar 'retry' incrementation (:issue:`513`) -- Updated Google Daily Price API to functional url (:issue:`502`) \ No newline at end of file +- Updated Google Daily Price API to functional url (:issue:`502`) +- Fix Yahoo! price data (:issue:`498`) diff --git a/pandas_datareader/yahoo/daily.py b/pandas_datareader/yahoo/daily.py index b02af3d3..7b70c42c 100644 --- a/pandas_datareader/yahoo/daily.py +++ b/pandas_datareader/yahoo/daily.py @@ -125,16 +125,16 @@ def _read_one_data(self, url, params): ptrn = r'root\.App\.main = (.*?);\n}\(this\)\);' jsn = json.loads(re.search(ptrn, resp.text, re.DOTALL).group(1)) df = DataFrame( - jsn['context']['dispatcher']['stores'] - ['HistoricalPriceStore']['prices'] - ) + jsn['context']['dispatcher']['stores'] + ['HistoricalPriceStore']['prices'] + ) df['date'] = to_datetime(df['date'], unit='s').dt.date df = df.dropna(subset=['close']) df = df[['date', 'high', 'low', 'open', 'close', 'volume', 'adjclose']] if self.ret_index: - df['Ret_Index'] = _calc_return_index(df['adjclose']) + df['Ret_Index'] = _calc_return_index(df['Adj Close']) if self.adjust_price: df = _adjust_prices(df) return df.sort_index().dropna(how='all') From de57b612ee5b8aec144ed85dc631ddb03b70f8a8 Mon Sep 17 00:00:00 2001 From: 
Noah Stoffman Date: Mon, 19 Feb 2018 21:38:26 -0500 Subject: [PATCH 2/4] Bug fix --- pandas_datareader/yahoo/daily.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas_datareader/yahoo/daily.py b/pandas_datareader/yahoo/daily.py index 7b70c42c..b08a1066 100644 --- a/pandas_datareader/yahoo/daily.py +++ b/pandas_datareader/yahoo/daily.py @@ -134,7 +134,7 @@ def _read_one_data(self, url, params): 'volume', 'adjclose']] if self.ret_index: - df['Ret_Index'] = _calc_return_index(df['Adj Close']) + df['Ret_Index'] = _calc_return_index(df['adjclose']) if self.adjust_price: df = _adjust_prices(df) return df.sort_index().dropna(how='all') From c5bc91eb0f7af07192b67107928feddc60dd6cc0 Mon Sep 17 00:00:00 2001 From: Noah Stoffman Date: Sun, 25 Feb 2018 21:21:26 -0500 Subject: [PATCH 3/4] Add dividends, splits, and currencies Dividends and splits are now returned along with prices in a dict of DataFrames. Only one call needed per ticker. Currency pairs (e.g. CADUSD) can now be accessed with YahooFXReader. --- docs/source/whatsnew/v0.7.0.txt | 3 +- pandas_datareader/data.py | 63 ----------- pandas_datareader/yahoo/FX.py | 113 ++++++++++++++++++++ pandas_datareader/yahoo/actions.py | 53 ---------- pandas_datareader/yahoo/daily.py | 164 +++++++++++++++++------------ 5 files changed, 214 insertions(+), 182 deletions(-) create mode 100644 pandas_datareader/yahoo/FX.py delete mode 100644 pandas_datareader/yahoo/actions.py diff --git a/docs/source/whatsnew/v0.7.0.txt b/docs/source/whatsnew/v0.7.0.txt index 5afd505a..e92e77f4 100644 --- a/docs/source/whatsnew/v0.7.0.txt +++ b/docs/source/whatsnew/v0.7.0.txt @@ -50,9 +50,10 @@ Bug Fixes - Added support for passing the API KEY to QuandlReader either directly or by setting the environmental variable QUANDL_API_KEY (:issue:`485`). -- Added back support for Yahoo! price data - Handle Morningstar index volume data properly (:issue:`486`). 
- Added support for optionally passing a custom base_url to the EnigmaReader (:issue:`499`). - Fixed Morningstar 'retry' incrementation (:issue:`513`) - Updated Google Daily Price API to functional url (:issue:`502`) - Fix Yahoo! price data (:issue:`498`) +- Added back support for Yahoo! price, dividends, and splits data for stocks + and currency pairs (:issue:`487`). diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py index 107edf01..a0215164 100644 --- a/pandas_datareader/data.py +++ b/pandas_datareader/data.py @@ -32,7 +32,6 @@ RobinhoodQuoteReader from pandas_datareader.stooq import StooqDailyReader from pandas_datareader.tiingo import TiingoDailyReader, TiingoQuoteReader -from pandas_datareader.yahoo.actions import (YahooActionReader, YahooDivReader) from pandas_datareader.yahoo.components import _get_data as \ get_components_yahoo from pandas_datareader.yahoo.daily import YahooDailyReader @@ -81,7 +80,6 @@ def get_quote_av(*args, **kwargs): def get_data_yahoo_actions(*args, **kwargs): raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Actions')) - return YahooActionReader(*args, **kwargs).read() def get_quote_yahoo(*args, **kwargs): @@ -313,67 +311,6 @@ def DataReader(name, data_source=None, start=None, end=None, retry_count=retry_count, pause=pause, session=session).read() - elif data_source == "yahoo-actions": - raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Actions')) - return YahooActionReader(symbols=name, start=start, end=end, - retry_count=retry_count, pause=pause, - session=session).read() - - elif data_source == "yahoo-dividends": - comp = 'Yahoo Dividends' - raise ImmediateDeprecationError(DEP_ERROR_MSG.format(comp)) - return YahooDivReader(symbols=name, start=start, end=end, - adjust_price=False, chunksize=25, - retry_count=retry_count, pause=pause, - session=session, interval='d').read() - - elif data_source == "av-forex": - return AVForexReader(symbols=name, retry_count=retry_count, - pause=pause, 
session=session, - api_key=access_key).read() - - elif data_source == "av-daily": - return AVTimeSeriesReader(symbols=name, - function="TIME_SERIES_DAILY", start=start, - end=end, retry_count=retry_count, - pause=pause, session=session, - api_key=access_key).read() - - elif data_source == "av-daily-adjusted": - return AVTimeSeriesReader(symbols=name, - function="TIME_SERIES_DAILY_ADJUSTED", - start=start, end=end, - retry_count=retry_count, pause=pause, - session=session, api_key=access_key).read() - - elif data_source == "av-weekly": - return AVTimeSeriesReader(symbols=name, - function="TIME_SERIES_WEEKLY", start=start, - end=end, retry_count=retry_count, - pause=pause, session=session, - api_key=access_key).read() - - elif data_source == "av-weekly-adjusted": - return AVTimeSeriesReader(symbols=name, - function="TIME_SERIES_WEEKLY_ADJUSTED", - start=start, end=end, - retry_count=retry_count, pause=pause, - session=session, api_key=access_key).read() - - elif data_source == "av-monthly": - return AVTimeSeriesReader(symbols=name, - function="TIME_SERIES_MONTHLY", start=start, - end=end, retry_count=retry_count, - pause=pause, session=session, - api_key=access_key).read() - - elif data_source == "av-monthly-adjusted": - return AVTimeSeriesReader(symbols=name, - function="TIME_SERIES_MONTHLY_ADJUSTED", - start=start, end=end, - retry_count=retry_count, pause=pause, - session=session, api_key=access_key).read() - elif data_source == "google": return GoogleDailyReader(symbols=name, start=start, end=end, chunksize=25, diff --git a/pandas_datareader/yahoo/FX.py b/pandas_datareader/yahoo/FX.py new file mode 100644 index 00000000..17b61507 --- /dev/null +++ b/pandas_datareader/yahoo/FX.py @@ -0,0 +1,113 @@ +import time +import json +import warnings +from pandas import (DataFrame, Series, to_datetime, concat) +from pandas_datareader.yahoo.daily import YahooDailyReader +import pandas.compat as compat +from pandas_datareader._utils import (RemoteDataError, SymbolWarning) 
+from pandas.core.indexes.numeric import Int64Index + + +class YahooFXReader(YahooDailyReader): + """ + Returns DataFrame of historical prices for currencies + + Parameters + ---------- + symbols : string, array-like object (list, tuple, Series), or DataFrame + Single stock symbol (ticker), array-like object of symbols or + DataFrame with index containing stock symbols. + start : string, (defaults to '1/1/2010') + Starting date, timestamp. Parses many different kinds of date + representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980') + end : string, (defaults to today) + Ending date, timestamp. Same format as starting date. + retry_count : int, default 3 + Number of times to retry query request. + pause : int, default 0 + Time, in seconds, to pause between consecutive queries of chunks. If + single value given for symbol, represents the pause between retries. + session : Session, default None + requests.sessions.Session instance to be used + chunksize : int, default 25 (NOT IMPLEMENTED) + Number of symbols to download consecutively before initiating pause. 
+ interval : string, default '1d' + Valid values are '1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', + '10y', 'ytd', 'max' + """ + + def _get_params(self, symbol): + unix_start = int(time.mktime(self.start.timetuple())) + day_end = self.end.replace(hour=23, minute=59, second=59) + unix_end = int(time.mktime(day_end.timetuple())) + + params = { + 'symbol': symbol + '=X', + 'period1': unix_start, + 'period2': unix_end, + 'interval': self.interval, # deal with this + 'includePrePost': 'true', + 'events': 'div|split|earn', + 'corsDomain': 'finance.yahoo.com' + } + return params + + def read(self): + """Read data""" + try: + # If a single symbol, (e.g., 'GOOG') + if isinstance(self.symbols, (compat.string_types, int)): + df = self._read_one_data(self.symbols) + + # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT']) + elif isinstance(self.symbols, DataFrame): + df = self._dl_mult_symbols(self.symbols.index) + else: + df = self._dl_mult_symbols(self.symbols) + + if isinstance(df.index, Int64Index): + df = df.set_index('Date') + + if 'Volume' in df: + df = df.drop('Volume', axis=1) + + return df.sort_index().dropna(how='all') + finally: + self.close() + + def _read_one_data(self, symbol): + """ read one data from specified URL """ + url = 'https://query1.finance.yahoo.com/v8/finance/chart/{}=X'\ + .format(symbol) + params = self._get_params(symbol) + + resp = self._get_response(url, params=params) + jsn = json.loads(resp.text) + + data = jsn['chart']['result'][0] + df = DataFrame(data['indicators']['quote'][0]) + df.insert(0, 'date', to_datetime(Series(data['timestamp']), + unit='s').dt.date) + df.columns = map(str.capitalize, df.columns) + return df + + def _dl_mult_symbols(self, symbols): + stocks = {} + failed = [] + passed = [] + for sym in symbols: + try: + df = self._read_one_data(sym) + df['PairCode'] = sym + stocks[sym] = df + passed.append(sym) + except IOError: + msg = 'Failed to read symbol: {0!r}, replacing with NaN.' 
+ warnings.warn(msg.format(sym), SymbolWarning) + failed.append(sym) + + if len(passed) == 0: + msg = "No data fetched using {0!r}" + raise RemoteDataError(msg.format(self.__class__.__name__)) + else: + return concat(stocks).set_index(['PairCode', 'Date']) diff --git a/pandas_datareader/yahoo/actions.py b/pandas_datareader/yahoo/actions.py deleted file mode 100644 index 5965971a..00000000 --- a/pandas_datareader/yahoo/actions.py +++ /dev/null @@ -1,53 +0,0 @@ -from pandas import (concat, DataFrame) -from pandas_datareader.yahoo.daily import YahooDailyReader - - -class YahooActionReader(YahooDailyReader): - """ - Returns DataFrame of historical corporate actions (dividends and stock - splits) from symbols, over date range, start to end. All dates in the - resulting DataFrame correspond with dividend and stock split ex-dates. - """ - def read(self): - dividends = YahooDivReader(symbols=self.symbols, - start=self.start, - end=self.end, - retry_count=self.retry_count, - pause=self.pause, - session=self.session).read() - # Add a label column so we can combine our two DFs - if isinstance(dividends, DataFrame): - dividends["action"] = "DIVIDEND" - dividends = dividends.rename(columns={'Dividends': 'value'}) - - splits = YahooSplitReader(symbols=self.symbols, - start=self.start, - end=self.end, - retry_count=self.retry_count, - pause=self.pause, - session=self.session).read() - # Add a label column so we can combine our two DFs - if isinstance(splits, DataFrame): - splits["action"] = "SPLIT" - splits = splits.rename(columns={'Stock Splits': 'value'}) - # Converts fractional form splits (i.e. 
"2/1") into conversion - # ratios, then take the reciprocal - splits['value'] = splits.apply(lambda x: 1/eval(x['value']), axis=1) # noqa - - output = concat([dividends, splits]).sort_index(ascending=False) - - return output - - -class YahooDivReader(YahooDailyReader): - - @property - def service(self): - return 'div' - - -class YahooSplitReader(YahooDailyReader): - - @property - def service(self): - return 'split' diff --git a/pandas_datareader/yahoo/daily.py b/pandas_datareader/yahoo/daily.py index b08a1066..382de153 100644 --- a/pandas_datareader/yahoo/daily.py +++ b/pandas_datareader/yahoo/daily.py @@ -2,19 +2,20 @@ import json import time import warnings -import numpy as np -from pandas import Panel, DataFrame, to_datetime -from pandas_datareader.base import (_DailyBaseReader, _in_chunks) +from pandas import (DataFrame, to_datetime, concat) +from pandas_datareader.base import _DailyBaseReader from pandas_datareader._utils import (RemoteDataError, SymbolWarning) +from pandas.core.indexes.numeric import Int64Index import pandas.compat as compat class YahooDailyReader(_DailyBaseReader): """ - Returns DataFrame/Panel of historical stock prices from symbols, over date - range, start to end. To avoid being penalized by Yahoo! Finance servers, - pauses between downloading 'chunks' of symbols can be specified. + Returns a dictionary of DataFrames with historical stock prices, dividends, + and splits from symbols, over date range, start to end. To avoid being + penalized by Yahoo! Finance servers, pauses between downloading 'chunks' of + symbols can be specified. 
Parameters ---------- @@ -48,7 +49,8 @@ class YahooDailyReader(_DailyBaseReader): def __init__(self, symbols=None, start=None, end=None, retry_count=3, pause=0.35, session=None, adjust_price=False, - ret_index=False, chunksize=1, interval='d'): + ret_index=False, chunksize=1, interval='d', + get_actions=True): super(YahooDailyReader, self).__init__(symbols=symbols, start=start, end=end, retry_count=retry_count, @@ -70,6 +72,7 @@ def __init__(self, symbols=None, start=None, end=None, retry_count=3, self.adjust_price = adjust_price self.ret_index = ret_index self.interval = interval + self.get_actions = get_actions if self.interval not in ['d', 'wk', 'mo', 'm', 'w']: raise ValueError("Invalid interval: valid values are 'd', 'wk' and 'mo'. 'm' and 'w' have been implemented for " # noqa @@ -83,15 +86,6 @@ def __init__(self, symbols=None, start=None, end=None, retry_count=3, self.interval = '1' + self.interval - @property - def service(self): - return 'history' - - @staticmethod - def yurl(symbol): - return 'https://finance.yahoo.com/quote/{}/history'\ - .format(symbol) - def _get_params(self, symbol): unix_start = int(time.mktime(self.start.timetuple())) day_end = self.end.replace(hour=23, minute=59, second=59) @@ -102,72 +96,112 @@ def _get_params(self, symbol): 'period2': unix_end, 'interval': self.interval, 'frequency': self.interval, - 'filter': self.service + 'filter': 'history' } return params def read(self): """Read data""" - # If a single symbol, (e.g., 'GOOG') - if isinstance(self.symbols, (compat.string_types, int)): - df = self._read_one_data(self.yurl(self.symbols), - params=self._get_params(self.symbols)) - # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT']) - elif isinstance(self.symbols, DataFrame): - df = self._dl_mult_symbols(self.symbols.index) - else: - df = self._dl_mult_symbols(self.symbols) - return df + try: + # If a single symbol, (e.g., 'GOOG') + if isinstance(self.symbols, (compat.string_types, int)): + dfs = 
self._read_one_data(self.symbols) + + # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT']) + elif isinstance(self.symbols, DataFrame): + dfs = self._dl_mult_symbols(self.symbols.index) + else: + dfs = self._dl_mult_symbols(self.symbols) + + for k in dfs: + if isinstance(dfs[k].index, Int64Index): + dfs[k] = dfs[k].set_index('Date') + dfs[k] = dfs[k].sort_index().dropna(how='all') + + if self.ret_index: + dfs['prices']['Ret_Index'] = \ + _calc_return_index(dfs['prices']['Adj Close']) + if self.adjust_price: + dfs['prices'] = _adjust_prices(dfs['prices']) + + return dfs + finally: + self.close() + + def _read_one_data(self, symbol): + """ read one data from specified symbol """ + url = 'https://finance.yahoo.com/quote/{}/history'.format(symbol) + params = self._get_params(symbol) - def _read_one_data(self, url, params): - """ read one data from specified URL """ resp = self._get_response(url, params=params) ptrn = r'root\.App\.main = (.*?);\n}\(this\)\);' - jsn = json.loads(re.search(ptrn, resp.text, re.DOTALL).group(1)) - df = DataFrame( - jsn['context']['dispatcher']['stores'] - ['HistoricalPriceStore']['prices'] - ) - df['date'] = to_datetime(df['date'], unit='s').dt.date - df = df.dropna(subset=['close']) - df = df[['date', 'high', 'low', 'open', 'close', - 'volume', 'adjclose']] - - if self.ret_index: - df['Ret_Index'] = _calc_return_index(df['adjclose']) - if self.adjust_price: - df = _adjust_prices(df) - return df.sort_index().dropna(how='all') + try: + j = json.loads(re.search(ptrn, resp.text, re.DOTALL).group(1)) + data = j['context']['dispatcher']['stores']['HistoricalPriceStore'] + except KeyError: + msg = 'No data fetched for symbol {} using {}' + raise RemoteDataError(msg.format(symbol, self.__class__.__name__)) + + # price data + prices = DataFrame(data['prices']) + prices.columns = map(str.capitalize, prices.columns) + prices['Date'] = to_datetime(prices['Date'], unit='s').dt.date + + prices = prices[prices['Data'].isnull()] + prices = 
prices[['Date', 'High', 'Low', 'Open', 'Close', 'Volume', + 'Adjclose']] + prices = prices.rename(columns={'Adjclose': 'Adj Close'}) + + dfs = {'prices': prices} + + # dividends & splits data + if self.get_actions: + actions = DataFrame(data['eventsData']) + actions.columns = map(str.capitalize, actions.columns) + actions['Date'] = to_datetime(actions['Date'], unit='s').dt.date + + types = actions['Type'].unique() + if 'DIVIDEND' in types: + divs = actions[actions.Type == 'DIVIDEND'].copy() + divs = divs[['Date', 'Amount']].reset_index(drop=True) + dfs['dividends'] = divs + + if 'SPLIT' in types: + splits = actions[actions.Type == 'SPLIT'].copy() + splits['SplitRatio'] = splits['Splitratio'].apply( + lambda x: eval(x)) + splits = splits[['Date', 'Denominator', 'Numerator', + 'SplitRatio']] + splits = splits.reset_index(drop=True) + dfs['splits'] = splits + + return dfs def _dl_mult_symbols(self, symbols): stocks = {} failed = [] passed = [] - for sym_group in _in_chunks(symbols, 1): # ignoring chunksize - for sym in sym_group: - try: - stocks[sym] = self._read_one_data(self.yurl(sym), - self._get_params(sym)) - passed.append(sym) - except IOError: - msg = 'Failed to read symbol: {0!r}, replacing with NaN.' - warnings.warn(msg.format(sym), SymbolWarning) - failed.append(sym) + for sym in symbols: + try: + dfs = self._read_one_data(sym) + for k in dfs: + dfs[k]['Ticker'] = sym + if k not in stocks: + stocks[k] = [] + stocks[k].append(dfs[k]) + passed.append(sym) + except IOError: + msg = 'Failed to read symbol: {0!r}, replacing with NaN.' 
+ warnings.warn(msg.format(sym), SymbolWarning) + failed.append(sym) if len(passed) == 0: msg = "No data fetched using {0!r}" raise RemoteDataError(msg.format(self.__class__.__name__)) - try: - if len(stocks) > 0 and len(failed) > 0 and len(passed) > 0: - df_na = stocks[passed[0]].copy() - df_na[:] = np.nan - for sym in failed: - stocks[sym] = df_na - return Panel(stocks).swapaxes('items', 'minor') - except AttributeError: - # cannot construct a panel with just 1D nans indicating no data - msg = "No data fetched using {0!r}" - raise RemoteDataError(msg.format(self.__class__.__name__)) + else: + for k in stocks: + dfs[k] = concat(stocks[k]).set_index(['Ticker', 'Date']) + return dfs def _adjust_prices(hist_data, price_list=None): From cdf0c9a7cf1491806c610c7c28c249c7c4fa2022 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 13 Apr 2018 15:28:23 +0100 Subject: [PATCH 4/4] CLN: Rename module, backward compat Rename FX to fx Remove use of Int64Index which was added in 0.20 --- pandas_datareader/yahoo/daily.py | 3 +-- pandas_datareader/yahoo/{FX.py => fx.py} | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) rename pandas_datareader/yahoo/{FX.py => fx.py} (97%) diff --git a/pandas_datareader/yahoo/daily.py b/pandas_datareader/yahoo/daily.py index 382de153..494dc480 100644 --- a/pandas_datareader/yahoo/daily.py +++ b/pandas_datareader/yahoo/daily.py @@ -5,7 +5,6 @@ from pandas import (DataFrame, to_datetime, concat) from pandas_datareader.base import _DailyBaseReader from pandas_datareader._utils import (RemoteDataError, SymbolWarning) -from pandas.core.indexes.numeric import Int64Index import pandas.compat as compat @@ -114,7 +113,7 @@ def read(self): dfs = self._dl_mult_symbols(self.symbols) for k in dfs: - if isinstance(dfs[k].index, Int64Index): + if 'Date' in dfs[k]: dfs[k] = dfs[k].set_index('Date') dfs[k] = dfs[k].sort_index().dropna(how='all') diff --git a/pandas_datareader/yahoo/FX.py b/pandas_datareader/yahoo/fx.py similarity index 97% rename from 
pandas_datareader/yahoo/FX.py rename to pandas_datareader/yahoo/fx.py index 17b61507..863637c3 100644 --- a/pandas_datareader/yahoo/FX.py +++ b/pandas_datareader/yahoo/fx.py @@ -5,7 +5,6 @@ from pandas_datareader.yahoo.daily import YahooDailyReader import pandas.compat as compat from pandas_datareader._utils import (RemoteDataError, SymbolWarning) -from pandas.core.indexes.numeric import Int64Index class YahooFXReader(YahooDailyReader): @@ -65,7 +64,7 @@ def read(self): else: df = self._dl_mult_symbols(self.symbols) - if isinstance(df.index, Int64Index): + if 'Date' in df: df = df.set_index('Date') if 'Volume' in df: