Skip to content

Commit

Permalink
BUG: Fix backwards compatibility in get_data_yahoo
Browse files Browse the repository at this point in the history
  • Loading branch information
nehalecky authored and wesm committed Feb 10, 2013
1 parent 6bc8a6b commit b921d1a
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 34 deletions.
60 changes: 35 additions & 25 deletions pandas/io/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,23 +132,23 @@ def get_quote_yahoo(symbols):
return DataFrame(data, index=idx)


def _get_hist_yahoo(name=None, start=None, end=None, retry_count=3,
def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
pause=0):
"""
Get historical data for the given name from yahoo.
Date format is datetime
Returns a DataFrame.
"""
if(name is None):
if(sym is None):
warnings.warn("Need to provide a name.")
return None

start, end = _sanitize_dates(start, end)

yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

url = yahoo_URL + 's=%s' % name + \
url = yahoo_URL + 's=%s' % sym + \
'&a=%s' % (start.month - 1) + \
'&b=%s' % start.day + \
'&c=%s' % start.year + \
Expand Down Expand Up @@ -203,17 +203,18 @@ def _calc_return_index(price_df):
return ret_index


def get_components_yahoo(idx_sym='^DJI'):
def get_components_yahoo(idx_sym):
"""
Returns DataFrame containing list of component information for index
represented in idx_sym from yahoo. Includes component symbol
Returns DataFrame containing list of component information for
index represented in idx_sym from yahoo. Includes component symbol
(ticker), exchange, and name.
Parameters
----------
idx_sym : str
Index symbol, default '^DJI' (Dow Jones Industrial Average)
Stock index symbol
Examples:
'^DJI' (Dow Jones Industrial Average)
'^NYA' (NYSE Composite)
'^IXIC' (NASDAQ Composite)
Expand Down Expand Up @@ -256,44 +257,48 @@ def get_components_yahoo(idx_sym='^DJI'):
return idx_df


def get_data_yahoo(symbols=None, start=None, end=None, adjust_price=False,
ret_index=False, chunk=25, pause=0, **kwargs):
def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, pause=0,
adjust_price=False, ret_index=False, chunksize=25, **kwargs):
"""
Returns DataFrame/Panel of historical stock prices from symbols, over date
range, start to end. To avoid being penalized by Yahoo! Finance servers,
pauses between downloading 'chunks' of symbols can be specified.
Parameters
----------
symbols : string, list-like object (list, tuple, Series), DataFrame
symbols : string, list-like object (list, tuple, Series), or DataFrame
Single stock symbol (ticker), list-like object of symbols or
DataFrame with index containing of stock symbols
DataFrame with index containing stock symbols.
start : string, (defaults to '1/1/2010')
Starting date, timestamp. Parses many different kinds of date
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
end : string, (defaults to today)
end : string, (defaults to today)
Ending date, timestamp. Same format as starting date.
retry_count : int, default 3
Number of times to retry query request.
pause : int, default 0
Time, in seconds, to pause between consecutive queries of chunks. If
single value given for symbol, represents the pause between retries.
adjust_price : bool, default False
Adjust all prices in hist_data ('Open', 'High', 'Low', 'Close') via
'Adj Close' price. Adds 'Adj_Ratio' column and drops 'Adj Close'.
ret_index: bool, default False
Include a simple return index 'Ret_Index' in hist_data.
chunk : int, default 25
If True, adjusts all prices in hist_data ('Open', 'High', 'Low', 'Close')
based on 'Adj Close' price. Adds 'Adj_Ratio' column and drops
'Adj Close'.
ret_index : bool, default False
If True, includes a simple return index 'Ret_Index' in hist_data.
chunksize : int, default 25
Number of symbols to download consecutively before initiating pause.
pause : int, default 0
Time, in seconds, to pause between consecutive chunks.
**kwargs: additional arguments to pass to _get_hist_yahoo
Returns
-------
hist_data : DataFrame (str) or Panel (list-like object, DataFrame)
"""

def dl_mult_symbols(symbols):
stocks = {}
for sym_group in _in_chunks(symbols, chunk):
for sym_group in _in_chunks(symbols, chunksize):
for sym in sym_group:
try:
stocks[sym] = _get_hist_yahoo(name=sym, start=start,
stocks[sym] = _get_hist_yahoo(sym, start=start,
end=end, **kwargs)
except:
warnings.warn('Error with sym: ' + sym + '... skipping.')
Expand All @@ -302,11 +307,16 @@ def dl_mult_symbols(symbols):

return Panel(stocks).swapaxes('items', 'minor')

#If a scalar (single symbol, e.g. 'GOOG')
if 'name' in kwargs:
warnings.warn("Arg 'name' is deprecated, please use 'symbols' instead.",
FutureWarning)
symbols = kwargs['name']

#If a single symbol, (e.g., 'GOOG')
if isinstance(symbols, (str, int)):
sym = symbols
hist_data = _get_hist_yahoo(sym, start=start, end=end, **kwargs)
#Multiple symbols
hist_data = _get_hist_yahoo(sym, start=start, end=end)
#Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
elif isinstance(symbols, DataFrame):
try:
hist_data = dl_mult_symbols(Series(symbols.index))
Expand Down
20 changes: 11 additions & 9 deletions pandas/io/tests/test_yahoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,19 @@ def test_yahoo(self):
else:
raise


# Checks that get_quote_yahoo returns an identical quote row for a symbol
# that appears more than once in the request.
@slow
@network
def test_get_quote(self):
# 'GOOG' is deliberately requested twice, at positions 0 and 2.
df = web.get_quote_yahoo(pd.Series(['GOOG', 'AAPL', 'GOOG']))
# Rows 0 and 2 both come from the 'GOOG' entries, so they must be equal.
assert_series_equal(df.ix[0], df.ix[2])


@slow
@network
def test_get_components(self):

df = web.get_components_yahoo() #Dow Jones (default)
df = web.get_components_yahoo('^DJI') #Dow Jones
assert isinstance(df, pd.DataFrame)
assert len(df) == 30

Expand All @@ -63,7 +65,7 @@ def test_get_components(self):

df = web.get_components_yahoo('^NDX') #NASDAQ-100
assert isinstance(df, pd.DataFrame)
assert len(df) == 100
#assert len(df) == 100
#Usual culprits, should be around for a while
assert 'AAPL' in df.index
assert 'GOOG' in df.index
Expand All @@ -83,25 +85,25 @@ def test_get_data(self):
assert ts[0].dayofyear == 96

dfi = web.get_components_yahoo('^DJI')
pan = web.get_data_yahoo(dfi, 'JAN-01-12', 'JAN-31-13')
pan = web.get_data_yahoo(dfi, 'JAN-01-12', 'JAN-31-12')
expected = [19.02, 28.23, 25.39]
result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
assert result == expected

pan = web.get_data_yahoo(dfi, 'JAN-01-12', 'JAN-31-13',
pan = web.get_data_yahoo(dfi, 'JAN-01-12', 'JAN-31-12',
adjust_price=True)
expected = [18.38, 27.45, 24.54]
result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
assert result == expected

pan = web.get_data_yahoo(dfi, '2011', ret_index=True)
d = [[ 1.31810193, 1.08170606, 1.05281026],
[ 1.31810193, 1.09352518, 1.05658242],
[ 1.30228471, 1.09815005, 1.05054696],
[ 1.30521383, 1.08119219, 1.03545832]]
d = [[ 1.01757469, 1.01130524, 1.02414183],
[ 1.00292912, 1.00770812, 1.01735194],
[ 1.00820152, 1.00462487, 1.01320257],
[ 1.08025776, 0.99845838, 1.00113165]]

expected = pd.DataFrame(d)
result = pan.Ret_Index[['GE', 'INTC', 'MSFT']].ix[-5:-1]
result = pan.Ret_Index.ix['01-18-11':'01-21-11'][['GE', 'INTC', 'MSFT']]
assert_almost_equal(result.values, expected.values)


Expand Down

0 comments on commit b921d1a

Please sign in to comment.