Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

BUG/TST: catch socket.error in py2/3.2 and ConnectionError in py3.3 #3985

Merged
merged 2 commits into from Jun 26, 2013
Jump to file or symbol
Failed to load files and symbols.
+65 −58
Split
View
@@ -281,6 +281,8 @@ pandas 0.12
- Fixed flattening of columns when renaming MultiIndex columns DataFrame (:issue:`4004`)
- Fix ``Series.clip`` for datetime series. NA/NaN threshold values will now throw ValueError (:issue:`3996`)
- Fixed insertion issue into DataFrame, after rename (:issue:`4032`)
+ - Fixed testing issue where too many sockets were open, thus leading to a
+ connection reset issue (:issue:`3982`, :issue:`3985`)
pandas 0.11.0
View
@@ -420,6 +420,8 @@ Bug Fixes
explicitly checking a website as a proxy for seeing if there is network
connectivity. Plus, new ``optional_args`` decorator factory for decorators.
(:issue:`3910`, :issue:`3914`)
+ - Fixed testing issue where too many sockets were open, thus leading to a
+ connection reset issue (:issue:`3982`, :issue:`3985`)
See the :ref:`full release notes
<release>` or issue tracker
View
@@ -8,9 +8,9 @@
import numpy as np
import datetime as dt
import urllib
-import urllib2
import time
-import warnings
+from contextlib import closing
+from urllib2 import urlopen
from zipfile import ZipFile
from pandas.util.py3compat import StringIO, BytesIO, bytes_to_str
@@ -109,10 +109,11 @@ def get_quote_yahoo(symbols):
data = dict(zip(codes.keys(), [[] for i in range(len(codes))]))
- urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (
- sym_list, request)
+ url_str = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (sym_list,
+ request)
- lines = urllib2.urlopen(urlStr).readlines()
+ with closing(urlopen(url_str)) as url:
+ lines = url.readlines()
for line in lines:
fields = line.decode('utf-8').strip().split(',')
@@ -151,29 +152,29 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
yahoo_URL = 'http://ichart.yahoo.com/table.csv?'
- url = yahoo_URL + 's=%s' % sym + \
- '&a=%s' % (start.month - 1) + \
- '&b=%s' % start.day + \
- '&c=%s' % start.year + \
- '&d=%s' % (end.month - 1) + \
- '&e=%s' % end.day + \
- '&f=%s' % end.year + \
- '&g=d' + \
- '&ignore=.csv'
-
- for _ in range(retry_count):
- resp = urllib2.urlopen(url)
- if resp.code == 200:
- lines = resp.read()
- rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
- parse_dates=True)[::-1]
-
- # Yahoo! Finance sometimes does this awesome thing where they
- # return 2 rows for the most recent business day
- if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover
- rs = rs[:-1]
-
- return rs
+ url = (yahoo_URL + 's=%s' % sym +
+ '&a=%s' % (start.month - 1) +
+ '&b=%s' % start.day +
+ '&c=%s' % start.year +
+ '&d=%s' % (end.month - 1) +
+ '&e=%s' % end.day +
+ '&f=%s' % end.year +
+ '&g=d' +
+ '&ignore=.csv')
+
+ for _ in xrange(retry_count):
+ with closing(urlopen(url)) as resp:
+ if resp.code == 200:
+ lines = resp.read()
+ rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
+ parse_dates=True)[::-1]
+
+ # Yahoo! Finance sometimes does this awesome thing where they
+ # return 2 rows for the most recent business day
+ if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover
+ rs = rs[:-1]
+
+ return rs
time.sleep(pause)
@@ -198,17 +199,19 @@ def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
google_URL = 'http://www.google.com/finance/historical?'
# www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
- url = google_URL + urllib.urlencode({"q": sym, \
- "startdate": start.strftime('%b %d, %Y'), \
- "enddate": end.strftime('%b %d, %Y'), "output": "csv" })
- for _ in range(retry_count):
- resp = urllib2.urlopen(url)
- if resp.code == 200:
- lines = resp.read()
- rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
- parse_dates=True)[::-1]
-
- return rs
+ url = google_URL + urllib.urlencode({"q": sym,
+ "startdate": start.strftime('%b %d, '
+ '%Y'),
+ "enddate": end.strftime('%b %d, %Y'),
+ "output": "csv"})
+ for _ in xrange(retry_count):
+ with closing(urlopen(url)) as resp:
+ if resp.code == 200:
+ lines = resp.read()
+ rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
+ parse_dates=True)[::-1]
+
+ return rs
time.sleep(pause)
@@ -280,19 +283,19 @@ def get_components_yahoo(idx_sym):
'&e=.csv&h={2}'
idx_mod = idx_sym.replace('^', '@%5E')
- urlStr = url.format(idx_mod, stats, 1)
+ url_str = url.format(idx_mod, stats, 1)
idx_df = DataFrame()
mask = [True]
comp_idx = 1
- #LOOP across component index structure,
- #break when no new components are found
- while (True in mask):
- urlStr = url.format(idx_mod, stats, comp_idx)
- lines = (urllib.urlopen(urlStr).read().decode('utf-8').strip().
- strip('"').split('"\r\n"'))
-
+ # LOOP across component index structure,
+ # break when no new components are found
+ while True in mask:
+ url_str = url.format(idx_mod, stats, comp_idx)
+ with closing(urlopen(url_str)) as resp:
+ raw = resp.read()
+ lines = raw.decode('utf-8').strip().strip('"').split('"\r\n"')
lines = [line.strip().split('","') for line in lines]
temp_df = DataFrame(lines, columns=['ticker', 'name', 'exchange'])
@@ -468,11 +471,11 @@ def get_data_fred(name=None, start=dt.datetime(2010, 1, 1),
fred_URL = "http://research.stlouisfed.org/fred2/series/"
- url = fred_URL + '%s' % name + \
- '/downloaddata/%s' % name + '.csv'
- data = read_csv(urllib.urlopen(url), index_col=0, parse_dates=True,
- header=None, skiprows=1, names=["DATE", name],
- na_values='.')
+ url = fred_URL + '%s' % name + '/downloaddata/%s' % name + '.csv'
+ with closing(urlopen(url)) as resp:
+ data = read_csv(resp, index_col=0, parse_dates=True,
+ header=None, skiprows=1, names=["DATE", name],
+ na_values='.')
try:
return data.truncate(start, end)
except KeyError:
@@ -489,9 +492,9 @@ def get_data_famafrench(name, start=None, end=None):
# path of zip files
zipFileURL = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"
- url = urllib.urlopen(zipFileURL + name + ".zip")
- zipfile = ZipFile(StringIO(url.read()))
- data = zipfile.open(name + ".txt").readlines()
+ with closing(urlopen(zipFileURL + name + ".zip")) as url:
+ with closing(ZipFile(StringIO(url.read()))) as zf:
+ data = zf.read(name + ".txt").splitlines()
file_edges = np.where(np.array([len(d) for d in data]) == 2)[0]
@@ -638,7 +641,7 @@ def get_options_data(self, month=None, year=None, expiry=None):
url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
'+Options')
- parsed = parse(urllib2.urlopen(url))
+ parsed = parse(url)
doc = parsed.getroot()
tables = doc.findall('.//table')
calls = tables[9]
@@ -709,7 +712,7 @@ def get_call_data(self, month=None, year=None, expiry=None):
url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
'+Options')
- parsed = parse(urllib2.urlopen(url))
+ parsed = parse(url)
doc = parsed.getroot()
tables = doc.findall('.//table')
calls = tables[9]
@@ -777,7 +780,7 @@ def get_put_data(self, month=None, year=None, expiry=None):
url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
'+Options')
- parsed = parse(urllib2.urlopen(url))
+ parsed = parse(url)
doc = parsed.getroot()
tables = doc.findall('.//table')
puts = tables[13]