Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

BUG: fix data.py regression #4281

Merged
merged 1 commit on Jul 18, 2013
Jump to file or symbol
Failed to load files and symbols.
+16 −10
Split
View
@@ -342,6 +342,8 @@ pandas 0.12
- Fixed bug in initializing ``DatetimeIndex`` with an array of strings
in a certain time zone (:issue:`4229`)
- Fixed bug where html5lib wasn't being properly skipped (:issue:`4265`)
+ - Fixed bug where get_data_famafrench wasn't using the correct file edges
+ (:issue:`4281`)
pandas 0.11.0
=============
View
@@ -475,6 +475,8 @@ Bug Fixes
- Fixed bug in initializing ``DatetimeIndex`` with an array of strings
in a certain time zone (:issue:`4229`)
- Fixed bug where html5lib wasn't being properly skipped (:issue:`4265`)
+ - Fixed bug where get_data_famafrench wasn't using the correct file edges
+ (:issue:`4281`)
See the :ref:`full release notes
<release>` or issue tracker
View
@@ -453,8 +453,8 @@ def get_data_fred(name, start=dt.datetime(2010, 1, 1),
def get_data_famafrench(name):
# path of zip files
zip_file_url = ('http://mba.tuck.dartmouth.edu/pages/faculty/'
- 'ken.french/ftp/')
- zip_file_path = '{0}{1}.zip'.format(zip_file_url, name)
+ 'ken.french/ftp')
+ zip_file_path = '{0}/{1}.zip'.format(zip_file_url, name)
with urlopen(zip_file_path) as url:
raw = url.read()
@@ -463,13 +463,13 @@ def get_data_famafrench(name):
tmpf.write(raw)
with ZipFile(tmpf, 'r') as zf:
- data = zf.read(name + '.txt').splitlines()
+ data = zf.open(name + '.txt').readlines()
line_lengths = np.array(map(len, data))
- file_edges = np.where(line_lengths)[0]
+ file_edges = np.where(line_lengths == 2)[0]
datasets = {}
- edges = itertools.izip(file_edges[:-1], file_edges[1:])
+ edges = itertools.izip(file_edges + 1, file_edges[1:])
for i, (left_edge, right_edge) in enumerate(edges):
dataset = [d.split() for d in data[left_edge:right_edge]]
if len(dataset) > 10:
@@ -479,14 +479,15 @@ def get_data_famafrench(name):
header = dataset[header_index]
ds_header = dataset[header_index + 1:]
# to ensure the header is unique
- header = ['{0} {1}'.format(*items) for items in enumerate(header,
- start=1)]
- index = np.fromiter((d[0] for d in ds_header), dtype=int)
- dataset = np.fromiter((d[1:] for d in ds_header), dtype=float)
+ header = ['{0} {1}'.format(j, hj) for j, hj in enumerate(header,
+ start=1)]
+ index = np.array([d[0] for d in ds_header], dtype=int)
+ dataset = np.array([d[1:] for d in ds_header], dtype=float)
datasets[i] = DataFrame(dataset, index, columns=header)
return datasets
+
# Items needed for options class
CUR_MONTH = dt.datetime.now().month
CUR_YEAR = dt.datetime.now().year
@@ -10,7 +10,7 @@
from pandas.io import data as web
from pandas.io.data import DataReader, SymbolWarning
from pandas.util.testing import (assert_series_equal, assert_produces_warning,
- assert_frame_equal, network)
+ network)
from numpy.testing import assert_array_equal
@@ -343,6 +343,7 @@ def test_read_famafrench(self):
"F-F_Research_Data_Factors_weekly", "6_Portfolios_2x3",
"F-F_ST_Reversal_Factor"):
ff = DataReader(name, "famafrench")
+ assert ff
assert isinstance(ff, dict)