diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8971011e..3482af19 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,5 +1,5 @@ - [ ] closes #xxxx - [ ] tests added / passed -- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff` +- [ ] passes `flake8 $(git diff --name-only origin/main -- '*.py')` - [ ] passes `black --check pandas_datareader` - [ ] added entry to docs/source/whatsnew/vLATEST.txt diff --git a/.gitignore b/.gitignore index 5100f734..479a3a21 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,6 @@ docs/build *~ env/ .pytest_cache/ -.vscode/ \ No newline at end of file +.vscode/ +*.diff +pandas_datareader/_version.py diff --git a/README.md b/README.md index 10a8dcfa..fd776eb9 100644 --- a/README.md +++ b/README.md @@ -72,5 +72,5 @@ or ``` shell git clone https://github.com/pydata/pandas-datareader.git cd pandas-datareader -python setup.py install +python -m pip install -e . ``` diff --git a/docs/source/index.rst b/docs/source/index.rst index 751be3a5..4ea9790b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -67,7 +67,7 @@ or git clone https://github.com/pydata/pandas-datareader.git cd pandas-datareader - python setup.py install + python -m pip install -e . `Development documentation `__ is available for the latest changes in master. 
diff --git a/docs/source/whatsnew/vLATEST.txt b/docs/source/whatsnew/vLATEST.txt new file mode 100644 index 00000000..5b64d9f8 --- /dev/null +++ b/docs/source/whatsnew/vLATEST.txt @@ -0,0 +1,4 @@ +Bug Fixes +~~~~~~~~~ + +- Fixed FamaFrench reader to handle updated format diff --git a/pandas_datareader/famafrench.py b/pandas_datareader/famafrench.py index 0f733119..f7e1a36d 100644 --- a/pandas_datareader/famafrench.py +++ b/pandas_datareader/famafrench.py @@ -101,6 +101,7 @@ def _read_one_data(self, url, params): doc_chunks, tables = [], [] data = self._read_zipfile(url) + data = re.sub(r"\r(?!\n)", "\r\n", data) # turn lone CR into CRLF for chunk in data.split(2 * "\r\n"): if len(chunk) < 800: @@ -114,7 +115,11 @@ def _read_one_data(self, url, params): start = 0 if not match else match.start() df = read_csv(StringIO("Date" + src[start:]), **params) - if df.index.min() > 190000: + if df.index.min() > 19000000: + df.index = to_datetime(df.index.astype(str), format="%Y%m%d").to_period( + freq="D" + ) + elif df.index.min() > 190000: df.index = to_datetime(df.index.astype(str), format="%Y%m").to_period( freq="M" ) diff --git a/pandas_datareader/tests/test_famafrench.py b/pandas_datareader/tests/test_famafrench.py index e3574adf..f3e6790a 100644 --- a/pandas_datareader/tests/test_famafrench.py +++ b/pandas_datareader/tests/test_famafrench.py @@ -10,7 +10,7 @@ class TestFamaFrench: - def test_get_data(self): + def test_get_data_sample(self): keys = [ "F-F_Research_Data_Factors", "F-F_ST_Reversal_Factor", @@ -50,50 +50,50 @@ def test_f_f_research(self): exp = pd.DataFrame( { "Mkt-RF": [ - -3.36, - 3.4, - 6.31, - 2.0, - -7.89, - -5.57, - 6.93, - -4.77, - 9.54, - 3.88, - 0.6, + -3.35, + 3.39, + 6.30, + 1.99, + -7.90, + -5.56, + 6.92, + -4.78, + 9.55, + 3.87, + 0.59, 6.82, ], "SMB": [ - 0.4, - 1.19, - 1.48, - 4.87, - 0.09, - -1.81, - 0.2, - -3.0, - 3.96, - 1.13, - 3.76, - 0.73, + 0.43, + 1.18, + 1.46, + 4.84, + 0.13, + -1.79, + 0.22, + -3.01, + 3.82, + 1.08, + 3.67, + 0.72, 
], "HML": [ - 0.43, - 3.22, - 2.21, - 2.89, - -2.44, - -4.7, - -0.31, - -1.9, - -3.16, - -2.42, - -0.96, - 3.69, + 0.33, + 3.18, + 2.19, + 2.96, + -2.48, + -4.73, + -0.50, + -1.73, + -3.02, + -2.46, + -0.90, + 3.56, ], "RF": [ - 0.0, - 0.0, + 0.00, + 0.00, 0.01, 0.01, 0.01, @@ -192,3 +192,9 @@ def test_prior_2_12_breakpoints(self): exp_index = pd.period_range("2010-01-01", "2010-12-01", freq="M", name="Date") tm.assert_index_equal(results[0].index, exp_index) + + def test_all_datasets(self) -> None: + for dataset in get_available_datasets(): + data = web.DataReader(dataset, "famafrench") + + assert tuple(data) == (*range(len(data) - 1), "DESCR")