Skip to content

Commit

Permalink
REF: Move webuse. Closes statsmodels#1571.
Browse files Browse the repository at this point in the history
  • Loading branch information
jseabold committed Sep 20, 2014
1 parent ea554cc commit a1b203c
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 44 deletions.
2 changes: 1 addition & 1 deletion statsmodels/datasets/__init__.py
Expand Up @@ -6,4 +6,4 @@
engel, grunfeld, longley, macrodata, modechoice, nile, randhie,
scotland, spector, stackloss, star98, strikes, sunspots, fair,
heart, statecrime, co2)
from .utils import get_rdataset, get_data_home, clear_data_home
from .utils import get_rdataset, get_data_home, clear_data_home, webuse
41 changes: 40 additions & 1 deletion statsmodels/datasets/utils.py
@@ -1,5 +1,5 @@
from statsmodels.compat.python import (range, StringIO, urlopen, HTTPError, lrange,
cPickle)
cPickle, urljoin)
import sys
import shutil
from os import environ
Expand All @@ -18,6 +18,45 @@
from pandas import read_csv


def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True):
    """
    Download a Stata example dataset and load it into memory.

    Parameters
    ----------
    data : str
        Name of dataset to fetch (without the ``.dta`` extension).
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        If True, returns a `pandas.DataFrame`. If False, returns the
        record array produced by ``genfromdta``.

    Returns
    -------
    dta : pandas.DataFrame or Record Array
        The Stata dataset, as a DataFrame when `as_df` is True and as a
        record array otherwise.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    from contextlib import closing
    from io import BytesIO

    from statsmodels.iolib import genfromdta

    url = urljoin(baseurl, data + '.dta')
    # closing() releases the connection even if the read fails; the object
    # returned by urlopen is not a context manager on Python 2.
    with closing(urlopen(url)) as response:
        # A .dta file is binary and urlopen yields bytes, so buffer it in a
        # BytesIO (works on Python 2 and 3) to get a seekable file-like
        # object for the reader.
        dta = BytesIO(response.read())

    records = genfromdta(dta)
    if as_df:
        from pandas import DataFrame
        return DataFrame.from_records(records)
    return records


class Dataset(dict):
def __init__(self, **kw):
# define some default attributes, so pylint can find them
Expand Down
2 changes: 1 addition & 1 deletion statsmodels/examples/ex_sandwich2.py
Expand Up @@ -23,7 +23,7 @@
urlretrieve('http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta', 'srs.dta')
print('downloading file')
srs = dta.genfromdta("srs.dta")
# from statsmodels.tools.tools import webuse
# from statsmodels.datasets import webuse
# srs = webuse('srs', 'http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/')
# #does currently not cache file

Expand Down
50 changes: 10 additions & 40 deletions statsmodels/tools/tools.py
@@ -1,15 +1,16 @@
'''
Utility functions models code
'''
from statsmodels.compat.python import (reduce, lzip, lmap, asstr2, urlopen, urljoin,
StringIO, range)
from statsmodels.compat.python import (reduce, lzip, lmap, asstr2,
StringIO, range)
import numpy as np
import numpy.lib.recfunctions as nprf
import numpy.linalg as L
from scipy.interpolate import interp1d
from scipy.linalg import svdvals
from statsmodels.distributions import (ECDF, monotone_fn_inverter,
StepFunction)
from statsmodels.datasets import webuse
from statsmodels.tools.data import _is_using_pandas
from statsmodels.compat.numpy import np_matrix_rank
from pandas import DataFrame
Expand Down Expand Up @@ -486,44 +487,6 @@ def chain_dot(*arrs):
"""
return reduce(lambda x, y: np.dot(y, x), arrs[::-1])

def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True):
    """
    Download a Stata example dataset and load it into memory.

    Parameters
    ----------
    data : str
        Name of dataset to fetch (without the ``.dta`` extension).
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        If True, returns a `pandas.DataFrame`. If False, returns the
        record array produced by ``genfromdta``.

    Returns
    -------
    dta : pandas.DataFrame or Record Array
        The Stata dataset, as a DataFrame when `as_df` is True and as a
        record array otherwise.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    from contextlib import closing
    from io import BytesIO

    from statsmodels.iolib import genfromdta

    url = urljoin(baseurl, data + '.dta')
    # closing() releases the connection even if the read fails; the object
    # returned by urlopen is not a context manager on Python 2.
    with closing(urlopen(url)) as response:
        # A .dta file is binary and urlopen yields bytes, so buffer it in a
        # BytesIO (works on Python 2 and 3) to get a seekable file-like
        # object for the reader.
        dta = BytesIO(response.read())

    records = genfromdta(dta)
    if as_df:
        from pandas import DataFrame
        return DataFrame.from_records(records)
    return records

def nan_dot(A, B):
"""
Returns np.dot(left_matrix, right_matrix) with the convention that
Expand Down Expand Up @@ -562,3 +525,10 @@ class Bunch(dict):
def __init__(self, **kw):
dict.__init__(self, kw)
self.__dict__ = self

# Backward-compatibility alias: rebind ``webuse`` in this module to the
# result of np.deprecate, passing the old and new import locations and the
# removal notice given below.
webuse = np.deprecate(webuse,
old_name='statsmodels.tools.tools.webuse',
new_name='statsmodels.datasets.webuse',
message='webuse will be removed from the tools '
'namespace in the 0.7.0 release. Please use the'
' new import.')
2 changes: 1 addition & 1 deletion statsmodels/tsa/arima_model.py
Expand Up @@ -1750,7 +1750,7 @@ class ARIMAResultsWrapper(ARMAResultsWrapper):
res14css = arma14css.fit(order=(4, 1), trend='nc', method='css')

# ARIMA Model
from statsmodels.tools.tools import webuse
from statsmodels.datasets import webuse
dta = webuse('wpi1')
wpi = dta['wpi']

Expand Down

0 comments on commit a1b203c

Please sign in to comment.