From 32d2d580aadd2e73c870afd0999e681bbf8c0db5 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 27 Mar 2014 18:28:04 -0400 Subject: [PATCH 1/5] DOC: Fix formatting. --- docs/source/release/version0.6.rst | 30 +++++++++++++++--------------- statsmodels/graphics/gofplots.py | 5 +++-- statsmodels/tsa/stattools.py | 8 ++++---- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/docs/source/release/version0.6.rst b/docs/source/release/version0.6.rst index a904de95fd1..86cae457e29 100644 --- a/docs/source/release/version0.6.rst +++ b/docs/source/release/version0.6.rst @@ -33,21 +33,21 @@ covariates. .. code-block:: python -import numpy as np -import pandas as pd -from statsmodels.genmod.generalized_estimating_equations import GEE -from statsmodels.genmod.dependence_structures import Independence -from statsmodels.genmod.families import Poisson - -data_url = "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/epil.csv" -data = pd.read_csv(data_url) - -fam = Poisson() -ind = Independence() -md1 = GEE.from_formula("y ~ age + trt + base", data, groups=data["subject"],\ - covstruct=ind, family=fam) -mdf1 = md1.fit() -print mdf1.summary() + import numpy as np + import pandas as pd + from statsmodels.genmod.generalized_estimating_equations import GEE + from statsmodels.genmod.dependence_structures import Independence + from statsmodels.genmod.families import Poisson + + data_url = "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/epil.csv" + data = pd.read_csv(data_url) + + fam = Poisson() + ind = Independence() + md1 = GEE.from_formula("y ~ age + trt + base", data, groups=data["subject"],\ + covstruct=ind, family=fam) + mdf1 = md1.fit() + print mdf1.summary() The dependence structure in a GEE is treated as a nuisance parameter diff --git a/statsmodels/graphics/gofplots.py b/statsmodels/graphics/gofplots.py index 69a45f772e5..310efac80ff 100644 --- a/statsmodels/graphics/gofplots.py +++ b/statsmodels/graphics/gofplots.py @@ -275,6 +275,7 @@ def 
qqplot(self, xlabel=None, ylabel=None, line=None, other=None, other values are used depending on the status of the kwarg `other`. line : str {'45', 's', 'r', q'} or None, optional Options for the reference line to which the data is compared: + - '45' - 45-degree line - 's' - standardized line, the expected order statistics are scaled by the standard deviation of the given sample and have the mean @@ -287,8 +288,8 @@ def qqplot(self, xlabel=None, ylabel=None, line=None, other=None, If provided, the sample quantiles of this `ProbPlot` instance are plotted against the sample quantiles of the `other` `ProbPlot` instance. If an array-like object is provided, it will be turned - into a `ProbPlot` instance using default parameters. If not provided - (default), the theoretical quantiles are used. + into a `ProbPlot` instance using default parameters. If not + provided (default), the theoretical quantiles are used. ax : Matplotlib AxesSubplot instance, optional If given, this subplot is used to plot in instead of a new figure being created. diff --git a/statsmodels/tsa/stattools.py b/statsmodels/tsa/stattools.py index 02f4e8d1469..a74f3828916 100644 --- a/statsmodels/tsa/stattools.py +++ b/statsmodels/tsa/stattools.py @@ -983,23 +983,23 @@ def arma_order_select_ic(y, max_ar=4, max_ma=2, ic='bic', trend='c', max_ar : int Maximum number of AR lags to use. Default 4. max_ma : int - Maximum number of MA lags to use. DEfault 2. + Maximum number of MA lags to use. Default 2. ic : str, list Information criteria to report. Either a single string or a list of different criteria is possible. trend : str The trend to use when fitting the ARMA models. model_kw : dict - Keyword arguments to be passed to the `ARMA` model + Keyword arguments to be passed to the ``ARMA`` model fit_kw : dict - Keyword arguments to be passed to `ARMA.fit`. + Keyword arguments to be passed to ``ARMA.fit``. Returns ------- obj : Results object Each ic is an attribute with a DataFrame for the results. 
The AR order used is the row index. The ma order used is the column index. The - minimum orders are available as `ic`_min_order. + minimum orders are available as ``ic_min_order``. Examples -------- From 75c265076aa3ffd60c7f1d4fd7b1f6a885d56494 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 27 Mar 2014 18:28:49 -0400 Subject: [PATCH 2/5] DOC: Fix broken links. --- docs/source/anova.rst | 2 +- docs/source/discretemod.rst | 3 ++- docs/source/glm.rst | 3 ++- docs/source/regression.rst | 6 +++--- docs/source/rlm.rst | 3 ++- docs/source/tsa.rst | 10 +++++----- 6 files changed, 15 insertions(+), 12 deletions(-) diff --git a/docs/source/anova.rst b/docs/source/anova.rst index d6172470ed1..27f591332a3 100644 --- a/docs/source/anova.rst +++ b/docs/source/anova.rst @@ -31,7 +31,7 @@ A more detailed example can be found here: .. toctree:: :maxdepth: 1 - examples/generated/example_interactions + examples/notebooks/generated/interactions_anova Module Reference ---------------- diff --git a/docs/source/discretemod.rst b/docs/source/discretemod.rst index 0b4764c38f3..9a4135f173c 100644 --- a/docs/source/discretemod.rst +++ b/docs/source/discretemod.rst @@ -31,7 +31,8 @@ Detailed examples can be found here: .. toctree:: :maxdepth: 2 - examples/generated/example_discrete + examples/notebooks/generated/discrete_choice_overview + examples/notebooks/generated/discrete_choice_example Technical Documentation ----------------------- diff --git a/docs/source/glm.rst b/docs/source/glm.rst index 3a7ad1c2606..4aec34d46ac 100644 --- a/docs/source/glm.rst +++ b/docs/source/glm.rst @@ -29,7 +29,8 @@ Detailed examples can be found here: .. 
toctree:: :maxdepth: 1 - examples/generated/example_glm + examples/notebooks/generated/glm + examples/notebooks/generated/glm_formula Technical Documentation ----------------------- diff --git a/docs/source/regression.rst b/docs/source/regression.rst index d31fc3e99d7..77a2648556d 100644 --- a/docs/source/regression.rst +++ b/docs/source/regression.rst @@ -35,9 +35,9 @@ Detailed examples can be found here: .. toctree:: :maxdepth: 1 - examples/generated/example_ols - examples/generated/example_wls - examples/generated/example_gls + examples/notebooks/generated/ols + examples/notebooks/generated/wls + examples/notebooks/generated/gls Technical Documentation ----------------------- diff --git a/docs/source/rlm.rst b/docs/source/rlm.rst index 401c5d0540c..f72d110f9a4 100644 --- a/docs/source/rlm.rst +++ b/docs/source/rlm.rst @@ -30,7 +30,8 @@ Detailed examples can be found here: .. toctree:: :maxdepth: 1 - examples/generated/example_rlm + examples/notebooks/generated/robust_models_0 + examples/notebooks/generated/robust_models_1 Technical Documentation ----------------------- diff --git a/docs/source/tsa.rst b/docs/source/tsa.rst index dfe88c840f9..c499c57e81b 100644 --- a/docs/source/tsa.rst +++ b/docs/source/tsa.rst @@ -181,11 +181,11 @@ Time Series Filters .. 
autosummary:: :toctree: generated/ - filters.bkfilter - filters.hpfilter - filters.arfilter - filters.cffilter - filters.miso_lfilter + filters.bk_filter.bkfilter + filters.hp_filter.hpfilter + filters.cf_filter.cffilter + filters.filtertools.arfilter + filters.filtertools.miso_lfilter filters.filtertools.fftconvolve3 filters.filtertools.fftconvolveinv From 7ab98ebd612aadcdc281008df9d0d81b6c66d1c3 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 27 Mar 2014 18:29:08 -0400 Subject: [PATCH 3/5] DOC: Make sure release placeholders is short version --- docs/source/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 7821ca3be87..43dd24cd360 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -75,8 +75,8 @@ # |version| and |release|, also used in various other places throughout the # built documents. # -from statsmodels.version import version, full_version -release = version +from statsmodels.version import short_version, full_version +release = short_version # The full version, including dev tag. version = full_version From 6c3aa6082bc8cbe98ea17ff86996d66c56f7c98b Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 27 Mar 2014 18:29:57 -0400 Subject: [PATCH 4/5] DOC: Remove redundant and outdated information. --- docs/source/about.rst | 57 +++++++++++ docs/source/index.rst | 2 +- docs/source/introduction.rst | 193 ----------------------------------- 3 files changed, 58 insertions(+), 194 deletions(-) create mode 100644 docs/source/about.rst delete mode 100644 docs/source/introduction.rst diff --git a/docs/source/about.rst b/docs/source/about.rst new file mode 100644 index 00000000000..932a03fbc33 --- /dev/null +++ b/docs/source/about.rst @@ -0,0 +1,57 @@ +.. currentmodule:: statsmodels + +***************** +About Statsmodels +***************** + +Background +---------- + +The ``models`` module of ``scipy.stats`` was originally written by Jonathan +Taylor. 
For some time it was part of scipy but was later removed. During +the Google Summer of Code 2009, ``statsmodels`` was corrected, tested, +improved and released as a new package. Since then, the statsmodels +development team has continued to add new models, plotting tools, and statistical methods. + +Testing +------- + +Most results have been verified with at least one other statistical package: +R, Stata or SAS. The guiding principal for the initial rewrite and for +continued development is that all numbers have to be verified. Some +statistical methods are tested with Monte Carlo studies. While we strive to +follow this test driven approach, there is no guarantee that the code is +bug-free and always works. Some auxiliary function are still insufficiently +tested, some edge cases might not be correctly taken into account, and the +possibility of numerical problems is inherent to many of the statistical +models. We especially appreciate any help and reports for these kind of +problems so we can keep improving the existing models. + +Code Stability +~~~~~~~~~~~~~~ + +The existing models are mostly settled in their user interface and we do not +expect many large changes going forward. For the existing code, although +there is no guarantee yet on API stability, we have long deprecation periods +in all but very special cases, and we try to keep changes that require +adjustments by existing users to a minimal level. For newer models we might +adjust the user interface as we gain more experience and obtain feedback. +These changes will always be noted in our release notes available in the +documentation. + +Financial Support +----------------- + +We are grateful for the financial support that we obtained for the +development of statsmodels: + + Google `www.google.com `_ : Google Summer of Code + (GSOC) 2009-2013. 
+ + AQR `www.aqr.com <http://www.aqr.com>`_ : financial sponsor for the work on + Vector Autoregressive Models (VAR) by Wes McKinney + +We would also like to thank our hosting providers, `github +<http://github.com>`_ for the public code repository, `sourceforge +<http://sourceforge.net>`_ for hosting our documentation and `python.org +<http://python.org>`_ for making our downloads available on PyPI. diff --git a/docs/source/index.rst b/docs/source/index.rst index c8a54fde814..27cf42842c5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -64,12 +64,12 @@ Basic Documentation .. toctree:: :maxdepth: 3 - introduction release/index gettingstarted example_formulas install related + about Information about the structure and development of statsmodels: diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst deleted file mode 100644 index e28063fc3ed..00000000000 --- a/docs/source/introduction.rst +++ /dev/null @@ -1,193 +0,0 @@ -.. currentmodule:: statsmodels - -************ -Introduction -************ - -Background ----------- - -Scipy.stats.models was originally written by Jonathan Taylor. -For some time it was part of scipy but then removed from it. During -the Google Summer of Code 2009, stats.models was corrected, tested and -enhanced and released as a new package. Since then we have continued to -improve the existing models and added new statistical methods. - - -Main Features and Current Status --------------------------------- - -statsmodels 0.4 is a pure python package, with one optional cython based -extension that provides a considerable speed improvement for ARIMA estimation. -Future releases will depend on cython generated extensions. - -statsmodels includes: - -* regression: Generalized least squares (including weighted least squares and - least squares with autoregressive errors), ordinary least squares. -* glm: Generalized linear models with support for all of the one-parameter - exponential family distributions.
-* discrete: regression with discrete dependent variables, including Logit, Probit, - MNLogit, Poisson, based on maximum likelihood estimators -* rlm: Robust linear models with support for several M-estimators. -* tsa: models for time series analysis - - - univariate time series analysis: AR, ARIMA - - vector autoregressive models, VAR and structural VAR - - descriptive statistics and process models for time series analysis - -* nonparametric : (Univariate) kernel density estimators -* datasets: Datasets to be distributed and used for examples and in testing. -* stats: a wide range of statistical tests - - - diagnostics and specification tests - - goodness-of-fit and normality tests - - functions for multiple testing - - various additional statistical tests - -* iolib: Tools for reading Stata .dta files into numpy arrays. (not yet ported to Python 3) -* iolob: printing table output to ascii, latex, and html - -* miscellaneous models - -statsmodels contains a sandbox folder, which includes some of the original -stats.models code that has not yet been rewritten and tested. The sandbox also -contains models and functions that we are currently developing. This code is -in various stages of development from early stages to almost finished, but -not sufficiently tested or with an API that is still in flux. Some of the code -in the advanced state covers among others Mixed (repeated measures) Models, -GARCH models, general method of moments (GMM) estimators, kernel regression and -kernel density estimation, and various extensions to scipy.stats.distributions. - -The code is written for plain NumPy arrays so that statsmodels can be used -as a library for any kind of data structure users might have. However, in -order to make the data handling easier, some time series specific models -rely on pandas, and we have plans to integrate pandas in future releases of -statsmodels. 
- -We have also included several datasets from the public domain and by -permission for tests and examples. The datasets are set up so that it is -easy to add more datasets. - -Python 3 --------- - -statsmodels has been ported and tested for Python 3.2. Python 3 -version of the code is automatically created during setup by running 2to3.py -over the statsmodels source (excluding examples). -The STATA file reader and writer in iolib.foreign has not been ported yet. -A recent development version of matplotlib for Python 3 runs without problems -with our examples and tests. -Running the test suite with Python 3.2 shows only one errors related to -unported STATA file reader. - -Testing -------- - -Most results have been verified with at least one other statistical package: R, -Stata or SAS. The guiding principal for the initial rewrite and for continued -development is that all numbers have to be verified. Some statistical -methods are tested with Monte Carlo studies. While we strive to follow this -test driven approach, there is no guarantee that the code is bug-free and -always works. Some auxiliary function are still insufficiently tested, some -edge cases might not be correctly taken into account, and the possibility of -numerical problems is inherent to many of the statistical models. We -especially appreciate any help and reports for these kind of problems so we -can keep improving the existing models. - - - - -Looking Forward ---------------- - -We would like to invite everyone to give statsmodels a test drive, use it, and -report comments, possibilities for improvement and bugs to the statsmodels -mailing list http://groups.google.com/group/pystatsmodels or file tickets on our -issue tracker at https://github.com/statsmodels/statsmodels/issues - -The source code is available from https://github.com/statsmodels/statsmodels. 
- -Our plans for the future include improving the coverage of statistical -models, methods and tests that any basic statistics package should provide. -But the main direction for the expansion of statsmodels depends on the -requirements and interests of the developers and contributers. - -The current maintainers are mostly interested in econometrics and time series -analysis, but we would like to invite any users or developers to contribute -their own extensions to existing models, or new models. To speed up -improvements that are waiting in the sandbox, any help with providing test -cases, reviewing or improving the code would be very appreciated. - -Planned Extensions -~~~~~~~~~~~~~~~~~~ - -Big changes that are planned for the next release will improve the -usability of statsmodels especially for interactive work. - -* Metainformation about data and models: Currently the models essentially - use no information about the design matrix and just treat it as numpy - array. Some information like variable names are included with the wrapper - for use with Pandas or other data structures. -* Formulas similar to R: This will provide a faster way to interactively - define models and contrast matrices, and will provide additional - information especially for categorical variables. (Nathaniel Smith) - -Various models that are work in progress where the time to inclusion in -statsmodels proper will depend on the available developer time and interests: - -Bayesian dynamic linear models (Wes) - -more Kalman filter based time series analysis (Skipper) - -New models (roughly in order of completeness): -general method of moments (GMM) estimators, kernel regression, -kernel density estimation, various extensions to scipy.stats.distributions, -GARCH models, copulas, system of equation models, panel data models, -more discrete choice models, mixed effects models, survival models. - -Resampling approaches like bootstrap and permutation for tests and estimator -statistics. 
- - -Code Stability -~~~~~~~~~~~~~~ - -The existing models are mostly settled in their user interface and we do not -expect many changes anymore. One area that will need adjustment is how -formulas and meta information are included. New models that have just been -included might require adjustments as we gain more experience and obtain -feedback by users. As we expand the range of models, we keep improving the -framework for different estimators and statistical tests, so further changes -will be necessary. - -In 0.3 we reorganized the internal location of the code and -import paths which will make future enhancements less interruptive. In 0.4 -most models obtained a wrapper that stores and returns additional information -from richer data structures like data structures in Pandas and structured -arrays. In 0.4 also prediction has been improved in many cases and made more -consistent across models. - -Although there is no guarantee yet on API stability, we try to keep changes -that require adjustments by existing users to a minimal level. - -Financial Support ------------------ - -We are grateful for the financial support that we obtained for the -development of statsmodels: - - Google `www.google.com `_ : Google Summer of Code - (GSOC) 2009-2013 - - AQR `www.aqr.com `_ : financial sponsor for the work on - Vector Autoregressive Models (VAR) by Wes McKinney - -We would also like to thank our hosting providers, `github -`_ for the public code repository, `sourceforge -`_ for hosting our documentation and `python.org -`_ for making our downloads available on pypi. 
- - -Josef Perktold and Skipper Seabold -(maintainers) From dd3d82fc427fd8d3c7430a899ff0efc8c1d408de Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 27 Mar 2014 19:05:37 -0400 Subject: [PATCH 5/5] DOC: Add FAQ page --- docs/source/faq.rst | 39 +++++++++++++++++++++++++++++ docs/themes/statsmodels/layout.html | 1 + 2 files changed, 40 insertions(+) create mode 100644 docs/source/faq.rst diff --git a/docs/source/faq.rst b/docs/source/faq.rst new file mode 100644 index 00000000000..44fa7c55bae --- /dev/null +++ b/docs/source/faq.rst @@ -0,0 +1,39 @@ +:orphan: + +.. _faq: + +Frequently Asked Questions +-------------------------- + +.. _endog-exog-faq: + +What do endog and exog mean? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These are shorthand for endogenous and exogenous variables. You might be more comfortable with the common ``y`` and ``X`` notation in linear models. Sometimes the endogenous variable ``y`` is called a dependent variable. Likewise, sometimes the exogenous variables ``X`` are called the independent variables. You can read about this in greater detail at :ref:`endog_exog`. + + +.. _missing-faq: + +How does statsmodels handle missing data? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Missing data can be handled via the ``missing`` keyword argument. Every model takes this keyword. You can find more information in the docstring of :class:`statsmodels.base.model.Model`. + +.. `Model class `_. + +.. _build-faq: + +Why won't statsmodels build? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you're on Python 3.4, you *must* use Cython 0.20.1. If you're still having problems, try running + +.. code-block:: bash + + python setup.py clean + +What if my question isn't answered here? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You may find answers for questions that have not yet been added here on GitHub under the `FAQ issues tag `_. If not, please ask your question on Stack Overflow using the `statsmodels tag `_ or on the `mailing list `_.
diff --git a/docs/themes/statsmodels/layout.html b/docs/themes/statsmodels/layout.html index c21a88f5554..90069d37903 100644 --- a/docs/themes/statsmodels/layout.html +++ b/docs/themes/statsmodels/layout.html @@ -28,6 +28,7 @@
  • Bugs
  •  | 
  • Develop
  •  | 
  • Examples
  •  |  +
  • FAQ
  •  |  {% endblock %} {# Render the Header with Banner #}