From 32d2d580aadd2e73c870afd0999e681bbf8c0db5 Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Thu, 27 Mar 2014 18:28:04 -0400
Subject: [PATCH 1/5] DOC: Fix formatting.

---
 docs/source/release/version0.6.rst | 30 +++++++++++++++---------------
 statsmodels/graphics/gofplots.py   |  5 +++--
 statsmodels/tsa/stattools.py       |  8 ++++----
 3 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/docs/source/release/version0.6.rst b/docs/source/release/version0.6.rst
index a904de95fd1..86cae457e29 100644
--- a/docs/source/release/version0.6.rst
+++ b/docs/source/release/version0.6.rst
@@ -33,21 +33,21 @@ covariates.
 
 .. code-block:: python
 
-import numpy as np
-import pandas as pd
-from statsmodels.genmod.generalized_estimating_equations import GEE
-from statsmodels.genmod.dependence_structures import Independence
-from statsmodels.genmod.families import Poisson
-
-data_url = "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/epil.csv"
-data = pd.read_csv(data_url)
-
-fam = Poisson()
-ind = Independence()
-md1 = GEE.from_formula("y ~ age + trt + base", data, groups=data["subject"],\
-                       covstruct=ind, family=fam)
-mdf1 = md1.fit()
-print mdf1.summary()
+   import numpy as np
+   import pandas as pd
+   from statsmodels.genmod.generalized_estimating_equations import GEE
+   from statsmodels.genmod.dependence_structures import Independence
+   from statsmodels.genmod.families import Poisson
+   
+   data_url = "http://vincentarelbundock.github.io/Rdatasets/csv/MASS/epil.csv"
+   data = pd.read_csv(data_url)
+   
+   fam = Poisson()
+   ind = Independence()
+   md1 = GEE.from_formula("y ~ age + trt + base", data, groups=data["subject"],\
+                          covstruct=ind, family=fam)
+   mdf1 = md1.fit()
+   print mdf1.summary()
 
 
 The dependence structure in a GEE is treated as a nuisance parameter
diff --git a/statsmodels/graphics/gofplots.py b/statsmodels/graphics/gofplots.py
index 69a45f772e5..310efac80ff 100644
--- a/statsmodels/graphics/gofplots.py
+++ b/statsmodels/graphics/gofplots.py
@@ -275,6 +275,7 @@ def qqplot(self, xlabel=None, ylabel=None, line=None, other=None,
             other values are used depending on the status of the kwarg `other`.
         line : str {'45', 's', 'r', q'} or None, optional
             Options for the reference line to which the data is compared:
+
             - '45' - 45-degree line
             - 's' - standardized line, the expected order statistics are scaled
               by the standard deviation of the given sample and have the mean
@@ -287,8 +288,8 @@ def qqplot(self, xlabel=None, ylabel=None, line=None, other=None,
             If provided, the sample quantiles of this `ProbPlot` instance are
             plotted against the sample quantiles of the `other` `ProbPlot`
             instance. If an array-like object is provided, it will be turned
-            into a `ProbPlot` instance using default parameters. If not provided
-            (default), the theoretical quantiles are used.
+            into a `ProbPlot` instance using default parameters. If not
+            provided (default), the theoretical quantiles are used.
         ax : Matplotlib AxesSubplot instance, optional
             If given, this subplot is used to plot in instead of a new figure
             being created.
diff --git a/statsmodels/tsa/stattools.py b/statsmodels/tsa/stattools.py
index 02f4e8d1469..a74f3828916 100644
--- a/statsmodels/tsa/stattools.py
+++ b/statsmodels/tsa/stattools.py
@@ -983,23 +983,23 @@ def arma_order_select_ic(y, max_ar=4, max_ma=2, ic='bic', trend='c',
     max_ar : int
         Maximum number of AR lags to use. Default 4.
     max_ma : int
-        Maximum number of MA lags to use. DEfault 2.
+        Maximum number of MA lags to use. Default 2.
     ic : str, list
         Information criteria to report. Either a single string or a list
         of different criteria is possible.
     trend : str
         The trend to use when fitting the ARMA models.
     model_kw : dict
-        Keyword arguments to be passed to the `ARMA` model
+        Keyword arguments to be passed to the ``ARMA`` model
     fit_kw : dict
-        Keyword arguments to be passed to `ARMA.fit`.
+        Keyword arguments to be passed to ``ARMA.fit``.
 
     Returns
     -------
     obj : Results object
         Each ic is an attribute with a DataFrame for the results. The AR order
         used is the row index. The ma order used is the column index. The
-        minimum orders are available as `ic`_min_order.
+        minimum orders are available as ``ic_min_order``.
 
     Examples
     --------

From 75c265076aa3ffd60c7f1d4fd7b1f6a885d56494 Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Thu, 27 Mar 2014 18:28:49 -0400
Subject: [PATCH 2/5] DOC: Fix broken links.

---
 docs/source/anova.rst       |  2 +-
 docs/source/discretemod.rst |  3 ++-
 docs/source/glm.rst         |  3 ++-
 docs/source/regression.rst  |  6 +++---
 docs/source/rlm.rst         |  3 ++-
 docs/source/tsa.rst         | 10 +++++-----
 6 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/docs/source/anova.rst b/docs/source/anova.rst
index d6172470ed1..27f591332a3 100644
--- a/docs/source/anova.rst
+++ b/docs/source/anova.rst
@@ -31,7 +31,7 @@ A more detailed example can be found here:
 .. toctree::
   :maxdepth: 1
 
-  examples/generated/example_interactions
+  examples/notebooks/generated/interactions_anova
 
 Module Reference
 ----------------
diff --git a/docs/source/discretemod.rst b/docs/source/discretemod.rst
index 0b4764c38f3..9a4135f173c 100644
--- a/docs/source/discretemod.rst
+++ b/docs/source/discretemod.rst
@@ -31,7 +31,8 @@ Detailed examples can be found here:
 .. toctree::
     :maxdepth: 2
 
-    examples/generated/example_discrete
+    examples/notebooks/generated/discrete_choice_overview
+    examples/notebooks/generated/discrete_choice_example
 
 Technical Documentation
 -----------------------
diff --git a/docs/source/glm.rst b/docs/source/glm.rst
index 3a7ad1c2606..4aec34d46ac 100644
--- a/docs/source/glm.rst
+++ b/docs/source/glm.rst
@@ -29,7 +29,8 @@ Detailed examples can be found here:
 .. toctree::
    :maxdepth: 1
 
-   examples/generated/example_glm
+   examples/notebooks/generated/glm
+   examples/notebooks/generated/glm_formula
 
 Technical Documentation
 -----------------------
diff --git a/docs/source/regression.rst b/docs/source/regression.rst
index d31fc3e99d7..77a2648556d 100644
--- a/docs/source/regression.rst
+++ b/docs/source/regression.rst
@@ -35,9 +35,9 @@ Detailed examples can be found here:
 .. toctree::
    :maxdepth: 1
 
-   examples/generated/example_ols
-   examples/generated/example_wls
-   examples/generated/example_gls
+   examples/notebooks/generated/ols
+   examples/notebooks/generated/wls
+   examples/notebooks/generated/gls
 
 Technical Documentation
 -----------------------
diff --git a/docs/source/rlm.rst b/docs/source/rlm.rst
index 401c5d0540c..f72d110f9a4 100644
--- a/docs/source/rlm.rst
+++ b/docs/source/rlm.rst
@@ -30,7 +30,8 @@ Detailed examples can be found here:
 .. toctree::
     :maxdepth: 1
 
-    examples/generated/example_rlm
+    examples/notebooks/generated/robust_models_0
+    examples/notebooks/generated/robust_models_1
 
 Technical Documentation
 -----------------------
diff --git a/docs/source/tsa.rst b/docs/source/tsa.rst
index dfe88c840f9..c499c57e81b 100644
--- a/docs/source/tsa.rst
+++ b/docs/source/tsa.rst
@@ -181,11 +181,11 @@ Time Series Filters
 .. autosummary::
    :toctree: generated/
 
-   filters.bkfilter
-   filters.hpfilter
-   filters.arfilter
-   filters.cffilter
-   filters.miso_lfilter
+   filters.bk_filter.bkfilter
+   filters.hp_filter.hpfilter
+   filters.cf_filter.cffilter
+   filters.filtertools.arfilter
+   filters.filtertools.miso_lfilter
    filters.filtertools.fftconvolve3
    filters.filtertools.fftconvolveinv
 

From 7ab98ebd612aadcdc281008df9d0d81b6c66d1c3 Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Thu, 27 Mar 2014 18:29:08 -0400
Subject: [PATCH 3/5] DOC: Make sure release placeholders is short version

---
 docs/source/conf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 7821ca3be87..43dd24cd360 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -75,8 +75,8 @@
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
-from statsmodels.version import version, full_version
-release = version
+from statsmodels.version import short_version, full_version
+release = short_version
 # The full version, including dev tag.
 version = full_version
 

From 6c3aa6082bc8cbe98ea17ff86996d66c56f7c98b Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Thu, 27 Mar 2014 18:29:57 -0400
Subject: [PATCH 4/5] DOC: Remove redundant and outdated information.

---
 docs/source/about.rst        |  57 +++++++++++
 docs/source/index.rst        |   2 +-
 docs/source/introduction.rst | 193 -----------------------------------
 3 files changed, 58 insertions(+), 194 deletions(-)
 create mode 100644 docs/source/about.rst
 delete mode 100644 docs/source/introduction.rst

diff --git a/docs/source/about.rst b/docs/source/about.rst
new file mode 100644
index 00000000000..932a03fbc33
--- /dev/null
+++ b/docs/source/about.rst
@@ -0,0 +1,57 @@
+.. currentmodule:: statsmodels
+
+*****************
+About Statsmodels
+*****************
+
+Background
+----------
+
+The ``models`` module of ``scipy.stats`` was originally written by Jonathan 
+Taylor. For some time it was part of scipy but was later removed. During
+the Google Summer of Code 2009, ``statsmodels`` was corrected, tested,
+improved and released as a new package. Since then, the statsmodels 
+development team has continued to add new models, plotting tools, and statistical methods.
+
+Testing
+-------
+
+Most results have been verified with at least one other statistical package:
+R, Stata or SAS. The guiding principle for the initial rewrite and for
+continued development is that all numbers have to be verified. Some
+statistical methods are tested with Monte Carlo studies. While we strive to
+follow this test-driven approach, there is no guarantee that the code is
+bug-free and always works. Some auxiliary functions are still insufficiently
+tested, some edge cases might not be correctly taken into account, and the
+possibility of numerical problems is inherent to many of the statistical
+models. We especially appreciate any help and reports for these kinds of
+problems so we can keep improving the existing models.
+
+Code Stability
+~~~~~~~~~~~~~~
+
+The existing models are mostly settled in their user interface and we do not
+expect many large changes going forward. For the existing code, although 
+there is no guarantee yet on API stability, we have long deprecation periods 
+in all but very special cases, and we try to keep changes that require 
+adjustments by existing users to a minimal level. For newer models we might
+adjust the user interface as we gain more experience and obtain feedback. 
+These changes will always be noted in our release notes available in the
+documentation.
+
+Financial Support
+-----------------
+
+We are grateful for the financial support that we obtained for the
+development of statsmodels:
+
+ Google `www.google.com <http://www.google.com/>`_ : Google Summer of Code
+ (GSOC) 2009-2013.
+
+ AQR `www.aqr.com <http://www.aqr.com/>`_ : financial sponsor for the work on
+ Vector Autoregressive Models (VAR) by Wes McKinney
+
+We would also like to thank our hosting providers, `github
+<http://github.com/>`_ for the public code repository, `sourceforge
+<http://sourceforge.net/>`_ for hosting our documentation and `python.org
+<http://python.org>`_ for making our downloads available on PyPI.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index c8a54fde814..27cf42842c5 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -64,12 +64,12 @@ Basic Documentation
 .. toctree::
     :maxdepth: 3
 
-    introduction
     release/index
     gettingstarted
     example_formulas
     install
     related
+    about
 
 Information about the structure and development of
 statsmodels:
diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst
deleted file mode 100644
index e28063fc3ed..00000000000
--- a/docs/source/introduction.rst
+++ /dev/null
@@ -1,193 +0,0 @@
-.. currentmodule:: statsmodels
-
-************
-Introduction
-************
-
-Background
-----------
-
-Scipy.stats.models was originally written by Jonathan Taylor.
-For some time it was part of scipy but then removed from it. During
-the Google Summer of Code 2009, stats.models was corrected, tested and
-enhanced and released as a new package. Since then we have continued to
-improve the existing models and added new statistical methods.
-
-
-Main Features and Current Status
---------------------------------
-
-statsmodels 0.4 is a pure python package, with one optional cython based
-extension that provides a considerable speed improvement for ARIMA estimation.
-Future releases will depend on cython generated extensions.
-
-statsmodels includes:
-
-* regression: Generalized least squares (including weighted least squares and
-  least squares with autoregressive errors), ordinary least squares.
-* glm: Generalized linear models with support for all of the one-parameter
-  exponential family distributions.
-* discrete: regression with discrete dependent variables, including Logit, Probit,
-  MNLogit, Poisson, based on maximum likelihood estimators
-* rlm: Robust linear models with support for several M-estimators.
-* tsa: models for time series analysis
-
-  - univariate time series analysis: AR, ARIMA
-  - vector autoregressive models, VAR and structural VAR
-  - descriptive statistics and process models for time series analysis
-
-* nonparametric : (Univariate) kernel density estimators
-* datasets: Datasets to be distributed and used for examples and in testing.
-* stats: a wide range of statistical tests
-
-  - diagnostics and specification tests
-  - goodness-of-fit and normality tests
-  - functions for multiple testing
-  - various additional statistical tests
-
-* iolib: Tools for reading Stata .dta files into numpy arrays. (not yet ported to Python 3)
-* iolob: printing table output to ascii, latex, and html
-
-* miscellaneous models
-
-statsmodels contains a sandbox folder, which includes some of the original
-stats.models code that has not yet been rewritten and tested. The sandbox also
-contains models and functions that we are currently developing. This code is
-in various stages of development from early stages to almost finished, but
-not sufficiently tested or with an API that is still in flux. Some of the code
-in the advanced state covers among others Mixed (repeated measures) Models,
-GARCH models, general method of moments (GMM) estimators, kernel regression and
-kernel density estimation, and various extensions to scipy.stats.distributions.
-
-The code is written for plain NumPy arrays so that statsmodels can be used
-as a library for any kind of data structure users might have. However, in
-order to make the data handling easier, some time series specific models
-rely on pandas, and we have plans to integrate pandas in future releases of
-statsmodels.
-
-We have also included several datasets from the public domain and by
-permission for tests and examples. The datasets are set up so that it is
-easy to add more datasets.
-
-Python 3
---------
-
-statsmodels has been ported and tested for Python 3.2. Python 3
-version of the code is automatically created during setup by running 2to3.py
-over the statsmodels source (excluding examples).
-The STATA file reader and writer in iolib.foreign has not been ported yet.
-A recent development version of matplotlib for Python 3 runs without problems
-with our examples and tests.
-Running the test suite with Python 3.2 shows only one errors related to
-unported STATA file reader.
-
-Testing
--------
-
-Most results have been verified with at least one other statistical package: R,
-Stata or SAS. The guiding principal for the initial rewrite and for continued
-development is that all numbers have to be verified. Some statistical
-methods are tested with Monte Carlo studies. While we strive to follow this
-test driven approach, there is no guarantee that the code is bug-free and
-always works. Some auxiliary function are still insufficiently tested, some
-edge cases might not be correctly taken into account, and the possibility of
-numerical problems is inherent to many of the statistical models. We
-especially appreciate any help and reports for these kind of problems so we
-can keep improving the existing models.
-
-
-
-
-Looking Forward
----------------
-
-We would like to invite everyone to give statsmodels a test drive, use it, and
-report comments, possibilities for improvement and bugs to the statsmodels
-mailing list http://groups.google.com/group/pystatsmodels or file tickets on our
-issue tracker at https://github.com/statsmodels/statsmodels/issues
-
-The source code is available from https://github.com/statsmodels/statsmodels.
-
-Our plans for the future include improving the coverage of statistical
-models, methods and tests that any basic statistics package should provide.
-But the main direction for the expansion of statsmodels depends on the
-requirements and interests of the developers and contributers.
-
-The current maintainers are mostly interested in econometrics and time series
-analysis, but we would like to invite any users or developers to contribute
-their own extensions to existing models, or new models. To speed up
-improvements that are waiting in the sandbox, any help with providing test
-cases, reviewing or improving the code would be very appreciated.
-
-Planned Extensions
-~~~~~~~~~~~~~~~~~~
-
-Big changes that are planned for the next release will improve the
-usability of statsmodels especially for interactive work.
-
-* Metainformation about data and models: Currently the models essentially
-  use no information about the design matrix and just treat it as numpy
-  array. Some information like variable names are included with the wrapper
-  for use with Pandas or other data structures.
-* Formulas similar to R: This will provide a faster way to interactively
-  define models and contrast matrices, and will provide additional
-  information especially for categorical variables. (Nathaniel Smith)
-
-Various models that are work in progress where the time to inclusion in
-statsmodels proper will depend on the available developer time and interests:
-
-Bayesian dynamic linear models (Wes)
-
-more Kalman filter based time series analysis (Skipper)
-
-New models (roughly in order of completeness):
-general method of moments (GMM) estimators, kernel regression,
-kernel density estimation, various extensions to scipy.stats.distributions,
-GARCH models, copulas, system of equation models, panel data models,
-more discrete choice models, mixed effects models, survival models.
-
-Resampling approaches like bootstrap and permutation for tests and estimator
-statistics.
-
-
-Code Stability
-~~~~~~~~~~~~~~
-
-The existing models are mostly settled in their user interface and we do not
-expect many changes anymore. One area that will need adjustment is how
-formulas and meta information are included. New models that have just been
-included might require adjustments as we gain more experience and obtain
-feedback by users. As we expand the range of models, we keep improving the
-framework for different estimators and statistical tests, so further changes
-will be necessary.
-
-In 0.3 we reorganized the internal location of the code and
-import paths which will make future enhancements less interruptive. In 0.4
-most models obtained a wrapper that stores and returns additional information
-from richer data structures like data structures in Pandas and structured
-arrays. In 0.4 also prediction has been improved in many cases and made more
-consistent across models.
-
-Although there is no guarantee yet on API stability, we try to keep changes
-that require adjustments by existing users to a minimal level.
-
-Financial Support
------------------
-
-We are grateful for the financial support that we obtained for the
-development of statsmodels:
-
- Google `www.google.com <http://www.google.com/>`_ : Google Summer of Code
- (GSOC) 2009-2013
-
- AQR `www.aqr.com <http://www.aqr.com/>`_ : financial sponsor for the work on
- Vector Autoregressive Models (VAR) by Wes McKinney
-
-We would also like to thank our hosting providers, `github
-<http://github.com/>`_ for the public code repository, `sourceforge
-<http://sourceforge.net/>`_ for hosting our documentation and `python.org
-<http://python.org>`_ for making our downloads available on pypi.
-
-
-Josef Perktold and Skipper Seabold
-(maintainers)

From dd3d82fc427fd8d3c7430a899ff0efc8c1d408de Mon Sep 17 00:00:00 2001
From: Skipper Seabold <jsseabold@gmail.com>
Date: Thu, 27 Mar 2014 19:05:37 -0400
Subject: [PATCH 5/5] DOC: Add FAQ page

---
 docs/source/faq.rst                 | 39 +++++++++++++++++++++++++++++
 docs/themes/statsmodels/layout.html |  1 +
 2 files changed, 40 insertions(+)
 create mode 100644 docs/source/faq.rst

diff --git a/docs/source/faq.rst b/docs/source/faq.rst
new file mode 100644
index 00000000000..44fa7c55bae
--- /dev/null
+++ b/docs/source/faq.rst
@@ -0,0 +1,39 @@
+:orphan:
+
+.. _faq:
+
+Frequently Asked Questions
+--------------------------
+
+.. _endog-exog-faq:
+
+What do endog and exog mean?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+These are shorthand for endogenous and exogenous variables. You might be more comfortable with the common ``y`` and ``X`` notation in linear models. Sometimes the endogenous variable ``y`` is called a dependent variable. Likewise, sometimes the exogenous variables ``X`` are called the independent variables. You can read about this in greater detail at :ref:`endog_exog`.
+
+
+.. _missing-faq:
+
+How does statsmodels handle missing data?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Missing data can be handled via the ``missing`` keyword argument. Every model takes this keyword. You can find more information in the docstring of :class:`statsmodels.base.model.Model`.
+
+.. `Model class <http://statsmodels.sourceforge.net/devel/dev/generated/statsmodels.base.model.Model.html#statsmodels.base.model.Model>`_.
+
+.. _build-faq:
+
+Why won't statsmodels build?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you're on Python 3.4, you *must* use Cython 0.20.1. If you're still having problems, try running:
+
+.. code-block:: bash
+
+    python setup.py clean
+
+What if my question isn't answered here?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You may find answers for questions that have not yet been added here on GitHub under the `FAQ issues tag <https://github.com/statsmodels/statsmodels/issues?labels=FAQ&page=1&state=open>`_. If not, please ask your question on Stack Overflow using the `statsmodels tag <https://stackoverflow.com/questions/tagged/statsmodels>`_ or on the `mailing list <https://groups.google.com/forum/#!forum/pystatsmodels>`_.
diff --git a/docs/themes/statsmodels/layout.html b/docs/themes/statsmodels/layout.html
index c21a88f5554..90069d37903 100644
--- a/docs/themes/statsmodels/layout.html
+++ b/docs/themes/statsmodels/layout.html
@@ -28,6 +28,7 @@
 <li><a href="https://github.com/statsmodels/statsmodels/issues">Bugs</a></li> &nbsp;|&nbsp;
 <li><a href="{{ pathto('dev/index') }}">Develop</a></li> &nbsp;|&nbsp;
 <li><a href="{{ pathto('examples/index') }}">Examples</a></li> &nbsp;|&nbsp;
+<li><a href="{{ pathto('faq') }}">FAQ</a></li> &nbsp;|&nbsp;
 {% endblock %}
 
 {# Render the Header with Banner #}