diff --git a/docs/source/release/version0.13.3.rst b/docs/source/release/version0.13.3.rst
index e02d5303b8d..1a6ad35cd10 100644
--- a/docs/source/release/version0.13.3.rst
+++ b/docs/source/release/version0.13.3.rst
@@ -30,45 +30,16 @@ Stats
 The Highlights
 ==============
-
-
-What's new - an overview
-========================
-
-The following lists the main new features of statsmodels 0.13.3. In addition,
-release 0.13.3 includes bug fixes, refactorings and improvements in many areas.
+This is a Python 3.11 compatibility release only. There are no significant
+new features or bug fixes.
 
 Submodules
 ----------
-
 ``maintenance``
 ~~~~~~~~~~~~~~~
 - Backport Python 3.11 to 0.13.x branch (:pr:`8484`)
-
-
-
-
-bug-wrong
----------
-
-A new issue label `type-bug-wrong` indicates bugs that cause that incorrect
-numbers are returned without warnings.
-(Regular bugs are mostly usability bugs or bugs that raise an exception for
-unsupported use cases.)
-`see tagged issues `_
-
-
-Major Bugs Fixed
-================
-
-See github issues for a list of bug fixes included in this release
-
-- `Closed bugs `_
-- `Closed bugs (wrong result) `_
-
-
 Development summary and credits
 ===============================
diff --git a/requirements-dev.txt b/requirements-dev.txt
index e0845a518fe..96425f5b7e4 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -7,6 +7,7 @@ oldest-supported-numpy>=2022.4.18
 matplotlib>=3
 colorama
 joblib
+Jinja2
 
 # Remove due to failures on azure CI
 # cvxopt; os_name != "win32"
diff --git a/statsmodels/sandbox/distributions/__init__.py b/statsmodels/sandbox/distributions/__init__.py
index 49b1d3002f3..dd786862bd2 100644
--- a/statsmodels/sandbox/distributions/__init__.py
+++ b/statsmodels/sandbox/distributions/__init__.py
@@ -1,4 +1,4 @@
-'''temporary location for enhancements to scipy.stats
+"""temporary location for enhancements to scipy.stats
 
 includes
 ^^^^^^^^
@@ -20,4 +20,4 @@
 mixed status : from not-working to well-tested
 
-'''
+"""
diff --git a/statsmodels/sandbox/distributions/estimators.py b/statsmodels/sandbox/distributions/estimators.py
index 1dcb173bdf9..0a0384eb4b1 100644
--- a/statsmodels/sandbox/distributions/estimators.py
+++ b/statsmodels/sandbox/distributions/estimators.py
@@ -1,4 +1,4 @@
-'''estimate distribution parameters by various methods
+"""estimate distribution parameters by various methods
 
 method of moments or matching quantiles, and Maximum Likelihood estimation
 based on binned data and Maximum Product-of-Spacings
@@ -85,17 +85,17 @@
 changes: added Maximum Product-of-Spacings
 2010-05-12
 
-'''
+"""
 
 import numpy as np
-from scipy import stats, optimize, special
+from scipy import optimize, special, stats
 
-cache = {}   #module global storage for temp results, not used
+cache = {}  # module global storage for temp results, not used
 
 # the next two use distfn from module scope - not anymore
 def gammamomentcond(distfn, params, mom2, quantile=None):
-    '''estimate distribution parameters based method of moments (mean,
+    """estimate distribution parameters based on the method of moments (mean,
     variance) for distributions with 1 shape parameter and fixed loc=0.
 
     Returns
@@ -106,16 +106,19 @@ def gammamomentcond(distfn, params, mom2, quantile=None):
     -----
     first test version, quantile argument not used
 
-    '''
+    """
+
     def cond(params):
         alpha, scale = params
-        mom2s = distfn.stats(alpha, 0.,scale)
-        #quantil
-        return np.array(mom2)-mom2s
+        mom2s = distfn.stats(alpha, 0.0, scale)
+        # quantile
+        return np.array(mom2) - mom2s
+
     return cond
 
+
 def gammamomentcond2(distfn, params, mom2, quantile=None):
-    '''estimate distribution parameters based method of moments (mean,
+    """estimate distribution parameters based on the method of moments (mean,
     variance) for distributions with 1 shape parameter and fixed loc=0.
 
     Returns
@@ -129,16 +132,15 @@ def gammamomentcond2(distfn, params, mom2, quantile=None):
     The only difference to previous function is return type.
 
-    '''
+    """
     alpha, scale = params
-    mom2s = distfn.stats(alpha, 0.,scale)
-    return np.array(mom2)-mom2s
-
+    mom2s = distfn.stats(alpha, 0.0, scale)
+    return np.array(mom2) - mom2s
 
 ######### fsolve does not move in small samples, fmin not very accurate
 def momentcondunbound(distfn, params, mom2, quantile=None):
-    '''moment conditions for estimating distribution parameters using method
+    """moment conditions for estimating distribution parameters using the method
     of moments, uses mean, variance and one quantile for distributions
     with 1 shape parameter.
 
     Returns
@@ -147,12 +149,12 @@ def momentcondunbound(distfn, params, mom2, quantile=None):
     difference : ndarray
         difference between theoretical and empirical moments and quantiles
 
-    '''
+    """
     shape, loc, scale = params
-    mom2diff = np.array(distfn.stats(shape, loc,scale)) - mom2
+    mom2diff = np.array(distfn.stats(shape, loc, scale)) - mom2
     if quantile is not None:
         pq, xq = quantile
-        #ppfdiff = distfn.ppf(pq, alpha)
+        # ppfdiff = distfn.ppf(pq, alpha)
         cdfdiff = distfn.cdf(xq, shape, loc, scale) - pq
         return np.concatenate([mom2diff, cdfdiff[:1]])
     return mom2diff
 
@@ -160,7 +162,7 @@
 
 ###### loc scale only
 def momentcondunboundls(distfn, params, mom2, quantile=None, shape=None):
-    '''moment conditions for estimating loc and scale of a distribution
+    """moment conditions for estimating loc and scale of a distribution
     with method of moments using either 2 quantiles or 2 moments (not both).
 
     Returns
@@ -168,24 +170,24 @@
     difference : ndarray
         difference between theoretical and empirical moments or quantiles
 
-    '''
+    """
     loc, scale = params
     mom2diff = np.array(distfn.stats(shape, loc, scale)) - mom2
     if quantile is not None:
         pq, xq = quantile
-        #ppfdiff = distfn.ppf(pq, alpha)
+        # ppfdiff = distfn.ppf(pq, alpha)
         cdfdiff = distfn.cdf(xq, shape, loc, scale) - pq
-        #return np.concatenate([mom2diff, cdfdiff[:1]])
+        # return np.concatenate([mom2diff, cdfdiff[:1]])
         return cdfdiff
     return mom2diff
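
A minimal usage sketch of the moment-condition helpers above (not part of the
patch itself): they return residuals meant to be passed to a root finder.
This mirrors the gamma example in this file's __main__ block further down;
the sample size and start values here are arbitrary.

    import numpy as np
    from scipy import optimize, stats

    from statsmodels.sandbox.distributions.estimators import gammamomentcond2

    grvs = stats.gamma.rvs(2, 0.0, 2.0, size=1000)   # simulated gamma data
    mom2 = np.array([grvs.mean(), grvs.var()])       # sample mean and variance
    # solve theoretical moments(shape, scale) - sample moments = 0
    parest = optimize.fsolve(
        lambda params: gammamomentcond2(stats.gamma, params, mom2), [1.0, 3.0]
    )
    print(parest)  # estimated (shape, scale)
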
 
-
 ######### try quantile GMM with identity weight matrix
-#(just a guess that's what it is
+# (just a guess that's what it is
+
 
 def momentcondquant(distfn, params, mom2, quantile=None, shape=None):
-    '''moment conditions for estimating distribution parameters by matching
+    """moment conditions for estimating distribution parameters by matching
     quantiles, defines as many moment conditions as quantiles.
 
     Returns
@@ -198,44 +200,49 @@ def momentcondquant(distfn, params, mom2, quantile=None, shape=None):
     This can be used for method of moments or for generalized method of
     moments.
 
-    '''
-    #this check looks redundant/unused know
+    """
+    # this check looks redundant/unused now
    if len(params) == 2:
         loc, scale = params
     elif len(params) == 3:
         shape, loc, scale = params
     else:
-        #raise NotImplementedError
-        pass #see whether this might work, seems to work for beta with 2 shape args
+        # raise NotImplementedError
+        pass  # see whether this might work, seems to work for beta with 2 shape args
 
-    #mom2diff = np.array(distfn.stats(*params)) - mom2
-    #if not quantile is None:
+    # mom2diff = np.array(distfn.stats(*params)) - mom2
+    # if not quantile is None:
     pq, xq = quantile
-    #ppfdiff = distfn.ppf(pq, alpha)
+    # ppfdiff = distfn.ppf(pq, alpha)
     cdfdiff = distfn.cdf(xq, *params) - pq
-    #return np.concatenate([mom2diff, cdfdiff[:1]])
+    # return np.concatenate([mom2diff, cdfdiff[:1]])
     return cdfdiff
-    #return mom2diff
+    # return mom2diff
+
+
 def fitquantilesgmm(distfn, x, start=None, pquant=None, frozen=None):
     if pquant is None:
-        pquant = np.array([0.01, 0.05,0.1,0.4,0.6,0.9,0.95,0.99])
+        pquant = np.array([0.01, 0.05, 0.1, 0.4, 0.6, 0.9, 0.95, 0.99])
     if start is None:
-        if hasattr(distfn, '_fitstart'):
+        if hasattr(distfn, "_fitstart"):
             start = distfn._fitstart(x)
         else:
-            start = [1]*distfn.numargs + [0.,1.]
-    #TODO: vectorize this:
-    xqs = [stats.scoreatpercentile(x, p) for p in pquant*100]
+            start = [1] * distfn.numargs + [0.0, 1.0]
+    # TODO: vectorize this:
+    xqs = [stats.scoreatpercentile(x, p) for p in pquant * 100]
     mom2s = None
-    parest = optimize.fmin(lambda params:np.sum(
-        momentcondquant(distfn, params, mom2s,(pquant,xqs), shape=None)**2), start)
+    parest = optimize.fmin(
+        lambda params: np.sum(
+            momentcondquant(distfn, params, mom2s, (pquant, xqs), shape=None)
+            ** 2
+        ),
+        start,
+    )
     return parest
 
-
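
Again illustrative only, not part of the change: fitquantilesgmm as defined
above matches a default grid of sample quantiles by minimizing the summed
squared cdf differences from momentcondquant. This mirrors the t-distribution
call in the __main__ section further down; the start values are arbitrary.

    from scipy import stats

    from statsmodels.sandbox.distributions.estimators import fitquantilesgmm

    trvs = stats.t.rvs(5, 0, 1, size=1000)            # t(5) sample
    parest = fitquantilesgmm(stats.t, trvs, start=[10, 1.0, 2.0])
    print(parest)                                     # roughly (df, loc, scale)
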
 
 def fitbinned(distfn, freq, binedges, start, fixed=None):
-    '''estimate parameters of distribution function for binned data using MLE
+    """estimate parameters of distribution function for binned data using MLE
 
     Parameters
     ----------
@@ -259,24 +266,30 @@
     added factorial
 
-    '''
+    """
     if fixed is not None:
         raise NotImplementedError
     nobs = np.sum(freq)
-    lnnobsfact = special.gammaln(nobs+1)
+    lnnobsfact = special.gammaln(nobs + 1)
 
     def nloglike(params):
-        '''negative loglikelihood function of binned data
+        """negative loglikelihood function of binned data
 
         corresponds to multinomial
-        '''
+        """
         prob = np.diff(distfn.cdf(binedges, *params))
-        return -(lnnobsfact + np.sum(freq*np.log(prob)- special.gammaln(freq+1)))
+        return -(
+            lnnobsfact
+            + np.sum(freq * np.log(prob) - special.gammaln(freq + 1))
+        )
+
     return optimize.fmin(nloglike, start)
 
 
-def fitbinnedgmm(distfn, freq, binedges, start, fixed=None, weightsoptimal=True):
-    '''estimate parameters of distribution function for binned data using GMM
+def fitbinnedgmm(
+    distfn, freq, binedges, start, fixed=None, weightsoptimal=True
+):
+    """estimate parameters of distribution function for binned data using GMM
 
     Parameters
     ----------
@@ -305,28 +318,30 @@
     added factorial
 
-    '''
+    """
     if fixed is not None:
         raise NotImplementedError
     nobs = np.sum(freq)
     if weightsoptimal:
-        weights = freq/float(nobs)
+        weights = freq / float(nobs)
     else:
         weights = np.ones(len(freq))
-    freqnormed = freq/float(nobs)
+    freqnormed = freq / float(nobs)
     # skip turning weights into matrix diag(freq/float(nobs))
 
     def gmmobjective(params):
-        '''negative loglikelihood function of binned data
+        """negative loglikelihood function of binned data
 
         corresponds to multinomial
-        '''
+        """
         prob = np.diff(distfn.cdf(binedges, *params))
         momcond = freqnormed - prob
-        return np.dot(momcond*weights, momcond)
+        return np.dot(momcond * weights, momcond)
+
     return optimize.fmin(gmmobjective, start)
 
-#Addition from try_maxproductspacings:
+
+# Addition from try_maxproductspacings:
 """Estimating Parameters of Log-Normal Distribution with Maximum Likelihood
 and Maximum Product-of-Spacings
 
@@ -337,16 +352,19 @@ def gmmobjective(params):
 License: BSD
 """
 
+
 def hess_ndt(fun, pars, args, options):
     import numdifftools as ndt
-    if not ('stepMax' in options or 'stepFix' in options):
-        options['stepMax'] = 1e-5
+
+    if not ("stepMax" in options or "stepFix" in options):
+        options["stepMax"] = 1e-5
     f = lambda params: fun(params, *args)
     h = ndt.Hessian(f, **options)
     return h(pars), h
 
+
 def logmps(params, xsorted, dist):
-    '''calculate negative log of Product-of-Spacings
+    """calculate negative log of Product-of-Spacings
 
     Parameters
     ----------
@@ -366,13 +384,14 @@
     Notes
     -----
     MPS definition from JKB page 233
-    '''
-    xcdf = np.r_[0., dist.cdf(xsorted, *params), 1.]
+    """
+    xcdf = np.r_[0.0, dist.cdf(xsorted, *params), 1.0]
     D = np.diff(xcdf)
     return -np.log(D).mean()
 
+
 def getstartparams(dist, data):
-    '''get starting values for estimation of distribution parameters
+    """get starting values for estimation of distribution parameters
 
     Parameters
     ----------
@@ -389,19 +408,20 @@
     preliminary estimate or starting value for the parameters of the
     distribution given the data, including loc and scale
 
-    '''
-    if hasattr(dist, 'fitstart'):
-        #x0 = getattr(dist, 'fitstart')(data)
+    """
+    if hasattr(dist, "fitstart"):
+        # x0 = getattr(dist, 'fitstart')(data)
         x0 = dist.fitstart(data)
     else:
         if np.isfinite(dist.a):
-            x0 = np.r_[[1.]*dist.numargs, (data.min()-1), 1.]
+            x0 = np.r_[[1.0] * dist.numargs, (data.min() - 1), 1.0]
         else:
-            x0 = np.r_[[1.]*dist.numargs, (data.mean()-1), 1.]
+ x0 = np.r_[[1.0] * dist.numargs, (data.mean() - 1), 1.0] return x0 + def fit_mps(dist, data, x0=None): - '''Estimate distribution parameters with Maximum Product-of-Spacings + """Estimate distribution parameters with Maximum Product-of-Spacings Parameters ---------- @@ -419,93 +439,123 @@ def fit_mps(dist, data, x0=None): including loc and scale - ''' + """ xsorted = np.sort(data) if x0 is None: x0 = getstartparams(dist, xsorted) args = (xsorted, dist) print(x0) - #print(args) + # print(args) return optimize.fmin(logmps, x0, args=args) +if __name__ == "__main__": -if __name__ == '__main__': + # Example: gamma - distribution + # ----------------------------- - #Example: gamma - distribution - #----------------------------- - - print('\n\nExample: gamma Distribution') - print( '---------------------------') + print("\n\nExample: gamma Distribution") + print("---------------------------") alpha = 2 xq = [0.5, 4] pq = [0.1, 0.9] print(stats.gamma.ppf(pq, alpha)) xq = stats.gamma.ppf(pq, alpha) - print(np.diff((stats.gamma.ppf(pq, np.linspace(0.01,4,10)[:,None])*xq[::-1]))) - #optimize.bisect(lambda alpha: np.diff((stats.gamma.ppf(pq, alpha)*xq[::-1]))) - print(optimize.fsolve(lambda alpha: np.diff((stats.gamma.ppf(pq, alpha)*xq[::-1])), 3.)) + print( + np.diff( + (stats.gamma.ppf(pq, np.linspace(0.01, 4, 10)[:, None]) * xq[::-1]) + ) + ) + # optimize.bisect(lambda alpha: np.diff((stats.gamma.ppf(pq, alpha)*xq[::-1]))) + print( + optimize.fsolve( + lambda alpha: np.diff((stats.gamma.ppf(pq, alpha) * xq[::-1])), 3.0 + ) + ) distfn = stats.gamma - mcond = gammamomentcond(distfn, [5.,10], mom2=stats.gamma.stats(alpha, 0.,1.), quantile=None) - print(optimize.fsolve(mcond, [1.,2.])) - mom2 = stats.gamma.stats(alpha, 0.,1.) - print(optimize.fsolve(lambda params:gammamomentcond2(distfn, params, mom2), [1.,2.])) - - grvs = stats.gamma.rvs(alpha, 0.,2., size=1000) + mcond = gammamomentcond( + distfn, + [5.0, 10], + mom2=stats.gamma.stats(alpha, 0.0, 1.0), + quantile=None, + ) + print(optimize.fsolve(mcond, [1.0, 2.0])) + mom2 = stats.gamma.stats(alpha, 0.0, 1.0) + print( + optimize.fsolve( + lambda params: gammamomentcond2(distfn, params, mom2), [1.0, 2.0] + ) + ) + + grvs = stats.gamma.rvs(alpha, 0.0, 2.0, size=1000) mom2 = np.array([grvs.mean(), grvs.var()]) - alphaestq = optimize.fsolve(lambda params:gammamomentcond2(distfn, params, mom2), [1.,3.]) + alphaestq = optimize.fsolve( + lambda params: gammamomentcond2(distfn, params, mom2), [1.0, 3.0] + ) print(alphaestq) - print('scale = ', xq/stats.gamma.ppf(pq, alphaestq)) - + print("scale = ", xq / stats.gamma.ppf(pq, alphaestq)) - #Example beta - distribution - #--------------------------- + # Example beta - distribution + # --------------------------- - #Warning: this example had cut-and-paste errors + # Warning: this example had cut-and-paste errors - print('\n\nExample: beta Distribution') - print( '--------------------------') + print("\n\nExample: beta Distribution") + print("--------------------------") - #monkey patching : -## if hasattr(stats.beta, '_fitstart'): -## del stats.beta._fitstart #bug in _fitstart #raises AttributeError: _fitstart - #stats.distributions.beta_gen._fitstart = lambda self, data : np.array([1,1,0,1]) - #_fitstart seems to require a tuple - stats.distributions.beta_gen._fitstart = lambda self, data : (5,5,0,1) + # monkey patching : + ## if hasattr(stats.beta, '_fitstart'): + ## del stats.beta._fitstart #bug in _fitstart #raises AttributeError: _fitstart + # stats.distributions.beta_gen._fitstart = lambda self, data : 
np.array([1,1,0,1]) + # _fitstart seems to require a tuple + stats.distributions.beta_gen._fitstart = lambda self, data: (5, 5, 0, 1) - pq = np.array([0.01, 0.05,0.1,0.4,0.6,0.9,0.95,0.99]) - #rvsb = stats.beta.rvs(0.5,0.15,size=200) - rvsb = stats.beta.rvs(10,15,size=2000) - print('true params', 10, 15, 0, 1) + pq = np.array([0.01, 0.05, 0.1, 0.4, 0.6, 0.9, 0.95, 0.99]) + # rvsb = stats.beta.rvs(0.5,0.15,size=200) + rvsb = stats.beta.rvs(10, 15, size=2000) + print("true params", 10, 15, 0, 1) print(stats.beta.fit(rvsb)) - xqsb = [stats.scoreatpercentile(rvsb, p) for p in pq*100] + xqsb = [stats.scoreatpercentile(rvsb, p) for p in pq * 100] mom2s = np.array([rvsb.mean(), rvsb.var()]) - betaparest_gmmquantile = optimize.fmin(lambda params:np.sum(momentcondquant(stats.beta, params, mom2s,(pq,xqsb), shape=None)**2), - [10,10, 0., 1.], maxiter=2000) - print('betaparest_gmmquantile', betaparest_gmmquantile) - #result sensitive to initial condition - - - #Example t - distribution - #------------------------ - - print('\n\nExample: t Distribution') - print( '-----------------------') + betaparest_gmmquantile = optimize.fmin( + lambda params: np.sum( + momentcondquant(stats.beta, params, mom2s, (pq, xqsb), shape=None) + ** 2 + ), + [10, 10, 0.0, 1.0], + maxiter=2000, + ) + print("betaparest_gmmquantile", betaparest_gmmquantile) + # result sensitive to initial condition + + # Example t - distribution + # ------------------------ + + print("\n\nExample: t Distribution") + print("-----------------------") nobs = 1000 distfn = stats.t - pq = np.array([0.1,0.9]) + pq = np.array([0.1, 0.9]) paramsdgp = (5, 0, 1) trvs = distfn.rvs(5, 0, 1, size=nobs) - xqs = [stats.scoreatpercentile(trvs, p) for p in pq*100] + xqs = [stats.scoreatpercentile(trvs, p) for p in pq * 100] mom2th = distfn.stats(*paramsdgp) mom2s = np.array([trvs.mean(), trvs.var()]) - tparest_gmm3quantilefsolve = optimize.fsolve(lambda params:momentcondunbound(distfn,params, mom2s,(pq,xqs)), [10,1.,2.]) - print('tparest_gmm3quantilefsolve', tparest_gmm3quantilefsolve) - tparest_gmm3quantile = optimize.fmin(lambda params:np.sum(momentcondunbound(distfn,params, mom2s,(pq,xqs))**2), [10,1.,2.]) - print('tparest_gmm3quantile', tparest_gmm3quantile) + tparest_gmm3quantilefsolve = optimize.fsolve( + lambda params: momentcondunbound(distfn, params, mom2s, (pq, xqs)), + [10, 1.0, 2.0], + ) + print("tparest_gmm3quantilefsolve", tparest_gmm3quantilefsolve) + tparest_gmm3quantile = optimize.fmin( + lambda params: np.sum( + momentcondunbound(distfn, params, mom2s, (pq, xqs)) ** 2 + ), + [10, 1.0, 2.0], + ) + print("tparest_gmm3quantile", tparest_gmm3quantile) print(distfn.fit(trvs)) ## @@ -517,65 +567,93 @@ def fit_mps(dist, data, x0=None): ##xqs = [stats.scoreatpercentile(trvs, p) for p in pq*100] ##mom2th = distfn.stats(*paramsdgp) ##mom2s = np.array([trvs.mean(), trvs.var()]) - print(optimize.fsolve(lambda params:momentcondunboundls(distfn, params, mom2s,shape=5), [1.,2.])) - print(optimize.fmin(lambda params:np.sum(momentcondunboundls(distfn, params, mom2s,shape=5)**2), [1.,2.])) + print( + optimize.fsolve( + lambda params: momentcondunboundls(distfn, params, mom2s, shape=5), + [1.0, 2.0], + ) + ) + print( + optimize.fmin( + lambda params: np.sum( + momentcondunboundls(distfn, params, mom2s, shape=5) ** 2 + ), + [1.0, 2.0], + ) + ) print(distfn.fit(trvs)) - #loc, scale, based on quantiles - print(optimize.fsolve(lambda params:momentcondunboundls(distfn, params, mom2s,(pq,xqs),shape=5), [1.,2.])) + # loc, scale, based on quantiles + print( + 
optimize.fsolve( + lambda params: momentcondunboundls( + distfn, params, mom2s, (pq, xqs), shape=5 + ), + [1.0, 2.0], + ) + ) ## - pq = np.array([0.01, 0.05,0.1,0.4,0.6,0.9,0.95,0.99]) - #paramsdgp = (5, 0, 1) - xqs = [stats.scoreatpercentile(trvs, p) for p in pq*100] - tparest_gmmquantile = optimize.fmin(lambda params:np.sum(momentcondquant(distfn, params, mom2s,(pq,xqs), shape=None)**2), [10, 1.,2.]) - print('tparest_gmmquantile', tparest_gmmquantile) - tparest_gmmquantile2 = fitquantilesgmm(distfn, trvs, start=[10, 1.,2.], pquant=None, frozen=None) - print('tparest_gmmquantile2', tparest_gmmquantile2) - + pq = np.array([0.01, 0.05, 0.1, 0.4, 0.6, 0.9, 0.95, 0.99]) + # paramsdgp = (5, 0, 1) + xqs = [stats.scoreatpercentile(trvs, p) for p in pq * 100] + tparest_gmmquantile = optimize.fmin( + lambda params: np.sum( + momentcondquant(distfn, params, mom2s, (pq, xqs), shape=None) ** 2 + ), + [10, 1.0, 2.0], + ) + print("tparest_gmmquantile", tparest_gmmquantile) + tparest_gmmquantile2 = fitquantilesgmm( + distfn, trvs, start=[10, 1.0, 2.0], pquant=None, frozen=None + ) + print("tparest_gmmquantile2", tparest_gmmquantile2) ## - - #use trvs from before - bt = stats.t.ppf(np.linspace(0,1,21),5) - ft,bt = np.histogram(trvs,bins=bt) - print('fitbinned t-distribution') + # use trvs from before + bt = stats.t.ppf(np.linspace(0, 1, 21), 5) + ft, bt = np.histogram(trvs, bins=bt) + print("fitbinned t-distribution") tparest_mlebinew = fitbinned(stats.t, ft, bt, [10, 0, 1]) tparest_gmmbinewidentity = fitbinnedgmm(stats.t, ft, bt, [10, 0, 1]) - tparest_gmmbinewoptimal = fitbinnedgmm(stats.t, ft, bt, [10, 0, 1], weightsoptimal=False) + tparest_gmmbinewoptimal = fitbinnedgmm( + stats.t, ft, bt, [10, 0, 1], weightsoptimal=False + ) print(paramsdgp) - #Note: this can be used for chisquare test and then has correct asymptotic + # Note: this can be used for chisquare test and then has correct asymptotic # distribution for a distribution with estimated parameters, find ref again - #TODO combine into test with binning included, check rule for number of bins + # TODO combine into test with binning included, check rule for number of bins - #bt2 = stats.t.ppf(np.linspace(trvs.,1,21),5) - ft2,bt2 = np.histogram(trvs,bins=50) - 'fitbinned t-distribution' + # bt2 = stats.t.ppf(np.linspace(trvs.,1,21),5) + ft2, bt2 = np.histogram(trvs, bins=50) + "fitbinned t-distribution" tparest_mlebinel = fitbinned(stats.t, ft2, bt2, [10, 0, 1]) tparest_gmmbinelidentity = fitbinnedgmm(stats.t, ft2, bt2, [10, 0, 1]) - tparest_gmmbineloptimal = fitbinnedgmm(stats.t, ft2, bt2, [10, 0, 1], weightsoptimal=False) + tparest_gmmbineloptimal = fitbinnedgmm( + stats.t, ft2, bt2, [10, 0, 1], weightsoptimal=False + ) tparest_mle = stats.t.fit(trvs) np.set_printoptions(precision=6) - print('sample size', nobs) - print('true (df, loc, scale) ', paramsdgp) - print('parest_mle ', tparest_mle) + print("sample size", nobs) + print("true (df, loc, scale) ", paramsdgp) + print("parest_mle ", tparest_mle) print - print('tparest_mlebinel ', tparest_mlebinel) - print('tparest_gmmbinelidentity ', tparest_gmmbinelidentity) - print('tparest_gmmbineloptimal ', tparest_gmmbineloptimal) + print("tparest_mlebinel ", tparest_mlebinel) + print("tparest_gmmbinelidentity ", tparest_gmmbinelidentity) + print("tparest_gmmbineloptimal ", tparest_gmmbineloptimal) print - print('tparest_mlebinew ', tparest_mlebinew) - print('tparest_gmmbinewidentity ', tparest_gmmbinewidentity) - print('tparest_gmmbinewoptimal ', tparest_gmmbinewoptimal) + print("tparest_mlebinew ", 
tparest_mlebinew)
+    print("tparest_gmmbinewidentity ", tparest_gmmbinewidentity)
+    print("tparest_gmmbinewoptimal ", tparest_gmmbinewoptimal)
     print
-    print('tparest_gmmquantileidentity', tparest_gmmquantile)
-    print('tparest_gmm3quantilefsolve ', tparest_gmm3quantilefsolve)
-    print('tparest_gmm3quantile ', tparest_gmm3quantile)
+    print("tparest_gmmquantileidentity", tparest_gmmquantile)
+    print("tparest_gmm3quantilefsolve ", tparest_gmm3quantilefsolve)
+    print("tparest_gmm3quantile ", tparest_gmm3quantile)
 
-    ''' example results:
+    """ example results:
     standard error for df estimate looks large
     note: I do not impose that df is an integer, (b/c not necessary)
     need Monte Carlo to check variance of estimators
@@ -596,82 +674,89 @@ def fit_mps(dist, data, x0=None):
     tparest_gmmquantileidentity [ 3.940797 -0.046469  1.002001]
     tparest_gmm3quantilefsolve  [ 10.  1.  2.]
     tparest_gmm3quantile        [ 6.376101 -0.029322  1.112403]
-    '''
+    """
 
-    #Example with Maximum Product of Spacings Estimation
-    #===================================================
+    # Example with Maximum Product of Spacings Estimation
+    # ===================================================
 
-    #Example: Lognormal Distribution
-    #-------------------------------
+    # Example: Lognormal Distribution
+    # -------------------------------
 
-    #tough problem for MLE according to JKB
-    #but not sure for which parameters
+    # tough problem for MLE according to JKB
+    # but not sure for which parameters
 
-    print('\n\nExample: Lognormal Distribution')
-    print( '-------------------------------')
+    print("\n\nExample: Lognormal Distribution")
+    print("-------------------------------")
 
     sh = np.exp(10)
     sh = 0.01
     print(sh)
-    x = stats.lognorm.rvs(sh,loc=100, scale=10,size=200)
+    x = stats.lognorm.rvs(sh, loc=100, scale=10, size=200)
     print(x.min())
-    print(stats.lognorm.fit(x, 1.,loc=x.min()-1,scale=1))
+    print(stats.lognorm.fit(x, 1.0, loc=x.min() - 1, scale=1))
 
     xsorted = np.sort(x)
-    x0 = [1., x.min()-1, 1]
+    x0 = [1.0, x.min() - 1, 1]
     args = (xsorted, stats.lognorm)
-    print(optimize.fmin(logmps,x0,args=args))
-
+    print(optimize.fmin(logmps, x0, args=args))
 
-    #Example: Lomax, Pareto, Generalized Pareto Distributions
-    #--------------------------------------------------------
+    # Example: Lomax, Pareto, Generalized Pareto Distributions
+    # --------------------------------------------------------
 
-    #partially a follow-up to the discussion about numpy.random.pareto
-    #Reference: JKB
-    #example Maximum Product of Spacings Estimation
+    # partially a follow-up to the discussion about numpy.random.pareto
+    # Reference: JKB
+    # example Maximum Product of Spacings Estimation
 
     # current results:
     # does not look very good yet sensitivity to starting values
     # Pareto and Generalized Pareto look like a tough estimation problem
 
-    print('\n\nExample: Lomax, Pareto, Generalized Pareto Distributions')
-    print( '--------------------------------------------------------')
+    print("\n\nExample: Lomax, Pareto, Generalized Pareto Distributions")
+    print("--------------------------------------------------------")
 
     p2rvs = stats.genpareto.rvs(2, size=500)
-    #Note: is Lomax without +1; and classical Pareto with +1
+    # Note: is Lomax without +1; and classical Pareto with +1
     p2rvssorted = np.sort(p2rvs)
     argsp = (p2rvssorted, stats.pareto)
-    x0p = [1., p2rvs.min()-5, 1]
-    print(optimize.fmin(logmps,x0p,args=argsp))
+    x0p = [1.0, p2rvs.min() - 5, 1]
+    print(optimize.fmin(logmps, x0p, args=argsp))
     print(stats.pareto.fit(p2rvs, 0.5, loc=-20, scale=0.5))
-
print('gpdparest_ mle', stats.genpareto.fit(p2rvs)) + print("gpdparest_ mle", stats.genpareto.fit(p2rvs)) parsgpd = fit_mps(stats.genpareto, p2rvs) - print('gpdparest_ mps', parsgpd) + print("gpdparest_ mps", parsgpd) argsgpd = (p2rvssorted, stats.genpareto) options = dict(stepFix=1e-7) - #hess_ndt(fun, pars, argsgdp, options) - #the results for the following look strange, maybe refactoring error + # hess_ndt(fun, pars, argsgdp, options) + # the results for the following look strange, maybe refactoring error he, h = hess_ndt(logmps, parsgpd, argsgpd, options) print(np.linalg.eigh(he)[0]) f = lambda params: logmps(params, *argsgpd) print(f(parsgpd)) - #add binned + # add binned fp2, bp2 = np.histogram(p2rvs, bins=50) - 'fitbinned t-distribution' + "fitbinned t-distribution" gpdparest_mlebinel = fitbinned(stats.genpareto, fp2, bp2, x0p) gpdparest_gmmbinelidentity = fitbinnedgmm(stats.genpareto, fp2, bp2, x0p) - print('gpdparest_mlebinel', gpdparest_mlebinel) - print('gpdparest_gmmbinelidentity', gpdparest_gmmbinelidentity) + print("gpdparest_mlebinel", gpdparest_mlebinel) + print("gpdparest_gmmbinelidentity", gpdparest_gmmbinelidentity) gpdparest_gmmquantile2 = fitquantilesgmm( - stats.genpareto, p2rvs, start=x0p, pquant=None, frozen=None) - print('gpdparest_gmmquantile2', gpdparest_gmmquantile2) - - print(fitquantilesgmm(stats.genpareto, p2rvs, start=x0p, - pquant=np.linspace(0.01,0.99,10), frozen=None)) + stats.genpareto, p2rvs, start=x0p, pquant=None, frozen=None + ) + print("gpdparest_gmmquantile2", gpdparest_gmmquantile2) + + print( + fitquantilesgmm( + stats.genpareto, + p2rvs, + start=x0p, + pquant=np.linspace(0.01, 0.99, 10), + frozen=None, + ) + ) fp2, bp2 = np.histogram( - p2rvs, - bins=stats.genpareto(2).ppf(np.linspace(0,0.99,10))) - print('fitbinnedgmm equal weight bins') + p2rvs, bins=stats.genpareto(2).ppf(np.linspace(0, 0.99, 10)) + ) + print("fitbinnedgmm equal weight bins") print(fitbinnedgmm(stats.genpareto, fp2, bp2, x0p)) diff --git a/statsmodels/sandbox/distributions/examples/ex_extras.py b/statsmodels/sandbox/distributions/examples/ex_extras.py index 9ab22c84783..6783fd1d0df 100644 --- a/statsmodels/sandbox/distributions/examples/ex_extras.py +++ b/statsmodels/sandbox/distributions/examples/ex_extras.py @@ -9,72 +9,99 @@ import numpy as np from scipy import stats -from statsmodels.sandbox.distributions.extras import (SkewNorm_gen, skewnorm, - ACSkewT_gen, - NormExpan_gen, pdf_moments, - ExpTransf_gen, LogTransf_gen) +from statsmodels.sandbox.distributions.extras import ( + ACSkewT_gen, + ExpTransf_gen, + LogTransf_gen, + NormExpan_gen, + SkewNorm_gen, + pdf_moments, + skewnorm, +) from statsmodels.stats.moment_helpers import mc2mvsk, mnc2mc, mvsk2mnc def example_n(): - print(skewnorm.pdf(1,0), stats.norm.pdf(1), skewnorm.pdf(1,0) - stats.norm.pdf(1)) - print(skewnorm.pdf(1,1000), stats.chi.pdf(1,1), skewnorm.pdf(1,1000) - stats.chi.pdf(1,1)) - print(skewnorm.pdf(-1,-1000), stats.chi.pdf(1,1), skewnorm.pdf(-1,-1000) - stats.chi.pdf(1,1)) - rvs = skewnorm.rvs(0,size=500) - print('sample mean var: ', rvs.mean(), rvs.var()) - print('theoretical mean var', skewnorm.stats(0)) - rvs = skewnorm.rvs(5,size=500) - print('sample mean var: ', rvs.mean(), rvs.var()) - print('theoretical mean var', skewnorm.stats(5)) - print(skewnorm.cdf(1,0), stats.norm.cdf(1), skewnorm.cdf(1,0) - stats.norm.cdf(1)) - print(skewnorm.cdf(1,1000), stats.chi.cdf(1,1), skewnorm.cdf(1,1000) - stats.chi.cdf(1,1)) - print(skewnorm.sf(0.05,1000), stats.chi.sf(0.05,1), skewnorm.sf(0.05,1000) - 
stats.chi.sf(0.05,1)) + print( + skewnorm.pdf(1, 0), + stats.norm.pdf(1), + skewnorm.pdf(1, 0) - stats.norm.pdf(1), + ) + print( + skewnorm.pdf(1, 1000), + stats.chi.pdf(1, 1), + skewnorm.pdf(1, 1000) - stats.chi.pdf(1, 1), + ) + print( + skewnorm.pdf(-1, -1000), + stats.chi.pdf(1, 1), + skewnorm.pdf(-1, -1000) - stats.chi.pdf(1, 1), + ) + rvs = skewnorm.rvs(0, size=500) + print("sample mean var: ", rvs.mean(), rvs.var()) + print("theoretical mean var", skewnorm.stats(0)) + rvs = skewnorm.rvs(5, size=500) + print("sample mean var: ", rvs.mean(), rvs.var()) + print("theoretical mean var", skewnorm.stats(5)) + print( + skewnorm.cdf(1, 0), + stats.norm.cdf(1), + skewnorm.cdf(1, 0) - stats.norm.cdf(1), + ) + print( + skewnorm.cdf(1, 1000), + stats.chi.cdf(1, 1), + skewnorm.cdf(1, 1000) - stats.chi.cdf(1, 1), + ) + print( + skewnorm.sf(0.05, 1000), + stats.chi.sf(0.05, 1), + skewnorm.sf(0.05, 1000) - stats.chi.sf(0.05, 1), + ) def example_T(): skewt = ACSkewT_gen() - rvs = skewt.rvs(10,0,size=500) - print('sample mean var: ', rvs.mean(), rvs.var()) - print('theoretical mean var', skewt.stats(10,0)) - print('t mean var', stats.t.stats(10)) - print(skewt.stats(10,1000)) # -> folded t distribution, as alpha -> inf - rvs = np.abs(stats.t.rvs(10,size=1000)) + rvs = skewt.rvs(10, 0, size=500) + print("sample mean var: ", rvs.mean(), rvs.var()) + print("theoretical mean var", skewt.stats(10, 0)) + print("t mean var", stats.t.stats(10)) + print(skewt.stats(10, 1000)) # -> folded t distribution, as alpha -> inf + rvs = np.abs(stats.t.rvs(10, size=1000)) print(rvs.mean(), rvs.var()) - def examples_normexpand(): skewnorm = SkewNorm_gen() - rvs = skewnorm.rvs(5,size=100) - normexpan = NormExpan_gen(rvs, mode='sample') + rvs = skewnorm.rvs(5, size=100) + normexpan = NormExpan_gen(rvs, mode="sample") smvsk = stats.describe(rvs)[2:] - print('sample: mu,sig,sk,kur') + print("sample: mu,sig,sk,kur") print(smvsk) - dmvsk = normexpan.stats(moments='mvsk') - print('normexpan: mu,sig,sk,kur') + dmvsk = normexpan.stats(moments="mvsk") + print("normexpan: mu,sig,sk,kur") print(dmvsk) - print('mvsk diff distribution - sample') + print("mvsk diff distribution - sample") print(np.array(dmvsk) - np.array(smvsk)) - print('normexpan attributes mvsk') + print("normexpan attributes mvsk") print(mc2mvsk(normexpan.cnt)) print(normexpan.mvsk) mnc = mvsk2mnc(dmvsk) mc = mnc2mc(mnc) - print('central moments') + print("central moments") print(mc) - print('non-central moments') + print("non-central moments") print(mnc) - pdffn = pdf_moments(mc) - print('\npdf approximation from moments') - print('pdf at', mc[0]-1,mc[0]+1) - print(pdffn([mc[0]-1,mc[0]+1])) - print(normexpan.pdf([mc[0]-1,mc[0]+1])) + print("\npdf approximation from moments") + print("pdf at", mc[0] - 1, mc[0] + 1) + print(pdffn([mc[0] - 1, mc[0] + 1])) + print(normexpan.pdf([mc[0] - 1, mc[0] + 1])) def examples_transf(): @@ -85,10 +112,12 @@ def examples_transf(): ##print(stats.lognorm.stats(1)) ##print(lognormal.rvs(size=10)) - print('Results for lognormal') - lognormalg = ExpTransf_gen(stats.norm, a=0, name = 'Log transformed normal general') + print("Results for lognormal") + lognormalg = ExpTransf_gen( + stats.norm, a=0, name="Log transformed normal general" + ) print(lognormalg.cdf(1)) - print(stats.lognorm.cdf(1,1)) + print(stats.lognorm.cdf(1, 1)) print(lognormalg.stats()) print(stats.lognorm.stats(1)) print(lognormalg.rvs(size=5)) @@ -98,29 +127,29 @@ def examples_transf(): ##print(loggammag._cdf(1,10)) ##print(stats.loggamma.cdf(1,10)) - print('Results for 
expgamma') + print("Results for expgamma") loggammaexpg = LogTransf_gen(stats.gamma) - print(loggammaexpg._cdf(1,10)) - print(stats.loggamma.cdf(1,10)) - print(loggammaexpg._cdf(2,15)) - print(stats.loggamma.cdf(2,15)) - + print(loggammaexpg._cdf(1, 10)) + print(stats.loggamma.cdf(1, 10)) + print(loggammaexpg._cdf(2, 15)) + print(stats.loggamma.cdf(2, 15)) # this requires change in scipy.stats.distribution - #print(loggammaexpg.cdf(1,10)) + # print(loggammaexpg.cdf(1,10)) - print('Results for loglaplace') + print("Results for loglaplace") loglaplaceg = LogTransf_gen(stats.laplace) print(loglaplaceg._cdf(2)) - print(stats.loglaplace.cdf(2,1)) + print(stats.loglaplace.cdf(2, 1)) loglaplaceexpg = ExpTransf_gen(stats.laplace) print(loglaplaceexpg._cdf(2)) - stats.loglaplace.cdf(3,3) - #0.98148148148148151 - loglaplaceexpg._cdf(3,0,1./3) - #0.98148148148148151 + stats.loglaplace.cdf(3, 3) + # 0.98148148148148151 + loglaplaceexpg._cdf(3, 0, 1.0 / 3) + # 0.98148148148148151 + -if __name__ == '__main__': +if __name__ == "__main__": example_n() example_T() examples_normexpand() diff --git a/statsmodels/sandbox/distributions/examples/ex_fitfr.py b/statsmodels/sandbox/distributions/examples/ex_fitfr.py index 5f00a125110..745301362b6 100644 --- a/statsmodels/sandbox/distributions/examples/ex_fitfr.py +++ b/statsmodels/sandbox/distributions/examples/ex_fitfr.py @@ -1,14 +1,15 @@ -'''Example for estimating distribution parameters when some are fixed. +"""Example for estimating distribution parameters when some are fixed. This uses currently a patched version of the distributions, two methods are added to the continuous distributions. This has no side effects. It also adds bounds to vonmises, which changes the behavior of it for some methods. -''' +""" import numpy as np from scipy import stats + # Note the following import attaches methods to scipy.stats.distributions # and adds bounds to stats.vonmises # from statsmodels.sandbox.distributions import sppatch @@ -17,12 +18,12 @@ np.random.seed(12345) x = stats.gamma.rvs(2.5, loc=0, scale=1.2, size=200) -#estimate all parameters +# estimate all parameters print(stats.gamma.fit(x)) print(stats.gamma.fit_fr(x, frozen=[np.nan, np.nan, np.nan])) -#estimate shape parameter only -print(stats.gamma.fit_fr(x, frozen=[np.nan, 0., 1.2])) +# estimate shape parameter only +print(stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, 1.2])) np.random.seed(12345) x = stats.lognorm.rvs(2, loc=0, scale=2, size=200) -print(stats.lognorm.fit_fr(x, frozen=[np.nan, 0., np.nan])) +print(stats.lognorm.fit_fr(x, frozen=[np.nan, 0.0, np.nan])) diff --git a/statsmodels/sandbox/distributions/examples/ex_gof.py b/statsmodels/sandbox/distributions/examples/ex_gof.py index 13345fb7a69..6a15943e094 100644 --- a/statsmodels/sandbox/distributions/examples/ex_gof.py +++ b/statsmodels/sandbox/distributions/examples/ex_gof.py @@ -1,11 +1,17 @@ from scipy import stats + from statsmodels.stats import gof -poissrvs = stats.poisson.rvs(0.6, size = 200) +poissrvs = stats.poisson.rvs(0.6, size=200) -freq, expfreq, histsupp = gof.gof_binning_discrete(poissrvs, stats.poisson, (0.6,), nsupp=20) +freq, expfreq, histsupp = gof.gof_binning_discrete( + poissrvs, stats.poisson, (0.6,), nsupp=20 +) (chi2val, pval) = stats.chisquare(freq, expfreq) print(chi2val, pval) -print(gof.gof_chisquare_discrete(stats.poisson, (0.6,), poissrvs, 0.05, - 'Poisson')) +print( + gof.gof_chisquare_discrete( + stats.poisson, (0.6,), poissrvs, 0.05, "Poisson" + ) +) diff --git 
a/statsmodels/sandbox/distributions/examples/ex_mvelliptical.py b/statsmodels/sandbox/distributions/examples/ex_mvelliptical.py index 55801491e04..cf0710e4182 100644 --- a/statsmodels/sandbox/distributions/examples/ex_mvelliptical.py +++ b/statsmodels/sandbox/distributions/examples/ex_mvelliptical.py @@ -10,50 +10,44 @@ for comparison I used R mvtnorm version 0.9-96 """ +import matplotlib.pyplot as plt import numpy as np from numpy.testing import assert_array_almost_equal -import matplotlib.pyplot as plt import statsmodels.api as sm import statsmodels.distributions.mixture_rvs as mix import statsmodels.sandbox.distributions.mv_normal as mvd - -cov3 = np.array([[ 1. , 0.5 , 0.75], - [ 0.5 , 1.5 , 0.6 ], - [ 0.75, 0.6 , 2. ]]) +cov3 = np.array([[1.0, 0.5, 0.75], [0.5, 1.5, 0.6], [0.75, 0.6, 2.0]]) mu = np.array([-1, 0.0, 2.0]) -#************** multivariate normal distribution *************** +# ************** multivariate normal distribution *************** mvn3 = mvd.MVNormal(mu, cov3) -#compare with random sample +# compare with random sample x = mvn3.rvs(size=1000000) -xli = [[2., 1., 1.5], - [0., 2., 1.5], - [1.5, 1., 2.5], - [0., 1., 1.5]] +xli = [[2.0, 1.0, 1.5], [0.0, 2.0, 1.5], [1.5, 1.0, 2.5], [0.0, 1.0, 1.5]] -xliarr = np.asarray(xli).T[None,:, :] +xliarr = np.asarray(xli).T[None, :, :] -#from R session -#pmvnorm(lower=-Inf,upper=(x[0,.]-mu)/sqrt(diag(cov3)),mean=rep(0,3),corr3) +# from R session +# pmvnorm(lower=-Inf,upper=(x[0,.]-mu)/sqrt(diag(cov3)),mean=rep(0,3),corr3) r_cdf = [0.3222292, 0.3414643, 0.5450594, 0.3116296] r_cdf_errors = [1.715116e-05, 1.590284e-05, 5.356471e-05, 3.567548e-05] n_cdf = [mvn3.cdf(a) for a in xli] assert_array_almost_equal(r_cdf, n_cdf, decimal=4) print(n_cdf) -print('') -print((x>> np.random.seed(464239857) >>> rvstsq = squaretg.rvs(10,size=100000) >>> squaretg.moment(4,10) @@ -271,21 +388,22 @@ def test_squared_normal_chi2(): (array(1.2500000000000022), array(4.6874999999630909), array(5.7735026919777912), array(106.00000000170148)) >>> stats.describe(rvstsq) (100000, (3.2953470738423724e-009, 92.649615690914473), 1.2534924690963247, 4.7741427958594098, 6.1562177957041895, 100.99331166052181) - ''' + """ # checking the distribution # fraction of observations in each decile - dec = squaretg.ppf(np.linspace(0.,1,11),10) - freq,edges = np.histogram(rvstsq, bins=dec) - print(freq/float(len(rvstsq))) + dec = squaretg.ppf(np.linspace(0.0, 1, 11), 10) + freq, edges = np.histogram(rvstsq, bins=dec) + print(freq / float(len(rvstsq))) import matplotlib.pyplot as plt - freq,edges,_ = plt.hist(rvstsq, bins=50, range=(0,4),normed=True) - edges += (edges[1]-edges[0])/2.0 - plt.plot(edges[:-1], squaretg.pdf(edges[:-1], 10), 'r') - #plt.show() - #plt.close() - ''' + freq, edges, _ = plt.hist(rvstsq, bins=50, range=(0, 4), normed=True) + edges += (edges[1] - edges[0]) / 2.0 + plt.plot(edges[:-1], squaretg.pdf(edges[:-1], 10), "r") + # plt.show() + # plt.close() + + """ >>> plt.plot(edges[:-1], squaretg.pdf(edges[:-1], 10), 'r') [] >>> plt.fill(edges[4:8], squaretg.pdf(edges[4:8], 10), 'r') @@ -309,7 +427,8 @@ def test_squared_normal_chi2(): AttributeError: 'AxesSubplot' object has no attribute 'fill_between' >>> ax1.fill(edges[4:8], squaretg.pdf(edges[4:8], 10), 0, 'r') Traceback (most recent call last): - ''' + """ import pytest - pytest.main([__file__, '-vvs', '-x', '--pdb']) + + pytest.main([__file__, "-vvs", "-x", "--pdb"]) diff --git a/statsmodels/sandbox/distributions/examples/matchdist.py b/statsmodels/sandbox/distributions/examples/matchdist.py index 
a2d4a7bf8f5..c97e541cdd5 100644 --- a/statsmodels/sandbox/distributions/examples/matchdist.py +++ b/statsmodels/sandbox/distributions/examples/matchdist.py @@ -1,4 +1,4 @@ -'''given a 1D sample of observation, find a matching distribution +"""given a 1D sample of observation, find a matching distribution * estimate maximum likelihood parameter for each distribution * rank estimated distribution by Kolmogorov-Smirnov and Anderson-Darling @@ -14,69 +14,153 @@ * split estimation by support, add option and choose automatically * -''' -from scipy import stats -import numpy as np +""" import matplotlib.pyplot as plt +import numpy as np +from scipy import stats + +# stats.distributions.beta_gen._fitstart = lambda self, data : (5,5,0,1) -#stats.distributions.beta_gen._fitstart = lambda self, data : (5,5,0,1) -def plothist(x,distfn, args, loc, scale, right=1): +def plothist(x, distfn, args, loc, scale, right=1): plt.figure() # the histogram of the data - n, bins, patches = plt.hist(x, 25, normed=1, facecolor='green', alpha=0.75) + n, bins, patches = plt.hist(x, 25, normed=1, facecolor="green", alpha=0.75) maxheight = max([p.get_height() for p in patches]) print(maxheight) axlim = list(plt.axis()) - #print(axlim) - axlim[-1] = maxheight*1.05 - #plt.axis(tuple(axlim)) -## print(bins) -## print('args in plothist', args) + # print(axlim) + axlim[-1] = maxheight * 1.05 + # plt.axis(tuple(axlim)) + ## print(bins) + ## print('args in plothist', args) # add a 'best fit' line - #yt = stats.norm.pdf( bins, loc=loc, scale=scale) - yt = distfn.pdf( bins, loc=loc, scale=scale, *args) - yt[yt>maxheight]=maxheight - lt = plt.plot(bins, yt, 'r--', linewidth=1) - ys = stats.t.pdf( bins, 10,scale=10,)*right - ls = plt.plot(bins, ys, 'b-', linewidth=1) - - plt.xlabel('Smarts') - plt.ylabel('Probability') - plt.title(r'$\mathrm{Testing: %s :}\ \mu=%f,\ \sigma=%f$' % (distfn.name,loc,scale)) - - #plt.axis([bins[0], bins[-1], 0, 0.134+0.05]) + # yt = stats.norm.pdf( bins, loc=loc, scale=scale) + yt = distfn.pdf(bins, loc=loc, scale=scale, *args) + yt[yt > maxheight] = maxheight + lt = plt.plot(bins, yt, "r--", linewidth=1) + ys = ( + stats.t.pdf( + bins, + 10, + scale=10, + ) + * right + ) + ls = plt.plot(bins, ys, "b-", linewidth=1) + + plt.xlabel("Smarts") + plt.ylabel("Probability") + plt.title( + r"$\mathrm{Testing: %s :}\ \mu=%f,\ \sigma=%f$" + % (distfn.name, loc, scale) + ) + + # plt.axis([bins[0], bins[-1], 0, 0.134+0.05]) plt.grid(True) plt.draw() - #plt.show() - #plt.close() - - - - - -#targetdist = ['norm','t','truncnorm','johnsonsu','johnsonsb', -targetdist = ['norm','alpha', 'anglit', 'arcsine', - 'beta', 'betaprime', 'bradford', 'burr', 'fisk', 'cauchy', - 'chi', 'chi2', 'cosine', 'dgamma', 'dweibull', 'erlang', - 'expon', 'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy', - 'f', 'foldnorm', 'frechet_r', 'weibull_min', 'frechet_l', - 'weibull_max', 'genlogistic', 'genpareto', 'genexpon', 'genextreme', - 'gamma', 'gengamma', 'genhalflogistic', 'gompertz', 'gumbel_r', - 'gumbel_l', 'halfcauchy', 'halflogistic', 'halfnorm', 'hypsecant', - 'gausshyper', 'invgamma', 'invnorm', 'invweibull', 'johnsonsb', - 'johnsonsu', 'laplace', 'levy', 'levy_l', - 'logistic', 'loggamma', 'loglaplace', 'lognorm', 'gilbrat', - 'maxwell', 'mielke', 'nakagami', 'ncx2', 'ncf', 't', - 'nct', 'pareto', 'lomax', 'powerlaw', 'powerlognorm', 'powernorm', - 'rdist', 'rayleigh', 'reciprocal', 'rice', 'recipinvgauss', - 'semicircular', 'triang', 'truncexpon', 'truncnorm', - 'tukeylambda', 'uniform', 'vonmises', 'wald', 'wrapcauchy', - - 
'binom', 'bernoulli', 'nbinom', 'geom', 'hypergeom', 'logser', - 'poisson', 'planck', 'boltzmann', 'randint', 'zipf', 'dlaplace'] + # plt.show() + # plt.close() + + +# targetdist = ['norm','t','truncnorm','johnsonsu','johnsonsb', +targetdist = [ + "norm", + "alpha", + "anglit", + "arcsine", + "beta", + "betaprime", + "bradford", + "burr", + "fisk", + "cauchy", + "chi", + "chi2", + "cosine", + "dgamma", + "dweibull", + "erlang", + "expon", + "exponweib", + "exponpow", + "fatiguelife", + "foldcauchy", + "f", + "foldnorm", + "frechet_r", + "weibull_min", + "frechet_l", + "weibull_max", + "genlogistic", + "genpareto", + "genexpon", + "genextreme", + "gamma", + "gengamma", + "genhalflogistic", + "gompertz", + "gumbel_r", + "gumbel_l", + "halfcauchy", + "halflogistic", + "halfnorm", + "hypsecant", + "gausshyper", + "invgamma", + "invnorm", + "invweibull", + "johnsonsb", + "johnsonsu", + "laplace", + "levy", + "levy_l", + "logistic", + "loggamma", + "loglaplace", + "lognorm", + "gilbrat", + "maxwell", + "mielke", + "nakagami", + "ncx2", + "ncf", + "t", + "nct", + "pareto", + "lomax", + "powerlaw", + "powerlognorm", + "powernorm", + "rdist", + "rayleigh", + "reciprocal", + "rice", + "recipinvgauss", + "semicircular", + "triang", + "truncexpon", + "truncnorm", + "tukeylambda", + "uniform", + "vonmises", + "wald", + "wrapcauchy", + "binom", + "bernoulli", + "nbinom", + "geom", + "hypergeom", + "logser", + "poisson", + "planck", + "boltzmann", + "randint", + "zipf", + "dlaplace", +] left = [] right = [] @@ -86,89 +170,180 @@ def plothist(x,distfn, args, loc, scale, right=1): contdist = [] discrete = [] -categ = {('open','open'):'unbound', ('0','open'):'right',('open','0',):'left', - ('finite','finite'):'finite',('oth','oth'):'other'} -categ = {('open','open'):unbound, ('0','open'):right,('open','0',):left, - ('finite','finite'):finite,('oth','oth'):other} +categ = { + ("open", "open"): "unbound", + ("0", "open"): "right", + ( + "open", + "0", + ): "left", + ("finite", "finite"): "finite", + ("oth", "oth"): "other", +} +categ = { + ("open", "open"): unbound, + ("0", "open"): right, + ( + "open", + "0", + ): left, + ("finite", "finite"): finite, + ("oth", "oth"): other, +} categ2 = { - ('open', '0') : ['frechet_l', 'weibull_max', 'levy_l'], - ('finite', 'finite') : ['anglit', 'cosine', 'rdist', 'semicircular'], - ('0', 'open') : ['alpha', 'burr', 'fisk', 'chi', 'chi2', 'erlang', - 'expon', 'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy', 'f', - 'foldnorm', 'frechet_r', 'weibull_min', 'genpareto', 'genexpon', - 'gamma', 'gengamma', 'genhalflogistic', 'gompertz', 'halfcauchy', - 'halflogistic', 'halfnorm', 'invgamma', 'invnorm', 'invweibull', - 'levy', 'loglaplace', 'lognorm', 'gilbrat', 'maxwell', 'mielke', - 'nakagami', 'ncx2', 'ncf', 'lomax', 'powerlognorm', 'rayleigh', - 'rice', 'recipinvgauss', 'truncexpon', 'wald'], - ('open', 'open') : ['cauchy', 'dgamma', 'dweibull', 'genlogistic', 'genextreme', - 'gumbel_r', 'gumbel_l', 'hypsecant', 'johnsonsu', 'laplace', - 'logistic', 'loggamma', 't', 'nct', 'powernorm', 'reciprocal', - 'truncnorm', 'tukeylambda', 'vonmises'], - ('0', 'finite') : ['arcsine', 'beta', 'betaprime', 'bradford', 'gausshyper', - 'johnsonsb', 'powerlaw', 'triang', 'uniform', 'wrapcauchy'], - ('finite', 'open') : ['pareto'] - } - -#Note: weibull_max == frechet_l - -right_incorrect = ['genextreme'] - -right_all = categ2[('0', 'open')] + categ2[('0', 'finite')] + categ2[('finite', 'open')]\ - + right_incorrect + ("open", "0"): ["frechet_l", "weibull_max", "levy_l"], + ("finite", 
"finite"): ["anglit", "cosine", "rdist", "semicircular"], + ("0", "open"): [ + "alpha", + "burr", + "fisk", + "chi", + "chi2", + "erlang", + "expon", + "exponweib", + "exponpow", + "fatiguelife", + "foldcauchy", + "f", + "foldnorm", + "frechet_r", + "weibull_min", + "genpareto", + "genexpon", + "gamma", + "gengamma", + "genhalflogistic", + "gompertz", + "halfcauchy", + "halflogistic", + "halfnorm", + "invgamma", + "invnorm", + "invweibull", + "levy", + "loglaplace", + "lognorm", + "gilbrat", + "maxwell", + "mielke", + "nakagami", + "ncx2", + "ncf", + "lomax", + "powerlognorm", + "rayleigh", + "rice", + "recipinvgauss", + "truncexpon", + "wald", + ], + ("open", "open"): [ + "cauchy", + "dgamma", + "dweibull", + "genlogistic", + "genextreme", + "gumbel_r", + "gumbel_l", + "hypsecant", + "johnsonsu", + "laplace", + "logistic", + "loggamma", + "t", + "nct", + "powernorm", + "reciprocal", + "truncnorm", + "tukeylambda", + "vonmises", + ], + ("0", "finite"): [ + "arcsine", + "beta", + "betaprime", + "bradford", + "gausshyper", + "johnsonsb", + "powerlaw", + "triang", + "uniform", + "wrapcauchy", + ], + ("finite", "open"): ["pareto"], +} + +# Note: weibull_max == frechet_l + +right_incorrect = ["genextreme"] + +right_all = ( + categ2[("0", "open")] + + categ2[("0", "finite")] + + categ2[("finite", "open")] + + right_incorrect +) for distname in targetdist: - distfn = getattr(stats,distname) - if hasattr(distfn,'_pdf'): + distfn = getattr(stats, distname) + if hasattr(distfn, "_pdf"): if np.isinf(distfn.a): - low = 'open' + low = "open" elif distfn.a == 0: - low = '0' + low = "0" else: - low = 'finite' + low = "finite" if np.isinf(distfn.b): - high = 'open' + high = "open" elif distfn.b == 0: - high = '0' + high = "0" else: - high = 'finite' + high = "finite" contdist.append(distname) - categ.setdefault((low,high),[]).append(distname) + categ.setdefault((low, high), []).append(distname) -not_good = ['genextreme', 'reciprocal', 'vonmises'] +not_good = ["genextreme", "reciprocal", "vonmises"] # 'genextreme' is right (or left?), 'reciprocal' requires 00] + rvs_pos = rvs_orig[rvs_orig > 0] rightfactor = 1 rvs_right = rvs_pos - print('='*50) - print('samplesize = ', n) + print("=" * 50) + print("samplesize = ", n) for distname in targetdist: - distfn = getattr(stats,distname) + distfn = getattr(stats, distname) if distname in right_all: rvs = rvs_right rind = rightfactor @@ -176,85 +351,112 @@ def plothist(x,distfn, args, loc, scale, right=1): else: rvs = rvs_orig rind = 1 - print('-'*30) - print('target = %s' % distname) + print("-" * 30) + print("target = %s" % distname) sm = rvs.mean() sstd = np.sqrt(rvs.var()) ssupp = (rvs.min(), rvs.max()) - if distname in ['truncnorm','betaprime','reciprocal']: - - par0 = (sm-2*sstd,sm+2*sstd) - par_est = tuple(distfn.fit(rvs,loc=sm,scale=sstd,*par0)) - elif distname == 'norm': - par_est = tuple(distfn.fit(rvs,loc=sm,scale=sstd)) - elif distname == 'genextreme': - par_est = tuple(distfn.fit(rvs,-5,loc=sm,scale=sstd)) - elif distname == 'wrapcauchy': - par_est = tuple(distfn.fit(rvs,0.5,loc=0,scale=sstd)) - elif distname == 'f': - par_est = tuple(distfn.fit(rvs,10,15,loc=0,scale=1)) + if distname in ["truncnorm", "betaprime", "reciprocal"]: + + par0 = (sm - 2 * sstd, sm + 2 * sstd) + par_est = tuple(distfn.fit(rvs, loc=sm, scale=sstd, *par0)) + elif distname == "norm": + par_est = tuple(distfn.fit(rvs, loc=sm, scale=sstd)) + elif distname == "genextreme": + par_est = tuple(distfn.fit(rvs, -5, loc=sm, scale=sstd)) + elif distname == "wrapcauchy": + par_est = 
tuple(distfn.fit(rvs, 0.5, loc=0, scale=sstd)) + elif distname == "f": + par_est = tuple(distfn.fit(rvs, 10, 15, loc=0, scale=1)) elif distname in right: sm = rvs.mean() sstd = np.sqrt(rvs.var()) - par_est = tuple(distfn.fit(rvs,loc=0,scale=1)) + par_est = tuple(distfn.fit(rvs, loc=0, scale=1)) else: sm = rvs.mean() sstd = np.sqrt(rvs.var()) - par_est = tuple(distfn.fit(rvs,loc=sm,scale=sstd)) + par_est = tuple(distfn.fit(rvs, loc=sm, scale=sstd)) - - print('fit', par_est) + print("fit", par_est) arg_est = par_est[:-2] loc_est = par_est[-2] scale_est = par_est[-1] - rvs_normed = (rvs-loc_est)/scale_est - ks_stat, ks_pval = stats.kstest(rvs_normed,distname, arg_est) - print('kstest', ks_stat, ks_pval) + rvs_normed = (rvs - loc_est) / scale_est + ks_stat, ks_pval = stats.kstest(rvs_normed, distname, arg_est) + print("kstest", ks_stat, ks_pval) quant = 0.1 - crit = distfn.ppf(1-quant*float(rind), loc=loc_est, scale=scale_est,*par_est) - tail_prob = stats.t.sf(crit,dgp_arg,scale=dgp_scale) - print('crit, prob', quant, crit, tail_prob) - #if distname == 'norm': - #plothist(rvs,loc_est,scale_est) - #args = tuple() - results.append([distname,ks_stat, ks_pval,arg_est,loc_est,scale_est,crit,tail_prob ]) - #plothist(rvs,distfn,arg_est,loc_est,scale_est) - - #plothist(rvs,distfn,arg_est,loc_est,scale_est) - #plt.show() - #plt.close() - #TODO: collect results and compare tail quantiles - + crit = distfn.ppf( + 1 - quant * float(rind), loc=loc_est, scale=scale_est, *par_est + ) + tail_prob = stats.t.sf(crit, dgp_arg, scale=dgp_scale) + print("crit, prob", quant, crit, tail_prob) + # if distname == 'norm': + # plothist(rvs,loc_est,scale_est) + # args = tuple() + results.append( + [ + distname, + ks_stat, + ks_pval, + arg_est, + loc_est, + scale_est, + crit, + tail_prob, + ] + ) + # plothist(rvs,distfn,arg_est,loc_est,scale_est) + + # plothist(rvs,distfn,arg_est,loc_est,scale_est) + # plt.show() + # plt.close() + # TODO: collect results and compare tail quantiles from operator import itemgetter - res_sort = sorted(results, key = itemgetter(2)) + res_sort = sorted(results, key=itemgetter(2)) - res_sort.reverse() #kstest statistic: smaller is better, pval larger is better + res_sort.reverse() # kstest statistic: smaller is better, pval larger is better - print('number of distributions', len(res_sort)) - imagedir = 'matchresults' + print("number of distributions", len(res_sort)) + imagedir = "matchresults" import os + if not os.path.exists(imagedir): os.makedirs(imagedir) - for ii,di in enumerate(res_sort): - distname,ks_stat, ks_pval,arg_est,loc_est,scale_est,crit,tail_prob = di[:] - distfn = getattr(stats,distname) + for ii, di in enumerate(res_sort): + ( + distname, + ks_stat, + ks_pval, + arg_est, + loc_est, + scale_est, + crit, + tail_prob, + ) = di[:] + distfn = getattr(stats, distname) if distname in right_all: rvs = rvs_right rind = rightfactor - ri = 'r' + ri = "r" else: rvs = rvs_orig - ri = '' + ri = "" rind = 1 - print('%s ks-stat = %f, ks-pval = %f tail_prob = %f)' % \ - (distname, ks_stat, ks_pval, tail_prob)) - ## print('arg_est = %s, loc_est = %f scale_est = %f)' % \ - ## (repr(arg_est),loc_est,scale_est)) - plothist(rvs,distfn,arg_est,loc_est,scale_est,right = rind) - plt.savefig(os.path.join(imagedir,'%s%s%02d_%s.png'% (prefix, ri,ii, distname))) + print( + "%s ks-stat = %f, ks-pval = %f tail_prob = %f)" + % (distname, ks_stat, ks_pval, tail_prob) + ) + ## print('arg_est = %s, loc_est = %f scale_est = %f)' % \ + ## (repr(arg_est),loc_est,scale_est)) + plothist(rvs, distfn, arg_est, 
loc_est, scale_est, right=rind) + plt.savefig( + os.path.join( + imagedir, "%s%s%02d_%s.png" % (prefix, ri, ii, distname) + ) + ) ##plt.show() ##plt.close() diff --git a/statsmodels/sandbox/distributions/extras.py b/statsmodels/sandbox/distributions/extras.py index da6e12b80b8..6241e16c004 100644 --- a/statsmodels/sandbox/distributions/extras.py +++ b/statsmodels/sandbox/distributions/extras.py @@ -1,4 +1,5 @@ -"""Various extensions to distributions +""" +Various extensions to distributions * skew normal and skew t distribution by Azzalini, A. & Capitanio, A. * Gram-Charlier expansion distribution (using 4 moments), @@ -30,7 +31,7 @@ >>> logtg = Transf_gen(stats.t, np.exp, np.log, numargs = 1, a=0, name = 'lnnorm', longname = 'Exp transformed normal', - extradoc = '\ndistribution of y = exp(x), with x standard normal' + # extradoc = '\ndistribution of y = exp(x), with x standard normal' 'precision for moment andstats is not very high, 2-3 decimals') >>> logtg.cdf(5, 6) 0.92067704211191848 @@ -51,13 +52,12 @@ """ import numpy as np -from numpy import poly1d, sqrt, exp - +from numpy import exp, poly1d, sqrt import scipy -from scipy import stats, special +from scipy import special, stats from scipy.stats import distributions -from statsmodels.stats.moment_helpers import mvsk2mc, mc2mvsk +from statsmodels.stats.moment_helpers import mc2mvsk, mvsk2mc try: from scipy.stats._mvn import mvndst @@ -71,7 +71,8 @@ class SkewNorm_gen(distributions.rv_continuous): - """univariate Skew-Normal distribution of Azzalini + """ + Univariate Skew-Normal distribution of Azzalini class follows scipy.stats.distributions pattern but with __init__ @@ -85,7 +86,7 @@ def __init__(self): self, name="Skew Normal distribution", shapes="alpha", - extradoc=""" """, + # extradoc=""" """, ) def _argcheck(self, alpha): @@ -124,10 +125,14 @@ def _stats_skip(self, x, alpha, moments="mvsk"): # generated the same way as distributions in stats.distributions class SkewNorm2_gen(distributions.rv_continuous): - """univariate Skew-Normal distribution of Azzalini + """ + Univariate Skew-Normal distribution of Azzalini class follows scipy.stats.distributions pattern + Notes + ----- + -inf < alpha < inf """ def _argcheck(self, alpha): @@ -146,15 +151,34 @@ def _pdf(self, x, alpha): skewnorm2 = SkewNorm2_gen( name="Skew Normal distribution", shapes="alpha", - extradoc=""" -inf < alpha < inf""", + # extradoc=""" -inf < alpha < inf""", ) class ACSkewT_gen(distributions.rv_continuous): - """univariate Skew-T distribution of Azzalini + """ + Univariate Skew-T distribution of Azzalini class follows scipy.stats.distributions pattern but with __init__ + + Notes + ----- + Skewed T distribution by Azzalini, A. & Capitanio, A. (2003)_ + + the pdf is given by: + + pdf(x) = 2.0 * t.pdf(x, df) * t.cdf(df+1, alpha*x*np.sqrt((1+df)/(x**2+df))) + + with alpha >=0 + + Note: different from skewed t distribution by Hansen 1999 + + .._ + Azzalini, A. & Capitanio, A. (2003), Distributions generated by + perturbation of symmetry with emphasis on a multivariate skew-t + distribution, appears in J.Roy.Statist.Soc, series B, vol.65, + pp.367-389 """ def __init__(self): @@ -163,21 +187,18 @@ def __init__(self): self, name="Skew T distribution", shapes="df, alpha", - extradoc=""" -Skewed T distribution by Azzalini, A. & Capitanio, A. (2003)_ - -the pdf is given by: - pdf(x) = 2.0 * t.pdf(x, df) * t.cdf(df+1, alpha*x*np.sqrt((1+df)/(x**2+df))) - -with alpha >=0 - -Note: different from skewed t distribution by Hansen 1999 -.._ -Azzalini, A. & Capitanio, A. 
(2003), Distributions generated by perturbation of -symmetry with emphasis on a multivariate skew-t distribution, -appears in J.Roy.Statist.Soc, series B, vol.65, pp.367-389 - -""", + # extradoc=""" + # Skewed T distribution by Azzalini, A. & Capitanio, A. (2003)_ + # + # the pdf is given by: + # pdf(x) = 2.0 * t.pdf(x, df) * t.cdf(df+1, alpha*x*np.sqrt((1+df)/(x**2+df))) + # with alpha >=0 + # Note: different from skewed t distribution by Hansen 1999 + # .._ + # Azzalini, A. & Capitanio, A. (2003), Distributions generated by perturbation of + # symmetry with emphasis on a multivariate skew-t distribution, + # appears in J.Roy.Statist.Soc, series B, vol.65, pp.367-389 + # """, ) def _argcheck(self, df, alpha): @@ -443,6 +464,23 @@ class NormExpan_gen(distributions.rv_continuous): class follows scipy.stats.distributions pattern but with __init__ + Notes + ----- + The distribution is defined as the Gram-Charlier expansion of + the normal distribution using the first four moments. The pdf + is given by + + pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc) + + where xc = (x-mu)/sig is the standardized value of the random variable + and H(xc,3) and H(xc,4) are Hermite polynomials + + Note: This distribution has to be parametrized during + initialization and instantiation, and does not have a shape + parameter after instantiation (similar to frozen distribution + except for location and scale.) Location and scale can be used + as with other distributions, however note, that they are relative + to the initialized distribution. """ def __init__(self, args, **kwds): @@ -451,23 +489,20 @@ def __init__(self, args, **kwds): self, name="Normal Expansion distribution", shapes=" ", - extradoc=""" - The distribution is defined as the Gram-Charlier expansion of - the normal distribution using the first four moments. The pdf - is given by - - pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc) - - where xc = (x-mu)/sig is the standardized value of the random variable - and H(xc,3) and H(xc,4) are Hermite polynomials - - Note: This distribution has to be parametrized during - initialization and instantiation, and does not have a shape - parameter after instantiation (similar to frozen distribution - except for location and scale.) Location and scale can be used - as with other distributions, however note, that they are relative - to the initialized distribution. - """, + # extradoc=""" + # The distribution is defined as the Gram-Charlier expansion of + # the normal distribution using the first four moments. The pdf + # is given by + # pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc) + # where xc = (x-mu)/sig is the standardized value of the random variable + # and H(xc,3) and H(xc,4) are Hermite polynomials + # Note: This distribution has to be parametrized during + # initialization and instantiation, and does not have a shape + # parameter after instantiation (similar to frozen distribution + # except for location and scale.) Location and scale can be used + # as with other distributions, however note, that they are relative + # to the initialized distribution. 
+ # """, ) # print args, kwds mode = kwds.get("mode", "sample") @@ -568,7 +603,6 @@ def __init__(self, kls, func, funcinv, *args, **kwargs): longname = kwargs.pop( "longname", "Non-linear transformed distribution" ) - extradoc = kwargs.pop("extradoc", None) a = kwargs.pop("a", -np.inf) b = kwargs.pop("b", np.inf) self.decr = kwargs.pop("decr", False) @@ -580,7 +614,7 @@ def __init__(self, kls, func, funcinv, *args, **kwargs): # possible to freeze the underlying distribution super(Transf_gen, self).__init__( - a=a, b=b, name=name, longname=longname, extradoc=extradoc + a=a, b=b, name=name, longname=longname ) def _rvs(self, *args, **kwargs): @@ -630,7 +664,7 @@ def identit(x): numargs=0, name="discf", longname="normal-based discount factor", - extradoc="\ndistribution of discount factor y=1/(1+x)) with x N(0.05,0.1**2)", + # extradoc="\ndistribution of discount factor y=1/(1+x)) with x N(0.05,0.1**2)", ) lognormalg = Transf_gen( @@ -641,8 +675,8 @@ def identit(x): a=0, name="lnnorm", longname="Exp transformed normal", - extradoc="\ndistribution of y = exp(x), with x standard normal" - "precision for moment andstats is not very high, 2-3 decimals", + # extradoc="\ndistribution of y = exp(x), with x standard normal" + # "precision for moment andstats is not very high, 2-3 decimals", ) @@ -809,7 +843,6 @@ def __init__( longname = kwargs.pop( "longname", "Non-linear transformed distribution" ) - extradoc = kwargs.pop("extradoc", None) a = kwargs.pop("a", -np.inf) # attached to self in super b = kwargs.pop("b", np.inf) # self.a, self.b would be overwritten self.shape = kwargs.pop("shape", False) @@ -826,7 +859,6 @@ def __init__( name=name, shapes=kls.shapes, longname=longname, - extradoc=extradoc, ) # add enough info for self.freeze() to be able to reconstruct the instance @@ -938,8 +970,8 @@ def squarefunc(self, x): numargs=0, name="squarenorm", longname="squared normal distribution", - extradoc="\ndistribution of the square of a normal random variable" - + " y=x**2 with x N(0.0,1)", + # extradoc="\ndistribution of the square of a normal random variable" + # + " y=x**2 with x N(0.0,1)", ) # u_loc=l, u_scale=s) squaretg = TransfTwo_gen( @@ -955,8 +987,8 @@ def squarefunc(self, x): numargs=1, name="squarenorm", longname="squared t distribution", - extradoc="\ndistribution of the square of a t random variable" - + " y=x**2 with x t(dof,0.0,1)", + # extradoc="\ndistribution of the square of a t random variable" + # + " y=x**2 with x t(dof,0.0,1)", ) @@ -993,8 +1025,8 @@ def negsquarefunc(x): numargs=0, name="negsquarenorm", longname="negative squared normal distribution", - extradoc="\ndistribution of the negative square of a normal random variable" - + " y=-x**2 with x N(0.0,1)", + # extradoc="\ndistribution of the negative square of a normal random variable" + # + " y=-x**2 with x N(0.0,1)", ) # u_loc=l, u_scale=s) @@ -1032,8 +1064,8 @@ def absfunc(x): numargs=0, name="absnorm", longname="absolute of normal distribution", - extradoc="\ndistribution of the absolute value of a normal random variable" - + " y=abs(x) with x N(0,1)", + # extradoc="\ndistribution of the absolute value of a normal random variable" + # + " y=abs(x) with x N(0,1)", ) @@ -1244,7 +1276,8 @@ def mvstdnormcdf(lower, upper, corrcoef, **kwds): def mvnormcdf(upper, mu, cov, lower=None, **kwds): - """multivariate normal cumulative distribution function + """ + Multivariate normal cumulative distribution function This is a wrapper for scipy.stats._mvn.mvndst which calculates a rectangular integral over a multivariate normal 
distribution. diff --git a/statsmodels/sandbox/distributions/genpareto.py b/statsmodels/sandbox/distributions/genpareto.py index fa0c9cdb445..67f93017c46 100644 --- a/statsmodels/sandbox/distributions/genpareto.py +++ b/statsmodels/sandbox/distributions/genpareto.py @@ -6,114 +6,137 @@ Author: josef-pktd """ +import matplotlib.pyplot as plt import numpy as np +from numpy import abs as np_abs, inf, where from scipy import stats from scipy.special import comb from scipy.stats.distributions import rv_continuous -import matplotlib.pyplot as plt -from numpy import where, inf -from numpy import abs as np_abs ## Generalized Pareto with reversed sign of c as in literature class genpareto2_gen(rv_continuous): def _argcheck(self, c): c = np.asarray(c) - self.b = where(c > 0, 1.0/np_abs(c), inf) - return where(c==0, 0, 1) + self.b = where(c > 0, 1.0 / np_abs(c), inf) + return where(c == 0, 0, 1) + def _pdf(self, x, c): - Px = np.power(1-c*x,-1.0+1.0/c) + Px = np.power(1 - c * x, -1.0 + 1.0 / c) return Px + def _logpdf(self, x, c): - return (-1.0+1.0/c) * np.log1p(-c*x) + return (-1.0 + 1.0 / c) * np.log1p(-c * x) + def _cdf(self, x, c): - return 1.0 - np.power(1-c*x,1.0/c) + return 1.0 - np.power(1 - c * x, 1.0 / c) + def _ppf(self, q, c): - vals = -1.0/c * (np.power(1-q, c)-1) + vals = -1.0 / c * (np.power(1 - q, c) - 1) return vals + def _munp(self, n, c): - k = np.arange(0,n+1) - val = (1.0/c)**n * np.sum(comb(n,k)*(-1)**k / (1.0+c*k),axis=0) - return where(c*n > -1, val, inf) + k = np.arange(0, n + 1) + val = (1.0 / c) ** n * np.sum( + comb(n, k) * (-1) ** k / (1.0 + c * k), axis=0 + ) + return where(c * n > -1, val, inf) + def _entropy(self, c): - if (c < 0): - return 1-c + if c < 0: + return 1 - c else: self.b = 1.0 / c return rv_continuous._entropy(self, c) -genpareto2 = genpareto2_gen(a=0.0,name='genpareto', - longname="A generalized Pareto", - shapes='c',extradoc=""" - -Generalized Pareto distribution -genpareto2.pdf(x,c) = (1+c*x)**(-1-1/c) -for c != 0, and for x >= 0 for all c, and x < 1/abs(c) for c < 0. -""") +genpareto2 = genpareto2_gen( + a=0.0, + name="genpareto", + longname="A generalized Pareto", + shapes="c", # extradoc=""" + # + # Generalized Pareto distribution + # + # genpareto2.pdf(x,c) = (1+c*x)**(-1-1/c) + # for c != 0, and for x >= 0 for all c, and x < 1/abs(c) for c < 0. 
+ # """ +) shape, loc, scale = 0.5, 0, 1 rv = np.arange(5) quant = [0.01, 0.1, 0.5, 0.9, 0.99] -for method, x in [('pdf', rv), - ('cdf', rv), - ('sf', rv), - ('ppf', quant), - ('isf', quant)]: +for method, x in [ + ("pdf", rv), + ("cdf", rv), + ("sf", rv), + ("ppf", quant), + ("isf", quant), +]: print(getattr(genpareto2, method)(x, shape, loc, scale)) print(getattr(stats.genpareto, method)(x, -shape, loc, scale)) -print(genpareto2.stats(shape, loc, scale, moments='mvsk')) -print(stats.genpareto.stats(-shape, loc, scale, moments='mvsk')) +print(genpareto2.stats(shape, loc, scale, moments="mvsk")) +print(stats.genpareto.stats(-shape, loc, scale, moments="mvsk")) print(genpareto2.entropy(shape, loc, scale)) print(stats.genpareto.entropy(-shape, loc, scale)) def paramstopot(thresh, shape, scale): - '''transform shape scale for peak over threshold + """transform shape scale for peak over threshold y = x-u|x>u ~ GPD(k, sigma-k*u) if x ~ GPD(k, sigma) notation of de Zea Bermudez, Kotz k, sigma is shape, scale - ''' - return shape, scale - shape*thresh + """ + return shape, scale - shape * thresh + def paramsfrompot(thresh, shape, scalepot): - return shape, scalepot + shape*thresh + return shape, scalepot + shape * thresh + def warnif(cond, msg): if not cond: - print(msg, 'does not hold') + print(msg, "does not hold") + def meanexcess(thresh, shape, scale): - '''mean excess function of genpareto + """mean excess function of genpareto assert are inequality conditions in de Zea Bermudez, Kotz - ''' - warnif(shape > -1, 'shape > -1') - warnif(thresh >= 0, 'thresh >= 0') #make it weak inequality - warnif((scale - shape*thresh) > 0, '(scale - shape*thresh) > 0') - return (scale - shape*thresh) / (1 + shape) + """ + warnif(shape > -1, "shape > -1") + warnif(thresh >= 0, "thresh >= 0") # make it weak inequality + warnif((scale - shape * thresh) > 0, "(scale - shape*thresh) > 0") + return (scale - shape * thresh) / (1 + shape) -def meanexcess_plot(data, params=None, lidx=100, uidx=10, method='emp', plot=0): - if method == 'est': - #does not make much sense yet, - #estimate the parameters and use theoretical meanexcess +def meanexcess_plot( + data, params=None, lidx=100, uidx=10, method="emp", plot=0 +): + if method == "est": + # does not make much sense yet, + # estimate the parameters and use theoretical meanexcess if params is None: raise NotImplementedError else: - pass #estimate parames - elif method == 'emp': - #calculate meanexcess from data + pass # estimate parames + elif method == "emp": + # calculate meanexcess from data datasorted = np.sort(data) - meanexcess = (datasorted[::-1].cumsum())/np.arange(1,len(data)+1) - datasorted[::-1] + meanexcess = (datasorted[::-1].cumsum()) / np.arange( + 1, len(data) + 1 + ) - datasorted[::-1] meanexcess = meanexcess[::-1] if plot: plt.plot(datasorted[:-uidx], meanexcess[:-uidx]) if params is not None: shape, scale = params - plt.plot(datasorted[:-uidx], (scale - datasorted[:-uidx] * shape) / (1. 
+ shape)) + plt.plot( + datasorted[:-uidx], + (scale - datasorted[:-uidx] * shape) / (1.0 + shape), + ) return datasorted, meanexcess @@ -121,31 +144,35 @@ def meanexcess_plot(data, params=None, lidx=100, uidx=10, method='emp', plot=0): print(meanexcess(5, -2, 10)) data = genpareto2.rvs(-0.75, scale=5, size=1000) -#data = np.random.uniform(50, size=1000) -#data = stats.norm.rvs(0, np.sqrt(50), size=1000) -#data = stats.pareto.rvs(1.5, np.sqrt(50), size=1000) +# data = np.random.uniform(50, size=1000) +# data = stats.norm.rvs(0, np.sqrt(50), size=1000) +# data = stats.pareto.rvs(1.5, np.sqrt(50), size=1000) tmp = meanexcess_plot(data, params=(-0.75, 5), plot=1) print(tmp[1][-20:]) print(tmp[0][-20:]) -#plt.show() +# plt.show() + def meanexcess_emp(data): datasorted = np.sort(data).astype(float) - meanexcess = (datasorted[::-1].cumsum())/np.arange(1,len(data)+1) - datasorted[::-1] - meancont = (datasorted[::-1].cumsum())/np.arange(1,len(data)+1) + meanexcess = (datasorted[::-1].cumsum()) / np.arange( + 1, len(data) + 1 + ) - datasorted[::-1] + meancont = (datasorted[::-1].cumsum()) / np.arange(1, len(data) + 1) meanexcess = meanexcess[::-1] return datasorted, meanexcess, meancont[::-1] + def meanexcess_dist(self, lb, *args, **kwds): - #default function in expect is identity + # default function in expect is identity # need args in call if np.ndim(lb) == 0: return self.expect(lb=lb, conditional=True) else: - return np.array([self.expect(lb=lbb, conditional=True) for - lbb in lb]) + return np.array([self.expect(lb=lbb, conditional=True) for lbb in lb]) -ds, me, mc = meanexcess_emp(1.*np.arange(1,10)) + +ds, me, mc = meanexcess_emp(1.0 * np.arange(1, 10)) print(ds) print(me) print(mc) @@ -154,11 +181,15 @@ def meanexcess_dist(self, lb, *args, **kwds): print(meanexcess_dist(stats.norm, lb=[-np.inf, -0.5, 0, 0.5])) rvs = stats.norm.rvs(size=100000) rvs = rvs - rvs.mean() -print(rvs.mean(), rvs[rvs>-0.5].mean(), rvs[rvs>0].mean(), rvs[rvs>0.5].mean()) - +print( + rvs.mean(), + rvs[rvs > -0.5].mean(), + rvs[rvs > 0].mean(), + rvs[rvs > 0.5].mean(), +) -''' +""" [ 1. 0.5 0. 0. 0. ] [ 1. 0.5 0. 0. 0. ] [ 0. 0.75 1. 1. 1. ] @@ -231,4 +262,4 @@ def meanexcess_dist(self, lb, *args, **kwds): >>> datasorted[::-1] array([ 9., 8., 7., 6., 5., 4., 3., 2., 1.]) >>> -''' +""" diff --git a/statsmodels/sandbox/distributions/gof_new.py b/statsmodels/sandbox/distributions/gof_new.py index ffb9588b9df..0a804858163 100644 --- a/statsmodels/sandbox/distributions/gof_new.py +++ b/statsmodels/sandbox/distributions/gof_new.py @@ -1,4 +1,4 @@ -'''More Goodness of fit tests +"""More Goodness of fit tests contains @@ -16,17 +16,17 @@ References ---------- -''' +""" from statsmodels.compat.python import lmap -import numpy as np +import numpy as np +from scipy.special import kolmogorov as ksprob from scipy.stats import distributions from statsmodels.tools.decorators import cache_readonly -from scipy.special import kolmogorov as ksprob -#from scipy.stats unchanged +# from scipy.stats unchanged def ks_2samp(data1, data2): """ Computes the Kolmogorov-Smirnof statistic on 2 samples. 
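For orientation on the ks_2samp hunk that follows: the statistic is the largest absolute distance between the two empirical CDFs, evaluated on the pooled sample via searchsorted. A minimal standalone sketch of that computation (illustrative only; the helper name ks_2samp_stat is not part of the codebase):

import numpy as np

def ks_2samp_stat(data1, data2):
    # sort each sample; searchsorted below requires sorted arrays
    data1, data2 = np.sort(data1), np.sort(data2)
    n1, n2 = len(data1), len(data2)
    # evaluate both empirical CDFs on the pooled observation points
    data_all = np.concatenate([data1, data2])
    cdf1 = np.searchsorted(data1, data_all, side="right") / n1
    cdf2 = np.searchsorted(data2, data_all, side="right") / n2
    # KS statistic: the largest absolute ECDF difference
    return np.max(np.abs(cdf1 - cdf2))

For example, ks_2samp_stat(np.random.randn(100), np.random.randn(200)) should be small, since both samples come from the same parent distribution.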
@@ -106,23 +106,24 @@ def ks_2samp(data1, data2): n2 = len(data2) data1 = np.sort(data1) data2 = np.sort(data2) - data_all = np.concatenate([data1,data2]) - #reminder: searchsorted inserts 2nd into 1st array - cdf1 = np.searchsorted(data1,data_all,side='right')/(1.0*n1) - cdf2 = (np.searchsorted(data2,data_all,side='right'))/(1.0*n2) - d = np.max(np.absolute(cdf1-cdf2)) - #Note: d absolute not signed distance - en = np.sqrt(n1*n2/float(n1+n2)) + data_all = np.concatenate([data1, data2]) + # reminder: searchsorted inserts 2nd into 1st array + cdf1 = np.searchsorted(data1, data_all, side="right") / (1.0 * n1) + cdf2 = (np.searchsorted(data2, data_all, side="right")) / (1.0 * n2) + d = np.max(np.absolute(cdf1 - cdf2)) + # Note: d absolute not signed distance + en = np.sqrt(n1 * n2 / float(n1 + n2)) try: - prob = ksprob((en+0.12+0.11/en)*d) + prob = ksprob((en + 0.12 + 0.11 / en) * d) except: prob = 1.0 return d, prob - -#from scipy.stats unchanged -def kstest(rvs, cdf, args=(), N=20, alternative = 'two_sided', mode='approx',**kwds): +# from scipy.stats unchanged +def kstest( + rvs, cdf, args=(), N=20, alternative="two_sided", mode="approx", **kwds +): """ Perform the Kolmogorov-Smirnov test for goodness of fit @@ -238,56 +239,60 @@ def kstest(rvs, cdf, args=(), N=20, alternative = 'two_sided', mode='approx',**k (0.131016895759829, 0.058826222555312224) """ if isinstance(rvs, str): - #cdf = getattr(stats, rvs).cdf + # cdf = getattr(stats, rvs).cdf if (not cdf) or (cdf == rvs): cdf = getattr(distributions, rvs).cdf rvs = getattr(distributions, rvs).rvs else: - raise AttributeError('if rvs is string, cdf has to be the same distribution') - + raise AttributeError( + "if rvs is string, cdf has to be the same distribution" + ) if isinstance(cdf, str): cdf = getattr(distributions, cdf).cdf if callable(rvs): - kwds = {'size':N} - vals = np.sort(rvs(*args,**kwds)) + kwds = {"size": N} + vals = np.sort(rvs(*args, **kwds)) else: vals = np.sort(rvs) N = len(vals) cdfvals = cdf(vals, *args) - if alternative in ['two_sided', 'greater']: - Dplus = (np.arange(1.0, N+1)/N - cdfvals).max() - if alternative == 'greater': - return Dplus, distributions.ksone.sf(Dplus,N) - - if alternative in ['two_sided', 'less']: - Dmin = (cdfvals - np.arange(0.0, N)/N).max() - if alternative == 'less': - return Dmin, distributions.ksone.sf(Dmin,N) - - if alternative == 'two_sided': - D = np.max([Dplus,Dmin]) - if mode == 'asymp': - return D, distributions.kstwobign.sf(D*np.sqrt(N)) - if mode == 'approx': - pval_two = distributions.kstwobign.sf(D*np.sqrt(N)) - if N > 2666 or pval_two > 0.80 - N*0.3/1000.0 : - return D, distributions.kstwobign.sf(D*np.sqrt(N)) + if alternative in ["two_sided", "greater"]: + Dplus = (np.arange(1.0, N + 1) / N - cdfvals).max() + if alternative == "greater": + return Dplus, distributions.ksone.sf(Dplus, N) + + if alternative in ["two_sided", "less"]: + Dmin = (cdfvals - np.arange(0.0, N) / N).max() + if alternative == "less": + return Dmin, distributions.ksone.sf(Dmin, N) + + if alternative == "two_sided": + D = np.max([Dplus, Dmin]) + if mode == "asymp": + return D, distributions.kstwobign.sf(D * np.sqrt(N)) + if mode == "approx": + pval_two = distributions.kstwobign.sf(D * np.sqrt(N)) + if N > 2666 or pval_two > 0.80 - N * 0.3 / 1000.0: + return D, distributions.kstwobign.sf(D * np.sqrt(N)) else: - return D, distributions.ksone.sf(D,N)*2 + return D, distributions.ksone.sf(D, N) * 2 -#TODO: split into modification and pvalue functions separately ? 
+ +# TODO: split into modification and pvalue functions separately ? # for separate testing and combining different pieces + def dplus_st70_upp(stat, nobs): mod_factor = np.sqrt(nobs) + 0.12 + 0.11 / np.sqrt(nobs) stat_modified = stat * mod_factor pval = np.exp(-2 * stat_modified**2) digits = np.sum(stat > np.array([0.82, 0.82, 1.00])) - #repeat low to get {0,2,3} + # repeat low to get {0,2,3} return stat_modified, pval, digits + dminus_st70_upp = dplus_st70_upp @@ -296,75 +301,85 @@ def d_st70_upp(stat, nobs): stat_modified = stat * mod_factor pval = 2 * np.exp(-2 * stat_modified**2) digits = np.sum(stat > np.array([0.91, 0.91, 1.08])) - #repeat low to get {0,2,3} + # repeat low to get {0,2,3} return stat_modified, pval, digits + def v_st70_upp(stat, nobs): mod_factor = np.sqrt(nobs) + 0.155 + 0.24 / np.sqrt(nobs) - #repeat low to get {0,2,3} + # repeat low to get {0,2,3} stat_modified = stat * mod_factor zsqu = stat_modified**2 pval = (8 * zsqu - 2) * np.exp(-2 * zsqu) digits = np.sum(stat > np.array([1.06, 1.06, 1.26])) return stat_modified, pval, digits + def wsqu_st70_upp(stat, nobs): - nobsinv = 1. / nobs + nobsinv = 1.0 / nobs stat_modified = (stat - 0.4 * nobsinv + 0.6 * nobsinv**2) * (1 + nobsinv) pval = 0.05 * np.exp(2.79 - 6 * stat_modified) digits = np.nan # some explanation in txt - #repeat low to get {0,2,3} + # repeat low to get {0,2,3} return stat_modified, pval, digits + def usqu_st70_upp(stat, nobs): - nobsinv = 1. / nobs - stat_modified = (stat - 0.1 * nobsinv + 0.1 * nobsinv**2) - stat_modified *= (1 + 0.8 * nobsinv) - pval = 2 * np.exp(- 2 * stat_modified * np.pi**2) + nobsinv = 1.0 / nobs + stat_modified = stat - 0.1 * nobsinv + 0.1 * nobsinv**2 + stat_modified *= 1 + 0.8 * nobsinv + pval = 2 * np.exp(-2 * stat_modified * np.pi**2) digits = np.sum(stat > np.array([0.29, 0.29, 0.34])) - #repeat low to get {0,2,3} + # repeat low to get {0,2,3} return stat_modified, pval, digits + def a_st70_upp(stat, nobs): - nobsinv = 1. / nobs - stat_modified = (stat - 0.7 * nobsinv + 0.9 * nobsinv**2) - stat_modified *= (1 + 1.23 * nobsinv) - pval = 1.273 * np.exp(- 2 * stat_modified / 2. 
* np.pi**2) + nobsinv = 1.0 / nobs + stat_modified = stat - 0.7 * nobsinv + 0.9 * nobsinv**2 + stat_modified *= 1 + 1.23 * nobsinv + pval = 1.273 * np.exp(-2 * stat_modified / 2.0 * np.pi**2) digits = np.sum(stat > np.array([0.11, 0.11, 0.452])) - #repeat low to get {0,2,3} + # repeat low to get {0,2,3} return stat_modified, pval, digits - gof_pvals = {} -gof_pvals['stephens70upp'] = { - 'd_plus' : dplus_st70_upp, - 'd_minus' : dplus_st70_upp, - 'd' : d_st70_upp, - 'v' : v_st70_upp, - 'wsqu' : wsqu_st70_upp, - 'usqu' : usqu_st70_upp, - 'a' : a_st70_upp } +gof_pvals["stephens70upp"] = { + "d_plus": dplus_st70_upp, + "d_minus": dplus_st70_upp, + "d": d_st70_upp, + "v": v_st70_upp, + "wsqu": wsqu_st70_upp, + "usqu": usqu_st70_upp, + "a": a_st70_upp, +} + def pval_kstest_approx(D, N): - pval_two = distributions.kstwobign.sf(D*np.sqrt(N)) - if N > 2666 or pval_two > 0.80 - N*0.3/1000.0 : - return D, distributions.kstwobign.sf(D*np.sqrt(N)), np.nan + pval_two = distributions.kstwobign.sf(D * np.sqrt(N)) + if N > 2666 or pval_two > 0.80 - N * 0.3 / 1000.0: + return D, distributions.kstwobign.sf(D * np.sqrt(N)), np.nan else: - return D, distributions.ksone.sf(D,N)*2, np.nan + return D, distributions.ksone.sf(D, N) * 2, np.nan -gof_pvals['scipy'] = { - 'd_plus' : lambda Dplus, N: (Dplus, distributions.ksone.sf(Dplus, N), np.nan), - 'd_minus' : lambda Dmin, N: (Dmin, distributions.ksone.sf(Dmin,N), np.nan), - 'd' : lambda D, N: (D, distributions.kstwobign.sf(D*np.sqrt(N)), np.nan) - } -gof_pvals['scipy_approx'] = { - 'd' : pval_kstest_approx } +gof_pvals["scipy"] = { + "d_plus": lambda Dplus, N: ( + Dplus, + distributions.ksone.sf(Dplus, N), + np.nan, + ), + "d_minus": lambda Dmin, N: (Dmin, distributions.ksone.sf(Dmin, N), np.nan), + "d": lambda D, N: (D, distributions.kstwobign.sf(D * np.sqrt(N)), np.nan), +} + +gof_pvals["scipy_approx"] = {"d": pval_kstest_approx} + class GOF(object): - '''One Sample Goodness of Fit tests + """One Sample Goodness of Fit tests includes Kolmogorov-Smirnov D, D+, D-, Kuiper V, Cramer-von Mises W^2, U^2 and Anderson-Darling A, A^2. 
The p-values for all tests except for A^2 are based on @@ -382,26 +397,24 @@ class GOF(object): - ''' - - - + """ def __init__(self, rvs, cdf, args=(), N=20): if isinstance(rvs, str): - #cdf = getattr(stats, rvs).cdf + # cdf = getattr(stats, rvs).cdf if (not cdf) or (cdf == rvs): cdf = getattr(distributions, rvs).cdf rvs = getattr(distributions, rvs).rvs else: - raise AttributeError('if rvs is string, cdf has to be the same distribution') - + raise AttributeError( + "if rvs is string, cdf has to be the same distribution" + ) if isinstance(cdf, str): cdf = getattr(distributions, cdf).cdf if callable(rvs): - kwds = {'size':N} - vals = np.sort(rvs(*args,**kwds)) + kwds = {"size": N} + vals = np.sort(rvs(*args, **kwds)) else: vals = np.sort(rvs) N = len(vals) @@ -411,19 +424,17 @@ def __init__(self, rvs, cdf, args=(), N=20): self.vals_sorted = vals self.cdfvals = cdfvals - - @cache_readonly def d_plus(self): nobs = self.nobs cdfvals = self.cdfvals - return (np.arange(1.0, nobs+1)/nobs - cdfvals).max() + return (np.arange(1.0, nobs + 1) / nobs - cdfvals).max() @cache_readonly def d_minus(self): nobs = self.nobs cdfvals = self.cdfvals - return (cdfvals - np.arange(0.0, nobs)/nobs).max() + return (cdfvals - np.arange(0.0, nobs) / nobs).max() @cache_readonly def d(self): @@ -431,25 +442,26 @@ def d(self): @cache_readonly def v(self): - '''Kuiper''' + """Kuiper""" return self.d_plus + self.d_minus @cache_readonly def wsqu(self): - '''Cramer von Mises''' + """Cramer von Mises""" nobs = self.nobs cdfvals = self.cdfvals - #use literal formula, TODO: simplify with arange(,,2) - wsqu = ((cdfvals - (2. * np.arange(1., nobs+1) - 1)/nobs/2.)**2).sum() \ - + 1./nobs/12. + # use literal formula, TODO: simplify with arange(,,2) + wsqu = ( + (cdfvals - (2.0 * np.arange(1.0, nobs + 1) - 1) / nobs / 2.0) ** 2 + ).sum() + 1.0 / nobs / 12.0 return wsqu @cache_readonly def usqu(self): nobs = self.nobs cdfvals = self.cdfvals - #use literal formula, TODO: simplify with arange(,,2) - usqu = self.wsqu - nobs * (cdfvals.mean() - 0.5)**2 + # use literal formula, TODO: simplify with arange(,,2) + usqu = self.wsqu - nobs * (cdfvals.mean() - 0.5) ** 2 return usqu @cache_readonly @@ -457,49 +469,46 @@ def a(self): nobs = self.nobs cdfvals = self.cdfvals - #one loop instead of large array + # one loop instead of large array msum = 0 - for j in range(1,nobs): + for j in range(1, nobs): mj = cdfvals[j] - cdfvals[:j] - mask = (mj > 0.5) + mask = mj > 0.5 mj[mask] = 1 - mj[mask] msum += mj.sum() - a = nobs / 4. - 2. / nobs * msum + a = nobs / 4.0 - 2.0 / nobs * msum return a @cache_readonly def asqu(self): - '''Stephens 1974, does not have p-value formula for A^2''' + """Stephens 1974, does not have p-value formula for A^2""" nobs = self.nobs cdfvals = self.cdfvals - asqu = -((2. * np.arange(1., nobs+1) - 1) * - (np.log(cdfvals) + np.log(1-cdfvals[::-1]) )).sum()/nobs - nobs + asqu = ( + -( + (2.0 * np.arange(1.0, nobs + 1) - 1) + * (np.log(cdfvals) + np.log(1 - cdfvals[::-1])) + ).sum() + / nobs + - nobs + ) return asqu - - def get_test(self, testid='d', pvals='stephens70upp'): - ''' - - ''' - #print gof_pvals[pvals][testid] + def get_test(self, testid="d", pvals="stephens70upp"): + """ """ + # print gof_pvals[pvals][testid] stat = getattr(self, testid) - if pvals == 'stephens70upp': + if pvals == "stephens70upp": return gof_pvals[pvals][testid](stat, self.nobs), stat else: return gof_pvals[pvals][testid](stat, self.nobs) - - - - - - def gof_mc(randfn, distr, nobs=100): - #print '\nIs it correctly sized?' 
+ # print '\nIs it correctly sized?' from collections import defaultdict results = defaultdict(list) @@ -507,39 +516,47 @@ def gof_mc(randfn, distr, nobs=100): rvs = randfn(nobs) goft = GOF(rvs, distr) for ti in all_gofs: - results[ti].append(goft.get_test(ti, 'stephens70upp')[0][1]) + results[ti].append(goft.get_test(ti, "stephens70upp")[0][1]) resarr = np.array([results[ti] for ti in all_gofs]) - print(' ', ' '.join(all_gofs)) - print('at 0.01:', (resarr < 0.01).mean(1)) - print('at 0.05:', (resarr < 0.05).mean(1)) - print('at 0.10:', (resarr < 0.1).mean(1)) + print(" ", " ".join(all_gofs)) + print("at 0.01:", (resarr < 0.01).mean(1)) + print("at 0.05:", (resarr < 0.05).mean(1)) + print("at 0.10:", (resarr < 0.1).mean(1)) + def asquare(cdfvals, axis=0): - '''vectorized Anderson Darling A^2, Stephens 1974''' + """vectorized Anderson Darling A^2, Stephens 1974""" ndim = len(cdfvals.shape) nobs = cdfvals.shape[axis] - slice_reverse = [slice(None)] * ndim #might make copy if not specific axis??? + slice_reverse = [ + slice(None) + ] * ndim # might make copy if not specific axis??? islice = [None] * ndim islice[axis] = slice(None) slice_reverse[axis] = slice(None, None, -1) - asqu = -((2. * np.arange(1., nobs+1)[tuple(islice)] - 1) * - (np.log(cdfvals) + np.log(1-cdfvals[tuple(slice_reverse)]))/nobs).sum(axis) \ - - nobs + asqu = ( + -( + (2.0 * np.arange(1.0, nobs + 1)[tuple(islice)] - 1) + * (np.log(cdfvals) + np.log(1 - cdfvals[tuple(slice_reverse)])) + / nobs + ).sum(axis) + - nobs + ) return asqu -#class OneSGOFFittedVec(object): +# class OneSGOFFittedVec(object): # '''for vectorized fitting''' - # currently I use the bootstrap as function instead of full class +# currently I use the bootstrap as function instead of full class - #note: kwds loc and scale are a pain - # I would need to overwrite rvs, fit and cdf depending on fixed parameters +# note: kwds loc and scale are a pain +# I would need to overwrite rvs, fit and cdf depending on fixed parameters - #def bootstrap(self, distr, args=(), kwds={}, nobs=200, nrep=1000, +# def bootstrap(self, distr, args=(), kwds={}, nobs=200, nrep=1000, def bootstrap(distr, args=(), nobs=200, nrep=100, value=None, batch_size=None): - '''Monte Carlo (or parametric bootstrap) p-values for gof + """Monte Carlo (or parametric bootstrap) p-values for gof currently hardcoded for A^2 only @@ -550,23 +567,22 @@ def bootstrap(distr, args=(), nobs=200, nrep=100, value=None, batch_size=None): this works also with nrep=1 - ''' - #signature similar to kstest ? - #delegate to fn ? - - #rvs_kwds = {'size':(nobs, nrep)} - #rvs_kwds.update(kwds) + """ + # signature similar to kstest ? + # delegate to fn ? 
+ # rvs_kwds = {'size':(nobs, nrep)} + # rvs_kwds.update(kwds) - #it will be better to build a separate batch function that calls bootstrap - #keep batch if value is true, but batch iterate from outside if stat is returned + # it will be better to build a separate batch function that calls bootstrap + # keep batch if value is true, but batch iterate from outside if stat is returned if batch_size is not None: if value is None: - raise ValueError('using batching requires a value') - n_batch = int(np.ceil(nrep/float(batch_size))) + raise ValueError("using batching requires a value") + n_batch = int(np.ceil(nrep / float(batch_size))) count = 0 for irep in range(n_batch): - rvs = distr.rvs(args, **{'size':(batch_size, nobs)}) + rvs = distr.rvs(args, **{"size": (batch_size, nobs)}) params = distr.fit_vec(rvs, axis=1) params = lmap(lambda x: np.expand_dims(x, 1), params) cdfvals = np.sort(distr.cdf(rvs, params), axis=1) @@ -574,22 +590,21 @@ def bootstrap(distr, args=(), nobs=200, nrep=100, value=None, batch_size=None): count += (stat >= value).sum() return count / float(n_batch * batch_size) else: - #rvs = distr.rvs(args, **kwds) #extension to distribution kwds ? - rvs = distr.rvs(args, **{'size':(nrep, nobs)}) + # rvs = distr.rvs(args, **kwds) #extension to distribution kwds ? + rvs = distr.rvs(args, **{"size": (nrep, nobs)}) params = distr.fit_vec(rvs, axis=1) params = lmap(lambda x: np.expand_dims(x, 1), params) cdfvals = np.sort(distr.cdf(rvs, params), axis=1) stat = asquare(cdfvals, axis=1) - if value is None: #return all bootstrap results + if value is None: # return all bootstrap results stat_sorted = np.sort(stat) return stat_sorted - else: #calculate and return specific p-value + else: # calculate and return specific p-value return (stat >= value).mean() - def bootstrap2(value, distr, args=(), nobs=200, nrep=100): - '''Monte Carlo (or parametric bootstrap) p-values for gof + """Monte Carlo (or parametric bootstrap) p-values for gof currently hardcoded for A^2 only @@ -598,28 +613,26 @@ def bootstrap2(value, distr, args=(), nobs=200, nrep=100): rename function to less generic - ''' - #signature similar to kstest ? - #delegate to fn ? - - #rvs_kwds = {'size':(nobs, nrep)} - #rvs_kwds.update(kwds) + """ + # signature similar to kstest ? + # delegate to fn ? + # rvs_kwds = {'size':(nobs, nrep)} + # rvs_kwds.update(kwds) count = 0 for irep in range(nrep): - #rvs = distr.rvs(args, **kwds) #extension to distribution kwds ? - rvs = distr.rvs(args, **{'size':nobs}) + # rvs = distr.rvs(args, **kwds) #extension to distribution kwds ? + rvs = distr.rvs(args, **{"size": nobs}) params = distr.fit_vec(rvs) cdfvals = np.sort(distr.cdf(rvs, params)) stat = asquare(cdfvals, axis=0) - count += (stat >= value) - return count * 1. 
/ nrep + count += stat >= value + return count * 1.0 / nrep class NewNorm(object): - '''just a holder for modified distributions - ''' + """just a holder for modified distributions""" def fit_vec(self, x, axis=0): return x.mean(axis), x.std(axis) @@ -628,59 +641,57 @@ def cdf(self, x, args): return distributions.norm.cdf(x, loc=args[0], scale=args[1]) def rvs(self, args, size): - loc=args[0] - scale=args[1] + loc = args[0] + scale = args[1] return loc + scale * distributions.norm.rvs(size=size) - - - -if __name__ == '__main__': +if __name__ == "__main__": from scipy import stats - #rvs = np.random.randn(1000) + + # rvs = np.random.randn(1000) rvs = stats.t.rvs(3, size=200) - print('scipy kstest') - print(kstest(rvs, 'norm')) - goft = GOF(rvs, 'norm') + print("scipy kstest") + print(kstest(rvs, "norm")) + goft = GOF(rvs, "norm") print(goft.get_test()) - all_gofs = ['d', 'd_plus', 'd_minus', 'v', 'wsqu', 'usqu', 'a'] + all_gofs = ["d", "d_plus", "d_minus", "v", "wsqu", "usqu", "a"] for ti in all_gofs: - print(ti, goft.get_test(ti, 'stephens70upp')) + print(ti, goft.get_test(ti, "stephens70upp")) - print('\nIs it correctly sized?') + print("\nIs it correctly sized?") from collections import defaultdict results = defaultdict(list) nobs = 200 for i in range(100): rvs = np.random.randn(nobs) - goft = GOF(rvs, 'norm') + goft = GOF(rvs, "norm") for ti in all_gofs: - results[ti].append(goft.get_test(ti, 'stephens70upp')[0][1]) + results[ti].append(goft.get_test(ti, "stephens70upp")[0][1]) resarr = np.array([results[ti] for ti in all_gofs]) - print(' ', ' '.join(all_gofs)) - print('at 0.01:', (resarr < 0.01).mean(1)) - print('at 0.05:', (resarr < 0.05).mean(1)) - print('at 0.10:', (resarr < 0.1).mean(1)) + print(" ", " ".join(all_gofs)) + print("at 0.01:", (resarr < 0.01).mean(1)) + print("at 0.05:", (resarr < 0.05).mean(1)) + print("at 0.10:", (resarr < 0.1).mean(1)) - gof_mc(lambda nobs: stats.t.rvs(3, size=nobs), 'norm', nobs=200) + gof_mc(lambda nobs: stats.t.rvs(3, size=nobs), "norm", nobs=200) nobs = 200 nrep = 100 - bt = bootstrap(NewNorm(), args=(0,1), nobs=nobs, nrep=nrep, value=None) + bt = bootstrap(NewNorm(), args=(0, 1), nobs=nobs, nrep=nrep, value=None) quantindex = np.floor(nrep * np.array([0.99, 0.95, 0.9])).astype(int) print(bt[quantindex]) - #the bootstrap results match Stephens pretty well for nobs=100, but not so well for - #large (1000) or small (20) nobs - ''' + # the bootstrap results match Stephens pretty well for nobs=100, but not so well for + # large (1000) or small (20) nobs + """ >>> np.array([15.0, 10.0, 5.0, 2.5, 1.0])/100. 
#Stephens array([ 0.15 , 0.1 , 0.05 , 0.025, 0.01 ]) >>> nobs = 100 >>> [bootstrap(NewNorm(), args=(0,1), nobs=nobs, nrep=10000, value=c/ (1 + 4./nobs - 25./nobs**2)) for c in [0.576, 0.656, 0.787, 0.918, 1.092]] [0.1545, 0.10009999999999999, 0.049000000000000002, 0.023, 0.0104] >>> - ''' + """ diff --git a/statsmodels/sandbox/distributions/multivariate.py b/statsmodels/sandbox/distributions/multivariate.py index fce61204629..b6c232804ae 100644 --- a/statsmodels/sandbox/distributions/multivariate.py +++ b/statsmodels/sandbox/distributions/multivariate.py @@ -1,4 +1,4 @@ -'''Multivariate Distribution +"""Multivariate Distribution Probability of a multivariate t distribution @@ -13,54 +13,72 @@ Reference: Genz and Bretz for formula -''' +""" import numpy as np -from scipy import integrate, stats, special +from numpy import exp as np_exp, log as np_log +from scipy import integrate, special, stats +from scipy.special import gamma as sps_gamma, gammaln as sps_gammaln from scipy.stats import chi from .extras import mvstdnormcdf -from numpy import exp as np_exp -from numpy import log as np_log -from scipy.special import gamma as sps_gamma -from scipy.special import gammaln as sps_gammaln def chi2_pdf(self, x, df): - '''pdf of chi-square distribution''' - #from scipy.stats.distributions - Px = x**(df/2.0-1)*np.exp(-x/2.0) - Px /= special.gamma(df/2.0)* 2**(df/2.0) + """pdf of chi-square distribution""" + # from scipy.stats.distributions + Px = x ** (df / 2.0 - 1) * np.exp(-x / 2.0) + Px /= special.gamma(df / 2.0) * 2 ** (df / 2.0) return Px + def chi_pdf(x, df): - tmp = (df-1.)*np_log(x) + (-x*x*0.5) - (df*0.5-1)*np_log(2.0) \ - - sps_gammaln(df*0.5) + tmp = ( + (df - 1.0) * np_log(x) + + (-x * x * 0.5) + - (df * 0.5 - 1) * np_log(2.0) + - sps_gammaln(df * 0.5) + ) return np_exp(tmp) - #return x**(df-1.)*np_exp(-x*x*0.5)/(2.0)**(df*0.5-1)/sps_gamma(df*0.5) + # return x**(df-1.)*np_exp(-x*x*0.5)/(2.0)**(df*0.5-1)/sps_gamma(df*0.5) + def chi_logpdf(x, df): - tmp = (df-1.)*np_log(x) + (-x*x*0.5) - (df*0.5-1)*np_log(2.0) \ - - sps_gammaln(df*0.5) + tmp = ( + (df - 1.0) * np_log(x) + + (-x * x * 0.5) + - (df * 0.5 - 1) * np_log(2.0) + - sps_gammaln(df * 0.5) + ) return tmp + def funbgh(s, a, b, R, df): - sqrt_df = np.sqrt(df+0.5) - ret = chi_logpdf(s,df) - ret += np_log(mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R, - maxpts=1000000, abseps=1e-6)) + sqrt_df = np.sqrt(df + 0.5) + ret = chi_logpdf(s, df) + ret += np_log( + mvstdnormcdf( + s * a / sqrt_df, s * b / sqrt_df, R, maxpts=1000000, abseps=1e-6 + ) + ) ret = np_exp(ret) return ret + def funbgh2(s, a, b, R, df): n = len(a) sqrt_df = np.sqrt(df) - #np.power(s, df-1) * np_exp(-s*s*0.5) - return np_exp((df-1)*np_log(s)-s*s*0.5) \ - * mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R[np.tril_indices(n, -1)], - maxpts=1000000, abseps=1e-4) + # np.power(s, df-1) * np_exp(-s*s*0.5) + return np_exp((df - 1) * np_log(s) - s * s * 0.5) * mvstdnormcdf( + s * a / sqrt_df, + s * b / sqrt_df, + R[np.tril_indices(n, -1)], + maxpts=1000000, + abseps=1e-4, + ) + def bghfactor(df): - return np.power(2.0, 1-df*0.5) / sps_gamma(df*0.5) + return np.power(2.0, 1 - df * 0.5) / sps_gamma(df * 0.5) def mvstdtprob(a, b, R, df, ieps=1e-5, quadkwds=None, mvstkwds=None): @@ -83,10 +101,11 @@ def mvstdtprob(a, b, R, df, ieps=1e-5, quadkwds=None, mvstkwds=None): prob = res * bghfactor(df) return prob -#written by Enzo Michelangeli, style changes by josef-pktd + +# written by Enzo Michelangeli, style changes by josef-pktd # Student's T random variable def multivariate_t_rvs(m, S, df=np.inf, 
n=1): - '''generate random variables of multivariate t distribution + """generate random variables of multivariate t distribution Parameters ---------- @@ -106,54 +125,52 @@ def multivariate_t_rvs(m, S, df=np.inf, n=1): random variable - ''' + """ m = np.asarray(m) d = len(m) if df == np.inf: x = np.ones(n) else: - x = np.random.chisquare(df, n)/df - z = np.random.multivariate_normal(np.zeros(d),S,(n,)) - return m + z/np.sqrt(x)[:,None] # same output format as random.multivariate_normal + x = np.random.chisquare(df, n) / df + z = np.random.multivariate_normal(np.zeros(d), S, (n,)) + return ( + m + z / np.sqrt(x)[:, None] + ) # same output format as random.multivariate_normal - - -if __name__ == '__main__': - corr = np.asarray([[1.0, 0, 0.5],[0,1,0],[0.5,0,1]]) - corr_indep = np.asarray([[1.0, 0, 0],[0,1,0],[0,0,1]]) - corr_equal = np.asarray([[1.0, 0.5, 0.5],[0.5,1,0.5],[0.5,0.5,1]]) +if __name__ == "__main__": + corr = np.asarray([[1.0, 0, 0.5], [0, 1, 0], [0.5, 0, 1]]) + corr_indep = np.asarray([[1.0, 0, 0], [0, 1, 0], [0, 0, 1]]) + corr_equal = np.asarray([[1.0, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 1]]) R = corr_equal - a = np.array([-np.inf,-np.inf,-100.0]) - a = np.array([-0.96,-0.96,-0.96]) - b = np.array([0.0,0.0,0.0]) - b = np.array([0.96,0.96, 0.96]) + a = np.array([-np.inf, -np.inf, -100.0]) + a = np.array([-0.96, -0.96, -0.96]) + b = np.array([0.0, 0.0, 0.0]) + b = np.array([0.96, 0.96, 0.96]) a[:] = -1 b[:] = 3 - df = 10. + df = 10.0 sqrt_df = np.sqrt(df) print(mvstdnormcdf(a, b, corr, abseps=1e-6)) - #print integrate.quad(funbgh, 0, np.inf, args=(a,b,R,df)) - print((stats.t.cdf(b[0], df) - stats.t.cdf(a[0], df))**3) + # print integrate.quad(funbgh, 0, np.inf, args=(a,b,R,df)) + print((stats.t.cdf(b[0], df) - stats.t.cdf(a[0], df)) ** 3) s = 1 - print(mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R)) - + print(mvstdnormcdf(s * a / sqrt_df, s * b / sqrt_df, R)) - df=4 + df = 4 print(mvstdtprob(a, b, R, df)) - S = np.array([[1.,.5],[.5,1.]]) - print(multivariate_t_rvs([10.,20.], S, 2, 5)) + S = np.array([[1.0, 0.5], [0.5, 1.0]]) + print(multivariate_t_rvs([10.0, 20.0], S, 2, 5)) nobs = 10000 - rvst = multivariate_t_rvs([10.,20.], S, 2, nobs) - print(np.sum((rvst<[10.,20.]).all(1),0) * 1. / nobs) - print(mvstdtprob(-np.inf*np.ones(2), np.zeros(2), R[:2,:2], 2)) + rvst = multivariate_t_rvs([10.0, 20.0], S, 2, nobs) + print(np.sum((rvst < [10.0, 20.0]).all(1), 0) * 1.0 / nobs) + print(mvstdtprob(-np.inf * np.ones(2), np.zeros(2), R[:2, :2], 2)) - - ''' + """ > lower <- -1 > upper <- 3 > df <- 4 @@ -168,4 +185,4 @@ def multivariate_t_rvs(m, S, df=np.inf, n=1): > (pt(upper, df) - pt(lower, df))**3 [1] 0.4988254 - ''' + """ diff --git a/statsmodels/sandbox/distributions/mv_measures.py b/statsmodels/sandbox/distributions/mv_measures.py index 7074da810f9..3b4f8eba523 100644 --- a/statsmodels/sandbox/distributions/mv_measures.py +++ b/statsmodels/sandbox/distributions/mv_measures.py @@ -1,4 +1,4 @@ -'''using multivariate dependence and divergence measures +"""using multivariate dependence and divergence measures The standard correlation coefficient measures only linear dependence between random variables. 
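The mv_measures hunks below reformat KDE- and histogram-based mutual information estimators. The KDE variant computes MI as the sample mean of log f(y, x) - log f(x) - log f(y) over Gaussian kernel density estimates, optionally normalized to sqrt(1 - exp(-2 * MI)) so it lies on a [0, 1) scale comparable to a correlation coefficient. A minimal sketch of that estimator (sample data and sizes are illustrative, not from the codebase):

import numpy as np
from scipy.stats import gaussian_kde

rng = np.random.default_rng(0)
x = rng.standard_normal(500)
y = x**2 + 0.5 * rng.standard_normal(500)  # nonlinear dependence on x

yx = np.vstack((y, x))
kde_x = gaussian_kde(x)(x)     # marginal density of x at the sample points
kde_y = gaussian_kde(y)(y)     # marginal density of y at the sample points
kde_yx = gaussian_kde(yx)(yx)  # joint density at the sample points

mi = np.mean(np.log(kde_yx) - np.log(kde_x) - np.log(kde_y))
mi_normed = np.sqrt(1.0 - np.exp(-2 * mi))  # map onto a [0, 1) scale
print(mi, mi_normed)

A plain correlation coefficient on these data would be near zero, while the mutual information estimate picks up the quadratic dependence.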
@@ -18,7 +18,7 @@ http://pre.aps.org/abstract/PRE/v76/i2/e026209 -''' +""" import numpy as np from scipy import stats @@ -28,15 +28,13 @@ def mutualinfo_kde(y, x, normed=True): - '''mutual information of two random variables estimated with kde - - ''' + """mutual information of two random variables estimated with kde""" nobs = len(x) if not len(y) == nobs: - raise ValueError('both data arrays need to have the same size') + raise ValueError("both data arrays need to have the same size") x = np.asarray(x, float) y = np.asarray(y, float) - yx = np.vstack((y,x)) + yx = np.vstack((y, x)) kde_x = gaussian_kde(x)(x) kde_y = gaussian_kde(y)(y) kde_yx = gaussian_kde(yx)(yx) @@ -44,35 +42,35 @@ def mutualinfo_kde(y, x, normed=True): mi_obs = np.log(kde_yx) - np.log(kde_x) - np.log(kde_y) mi = mi_obs.sum() / nobs if normed: - mi_normed = np.sqrt(1. - np.exp(-2 * mi)) + mi_normed = np.sqrt(1.0 - np.exp(-2 * mi)) return mi_normed else: return mi -def mutualinfo_kde_2sample(y, x, normed=True): - '''mutual information of two random variables estimated with kde - ''' +def mutualinfo_kde_2sample(y, x, normed=True): + """mutual information of two random variables estimated with kde""" nobs = len(x) x = np.asarray(x, float) y = np.asarray(y, float) - #yx = np.vstack((y,x)) + # yx = np.vstack((y,x)) kde_x = gaussian_kde(x.T)(x.T) kde_y = gaussian_kde(y.T)(x.T) - #kde_yx = gaussian_kde(yx)(yx) + # kde_yx = gaussian_kde(yx)(yx) mi_obs = np.log(kde_x) - np.log(kde_y) if len(mi_obs) != nobs: raise ValueError("Wrong number of observations") mi = mi_obs.mean() if normed: - mi_normed = np.sqrt(1. - np.exp(-2 * mi)) + mi_normed = np.sqrt(1.0 - np.exp(-2 * mi)) return mi_normed else: return mi + def mutualinfo_binned(y, x, bins, normed=True): - '''mutual information of two random variables estimated with kde + """mutual information of two random variables estimated with kde @@ -82,114 +80,123 @@ def mutualinfo_binned(y, x, bins, normed=True): are expected to be in each bin under the assumption of independence. This follows roughly the description in Kahn et al. 
2007 - ''' + """ nobs = len(x) if not len(y) == nobs: - raise ValueError('both data arrays need to have the same size') + raise ValueError("both data arrays need to have the same size") x = np.asarray(x, float) y = np.asarray(y, float) - #yx = np.vstack((y,x)) + # yx = np.vstack((y,x)) + ## fyx, binsy, binsx = np.histogram2d(y, x, bins=bins) + ## fx, binsx_ = np.histogram(x, bins=binsx) + ## fy, binsy_ = np.histogram(y, bins=binsy) -## fyx, binsy, binsx = np.histogram2d(y, x, bins=bins) -## fx, binsx_ = np.histogram(x, bins=binsx) -## fy, binsy_ = np.histogram(y, bins=binsy) - - if bins == 'auto': + if bins == "auto": ys = np.sort(y) xs = np.sort(x) - #quantiles = np.array([0,0.25, 0.4, 0.6, 0.75, 1]) - qbin_sqr = np.sqrt(5./nobs) - quantiles = np.linspace(0, 1, 1./qbin_sqr) - quantile_index = ((nobs-1)*quantiles).astype(int) - #move edges so that they do not coincide with an observation + # quantiles = np.array([0,0.25, 0.4, 0.6, 0.75, 1]) + qbin_sqr = np.sqrt(5.0 / nobs) + quantiles = np.linspace(0, 1, 1.0 / qbin_sqr) + quantile_index = ((nobs - 1) * quantiles).astype(int) + # move edges so that they do not coincide with an observation shift = 1e-6 + np.ones(quantiles.shape) - shift[0] -= 2*1e-6 + shift[0] -= 2 * 1e-6 binsy = ys[quantile_index] + shift binsx = xs[quantile_index] + shift elif np.size(bins) == 1: binsy = bins binsx = bins - elif (len(bins) == 2): + elif len(bins) == 2: binsy, binsx = bins -## if np.size(bins[0]) == 1: -## binsx = bins[0] -## if np.size(bins[1]) == 1: -## binsx = bins[1] + ## if np.size(bins[0]) == 1: + ## binsx = bins[0] + ## if np.size(bins[1]) == 1: + ## binsx = bins[1] fx, binsx = np.histogram(x, bins=binsx) fy, binsy = np.histogram(y, bins=binsy) fyx, binsy, binsx = np.histogram2d(y, x, bins=(binsy, binsx)) - pyx = fyx * 1. / nobs - px = fx * 1. / nobs - py = fy * 1. / nobs - + pyx = fyx * 1.0 / nobs + px = fx * 1.0 / nobs + py = fy * 1.0 / nobs - mi_obs = pyx * (np.log(pyx+1e-10) - np.log(py)[:,None] - np.log(px)) + mi_obs = pyx * (np.log(pyx + 1e-10) - np.log(py)[:, None] - np.log(px)) mi = mi_obs.sum() if normed: - mi_normed = np.sqrt(1. - np.exp(-2 * mi)) + mi_normed = np.sqrt(1.0 - np.exp(-2 * mi)) return mi_normed, (pyx, py, px, binsy, binsx), mi_obs else: return mi -if __name__ == '__main__': +if __name__ == "__main__": import statsmodels.api as sm - funtype = ['linear', 'quadratic'][1] + funtype = ["linear", "quadratic"][1] nobs = 200 - sig = 2#5. - #x = np.linspace(-3, 3, nobs) + np.random.randn(nobs) - x = np.sort(3*np.random.randn(nobs)) + sig = 2 # 5. 
+ # x = np.linspace(-3, 3, nobs) + np.random.randn(nobs) + x = np.sort(3 * np.random.randn(nobs)) exog = sm.add_constant(x, prepend=True) - #y = 0 + np.log(1+x**2) + sig * np.random.randn(nobs) - if funtype == 'quadratic': + # y = 0 + np.log(1+x**2) + sig * np.random.randn(nobs) + if funtype == "quadratic": y = 0 + x**2 + sig * np.random.randn(nobs) - if funtype == 'linear': + if funtype == "linear": y = 0 + x + sig * np.random.randn(nobs) - print('correlation') - print(np.corrcoef(y,x)[0, 1]) - print('pearsonr', stats.pearsonr(y,x)) - print('spearmanr', stats.spearmanr(y,x)) - print('kendalltau', stats.kendalltau(y,x)) + print("correlation") + print(np.corrcoef(y, x)[0, 1]) + print("pearsonr", stats.pearsonr(y, x)) + print("spearmanr", stats.spearmanr(y, x)) + print("kendalltau", stats.kendalltau(y, x)) - pxy, binsx, binsy = np.histogram2d(x,y, bins=5) + pxy, binsx, binsy = np.histogram2d(x, y, bins=5) px, binsx_ = np.histogram(x, bins=binsx) py, binsy_ = np.histogram(y, bins=binsy) - print('mutualinfo', infotheo.mutualinfo(px*1./nobs, py*1./nobs, - 1e-15+pxy*1./nobs, logbase=np.e)) - - print('mutualinfo_kde normed', mutualinfo_kde(y,x)) - print('mutualinfo_kde ', mutualinfo_kde(y,x, normed=False)) - mi_normed, (pyx2, py2, px2, binsy2, binsx2), mi_obs = \ - mutualinfo_binned(y, x, 5, normed=True) - print('mutualinfo_binned normed', mi_normed) - print('mutualinfo_binned ', mi_obs.sum()) - - mi_normed, (pyx2, py2, px2, binsy2, binsx2), mi_obs = \ - mutualinfo_binned(y, x, 'auto', normed=True) - print('auto') - print('mutualinfo_binned normed', mi_normed) - print('mutualinfo_binned ', mi_obs.sum()) + print( + "mutualinfo", + infotheo.mutualinfo( + px * 1.0 / nobs, + py * 1.0 / nobs, + 1e-15 + pxy * 1.0 / nobs, + logbase=np.e, + ), + ) + + print("mutualinfo_kde normed", mutualinfo_kde(y, x)) + print("mutualinfo_kde ", mutualinfo_kde(y, x, normed=False)) + mi_normed, (pyx2, py2, px2, binsy2, binsx2), mi_obs = mutualinfo_binned( + y, x, 5, normed=True + ) + print("mutualinfo_binned normed", mi_normed) + print("mutualinfo_binned ", mi_obs.sum()) + + mi_normed, (pyx2, py2, px2, binsy2, binsx2), mi_obs = mutualinfo_binned( + y, x, "auto", normed=True + ) + print("auto") + print("mutualinfo_binned normed", mi_normed) + print("mutualinfo_binned ", mi_obs.sum()) ys = np.sort(y) xs = np.sort(x) - by = ys[((nobs-1)*np.array([0, 0.25, 0.4, 0.6, 0.75, 1])).astype(int)] - bx = xs[((nobs-1)*np.array([0, 0.25, 0.4, 0.6, 0.75, 1])).astype(int)] - mi_normed, (pyx2, py2, px2, binsy2, binsx2), mi_obs = \ - mutualinfo_binned(y, x, (by,bx), normed=True) - print('quantiles') - print('mutualinfo_binned normed', mi_normed) - print('mutualinfo_binned ', mi_obs.sum()) - - doplot = 1#False + by = ys[((nobs - 1) * np.array([0, 0.25, 0.4, 0.6, 0.75, 1])).astype(int)] + bx = xs[((nobs - 1) * np.array([0, 0.25, 0.4, 0.6, 0.75, 1])).astype(int)] + mi_normed, (pyx2, py2, px2, binsy2, binsx2), mi_obs = mutualinfo_binned( + y, x, (by, bx), normed=True + ) + print("quantiles") + print("mutualinfo_binned normed", mi_normed) + print("mutualinfo_binned ", mi_obs.sum()) + + doplot = 1 # False if doplot: import matplotlib.pyplot as plt - plt.plot(x, y, 'o') + + plt.plot(x, y, "o") olsres = sm.OLS(y, exog).fit() plt.plot(x, olsres.fittedvalues) diff --git a/statsmodels/sandbox/distributions/mv_normal.py b/statsmodels/sandbox/distributions/mv_normal.py index 56510e54ce5..91d4ddc9a0e 100644 --- a/statsmodels/sandbox/distributions/mv_normal.py +++ b/statsmodels/sandbox/distributions/mv_normal.py @@ -148,11 +148,12 @@ from scipy import 
special from statsmodels.sandbox.distributions.multivariate import mvstdtprob + from .extras import mvnormcdf def expect_mc(dist, func=lambda x: 1, size=50000): - '''calculate expected value of function by Monte Carlo integration + """calculate expected value of function by Monte Carlo integration Parameters ---------- @@ -196,15 +197,25 @@ def expect_mc(dist, func=lambda x: 1, size=50000): array([ 0.09937, 0.10075]) - ''' + """ + def fun(x): - return func(x) # * dist.pdf(x) + return func(x) # * dist.pdf(x) + rvs = dist.rvs(size=size) return fun(rvs).mean(0) -def expect_mc_bounds(dist, func=lambda x: 1, size=50000, lower=None, upper=None, - conditional=False, overfact=1.2): - '''calculate expected value of function by Monte Carlo integration + +def expect_mc_bounds( + dist, + func=lambda x: 1, + size=50000, + lower=None, + upper=None, + conditional=False, + overfact=1.2, +): + """calculate expected value of function by Monte Carlo integration Parameters ---------- @@ -260,8 +271,8 @@ def expect_mc_bounds(dist, func=lambda x: 1, size=50000, lower=None, upper=None, [0.0, 1.0, 0.0, 3.0] - ''' - #call rvs once to find length of random vector + """ + # call rvs once to find length of random vector rvsdim = dist.rvs(size=1).shape[-1] if lower is None: lower = -np.inf * np.ones(rvsdim) @@ -273,33 +284,33 @@ def expect_mc_bounds(dist, func=lambda x: 1, size=50000, lower=None, upper=None, upper = np.asarray(upper) def fun(x): - return func(x) # * dist.pdf(x) + return func(x) # * dist.pdf(x) rvsli = [] - used = 0 #remain = size #inplace changes size + used = 0 # remain = size #inplace changes size total = 0 while True: - remain = size - used #just a temp variable + remain = size - used # just a temp variable rvs = dist.rvs(size=int(remain * overfact)) total += int(size * overfact) rvsok = rvs[((rvs >= lower) & (rvs <= upper)).all(-1)] - #if rvsok.ndim == 1: #possible shape problems if only 1 random vector + # if rvsok.ndim == 1: #possible shape problems if only 1 random vector rvsok = np.atleast_2d(rvsok) used += rvsok.shape[0] - rvsli.append(rvsok) #[:remain]) use extras instead + rvsli.append(rvsok) # [:remain]) use extras instead print(used) if used >= size: break rvs = np.vstack(rvsli) print(rvs.shape) - assert used == rvs.shape[0] #saftey check + assert used == rvs.shape[0] # saftey check mean_conditional = fun(rvs).mean(0) if conditional: return mean_conditional else: - return mean_conditional * (used * 1. 
/ total) + return mean_conditional * (used * 1.0 / total) def bivariate_normal(x, mu, cov): @@ -314,20 +325,22 @@ def bivariate_normal(x, mu, cov): mux, muy = mu sigmax, sigmaxy, tmp, sigmay = np.ravel(cov) sigmax, sigmay = np.sqrt(sigmax), np.sqrt(sigmay) - Xmu = X-mux - Ymu = Y-muy - - rho = sigmaxy/(sigmax*sigmay) - z = Xmu**2/sigmax**2 + Ymu**2/sigmay**2 - 2*rho*Xmu*Ymu/(sigmax*sigmay) - denom = 2*np.pi*sigmax*sigmay*np.sqrt(1-rho**2) - return np.exp( -z/(2*(1-rho**2))) / denom + Xmu = X - mux + Ymu = Y - muy + rho = sigmaxy / (sigmax * sigmay) + z = ( + Xmu**2 / sigmax**2 + + Ymu**2 / sigmay**2 + - 2 * rho * Xmu * Ymu / (sigmax * sigmay) + ) + denom = 2 * np.pi * sigmax * sigmay * np.sqrt(1 - rho**2) + return np.exp(-z / (2 * (1 - rho**2))) / denom class BivariateNormal(object): - - #TODO: make integration limits more flexible + # TODO: make integration limits more flexible # or normalize before integration def __init__(self, mean, cov): @@ -343,22 +356,25 @@ def pdf(self, x): return bivariate_normal(x, self.mean, self.cov) def logpdf(self, x): - #TODO: replace this + # TODO: replace this return np.log(self.pdf(x)) def cdf(self, x): return self.expect(upper=x) - def expect(self, func=lambda x: 1, lower=(-10,-10), upper=(10,10)): + def expect(self, func=lambda x: 1, lower=(-10, -10), upper=(10, 10)): def fun(x, y): - x = np.column_stack((x,y)) + x = np.column_stack((x, y)) return func(x) * self.pdf(x) + from scipy.integrate import dblquad - return dblquad(fun, lower[0], upper[0], lambda y: lower[1], - lambda y: upper[1]) + + return dblquad( + fun, lower[0], upper[0], lambda y: lower[1], lambda y: upper[1] + ) def kl(self, other): - '''Kullback-Leibler divergence between this and another distribution + """Kullback-Leibler divergence between this and another distribution int f(x) (log f(x) - log g(x)) dx @@ -368,27 +384,28 @@ def kl(self, other): limits currently hardcoded - ''' - fun = lambda x : self.logpdf(x) - other.logpdf(x) + """ + fun = lambda x: self.logpdf(x) - other.logpdf(x) return self.expect(fun) def kl_mc(self, other, size=500000): - fun = lambda x : self.logpdf(x) - other.logpdf(x) + fun = lambda x: self.logpdf(x) - other.logpdf(x) rvs = self.rvs(size=size) return fun(rvs).mean() + class MVElliptical(object): - '''Base Class for multivariate elliptical distributions, normal and t + """Base Class for multivariate elliptical distributions, normal and t contains common initialization, and some common methods subclass needs to implement at least rvs and logpdf methods - ''' - #getting common things between normal and t distribution + """ + # getting common things between normal and t distribution def __init__(self, mean, sigma, *args, **kwds): - '''initialize instance + """initialize instance Parameters ---------- @@ -403,39 +420,38 @@ def __init__(self, mean, sigma, *args, **kwds): kwds : dict currently not used - ''' + """ self.extra_args = [] self.mean = np.asarray(mean) self.sigma = sigma = np.asarray(sigma) sigma = np.squeeze(sigma) self.nvars = nvars = len(mean) - #self.covchol = np.linalg.cholesky(sigma) - + # self.covchol = np.linalg.cholesky(sigma) - #in the following sigma is original, self.sigma is full matrix + # in the following sigma is original, self.sigma is full matrix if sigma.shape == (): - #iid + # iid self.sigma = np.eye(nvars) * sigma self.sigmainv = np.eye(nvars) / sigma self.cholsigmainv = np.eye(nvars) / np.sqrt(sigma) elif (sigma.ndim == 1) and (len(sigma) == nvars): - #independent heteroskedastic + # independent heteroskedastic self.sigma = 
np.diag(sigma) - self.sigmainv = np.diag(1. / sigma) - self.cholsigmainv = np.diag( 1. / np.sqrt(sigma)) - elif sigma.shape == (nvars, nvars): #python tuple comparison - #general + self.sigmainv = np.diag(1.0 / sigma) + self.cholsigmainv = np.diag(1.0 / np.sqrt(sigma)) + elif sigma.shape == (nvars, nvars): # python tuple comparison + # general self.sigmainv = np.linalg.pinv(sigma) self.cholsigmainv = np.linalg.cholesky(self.sigmainv).T else: - raise ValueError('sigma has invalid shape') + raise ValueError("sigma has invalid shape") - #store logdetsigma for logpdf + # store logdetsigma for logpdf self.logdetsigma = np.log(np.linalg.det(self.sigma)) def rvs(self, size=1): - '''random variable + """random variable Parameters ---------- @@ -450,11 +466,11 @@ def rvs(self, size=1): dimension - ''' + """ raise NotImplementedError def logpdf(self, x): - '''logarithm of probability density function + """logarithm of probability density function Parameters ---------- @@ -472,13 +488,12 @@ def logpdf(self, x): with multivariate normal vector in each row and iid across rows does not work now because of dot in whiten - ''' - + """ raise NotImplementedError def cdf(self, x, **kwds): - '''cumulative distribution function + """cumulative distribution function Parameters ---------- @@ -493,14 +508,13 @@ def cdf(self, x, **kwds): cdf : float or array probability density value of each random vector - ''' + """ raise NotImplementedError - def affine_transformed(self, shift, scale_matrix): - '''affine transformation define in subclass because of distribution - specific restrictions''' - #implemented in subclass at least for now + """affine transformation define in subclass because of distribution + specific restrictions""" + # implemented in subclass at least for now raise NotImplementedError def whiten(self, x): @@ -530,7 +544,7 @@ def whiten(self, x): return np.dot(x, self.cholsigmainv.T) def pdf(self, x): - '''probability density function + """probability density function Parameters ---------- @@ -543,11 +557,11 @@ def pdf(self, x): pdf : float or array probability density value of each random vector - ''' + """ return np.exp(self.logpdf(x)) def standardize(self, x): - '''standardize the random variable, i.e. subtract mean and whiten + """standardize the random variable, i.e. subtract mean and whiten Parameters ---------- @@ -568,17 +582,15 @@ def standardize(self, x): whiten : rescale random variable, standardize without subtracting mean. - ''' + """ return self.whiten(x - self.mean) def standardized(self): - '''return new standardized MVNormal instance - ''' + """return new standardized MVNormal instance""" return self.affine_transformed(-self.mean, self.cholsigmainv) - def normalize(self, x): - '''normalize the random variable, i.e. subtract mean and rescale + """normalize the random variable, i.e. subtract mean and rescale The distribution will have zero mean and sigma equal to correlation @@ -601,16 +613,16 @@ def normalize(self, x): whiten : rescale random variable, standardize without subtracting mean. 
- ''' + """ std_ = np.atleast_2d(self.std_sigma) - return (x - self.mean)/std_ #/std_.T + return (x - self.mean) / std_ # /std_.T def normalized(self, demeaned=True): - '''return a normalized distribution where sigma=corr + """return a normalized distribution where sigma=corr if demeaned is True, then mean will be set to zero - ''' + """ if demeaned: mean_new = np.zeros_like(self.mean) else: @@ -620,44 +632,39 @@ def normalized(self, demeaned=True): return self.__class__(mean_new, sigma_new, *args) def normalized2(self, demeaned=True): - '''return a normalized distribution where sigma=corr + """return a normalized distribution where sigma=corr second implementation for testing affine transformation - ''' + """ if demeaned: shift = -self.mean else: - shift = self.mean * (1. / self.std_sigma - 1.) - return self.affine_transformed(shift, np.diag(1. / self.std_sigma)) - #the following "standardizes" cov instead - #return self.affine_transformed(shift, self.cholsigmainv) - - + shift = self.mean * (1.0 / self.std_sigma - 1.0) + return self.affine_transformed(shift, np.diag(1.0 / self.std_sigma)) + # the following "standardizes" cov instead + # return self.affine_transformed(shift, self.cholsigmainv) @property def std(self): - '''standard deviation, square root of diagonal elements of cov - ''' + """standard deviation, square root of diagonal elements of cov""" return np.sqrt(np.diag(self.cov)) @property def std_sigma(self): - '''standard deviation, square root of diagonal elements of sigma - ''' + """standard deviation, square root of diagonal elements of sigma""" return np.sqrt(np.diag(self.sigma)) - @property def corr(self): - '''correlation matrix''' + """correlation matrix""" return self.cov / np.outer(self.std, self.std) expect_mc = expect_mc def marginal(self, indices): - '''return marginal distribution for variables given by indices + """return marginal distribution for variables given by indices this should be correct for normal and t distribution @@ -673,17 +680,17 @@ def marginal(self, indices): contains the marginal distribution of the variables given in indices - ''' + """ indices = np.asarray(indices) mean_new = self.mean[indices] - sigma_new = self.sigma[indices[:,None], indices] + sigma_new = self.sigma[indices[:, None], indices] args = [getattr(self, ea) for ea in self.extra_args] return self.__class__(mean_new, sigma_new, *args) -#parts taken from linear_model, but heavy adjustments +# parts taken from linear_model, but heavy adjustments class MVNormal0(object): - '''Class for Multivariate Normal Distribution + """Class for Multivariate Normal Distribution original full version, kept for testing, new version inherits from MVElliptical @@ -691,8 +698,7 @@ class MVNormal0(object): uses Cholesky decomposition of covariance matrix for the transformation of the data - ''' - + """ def __init__(self, mean, cov): self.mean = mean @@ -700,26 +706,25 @@ def __init__(self, mean, cov): cov = np.squeeze(cov) self.nvars = nvars = len(mean) - - #in the following cov is original, self.cov is full matrix + # in the following cov is original, self.cov is full matrix if cov.shape == (): - #iid + # iid self.cov = np.eye(nvars) * cov self.covinv = np.eye(nvars) / cov self.cholcovinv = np.eye(nvars) / np.sqrt(cov) elif (cov.ndim == 1) and (len(cov) == nvars): - #independent heteroskedastic + # independent heteroskedastic self.cov = np.diag(cov) - self.covinv = np.diag(1. / cov) - self.cholcovinv = np.diag( 1. 
/ np.sqrt(cov)) - elif cov.shape == (nvars, nvars): #python tuple comparison - #general + self.covinv = np.diag(1.0 / cov) + self.cholcovinv = np.diag(1.0 / np.sqrt(cov)) + elif cov.shape == (nvars, nvars): # python tuple comparison + # general self.covinv = np.linalg.pinv(cov) self.cholcovinv = np.linalg.cholesky(self.covinv).T else: - raise ValueError('cov has invalid shape') + raise ValueError("cov has invalid shape") - #store logdetcov for logpdf + # store logdetcov for logpdf self.logdetcov = np.log(np.linalg.det(self.cov)) def whiten(self, x): @@ -747,13 +752,13 @@ def whiten(self, x): """ x = np.asarray(x) if np.any(self.cov): - #return np.dot(self.cholcovinv, x) + # return np.dot(self.cholcovinv, x) return np.dot(x, self.cholcovinv.T) else: return x def rvs(self, size=1): - '''random variable + """random variable Parameters ---------- @@ -771,11 +776,11 @@ def rvs(self, size=1): ----- uses numpy.random.multivariate_normal directly - ''' + """ return np.random.multivariate_normal(self.mean, self.cov, size=size) def pdf(self, x): - '''probability density function + """probability density function Parameters ---------- @@ -788,12 +793,12 @@ def pdf(self, x): pdf : float or array probability density value of each random vector - ''' + """ return np.exp(self.logpdf(x)) def logpdf(self, x): - '''logarithm of probability density function + """logarithm of probability density function Parameters ---------- @@ -811,12 +816,12 @@ def logpdf(self, x): with multivariate normal vector in each row and iid across rows does not work now because of dot in whiten - ''' + """ x = np.asarray(x) x_whitened = self.whiten(x - self.mean) SSR = np.sum(x_whitened**2, -1) llf = -SSR - llf -= self.nvars * np.log(2. * np.pi) + llf -= self.nvars * np.log(2.0 * np.pi) llf -= self.logdetcov llf *= 0.5 return llf @@ -825,17 +830,17 @@ def logpdf(self, x): class MVNormal(MVElliptical): - '''Class for Multivariate Normal Distribution + """Class for Multivariate Normal Distribution uses Cholesky decomposition of covariance matrix for the transformation of the data - ''' - __name__ == 'Multivariate Normal Distribution' + """ + __name__ == "Multivariate Normal Distribution" def rvs(self, size=1): - '''random variable + """random variable Parameters ---------- @@ -853,11 +858,11 @@ def rvs(self, size=1): ----- uses numpy.random.multivariate_normal directly - ''' + """ return np.random.multivariate_normal(self.mean, self.sigma, size=size) def logpdf(self, x): - '''logarithm of probability density function + """logarithm of probability density function Parameters ---------- @@ -875,18 +880,18 @@ def logpdf(self, x): with multivariate normal vector in each row and iid across rows does not work now because of dot in whiten - ''' + """ x = np.asarray(x) x_whitened = self.whiten(x - self.mean) SSR = np.sum(x_whitened**2, -1) llf = -SSR - llf -= self.nvars * np.log(2. 
* np.pi) + llf -= self.nvars * np.log(2.0 * np.pi) llf -= self.logdetsigma llf *= 0.5 return llf def cdf(self, x, **kwds): - '''cumulative distribution function + """cumulative distribution function Parameters ---------- @@ -901,18 +906,18 @@ def cdf(self, x, **kwds): cdf : float or array probability density value of each random vector - ''' - #lower = -np.inf * np.ones_like(x) - #return mvstdnormcdf(lower, self.standardize(x), self.corr, **kwds) + """ + # lower = -np.inf * np.ones_like(x) + # return mvstdnormcdf(lower, self.standardize(x), self.corr, **kwds) return mvnormcdf(x, self.mean, self.cov, **kwds) @property def cov(self): - '''covariance matrix''' + """covariance matrix""" return self.sigma def affine_transformed(self, shift, scale_matrix): - '''return distribution of an affine transform + """return distribution of an affine transform for full rank scale_matrix only @@ -944,14 +949,14 @@ def affine_transformed(self, shift, scale_matrix): currently only tested because it's called by standardized - ''' - B = scale_matrix #tmp variable + """ + B = scale_matrix # tmp variable mean_new = np.dot(B, self.mean) + shift sigma_new = np.dot(np.dot(B, self.sigma), B.T) return MVNormal(mean_new, sigma_new) def conditional(self, indices, values): - r'''return conditional distribution + r"""return conditional distribution indices are the variables to keep, the complement is the conditioning set @@ -978,8 +983,8 @@ def conditional(self, indices, values): values of the excluded variables. - ''' - #indices need to be nd arrays for broadcasting + """ + # indices need to be nd arrays for broadcasting keep = np.asarray(indices) given = np.asarray([i for i in range(self.nvars) if i not in keep]) sigmakk = self.sigma[keep[:, None], keep] @@ -987,29 +992,32 @@ def conditional(self, indices, values): sigmakg = self.sigma[keep[:, None], given] sigmagk = self.sigma[given[:, None], keep] - - sigma_new = sigmakk - np.dot(sigmakg, np.linalg.solve(sigmagg, sigmagk)) - mean_new = self.mean[keep] + \ - np.dot(sigmakg, np.linalg.solve(sigmagg, values-self.mean[given])) - -# #or -# sig = np.linalg.solve(sigmagg, sigmagk).T -# mean_new = self.mean[keep] + np.dot(sigmakg, values-self.mean[given]) -# sigma_new = sigmakk - np.dot(sigmakg, sig) + sigma_new = sigmakk - np.dot( + sigmakg, np.linalg.solve(sigmagg, sigmagk) + ) + mean_new = self.mean[keep] + np.dot( + sigmakg, np.linalg.solve(sigmagg, values - self.mean[given]) + ) + + # #or + # sig = np.linalg.solve(sigmagg, sigmagk).T + # mean_new = self.mean[keep] + np.dot(sigmakg, values-self.mean[given]) + # sigma_new = sigmakk - np.dot(sigmakg, sig) return MVNormal(mean_new, sigma_new) -#redefine some shortcuts +# redefine some shortcuts np_log = np.log np_pi = np.pi sps_gamln = special.gammaln + class MVT(MVElliptical): - __name__ == 'Multivariate Student T Distribution' + __name__ == "Multivariate Student T Distribution" def __init__(self, mean, sigma, df): - '''initialize instance + """initialize instance Parameters ---------- @@ -1024,13 +1032,13 @@ def __init__(self, mean, sigma, df): kwds : dict currently not used - ''' + """ super(MVT, self).__init__(mean, sigma) - self.extra_args = ['df'] #overwrites extra_args of super + self.extra_args = ["df"] # overwrites extra_args of super self.df = df def rvs(self, size=1): - '''random variables with Student T distribution + """random variables with Student T distribution Parameters ---------- @@ -1052,13 +1060,13 @@ def rvs(self, size=1): does this require df>2 ? 
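        Examples
        --------
        a minimal sketch, not part of the patch: drawing random variables
        should only need ``df > 0``; ``df > 2`` matters only when the sample
        covariance is compared to ``cov``, which is infinite for smaller df

        >>> mvt = MVT([0.0, 0.0], np.eye(2), 3)  # hypothetical values
        >>> mvt.rvs(size=5).shape
        (5, 2)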
- ''' + """ from .multivariate import multivariate_t_rvs - return multivariate_t_rvs(self.mean, self.sigma, df=self.df, n=size) + return multivariate_t_rvs(self.mean, self.sigma, df=self.df, n=size) def logpdf(self, x): - '''logarithm of probability density function + """logarithm of probability density function Parameters ---------- @@ -1071,25 +1079,25 @@ def logpdf(self, x): logpdf : float or array probability density value of each random vector - ''' + """ x = np.asarray(x) df = self.df nvars = self.nvars - x_whitened = self.whiten(x - self.mean) #should be float + x_whitened = self.whiten(x - self.mean) # should be float - llf = - nvars * np_log(df * np_pi) + llf = -nvars * np_log(df * np_pi) llf -= self.logdetsigma - llf -= (df + nvars) * np_log(1 + np.sum(x_whitened**2,-1) / df) + llf -= (df + nvars) * np_log(1 + np.sum(x_whitened**2, -1) / df) llf *= 0.5 - llf += sps_gamln((df + nvars) / 2.) - sps_gamln(df / 2.) + llf += sps_gamln((df + nvars) / 2.0) - sps_gamln(df / 2.0) return llf def cdf(self, x, **kwds): - '''cumulative distribution function + """cumulative distribution function Parameters ---------- @@ -1104,29 +1112,29 @@ def cdf(self, x, **kwds): cdf : float or array probability density value of each random vector - ''' + """ lower = -np.inf * np.ones_like(x) - #std_sigma = np.sqrt(np.diag(self.sigma)) - upper = (x - self.mean)/self.std_sigma + # std_sigma = np.sqrt(np.diag(self.sigma)) + upper = (x - self.mean) / self.std_sigma return mvstdtprob(lower, upper, self.corr, self.df, **kwds) - #mvstdtcdf does not exist yet - #return mvstdtcdf(lower, x, self.corr, df, **kwds) + # mvstdtcdf does not exist yet + # return mvstdtcdf(lower, x, self.corr, df, **kwds) @property def cov(self): - '''covariance matrix + """covariance matrix The covariance matrix for the t distribution does not exist for df<=2, and is equal to sigma * df/(df-2) for df>2 - ''' + """ if self.df <= 2: return np.nan * np.ones_like(self.sigma) else: - return self.df / (self.df - 2.) 
* self.sigma
+            return self.df / (self.df - 2.0) * self.sigma

     def affine_transformed(self, shift, scale_matrix):
-        '''return distribution of a full rank affine transform
+        """return distribution of a full rank affine transform

         for full rank scale_matrix only

@@ -1160,115 +1168,123 @@
         where a is shift, B is full rank scale matrix with same dimension
         as sigma

-        '''
-        #full rank method could also be in elliptical and called with super
-        #after the rank check
-        B = scale_matrix #tmp variable as shorthand
-        if not B.shape == (self.nvars, self.nvars):
-            if (np.linalg.eigvals(B) <= 0).any():
-                raise ValueError('affine transform has to be full rank')
+        """
+        # full rank method could also be in elliptical and called with super
+        # after the rank check
+        B = scale_matrix  # tmp variable as shorthand
+        # the old check only ran the rank test when the shape was already
+        # wrong; require a square scale matrix, then verify it is full rank
+        if B.shape != (self.nvars, self.nvars) or (
+            np.linalg.matrix_rank(B) < self.nvars
+        ):
+            raise ValueError("affine transform has to be full rank")

         mean_new = np.dot(B, self.mean) + shift
         sigma_new = np.dot(np.dot(B, self.sigma), B.T)
         return MVT(mean_new, sigma_new, self.df)


-def quad2d(func=lambda x: 1, lower=(-10,-10), upper=(10,10)):
+def quad2d(func=lambda x: 1, lower=(-10, -10), upper=(10, 10)):
     def fun(x, y):
-        x = np.column_stack((x,y))
+        x = np.column_stack((x, y))
         return func(x)
+
     from scipy.integrate import dblquad
-    return dblquad(fun, lower[0], upper[0], lambda y: lower[1],
-                   lambda y: upper[1])

-if __name__ == '__main__':
+    return dblquad(
+        fun, lower[0], upper[0], lambda y: lower[1], lambda y: upper[1]
+    )
+
+
+if __name__ == "__main__":

     from numpy.testing import assert_almost_equal, assert_array_almost_equal

-    examples = ['mvn']
+    examples = ["mvn"]

-    mu = (0,0)
+    mu = (0, 0)
     covx = np.array([[1.0, 0.5], [0.5, 1.0]])
-    mu3 = [-1, 0., 2.]
-    cov3 = np.array([[ 1.  , 0.5 , 0.75],
-                     [ 0.5 , 1.5 , 0.6 ],
-                     [ 0.75, 0.6 , 2.  ]])
+    mu3 = [-1, 0.0, 2.0]
+    cov3 = np.array([[1.0, 0.5, 0.75], [0.5, 1.5, 0.6], [0.75, 0.6, 2.0]])

-
-    if 'mvn' in examples:
+    if "mvn" in examples:
         bvn = BivariateNormal(mu, covx)
         rvs = bvn.rvs(size=1000)
         print(rvs.mean(0))
         print(np.cov(rvs, rowvar=0))
         print(bvn.expect())
-        print(bvn.cdf([0,0]))
+        print(bvn.cdf([0, 0]))
         bvn1 = BivariateNormal(mu, np.eye(2))
-        bvn2 = BivariateNormal(mu, 4*np.eye(2))
-        fun = lambda x : np.log(bvn1.pdf(x)) - np.log(bvn.pdf(x))
+        bvn2 = BivariateNormal(mu, 4 * np.eye(2))
+        fun = lambda x: np.log(bvn1.pdf(x)) - np.log(bvn.pdf(x))
         print(bvn1.expect(fun))
         print(bvn1.kl(bvn2), bvn1.kl_mc(bvn2))
         print(bvn2.kl(bvn1), bvn2.kl_mc(bvn1))
         print(bvn1.kl(bvn), bvn1.kl_mc(bvn))
         mvn = MVNormal(mu, covx)
-        mvn.pdf([0,0])
-        mvn.pdf(np.zeros((2,2)))
-        #np.dot(mvn.cholcovinv.T, mvn.cholcovinv) - mvn.covinv
-
-        cov3 = np.array([[ 1.  , 0.5 , 0.75],
-                         [ 0.5 , 1.5 , 0.6 ],
-                         [ 0.75, 0.6 , 2.  ]])
-        mu3 = [-1, 0., 2.]
+ mvn.pdf([0, 0]) + mvn.pdf(np.zeros((2, 2))) + # np.dot(mvn.cholcovinv.T, mvn.cholcovinv) - mvn.covinv + + cov3 = np.array([[1.0, 0.5, 0.75], [0.5, 1.5, 0.6], [0.75, 0.6, 2.0]]) + mu3 = [-1, 0.0, 2.0] mvn3 = MVNormal(mu3, cov3) - mvn3.pdf((0., 2., 3.)) - mvn3.logpdf((0., 2., 3.)) - #comparisons with R mvtnorm::dmvnorm - #decimal=14 -# mvn3.logpdf(cov3) - [-7.667977543898155, -6.917977543898155, -5.167977543898155] -# #decimal 18 -# mvn3.pdf(cov3) - [0.000467562492721686, 0.000989829804859273, 0.005696077243833402] -# #cheating new mean, same cov -# mvn3.mean = np.array([0,0,0]) -# #decimal= 16 -# mvn3.pdf(cov3) - [0.02914269740502042, 0.02269635555984291, 0.01767593948287269] - - #as asserts + mvn3.pdf((0.0, 2.0, 3.0)) + mvn3.logpdf((0.0, 2.0, 3.0)) + # comparisons with R mvtnorm::dmvnorm + # decimal=14 + # mvn3.logpdf(cov3) - [-7.667977543898155, -6.917977543898155, -5.167977543898155] + # #decimal 18 + # mvn3.pdf(cov3) - [0.000467562492721686, 0.000989829804859273, 0.005696077243833402] + # #cheating new mean, same cov + # mvn3.mean = np.array([0,0,0]) + # #decimal= 16 + # mvn3.pdf(cov3) - [0.02914269740502042, 0.02269635555984291, 0.01767593948287269] + + # as asserts r_val = [-7.667977543898155, -6.917977543898155, -5.167977543898155] - assert_array_almost_equal( mvn3.logpdf(cov3), r_val, decimal = 14) - #decimal 18 - r_val = [0.000467562492721686, 0.000989829804859273, 0.005696077243833402] - assert_array_almost_equal( mvn3.pdf(cov3), r_val, decimal = 17) - #cheating new mean, same cov, too dangerous, got wrong instance in tests - #mvn3.mean = np.array([0,0,0]) - mvn3c = MVNormal(np.array([0,0,0]), cov3) + assert_array_almost_equal(mvn3.logpdf(cov3), r_val, decimal=14) + # decimal 18 + r_val = [ + 0.000467562492721686, + 0.000989829804859273, + 0.005696077243833402, + ] + assert_array_almost_equal(mvn3.pdf(cov3), r_val, decimal=17) + # cheating new mean, same cov, too dangerous, got wrong instance in tests + # mvn3.mean = np.array([0,0,0]) + mvn3c = MVNormal(np.array([0, 0, 0]), cov3) r_val = [0.02914269740502042, 0.02269635555984291, 0.01767593948287269] - assert_array_almost_equal( mvn3c.pdf(cov3), r_val, decimal = 16) + assert_array_almost_equal(mvn3c.pdf(cov3), r_val, decimal=16) - mvn3b = MVNormal((0,0,0), 1) - fun = lambda x : np.log(mvn3.pdf(x)) - np.log(mvn3b.pdf(x)) + mvn3b = MVNormal((0, 0, 0), 1) + fun = lambda x: np.log(mvn3.pdf(x)) - np.log(mvn3b.pdf(x)) print(mvn3.expect_mc(fun)) print(mvn3.expect_mc(fun, size=200000)) + mvt = MVT((0, 0), 1, 5) + assert_almost_equal( + mvt.logpdf(np.array([0.0, 0.0])), -1.837877066409345, decimal=15 + ) + assert_almost_equal( + mvt.pdf(np.array([0.0, 0.0])), 0.1591549430918953, decimal=15 + ) - mvt = MVT((0,0), 1, 5) - assert_almost_equal(mvt.logpdf(np.array([0.,0.])), -1.837877066409345, - decimal=15) - assert_almost_equal(mvt.pdf(np.array([0.,0.])), 0.1591549430918953, - decimal=15) - - mvt.logpdf(np.array([1.,1.]))-(-3.01552989458359) + mvt.logpdf(np.array([1.0, 1.0])) - (-3.01552989458359) - mvt1 = MVT((0,0), 1, 1) - mvt1.logpdf(np.array([1.,1.]))-(-3.48579549941151) #decimal=16 + mvt1 = MVT((0, 0), 1, 1) + mvt1.logpdf(np.array([1.0, 1.0])) - (-3.48579549941151) # decimal=16 rvs = mvt.rvs(100000) assert_almost_equal(np.cov(rvs, rowvar=0), mvt.cov, decimal=1) mvt31 = MVT(mu3, cov3, 1) - assert_almost_equal(mvt31.pdf(cov3), + assert_almost_equal( + mvt31.pdf(cov3), [0.0007276818698165781, 0.0009980625182293658, 0.0027661422056214652], - decimal=18) + decimal=18, + ) mvt = MVT(mu3, cov3, 3) - assert_almost_equal(mvt.pdf(cov3), + 
assert_almost_equal( + mvt.pdf(cov3), [0.000863777424247410, 0.001277510788307594, 0.004156314279452241], - decimal=17) + decimal=17, + ) diff --git a/statsmodels/sandbox/distributions/otherdist.py b/statsmodels/sandbox/distributions/otherdist.py index bc249a9b0d1..335e50f0621 100644 --- a/statsmodels/sandbox/distributions/otherdist.py +++ b/statsmodels/sandbox/distributions/otherdist.py @@ -1,4 +1,4 @@ -'''Parametric Mixture Distributions +"""Parametric Mixture Distributions Created on Sat Jun 04 2011 @@ -18,14 +18,15 @@ Question: Metaclasses and class factories for generating new distributions from existing distributions by transformation, mixing, compounding -''' +""" import numpy as np from scipy import stats + class ParametricMixtureD(object): - '''mixtures with a discrete distribution + """mixtures with a discrete distribution The mixing distribution is a discrete distribution like scipy.stats.poisson. All distribution in the mixture of the same type and parametrized @@ -42,10 +43,12 @@ class ParametricMixtureD(object): initialization looks fragile for all possible cases of lower and upper bounds of the distributions. - ''' - def __init__(self, mixing_dist, base_dist, bd_args_func, bd_kwds_func, - cutoff=1e-3): - '''create a mixture distribution + """ + + def __init__( + self, mixing_dist, base_dist, bd_args_func, bd_kwds_func, cutoff=1e-3 + ): + """create a mixture distribution Parameters ---------- @@ -70,10 +73,10 @@ def __init__(self, mixing_dist, base_dist, bd_args_func, bd_kwds_func, draws that are outside the truncated range are clipped, that is assigned to the highest or lowest value in the truncated support. - ''' + """ self.mixing_dist = mixing_dist self.base_dist = base_dist - #self.bd_args = bd_args + # self.bd_args = bd_args if not np.isneginf(mixing_dist.dist.a): lower = mixing_dist.dist.a else: @@ -84,7 +87,7 @@ def __init__(self, mixing_dist, base_dist, bd_args_func, bd_kwds_func, upper = mixing_dist.isf(1e-4) self.ma = lower self.mb = upper - mixing_support = np.arange(lower, upper+1) + mixing_support = np.arange(lower, upper + 1) self.mixing_probs = mixing_dist.pmf(mixing_support) self.bd_args = bd_args_func(mixing_support) @@ -92,24 +95,20 @@ def __init__(self, mixing_dist, base_dist, bd_args_func, bd_kwds_func, def rvs(self, size=1): mrvs = self.mixing_dist.rvs(size) - #TODO: check strange cases ? this assumes continous integers + # TODO: check strange cases ? this assumes continous integers mrvs_idx = (np.clip(mrvs, self.ma, self.mb) - self.ma).astype(int) bd_args = tuple(md[mrvs_idx] for md in self.bd_args) bd_kwds = dict((k, self.bd_kwds[k][mrvs_idx]) for k in self.bd_kwds) - kwds = {'size':size} + kwds = {"size": size} kwds.update(bd_kwds) rvs = self.base_dist.rvs(*self.bd_args, **kwds) return rvs, mrvs_idx - - - - def pdf(self, x): x = np.asarray(x) if np.size(x) > 1: - x = x[...,None] #[None, ...] + x = x[..., None] # [None, ...] bd_probs = self.base_dist.pdf(x, *self.bd_args, **self.bd_kwds) prob = (bd_probs * self.mixing_probs).sum(-1) return prob, bd_probs @@ -117,16 +116,17 @@ def pdf(self, x): def cdf(self, x): x = np.asarray(x) if np.size(x) > 1: - x = x[...,None] #[None, ...] + x = x[..., None] # [None, ...] 
bd_probs = self.base_dist.cdf(x, *self.bd_args, **self.bd_kwds) prob = (bd_probs * self.mixing_probs).sum(-1) return prob, bd_probs -#try: +# try: + class ClippedContinuous(object): - '''clipped continuous distribution with a masspoint at clip_lower + """clipped continuous distribution with a masspoint at clip_lower Notes @@ -149,82 +149,77 @@ class ClippedContinuous(object): We could add a check whether the values are in a small neighborhood, but it would be expensive (need to search and check all values). - ''' + """ def __init__(self, base_dist, clip_lower): self.base_dist = base_dist self.clip_lower = clip_lower def _get_clip_lower(self, kwds): - '''helper method to get clip_lower from kwds or attribute - - ''' - if 'clip_lower' not in kwds: + """helper method to get clip_lower from kwds or attribute""" + if "clip_lower" not in kwds: clip_lower = self.clip_lower else: - clip_lower = kwds.pop('clip_lower') + clip_lower = kwds.pop("clip_lower") return clip_lower, kwds def rvs(self, *args, **kwds): clip_lower, kwds = self._get_clip_lower(kwds) rvs_ = self.base_dist.rvs(*args, **kwds) - #same as numpy.clip ? + # same as numpy.clip ? rvs_[rvs_ < clip_lower] = clip_lower return rvs_ - - def pdf(self, x, *args, **kwds): x = np.atleast_1d(x) - if 'clip_lower' not in kwds: + if "clip_lower" not in kwds: clip_lower = self.clip_lower else: - #allow clip_lower to be a possible parameter - clip_lower = kwds.pop('clip_lower') + # allow clip_lower to be a possible parameter + clip_lower = kwds.pop("clip_lower") pdf_raw = np.atleast_1d(self.base_dist.pdf(x, *args, **kwds)) - clip_mask = (x == self.clip_lower) + clip_mask = x == self.clip_lower if np.any(clip_mask): clip_prob = self.base_dist.cdf(clip_lower, *args, **kwds) pdf_raw[clip_mask] = clip_prob - #the following will be handled by sub-classing rv_continuous + # the following will be handled by sub-classing rv_continuous pdf_raw[x < clip_lower] = 0 return pdf_raw def cdf(self, x, *args, **kwds): - if 'clip_lower' not in kwds: + if "clip_lower" not in kwds: clip_lower = self.clip_lower else: - #allow clip_lower to be a possible parameter - clip_lower = kwds.pop('clip_lower') + # allow clip_lower to be a possible parameter + clip_lower = kwds.pop("clip_lower") cdf_raw = self.base_dist.cdf(x, *args, **kwds) - #not needed if equality test is used -## clip_mask = (x == self.clip_lower) -## if np.any(clip_mask): -## clip_prob = self.base_dist.cdf(clip_lower, *args, **kwds) -## pdf_raw[clip_mask] = clip_prob + # not needed if equality test is used + ## clip_mask = (x == self.clip_lower) + ## if np.any(clip_mask): + ## clip_prob = self.base_dist.cdf(clip_lower, *args, **kwds) + ## pdf_raw[clip_mask] = clip_prob - #the following will be handled by sub-classing rv_continuous - #if self.a is defined + # the following will be handled by sub-classing rv_continuous + # if self.a is defined cdf_raw[x < clip_lower] = 0 return cdf_raw def sf(self, x, *args, **kwds): - if 'clip_lower' not in kwds: + if "clip_lower" not in kwds: clip_lower = self.clip_lower else: - #allow clip_lower to be a possible parameter - clip_lower = kwds.pop('clip_lower') + # allow clip_lower to be a possible parameter + clip_lower = kwds.pop("clip_lower") sf_raw = self.base_dist.sf(x, *args, **kwds) sf_raw[x <= clip_lower] = 1 return sf_raw - def ppf(self, x, *args, **kwds): raise NotImplementedError @@ -232,76 +227,76 @@ def plot(self, x, *args, **kwds): clip_lower, kwds = self._get_clip_lower(kwds) mass = self.pdf(clip_lower, *args, **kwds) - xr = np.concatenate(([clip_lower+1e-6], 
x[x>clip_lower])) + xr = np.concatenate(([clip_lower + 1e-6], x[x > clip_lower])) import matplotlib.pyplot as plt - #x = np.linspace(-4, 4, 21) - #plt.figure() - plt.xlim(clip_lower-0.1, x.max()) - #remove duplicate calculation + + # x = np.linspace(-4, 4, 21) + # plt.figure() + plt.xlim(clip_lower - 0.1, x.max()) + # remove duplicate calculation xpdf = self.pdf(x, *args, **kwds) - plt.ylim(0, max(mass, xpdf.max())*1.1) + plt.ylim(0, max(mass, xpdf.max()) * 1.1) plt.plot(xr, self.pdf(xr, *args, **kwds)) - #plt.vline(clip_lower, self.pdf(clip_lower, *args, **kwds)) - plt.stem([clip_lower], [mass], - linefmt='b-', markerfmt='bo', basefmt='r-') + # plt.vline(clip_lower, self.pdf(clip_lower, *args, **kwds)) + plt.stem( + [clip_lower], [mass], linefmt="b-", markerfmt="bo", basefmt="r-" + ) return - - -if __name__ == '__main__': +if __name__ == "__main__": doplots = 1 - #*********** Poisson-Normal Mixture - mdist = stats.poisson(2.) + # *********** Poisson-Normal Mixture + mdist = stats.poisson(2.0) bdist = stats.norm bd_args_fn = lambda x: () - #bd_kwds_fn = lambda x: {'loc': np.atleast_2d(10./(1+x))} - bd_kwds_fn = lambda x: {'loc': x, 'scale': 0.1*np.ones_like(x)} #10./(1+x)} - + # bd_kwds_fn = lambda x: {'loc': np.atleast_2d(10./(1+x))} + bd_kwds_fn = lambda x: { + "loc": x, + "scale": 0.1 * np.ones_like(x), + } # 10./(1+x)} pd = ParametricMixtureD(mdist, bdist, bd_args_fn, bd_kwds_fn) print(pd.pdf(1)) - p, bp = pd.pdf(np.linspace(0,20,21)) - pc, bpc = pd.cdf(np.linspace(0,20,21)) + p, bp = pd.pdf(np.linspace(0, 20, 21)) + pc, bpc = pd.cdf(np.linspace(0, 20, 21)) print(pd.rvs()) rvs, m = pd.rvs(size=1000) - if doplots: import matplotlib.pyplot as plt - plt.hist(rvs, bins = 100) - plt.title('poisson mixture of normal distributions') - #********** clipped normal distribution (Tobit) + plt.hist(rvs, bins=100) + plt.title("poisson mixture of normal distributions") + + # ********** clipped normal distribution (Tobit) bdist = stats.norm - clip_lower_ = 0. 
#-0.5 + clip_lower_ = 0.0 # -0.5 cnorm = ClippedContinuous(bdist, clip_lower_) x = np.linspace(1e-8, 4, 11) print(cnorm.pdf(x)) print(cnorm.cdf(x)) if doplots: - #plt.figure() - #cnorm.plot(x) + # plt.figure() + # cnorm.plot(x) plt.figure() - cnorm.plot(x = np.linspace(-1, 4, 51), loc=0.5, scale=np.sqrt(2)) - plt.title('clipped normal distribution') + cnorm.plot(x=np.linspace(-1, 4, 51), loc=0.5, scale=np.sqrt(2)) + plt.title("clipped normal distribution") fig = plt.figure() - for i, loc in enumerate([0., 0.5, 1.,2.]): - fig.add_subplot(2,2,i+1) - cnorm.plot(x = np.linspace(-1, 4, 51), loc=loc, scale=np.sqrt(2)) - plt.title('clipped normal, loc = %3.2f' % loc) - + for i, loc in enumerate([0.0, 0.5, 1.0, 2.0]): + fig.add_subplot(2, 2, i + 1) + cnorm.plot(x=np.linspace(-1, 4, 51), loc=loc, scale=np.sqrt(2)) + plt.title("clipped normal, loc = %3.2f" % loc) loc = 1.5 rvs = cnorm.rvs(loc=loc, size=2000) plt.figure() plt.hist(rvs, bins=50) - plt.title('clipped normal rvs, loc = %3.2f' % loc) - + plt.title("clipped normal rvs, loc = %3.2f" % loc) - #plt.show() + # plt.show() diff --git a/statsmodels/sandbox/distributions/quantize.py b/statsmodels/sandbox/distributions/quantize.py index 1d1c06a452a..2a537e1a84e 100644 --- a/statsmodels/sandbox/distributions/quantize.py +++ b/statsmodels/sandbox/distributions/quantize.py @@ -1,12 +1,14 @@ -'''Quantizing a continuous distribution in 2d +"""Quantizing a continuous distribution in 2d Author: josef-pktd -''' +""" from statsmodels.compat.python import lmap + import numpy as np + def prob_bv_rectangle(lower, upper, cdf): - '''helper function for probability of a rectangle in a bivariate distribution + """helper function for probability of a rectangle in a bivariate distribution Parameters ---------- @@ -19,15 +21,16 @@ def prob_bv_rectangle(lower, upper, cdf): how does this generalize to more than 2 variates ? - ''' + """ probuu = cdf(*upper) probul = cdf(upper[0], lower[1]) problu = cdf(lower[0], upper[1]) probll = cdf(*lower) return probuu - probul - problu + probll + def prob_mv_grid(bins, cdf, axis=-1): - '''helper function for probability of a rectangle grid in a multivariate distribution + """helper function for probability of a rectangle grid in a multivariate distribution how does this generalize to more than 2 variates ? 
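The inclusion-exclusion identity used by prob_bv_rectangle above is easy to
sanity check by hand. A minimal sketch, not part of the patch; norm2_cdf is a
hypothetical helper for two independent standard normals, for which the
rectangle probability must factor into a product of one-dimensional terms:

    from scipy import stats

    def norm2_cdf(x, y):
        # cdf of two independent standard normals factorizes
        return stats.norm.cdf(x) * stats.norm.cdf(y)

    p = prob_bv_rectangle((-1.0, -1.0), (1.0, 1.0), norm2_cdf)
    # p equals (stats.norm.cdf(1.0) - stats.norm.cdf(-1.0))**2, about 0.4661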
@@ -35,18 +38,18 @@ def prob_mv_grid(bins, cdf, axis=-1): tuple of bin edges, currently it is assumed that they broadcast correctly - ''' + """ if not isinstance(bins, np.ndarray): bins = lmap(np.asarray, bins) n_dim = len(bins) bins_ = [] - #broadcast if binedges are 1d + # broadcast if binedges are 1d if all(lmap(np.ndim, bins) == np.ones(n_dim)): for d in range(n_dim): - sl = [None]*n_dim + sl = [None] * n_dim sl[d] = slice(None) bins_.append(bins[d][sl]) - else: #assume it is already correctly broadcasted + else: # assume it is already correctly broadcasted n_dim = bins.shape[0] bins_ = bins @@ -60,33 +63,36 @@ def prob_mv_grid(bins, cdf, axis=-1): def prob_quantize_cdf(binsx, binsy, cdf): - '''quantize a continuous distribution given by a cdf + """quantize a continuous distribution given by a cdf Parameters ---------- binsx : array_like, 1d binedges - ''' + """ binsx = np.asarray(binsx) binsy = np.asarray(binsy) nx = len(binsx) - 1 ny = len(binsy) - 1 - probs = np.nan * np.ones((nx, ny)) #np.empty(nx,ny) - cdf_values = cdf(binsx[:,None], binsy) - cdf_func = lambda x, y: cdf_values[x,y] - for xind in range(1, nx+1): - for yind in range(1, ny+1): + probs = np.nan * np.ones((nx, ny)) # np.empty(nx,ny) + cdf_values = cdf(binsx[:, None], binsy) + cdf_func = lambda x, y: cdf_values[x, y] + for xind in range(1, nx + 1): + for yind in range(1, ny + 1): upper = (xind, yind) - lower = (xind-1, yind-1) - #print upper,lower, - probs[xind-1,yind-1] = prob_bv_rectangle(lower, upper, cdf_func) + lower = (xind - 1, yind - 1) + # print upper,lower, + probs[xind - 1, yind - 1] = prob_bv_rectangle( + lower, upper, cdf_func + ) assert not np.isnan(probs).any() return probs + def prob_quantize_cdf_old(binsx, binsy, cdf): - '''quantize a continuous distribution given by a cdf + """quantize a continuous distribution given by a cdf old version without precomputing cdf values @@ -95,47 +101,55 @@ def prob_quantize_cdf_old(binsx, binsy, cdf): binsx : array_like, 1d binedges - ''' + """ binsx = np.asarray(binsx) binsy = np.asarray(binsy) nx = len(binsx) - 1 ny = len(binsy) - 1 - probs = np.nan * np.ones((nx, ny)) #np.empty(nx,ny) - for xind in range(1, nx+1): - for yind in range(1, ny+1): + probs = np.nan * np.ones((nx, ny)) # np.empty(nx,ny) + for xind in range(1, nx + 1): + for yind in range(1, ny + 1): upper = (binsx[xind], binsy[yind]) - lower = (binsx[xind-1], binsy[yind-1]) - #print upper,lower, - probs[xind-1,yind-1] = prob_bv_rectangle(lower, upper, cdf) + lower = (binsx[xind - 1], binsy[yind - 1]) + # print upper,lower, + probs[xind - 1, yind - 1] = prob_bv_rectangle(lower, upper, cdf) assert not np.isnan(probs).any() return probs - - -if __name__ == '__main__': +if __name__ == "__main__": from numpy.testing import assert_almost_equal - unif_2d = lambda x,y: x*y - assert_almost_equal(prob_bv_rectangle([0,0], [1,0.5], unif_2d), 0.5, 14) - assert_almost_equal(prob_bv_rectangle([0,0], [0.5,0.5], unif_2d), 0.25, 14) - - arr1b = np.array([[ 0.05, 0.05, 0.05, 0.05], - [ 0.05, 0.05, 0.05, 0.05], - [ 0.05, 0.05, 0.05, 0.05], - [ 0.05, 0.05, 0.05, 0.05], - [ 0.05, 0.05, 0.05, 0.05]]) - arr1a = prob_quantize_cdf(np.linspace(0,1,6), np.linspace(0,1,5), unif_2d) + unif_2d = lambda x, y: x * y + assert_almost_equal(prob_bv_rectangle([0, 0], [1, 0.5], unif_2d), 0.5, 14) + assert_almost_equal( + prob_bv_rectangle([0, 0], [0.5, 0.5], unif_2d), 0.25, 14 + ) + + arr1b = np.array( + [ + [0.05, 0.05, 0.05, 0.05], + [0.05, 0.05, 0.05, 0.05], + [0.05, 0.05, 0.05, 0.05], + [0.05, 0.05, 0.05, 0.05], + [0.05, 0.05, 0.05, 
0.05], + ] + ) + + arr1a = prob_quantize_cdf( + np.linspace(0, 1, 6), np.linspace(0, 1, 5), unif_2d + ) assert_almost_equal(arr1a, arr1b, 14) - arr2b = np.array([[ 0.25], - [ 0.25], - [ 0.25], - [ 0.25]]) - arr2a = prob_quantize_cdf(np.linspace(0,1,5), np.linspace(0,1,2), unif_2d) + arr2b = np.array([[0.25], [0.25], [0.25], [0.25]]) + arr2a = prob_quantize_cdf( + np.linspace(0, 1, 5), np.linspace(0, 1, 2), unif_2d + ) assert_almost_equal(arr2a, arr2b, 14) - arr3b = np.array([[ 0.25, 0.25, 0.25, 0.25]]) - arr3a = prob_quantize_cdf(np.linspace(0,1,2), np.linspace(0,1,5), unif_2d) + arr3b = np.array([[0.25, 0.25, 0.25, 0.25]]) + arr3a = prob_quantize_cdf( + np.linspace(0, 1, 2), np.linspace(0, 1, 5), unif_2d + ) assert_almost_equal(arr3a, arr3b, 14) diff --git a/statsmodels/sandbox/distributions/sppatch.py b/statsmodels/sandbox/distributions/sppatch.py index cd62f4a50f3..ca2f5a7e428 100644 --- a/statsmodels/sandbox/distributions/sppatch.py +++ b/statsmodels/sandbox/distributions/sppatch.py @@ -1,4 +1,4 @@ -'''patching scipy to fit distributions and expect method +"""patching scipy to fit distributions and expect method This adds new methods to estimate continuous distribution parameters with some fixed/frozen parameters. It also contains functions that calculate the expected @@ -9,25 +9,26 @@ Author: josef-pktd License: Simplified BSD -''' +""" from statsmodels.compat.python import lmap -import numpy as np -from scipy import stats, optimize, integrate +import numpy as np +from scipy import integrate, optimize, stats ########## patching scipy -#vonmises does not define finite bounds, because it is intended for circular -#support which does not define a proper pdf on the real line +# vonmises does not define finite bounds, because it is intended for circular +# support which does not define a proper pdf on the real line stats.distributions.vonmises.a = -np.pi stats.distributions.vonmises.b = np.pi -#the next 3 functions are for fit with some fixed parameters -#As they are written, they do not work as functions, only as methods +# the next 3 functions are for fit with some fixed parameters +# As they are written, they do not work as functions, only as methods + def _fitstart(self, x): - '''example method, method of moment estimator as starting values + """example method, method of moment estimator as starting values Parameters ---------- @@ -47,14 +48,15 @@ def _fitstart(self, x): This example was written for the gamma distribution, but not verified with literature - ''' - loc = np.min([x.min(),0]) - a = 4/stats.skew(x)**2 + """ + loc = np.min([x.min(), 0]) + a = 4 / stats.skew(x) ** 2 scale = np.std(x) / np.sqrt(a) return (a, loc, scale) + def _fitstart_beta(self, x, fixed=None): - '''method of moment estimator as starting values for beta distribution + """method of moment estimator as starting values for beta distribution Parameters ---------- @@ -82,41 +84,47 @@ def _fitstart_beta(self, x, fixed=None): NIST reference also includes reference to MLE in Johnson, Kotz, and Balakrishan, Volume II, pages 221-235 - ''' - #todo: separate out this part to be used for other compact support distributions + """ + # todo: separate out this part to be used for other compact support distributions # e.g. 
rdist, vonmises, and truncnorm # but this might not work because it might still be distribution specific a, b = x.min(), x.max() - eps = (a-b)*0.01 + eps = (a - b) * 0.01 if fixed is None: - #this part not checked with books + # this part not checked with books loc = a - eps - scale = (a - b) * (1 + 2*eps) + scale = (a - b) * (1 + 2 * eps) else: if np.isnan(fixed[-2]): - #estimate loc + # estimate loc loc = a - eps else: loc = fixed[-2] if np.isnan(fixed[-1]): - #estimate scale + # estimate scale scale = (b + eps) - loc else: scale = fixed[-1] - #method of moment for known loc scale: + # method of moment for known loc scale: scale = float(scale) - xtrans = (x - loc)/scale + xtrans = (x - loc) / scale xm = xtrans.mean() xv = xtrans.var() - tmp = (xm*(1-xm)/xv - 1) + tmp = xm * (1 - xm) / xv - 1 p = xm * tmp q = (1 - xm) * tmp - return (p, q, loc, scale) #check return type and should fixed be returned ? + return ( + p, + q, + loc, + scale, + ) # check return type and should fixed be returned ? + def _fitstart_poisson(self, x, fixed=None): - '''maximum likelihood estimator as starting values for Poisson distribution + """maximum likelihood estimator as starting values for Poisson distribution Parameters ---------- @@ -141,30 +149,30 @@ def _fitstart_poisson(self, x, fixed=None): MLE : https://en.wikipedia.org/wiki/Poisson_distribution#Maximum_likelihood - ''' - #todo: separate out this part to be used for other compact support distributions + """ + # todo: separate out this part to be used for other compact support distributions # e.g. rdist, vonmises, and truncnorm # but this might not work because it might still be distribution specific a = x.min() - eps = 0 # is this robust ? + eps = 0 # is this robust ? if fixed is None: - #this part not checked with books + # this part not checked with books loc = a - eps else: if np.isnan(fixed[-1]): - #estimate loc + # estimate loc loc = a - eps else: loc = fixed[-1] - #MLE for standard (unshifted, if loc=0) Poisson distribution + # MLE for standard (unshifted, if loc=0) Poisson distribution - xtrans = (x - loc) + xtrans = x - loc lambd = xtrans.mean() - #second derivative d loglike/ dlambd Not used - #dlldlambd = 1/lambd # check + # second derivative d loglike/ dlambd Not used + # dlldlambd = 1/lambd # check - return (lambd, loc) #check return type and should fixed be returned ? + return (lambd, loc) # check return type and should fixed be returned ? def nnlf_fr(self, thetash, x, frmask): @@ -185,17 +193,18 @@ def nnlf_fr(self, thetash, x, frmask): raise ValueError("Not enough input arguments.") if not self._argcheck(*args) or scale <= 0: return np.inf - x = np.array((x-loc) / scale) + x = np.array((x - loc) / scale) cond0 = (x <= self.a) | (x >= self.b) - if (np.any(cond0)): + if np.any(cond0): return np.inf else: N = len(x) - #raise ValueError - return self._nnlf(x, *args) + N*np.log(scale) + # raise ValueError + return self._nnlf(x, *args) + N * np.log(scale) + def fit_fr(self, data, *args, **kwds): - '''estimate distribution parameters by MLE taking some parameters as fixed + """estimate distribution parameters by MLE taking some parameters as fixed Parameters ---------- @@ -251,22 +260,22 @@ def fit_fr(self, data, *args, **kwds): * check if docstring is correct * more input checking, args is list ? 
might also apply to current fit method

-    '''
-    loc0, scale0 = lmap(kwds.get, ['loc', 'scale'],[0.0, 1.0])
+    """
+    loc0, scale0 = lmap(kwds.get, ["loc", "scale"], [0.0, 1.0])
     Narg = len(args)
-    if Narg == 0 and hasattr(self, '_fitstart'):
+    if Narg == 0 and hasattr(self, "_fitstart"):
         x0 = self._fitstart(data)
     elif Narg > self.numargs:
         raise ValueError("Too many input arguments.")
     else:
-        args += (1.0,)*(self.numargs-Narg)
+        args += (1.0,) * (self.numargs - Narg)
         # location and scale are at the end
         x0 = args + (loc0, scale0)
-    if 'frozen' in kwds:
-        frmask = np.array(kwds['frozen'])
-        if len(frmask) != self.numargs+2:
+    if "frozen" in kwds:
+        frmask = np.array(kwds["frozen"])
+        if len(frmask) != self.numargs + 2:
             raise ValueError("Incorrect number of frozen arguments.")
         else:
             # keep starting values for not frozen parameters
@@ -278,25 +287,29 @@
             # If there were array elements, then frmask will be object-dtype,
             # in which case np.isnan will raise TypeError
             frmask = frmask.astype(np.float64)
-            x0  = np.array(x0)[np.isnan(frmask)]
+            x0 = np.array(x0)[np.isnan(frmask)]
     else:
         frmask = None
-    #print(x0
-    #print(frmask
-    return optimize.fmin(self.nnlf_fr, x0,
-                         args=(np.ravel(data), frmask), disp=0)
+    # print(x0
+    # print(frmask
+    return optimize.fmin(
+        self.nnlf_fr, x0, args=(np.ravel(data), frmask), disp=0
+    )


-#The next two functions/methods calculate expected value of an arbitrary
-#function, however for the continuous functions intquad is use, which might
-#require continuouity or smoothness in the function.
+# The next two functions/methods calculate expected value of an arbitrary
+# function; however, for the continuous functions integrate.quad is used,
+# which might require continuity or smoothness in the function.

-#TODO: add option for Monte Carlo integration
+# TODO: add option for Monte Carlo integration

-def expect(self, fn=None, args=(), loc=0, scale=1, lb=None, ub=None, conditional=False):
-    '''calculate expected value of a function with respect to the distribution
+
+def expect(
+    self, fn=None, args=(), loc=0, scale=1, lb=None, ub=None, conditional=False
+):
+    """calculate expected value of a function with respect to the distribution

     location and scale only tested on a few examples

@@ -324,28 +337,34 @@
    This function has not been checked for its behavior when the integral is
    not finite. The integration behavior is inherited from scipy.integrate.quad.
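    Examples
    --------
    a minimal sketch, assuming the patch has been applied so that the method
    is attached to scipy.stats distributions; the value is the conditional
    truncated mean E[X | X > 0] of a standard normal, sqrt(2/pi)

    >>> from scipy import stats
    >>> round(stats.norm.expect(lambda x: x, lb=0.0, conditional=True), 4)
    0.7979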
- ''' + """ if fn is None: + def fun(x, *args): - return x*self.pdf(x, loc=loc, scale=scale, *args) + return x * self.pdf(x, loc=loc, scale=scale, *args) + else: + def fun(x, *args): - return fn(x)*self.pdf(x, loc=loc, scale=scale, *args) + return fn(x) * self.pdf(x, loc=loc, scale=scale, *args) + if lb is None: - lb = loc + self.a * scale #(self.a - loc)/(1.0*scale) + lb = loc + self.a * scale # (self.a - loc)/(1.0*scale) if ub is None: - ub = loc + self.b * scale #(self.b - loc)/(1.0*scale) + ub = loc + self.b * scale # (self.b - loc)/(1.0*scale) if conditional: - invfac = (self.sf(lb, loc=loc, scale=scale, *args) - - self.sf(ub, loc=loc, scale=scale, *args)) + invfac = self.sf(lb, loc=loc, scale=scale, *args) - self.sf( + ub, loc=loc, scale=scale, *args + ) else: invfac = 1.0 - return integrate.quad(fun, lb, ub, - args=args)[0]/invfac + return integrate.quad(fun, lb, ub, args=args)[0] / invfac -def expect_v2(self, fn=None, args=(), loc=0, scale=1, lb=None, ub=None, conditional=False): - '''calculate expected value of a function with respect to the distribution +def expect_v2( + self, fn=None, args=(), loc=0, scale=1, lb=None, ub=None, conditional=False +): + """calculate expected value of a function with respect to the distribution location and scale only tested on a few examples @@ -385,50 +404,57 @@ def expect_v2(self, fn=None, args=(), loc=0, scale=1, lb=None, ub=None, conditio for example if the distribution is very concentrated and the default limits are too large. - ''' - #changes: 20100809 - #correction and refactoring how loc and scale are handled - #uses now _pdf - #needs more testing for distribution with bound support, e.g. genpareto + """ + # changes: 20100809 + # correction and refactoring how loc and scale are handled + # uses now _pdf + # needs more testing for distribution with bound support, e.g. 
genpareto if fn is None: + def fun(x, *args): - return (loc + x*scale)*self._pdf(x, *args) + return (loc + x * scale) * self._pdf(x, *args) + else: + def fun(x, *args): - return fn(loc + x*scale)*self._pdf(x, *args) + return fn(loc + x * scale) * self._pdf(x, *args) + if lb is None: - #lb = self.a + # lb = self.a try: - lb = self.ppf(1e-9, *args) #1e-14 quad fails for pareto + lb = self.ppf(1e-9, *args) # 1e-14 quad fails for pareto except ValueError: lb = self.a else: - lb = max(self.a, (lb - loc)/(1.0*scale)) #transform to standardized + lb = max( + self.a, (lb - loc) / (1.0 * scale) + ) # transform to standardized if ub is None: - #ub = self.b + # ub = self.b try: - ub = self.ppf(1-1e-9, *args) + ub = self.ppf(1 - 1e-9, *args) except ValueError: ub = self.b else: - ub = min(self.b, (ub - loc)/(1.0*scale)) + ub = min(self.b, (ub - loc) / (1.0 * scale)) if conditional: - invfac = self._sf(lb,*args) - self._sf(ub,*args) + invfac = self._sf(lb, *args) - self._sf(ub, *args) else: invfac = 1.0 - return integrate.quad(fun, lb, ub, - args=args, limit=500)[0]/invfac + return integrate.quad(fun, lb, ub, args=args, limit=500)[0] / invfac + ### for discrete distributions -#TODO: check that for a distribution with finite support the calculations are +# TODO: check that for a distribution with finite support the calculations are # done with one array summation (np.dot) -#based on _drv2_moment(self, n, *args), but streamlined -def expect_discrete(self, fn=None, args=(), loc=0, lb=None, ub=None, - conditional=False): - '''calculate expected value of a function with respect to the distribution +# based on _drv2_moment(self, n, *args), but streamlined +def expect_discrete( + self, fn=None, args=(), loc=0, lb=None, ub=None, conditional=False +): + """calculate expected value of a function with respect to the distribution for discrete distribution Parameters @@ -468,39 +494,43 @@ def expect_discrete(self, fn=None, args=(), loc=0, lb=None, ub=None, are evaluated) - ''' + """ - #moment_tol = 1e-12 # increase compared to self.moment_tol, + # moment_tol = 1e-12 # increase compared to self.moment_tol, # too slow for only small gain in precision for zipf - #avoid endless loop with unbound integral, eg. var of zipf(2) + # avoid endless loop with unbound integral, eg. var of zipf(2) maxcount = 1000 - suppnmin = 100 #minimum number of points to evaluate (+ and -) + suppnmin = 100 # minimum number of points to evaluate (+ and -) if fn is None: + def fun(x): - #loc and args from outer scope - return (x+loc)*self._pmf(x, *args) + # loc and args from outer scope + return (x + loc) * self._pmf(x, *args) + else: + def fun(x): - #loc and args from outer scope - return fn(x+loc)*self._pmf(x, *args) + # loc and args from outer scope + return fn(x + loc) * self._pmf(x, *args) + # used pmf because _pmf does not check support in randint # and there might be problems(?) 
with correct self.a, self.b at this stage # maybe not anymore, seems to work now with _pmf - self._argcheck(*args) # (re)generate scalar self.a and self.b + self._argcheck(*args) # (re)generate scalar self.a and self.b if lb is None: - lb = (self.a) + lb = self.a else: lb = lb - loc if ub is None: - ub = (self.b) + ub = self.b else: ub = ub - loc if conditional: - invfac = self.sf(lb,*args) - self.sf(ub+1,*args) + invfac = self.sf(lb, *args) - self.sf(ub + 1, *args) else: invfac = 1.0 @@ -508,14 +538,14 @@ def fun(x): low, upp = self._ppf(0.001, *args), self._ppf(0.999, *args) low = max(min(-suppnmin, low), lb) upp = min(max(suppnmin, upp), ub) - supp = np.arange(low, upp+1, self.inc) #check limits - #print('low, upp', low, upp + supp = np.arange(low, upp + 1, self.inc) # check limits + # print('low, upp', low, upp tot = np.sum(fun(supp)) diff = 1e100 pos = upp + self.inc count = 0 - #handle cases with infinite support + # handle cases with infinite support while (pos <= ub) and (diff > self.moment_tol) and count <= maxcount: diff = fun(pos) @@ -523,7 +553,7 @@ def fun(x): pos += self.inc count += 1 - if self.a < 0: #handle case when self.a = -inf + if self.a < 0: # handle case when self.a = -inf diff = 1e100 pos = low - self.inc while (pos >= lb) and (diff > self.moment_tol) and count <= maxcount: @@ -533,21 +563,24 @@ def fun(x): count += 1 if count > maxcount: # replace with proper warning - print('sum did not converge') - return tot/invfac + print("sum did not converge") + return tot / invfac + stats.distributions.rv_continuous.fit_fr = fit_fr stats.distributions.rv_continuous.nnlf_fr = nnlf_fr stats.distributions.rv_continuous.expect = expect stats.distributions.rv_discrete.expect = expect_discrete -stats.distributions.beta_gen._fitstart = _fitstart_beta #not tried out yet -stats.distributions.poisson_gen._fitstart = _fitstart_poisson #not tried out yet +stats.distributions.beta_gen._fitstart = _fitstart_beta # not tried out yet +stats.distributions.poisson_gen._fitstart = ( + _fitstart_poisson # not tried out yet +) ########## end patching scipy def distfitbootstrap(sample, distr, nrepl=100): - '''run bootstrap for estimation of distribution parameters + """run bootstrap for estimation of distribution parameters hard coded: only one shape parameter is allowed and estimated, loc=0 and scale=1 are fixed in the estimation @@ -565,7 +598,7 @@ def distfitbootstrap(sample, distr, nrepl=100): res : array (nrepl,) parameter estimates for all bootstrap replications - ''' + """ nobs = len(sample) res = np.zeros(nrepl) for ii in range(nrepl): @@ -574,8 +607,9 @@ def distfitbootstrap(sample, distr, nrepl=100): res[ii] = distr.fit_fr(x, frozen=[np.nan, 0.0, 1.0]) return res + def distfitmc(sample, distr, nrepl=100, distkwds={}): - '''run Monte Carlo for estimation of distribution parameters + """run Monte Carlo for estimation of distribution parameters hard coded: only one shape parameter is allowed and estimated, loc=0 and scale=1 are fixed in the estimation @@ -593,8 +627,8 @@ def distfitmc(sample, distr, nrepl=100, distkwds={}): res : array (nrepl,) parameter estimates for all Monte Carlo replications - ''' - arg = distkwds.pop('arg') + """ + arg = distkwds.pop("arg") nobs = len(sample) res = np.zeros(nrepl) for ii in range(nrepl): @@ -603,8 +637,8 @@ def distfitmc(sample, distr, nrepl=100, distkwds={}): return res -def printresults(sample, arg, bres, kind='bootstrap'): - '''calculate and print(Bootstrap or Monte Carlo result +def printresults(sample, arg, bres, kind="bootstrap"): + 
"""calculate and print(Bootstrap or Monte Carlo result Parameters ---------- @@ -633,95 +667,102 @@ def printresults(sample, arg, bres, kind='bootstrap'): todo: return results and string instead of printing - ''' - print('true parameter value') + """ + print("true parameter value") print(arg) - print('MLE estimate of parameters using sample (nobs=%d)'% (nobs)) + print("MLE estimate of parameters using sample (nobs=%d)" % (nobs)) argest = distr.fit_fr(sample, frozen=[np.nan, 0.0, 1.0]) print(argest) - if kind == 'bootstrap': - #bootstrap compares to estimate from sample + if kind == "bootstrap": + # bootstrap compares to estimate from sample argorig = arg arg = argest - print('%s distribution of parameter estimate (nrepl=%d)'% (kind, nrepl)) - print('mean = %f, bias=%f' % (bres.mean(0), bres.mean(0)-arg)) - print('median', np.median(bres, axis=0)) - print('var and std', bres.var(0), np.sqrt(bres.var(0))) - bmse = ((bres - arg)**2).mean(0) - print('mse, rmse', bmse, np.sqrt(bmse)) + print("%s distribution of parameter estimate (nrepl=%d)" % (kind, nrepl)) + print("mean = %f, bias=%f" % (bres.mean(0), bres.mean(0) - arg)) + print("median", np.median(bres, axis=0)) + print("var and std", bres.var(0), np.sqrt(bres.var(0))) + bmse = ((bres - arg) ** 2).mean(0) + print("mse, rmse", bmse, np.sqrt(bmse)) bressorted = np.sort(bres) - print('%s confidence interval (90%% coverage)' % kind) - print(bressorted[np.floor(nrepl*0.05)], bressorted[np.floor(nrepl*0.95)]) - print('%s confidence interval (90%% coverage) normal approximation' % kind) - print(stats.norm.ppf(0.05, loc=bres.mean(), scale=bres.std()),) + print("%s confidence interval (90%% coverage)" % kind) + print( + bressorted[np.floor(nrepl * 0.05)], bressorted[np.floor(nrepl * 0.95)] + ) + print("%s confidence interval (90%% coverage) normal approximation" % kind) + print( + stats.norm.ppf(0.05, loc=bres.mean(), scale=bres.std()), + ) print(stats.norm.isf(0.05, loc=bres.mean(), scale=bres.std())) - print('Kolmogorov-Smirnov test for normality of %s distribution' % kind) - print(' - estimated parameters, p-values not really correct') - print(stats.kstest(bres, 'norm', (bres.mean(), bres.std()))) + print("Kolmogorov-Smirnov test for normality of %s distribution" % kind) + print(" - estimated parameters, p-values not really correct") + print(stats.kstest(bres, "norm", (bres.mean(), bres.std()))) -if __name__ == '__main__': +if __name__ == "__main__": - examplecases = ['largenumber', 'bootstrap', 'montecarlo'][:] + examplecases = ["largenumber", "bootstrap", "montecarlo"][:] - if 'largenumber' in examplecases: + if "largenumber" in examplecases: - print('\nDistribution: vonmises') + print("\nDistribution: vonmises") - for nobs in [200]:#[20000, 1000, 100]: + for nobs in [200]: # [20000, 1000, 100]: x = stats.vonmises.rvs(1.23, loc=0, scale=1, size=nobs) - print('\nnobs:', nobs) - print('true parameter') - print('1.23, loc=0, scale=1') - print('unconstrained') + print("\nnobs:", nobs) + print("true parameter") + print("1.23, loc=0, scale=1") + print("unconstrained") print(stats.vonmises.fit(x)) print(stats.vonmises.fit_fr(x, frozen=[np.nan, np.nan, np.nan])) - print('with fixed loc and scale') + print("with fixed loc and scale") print(stats.vonmises.fit_fr(x, frozen=[np.nan, 0.0, 1.0])) - print('\nDistribution: gamma') + print("\nDistribution: gamma") distr = stats.gamma - arg, loc, scale = 2.5, 0., 20. 
+ arg, loc, scale = 2.5, 0.0, 20.0 - for nobs in [200]:#[20000, 1000, 100]: + for nobs in [200]: # [20000, 1000, 100]: x = distr.rvs(arg, loc=loc, scale=scale, size=nobs) - print('\nnobs:', nobs) - print('true parameter') - print('%f, loc=%f, scale=%f' % (arg, loc, scale)) - print('unconstrained') + print("\nnobs:", nobs) + print("true parameter") + print("%f, loc=%f, scale=%f" % (arg, loc, scale)) + print("unconstrained") print(distr.fit(x)) print(distr.fit_fr(x, frozen=[np.nan, np.nan, np.nan])) - print('with fixed loc and scale') + print("with fixed loc and scale") print(distr.fit_fr(x, frozen=[np.nan, 0.0, 1.0])) - print('with fixed loc') + print("with fixed loc") print(distr.fit_fr(x, frozen=[np.nan, 0.0, np.nan])) + ex = ["gamma", "vonmises"][0] - ex = ['gamma', 'vonmises'][0] - - if ex == 'gamma': + if ex == "gamma": distr = stats.gamma - arg, loc, scale = 2.5, 0., 1 - elif ex == 'vonmises': + arg, loc, scale = 2.5, 0.0, 1 + elif ex == "vonmises": distr = stats.vonmises - arg, loc, scale = 1.5, 0., 1 + arg, loc, scale = 1.5, 0.0, 1 else: - raise ValueError('wrong example') + raise ValueError("wrong example") nobs = 100 nrepl = 1000 sample = distr.rvs(arg, loc=loc, scale=scale, size=nobs) - print('\nDistribution:', distr) - if 'bootstrap' in examplecases: - print('\nBootstrap') - bres = distfitbootstrap(sample, distr, nrepl=nrepl ) + print("\nDistribution:", distr) + if "bootstrap" in examplecases: + print("\nBootstrap") + bres = distfitbootstrap(sample, distr, nrepl=nrepl) printresults(sample, arg, bres) - if 'montecarlo' in examplecases: - print('\nMonteCarlo') - mcres = distfitmc(sample, distr, nrepl=nrepl, - distkwds=dict(arg=arg, loc=loc, scale=scale)) - printresults(sample, arg, mcres, kind='montecarlo') + if "montecarlo" in examplecases: + print("\nMonteCarlo") + mcres = distfitmc( + sample, + distr, + nrepl=nrepl, + distkwds=dict(arg=arg, loc=loc, scale=scale), + ) + printresults(sample, arg, mcres, kind="montecarlo") diff --git a/statsmodels/sandbox/distributions/tests/_est_fit.py b/statsmodels/sandbox/distributions/tests/_est_fit.py index 0a5facd0229..f9ce6cdad90 100644 --- a/statsmodels/sandbox/distributions/tests/_est_fit.py +++ b/statsmodels/sandbox/distributions/tests/_est_fit.py @@ -8,19 +8,19 @@ import numpy as np - from scipy import stats from .distparams import distcont # this is not a proper statistical test for convergence, but only # verifies that the estimate and true values do not differ by too much -n_repl1 = 1000 # sample size for first run -n_repl2 = 5000 # sample size for second run, if first run fails -thresh_percent = 0.25 # percent of true parameters for fail cut-off +n_repl1 = 1000 # sample size for first run +n_repl2 = 5000 # sample size for second run, if first run fails +thresh_percent = 0.25 # percent of true parameters for fail cut-off thresh_min = 0.75 # minimum difference estimate - true to fail test -#distcont = [['genextreme', (3.3184017469423535,)]] +# distcont = [['genextreme', (3.3184017469423535,)]] + def _est_cont_fit(): # this tests the closeness of the estimated parameters to the true @@ -28,41 +28,54 @@ def _est_cont_fit(): # Note: is slow, some distributions do not converge with sample size <= 10000 for distname, arg in distcont: - yield check_cont_fit, distname,arg + yield check_cont_fit, distname, arg -def check_cont_fit(distname,arg): +def check_cont_fit(distname, arg): distfn = getattr(stats, distname) - rvs = distfn.rvs(size=n_repl1,*arg) - est = distfn.fit(rvs) #,*arg) # start with default values - - truearg = 
np.hstack([arg,[0.0,1.0]]) - diff = est-truearg - - txt = '' - diffthreshold = np.max(np.vstack([truearg*thresh_percent, - np.ones(distfn.numargs+2)*thresh_min]),0) + rvs = distfn.rvs(size=n_repl1, *arg) + est = distfn.fit(rvs) # ,*arg) # start with default values + + truearg = np.hstack([arg, [0.0, 1.0]]) + diff = est - truearg + + txt = "" + diffthreshold = np.max( + np.vstack( + [ + truearg * thresh_percent, + np.ones(distfn.numargs + 2) * thresh_min, + ] + ), + 0, + ) # threshold for location - diffthreshold[-2] = np.max([np.abs(rvs.mean())*thresh_percent,thresh_min]) + diffthreshold[-2] = np.max( + [np.abs(rvs.mean()) * thresh_percent, thresh_min] + ) if np.any(np.isnan(est)): - raise AssertionError('nan returned in fit') + raise AssertionError("nan returned in fit") else: if np.any((np.abs(diff) - diffthreshold) > 0.0): -## txt = 'WARNING - diff too large with small sample' -## print 'parameter diff =', diff - diffthreshold, txt - rvs = np.concatenate([rvs,distfn.rvs(size=n_repl2-n_repl1,*arg)]) - est = distfn.fit(rvs) #,*arg) - truearg = np.hstack([arg,[0.0,1.0]]) - diff = est-truearg + ## txt = 'WARNING - diff too large with small sample' + ## print 'parameter diff =', diff - diffthreshold, txt + rvs = np.concatenate( + [rvs, distfn.rvs(size=n_repl2 - n_repl1, *arg)] + ) + est = distfn.fit(rvs) # ,*arg) + truearg = np.hstack([arg, [0.0, 1.0]]) + diff = est - truearg if np.any((np.abs(diff) - diffthreshold) > 0.0): - txt = 'parameter: %s\n' % str(truearg) - txt += 'estimated: %s\n' % str(est) - txt += 'diff : %s\n' % str(diff) - raise AssertionError('fit not very good in %s\n' % distfn.name + txt) - + txt = "parameter: %s\n" % str(truearg) + txt += "estimated: %s\n" % str(est) + txt += "diff : %s\n" % str(diff) + raise AssertionError( + "fit not very good in %s\n" % distfn.name + txt + ) if __name__ == "__main__": import pytest - pytest.main([__file__, '-vvs', '-x', '--pdb']) + + pytest.main([__file__, "-vvs", "-x", "--pdb"]) diff --git a/statsmodels/sandbox/distributions/tests/check_moments.py b/statsmodels/sandbox/distributions/tests/check_moments.py index 7014e4b323e..d91d7dc676f 100644 --- a/statsmodels/sandbox/distributions/tests/check_moments.py +++ b/statsmodels/sandbox/distributions/tests/check_moments.py @@ -1,54 +1,60 @@ -'''script to test expect and moments in distributions.stats method +"""script to test expect and moments in distributions.stats method not written as a test, prints results, renamed to prevent test runner from running it -''' +""" import numpy as np from scipy import stats -#from statsmodels.stats.moment_helpers import mnc2mvsk + +# from statsmodels.stats.moment_helpers import mnc2mvsk from statsmodels.sandbox.distributions.sppatch import expect_v2 from .distparams import distcont +specialcases = { + "ncf": {"ub": 1000} # diverges if it's too large, checked for mean +} -specialcases = {'ncf':{'ub':1000} # diverges if it's too large, checked for mean - } - -#next functions are copies from sm.stats.moment_helpers +# next functions are copies from sm.stats.moment_helpers def mc2mvsk(args): - '''convert central moments to mean, variance, skew, kurtosis - ''' + """convert central moments to mean, variance, skew, kurtosis""" mc, mc2, mc3, mc4 = args skew = np.divide(mc3, mc2**1.5) kurt = np.divide(mc4, mc2**2.0) - 3.0 return (mc, mc2, skew, kurt) + def mnc2mvsk(args): - '''convert central moments to mean, variance, skew, kurtosis - ''' - #convert four non-central moments to central moments + """convert central moments to mean, variance, skew, kurtosis""" + # 
convert four non-central moments to central moments mnc, mnc2, mnc3, mnc4 = args mc = mnc - mc2 = mnc2 - mnc*mnc - mc3 = mnc3 - (3*mc*mc2+mc**3) # 3rd central moment - mc4 = mnc4 - (4*mc*mc3+6*mc*mc*mc2+mc**4) + mc2 = mnc2 - mnc * mnc + mc3 = mnc3 - (3 * mc * mc2 + mc**3) # 3rd central moment + mc4 = mnc4 - (4 * mc * mc3 + 6 * mc * mc * mc2 + mc**4) return mc2mvsk((mc, mc2, mc3, mc4)) + def mom_nc0(x): - return 1. + return 1.0 + def mom_nc1(x): return x + def mom_nc2(x): - return x*x + return x * x + def mom_nc3(x): - return x*x*x + return x * x * x + def mom_nc4(x): - return np.power(x,4) + return np.power(x, 4) + res = {} distex = [] @@ -56,28 +62,29 @@ def mom_nc4(x): distok = [] distnonfinite = [] + def check_cont_basic(): - #results saved in module global variable + # results saved in module global variable for distname, distargs in distcont[:]: - #if distname not in distex_0: continue + # if distname not in distex_0: continue distfn = getattr(stats, distname) -## np.random.seed(765456) -## sn = 1000 -## rvs = distfn.rvs(size=sn,*arg) -## sm = rvs.mean() -## sv = rvs.var() -## skurt = stats.kurtosis(rvs) -## sskew = stats.skew(rvs) - m,v,s,k = distfn.stats(*distargs, **dict(moments='mvsk')) - st = np.array([m,v,s,k]) + ## np.random.seed(765456) + ## sn = 1000 + ## rvs = distfn.rvs(size=sn,*arg) + ## sm = rvs.mean() + ## sv = rvs.var() + ## skurt = stats.kurtosis(rvs) + ## sskew = stats.skew(rvs) + m, v, s, k = distfn.stats(*distargs, **dict(moments="mvsk")) + st = np.array([m, v, s, k]) mask = np.isfinite(st) if mask.sum() < 4: distnonfinite.append(distname) print(distname) - #print 'stats ', m,v,s,k + # print 'stats ', m,v,s,k expect = distfn.expect - expect = lambda *args, **kwds : expect_v2(distfn, *args, **kwds) + expect = lambda *args, **kwds: expect_v2(distfn, *args, **kwds) special_kwds = specialcases.get(distname, {}) mnc0 = expect(mom_nc0, args=distargs, **special_kwds) @@ -87,72 +94,90 @@ def check_cont_basic(): mnc4 = expect(mom_nc4, args=distargs, **special_kwds) mnc1_lc = expect(args=distargs, loc=1, scale=2, **special_kwds) - #print mnc1, mnc2, mnc3, mnc4 + # print mnc1, mnc2, mnc3, mnc4 try: me, ve, se, ke = mnc2mvsk((mnc1, mnc2, mnc3, mnc4)) except: - print('exception', mnc1, mnc2, mnc3, mnc4, st) - me, ve, se, ke = [np.nan]*4 + print("exception", mnc1, mnc2, mnc3, mnc4, st) + me, ve, se, ke = [np.nan] * 4 if mask.size > 0: distex.append(distname) - #print 'expect', me, ve, se, ke, - #print mnc1, mnc2, mnc3, mnc4 + # print 'expect', me, ve, se, ke, + # print mnc1, mnc2, mnc3, mnc4 em = np.array([me, ve, se, ke]) diff = st[mask] - em[mask] - print(diff, mnc1_lc - (1 + 2*mnc1)) - if np.size(diff)>0 and np.max(np.abs(diff)) > 1e-3: + print(diff, mnc1_lc - (1 + 2 * mnc1)) + if np.size(diff) > 0 and np.max(np.abs(diff)) > 1e-3: distlow.append(distname) else: distok.append(distname) res[distname] = [mnc0, st, em, diff, mnc1_lc] + def nct_kurt_bug(): - '''test for incorrect kurtosis of nct + """test for incorrect kurtosis of nct D. Hogben, R. S. Pinkham, M. B. Wilk: The Moments of the Non-Central t-DistributionAuthor(s): Biometrika, Vol. 48, No. 3/4 (Dec., 1961), pp. 
465-468 - ''' + """ from numpy.testing import assert_almost_equal + mvsk_10_1 = (1.08372, 1.325546, 0.39993, 1.2499424941142943) - assert_almost_equal(stats.nct.stats(10, 1, moments='mvsk'), mvsk_10_1, decimal=6) - c1=np.array([1.08372]) - c2=np.array([.0755460, 1.25000]) - c3 = np.array([.0297802, .580566]) + assert_almost_equal( + stats.nct.stats(10, 1, moments="mvsk"), mvsk_10_1, decimal=6 + ) + c1 = np.array([1.08372]) + c2 = np.array([0.0755460, 1.25000]) + c3 = np.array([0.0297802, 0.580566]) c4 = np.array([0.0425458, 1.17491, 6.25]) - #calculation for df=10, for arbitrary nc + # calculation for df=10, for arbitrary nc nc = 1 mc1 = c1.item() - mc2 = (c2*nc**np.array([2,0])).sum() - mc3 = (c3*nc**np.array([3,1])).sum() - mc4 = c4=np.array([0.0425458, 1.17491, 6.25]) - mvsk_nc = mc2mvsk((mc1,mc2,mc3,mc4)) + mc2 = (c2 * nc ** np.array([2, 0])).sum() + mc3 = (c3 * nc ** np.array([3, 1])).sum() + mc4 = c4 = np.array([0.0425458, 1.17491, 6.25]) + mvsk_nc = mc2mvsk((mc1, mc2, mc3, mc4)) + -if __name__ == '__main__': +if __name__ == "__main__": check_cont_basic() - #print [(k, v[0]) for k,v in res.items() if np.abs(v[0]-1)>1e-3] - #print [(k, v[2][0], 1+2*v[2][0]) for k,v in res.items() if np.abs(v[-1]-(1+2*v[2][0]))>1e-3] - mean_ = [(k, v[1][0], v[2][0]) for k,v in res.items() - if np.abs(v[1][0] - v[2][0])>1e-6 and np.isfinite(v[1][0])] - var_ = [(k, v[1][1], v[2][1]) for k,v in res.items() - if np.abs(v[1][1] - v[2][1])>1e-2 and np.isfinite(v[1][1])] - skew = [(k, v[1][2], v[2][2]) for k,v in res.items() - if np.abs(v[1][2] - v[2][2])>1e-2 and np.isfinite(v[1][1])] - kurt = [(k, v[1][3], v[2][3]) for k,v in res.items() - if np.abs(v[1][3] - v[2][3])>1e-2 and np.isfinite(v[1][1])] + # print [(k, v[0]) for k,v in res.items() if np.abs(v[0]-1)>1e-3] + # print [(k, v[2][0], 1+2*v[2][0]) for k,v in res.items() if np.abs(v[-1]-(1+2*v[2][0]))>1e-3] + mean_ = [ + (k, v[1][0], v[2][0]) + for k, v in res.items() + if np.abs(v[1][0] - v[2][0]) > 1e-6 and np.isfinite(v[1][0]) + ] + var_ = [ + (k, v[1][1], v[2][1]) + for k, v in res.items() + if np.abs(v[1][1] - v[2][1]) > 1e-2 and np.isfinite(v[1][1]) + ] + skew = [ + (k, v[1][2], v[2][2]) + for k, v in res.items() + if np.abs(v[1][2] - v[2][2]) > 1e-2 and np.isfinite(v[1][1]) + ] + kurt = [ + (k, v[1][3], v[2][3]) + for k, v in res.items() + if np.abs(v[1][3] - v[2][3]) > 1e-2 and np.isfinite(v[1][1]) + ] from statsmodels.iolib import SimpleTable + if len(mean_) > 0: - print('\nMean difference at least 1e-6') - print(SimpleTable(mean_, headers=['distname', 'diststats', 'expect'])) - print('\nVariance difference at least 1e-2') - print(SimpleTable(var_, headers=['distname', 'diststats', 'expect'])) - print('\nSkew difference at least 1e-2') - print(SimpleTable(skew, headers=['distname', 'diststats', 'expect'])) - print('\nKurtosis difference at least 1e-2') - print(SimpleTable(kurt, headers=['distname', 'diststats', 'expect'])) + print("\nMean difference at least 1e-6") + print(SimpleTable(mean_, headers=["distname", "diststats", "expect"])) + print("\nVariance difference at least 1e-2") + print(SimpleTable(var_, headers=["distname", "diststats", "expect"])) + print("\nSkew difference at least 1e-2") + print(SimpleTable(skew, headers=["distname", "diststats", "expect"])) + print("\nKurtosis difference at least 1e-2") + print(SimpleTable(kurt, headers=["distname", "diststats", "expect"])) diff --git a/statsmodels/sandbox/distributions/tests/distparams.py b/statsmodels/sandbox/distributions/tests/distparams.py index 19b11ae1f3b..d1d799ae2d5 100644 --- 
a/statsmodels/sandbox/distributions/tests/distparams.py +++ b/statsmodels/sandbox/distributions/tests/distparams.py @@ -1,121 +1,153 @@ - - distcont = [ - ['alpha', (3.5704770516650459,)], - ['anglit', ()], - ['arcsine', ()], - ['beta', (2.3098496451481823, 0.62687954300963677)], - ['betaprime', (5, 6)], # avoid unbound error in entropy with (100, 86)], - ['bradford', (0.29891359763170633,)], - ['burr', (10.5, 4.3)], #incorrect mean and var for(0.94839838075366045, 4.3820284068855795)], - ['cauchy', ()], - ['chi', (78,)], - ['chi2', (55,)], - ['cosine', ()], - ['dgamma', (1.1023326088288166,)], - ['dweibull', (2.0685080649914673,)], - ['erlang', (20,)], #correction numargs = 1 - ['expon', ()], - ['exponpow', (2.697119160358469,)], - ['exponweib', (2.8923945291034436, 1.9505288745913174)], - ['f', (29, 18)], - #['fatiguelife', (29,)], #correction numargs = 1, variance very large - ['fatiguelife', (2,)], - ['fisk', (3.0857548622253179,)], - ['foldcauchy', (4.7164673455831894,)], - ['foldnorm', (1.9521253373555869,)], - ['frechet_l', (3.6279911255583239,)], - ['frechet_r', (1.8928171603534227,)], - ['gamma', (1.9932305483800778,)], - ['gausshyper', (13.763771604130699, 3.1189636648681431, - 2.5145980350183019, 5.1811649903971615)], #veryslow - ['genexpon', (9.1325976465418908, 16.231956600590632, 3.2819552690843983)], - ['genextreme', (-0.1,)], # sample mean test fails for (3.3184017469423535,)], - ['gengamma', (4.4162385429431925, 3.1193091679242761)], - ['genhalflogistic', (0.77274727809929322,)], - ['genlogistic', (0.41192440799679475,)], - ['genpareto', (0.1,)], # use case with finite moments - ['gilbrat', ()], - ['gompertz', (0.94743713075105251,)], - ['gumbel_l', ()], - ['gumbel_r', ()], - ['halfcauchy', ()], - ['halflogistic', ()], - ['halfnorm', ()], - ['hypsecant', ()], - #['invgamma', (2.0668996136993067,)], #convergence problem with expect - #['invgamma', (3.0,)], - ['invgamma', (5.0,)], #kurtosis requires alpha > 4 - ['invnorm', (0.14546264555347513,)], - ['invweibull', (10.58,)], # sample mean test fails at(0.58847112119264788,)] - ['johnsonsb', (4.3172675099141058, 3.1837781130785063)], - ['johnsonsu', (2.554395574161155, 2.2482281679651965)], - ['ksone', (1000,)], #replace 22 by 100 to avoid failing range, ticket 956 - ['kstwobign', ()], - ['laplace', ()], - ['levy', ()], - ['levy_l', ()], -# ['levy_stable', (0.35667405469844993, -# -0.67450531578494011)], #NotImplementedError + ["alpha", (3.5704770516650459,)], + ["anglit", ()], + ["arcsine", ()], + ["beta", (2.3098496451481823, 0.62687954300963677)], + ["betaprime", (5, 6)], # avoid unbound error in entropy with (100, 86)], + ["bradford", (0.29891359763170633,)], + [ + "burr", + (10.5, 4.3), + ], # incorrect mean and var for(0.94839838075366045, 4.3820284068855795)], + ["cauchy", ()], + ["chi", (78,)], + ["chi2", (55,)], + ["cosine", ()], + ["dgamma", (1.1023326088288166,)], + ["dweibull", (2.0685080649914673,)], + ["erlang", (20,)], # correction numargs = 1 + ["expon", ()], + ["exponpow", (2.697119160358469,)], + ["exponweib", (2.8923945291034436, 1.9505288745913174)], + ["f", (29, 18)], + # ['fatiguelife', (29,)], #correction numargs = 1, variance very large + ["fatiguelife", (2,)], + ["fisk", (3.0857548622253179,)], + ["foldcauchy", (4.7164673455831894,)], + ["foldnorm", (1.9521253373555869,)], + ["frechet_l", (3.6279911255583239,)], + ["frechet_r", (1.8928171603534227,)], + ["gamma", (1.9932305483800778,)], + [ + "gausshyper", + ( + 13.763771604130699, + 3.1189636648681431, + 2.5145980350183019, + 5.1811649903971615, + ), 
+ ], # veryslow + ["genexpon", (9.1325976465418908, 16.231956600590632, 3.2819552690843983)], + [ + "genextreme", + (-0.1,), + ], # sample mean test fails for (3.3184017469423535,)], + ["gengamma", (4.4162385429431925, 3.1193091679242761)], + ["genhalflogistic", (0.77274727809929322,)], + ["genlogistic", (0.41192440799679475,)], + ["genpareto", (0.1,)], # use case with finite moments + ["gilbrat", ()], + ["gompertz", (0.94743713075105251,)], + ["gumbel_l", ()], + ["gumbel_r", ()], + ["halfcauchy", ()], + ["halflogistic", ()], + ["halfnorm", ()], + ["hypsecant", ()], + # ['invgamma', (2.0668996136993067,)], #convergence problem with expect + # ['invgamma', (3.0,)], + ["invgamma", (5.0,)], # kurtosis requires alpha > 4 + ["invnorm", (0.14546264555347513,)], + [ + "invweibull", + (10.58,), + ], # sample mean test fails at(0.58847112119264788,)] + ["johnsonsb", (4.3172675099141058, 3.1837781130785063)], + ["johnsonsu", (2.554395574161155, 2.2482281679651965)], + ["ksone", (1000,)], # replace 22 by 100 to avoid failing range, ticket 956 + ["kstwobign", ()], + ["laplace", ()], + ["levy", ()], + ["levy_l", ()], + # ['levy_stable', (0.35667405469844993, + # -0.67450531578494011)], #NotImplementedError # rvs not tested - ['loggamma', (0.41411931826052117,)], - ['logistic', ()], - ['loglaplace', (3.2505926592051435,)], - ['lognorm', (0.95368226960575331,)], - ['lomax', (1.8771398388773268,)], #this has infinite variance - ['lomax', (10,)], #first 4 moments are finite - ['maxwell', ()], - ['mielke', (10.4, 3.6)], # sample mean test fails for (4.6420495492121487, 0.59707419545516938)], - # mielke: good results if 2nd parameter >2, weird mean or var below - ['nakagami', (4.9673794866666237,)], - ['ncf', (27, 27, 0.41578441799226107)], - ['nct', (14, 0.24045031331198066)], - ['ncx2', (21, 1.0560465975116415)], - ['norm', ()], - ['pareto', (2.621716532144454,)], - ['powerlaw', (1.6591133289905851,)], - ['powerlognorm', (2.1413923530064087, 0.44639540782048337)], - ['powernorm', (4.4453652254590779,)], - ['rayleigh', ()], - ['rdist', (0.9,)], # feels also slow -# ['rdist', (3.8266985793976525,)], #veryslow, especially rvs - #['rdist', (541.0,)], # from ticket #758 #veryslow - ['recipinvgauss', (0.63004267809369119,)], - ['reciprocal', (0.0062309367010521255, 1.0062309367010522)], - ['rice', (0.7749725210111873,)], - ['semicircular', ()], - ['t', (2.7433514990818093,)], - ['triang', (0.15785029824528218,)], - ['truncexpon', (4.6907725456810478,)], - ['truncnorm', (-1.0978730080013919, 2.7306754109031979)], - ['tukeylambda', (3.1321477856738267,)], - ['uniform', ()], - ['vonmises', (3.9939042581071398,)], - ['wald', ()], - ['weibull_max', (2.8687961709100187,)], - ['weibull_min', (1.7866166930421596,)], - ['wrapcauchy', (0.031071279018614728,)]] + ["loggamma", (0.41411931826052117,)], + ["logistic", ()], + ["loglaplace", (3.2505926592051435,)], + ["lognorm", (0.95368226960575331,)], + ["lomax", (1.8771398388773268,)], # this has infinite variance + ["lomax", (10,)], # first 4 moments are finite + ["maxwell", ()], + [ + "mielke", + (10.4, 3.6), + ], # sample mean test fails for (4.6420495492121487, 0.59707419545516938)], + # mielke: good results if 2nd parameter >2, weird mean or var below + ["nakagami", (4.9673794866666237,)], + ["ncf", (27, 27, 0.41578441799226107)], + ["nct", (14, 0.24045031331198066)], + ["ncx2", (21, 1.0560465975116415)], + ["norm", ()], + ["pareto", (2.621716532144454,)], + ["powerlaw", (1.6591133289905851,)], + ["powerlognorm", (2.1413923530064087, 0.44639540782048337)], + 
["powernorm", (4.4453652254590779,)], + ["rayleigh", ()], + ["rdist", (0.9,)], # feels also slow + # ['rdist', (3.8266985793976525,)], #veryslow, especially rvs + # ['rdist', (541.0,)], # from ticket #758 #veryslow + ["recipinvgauss", (0.63004267809369119,)], + ["reciprocal", (0.0062309367010521255, 1.0062309367010522)], + ["rice", (0.7749725210111873,)], + ["semicircular", ()], + ["t", (2.7433514990818093,)], + ["triang", (0.15785029824528218,)], + ["truncexpon", (4.6907725456810478,)], + ["truncnorm", (-1.0978730080013919, 2.7306754109031979)], + ["tukeylambda", (3.1321477856738267,)], + ["uniform", ()], + ["vonmises", (3.9939042581071398,)], + ["wald", ()], + ["weibull_max", (2.8687961709100187,)], + ["weibull_min", (1.7866166930421596,)], + ["wrapcauchy", (0.031071279018614728,)], +] distdiscrete = [ - ['bernoulli',(0.3,)], - ['binom', (5, 0.4)], - ['boltzmann',(1.4, 19)], - ['dlaplace', (0.8,)], #0.5 - ['geom', (0.5,)], - ['hypergeom',(30, 12, 6)], - ['hypergeom',(21,3,12)], #numpy.random (3,18,12) numpy ticket:921 - ['hypergeom',(21,18,11)], #numpy.random (18,3,11) numpy ticket:921 - ['logser', (0.6,)], # reenabled, numpy ticket:921 - ['nbinom', (5, 0.5)], - ['nbinom', (0.4, 0.4)], #from tickets: 583 - ['planck', (0.51,)], #4.1 - ['poisson', (0.6,)], - ['randint', (7, 31)], - ['skellam', (15, 8)], - ['zipf', (4,)] ] # arg=4 is ok, - # Zipf broken for arg = 2, e.g. weird .stats - # looking closer, mean, var should be inf for arg=2 + ["bernoulli", (0.3,)], + ["binom", (5, 0.4)], + ["boltzmann", (1.4, 19)], + ["dlaplace", (0.8,)], # 0.5 + ["geom", (0.5,)], + ["hypergeom", (30, 12, 6)], + ["hypergeom", (21, 3, 12)], # numpy.random (3,18,12) numpy ticket:921 + ["hypergeom", (21, 18, 11)], # numpy.random (18,3,11) numpy ticket:921 + ["logser", (0.6,)], # reenabled, numpy ticket:921 + ["nbinom", (5, 0.5)], + ["nbinom", (0.4, 0.4)], # from tickets: 583 + ["planck", (0.51,)], # 4.1 + ["poisson", (0.6,)], + ["randint", (7, 31)], + ["skellam", (15, 8)], + ["zipf", (4,)], +] # arg=4 is ok, +# Zipf broken for arg = 2, e.g. 
weird .stats +# looking closer, mean, var should be inf for arg=2 -distslow = ['rdist', 'gausshyper', 'recipinvgauss', 'ksone', 'genexpon', - 'vonmises', 'rice', 'mielke', 'semicircular', 'cosine', 'invweibull', - 'powerlognorm', 'johnsonsu', 'kstwobign'] +distslow = [ + "rdist", + "gausshyper", + "recipinvgauss", + "ksone", + "genexpon", + "vonmises", + "rice", + "mielke", + "semicircular", + "cosine", + "invweibull", + "powerlognorm", + "johnsonsu", + "kstwobign", +] diff --git a/statsmodels/sandbox/distributions/tests/test_extras.py b/statsmodels/sandbox/distributions/tests/test_extras.py index 0f5cd625f2d..a4d9eff12d5 100644 --- a/statsmodels/sandbox/distributions/tests/test_extras.py +++ b/statsmodels/sandbox/distributions/tests/test_extras.py @@ -8,111 +8,168 @@ import numpy as np from numpy.testing import assert_, assert_almost_equal -from statsmodels.sandbox.distributions.extras import (skewnorm, - skewnorm2, ACSkewT_gen) +from statsmodels.sandbox.distributions.extras import ( + ACSkewT_gen, + skewnorm, + skewnorm2, +) def test_skewnorm(): - #library("sn") - #dsn(c(-2,-1,0,1,2), shape=10) - #psn(c(-2,-1,0,1,2), shape=10) - #noquote(sprintf("%.15e,", snp)) - pdf_r = np.array([2.973416551551523e-90, 3.687562713971017e-24, - 3.989422804014327e-01, 4.839414490382867e-01, - 1.079819330263761e-01]) - pdf_sn = skewnorm.pdf([-2,-1,0,1,2], 10) - - #res = (snp-snp_r)/snp - assert_(np.allclose(pdf_sn, pdf_r,rtol=1e-13, atol=0)) - - pdf_sn2 = skewnorm2.pdf([-2,-1,0,1,2], 10) + # library("sn") + # dsn(c(-2,-1,0,1,2), shape=10) + # psn(c(-2,-1,0,1,2), shape=10) + # noquote(sprintf("%.15e,", snp)) + pdf_r = np.array( + [ + 2.973416551551523e-90, + 3.687562713971017e-24, + 3.989422804014327e-01, + 4.839414490382867e-01, + 1.079819330263761e-01, + ] + ) + pdf_sn = skewnorm.pdf([-2, -1, 0, 1, 2], 10) + + # res = (snp-snp_r)/snp + assert_(np.allclose(pdf_sn, pdf_r, rtol=1e-13, atol=0)) + + pdf_sn2 = skewnorm2.pdf([-2, -1, 0, 1, 2], 10) assert_(np.allclose(pdf_sn2, pdf_r, rtol=1e-13, atol=0)) - - cdf_r = np.array([0.000000000000000e+00, 0.000000000000000e+00, - 3.172551743055357e-02, 6.826894921370859e-01, - 9.544997361036416e-01]) - cdf_sn = skewnorm.cdf([-2,-1,0,1,2], 10) + cdf_r = np.array( + [ + 0.000000000000000e00, + 0.000000000000000e00, + 3.172551743055357e-02, + 6.826894921370859e-01, + 9.544997361036416e-01, + ] + ) + cdf_sn = skewnorm.cdf([-2, -1, 0, 1, 2], 10) maxabs = np.max(np.abs(cdf_sn - cdf_r)) - maxrel = np.max(np.abs(cdf_sn - cdf_r)/(cdf_r+1e-50)) - msg = "maxabs=%15.13g, maxrel=%15.13g\n%r\n%r" % (maxabs, maxrel, cdf_sn, - cdf_r) - #assert_(np.allclose(cdf_sn, cdf_r, rtol=1e-13, atol=1e-25), msg=msg) + maxrel = np.max(np.abs(cdf_sn - cdf_r) / (cdf_r + 1e-50)) + msg = "maxabs=%15.13g, maxrel=%15.13g\n%r\n%r" % ( + maxabs, + maxrel, + cdf_sn, + cdf_r, + ) + # assert_(np.allclose(cdf_sn, cdf_r, rtol=1e-13, atol=1e-25), msg=msg) assert_almost_equal(cdf_sn, cdf_r, decimal=10) - cdf_sn2 = skewnorm2.cdf([-2,-1,0,1,2], 10) + cdf_sn2 = skewnorm2.cdf([-2, -1, 0, 1, 2], 10) maxabs = np.max(np.abs(cdf_sn2 - cdf_r)) - maxrel = np.max(np.abs(cdf_sn2 - cdf_r)/(cdf_r+1e-50)) + maxrel = np.max(np.abs(cdf_sn2 - cdf_r) / (cdf_r + 1e-50)) msg = "maxabs=%15.13g, maxrel=%15.13g" % (maxabs, maxrel) - #assert_(np.allclose(cdf_sn2, cdf_r, rtol=1e-13, atol=1e-25), msg=msg) + # assert_(np.allclose(cdf_sn2, cdf_r, rtol=1e-13, atol=1e-25), msg=msg) assert_almost_equal(cdf_sn2, cdf_r, decimal=10, err_msg=msg) def test_skewt(): skewt = ACSkewT_gen() x = [-2, -1, -0.5, 0, 1, 2] - #noquote(sprintf("%.15e,", 
dst(c(-2,-1, -0.5,0,1,2), shape=10))) - #default in R:sn is df=inf - pdf_r = np.array([2.973416551551523e-90, 3.687562713971017e-24, - 2.018401586422970e-07, 3.989422804014327e-01, - 4.839414490382867e-01, 1.079819330263761e-01]) + # noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10))) + # default in R:sn is df=inf + pdf_r = np.array( + [ + 2.973416551551523e-90, + 3.687562713971017e-24, + 2.018401586422970e-07, + 3.989422804014327e-01, + 4.839414490382867e-01, + 1.079819330263761e-01, + ] + ) pdf_st = skewt.pdf(x, 1000000, 10) pass np.allclose(pdf_st, pdf_r, rtol=0, atol=1e-6) np.allclose(pdf_st, pdf_r, rtol=1e-1, atol=0) - - #noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10))) - cdf_r = np.array([0.000000000000000e+00, 0.000000000000000e+00, - 3.729478836866917e-09, 3.172551743055357e-02, - 6.826894921370859e-01, 9.544997361036416e-01]) + # noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10))) + cdf_r = np.array( + [ + 0.000000000000000e00, + 0.000000000000000e00, + 3.729478836866917e-09, + 3.172551743055357e-02, + 6.826894921370859e-01, + 9.544997361036416e-01, + ] + ) cdf_st = skewt.cdf(x, 1000000, 10) np.allclose(cdf_st, cdf_r, rtol=0, atol=1e-6) np.allclose(cdf_st, cdf_r, rtol=1e-1, atol=0) - #assert_(np.allclose(cdf_st, cdf_r, rtol=1e-13, atol=1e-15)) - - - #noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=5))) - pdf_r = np.array([2.185448836190663e-07, 1.272381597868587e-05, - 5.746937644959992e-04, 3.796066898224945e-01, - 4.393468708859825e-01, 1.301804021075493e-01]) - pdf_st = skewt.pdf(x, 5, 10) #args = (df, alpha) + # assert_(np.allclose(cdf_st, cdf_r, rtol=1e-13, atol=1e-15)) + + # noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=5))) + pdf_r = np.array( + [ + 2.185448836190663e-07, + 1.272381597868587e-05, + 5.746937644959992e-04, + 3.796066898224945e-01, + 4.393468708859825e-01, + 1.301804021075493e-01, + ] + ) + pdf_st = skewt.pdf(x, 5, 10) # args = (df, alpha) assert_(np.allclose(pdf_st, pdf_r, rtol=1e-13, atol=1e-25)) - #noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=5))) - cdf_r = np.array([8.822783669199699e-08, 2.638467463775795e-06, - 6.573106017198583e-05, 3.172551743055352e-02, - 6.367851708183412e-01, 8.980606093979784e-01]) - cdf_st = skewt.cdf(x, 5, 10) #args = (df, alpha) + # noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=5))) + cdf_r = np.array( + [ + 8.822783669199699e-08, + 2.638467463775795e-06, + 6.573106017198583e-05, + 3.172551743055352e-02, + 6.367851708183412e-01, + 8.980606093979784e-01, + ] + ) + cdf_st = skewt.cdf(x, 5, 10) # args = (df, alpha) assert_(np.allclose(cdf_st, cdf_r, rtol=1e-10, atol=0)) - - #noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=1))) - pdf_r = np.array([3.941955996757291e-04, 1.568067236862745e-03, - 6.136996029432048e-03, 3.183098861837907e-01, - 3.167418189469279e-01, 1.269297588738406e-01]) - pdf_st = skewt.pdf(x, 1, 10) #args = (df, alpha) = (1, 10)) + # noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=1))) + pdf_r = np.array( + [ + 3.941955996757291e-04, + 1.568067236862745e-03, + 6.136996029432048e-03, + 3.183098861837907e-01, + 3.167418189469279e-01, + 1.269297588738406e-01, + ] + ) + pdf_st = skewt.pdf(x, 1, 10) # args = (df, alpha) = (1, 10)) assert_(np.allclose(pdf_st, pdf_r, rtol=1e-13, atol=1e-25)) - #noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=1))) - cdf_r = np.array([7.893671370544414e-04, 1.575817262600422e-03, - 3.128720749105560e-03, 3.172551743055351e-02, 
- 5.015758172626005e-01, 7.056221318361879e-01]) - cdf_st = skewt.cdf(x, 1, 10) #args = (df, alpha) = (1, 10) + # noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=1))) + cdf_r = np.array( + [ + 7.893671370544414e-04, + 1.575817262600422e-03, + 3.128720749105560e-03, + 3.172551743055351e-02, + 5.015758172626005e-01, + 7.056221318361879e-01, + ] + ) + cdf_st = skewt.cdf(x, 1, 10) # args = (df, alpha) = (1, 10) assert_(np.allclose(cdf_st, cdf_r, rtol=1e-13, atol=1e-25)) - -if __name__ == '__main__': +if __name__ == "__main__": import pytest - pytest.main([__file__, '-vvs', '-x', '--pdb']) - print('Done') + + pytest.main([__file__, "-vvs", "-x", "--pdb"]) + print("Done") -''' +""" >>> skewt.pdf([-2,-1,0,1,2], 10000000, 10) array([ 2.98557345e-90, 3.68850289e-24, 3.98942271e-01, 4.83941426e-01, 1.07981952e-01]) >>> skewt.pdf([-2,-1,0,1,2], np.inf, 10) array([ nan, nan, nan, nan, nan]) -''' +""" diff --git a/statsmodels/sandbox/distributions/tests/test_gof_new.py b/statsmodels/sandbox/distributions/tests/test_gof_new.py index 8fc3b2bf809..4156c2e284e 100644 --- a/statsmodels/sandbox/distributions/tests/test_gof_new.py +++ b/statsmodels/sandbox/distributions/tests/test_gof_new.py @@ -1,7 +1,7 @@ import numpy as np from numpy.testing import assert_array_almost_equal -from statsmodels.sandbox.distributions.gof_new import bootstrap, NewNorm +from statsmodels.sandbox.distributions.gof_new import NewNorm, bootstrap def test_loop_vectorized_batch_equivalence(): @@ -9,18 +9,35 @@ def test_loop_vectorized_batch_equivalence(): nobs = 200 np.random.seed(8765679) - resu1 = bootstrap(NewNorm(), args=(0, 1), nobs=nobs, nrep=100, - value=0.576/(1 + 4./nobs - 25./nobs**2)) + resu1 = bootstrap( + NewNorm(), + args=(0, 1), + nobs=nobs, + nrep=100, + value=0.576 / (1 + 4.0 / nobs - 25.0 / nobs**2), + ) np.random.seed(8765679) - tmp = [bootstrap(NewNorm(), args=(0, 1), nobs=nobs, nrep=1) - for _ in range(100)] - resu2 = (np.array(tmp) > 0.576/(1 + 4./nobs - 25./nobs**2)).mean() + tmp = [ + bootstrap(NewNorm(), args=(0, 1), nobs=nobs, nrep=1) + for _ in range(100) + ] + resu2 = ( + np.array(tmp) > 0.576 / (1 + 4.0 / nobs - 25.0 / nobs**2) + ).mean() np.random.seed(8765679) - tmp = [bootstrap(NewNorm(), args=(0, 1), nobs=nobs, nrep=1, - value=0.576/(1 + 4./nobs - 25./nobs**2), - batch_size=10) for _ in range(10)] + tmp = [ + bootstrap( + NewNorm(), + args=(0, 1), + nobs=nobs, + nrep=1, + value=0.576 / (1 + 4.0 / nobs - 25.0 / nobs**2), + batch_size=10, + ) + for _ in range(10) + ] resu3 = np.array(tmp).mean() assert_array_almost_equal(resu1, resu2, 15) diff --git a/statsmodels/sandbox/distributions/tests/test_multivariate.py b/statsmodels/sandbox/distributions/tests/test_multivariate.py index 7cc6d4a2c6c..2a859b28607 100644 --- a/statsmodels/sandbox/distributions/tests/test_multivariate.py +++ b/statsmodels/sandbox/distributions/tests/test_multivariate.py @@ -4,58 +4,65 @@ @author: Josef Perktold """ import numpy as np -from numpy.testing import assert_almost_equal, assert_allclose +from numpy.testing import assert_allclose, assert_almost_equal from statsmodels.sandbox.distributions.multivariate import ( - mvstdtprob, mvstdnormcdf) + mvstdnormcdf, + mvstdtprob, +) from statsmodels.sandbox.distributions.mv_normal import MVT, MVNormal class Test_MVN_MVT_prob(object): - #test for block integratal, cdf, of multivariate t and normal - #comparison results from R + # test for block integratal, cdf, of multivariate t and normal + # comparison results from R @classmethod def setup_class(cls): - cls.corr_equal 
= np.asarray([[1.0, 0.5, 0.5],[0.5,1,0.5],[0.5,0.5,1]]) + cls.corr_equal = np.asarray( + [[1.0, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 1]] + ) cls.a = -1 * np.ones(3) cls.b = 3 * np.ones(3) cls.df = 4 corr2 = cls.corr_equal.copy() - corr2[2,1] = -0.5 + corr2[2, 1] = -0.5 cls.corr2 = corr2 def test_mvn_mvt_1(self): a, b = self.a, self.b df = self.df corr_equal = self.corr_equal - #result from R, mvtnorm with option - #algorithm = GenzBretz(maxpts = 100000, abseps = 0.000001, releps = 0) + # result from R, mvtnorm with option + # algorithm = GenzBretz(maxpts = 100000, abseps = 0.000001, releps = 0) # or higher - probmvt_R = 0.60414 #report, ed error approx. 7.5e-06 - probmvn_R = 0.673970 #reported error approx. 6.4e-07 + probmvt_R = 0.60414 # report, ed error approx. 7.5e-06 + probmvn_R = 0.673970 # reported error approx. 6.4e-07 assert_almost_equal(probmvt_R, mvstdtprob(a, b, corr_equal, df), 4) - assert_almost_equal(probmvn_R, - mvstdnormcdf(a, b, corr_equal, abseps=1e-5), 4) + assert_almost_equal( + probmvn_R, mvstdnormcdf(a, b, corr_equal, abseps=1e-5), 4 + ) mvn_high = mvstdnormcdf(a, b, corr_equal, abseps=1e-8, maxpts=10000000) assert_almost_equal(probmvn_R, mvn_high, 5) - #this still barely fails sometimes at 6 why?? error is -7.2627419411830374e-007 - #>>> 0.67396999999999996 - 0.67397072627419408 - #-7.2627419411830374e-007 - #>>> assert_almost_equal(0.67396999999999996, 0.67397072627419408, 6) - #Fail + # this still barely fails sometimes at 6 why?? error is -7.2627419411830374e-007 + # >>> 0.67396999999999996 - 0.67397072627419408 + # -7.2627419411830374e-007 + # >>> assert_almost_equal(0.67396999999999996, 0.67397072627419408, 6) + # Fail def test_mvn_mvt_2(self): a, b = self.a, self.b df = self.df corr2 = self.corr2 - probmvn_R = 0.6472497 #reported error approx. 7.7e-08 - probmvt_R = 0.5881863 #highest reported error up to approx. 1.99e-06 + probmvn_R = 0.6472497 # reported error approx. 7.7e-08 + probmvt_R = 0.5881863 # highest reported error up to approx. 1.99e-06 assert_almost_equal(probmvt_R, mvstdtprob(a, b, corr2, df), 4) - assert_almost_equal(probmvn_R, mvstdnormcdf(a, b, corr2, abseps=1e-5), 4) + assert_almost_equal( + probmvn_R, mvstdnormcdf(a, b, corr2, abseps=1e-5), 4 + ) def test_mvn_mvt_3(self): a, b = self.a, self.b @@ -68,7 +75,7 @@ def test_mvn_mvt_3(self): probmvn_R = 0.9961141 # using higher precision in R, error approx. 1.6e-07 probmvt_R = 0.9522146 - quadkwds = {'epsabs': 1e-08} + quadkwds = {"epsabs": 1e-08} probmvt = mvstdtprob(a2, b, corr2, df, quadkwds=quadkwds) assert_allclose(probmvt_R, probmvt, atol=5e-4) probmvn = mvstdnormcdf(a2, b, corr2, maxpts=100000, abseps=1e-5) @@ -79,47 +86,56 @@ def test_mvn_mvt_4(self): df = self.df corr2 = self.corr2 - #from 0 to inf - #print '0 inf' + # from 0 to inf + # print '0 inf' a2 = a.copy() a2[:] = -np.inf - probmvn_R = 0.1666667 #error approx. 6.1e-08 - probmvt_R = 0.1666667 #error approx. 8.2e-08 - assert_almost_equal(probmvt_R, mvstdtprob(np.zeros(3), -a2, corr2, df), 4) - assert_almost_equal(probmvn_R, - mvstdnormcdf(np.zeros(3), -a2, corr2, - maxpts=100000, abseps=1e-5), 4) + probmvn_R = 0.1666667 # error approx. 6.1e-08 + probmvt_R = 0.1666667 # error approx. 
8.2e-08 + assert_almost_equal( + probmvt_R, mvstdtprob(np.zeros(3), -a2, corr2, df), 4 + ) + assert_almost_equal( + probmvn_R, + mvstdnormcdf(np.zeros(3), -a2, corr2, maxpts=100000, abseps=1e-5), + 4, + ) def test_mvn_mvt_5(self): a, bl = self.a, self.b df = self.df corr2 = self.corr2 - #unequal integration bounds - #print "ue" + # unequal integration bounds + # print "ue" a3 = np.array([0.5, -0.5, 0.5]) - probmvn_R = 0.06910487 #using higher precision in R, error approx. 3.5e-08 - probmvt_R = 0.05797867 #using higher precision in R, error approx. 5.8e-08 - assert_almost_equal(mvstdtprob(a3, a3+1, corr2, df), probmvt_R, 4) - assert_almost_equal(probmvn_R, mvstdnormcdf(a3, a3+1, corr2, - maxpts=100000, abseps=1e-5), 4) + probmvn_R = ( + 0.06910487 # using higher precision in R, error approx. 3.5e-08 + ) + probmvt_R = ( + 0.05797867 # using higher precision in R, error approx. 5.8e-08 + ) + assert_almost_equal(mvstdtprob(a3, a3 + 1, corr2, df), probmvt_R, 4) + assert_almost_equal( + probmvn_R, + mvstdnormcdf(a3, a3 + 1, corr2, maxpts=100000, abseps=1e-5), + 4, + ) class TestMVDistributions(object): - #this is not well organized + # this is not well organized @classmethod def setup_class(cls): covx = np.array([[1.0, 0.5], [0.5, 1.0]]) - mu3 = [-1, 0., 2.] - cov3 = np.array([[ 1. , 0.5 , 0.75], - [ 0.5 , 1.5 , 0.6 ], - [ 0.75, 0.6 , 2. ]]) + mu3 = [-1, 0.0, 2.0] + cov3 = np.array([[1.0, 0.5, 0.75], [0.5, 1.5, 0.6], [0.75, 0.6, 2.0]]) cls.mu3 = mu3 cls.cov3 = cov3 mvn3 = MVNormal(mu3, cov3) - mvn3c = MVNormal(np.array([0,0,0]), cov3) + mvn3c = MVNormal(np.array([0, 0, 0]), cov3) cls.mvn3 = mvn3 cls.mvn3c = mvn3c @@ -127,20 +143,18 @@ def test_mvn_pdf(self): cov3 = self.cov3 mvn3 = self.mvn3 - r_val = [ - -7.667977543898155, -6.917977543898155, -5.167977543898155 - ] + r_val = [-7.667977543898155, -6.917977543898155, -5.167977543898155] assert_allclose(mvn3.logpdf(cov3), r_val, rtol=1e-13) r_val = [ - 0.000467562492721686, 0.000989829804859273, 0.005696077243833402 + 0.000467562492721686, + 0.000989829804859273, + 0.005696077243833402, ] assert_allclose(mvn3.pdf(cov3), r_val, rtol=1e-13) mvn3b = MVNormal(np.array([0, 0, 0]), cov3) - r_val = [ - 0.02914269740502042, 0.02269635555984291, 0.01767593948287269 - ] + r_val = [0.02914269740502042, 0.02269635555984291, 0.01767593948287269] assert_allclose(mvn3b.pdf(cov3), r_val, rtol=1e-13) def test_mvt_pdf(self, reset_randomstate): @@ -148,33 +162,41 @@ def test_mvt_pdf(self, reset_randomstate): mu3 = self.mu3 mvt = MVT((0, 0), 1, 5) - assert_almost_equal(mvt.logpdf(np.array([0., 0.])), -1.837877066409345, - decimal=15) - assert_almost_equal(mvt.pdf(np.array([0., 0.])), 0.1591549430918953, - decimal=15) + assert_almost_equal( + mvt.logpdf(np.array([0.0, 0.0])), -1.837877066409345, decimal=15 + ) + assert_almost_equal( + mvt.pdf(np.array([0.0, 0.0])), 0.1591549430918953, decimal=15 + ) - mvt.logpdf(np.array([1., 1.])) - (-3.01552989458359) + mvt.logpdf(np.array([1.0, 1.0])) - (-3.01552989458359) mvt1 = MVT((0, 0), 1, 1) - mvt1.logpdf(np.array([1., 1.])) - (-3.48579549941151) # decimal=16 + mvt1.logpdf(np.array([1.0, 1.0])) - (-3.48579549941151) # decimal=16 rvs = mvt.rvs(100000) assert_almost_equal(np.cov(rvs, rowvar=False), mvt.cov, decimal=1) mvt31 = MVT(mu3, cov3, 1) - assert_almost_equal(mvt31.pdf(cov3), - [0.0007276818698165781, 0.0009980625182293658, - 0.0027661422056214652], - decimal=17) + assert_almost_equal( + mvt31.pdf(cov3), + [ + 0.0007276818698165781, + 0.0009980625182293658, + 0.0027661422056214652, + ], + decimal=17, + ) mvt = MVT(mu3, 
cov3, 3) - assert_almost_equal(mvt.pdf(cov3), - [0.000863777424247410, 0.001277510788307594, - 0.004156314279452241], - decimal=17) + assert_almost_equal( + mvt.pdf(cov3), + [0.000863777424247410, 0.001277510788307594, 0.004156314279452241], + decimal=17, + ) -if __name__ == '__main__': +if __name__ == "__main__": import pytest - pytest.main([__file__, '-vvs', '-x', '--pdb']) + pytest.main([__file__, "-vvs", "-x", "--pdb"]) diff --git a/statsmodels/sandbox/distributions/tests/test_norm_expan.py b/statsmodels/sandbox/distributions/tests/test_norm_expan.py index ad4e7010c3c..5f24804e52c 100644 --- a/statsmodels/sandbox/distributions/tests/test_norm_expan.py +++ b/statsmodels/sandbox/distributions/tests/test_norm_expan.py @@ -8,17 +8,15 @@ Author: Josef Perktold """ -import pytest import numpy as np -from scipy import stats - from numpy.testing import assert_allclose, assert_array_less +import pytest +from scipy import stats from statsmodels.sandbox.distributions.extras import NormExpan_gen class CheckDistribution(object): - @pytest.mark.smoke def test_dist1(self): self.dist1.rvs(size=10) @@ -35,27 +33,27 @@ def test_cdf_ppf_roundtrip(self): class CheckExpandNorm(CheckDistribution): - def test_pdf(self): - scale = getattr(self, 'scale', 1) + scale = getattr(self, "scale", 1) x = np.linspace(-4, 4, 11) * scale pdf2 = self.dist2.pdf(x) pdf1 = self.dist1.pdf(x) - atol_pdf = getattr(self, 'atol_pdf', 0) - assert_allclose(((pdf2 - pdf1)**2).mean(), 0, rtol=1e-6, atol=atol_pdf) + atol_pdf = getattr(self, "atol_pdf", 0) + assert_allclose( + ((pdf2 - pdf1) ** 2).mean(), 0, rtol=1e-6, atol=atol_pdf + ) assert_allclose(pdf2, pdf1, rtol=1e-6, atol=atol_pdf) def test_mvsk(self): - #compare defining mvsk with numerical integration, generic stats + # compare defining mvsk with numerical integration, generic stats mvsk2 = self.dist2.mvsk - mvsk1 = self.dist2.stats(moments='mvsk') + mvsk1 = self.dist2.stats(moments="mvsk") assert_allclose(mvsk2, mvsk1, rtol=1e-6, atol=1e-13) # check mvsk that was used to generate distribution assert_allclose(self.dist2.mvsk, self.mvsk, rtol=1e-12) - class TestExpandNormMom(CheckExpandNorm): # compare with normal, skew=0, excess_kurtosis=0 @@ -63,8 +61,8 @@ class TestExpandNormMom(CheckExpandNorm): def setup_class(kls): kls.scale = 2 kls.dist1 = stats.norm(1, 2) - kls.mvsk = [1., 2**2, 0, 0] - kls.dist2 = NormExpan_gen(kls.mvsk, mode='mvsk') + kls.mvsk = [1.0, 2**2, 0, 0] + kls.dist2 = NormExpan_gen(kls.mvsk, mode="mvsk") class TestExpandNormSample(object): @@ -76,10 +74,10 @@ def setup_class(kls): kls.dist1 = dist1 = stats.norm(1, 2) np.random.seed(5999) kls.rvs = dist1.rvs(size=200) - #rvs = np.concatenate([rvs, -rvs]) + # rvs = np.concatenate([rvs, -rvs]) # fix mean and std of sample - #rvs = (rvs - rvs.mean())/rvs.std(ddof=1) * np.sqrt(2) + 1 - kls.dist2 = NormExpan_gen(kls.rvs, mode='sample') + # rvs = (rvs - rvs.mean())/rvs.std(ddof=1) * np.sqrt(2) + 1 + kls.dist2 = NormExpan_gen(kls.rvs, mode="sample") kls.scale = 2 kls.atol_pdf = 1e-3 diff --git a/statsmodels/sandbox/distributions/tests/test_transf.py b/statsmodels/sandbox/distributions/tests/test_transf.py index 60348f3bd98..7b8bdaf29b9 100644 --- a/statsmodels/sandbox/distributions/tests/test_transf.py +++ b/statsmodels/sandbox/distributions/tests/test_transf.py @@ -15,165 +15,207 @@ the best which can be obtained. array(2981.0032380193438) """ -import warnings # for silencing, see above... +import warnings # for silencing, see above... 
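# A standalone restatement of the TestExpandNormMom check above, assuming the
# sandbox NormExpan_gen API used in this patch (mode="mvsk"): with skew = 0
# and excess kurtosis = 0 the Gram-Charlier expansion must collapse to the
# plain normal density.
import numpy as np
from scipy import stats
from statsmodels.sandbox.distributions.extras import NormExpan_gen

mvsk = [1.0, 4.0, 0.0, 0.0]  # mean, variance, skew, excess kurtosis
dist2 = NormExpan_gen(mvsk, mode="mvsk")
x = np.linspace(-8.0, 8.0, 11)
np.testing.assert_allclose(
    dist2.pdf(x), stats.norm.pdf(x, loc=1.0, scale=2.0), rtol=1e-6, atol=1e-12
)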
+ import numpy as np from numpy.testing import assert_almost_equal -from scipy import stats, special -from statsmodels.sandbox.distributions.extras import ( - squarenormalg, absnormalg, negsquarenormalg, squaretg) +from scipy import special, stats +from statsmodels.sandbox.distributions.extras import ( + absnormalg, + negsquarenormalg, + squarenormalg, + squaretg, +) # some patches to scipy.stats.distributions so tests work and pass # this should be necessary only for older scipy -#patch frozen distributions with a name +# patch frozen distributions with a name stats.distributions.rv_frozen.name = property(lambda self: self.dist.name) -#patch f distribution, correct skew and maybe kurtosis +# patch f distribution, correct skew and maybe kurtosis def f_stats(self, dfn, dfd): arr, where, inf, sqrt, nan = np.array, np.where, np.inf, np.sqrt, np.nan - v2 = arr(dfd*1.0) - v1 = arr(dfn*1.0) + v2 = arr(dfd * 1.0) + v1 = arr(dfn * 1.0) mu = where(v2 > 2, v2 / arr(v2 - 2), inf) - mu2 = 2*v2*v2*(v2+v1-2)/(v1*(v2-2)**2 * (v2-4)) + mu2 = 2 * v2 * v2 * (v2 + v1 - 2) / (v1 * (v2 - 2) ** 2 * (v2 - 4)) mu2 = where(v2 > 4, mu2, inf) - #g1 = 2*(v2+2*v1-2)/(v2-6)*sqrt((2*v2-4)/(v1*(v2+v1-2))) - g1 = 2*(v2+2*v1-2.)/(v2-6.)*np.sqrt(2*(v2-4.)/(v1*(v2+v1-2.))) + # g1 = 2*(v2+2*v1-2)/(v2-6)*sqrt((2*v2-4)/(v1*(v2+v1-2))) + g1 = ( + 2 + * (v2 + 2 * v1 - 2.0) + / (v2 - 6.0) + * np.sqrt(2 * (v2 - 4.0) / (v1 * (v2 + v1 - 2.0))) + ) g1 = where(v2 > 6, g1, nan) - #g2 = 3/(2*v2-16)*(8+g1*g1*(v2-6)) - g2 = 3/(2.*v2-16)*(8+g1*g1*(v2-6.)) + # g2 = 3/(2*v2-16)*(8+g1*g1*(v2-6)) + g2 = 3 / (2.0 * v2 - 16) * (8 + g1 * g1 * (v2 - 6.0)) g2 = where(v2 > 8, g2, nan) return mu, mu2, g1, g2 -#stats.distributions.f_gen._stats = f_stats + +# stats.distributions.f_gen._stats = f_stats stats.f.__class__._stats = f_stats -#correct kurtosis by subtracting 3 (Fisher) -#after this it matches halfnorm for arg close to zero +# correct kurtosis by subtracting 3 (Fisher) +# after this it matches halfnorm for arg close to zero def foldnorm_stats(self, c): arr, where, inf, sqrt, nan = np.array, np.where, np.inf, np.sqrt, np.nan exp = np.exp pi = np.pi - fac = special.erf(c/sqrt(2)) - mu = sqrt(2.0/pi)*exp(-0.5*c*c)+c*fac - mu2 = c*c + 1 - mu*mu - c2 = c*c - g1 = sqrt(2/pi)*exp(-1.5*c2)*(4-pi*exp(c2)*(2*c2+1.0)) - g1 += 2*c*fac*(6*exp(-c2) + 3*sqrt(2*pi)*c*exp(-c2/2.0)*fac + \ - pi*c*(fac*fac-1)) - g1 /= pi*mu2**1.5 - - g2 = c2*c2+6*c2+3+6*(c2+1)*mu*mu - 3*mu**4 - g2 -= 4*exp(-c2/2.0)*mu*(sqrt(2.0/pi)*(c2+2)+c*(c2+3)*exp(c2/2.0)*fac) + fac = special.erf(c / sqrt(2)) + mu = sqrt(2.0 / pi) * exp(-0.5 * c * c) + c * fac + mu2 = c * c + 1 - mu * mu + c2 = c * c + g1 = sqrt(2 / pi) * exp(-1.5 * c2) * (4 - pi * exp(c2) * (2 * c2 + 1.0)) + g1 += ( + 2 + * c + * fac + * ( + 6 * exp(-c2) + + 3 * sqrt(2 * pi) * c * exp(-c2 / 2.0) * fac + + pi * c * (fac * fac - 1) + ) + ) + g1 /= pi * mu2**1.5 + + g2 = c2 * c2 + 6 * c2 + 3 + 6 * (c2 + 1) * mu * mu - 3 * mu**4 + g2 -= ( + 4 + * exp(-c2 / 2.0) + * mu + * (sqrt(2.0 / pi) * (c2 + 2) + c * (c2 + 3) * exp(c2 / 2.0) * fac) + ) g2 /= mu2**2.0 - g2 -= 3. 
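# A quick numerical check of the closed-form F-distribution moments used in
# the f_stats patch above (the mean requires dfd > 2, the variance dfd > 4);
# scipy's built-in stats should agree to machine precision.
from scipy import stats

dfn, dfd = 5.0, 12.0
mu = dfd / (dfd - 2.0)
mu2 = 2.0 * dfd**2 * (dfn + dfd - 2.0) / (dfn * (dfd - 2.0) ** 2 * (dfd - 4.0))
m, v = stats.f.stats(dfn, dfd, moments="mv")
assert abs(m - mu) < 1e-10 and abs(v - mu2) < 1e-10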
+ g2 -= 3.0 return mu, mu2, g1, g2 -#stats.distributions.foldnorm_gen._stats = foldnorm_stats + +# stats.distributions.foldnorm_gen._stats = foldnorm_stats stats.foldnorm.__class__._stats = foldnorm_stats -#----------------------------- +# ----------------------------- DECIMAL = 5 -class Test_Transf2(object): +class Test_Transf2(object): @classmethod def setup_class(cls): cls.dist_equivalents = [ - #transf, stats.lognorm(1)) - #The below fails on the SPARC box with scipy 10.1 - #(lognormalg, stats.lognorm(1)), - #transf2 + # transf, stats.lognorm(1)) + # The below fails on the SPARC box with scipy 10.1 + # (lognormalg, stats.lognorm(1)), + # transf2 (squarenormalg, stats.chi2(1)), (absnormalg, stats.halfnorm), - (absnormalg, stats.foldnorm(1e-5)), #try frozen - #(negsquarenormalg, 1-stats.chi2), # will not work as distribution - (squaretg(10), stats.f(1, 10)) - ] #try both frozen + (absnormalg, stats.foldnorm(1e-5)), # try frozen + # (negsquarenormalg, 1-stats.chi2), # will not work as distribution + (squaretg(10), stats.f(1, 10)), + ] # try both frozen - l,s = 0.0, 1.0 - cls.ppfq = [0.1,0.5,0.9] - cls.xx = [0.95,1.0,1.1] - cls.nxx = [-0.95,-1.0,-1.1] + l, s = 0.0, 1.0 + cls.ppfq = [0.1, 0.5, 0.9] + cls.xx = [0.95, 1.0, 1.1] + cls.nxx = [-0.95, -1.0, -1.1] def test_equivalent(self): xx, ppfq = self.xx, self.ppfq - for d1,d2 in self.dist_equivalents: -## print d1.name - assert_almost_equal(d1.cdf(xx), d2.cdf(xx), err_msg='cdf'+d1.name) - assert_almost_equal(d1.pdf(xx), d2.pdf(xx), - err_msg='pdf '+d1.name+d2.name) - assert_almost_equal(d1.sf(xx), d2.sf(xx), - err_msg='sf '+d1.name+d2.name) - assert_almost_equal(d1.ppf(ppfq), d2.ppf(ppfq), - err_msg='ppq '+d1.name+d2.name) - assert_almost_equal(d1.isf(ppfq), d2.isf(ppfq), - err_msg='isf '+d1.name+d2.name) + for d1, d2 in self.dist_equivalents: + ## print d1.name + assert_almost_equal( + d1.cdf(xx), d2.cdf(xx), err_msg="cdf" + d1.name + ) + assert_almost_equal( + d1.pdf(xx), d2.pdf(xx), err_msg="pdf " + d1.name + d2.name + ) + assert_almost_equal( + d1.sf(xx), d2.sf(xx), err_msg="sf " + d1.name + d2.name + ) + assert_almost_equal( + d1.ppf(ppfq), d2.ppf(ppfq), err_msg="ppq " + d1.name + d2.name + ) + assert_almost_equal( + d1.isf(ppfq), d2.isf(ppfq), err_msg="isf " + d1.name + d2.name + ) self.d1 = d1 self.d2 = d2 -## print d1, d2 -## print d1.moment(3) -## print d2.moment(3) - #work around bug#1293 - if hasattr(d2, 'dist'): + ## print d1, d2 + ## print d1.moment(3) + ## print d2.moment(3) + # work around bug#1293 + if hasattr(d2, "dist"): d2mom = d2.dist.moment(3, *d2.args) else: d2mom = d2.moment(3) - assert_almost_equal(d1.moment(3), d2mom, - DECIMAL, - err_msg='moment '+d1.name+d2.name) + assert_almost_equal( + d1.moment(3), + d2mom, + DECIMAL, + err_msg="moment " + d1.name + d2.name, + ) # silence warnings in scipy, works for versions # after print changed to warning in scipy orig_filter = warnings.filters[:] - warnings.simplefilter('ignore') + warnings.simplefilter("ignore") try: - s1 = d1.stats(moments='mvsk') - s2 = d2.stats(moments='mvsk') + s1 = d1.stats(moments="mvsk") + s2 = d2.stats(moments="mvsk") finally: warnings.filters = orig_filter - #stats(moments='k') prints warning for lognormalg - assert_almost_equal(s1[:2], s2[:2], - err_msg='stats '+d1.name+d2.name) - assert_almost_equal(s1[2:], s2[2:], - decimal=2, #lognorm for kurtosis - err_msg='stats '+d1.name+d2.name) - - + # stats(moments='k') prints warning for lognormalg + assert_almost_equal( + s1[:2], s2[:2], err_msg="stats " + d1.name + d2.name + ) + assert_almost_equal( + 
s1[2:], + s2[2:], + decimal=2, # lognorm for kurtosis + err_msg="stats " + d1.name + d2.name, + ) def test_equivalent_negsq(self): - #special case negsquarenormalg - #negsquarenormalg.cdf(x) == stats.chi2(1).cdf(-x), for x<=0 + # special case negsquarenormalg + # negsquarenormalg.cdf(x) == stats.chi2(1).cdf(-x), for x<=0 xx, nxx, ppfq = self.xx, self.nxx, self.ppfq - d1,d2 = (negsquarenormalg, stats.chi2(1)) - #print d1.name - assert_almost_equal(d1.cdf(nxx), 1-d2.cdf(xx), err_msg='cdf'+d1.name) + d1, d2 = (negsquarenormalg, stats.chi2(1)) + # print d1.name + assert_almost_equal( + d1.cdf(nxx), 1 - d2.cdf(xx), err_msg="cdf" + d1.name + ) assert_almost_equal(d1.pdf(nxx), d2.pdf(xx)) - assert_almost_equal(d1.sf(nxx), 1-d2.sf(xx)) + assert_almost_equal(d1.sf(nxx), 1 - d2.sf(xx)) assert_almost_equal(d1.ppf(ppfq), -d2.ppf(ppfq)[::-1]) assert_almost_equal(d1.isf(ppfq), -d2.isf(ppfq)[::-1]) assert_almost_equal(d1.moment(3), -d2.moment(3)) - ch2oddneg = [v*(-1)**(i+1) for i,v in - enumerate(d2.stats(moments='mvsk'))] - assert_almost_equal(d1.stats(moments='mvsk'), ch2oddneg, - err_msg='stats '+d1.name+d2.name) + ch2oddneg = [ + v * (-1) ** (i + 1) for i, v in enumerate(d2.stats(moments="mvsk")) + ] + assert_almost_equal( + d1.stats(moments="mvsk"), + ch2oddneg, + err_msg="stats " + d1.name + d2.name, + ) -if __name__ == '__main__': +if __name__ == "__main__": tt = Test_Transf2() tt.test_equivalent() tt.test_equivalent_negsq() debug = 0 if debug: - print(negsquarenormalg.ppf([0.1,0.5,0.9])) - print(stats.chi2.ppf([0.1,0.5,0.9],1)) + print(negsquarenormalg.ppf([0.1, 0.5, 0.9])) + print(stats.chi2.ppf([0.1, 0.5, 0.9], 1)) print(negsquarenormalg.a) print(negsquarenormalg.b) - print(absnormalg.stats( moments='mvsk')) - print(stats.foldnorm(1e-10).stats( moments='mvsk')) - print(stats.halfnorm.stats( moments='mvsk')) + print(absnormalg.stats(moments="mvsk")) + print(stats.foldnorm(1e-10).stats(moments="mvsk")) + print(stats.halfnorm.stats(moments="mvsk")) diff --git a/statsmodels/sandbox/distributions/transform_functions.py b/statsmodels/sandbox/distributions/transform_functions.py index f8bad5e83f6..4b9dab86ce0 100644 --- a/statsmodels/sandbox/distributions/transform_functions.py +++ b/statsmodels/sandbox/distributions/transform_functions.py @@ -11,24 +11,22 @@ class TransformFunction(object): - def __call__(self, x): self.func(x) - ## Hump and U-shaped functions class SquareFunc(TransformFunction): - '''class to hold quadratic function with inverse function and derivative + """class to hold quadratic function with inverse function and derivative using instance methods instead of class methods, if we want extension to parametrized function - ''' + """ def func(self, x): - return np.power(x, 2.) 
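# A small consistency check for the two-branch square transform defined here.
# Note that TransformFunction.__call__ above does not return self.func(x), so
# the methods are exercised directly; the derivative of the plus branch is
# d/dy sqrt(y) = 1 / (2 sqrt(y)).
import numpy as np
from statsmodels.sandbox.distributions.transform_functions import SquareFunc

sq = SquareFunc()
y = np.array([0.25, 1.0, 4.0])
np.testing.assert_allclose(sq.func(sq.inverseplus(y)), y)   # g(g_+^{-1}(y)) = y
np.testing.assert_allclose(sq.func(sq.inverseminus(y)), y)  # g(g_-^{-1}(y)) = y
np.testing.assert_allclose(sq.derivplus(y), 0.5 / np.sqrt(y))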
+ return np.power(x, 2.0) def inverseplus(self, x): return np.sqrt(x) @@ -37,20 +35,17 @@ def inverseminus(self, x): return 0.0 - np.sqrt(x) def derivplus(self, x): - return 0.5/np.sqrt(x) + return 0.5 / np.sqrt(x) def derivminus(self, x): - return 0.0 - 0.5/np.sqrt(x) - - + return 0.0 - 0.5 / np.sqrt(x) class NegSquareFunc(TransformFunction): - '''negative quadratic function + """negative quadratic function""" - ''' def func(self, x): - return -np.power(x,2) + return -np.power(x, 2) def inverseplus(self, x): return np.sqrt(-x) @@ -59,15 +54,14 @@ def inverseminus(self, x): return 0.0 - np.sqrt(-x) def derivplus(self, x): - return 0.0 - 0.5/np.sqrt(-x) + return 0.0 - 0.5 / np.sqrt(-x) def derivminus(self, x): - return 0.5/np.sqrt(-x) + return 0.5 / np.sqrt(-x) class AbsFunc(TransformFunction): - '''class for absolute value transformation - ''' + """class for absolute value transformation""" def func(self, x): return np.abs(x) @@ -90,7 +84,6 @@ def derivminus(self, x): class LogFunc(TransformFunction): - def func(self, x): return np.log(x) @@ -98,11 +91,10 @@ def inverse(self, y): return np.exp(y) def deriv(self, x): - return 1./x - -class ExpFunc(TransformFunction): + return 1.0 / x +class ExpFunc(TransformFunction): def func(self, x): return np.exp(x) @@ -114,22 +106,20 @@ def deriv(self, x): class BoxCoxNonzeroFunc(TransformFunction): - def __init__(self, lamda): self.lamda = lamda def func(self, x): - return (np.power(x, self.lamda) - 1)/self.lamda + return (np.power(x, self.lamda) - 1) / self.lamda def inverse(self, y): - return (self.lamda * y + 1)/self.lamda + return (self.lamda * y + 1) / self.lamda def deriv(self, x): return np.power(x, self.lamda - 1) class AffineFunc(TransformFunction): - def __init__(self, constant, slope): self.constant = constant self.slope = slope @@ -145,7 +135,6 @@ def deriv(self, x): class ChainFunc(TransformFunction): - def __init__(self, finn, fout): self.finn = finn self.fout = fout @@ -161,28 +150,28 @@ def deriv(self, x): return self.fout.deriv(z) * self.finn.deriv(x) -#def inverse(x): +# def inverse(x): # return np.divide(1.0,x) # -#mux, stdx = 0.05, 0.1 -#mux, stdx = 9.0, 1.0 -#def inversew(x): +# mux, stdx = 0.05, 0.1 +# mux, stdx = 9.0, 1.0 +# def inversew(x): # return 1.0/(1+mux+x*stdx) -#def inversew_inv(x): +# def inversew_inv(x): # return (1.0/x - 1.0 - mux)/stdx #.np.divide(1.0,x)-10 # -#def identit(x): +# def identit(x): # return x -if __name__ == '__main__': +if __name__ == "__main__": absf = AbsFunc() absf.func(5) == 5 absf.func(-5) == 5 absf.inverseplus(5) == 5 absf.inverseminus(5) == -5 - chainf = ChainFunc(AffineFunc(1,2), BoxCoxNonzeroFunc(2)) - print(chainf.func(3.)) - chainf2 = ChainFunc(BoxCoxNonzeroFunc(2), AffineFunc(1,2)) - print(chainf.func(3.)) + chainf = ChainFunc(AffineFunc(1, 2), BoxCoxNonzeroFunc(2)) + print(chainf.func(3.0)) + chainf2 = ChainFunc(BoxCoxNonzeroFunc(2), AffineFunc(1, 2)) + print(chainf.func(3.0)) diff --git a/statsmodels/sandbox/distributions/transformed.py b/statsmodels/sandbox/distributions/transformed.py index 6a5ae2f16be..e192f3855a2 100644 --- a/statsmodels/sandbox/distributions/transformed.py +++ b/statsmodels/sandbox/distributions/transformed.py @@ -1,8 +1,6 @@ - - ## copied from nonlinear_transform_gen.py -''' A class for the distribution of a non-linear monotonic transformation of a continuous random variable +""" A class for the distribution of a non-linear monotonic transformation of a continuous random variable simplest usage: example: create log-gamma distribution, i.e. 
y = log(x), @@ -37,160 +35,197 @@ Author: josef-pktd License: BSD -''' +""" +import numpy as np from scipy import stats from scipy.stats import distributions -import numpy as np def get_u_argskwargs(**kwargs): - #Todo: What's this? wrong spacing, used in Transf_gen TransfTwo_gen - u_kwargs = dict((k.replace('u_','',1),v) for k,v in kwargs.items() - if k.startswith('u_')) - u_args = u_kwargs.pop('u_args',None) + # Todo: What's this? wrong spacing, used in Transf_gen TransfTwo_gen + u_kwargs = dict( + (k.replace("u_", "", 1), v) + for k, v in kwargs.items() + if k.startswith("u_") + ) + u_args = u_kwargs.pop("u_args", None) return u_args, u_kwargs + class Transf_gen(distributions.rv_continuous): - '''a class for non-linear monotonic transformation of a continuous random variable + """a class for non-linear monotonic transformation of a continuous random variable""" - ''' def __init__(self, kls, func, funcinv, *args, **kwargs): - #print(args - #print(kwargs + # print(args + # print(kwargs self.func = func self.funcinv = funcinv - #explicit for self.__dict__.update(kwargs) - #need to set numargs because inspection does not work - self.numargs = kwargs.pop('numargs', 0) - #print(self.numargs - name = kwargs.pop('name','transfdist') - longname = kwargs.pop('longname','Non-linear transformed distribution') - extradoc = kwargs.pop('extradoc',None) - a = kwargs.pop('a', -np.inf) - b = kwargs.pop('b', np.inf) - self.decr = kwargs.pop('decr', False) - #defines whether it is a decreasing (True) - # or increasing (False) monotonic transformation - + # explicit for self.__dict__.update(kwargs) + # need to set numargs because inspection does not work + self.numargs = kwargs.pop("numargs", 0) + # print(self.numargs + name = kwargs.pop("name", "transfdist") + longname = kwargs.pop( + "longname", "Non-linear transformed distribution" + ) + # extradoc = kwargs.pop('extradoc',None) + a = kwargs.pop("a", -np.inf) + b = kwargs.pop("b", np.inf) + self.decr = kwargs.pop("decr", False) + # defines whether it is a decreasing (True) + # or increasing (False) monotonic transformation self.u_args, self.u_kwargs = get_u_argskwargs(**kwargs) - self.kls = kls #(self.u_args, self.u_kwargs) - # possible to freeze the underlying distribution - - super(Transf_gen,self).__init__(a=a, b=b, name = name, - shapes=kls.shapes, - longname = longname, - extradoc = extradoc) - - def _cdf(self,x,*args, **kwargs): - #print(args + self.kls = kls # (self.u_args, self.u_kwargs) + # possible to freeze the underlying distribution + + super(Transf_gen, self).__init__( + a=a, + b=b, + name=name, + shapes=kls.shapes, + longname=longname, + # extradoc = extradoc + ) + + def _cdf(self, x, *args, **kwargs): + # print(args if not self.decr: - return self.kls._cdf(self.funcinv(x),*args, **kwargs) - #note scipy _cdf only take *args not *kwargs + return self.kls._cdf(self.funcinv(x), *args, **kwargs) + # note scipy _cdf only take *args not *kwargs else: - return 1.0 - self.kls._cdf(self.funcinv(x),*args, **kwargs) + return 1.0 - self.kls._cdf(self.funcinv(x), *args, **kwargs) + def _ppf(self, q, *args, **kwargs): if not self.decr: - return self.func(self.kls._ppf(q,*args, **kwargs)) + return self.func(self.kls._ppf(q, *args, **kwargs)) else: - return self.func(self.kls._ppf(1-q,*args, **kwargs)) + return self.func(self.kls._ppf(1 - q, *args, **kwargs)) def inverse(x): - return np.divide(1.0,x) + return np.divide(1.0, x) + mux, stdx = 0.05, 0.1 mux, stdx = 9.0, 1.0 + + def inversew(x): - return 1.0/(1+mux+x*stdx) + return 1.0 / (1 + mux + x * stdx) + 
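# The identity that Transf_gen._cdf encodes, stated directly: for an
# increasing transform y = g(x) of a random variable X, F_Y(y) =
# F_X(g^{-1}(y)), and for a decreasing transform F_Y(y) = 1 - F_X(g^{-1}(y)).
# For g = exp and X standard normal this is scipy's lognorm with s = 1.
import numpy as np
from scipy import stats

y = np.array([0.5, 1.0, 2.0])
np.testing.assert_allclose(
    stats.norm.cdf(np.log(y)),      # F_X(g^{-1}(y))
    stats.lognorm.cdf(y, 1.0),      # scipy reference, shape s = 1
)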
+ def inversew_inv(x): - return (1.0/x - 1.0 - mux)/stdx #.np.divide(1.0,x)-10 + return (1.0 / x - 1.0 - mux) / stdx # .np.divide(1.0,x)-10 + def identit(x): return x -invdnormalg = Transf_gen(stats.norm, inversew, inversew_inv, decr=True, #a=-np.inf, - numargs = 0, name = 'discf', longname = 'normal-based discount factor', - extradoc = '\ndistribution of discount factor y=1/(1+x)) with x N(0.05,0.1**2)') -lognormalg = Transf_gen(stats.norm, np.exp, np.log, - numargs = 2, a=0, name = 'lnnorm', - longname = 'Exp transformed normal', - extradoc = '\ndistribution of y = exp(x), with x standard normal' - 'precision for moment andstats is not very high, 2-3 decimals') +invdnormalg = Transf_gen( + stats.norm, + inversew, + inversew_inv, + decr=True, # a=-np.inf, + numargs=0, + name="discf", + longname="normal-based discount factor", + # extradoc = '\ndistribution of discount factor y=1/(1+x)) with x N(0.05,0.1**2)' +) + +lognormalg = Transf_gen( + stats.norm, + np.exp, + np.log, + numargs=2, + a=0, + name="lnnorm", + longname="Exp transformed normal", + # extradoc = '\ndistribution of y = exp(x), with x standard normal' + # 'precision for moment andstats is not very high, 2-3 decimals' +) loggammaexpg = Transf_gen(stats.gamma, np.log, np.exp, numargs=1) ## copied form nonlinear_transform_short.py -'''univariate distribution of a non-linear monotonic transformation of a +"""univariate distribution of a non-linear monotonic transformation of a random variable -''' +""" + class ExpTransf_gen(distributions.rv_continuous): - '''Distribution based on log/exp transformation + """Distribution based on log/exp transformation the constructor can be called with a distribution class and generates the distribution of the transformed random variable - ''' + """ + def __init__(self, kls, *args, **kwargs): - #print(args - #print(kwargs - #explicit for self.__dict__.update(kwargs) - if 'numargs' in kwargs: - self.numargs = kwargs['numargs'] + # print(args + # print(kwargs + # explicit for self.__dict__.update(kwargs) + if "numargs" in kwargs: + self.numargs = kwargs["numargs"] else: self.numargs = 1 - if 'name' in kwargs: - name = kwargs['name'] + if "name" in kwargs: + name = kwargs["name"] else: - name = 'Log transformed distribution' - if 'a' in kwargs: - a = kwargs['a'] + name = "Log transformed distribution" + if "a" in kwargs: + a = kwargs["a"] else: a = 0 - super(ExpTransf_gen,self).__init__(a=a, name=name) + super(ExpTransf_gen, self).__init__(a=a, name=name) self.kls = kls - def _cdf(self,x,*args): - #print(args - return self.kls._cdf(np.log(x),*args) + + def _cdf(self, x, *args): + # print(args + return self.kls._cdf(np.log(x), *args) + def _ppf(self, q, *args): - return np.exp(self.kls._ppf(q,*args)) + return np.exp(self.kls._ppf(q, *args)) + class LogTransf_gen(distributions.rv_continuous): - '''Distribution based on log/exp transformation + """Distribution based on log/exp transformation the constructor can be called with a distribution class and generates the distribution of the transformed random variable - ''' + """ + def __init__(self, kls, *args, **kwargs): - #explicit for self.__dict__.update(kwargs) - if 'numargs' in kwargs: - self.numargs = kwargs['numargs'] + # explicit for self.__dict__.update(kwargs) + if "numargs" in kwargs: + self.numargs = kwargs["numargs"] else: self.numargs = 1 - if 'name' in kwargs: - name = kwargs['name'] + if "name" in kwargs: + name = kwargs["name"] else: - name = 'Log transformed distribution' - if 'a' in kwargs: - a = kwargs['a'] + name = "Log transformed 
distribution" + if "a" in kwargs: + a = kwargs["a"] else: a = 0 - super(LogTransf_gen,self).__init__(a=a, name = name) + super(LogTransf_gen, self).__init__(a=a, name=name) self.kls = kls - def _cdf(self,x, *args): - #print(args - return self.kls._cdf(np.exp(x),*args) + def _cdf(self, x, *args): + # print(args + return self.kls._cdf(np.exp(x), *args) + def _ppf(self, q, *args): - return np.log(self.kls._ppf(q,*args)) + return np.log(self.kls._ppf(q, *args)) + def examples_transf(): ##lognormal = ExpTransf(a=0.0, xa=-10.0, name = 'Log transformed normal') @@ -200,10 +235,12 @@ def examples_transf(): ##print(stats.lognorm.stats(1) ##print(lognormal.rvs(size=10) - print('Results for lognormal') - lognormalg = ExpTransf_gen(stats.norm, a=0, name = 'Log transformed normal general') + print("Results for lognormal") + lognormalg = ExpTransf_gen( + stats.norm, a=0, name="Log transformed normal general" + ) print(lognormalg.cdf(1)) - print(stats.lognorm.cdf(1,1)) + print(stats.lognorm.cdf(1, 1)) print(lognormalg.stats()) print(stats.lognorm.stats(1)) print(lognormalg.rvs(size=5)) @@ -213,36 +250,33 @@ def examples_transf(): ##print(loggammag._cdf(1,10) ##print(stats.loggamma.cdf(1,10) - print('Results for expgamma') + print("Results for expgamma") loggammaexpg = LogTransf_gen(stats.gamma) - print(loggammaexpg._cdf(1,10)) - print(stats.loggamma.cdf(1,10)) - print(loggammaexpg._cdf(2,15)) - print(stats.loggamma.cdf(2,15)) - + print(loggammaexpg._cdf(1, 10)) + print(stats.loggamma.cdf(1, 10)) + print(loggammaexpg._cdf(2, 15)) + print(stats.loggamma.cdf(2, 15)) # this requires change in scipy.stats.distribution - #print(loggammaexpg.cdf(1,10) + # print(loggammaexpg.cdf(1,10) - print('Results for loglaplace') + print("Results for loglaplace") loglaplaceg = LogTransf_gen(stats.laplace) - print(loglaplaceg._cdf(2,10)) - print(stats.loglaplace.cdf(2,10)) + print(loglaplaceg._cdf(2, 10)) + print(stats.loglaplace.cdf(2, 10)) loglaplaceexpg = ExpTransf_gen(stats.laplace) - print(loglaplaceexpg._cdf(2,10)) - - + print(loglaplaceexpg._cdf(2, 10)) ## copied from transformtwo.py -''' +""" Created on Apr 28, 2009 @author: Josef Perktold -''' +""" -''' A class for the distribution of a non-linear u-shaped or hump shaped transformation of a +""" A class for the distribution of a non-linear u-shaped or hump shaped transformation of a continuous random variable This is a companion to the distributions of non-linear monotonic transformation to the case @@ -270,11 +304,11 @@ def examples_transf(): * add _rvs as method, will be faster in many cases -''' +""" class TransfTwo_gen(distributions.rv_continuous): - '''Distribution based on a non-monotonic (u- or hump-shaped transformation) + """Distribution based on a non-monotonic (u- or hump-shaped transformation) the constructor can be called with a distribution class, and functions that define the non-linear transformation. @@ -287,79 +321,103 @@ class TransfTwo_gen(distributions.rv_continuous): This can be used to generate distribution instances similar to the distributions in scipy.stats. 
- ''' - #a class for non-linear non-monotonic transformation of a continuous random variable - def __init__(self, kls, func, funcinvplus, funcinvminus, derivplus, - derivminus, *args, **kwargs): - #print(args - #print(kwargs + """ + + # a class for non-linear non-monotonic transformation of a continuous random variable + def __init__( + self, + kls, + func, + funcinvplus, + funcinvminus, + derivplus, + derivminus, + *args, + **kwargs + ): + # print(args + # print(kwargs self.func = func self.funcinvplus = funcinvplus self.funcinvminus = funcinvminus self.derivplus = derivplus self.derivminus = derivminus - #explicit for self.__dict__.update(kwargs) - #need to set numargs because inspection does not work - self.numargs = kwargs.pop('numargs', 0) - #print(self.numargs - name = kwargs.pop('name','transfdist') - longname = kwargs.pop('longname','Non-linear transformed distribution') - extradoc = kwargs.pop('extradoc',None) - a = kwargs.pop('a', -np.inf) # attached to self in super - b = kwargs.pop('b', np.inf) # self.a, self.b would be overwritten - self.shape = kwargs.pop('shape', False) - #defines whether it is a `u` shaped or `hump' shaped - # transformation - + # explicit for self.__dict__.update(kwargs) + # need to set numargs because inspection does not work + self.numargs = kwargs.pop("numargs", 0) + # print(self.numargs + name = kwargs.pop("name", "transfdist") + longname = kwargs.pop( + "longname", "Non-linear transformed distribution" + ) + # extradoc = kwargs.pop('extradoc',None) + a = kwargs.pop("a", -np.inf) # attached to self in super + b = kwargs.pop("b", np.inf) # self.a, self.b would be overwritten + self.shape = kwargs.pop("shape", False) + # defines whether it is a `u` shaped or `hump' shaped + # transformation self.u_args, self.u_kwargs = get_u_argskwargs(**kwargs) - self.kls = kls #(self.u_args, self.u_kwargs) - # possible to freeze the underlying distribution - - super(TransfTwo_gen,self).__init__(a=a, b=b, - name = name, - shapes=kls.shapes, - longname = longname, - extradoc = extradoc) + self.kls = kls # (self.u_args, self.u_kwargs) + # possible to freeze the underlying distribution + + super(TransfTwo_gen, self).__init__( + a=a, + b=b, + name=name, + shapes=kls.shapes, + longname=longname, + # extradoc = extradoc + ) def _rvs(self, *args): - self.kls._size = self._size #size attached to self, not function argument + self.kls._size = ( + self._size + ) # size attached to self, not function argument return self.func(self.kls._rvs(*args)) - def _pdf(self,x,*args, **kwargs): - #print(args - if self.shape == 'u': + def _pdf(self, x, *args, **kwargs): + # print(args + if self.shape == "u": signpdf = 1 - elif self.shape == 'hump': + elif self.shape == "hump": signpdf = -1 else: - raise ValueError('shape can only be `u` or `hump`') - - return signpdf * (self.derivplus(x)*self.kls._pdf(self.funcinvplus(x),*args, **kwargs) - - self.derivminus(x)*self.kls._pdf(self.funcinvminus(x),*args, **kwargs)) - #note scipy _cdf only take *args not *kwargs - - def _cdf(self,x,*args, **kwargs): - #print(args - if self.shape == 'u': - return self.kls._cdf(self.funcinvplus(x),*args, **kwargs) - \ - self.kls._cdf(self.funcinvminus(x),*args, **kwargs) - #note scipy _cdf only take *args not *kwargs + raise ValueError("shape can only be `u` or `hump`") + + return signpdf * ( + self.derivplus(x) + * self.kls._pdf(self.funcinvplus(x), *args, **kwargs) + - self.derivminus(x) + * self.kls._pdf(self.funcinvminus(x), *args, **kwargs) + ) + # note scipy _cdf only take *args not *kwargs + + def _cdf(self, 
x, *args, **kwargs): + # print(args + if self.shape == "u": + return self.kls._cdf( + self.funcinvplus(x), *args, **kwargs + ) - self.kls._cdf(self.funcinvminus(x), *args, **kwargs) + # note scipy _cdf only take *args not *kwargs else: - return 1.0 - self._sf(x,*args, **kwargs) - - def _sf(self,x,*args, **kwargs): - #print(args - if self.shape == 'hump': - return self.kls._cdf(self.funcinvplus(x),*args, **kwargs) - \ - self.kls._cdf(self.funcinvminus(x),*args, **kwargs) - #note scipy _cdf only take *args not *kwargs + return 1.0 - self._sf(x, *args, **kwargs) + + def _sf(self, x, *args, **kwargs): + # print(args + if self.shape == "hump": + return self.kls._cdf( + self.funcinvplus(x), *args, **kwargs + ) - self.kls._cdf(self.funcinvminus(x), *args, **kwargs) + # note scipy _cdf only take *args not *kwargs else: return 1.0 - self._cdf(x, *args, **kwargs) - def _munp(self, n,*args, **kwargs): - return self._mom0_sc(n,*args) + def _munp(self, n, *args, **kwargs): + return self._mom0_sc(n, *args) + + # ppf might not be possible in general case? # should be possible in symmetric case # def _ppf(self, q, *args, **kwargs): @@ -368,14 +426,16 @@ def _munp(self, n,*args, **kwargs): # elif self.shape == 'hump': # return self.func(self.kls._ppf(1-q,*args, **kwargs)) -#TODO: rename these functions to have unique names +# TODO: rename these functions to have unique names + class SquareFunc(object): - '''class to hold quadratic function with inverse function and derivative + """class to hold quadratic function with inverse function and derivative using instance methods instead of class methods, if we want extension to parametrized function - ''' + """ + def inverseplus(self, x): return np.sqrt(x) @@ -383,71 +443,124 @@ def inverseminus(self, x): return 0.0 - np.sqrt(x) def derivplus(self, x): - return 0.5/np.sqrt(x) + return 0.5 / np.sqrt(x) def derivminus(self, x): - return 0.0 - 0.5/np.sqrt(x) + return 0.0 - 0.5 / np.sqrt(x) def squarefunc(self, x): - return np.power(x,2) + return np.power(x, 2) + sqfunc = SquareFunc() -squarenormalg = TransfTwo_gen(stats.norm, sqfunc.squarefunc, sqfunc.inverseplus, - sqfunc.inverseminus, sqfunc.derivplus, sqfunc.derivminus, - shape='u', a=0.0, b=np.inf, - numargs = 0, name = 'squarenorm', longname = 'squared normal distribution', - extradoc = '\ndistribution of the square of a normal random variable' +\ - ' y=x**2 with x N(0.0,1)') - #u_loc=l, u_scale=s) -squaretg = TransfTwo_gen(stats.t, sqfunc.squarefunc, sqfunc.inverseplus, - sqfunc.inverseminus, sqfunc.derivplus, sqfunc.derivminus, - shape='u', a=0.0, b=np.inf, - numargs = 1, name = 'squarenorm', longname = 'squared t distribution', - extradoc = '\ndistribution of the square of a t random variable' +\ - ' y=x**2 with x t(dof,0.0,1)') +squarenormalg = TransfTwo_gen( + stats.norm, + sqfunc.squarefunc, + sqfunc.inverseplus, + sqfunc.inverseminus, + sqfunc.derivplus, + sqfunc.derivminus, + shape="u", + a=0.0, + b=np.inf, + numargs=0, + name="squarenorm", + longname="squared normal distribution", + # extradoc = '\ndistribution of the square of a normal random variable' +\ + # ' y=x**2 with x N(0.0,1)' +) +# u_loc=l, u_scale=s) +squaretg = TransfTwo_gen( + stats.t, + sqfunc.squarefunc, + sqfunc.inverseplus, + sqfunc.inverseminus, + sqfunc.derivplus, + sqfunc.derivminus, + shape="u", + a=0.0, + b=np.inf, + numargs=1, + name="squarenorm", + longname="squared t distribution", + # extradoc = '\ndistribution of the square of a t random variable' +\ + # ' y=x**2 with x t(dof,0.0,1)' +) + def inverseplus(x): return 
np.sqrt(-x) + def inverseminus(x): return 0.0 - np.sqrt(-x) + def derivplus(x): - return 0.0 - 0.5/np.sqrt(-x) + return 0.0 - 0.5 / np.sqrt(-x) + def derivminus(x): - return 0.5/np.sqrt(-x) + return 0.5 / np.sqrt(-x) -def negsquarefunc(x): - return -np.power(x,2) +def negsquarefunc(x): + return -np.power(x, 2) + + +negsquarenormalg = TransfTwo_gen( + stats.norm, + negsquarefunc, + inverseplus, + inverseminus, + derivplus, + derivminus, + shape="hump", + a=-np.inf, + b=0.0, + numargs=0, + name="negsquarenorm", + longname="negative squared normal distribution", + # extradoc = '\ndistribution of the negative square of a normal random variable' +\ + # ' y=-x**2 with x N(0.0,1)' +) +# u_loc=l, u_scale=s) -negsquarenormalg = TransfTwo_gen(stats.norm, negsquarefunc, inverseplus, inverseminus, - derivplus, derivminus, shape='hump', a=-np.inf, b=0.0, - numargs = 0, name = 'negsquarenorm', longname = 'negative squared normal distribution', - extradoc = '\ndistribution of the negative square of a normal random variable' +\ - ' y=-x**2 with x N(0.0,1)') - #u_loc=l, u_scale=s) def inverseplus(x): return x + def inverseminus(x): return 0.0 - x + def derivplus(x): return 1.0 + def derivminus(x): return 0.0 - 1.0 + def absfunc(x): return np.abs(x) -absnormalg = TransfTwo_gen(stats.norm, np.abs, inverseplus, inverseminus, - derivplus, derivminus, shape='u', a=0.0, b=np.inf, - numargs = 0, name = 'absnorm', longname = 'absolute of normal distribution', - extradoc = '\ndistribution of the absolute value of a normal random variable' +\ - ' y=abs(x) with x N(0,1)') +absnormalg = TransfTwo_gen( + stats.norm, + np.abs, + inverseplus, + inverseminus, + derivplus, + derivminus, + shape="u", + a=0.0, + b=np.inf, + numargs=0, + name="absnorm", + longname="absolute of normal distribution", + # extradoc = '\ndistribution of the absolute value of a normal random variable' +\ + # ' y=abs(x) with x N(0,1)' +) diff --git a/statsmodels/sandbox/distributions/try_max.py b/statsmodels/sandbox/distributions/try_max.py index 5457325ff7a..c287927adc4 100644 --- a/statsmodels/sandbox/distributions/try_max.py +++ b/statsmodels/sandbox/distributions/try_max.py @@ -1,40 +1,51 @@ -''' +""" adjusted from Denis on pystatsmodels mailing list there might still be problems with loc and scale, -''' +""" from scipy import stats + __date__ = "2010-12-29 dec" + class MaxDist(stats.rv_continuous): - """ max of n of scipy.stats normal expon ... - Example: - maxnormal10 = RVmax( scipy.stats.norm, 10 ) - sample = maxnormal10( size=1000 ) - sample.cdf = cdf ^ n, ppf ^ (1/n) + """max of n of scipy.stats normal expon ... + Example: + maxnormal10 = RVmax( scipy.stats.norm, 10 ) + sample = maxnormal10( size=1000 ) + sample.cdf = cdf ^ n, ppf ^ (1/n) """ - def __init__( self, dist, n ): + + def __init__(self, dist, n): self.dist = dist self.n = n - extradoc = 'maximumdistribution is the distribution of the '\ - + 'maximum of n i.i.d. random variable' - super(MaxDist, self).__init__(name='maxdist', a=dist.a, b=dist.b, - longname = 'A maximumdistribution', extradoc = extradoc) + # extradoc = 'maximumdistribution is the distribution of the '\ + # + 'maximum of n i.i.d. 
random variables' + super(MaxDist, self).__init__( + name="maxdist", + a=dist.a, + b=dist.b, + longname="A maximum distribution", # extradoc = extradoc + ) def _pdf(self, x, *args, **kw): - return self.n * self.dist.pdf(x, *args, **kw) \ - * self.dist.cdf(x, *args, **kw )**(self.n-1) + return ( + self.n + * self.dist.pdf(x, *args, **kw) + * self.dist.cdf(x, *args, **kw) ** (self.n - 1) + ) def _cdf(self, x, *args, **kw): - return self.dist.cdf(x, *args, **kw)**self.n + return self.dist.cdf(x, *args, **kw) ** self.n def _ppf(self, q, *args, **kw): # y = F(x) ^ n <=> x = F-1( y ^ 1/n) - return self.dist.ppf(q**(1./self.n), *args, **kw) + return self.dist.ppf(q ** (1.0 / self.n), *args, **kw) + ## def rvs( self, *args, **kw ): ## size = kw.pop( "size", 1 ) @@ -45,9 +56,9 @@ def _ppf(self, q, *args, **kw): maxdistr = MaxDist(stats.norm, 10) print(maxdistr.rvs(size=10)) -print(maxdistr.stats(moments = 'mvsk')) +print(maxdistr.stats(moments="mvsk")) -''' +""" >>> print maxdistr.stats(moments = 'mvsk') (array(1.5387527308351818), array(0.34434382328492852), array(0.40990510188513779), array(0.33139861783918922)) >>> rvs = np.random.randn(1000,10) @@ -70,4 +81,4 @@ def _ppf(self, q, *args, **kw): 0.99999999999999956 -''' +""" diff --git a/statsmodels/sandbox/distributions/try_pot.py b/statsmodels/sandbox/distributions/try_pot.py index 6a088423b26..dd879f11b9e 100644 --- a/statsmodels/sandbox/distributions/try_pot.py +++ b/statsmodels/sandbox/distributions/try_pot.py @@ -8,7 +8,7 @@ def mean_residual_life(x, frac=None, alpha=0.05): - '''empirical mean residual life or expected shortfall + """empirical mean residual life or expected shortfall Parameters ---------- @@ -24,7 +24,7 @@ def mean_residual_life(x, frac=None, alpha=0.05): last observations std is zero vectorize loop using cumsum frac does not work yet - ''' + """ axis = 0 # searchsorted is 1d only x = np.asarray(x) @@ -35,16 +35,18 @@ def mean_residual_life(x, frac=None, alpha=0.05): else: xthreshold = xsorted[np.floor(nobs * frac).astype(int)] # use searchsorted instead of simple index in case of ties - xlargerindex = np.searchsorted(xsorted, xthreshold, side='right') + xlargerindex = np.searchsorted(xsorted, xthreshold, side="right") # TODO:replace loop with cumsum ?
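    # A hedged sketch of the cumsum idea in the TODO above (untested; it
    # assumes the tail statistics are meant to come from xsorted, while
    # the loop below slices the unsorted x):
    #     tail_sums = np.cumsum(xsorted[::-1])[::-1]
    #     # tail_sums[k] == xsorted[k:].sum(), hence all tail means at once:
    #     rmeans = tail_sums[xlargerindex] / (nobs - xlargerindex)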
result = [] - for i in range(len(xthreshold)-1): + for i in range(len(xthreshold) - 1): k_ind = xlargerindex[i] rmean = x[k_ind:].mean() # this does not work for last observations, nans rstd = x[k_ind:].std() - rmstd = rstd/np.sqrt(nobs-k_ind) # std error of mean, check formula + rmstd = rstd / np.sqrt( + nobs - k_ind + ) # std error of mean, check formula result.append((k_ind, xthreshold[i], rmean, rmstd)) res = np.array(result) diff --git a/statsmodels/sandbox/examples/bayesprior.py b/statsmodels/sandbox/examples/bayesprior.py index c0e48959d58..a192006f2ae 100644 --- a/statsmodels/sandbox/examples/bayesprior.py +++ b/statsmodels/sandbox/examples/bayesprior.py @@ -5,40 +5,47 @@ try: import pymc + pymc_installed = 1 except: print("pymc not imported") pymc_installed = 0 -import numpy as np from matplotlib import pyplot as plt -from scipy import stats, integrate +import numpy as np +from numpy import exp, log +from scipy import integrate, stats +from scipy.special import gammainc, gammaincinv, gammaln from scipy.stats import rv_continuous -from scipy.special import gammaln, gammaincinv, gammainc -from numpy import log,exp -#np.random.seed(12345) +# np.random.seed(12345) + class igamma_gen(rv_continuous): def _pdf(self, x, a, b): - return exp(self._logpdf(x,a,b)) + return exp(self._logpdf(x, a, b)) + def _logpdf(self, x, a, b): - return a*log(b) - gammaln(a) -(a+1)*log(x) - b/x + return a * log(b) - gammaln(a) - (a + 1) * log(x) - b / x + def _cdf(self, x, a, b): - return 1.0-gammainc(a,b/x) # why is this different than the wiki? + return 1.0 - gammainc(a, b / x) # why is this different than the wiki? + def _ppf(self, q, a, b): - return b/gammaincinv(a,1-q) -#NOTE: should be correct, work through invgamma example and 2 param inv gamma -#CDF + return b / gammaincinv(a, 1 - q) + + # NOTE: should be correct, work through invgamma example and 2 param inv gamma + # CDF def _munp(self, n, a, b): - args = (a,b) + args = (a, b) super(igamma_gen, self)._munp(self, n, *args) -#TODO: is this robust for differential entropy in this case? closed form or -#shortcuts in special? + + # TODO: is this robust for differential entropy in this case? closed form or + # shortcuts in special? def _entropy(self, *args): def integ(x): val = self._pdf(x, *args) - return val*log(val) + return val * log(val) entr = -integrate.quad(integ, self.a, self.b)[0] if not np.isnan(entr): @@ -46,31 +53,36 @@ def integ(x): else: raise ValueError("Problem with integration. Returned nan.") -igamma = igamma_gen(a=0.0, name='invgamma', longname="An inverted gamma", - shapes = 'a,b', extradoc=""" -Inverted gamma distribution +igamma = igamma_gen( + a=0.0, + name="invgamma", + longname="An inverted gamma", + shapes="a,b", # extradoc=""" + # + # Inverted gamma distribution + # + # invgamma.pdf(x,a,b) = b**a*x**(-a-1)/gamma(a) * exp(-b/x) + # for x > 0, a > 0, b>0. + # """ +) -invgamma.pdf(x,a,b) = b**a*x**(-a-1)/gamma(a) * exp(-b/x) -for x > 0, a > 0, b>0. -""") - -#NOTE: the above is unnecessary. B takes the same role as the scale parameter +# NOTE: the above is unnecessary. 
B takes the same role as the scale parameter # in inverted gamma -palpha = np.random.gamma(400.,.005, size=10000) -print("First moment: %s\nSecond moment: %s" % (palpha.mean(),palpha.std())) +palpha = np.random.gamma(400.0, 0.005, size=10000) +print("First moment: %s\nSecond moment: %s" % (palpha.mean(), palpha.std())) palpha = palpha[0] -prho = np.random.beta(49.5,49.5, size=1e5) +prho = np.random.beta(49.5, 49.5, size=1e5) print("Beta Distribution") -print("First moment: %s\nSecond moment: %s" % (prho.mean(),prho.std())) +print("First moment: %s\nSecond moment: %s" % (prho.mean(), prho.std())) prho = prho[0] -psigma = igamma.rvs(1.,4.**2/2, size=1e5) +psigma = igamma.rvs(1.0, 4.0**2 / 2, size=1e5) print("Inverse Gamma Distribution") -print("First moment: %s\nSecond moment: %s" % (psigma.mean(),psigma.std())) +print("First moment: %s\nSecond moment: %s" % (psigma.mean(), psigma.std())) # First do the univariate case # y_t = theta_t + epsilon_t @@ -90,38 +102,38 @@ def integ(x): draws = 400 # prior beliefs, from JME paper -mu_, lambda_ = 1.,2. +mu_, lambda_ = 1.0, 2.0 # Model 1 -y1y2 = np.zeros((draws,2)) +y1y2 = np.zeros((draws, 2)) for draw in range(draws): - theta = np.random.normal(mu_,lambda_**2) + theta = np.random.normal(mu_, lambda_**2) y1 = theta + np.random.normal() y2 = theta + np.random.normal() - y1y2[draw] = y1,y2 + y1y2[draw] = y1, y2 # log marginal distribution -lnp1p2_mod1 = stats.norm.pdf(y1,loc=mu_, scale=lambda_**2+1)*\ - stats.norm.pdf(y2,mu_,scale=lambda_**2+1) +lnp1p2_mod1 = stats.norm.pdf( + y1, loc=mu_, scale=lambda_**2 + 1 +) * stats.norm.pdf(y2, mu_, scale=lambda_**2 + 1) # Model 2 -pmu_pairsp1 = np.zeros((draws,2)) -y1y2pairsp1 = np.zeros((draws,2)) +pmu_pairsp1 = np.zeros((draws, 2)) +y1y2pairsp1 = np.zeros((draws, 2)) # prior 1 for draw in range(draws): - theta1 = np.random.uniform(0,1) + theta1 = np.random.uniform(0, 1) theta2 = np.random.normal(mu_, lambda_**2) -# mu = theta2/(1-theta1) -#do not do this to maintain independence theta2 is the _location_ -# y1 = np.random.normal(mu_, lambda_**2) + # mu = theta2/(1-theta1) + # do not do this to maintain independence theta2 is the _location_ + # y1 = np.random.normal(mu_, lambda_**2) y1 = theta2 -# pmu_pairsp1[draw] = mu, theta1 - pmu_pairsp1[draw] = theta2, theta1 # mean, autocorr + # pmu_pairsp1[draw] = mu, theta1 + pmu_pairsp1[draw] = theta2, theta1 # mean, autocorr y2 = theta2 + theta1 * y1 + np.random.normal() - y1y2pairsp1[draw] = y1,y2 - + y1y2pairsp1[draw] = y1, y2 # for a = 0, b = 1 - epsilon = .99999 @@ -129,65 +141,70 @@ def integ(x): # variance is 1./12 * .99999**2 # Model 2 -pmu_pairsp2 = np.zeros((draws,2)) -y1y2pairsp2 = np.zeros((draws,2)) +pmu_pairsp2 = np.zeros((draws, 2)) +y1y2pairsp2 = np.zeros((draws, 2)) # prior 2 theta12_2 = [] for draw in range(draws): -# y1 = np.random.uniform(-4,6) - theta1 = np.random.uniform(0,1) - theta2 = np.random.normal(mu_*(1-theta1), lambda_**2*(1-theta1)**2) - theta12_2.append([theta1,theta2]) - - mu = theta2/(1-theta1) - y1 = np.random.normal(mu_,lambda_**2) + # y1 = np.random.uniform(-4,6) + theta1 = np.random.uniform(0, 1) + theta2 = np.random.normal( + mu_ * (1 - theta1), lambda_**2 * (1 - theta1) ** 2 + ) + theta12_2.append([theta1, theta2]) + + mu = theta2 / (1 - theta1) + y1 = np.random.normal(mu_, lambda_**2) y2 = theta2 + theta1 * y1 + np.random.normal() pmu_pairsp2[draw] = mu, theta1 - y1y2pairsp2[draw] = y1,y2 + y1y2pairsp2[draw] = y1, y2 fig = plt.figure() fsp = fig.add_subplot(221) -fsp.scatter(pmu_pairsp1[:,0], pmu_pairsp1[:,1], color='b', 
facecolor='none') -fsp.set_ylabel('Autocorrelation (Y)') -fsp.set_xlabel('Mean (Y)') -fsp.set_title('Model 2 (P1)') -fsp.axis([-20,20,0,1]) +fsp.scatter(pmu_pairsp1[:, 0], pmu_pairsp1[:, 1], color="b", facecolor="none") +fsp.set_ylabel("Autocorrelation (Y)") +fsp.set_xlabel("Mean (Y)") +fsp.set_title("Model 2 (P1)") +fsp.axis([-20, 20, 0, 1]) fsp = fig.add_subplot(222) -fsp.scatter(pmu_pairsp2[:,0],pmu_pairsp2[:,1], color='b', facecolor='none') -fsp.set_title('Model 2 (P2)') -fsp.set_ylabel('Autocorrelation (Y)') -fsp.set_xlabel('Mean (Y)') -fsp.set_title('Model 2 (P2)') -fsp.axis([-20,20,0,1]) +fsp.scatter(pmu_pairsp2[:, 0], pmu_pairsp2[:, 1], color="b", facecolor="none") +fsp.set_title("Model 2 (P2)") +fsp.set_ylabel("Autocorrelation (Y)") +fsp.set_xlabel("Mean (Y)") +fsp.set_title("Model 2 (P2)") +fsp.axis([-20, 20, 0, 1]) fsp = fig.add_subplot(223) -fsp.scatter(y1y2pairsp1[:,0], y1y2pairsp1[:,1], color='b', marker='o', - facecolor='none') -fsp.scatter(y1y2[:,0], y1y2[:,1], color ='g', marker='+') -fsp.set_title('Model 1 vs. Model 2 (P1)') -fsp.set_ylabel('Y(2)') -fsp.set_xlabel('Y(1)') -fsp.axis([-20,20,-20,20]) +fsp.scatter( + y1y2pairsp1[:, 0], + y1y2pairsp1[:, 1], + color="b", + marker="o", + facecolor="none", +) +fsp.scatter(y1y2[:, 0], y1y2[:, 1], color="g", marker="+") +fsp.set_title("Model 1 vs. Model 2 (P1)") +fsp.set_ylabel("Y(2)") +fsp.set_xlabel("Y(1)") +fsp.axis([-20, 20, -20, 20]) fsp = fig.add_subplot(224) -fsp.scatter(y1y2pairsp2[:,0], y1y2pairsp2[:,1], color='b', marker='o') -fsp.scatter(y1y2[:,0], y1y2[:,1], color='g', marker='+') -fsp.set_title('Model 1 vs. Model 2 (P2)') -fsp.set_ylabel('Y(2)') -fsp.set_xlabel('Y(1)') -fsp.axis([-20,20,-20,20]) +fsp.scatter(y1y2pairsp2[:, 0], y1y2pairsp2[:, 1], color="b", marker="o") +fsp.scatter(y1y2[:, 0], y1y2[:, 1], color="g", marker="+") +fsp.set_title("Model 1 vs. Model 2 (P2)") +fsp.set_ylabel("Y(2)") +fsp.set_xlabel("Y(1)") +fsp.axis([-20, 20, -20, 20]) -#plt.show() +# plt.show() -#TODO: this does not look the same as the working paper? -#NOTE: but it matches the language? I think mine is right! +# TODO: this does not look the same as the working paper? +# NOTE: but it matches the language? I think mine is right! # Contour plots. # on the basis of observed data. ie., the mgrid -#np.mgrid[6:-4:10j,-4:6:10j] - - +# np.mgrid[6:-4:10j,-4:6:10j] # Example 2: @@ -211,30 +228,32 @@ def integ(x): # palpha ~ Gamma(2.00,.10) # mean = 2.00 # std = .1 which implies k = 400, theta = .005 -palpha = np.random.gamma(400,.005) +palpha = np.random.gamma(400, 0.005) # pi ~ Beta(.5,.05) pi = np.random.beta(49.5, 49.5) # psigma ~ InvGamma(1.00,4.00) -#def invgamma(a,b): +# def invgamma(a,b): # return np.sqrt(b*a**2/np.sum(np.random.random(b,1)**2, axis=1)) -#NOTE: Use inverse gamma distribution igamma -psigma = igamma.rvs(1.,4.0, size=1e6) #TODO: parameterization is not correct vs. +# NOTE: Use inverse gamma distribution igamma +psigma = igamma.rvs( + 1.0, 4.0, size=1e6 +) # TODO: parameterization is not correct vs. # Del Negro and Schorfheide if pymc_installed: - psigma2 = pymc.rinverse_gamma(1.,4.0, size=1e6) + psigma2 = pymc.rinverse_gamma(1.0, 4.0, size=1e6) else: - psigma2 = stats.invgamma.rvs(1., scale=4.0, size=1e6) + psigma2 = stats.invgamma.rvs(1.0, scale=4.0, size=1e6) nsims = 500 y = np.zeros((nsims)) -#for i in range(1,nsims): +# for i in range(1,nsims): # y[i] = .9*y[i-1] + 1/(1-p1/alpha) + np.random.normal() -#Are these supposed to be sampled jointly? +# Are these supposed to be sampled jointly? 
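# A hedged cross-check, not in the original script: the two-parameter
# igamma defined above has pdf(x; a, b) = b**a * x**(-a-1) * exp(-b/x)
# / gamma(a), which is scipy's invgamma with b playing the role of the
# scale, so the prior draw could equivalently be written as
#     psigma_alt = stats.invgamma.rvs(1.0, scale=4.0, size=1000)
# matching the fallback branch used above when pymc is absent.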
# InvGamma(sigma|v,s) propto sigma**(-v-1)*e**(-vs**2/2*sigma**2) -#igamma = +# igamma = # M2: y_t = 1/alpha * E_t[y_t+1] + p2*y_t-1 + mu_t # mu_t ~ epsilon_t diff --git a/statsmodels/stats/proportion.py b/statsmodels/stats/proportion.py index f8df2900f84..ee068b74bac 100644 --- a/statsmodels/stats/proportion.py +++ b/statsmodels/stats/proportion.py @@ -622,7 +622,11 @@ def binom_test(count, nobs, prop=0.5, alternative='two-sided'): if np.any(prop > 1.0) or np.any(prop < 0.0): raise ValueError("p must be in range [0,1]") if alternative in ['2s', 'two-sided']: - pval = stats.binom_test(count, n=nobs, p=prop) + try: + pval = stats.binomtest(int(count), n=int(nobs), p=prop).pvalue + except AttributeError: + # Remove after min SciPy >= 1.7 + pval = stats.binom_test(count, n=nobs, p=prop) elif alternative in ['l', 'larger']: pval = stats.binom.sf(count-1, nobs, prop) elif alternative in ['s', 'smaller']:
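A hedged sketch of the guarded call pattern the final hunk introduces
(illustrative counts; ``scipy.stats.binomtest`` exists from SciPy 1.7
onward, which is why the fallback is marked for removal once that is the
minimum supported version):

    from scipy import stats

    try:
        # SciPy >= 1.7: object-returning API
        pval = stats.binomtest(3, n=10, p=0.5).pvalue
    except AttributeError:
        # older SciPy: function returning the p-value directly
        pval = stats.binom_test(3, n=10, p=0.5)

    # either branch yields the exact two-sided p-value, here 0.34375

Catching AttributeError rather than pinning a SciPy version keeps the
shim free of any version-parsing logic at import time.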