From dffb34055254ae97ba4854a479f293f07751ff80 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 7 May 2024 10:02:02 +0100 Subject: [PATCH] BUG: Correct cov_kwargs -> cov_kwds Consistently use cov_kwds Fix bug that can occur in some tests due to wrong name passed to OLS.fit closes #9121 --- examples/notebooks/gls.ipynb | 40 +-- examples/notebooks/influence_glm_logit.ipynb | 48 +-- examples/notebooks/ols.ipynb | 110 +++---- .../notebooks/regression_diagnostics.ipynb | 40 +-- examples/notebooks/robust_models_1.ipynb | 280 +++++++++--------- examples/notebooks/wls.ipynb | 40 +-- statsmodels/stats/diagnostic.py | 31 +- statsmodels/stats/tests/test_diagnostic.py | 41 ++- statsmodels/tsa/tests/test_seasonal.py | 26 +- 9 files changed, 351 insertions(+), 305 deletions(-) diff --git a/examples/notebooks/gls.ipynb b/examples/notebooks/gls.ipynb index d19947ba6db..de5afc41591 100644 --- a/examples/notebooks/gls.ipynb +++ b/examples/notebooks/gls.ipynb @@ -15,12 +15,12 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "import numpy as np\n", "\n", "import statsmodels.api as sm" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -37,12 +37,12 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "data = sm.datasets.longley.load()\n", "data.exog = sm.add_constant(data.exog)\n", "print(data.exog.head())" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -64,10 +64,10 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "ols_resid = sm.OLS(data.endog, data.exog).fit().resid" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -90,14 +90,14 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "resid_fit = sm.OLS(\n", " np.asarray(ols_resid)[1:], sm.add_constant(np.asarray(ols_resid)[:-1])\n", ").fit()\n", "print(resid_fit.tvalues[1])\n", "print(resid_fit.pvalues[1])" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -115,10 +115,10 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "rho = resid_fit.params[1]" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -136,12 +136,12 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "from scipy.linalg import toeplitz\n", "\n", "toeplitz(range(5))" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -151,10 +151,10 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "order = toeplitz(range(len(ols_resid)))" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -168,12 +168,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "sigma = rho ** order\n", "gls_model = sm.GLS(data.endog, data.exog, sigma=sigma)\n", "gls_results = gls_model.fit()" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -188,12 +188,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "glsar_model = sm.GLSAR(data.endog, data.exog, 1)\n", "glsar_results = glsar_model.iterative_fit(1)\n", "print(glsar_results.summary())" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -210,13 +210,13 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(gls_results.params)\n", "print(glsar_results.params)\n", "print(gls_results.bse)\n", "print(glsar_results.bse)" - ] + ], + "outputs": [] } ], "metadata": { diff --git a/examples/notebooks/influence_glm_logit.ipynb b/examples/notebooks/influence_glm_logit.ipynb index ca371ebaadb..7a87d7b1cfe 100644 --- a/examples/notebooks/influence_glm_logit.ipynb +++ b/examples/notebooks/influence_glm_logit.ipynb @@ -22,7 +22,6 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "import os.path\n", "import pandas as pd\n", @@ -33,7 +32,8 @@ "\n", "plt.rc(\"figure\", figsize=(16, 8))\n", "plt.rc(\"font\", size=14)" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -43,7 +43,6 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "import statsmodels.stats.tests.test_influence\n", "\n", @@ -53,7 +52,8 @@ "file_name = \"binary_constrict.csv\"\n", "file_path = os.path.join(cur_dir, \"results\", file_name)\n", "df = pd.read_csv(file_path, index_col=0)" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -63,7 +63,6 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "res = GLM(\n", " df[\"constrict\"],\n", @@ -71,7 +70,8 @@ " family=families.Binomial(),\n", ").fit(attach_wls=True, atol=1e-10)\n", "print(res.summary())" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -92,10 +92,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "infl = res.get_influence(observed=False)" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -105,11 +105,11 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "summ_df = infl.summary_frame()\n", "summ_df.sort_values(\"cooks_d\", ascending=False)[:10]" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -119,11 +119,11 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "fig = infl.plot_influence()\n", "fig.tight_layout(pad=1.0)" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -133,11 +133,11 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "fig = infl.plot_index(y_var=\"cooks\", threshold=2 * infl.cooks_distance[0].mean())\n", "fig.tight_layout(pad=1.0)" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -147,11 +147,11 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "fig = infl.plot_index(y_var=\"resid\", threshold=1)\n", "fig.tight_layout(pad=1.0)" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -161,11 +161,11 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "fig = infl.plot_index(y_var=\"dfbeta\", idx=1, threshold=0.5)\n", "fig.tight_layout(pad=1.0)" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -175,11 +175,11 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "fig = infl.plot_index(y_var=\"dfbeta\", idx=2, threshold=0.5)\n", "fig.tight_layout(pad=1.0)" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -189,11 +189,11 @@ "outputs_hidden": false } }, - "outputs": [], "source": [ "fig = infl.plot_index(y_var=\"dfbeta\", idx=0, threshold=0.5)\n", "fig.tight_layout(pad=1.0)" - ] + ], + "outputs": [] }, { "cell_type": "code", @@ -201,8 +201,8 @@ "metadata": { "tags": [] }, - "outputs": [], - "source": [] + "source": [], + "outputs": [] } ], "metadata": { diff --git a/examples/notebooks/ols.ipynb b/examples/notebooks/ols.ipynb index f322cb1988d..3af23dcca56 100644 --- a/examples/notebooks/ols.ipynb +++ b/examples/notebooks/ols.ipynb @@ -11,16 +11,15 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "%matplotlib inline" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -28,7 +27,8 @@ "import statsmodels.api as sm\n", "\n", "np.random.seed(9876789)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -43,14 +43,14 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "nsample = 100\n", "x = np.linspace(0, 10, 100)\n", "X = np.column_stack((x, x ** 2))\n", "beta = np.array([1, 0.1, 10])\n", "e = np.random.normal(size=nsample)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -63,11 +63,11 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "X = sm.add_constant(X)\n", "y = np.dot(X, beta) + e" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -80,12 +80,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "model = sm.OLS(y, X)\n", "results = model.fit()\n", "print(results.summary())" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -98,11 +98,11 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(\"Parameters: \", results.params)\n", "print(\"R2: \", results.rsquared)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -117,7 +117,6 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "nsample = 50\n", "sig = 0.5\n", @@ -127,7 +126,8 @@ "\n", "y_true = np.dot(X, beta)\n", "y = y_true + sig * np.random.normal(size=nsample)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -140,11 +140,11 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "res = sm.OLS(y, X).fit()\n", "print(res.summary())" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -157,12 +157,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(\"Parameters: \", res.params)\n", "print(\"Standard errors: \", res.bse)\n", "print(\"Predicted values: \", res.predict())" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -175,7 +175,6 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "pred_ols = res.get_prediction()\n", "iv_l = pred_ols.summary_frame()[\"obs_ci_lower\"]\n", @@ -189,7 +188,8 @@ "ax.plot(x, iv_u, \"r--\")\n", "ax.plot(x, iv_l, \"r--\")\n", "ax.legend(loc=\"best\")" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -204,7 +204,6 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "nsample = 50\n", "groups = np.zeros(nsample, int)\n", @@ -222,7 +221,8 @@ "y_true = np.dot(X, beta)\n", "e = np.random.normal(size=nsample)\n", "y = y_true + e" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -235,13 +235,13 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(X[:5, :])\n", "print(y[:5])\n", "print(groups)\n", "print(dummy[:5, :])" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -254,11 +254,11 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "res2 = sm.OLS(y, X).fit()\n", "print(res2.summary())" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -271,7 +271,6 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "pred_ols2 = res2.get_prediction()\n", "iv_l = pred_ols2.summary_frame()[\"obs_ci_lower\"]\n", @@ -285,7 +284,8 @@ "ax.plot(x, iv_u, \"r--\")\n", "ax.plot(x, iv_l, \"r--\")\n", "legend = ax.legend(loc=\"best\")" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -302,12 +302,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "R = [[0, 1, 0, 0], [0, 0, 1, 0]]\n", "print(np.array(R))\n", "print(res2.f_test(R))" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -320,10 +320,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(res2.f_test(\"x2 = x3 = 0\"))" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -338,32 +338,32 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "beta = [1.0, 0.3, -0.0, 10]\n", "y_true = np.dot(X, beta)\n", "y = y_true + np.random.normal(size=nsample)\n", "\n", "res3 = sm.OLS(y, X).fit()" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(res3.f_test(R))" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(res3.f_test(\"x2 = x3 = 0\"))" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -378,14 +378,14 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "from statsmodels.datasets.longley import load_pandas\n", "\n", "y = load_pandas().endog\n", "X = load_pandas().exog\n", "X = sm.add_constant(X)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -398,12 +398,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "ols_model = sm.OLS(y, X)\n", "ols_results = ols_model.fit()\n", "print(ols_results.summary())" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -418,7 +418,6 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "norm_x = X.values\n", "for i, name in enumerate(X):\n", @@ -426,7 +425,8 @@ " continue\n", " norm_x[:, i] = X[name] / np.linalg.norm(X[name])\n", "norm_xtx = np.dot(norm_x.T, norm_x)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -439,12 +439,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "eigs = np.linalg.eigvals(norm_xtx)\n", "condition_number = np.sqrt(eigs.max() / eigs.min())\n", "print(condition_number)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -459,7 +459,6 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "ols_results2 = sm.OLS(y.iloc[:14], X.iloc[:14]).fit()\n", "print(\n", @@ -474,7 +473,8 @@ " ]\n", " )\n", ")" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -487,10 +487,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "infl = ols_results.get_influence()" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -503,19 +503,19 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "2.0 / len(X) ** 0.5" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(infl.summary_frame().filter(regex=\"dfb\"))" - ] + ], + "outputs": [] } ], "metadata": { @@ -539,4 +539,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/notebooks/regression_diagnostics.ipynb b/examples/notebooks/regression_diagnostics.ipynb index 55a5cc047df..bf948f06b25 100644 --- a/examples/notebooks/regression_diagnostics.ipynb +++ b/examples/notebooks/regression_diagnostics.ipynb @@ -27,16 +27,15 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "%matplotlib inline" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "from statsmodels.compat import lzip\n", "\n", @@ -55,7 +54,8 @@ "\n", "# Inspect the results\n", "print(results.summary())" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -75,12 +75,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "name = [\"Jarque-Bera\", \"Chi^2 two-tail prob.\", \"Skew\", \"Kurtosis\"]\n", "test = sms.jarque_bera(results.resid)\n", "lzip(name, test)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -93,12 +93,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "name = [\"Chi^2\", \"Two-tail probability\"]\n", "test = sms.omni_normtest(results.resid)\n", "lzip(name, test)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -113,13 +113,13 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "from statsmodels.stats.outliers_influence import OLSInfluence\n", "\n", "test_class = OLSInfluence(results)\n", "test_class.dfbetas[:5, :]" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -134,13 +134,13 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "from statsmodels.graphics.regressionplots import plot_leverage_resid2\n", "\n", "fig, ax = plt.subplots(figsize=(8, 6))\n", "fig = plot_leverage_resid2(results, ax=ax)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -162,10 +162,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "np.linalg.cond(results.model.exog)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -180,12 +180,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "name = [\"Lagrange multiplier statistic\", \"p-value\", \"f-value\", \"f p-value\"]\n", "test = sms.het_breuschpagan(results.resid, results.model.exog)\n", "lzip(name, test)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -198,12 +198,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "name = [\"F statistic\", \"p-value\"]\n", "test = sms.het_goldfeldquandt(results.resid, results.model.exog)\n", "lzip(name, test)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -218,12 +218,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "name = [\"t value\", \"p value\"]\n", "test = sms.linear_harvey_collier(results)\n", "lzip(name, test)" - ] + ], + "outputs": [] } ], "metadata": { diff --git a/examples/notebooks/robust_models_1.ipynb b/examples/notebooks/robust_models_1.ipynb index 21414cc29e9..a65afcfed9f 100644 --- a/examples/notebooks/robust_models_1.ipynb +++ b/examples/notebooks/robust_models_1.ipynb @@ -11,16 +11,15 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "%matplotlib inline" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "from statsmodels.compat import lmap\n", "import numpy as np\n", @@ -28,7 +27,8 @@ "import matplotlib.pyplot as plt\n", "\n", "import statsmodels.api as sm" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -56,16 +56,15 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "norms = sm.robust.norms" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "def plot_weights(support, weights_func, xlabels, xticks):\n", " fig = plt.figure(figsize=(12, 8))\n", @@ -75,7 +74,8 @@ " ax.set_xticklabels(xlabels, fontsize=16)\n", " ax.set_ylim(-0.1, 1.1)\n", " return ax" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -88,16 +88,15 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "help(norms.AndrewWave.weights)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "a = 1.339\n", "support = np.linspace(-np.pi * a, np.pi * a, 100)\n", @@ -105,7 +104,8 @@ "plot_weights(\n", " support, andrew.weights, [\"$-\\pi*a$\", \"0\", \"$\\pi*a$\"], [-np.pi * a, 0, np.pi * a]\n", ")" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -118,22 +118,22 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "help(norms.Hampel.weights)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "c = 8\n", "support = np.linspace(-3 * c, 3 * c, 1000)\n", "hampel = norms.Hampel(a=2.0, b=4.0, c=c)\n", "plot_weights(support, hampel.weights, [\"3*c\", \"0\", \"3*c\"], [-3 * c, 0, 3 * c])" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -146,22 +146,22 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "help(norms.HuberT.weights)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "t = 1.345\n", "support = np.linspace(-3 * t, 3 * t, 1000)\n", "huber = norms.HuberT(t=t)\n", "plot_weights(support, huber.weights, [\"-3*t\", \"0\", \"3*t\"], [-3 * t, 0, 3 * t])" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -174,21 +174,21 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "help(norms.LeastSquares.weights)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "support = np.linspace(-3, 3, 1000)\n", "lst_sq = norms.LeastSquares()\n", "plot_weights(support, lst_sq.weights, [\"-3\", \"0\", \"3\"], [-3, 0, 3])" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -201,22 +201,22 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "help(norms.RamsayE.weights)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "a = 0.3\n", "support = np.linspace(-3 * a, 3 * a, 1000)\n", "ramsay = norms.RamsayE(a=a)\n", "plot_weights(support, ramsay.weights, [\"-3*a\", \"0\", \"3*a\"], [-3 * a, 0, 3 * a])" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -229,22 +229,22 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "help(norms.TrimmedMean.weights)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "c = 2\n", "support = np.linspace(-3 * c, 3 * c, 1000)\n", "trimmed = norms.TrimmedMean(c=c)\n", "plot_weights(support, trimmed.weights, [\"-3*c\", \"0\", \"3*c\"], [-3 * c, 0, 3 * c])" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -257,22 +257,22 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "help(norms.TukeyBiweight.weights)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "c = 4.685\n", "support = np.linspace(-3 * c, 3 * c, 1000)\n", "tukey = norms.TukeyBiweight(c=c)\n", "plot_weights(support, tukey.weights, [\"-3*c\", \"0\", \"3*c\"], [-3 * c, 0, 3 * c])" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -292,10 +292,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "x = np.array([1, 2, 3, 4, 500])" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -308,10 +308,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "x.mean()" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -324,10 +324,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "np.median(x)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -341,10 +341,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "x.std()" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -372,37 +372,37 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "stats.norm.ppf(0.75)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(x)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "sm.robust.scale.mad(x)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "np.array([1, 2, 3, 4, 5.0]).std()" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -428,10 +428,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "sm.robust.scale.iqr(x)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -455,10 +455,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "sm.robust.scale.qn_scale(x)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -472,89 +472,89 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "np.random.seed(12345)\n", "fat_tails = stats.t(6).rvs(40)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "kde = sm.nonparametric.KDEUnivariate(fat_tails)\n", "kde.fit()\n", "fig = plt.figure(figsize=(12, 8))\n", "ax = fig.add_subplot(111)\n", "ax.plot(kde.support, kde.density)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(fat_tails.mean(), fat_tails.std())" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(stats.norm.fit(fat_tails))" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(stats.t.fit(fat_tails, f0=6))" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "huber = sm.robust.scale.Huber()\n", "loc, scale = huber(fat_tails)\n", "print(loc, scale)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "sm.robust.mad(fat_tails)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "sm.robust.mad(fat_tails, c=stats.t(6).ppf(0.75))" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "sm.robust.scale.mad(fat_tails)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -567,35 +567,34 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "from statsmodels.graphics.api import abline_plot\n", "from statsmodels.formula.api import ols, rlm" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "prestige = sm.datasets.get_rdataset(\"Duncan\", \"carData\", cache=True).data" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(prestige.head(10))" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "fig = plt.figure(figsize=(12, 12))\n", "ax1 = fig.add_subplot(211, xlabel=\"Income\", ylabel=\"Prestige\")\n", @@ -604,87 +603,88 @@ "ax1.annotate(\"Minister\", xy_outlier, xy_outlier + 1, fontsize=16)\n", "ax2 = fig.add_subplot(212, xlabel=\"Education\", ylabel=\"Prestige\")\n", "ax2.scatter(prestige.education, prestige.prestige)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "ols_model = ols(\"prestige ~ income + education\", prestige).fit()\n", "print(ols_model.summary())" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "infl = ols_model.get_influence()\n", "student = infl.summary_frame()[\"student_resid\"]\n", "print(student)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(student.loc[np.abs(student) > 2])" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(infl.summary_frame().loc[\"minister\"])" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "sidak = ols_model.outlier_test(\"sidak\")\n", "sidak.sort_values(\"unadj_p\", inplace=True)\n", "print(sidak)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "fdr = ols_model.outlier_test(\"fdr_bh\")\n", "fdr.sort_values(\"unadj_p\", inplace=True)\n", "print(fdr)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "rlm_model = rlm(\"prestige ~ income + education\", prestige).fit()\n", "print(rlm_model.summary())" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(rlm_model.weights)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -704,16 +704,15 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "dta = sm.datasets.get_rdataset(\"starsCYG\", \"robustbase\", cache=True).data" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "from matplotlib.patches import Ellipse\n", "\n", @@ -742,40 +741,41 @@ "for i, row in dta.loc[dta[\"log.Te\"] < 3.8].iterrows():\n", " ax.annotate(i, row, row + 0.01, fontsize=14)\n", "xlim, ylim = ax.get_xlim(), ax.get_ylim()" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "from IPython.display import Image\n", "\n", "Image(filename=\"star_diagram.png\")" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "y = dta[\"log.light\"]\n", "X = sm.add_constant(dta[\"log.Te\"], prepend=True)\n", "ols_model = sm.OLS(y, X).fit()\n", "abline_plot(model_results=ols_model, ax=ax)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "rlm_mod = sm.RLM(y, X, sm.robust.norms.TrimmedMean(0.5)).fit()\n", "abline_plot(model_results=rlm_mod, ax=ax, color=\"red\")" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -788,43 +788,43 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "infl = ols_model.get_influence()" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "h_bar = 2 * (ols_model.df_model + 1) / ols_model.nobs\n", "hat_diag = infl.summary_frame()[\"hat_diag\"]\n", "hat_diag.loc[hat_diag > h_bar]" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "sidak2 = ols_model.outlier_test(\"sidak\")\n", "sidak2.sort_values(\"unadj_p\", inplace=True)\n", "print(sidak2)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "fdr2 = ols_model.outlier_test(\"fdr_bh\")\n", "fdr2.sort_values(\"unadj_p\", inplace=True)\n", "print(fdr2)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -837,24 +837,24 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "l = ax.lines[-1]\n", "l.remove()\n", "del l" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "weights = np.ones(len(X))\n", "weights[X[X[\"log.Te\"] < 3.8].index.values - 1] = 0\n", "wls_model = sm.WLS(y, X, weights=weights).fit()\n", "abline_plot(model_results=wls_model, ax=ax, color=\"green\")" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -868,11 +868,11 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "yy = y.values[:, None]\n", "xx = X[\"log.Te\"].values[:, None]" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -923,20 +923,20 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "params = [-4.969387980288108, 2.2531613477892365] # Computed using R\n", "print(params[0], params[1])" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "abline_plot(intercept=params[0], slope=params[1], ax=ax, color=\"red\")" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -949,7 +949,6 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "np.random.seed(12345)\n", "nobs = 200\n", @@ -959,13 +958,13 @@ "X = sm.add_constant(X, prepend=True) # np.c_[np.ones(nobs), X]\n", "mc_iter = 500\n", "contaminate = 0.25 # percentage of response variables to contaminate" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "all_betas = []\n", "for i in range(mc_iter):\n", @@ -974,18 +973,19 @@ " y[random_idx] = np.random.uniform(-750, 750)\n", " beta_hat = sm.RLM(y, X).fit().params\n", " all_betas.append(beta_hat)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "all_betas = np.asarray(all_betas)\n", "se_loss = lambda x: np.linalg.norm(x, ord=2) ** 2\n", "se_beta = lmap(se_loss, all_betas - beta_true)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -998,37 +998,37 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "np.array(se_beta).mean()" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "all_betas.mean(0)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "beta_true" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "se_loss(all_betas.mean(0) - beta_true)" - ] + ], + "outputs": [] } ], "metadata": { diff --git a/examples/notebooks/wls.ipynb b/examples/notebooks/wls.ipynb index df208af67f1..4bbf503e207 100644 --- a/examples/notebooks/wls.ipynb +++ b/examples/notebooks/wls.ipynb @@ -11,16 +11,15 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "%matplotlib inline" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -29,7 +28,8 @@ "from statsmodels.iolib.table import SimpleTable, default_txt_fmt\n", "\n", "np.random.seed(1024)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -50,7 +50,6 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "nsample = 50\n", "x = np.linspace(0, 20, nsample)\n", @@ -64,7 +63,8 @@ "e = np.random.normal(size=nsample)\n", "y = y_true + sig * w * e\n", "X = X[:, [0, 1]]" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -79,12 +79,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "mod_wls = sm.WLS(y, X, weights=1.0 / (w ** 2))\n", "res_wls = mod_wls.fit()\n", "print(res_wls.summary())" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -99,12 +99,12 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "res_ols = sm.OLS(y, X).fit()\n", "print(res_ols.params)\n", "print(res_wls.params)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -119,7 +119,6 @@ "metadata": { "scrolled": true }, - "outputs": [], "source": [ "se = np.vstack(\n", " [\n", @@ -136,7 +135,8 @@ "rownames = [\"WLS\", \"OLS\", \"OLS_HC0\", \"OLS_HC1\", \"OLS_HC3\", \"OLS_HC3\"]\n", "tabl = SimpleTable(se, colnames, rownames, txt_fmt=default_txt_fmt)\n", "print(tabl)" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -149,24 +149,24 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "covb = res_ols.cov_params()\n", "prediction_var = res_ols.mse_resid + (X * np.dot(covb, X.T).T).sum(1)\n", "prediction_std = np.sqrt(prediction_var)\n", "tppf = stats.t.ppf(0.975, res_ols.df_resid)" - ] + ], + "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "pred_ols = res_ols.get_prediction()\n", "iv_l_ols = pred_ols.summary_frame()[\"obs_ci_lower\"]\n", "iv_u_ols = pred_ols.summary_frame()[\"obs_ci_upper\"]" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -179,7 +179,6 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "pred_wls = res_wls.get_prediction()\n", "iv_l = pred_wls.summary_frame()[\"obs_ci_lower\"]\n", @@ -197,7 +196,8 @@ "ax.plot(x, iv_u, \"g--\", label=\"WLS\")\n", "ax.plot(x, iv_l, \"g--\")\n", "ax.legend(loc=\"best\")" - ] + ], + "outputs": [] }, { "cell_type": "markdown", @@ -212,7 +212,6 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "resid1 = res_ols.resid[w == 1.0]\n", "var1 = resid1.var(ddof=int(res_ols.df_model) + 1)\n", @@ -222,7 +221,8 @@ "w_est[w != 1.0] = np.sqrt(var2) / np.sqrt(var1)\n", "res_fwls = sm.WLS(y, X, 1.0 / ((w_est ** 2))).fit()\n", "print(res_fwls.summary())" - ] + ], + "outputs": [] } ], "metadata": { diff --git a/statsmodels/stats/diagnostic.py b/statsmodels/stats/diagnostic.py index 8ab3791347e..a9f001c17b5 100644 --- a/statsmodels/stats/diagnostic.py +++ b/statsmodels/stats/diagnostic.py @@ -237,8 +237,9 @@ def compare_j(results_x, results_z, store=False): return tstat, pval +@deprecate_kwarg("cov_kwargs", "cov_kwds") def compare_encompassing(results_x, results_z, cov_type="nonrobust", - cov_kwargs=None): + cov_kwds=None): r""" Davidson-MacKinnon encompassing test for comparing non-nested models @@ -253,7 +254,7 @@ def compare_encompassing(results_x, results_z, cov_type="nonrobust", OLS covariance estimator. Specify one of "HC0", "HC1", "HC2", "HC3" to use White's covariance estimator. All covariance types supported by ``OLS.fit`` are accepted. - cov_kwargs : dict, default None + cov_kwds : dict, default None Dictionary of covariance options passed to ``OLS.fit``. See OLS.fit for more details. @@ -317,8 +318,8 @@ def _test_nested(endog, a, b, cov_est, cov_kwds): df_num, df_denom = int(test.df_num), int(test.df_denom) return stat, pvalue, df_num, df_denom - x_nested = _test_nested(y, x, z, cov_type, cov_kwargs) - z_nested = _test_nested(y, z, x, cov_type, cov_kwargs) + x_nested = _test_nested(y, x, z, cov_type, cov_kwds) + z_nested = _test_nested(y, z, x, cov_type, cov_kwds) return pd.DataFrame([x_nested, z_nested], index=["x", "z"], columns=["stat", "pvalue", "df_num", "df_denom"]) @@ -479,9 +480,10 @@ def acorr_ljungbox(x, lags=None, boxpierce=False, model_df=0, period=None, index=lags) +@deprecate_kwarg("cov_kwargs", "cov_kwds") @deprecate_kwarg("maxlag", "nlags") def acorr_lm(resid, nlags=None, store=False, *, period=None, - ddof=0, cov_type="nonrobust", cov_kwargs=None): + ddof=0, cov_type="nonrobust", cov_kwds=None): """ Lagrange Multiplier tests for autocorrelation. @@ -510,7 +512,7 @@ def acorr_lm(resid, nlags=None, store=False, *, period=None, OLS covariance estimator. Specify one of "HC0", "HC1", "HC2", "HC3" to use White's covariance estimator. All covariance types supported by ``OLS.fit`` are accepted. - cov_kwargs : dict, default None + cov_kwds : dict, default None Dictionary of covariance options passed to ``OLS.fit``. See OLS.fit for more details. @@ -545,8 +547,8 @@ def acorr_lm(resid, nlags=None, store=False, *, period=None, """ resid = array_like(resid, "resid", ndim=1) cov_type = string_like(cov_type, "cov_type") - cov_kwargs = {} if cov_kwargs is None else cov_kwargs - cov_kwargs = dict_like(cov_kwargs, "cov_kwargs") + cov_kwds = {} if cov_kwds is None else cov_kwds + cov_kwds = dict_like(cov_kwds, "cov_kwds") nobs = resid.shape[0] if period is not None and nlags is None: maxlag = min(nobs // 5, 2 * period) @@ -563,7 +565,7 @@ def acorr_lm(resid, nlags=None, store=False, *, period=None, usedlag = maxlag resols = OLS(xshort, xdall[:, :usedlag + 1]).fit(cov_type=cov_type, - cov_kwargs=cov_kwargs) + cov_kwds=cov_kwds) fval = float(resols.fvalue) fpval = float(resols.f_pvalue) if cov_type == "nonrobust": @@ -985,9 +987,10 @@ def het_goldfeldquandt(y, x, idx=None, split=None, drop=None, return fval, fpval, ordering +@deprecate_kwarg("cov_kwargs", "cov_kwds") @deprecate_kwarg("result", "res") def linear_reset(res, power=3, test_type="fitted", use_f=False, - cov_type="nonrobust", cov_kwargs=None): + cov_type="nonrobust", cov_kwds=None): r""" Ramsey's RESET test for neglected nonlinearity @@ -1015,7 +1018,7 @@ def linear_reset(res, power=3, test_type="fitted", use_f=False, OLS covariance estimator. Specify one of "HC0", "HC1", "HC2", "HC3" to use White's covariance estimator. All covariance types supported by ``OLS.fit`` are accepted. - cov_kwargs : dict, default None + cov_kwds : dict, default None Dictionary of covariance options passed to ``OLS.fit``. See OLS.fit for more details. @@ -1055,7 +1058,7 @@ def linear_reset(res, power=3, test_type="fitted", use_f=False, "non-constant column.") test_type = string_like(test_type, "test_type", options=("fitted", "exog", "princomp")) - cov_kwargs = dict_like(cov_kwargs, "cov_kwargs", optional=True) + cov_kwds = dict_like(cov_kwds, "cov_kwds", optional=True) use_f = bool_like(use_f, "use_f") if isinstance(power, int): if power < 2: @@ -1093,8 +1096,8 @@ def linear_reset(res, power=3, test_type="fitted", use_f=False, aug_exog = np.hstack([exog] + [aug ** p for p in power]) mod_class = res.model.__class__ mod = mod_class(res.model.data.endog, aug_exog) - cov_kwargs = {} if cov_kwargs is None else cov_kwargs - res = mod.fit(cov_type=cov_type, cov_kwargs=cov_kwargs) + cov_kwds = {} if cov_kwds is None else cov_kwds + res = mod.fit(cov_type=cov_type, cov_kwds=cov_kwds) nrestr = aug_exog.shape[1] - exog.shape[1] nparams = aug_exog.shape[1] r_mat = np.eye(nrestr, nparams, k=nparams-nrestr) diff --git a/statsmodels/stats/tests/test_diagnostic.py b/statsmodels/stats/tests/test_diagnostic.py index 2b6019f4c0a..911738dbbb4 100644 --- a/statsmodels/stats/tests/test_diagnostic.py +++ b/statsmodels/stats/tests/test_diagnostic.py @@ -1805,8 +1805,8 @@ def test_encompasing_error(reset_randomstate): @pytest.mark.parametrize( "cov", [ - dict(cov_type="nonrobust", cov_kwargs={}), - dict(cov_type="HC0", cov_kwargs={}), + dict(cov_type="nonrobust", cov_kwds={}), + dict(cov_type="HC0", cov_kwds={}), ], ) def test_reset_smoke(power, test_type, use_f, cov, reset_randomstate): @@ -1826,8 +1826,8 @@ def test_reset_smoke(power, test_type, use_f, cov, reset_randomstate): @pytest.mark.parametrize( "cov", [ - dict(cov_type="nonrobust", cov_kwargs={}), - dict(cov_type="HC0", cov_kwargs={}), + dict(cov_type="nonrobust", cov_kwds={}), + dict(cov_type="HC0", cov_kwds={}), ], ) def test_acorr_lm_smoke(store, ddof, cov, reset_randomstate): @@ -1996,3 +1996,36 @@ def test_diagnostics_pandas(reset_randomstate): res, order_by=np.arange(y.shape[0] - 1, 0 - 1, -1) ) smsdia.spec_white(res.resid, x) + + +def test_deprecated_argument(): + x = np.random.randn(100) + y = 2 * x + np.random.randn(100) + result = OLS(y, add_constant(x)).fit( + cov_type="HAC", cov_kwds={"maxlags": 2} + ) + with pytest.warns(FutureWarning, match="the "): + smsdia.linear_reset( + result, + power=2, + test_type="fitted", + cov_type="HAC", + cov_kwargs={"maxlags": 2}, + ) + + +def test_diagnostics_hac(reset_randomstate): + x = np.random.randn(100) + y = 2 * x + np.random.randn(100) + result = OLS(y, add_constant(x)).fit( + cov_type="HAC", cov_kwds={"maxlags": 2} + ) + reset_test = smsdia.linear_reset( + result, + power=2, + test_type="fitted", + cov_type="HAC", + cov_kwds={"maxlags": 2}, + ) + assert reset_test.statistic > 0 + assert 0 <= reset_test.pvalue <= 1 diff --git a/statsmodels/tsa/tests/test_seasonal.py b/statsmodels/tsa/tests/test_seasonal.py index 2a1f0b630a7..51c441bc457 100644 --- a/statsmodels/tsa/tests/test_seasonal.py +++ b/statsmodels/tsa/tests/test_seasonal.py @@ -315,17 +315,27 @@ def test_seasonal_decompose_multiple(): @pytest.mark.matplotlib -@pytest.mark.parametrize('model', ['additive', 'multiplicative']) -@pytest.mark.parametrize('freq', [4, 12]) -@pytest.mark.parametrize('two_sided', [True, False]) -@pytest.mark.parametrize('extrapolate_trend', [True, False]) -def test_seasonal_decompose_plot(model, freq, two_sided, extrapolate_trend): +@pytest.mark.parametrize("model", ["additive", "multiplicative"]) +@pytest.mark.parametrize("freq", [4, 12]) +@pytest.mark.parametrize("two_sided", [True, False]) +@pytest.mark.parametrize("extrapolate_trend", [True, False]) +def test_seasonal_decompose_plot( + model, freq, two_sided, extrapolate_trend, close_figures +): x = np.array([-50, 175, 149, 214, 247, 237, 225, 329, 729, 809, 530, 489, 540, 457, 195, 176, 337, 239, 128, 102, 232, 429, 3, 98, 43, -141, -77, -13, 125, 361, -45, 184]) x -= x.min() + 1 x2 = np.r_[x[12:], x[:12]] x = np.c_[x, x2] - res = seasonal_decompose(x, period=freq, two_sided=two_sided, - extrapolate_trend=extrapolate_trend) - res.plot() + res = seasonal_decompose( + x, + period=freq, + two_sided=two_sided, + extrapolate_trend=extrapolate_trend + ) + fig = res.plot() + + import matplotlib.pyplot as plt + + plt.close(fig)