Merge a447b6a into c299e3f

statsmodels · Mar 5, 2014 · c8402c2 · c8402c2
2 parents c299e3f + a447b6a
commit c8402c2
Show file tree

Hide file tree

Showing 11 changed files with 241 additions and 138 deletions.
diff --git a/examples/notebooks/categorical_interaction_plot.ipynb b/examples/notebooks/categorical_interaction_plot.ipynb
diff --git a/examples/notebooks/formulas.ipynb b/examples/notebooks/formulas.ipynb
@@ -270,7 +270,7 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "res = sm.ols(formula='Lottery ~ np.log(Literacy)', data=df).fit()\n",
+      "res = smf.ols(formula='Lottery ~ np.log(Literacy)', data=df).fit()\n",
       "print res.params"
      ],
      "language": "python",
@@ -290,7 +290,7 @@
      "input": [
       "def log_plus_1(x):\n",
       "    return np.log(x) + 1.\n",
-      "res = sm.ols(formula='Lottery ~ log_plus_1(Literacy)', data=df).fit()\n",
+      "res = smf.ols(formula='Lottery ~ log_plus_1(Literacy)', data=df).fit()\n",
       "print res.params"
      ],
      "language": "python",

diff --git a/examples/notebooks/interactions_anova.ipynb b/examples/notebooks/interactions_anova.ipynb
@@ -31,19 +31,20 @@
       "import numpy as np\n",
       "np.set_printoptions(precision=4, suppress=True)\n",
       "import statsmodels.api as sm\n",
-      "import pandas\n",
+      "import pandas as pd\n",
+      "pd.set_option(\"display.width\", 100)\n",
       "import matplotlib.pyplot as plt\n",
       "from statsmodels.formula.api import ols\n",
       "from statsmodels.graphics.api import interaction_plot, abline_plot\n",
       "from statsmodels.stats.anova import anova_lm\n",
       "\n",
       "try:\n",
-      "    salary_table = pandas.read_csv('salary.table')\n",
+      "    salary_table = pd.read_csv('salary.table')\n",
       "except:  # recent pandas can read URL without urlopen\n",
       "    from urllib2 import urlopen\n",
       "    url = 'http://stats191.stanford.edu/data/salary.table'\n",
       "    fh = urlopen(url)\n",
-      "    salary_table = pandas.read_table(fh)\n",
+      "    salary_table = pd.read_table(fh)\n",
       "    salary_table.to_csv('salary.table')\n",
       "\n",
       "E = salary_table.E\n",
@@ -454,78 +455,140 @@
      "collapsed": false,
      "input": [
       "try:\n",
-      "    minority_table = pandas.read_table('minority.table')\n",
+      "    minority_table = pd.read_table('minority.table')\n",
       "except:  # don't have data already\n",
       "    url = 'http://stats191.stanford.edu/data/minority.table'\n",
-      "    minority_table = pandas.read_table(url)\n",
+      "    minority_table = pd.read_table(url)\n",
       "\n",
       "factor_group = minority_table.groupby(['ETHN'])\n",
       "\n",
-      "plt.figure(figsize=(6,6))\n",
+      "fig, ax = plt.subplots(figsize=(6,6))\n",
       "colors = ['purple', 'green']\n",
       "markers = ['o', 'v']\n",
       "for factor, group in factor_group:\n",
-      "    plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
+      "    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
       "                marker=markers[factor], s=12**2)\n",
-      "plt.xlabel('TEST');\n",
-      "plt.ylabel('JPERF');\n",
-      "\n",
+      "ax.set_xlabel('TEST');\n",
+      "ax.set_ylabel('JPERF');"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
       "min_lm = ols('JPERF ~ TEST', data=minority_table).fit()\n",
-      "print min_lm.summary()\n",
-      "\n",
-      "plt.figure(figsize=(6,6));\n",
+      "print min_lm.summary()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "fig, ax = plt.subplots(figsize=(6,6));\n",
       "for factor, group in factor_group:\n",
-      "    plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
+      "    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
       "                marker=markers[factor], s=12**2)\n",
       "\n",
-      "plt.xlabel('TEST')\n",
-      "plt.ylabel('JPERF')\n",
-      "abline_plot(model_results = min_lm, ax=plt.gca());\n",
-      "\n",
+      "ax.set_xlabel('TEST')\n",
+      "ax.set_ylabel('JPERF')\n",
+      "fig = abline_plot(model_results = min_lm, ax=ax)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
       "min_lm2 = ols('JPERF ~ TEST + TEST:ETHN',\n",
       "        data=minority_table).fit()\n",
       "\n",
-      "print min_lm2.summary()\n",
-      "\n",
-      "plt.figure(figsize=(6,6));\n",
+      "print min_lm2.summary()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "fig, ax = plt.subplots(figsize=(6,6));\n",
       "for factor, group in factor_group:\n",
-      "    plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
+      "    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
       "                marker=markers[factor], s=12**2)\n",
       "\n",
-      "abline_plot(intercept = min_lm2.params['Intercept'],\n",
-      "                 slope = min_lm2.params['TEST'], ax=plt.gca(), color='purple');\n",
-      "abline_plot(intercept = min_lm2.params['Intercept'],\n",
+      "fig = abline_plot(intercept = min_lm2.params['Intercept'],\n",
+      "                 slope = min_lm2.params['TEST'], ax=ax, color='purple');\n",
+      "fig = abline_plot(intercept = min_lm2.params['Intercept'],\n",
       "        slope = min_lm2.params['TEST'] + min_lm2.params['TEST:ETHN'],\n",
-      "        ax=plt.gca(), color='green');\n",
-      "\n",
-      "\n",
+      "        ax=ax, color='green');"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
       "min_lm3 = ols('JPERF ~ TEST + ETHN', data = minority_table).fit()\n",
-      "print min_lm3.summary()\n",
-      "\n",
-      "plt.figure(figsize=(6,6));\n",
+      "print min_lm3.summary()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "fig, ax = plt.subplots(figsize=(6,6));\n",
       "for factor, group in factor_group:\n",
-      "    plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
+      "    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
       "                marker=markers[factor], s=12**2)\n",
       "\n",
-      "abline_plot(intercept = min_lm3.params['Intercept'],\n",
-      "                 slope = min_lm3.params['TEST'], ax=plt.gca(), color='purple');\n",
-      "abline_plot(intercept = min_lm3.params['Intercept'] + min_lm3.params['ETHN'],\n",
-      "        slope = min_lm3.params['TEST'], ax=plt.gca(), color='green');\n",
-      "\n",
-      "\n",
+      "fig = abline_plot(intercept = min_lm3.params['Intercept'],\n",
+      "                 slope = min_lm3.params['TEST'], ax=ax, color='purple');\n",
+      "fig = abline_plot(intercept = min_lm3.params['Intercept'] + min_lm3.params['ETHN'],\n",
+      "        slope = min_lm3.params['TEST'], ax=ax, color='green');"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
       "min_lm4 = ols('JPERF ~ TEST * ETHN', data = minority_table).fit()\n",
-      "print min_lm4.summary()\n",
-      "\n",
-      "plt.figure(figsize=(6,6));\n",
+      "print min_lm4.summary()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "fig, ax = plt.subplots(figsize=(8,6));\n",
       "for factor, group in factor_group:\n",
-      "    plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
+      "    ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
       "                marker=markers[factor], s=12**2)\n",
       "\n",
-      "abline_plot(intercept = min_lm4.params['Intercept'],\n",
-      "                 slope = min_lm4.params['TEST'], ax=plt.gca(), color='purple');\n",
-      "abline_plot(intercept = min_lm4.params['Intercept'] + min_lm4.params['ETHN'],\n",
+      "fig = abline_plot(intercept = min_lm4.params['Intercept'],\n",
+      "                 slope = min_lm4.params['TEST'], ax=ax, color='purple');\n",
+      "fig = abline_plot(intercept = min_lm4.params['Intercept'] + min_lm4.params['ETHN'],\n",
       "        slope = min_lm4.params['TEST'] + min_lm4.params['TEST:ETHN'],\n",
-      "        ax=plt.gca(), color='green');\n"
+      "        ax=ax, color='green');"
      ],
      "language": "python",
      "metadata": {},
@@ -537,16 +600,40 @@
      "input": [
       "# is there any effect of ETHN on slope or intercept?\n",
       "table5 = anova_lm(min_lm, min_lm4)\n",
-      "print table5\n",
-      "\n",
+      "print table5"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
       "# is there any effect of ETHN on intercept\n",
       "table6 = anova_lm(min_lm, min_lm3)\n",
-      "print table6\n",
-      "\n",
+      "print table6"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
       "# is there any effect of ETHN on slope\n",
       "table7 = anova_lm(min_lm, min_lm2)\n",
-      "print table7\n",
-      "\n",
+      "print table7"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
       "# is it just the slope or both?\n",
       "table8 = anova_lm(min_lm2, min_lm4)\n",
       "print table8"
@@ -567,22 +654,38 @@
      "collapsed": false,
      "input": [
       "try:\n",
-      "    rehab_table = pandas.read_csv('rehab.table')\n",
+      "    rehab_table = pd.read_csv('rehab.table')\n",
       "except:\n",
       "    url = 'http://stats191.stanford.edu/data/rehab.csv'\n",
-      "    rehab_table = pandas.read_table(url, delimiter=\",\")\n",
+      "    rehab_table = pd.read_table(url, delimiter=\",\")\n",
       "    rehab_table.to_csv('rehab.table')\n",
       "\n",
-      "plt.figure(figsize=(6,6))\n",
-      "rehab_table.boxplot('Time', 'Fitness', ax=plt.gca())\n",
-      "\n",
+      "fig, ax = plt.subplots(figsize=(8,6))\n",
+      "fig = rehab_table.boxplot('Time', 'Fitness', ax=ax, grid=False)"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
       "rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()\n",
       "table9 = anova_lm(rehab_lm)\n",
       "print table9\n",
       "\n",
-      "print rehab_lm.model.data.orig_exog\n",
-      "\n",
-      "print rehab_lm.summary()\n"
+      "print rehab_lm.model.data.orig_exog"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "print rehab_lm.summary()"
      ],
      "language": "python",
      "metadata": {},
@@ -600,10 +703,10 @@
      "collapsed": false,
      "input": [
       "try:\n",
-      "    kidney_table = pandas.read_table('./kidney.table')\n",
+      "    kidney_table = pd.read_table('./kidney.table')\n",
       "except:\n",
       "    url = 'http://stats191.stanford.edu/data/kidney.table'\n",
-      "    kidney_table = pandas.read_table(url, delimiter=\" *\")"
+      "    kidney_table = pd.read_table(url, delimiter=\" *\")"
      ],
      "language": "python",
      "metadata": {},
@@ -638,8 +741,8 @@
      "collapsed": false,
      "input": [
       "kt = kidney_table\n",
-      "plt.figure(figsize=(6,6))\n",
-      "interaction_plot(kt['Weight'], kt['Duration'], np.log(kt['Days']+1),\n",
+      "plt.figure(figsize=(8,6))\n",
+      "fig = interaction_plot(kt['Weight'], kt['Duration'], np.log(kt['Days']+1),\n",
       "        colors=['red', 'blue'], markers=['D','^'], ms=10, ax=plt.gca())"
      ],
      "language": "python",
@@ -698,13 +801,7 @@
       "\n",
       "print anova_lm(sum_lm)\n",
       "print anova_lm(sum_lm, typ=2)\n",
-      "print anova_lm(sum_lm, typ=3)\n",
-      "\n",
-      "nosum_lm = ols('np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',\n",
-      "            data=kt).fit()\n",
-      "print anova_lm(nosum_lm)\n",
-      "print anova_lm(nosum_lm, typ=2)\n",
-      "print anova_lm(nosum_lm, typ=3)"
+      "print anova_lm(sum_lm, typ=3)"
      ],
      "language": "python",
      "metadata": {},
@@ -713,7 +810,13 @@
     {
      "cell_type": "code",
      "collapsed": false,
-     "input": [],
+     "input": [
+      "nosum_lm = ols('np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',\n",
+      "            data=kt).fit()\n",
+      "print anova_lm(nosum_lm)\n",
+      "print anova_lm(nosum_lm, typ=2)\n",
+      "print anova_lm(nosum_lm, typ=3)"
+     ],
      "language": "python",
      "metadata": {},
      "outputs": []