Skip to content

Commit

Permalink
Merge a447b6a into c299e3f
Browse files Browse the repository at this point in the history
  • Loading branch information
jseabold committed Mar 5, 2014
2 parents c299e3f + a447b6a commit c8402c2
Show file tree
Hide file tree
Showing 11 changed files with 241 additions and 138 deletions.
60 changes: 26 additions & 34 deletions examples/notebooks/categorical_interaction_plot.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions examples/notebooks/formulas.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
"res = sm.ols(formula='Lottery ~ np.log(Literacy)', data=df).fit()\n",
"res = smf.ols(formula='Lottery ~ np.log(Literacy)', data=df).fit()\n",
"print res.params"
],
"language": "python",
Expand All @@ -290,7 +290,7 @@
"input": [
"def log_plus_1(x):\n",
" return np.log(x) + 1.\n",
"res = sm.ols(formula='Lottery ~ log_plus_1(Literacy)', data=df).fit()\n",
"res = smf.ols(formula='Lottery ~ log_plus_1(Literacy)', data=df).fit()\n",
"print res.params"
],
"language": "python",
Expand Down
247 changes: 175 additions & 72 deletions examples/notebooks/interactions_anova.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,20 @@
"import numpy as np\n",
"np.set_printoptions(precision=4, suppress=True)\n",
"import statsmodels.api as sm\n",
"import pandas\n",
"import pandas as pd\n",
"pd.set_option(\"display.width\", 100)\n",
"import matplotlib.pyplot as plt\n",
"from statsmodels.formula.api import ols\n",
"from statsmodels.graphics.api import interaction_plot, abline_plot\n",
"from statsmodels.stats.anova import anova_lm\n",
"\n",
"try:\n",
" salary_table = pandas.read_csv('salary.table')\n",
" salary_table = pd.read_csv('salary.table')\n",
"except: # recent pandas can read URL without urlopen\n",
" from urllib2 import urlopen\n",
" url = 'http://stats191.stanford.edu/data/salary.table'\n",
" fh = urlopen(url)\n",
" salary_table = pandas.read_table(fh)\n",
" salary_table = pd.read_table(fh)\n",
" salary_table.to_csv('salary.table')\n",
"\n",
"E = salary_table.E\n",
Expand Down Expand Up @@ -454,78 +455,140 @@
"collapsed": false,
"input": [
"try:\n",
" minority_table = pandas.read_table('minority.table')\n",
" minority_table = pd.read_table('minority.table')\n",
"except: # don't have data already\n",
" url = 'http://stats191.stanford.edu/data/minority.table'\n",
" minority_table = pandas.read_table(url)\n",
" minority_table = pd.read_table(url)\n",
"\n",
"factor_group = minority_table.groupby(['ETHN'])\n",
"\n",
"plt.figure(figsize=(6,6))\n",
"fig, ax = plt.subplots(figsize=(6,6))\n",
"colors = ['purple', 'green']\n",
"markers = ['o', 'v']\n",
"for factor, group in factor_group:\n",
" plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
" ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
" marker=markers[factor], s=12**2)\n",
"plt.xlabel('TEST');\n",
"plt.ylabel('JPERF');\n",
"\n",
"ax.set_xlabel('TEST');\n",
"ax.set_ylabel('JPERF');"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"min_lm = ols('JPERF ~ TEST', data=minority_table).fit()\n",
"print min_lm.summary()\n",
"\n",
"plt.figure(figsize=(6,6));\n",
"print min_lm.summary()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fig, ax = plt.subplots(figsize=(6,6));\n",
"for factor, group in factor_group:\n",
" plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
" ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
" marker=markers[factor], s=12**2)\n",
"\n",
"plt.xlabel('TEST')\n",
"plt.ylabel('JPERF')\n",
"abline_plot(model_results = min_lm, ax=plt.gca());\n",
"\n",
"ax.set_xlabel('TEST')\n",
"ax.set_ylabel('JPERF')\n",
"fig = abline_plot(model_results = min_lm, ax=ax)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"min_lm2 = ols('JPERF ~ TEST + TEST:ETHN',\n",
" data=minority_table).fit()\n",
"\n",
"print min_lm2.summary()\n",
"\n",
"plt.figure(figsize=(6,6));\n",
"print min_lm2.summary()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fig, ax = plt.subplots(figsize=(6,6));\n",
"for factor, group in factor_group:\n",
" plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
" ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
" marker=markers[factor], s=12**2)\n",
"\n",
"abline_plot(intercept = min_lm2.params['Intercept'],\n",
" slope = min_lm2.params['TEST'], ax=plt.gca(), color='purple');\n",
"abline_plot(intercept = min_lm2.params['Intercept'],\n",
"fig = abline_plot(intercept = min_lm2.params['Intercept'],\n",
" slope = min_lm2.params['TEST'], ax=ax, color='purple');\n",
"fig = abline_plot(intercept = min_lm2.params['Intercept'],\n",
" slope = min_lm2.params['TEST'] + min_lm2.params['TEST:ETHN'],\n",
" ax=plt.gca(), color='green');\n",
"\n",
"\n",
" ax=ax, color='green');"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"min_lm3 = ols('JPERF ~ TEST + ETHN', data = minority_table).fit()\n",
"print min_lm3.summary()\n",
"\n",
"plt.figure(figsize=(6,6));\n",
"print min_lm3.summary()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fig, ax = plt.subplots(figsize=(6,6));\n",
"for factor, group in factor_group:\n",
" plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
" ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
" marker=markers[factor], s=12**2)\n",
"\n",
"abline_plot(intercept = min_lm3.params['Intercept'],\n",
" slope = min_lm3.params['TEST'], ax=plt.gca(), color='purple');\n",
"abline_plot(intercept = min_lm3.params['Intercept'] + min_lm3.params['ETHN'],\n",
" slope = min_lm3.params['TEST'], ax=plt.gca(), color='green');\n",
"\n",
"\n",
"fig = abline_plot(intercept = min_lm3.params['Intercept'],\n",
" slope = min_lm3.params['TEST'], ax=ax, color='purple');\n",
"fig = abline_plot(intercept = min_lm3.params['Intercept'] + min_lm3.params['ETHN'],\n",
" slope = min_lm3.params['TEST'], ax=ax, color='green');"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"min_lm4 = ols('JPERF ~ TEST * ETHN', data = minority_table).fit()\n",
"print min_lm4.summary()\n",
"\n",
"plt.figure(figsize=(6,6));\n",
"print min_lm4.summary()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fig, ax = plt.subplots(figsize=(8,6));\n",
"for factor, group in factor_group:\n",
" plt.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
" ax.scatter(group['TEST'], group['JPERF'], color=colors[factor],\n",
" marker=markers[factor], s=12**2)\n",
"\n",
"abline_plot(intercept = min_lm4.params['Intercept'],\n",
" slope = min_lm4.params['TEST'], ax=plt.gca(), color='purple');\n",
"abline_plot(intercept = min_lm4.params['Intercept'] + min_lm4.params['ETHN'],\n",
"fig = abline_plot(intercept = min_lm4.params['Intercept'],\n",
" slope = min_lm4.params['TEST'], ax=ax, color='purple');\n",
"fig = abline_plot(intercept = min_lm4.params['Intercept'] + min_lm4.params['ETHN'],\n",
" slope = min_lm4.params['TEST'] + min_lm4.params['TEST:ETHN'],\n",
" ax=plt.gca(), color='green');\n"
" ax=ax, color='green');"
],
"language": "python",
"metadata": {},
Expand All @@ -537,16 +600,40 @@
"input": [
"# is there any effect of ETHN on slope or intercept?\n",
"table5 = anova_lm(min_lm, min_lm4)\n",
"print table5\n",
"\n",
"print table5"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# is there any effect of ETHN on intercept\n",
"table6 = anova_lm(min_lm, min_lm3)\n",
"print table6\n",
"\n",
"print table6"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# is there any effect of ETHN on slope\n",
"table7 = anova_lm(min_lm, min_lm2)\n",
"print table7\n",
"\n",
"print table7"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# is it just the slope or both?\n",
"table8 = anova_lm(min_lm2, min_lm4)\n",
"print table8"
Expand All @@ -567,22 +654,38 @@
"collapsed": false,
"input": [
"try:\n",
" rehab_table = pandas.read_csv('rehab.table')\n",
" rehab_table = pd.read_csv('rehab.table')\n",
"except:\n",
" url = 'http://stats191.stanford.edu/data/rehab.csv'\n",
" rehab_table = pandas.read_table(url, delimiter=\",\")\n",
" rehab_table = pd.read_table(url, delimiter=\",\")\n",
" rehab_table.to_csv('rehab.table')\n",
"\n",
"plt.figure(figsize=(6,6))\n",
"rehab_table.boxplot('Time', 'Fitness', ax=plt.gca())\n",
"\n",
"fig, ax = plt.subplots(figsize=(8,6))\n",
"fig = rehab_table.boxplot('Time', 'Fitness', ax=ax, grid=False)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()\n",
"table9 = anova_lm(rehab_lm)\n",
"print table9\n",
"\n",
"print rehab_lm.model.data.orig_exog\n",
"\n",
"print rehab_lm.summary()\n"
"print rehab_lm.model.data.orig_exog"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print rehab_lm.summary()"
],
"language": "python",
"metadata": {},
Expand All @@ -600,10 +703,10 @@
"collapsed": false,
"input": [
"try:\n",
" kidney_table = pandas.read_table('./kidney.table')\n",
" kidney_table = pd.read_table('./kidney.table')\n",
"except:\n",
" url = 'http://stats191.stanford.edu/data/kidney.table'\n",
" kidney_table = pandas.read_table(url, delimiter=\" *\")"
" kidney_table = pd.read_table(url, delimiter=\" *\")"
],
"language": "python",
"metadata": {},
Expand Down Expand Up @@ -638,8 +741,8 @@
"collapsed": false,
"input": [
"kt = kidney_table\n",
"plt.figure(figsize=(6,6))\n",
"interaction_plot(kt['Weight'], kt['Duration'], np.log(kt['Days']+1),\n",
"plt.figure(figsize=(8,6))\n",
"fig = interaction_plot(kt['Weight'], kt['Duration'], np.log(kt['Days']+1),\n",
" colors=['red', 'blue'], markers=['D','^'], ms=10, ax=plt.gca())"
],
"language": "python",
Expand Down Expand Up @@ -698,13 +801,7 @@
"\n",
"print anova_lm(sum_lm)\n",
"print anova_lm(sum_lm, typ=2)\n",
"print anova_lm(sum_lm, typ=3)\n",
"\n",
"nosum_lm = ols('np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',\n",
" data=kt).fit()\n",
"print anova_lm(nosum_lm)\n",
"print anova_lm(nosum_lm, typ=2)\n",
"print anova_lm(nosum_lm, typ=3)"
"print anova_lm(sum_lm, typ=3)"
],
"language": "python",
"metadata": {},
Expand All @@ -713,7 +810,13 @@
{
"cell_type": "code",
"collapsed": false,
"input": [],
"input": [
"nosum_lm = ols('np.log(Days+1) ~ C(Duration, Treatment) * C(Weight, Treatment)',\n",
" data=kt).fit()\n",
"print anova_lm(nosum_lm)\n",
"print anova_lm(nosum_lm, typ=2)\n",
"print anova_lm(nosum_lm, typ=3)"
],
"language": "python",
"metadata": {},
"outputs": []
Expand Down

0 comments on commit c8402c2

Please sign in to comment.