In [None]:
# installation:  pip install -U statsmodels

# https://www.statsmodels.org/stable/index.html

In [None]:
#  Getting-started guide:

#  https://www.statsmodels.org/stable/gettingstarted.html

In [None]:
# simple examples

import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf


#  fetch the Guerry data set from the R package "HistData"
dt = sm.datasets.get_rdataset(dataname="Guerry", package="HistData").data

#  describe the regression with a formula; np.log is applied to Pop1831
#  inside the formula itself, so no extra column has to be created
ols_model = smf.ols(formula='Lottery ~ Literacy + np.log(Pop1831)', data=dt)

#  estimate the model
result = ols_model.fit()

#  show the regression summary table
print(result.summary())

In [None]:
import numpy as np
import statsmodels.api as sm


#  generate artificial data (2 regressors + constant)
nobs = 100

#  seeded generator: without a fixed seed every Restart & Run All would
#  produce a different design matrix, noise vector and summary table
rng = np.random.default_rng(12345)

X = rng.random((nobs, 2))
X = sm.add_constant(X)

#  true coefficients used to build y: constant first, then the two regressors
beta = [1, .1, .5]

#  NOTE: the noise is uniform on [0, 1), so its mean is 0.5 rather than 0;
#  expect the fitted constant to land near 1.5 instead of beta[0] = 1
e = rng.random(nobs)
y = np.dot(X, beta) + e

#  fit regression model
result = sm.OLS(y, X).fit()

#  inspect the results
print(result.summary())

In [None]:
# A `from __future__` import has to be the first statement of the cell;
# placed after other imports it raises a SyntaxError and nothing below runs.
from __future__ import print_function

import pandas
import numpy as np
import statsmodels.api as sm
from patsy import dmatrices


#  load the Guerry data set (R package "HistData") through statsmodels
df = sm.datasets.get_rdataset("Guerry", "HistData").data

#  columns kept for the analysis; named `keep_cols` rather than `vars` so the
#  built-in vars() is not shadowed for the rest of the kernel session
keep_cols = ['Department', 'Lottery', 'Literacy', 'Wealth', 'Region']

df = df[keep_cols]
print(df[-5:])   # last five rows before rows with missing values are dropped

df = df.dropna()
print(df[-5:])   # last five rows after rows with missing values are dropped

#  use patsy's dmatrices function to create design matrices
#  (y: response 'Lottery'; X: regressors built from the right-hand side)
y, X = dmatrices('Lottery ~ Literacy + Wealth + Region', data=df, return_type='dataframe')
print(y[:3])

print(X[:3])


#  describe the OLS model from the design matrices, then estimate it
mod = sm.OLS(endog=y, exog=X)
result = mod.fit()

#  full summary table, followed by the estimated parameters and R-squared
print(result.summary())
print(result.params, result.rsquared, sep="\n")

#  specification test: Rainbow test applied to the fitted results
print(sm.stats.linear_rainbow(result))

#  partial regression plot of Lottery against Wealth,
#  with Region and Literacy as the other regressors
sm.graphics.plot_partregress(
    endog='Lottery',
    exog_i='Wealth',
    exog_others=['Region', 'Literacy'],
    data=df,
    obs_labels=False,
)