In [40]:
import numpy as np
import patsy
from patsy import dmatrices, dmatrix, demo_data

In [2]:
# Build the toy dataset used by every formula in this notebook via patsy's
# demo_data helper. As the displayed dict shows, 'a' and 'b' come back as
# lists of string labels and the remaining names as float arrays, with eight
# observations each.
# NOTE(review): 'z' and 'z column' are not referenced by any formula in the
# visible cells - confirm whether a later cell needs them.
data = demo_data('a', 'b', 'x1', 'x2', 'y', 'z', 'z column')
# Bare last expression so the notebook renders the generated dict.
data

{'a': ['a1', 'a1', 'a2', 'a2', 'a1', 'a1', 'a2', 'a2'],
 'b': ['b1', 'b2', 'b1', 'b2', 'b1', 'b2', 'b1', 'b2'],
 'x1': array([ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,  1.86755799,
        -0.97727788,  0.95008842, -0.15135721]),
 'x2': array([-0.10321885,  0.4105985 ,  0.14404357,  1.45427351,  0.76103773,
         0.12167502,  0.44386323,  0.33367433]),
 'y': array([ 1.49407907, -0.20515826,  0.3130677 , -0.85409574, -2.55298982,
         0.6536186 ,  0.8644362 , -0.74216502]),
 'z': array([ 2.26975462, -1.45436567,  0.04575852, -0.18718385,  1.53277921,
         1.46935877,  0.15494743,  0.37816252]),
 'z column': array([-0.88778575, -1.98079647, -0.34791215,  0.15634897,  1.23029068,
         1.20237985, -0.38732682, -0.30230275])}

In [4]:
# A two-sided formula: the term left of "~" becomes the response matrix and
# the terms on the right become the predictor matrix. The displayed result is
# a (response, predictors) pair, and the predictors gain an Intercept column
# even though the formula does not mention one.
dmatrices("y ~ x1 + x2", data=data)

(DesignMatrix with shape (8, 1)
          y
    1.49408
   -0.20516
    0.31307
   -0.85410
   -2.55299
    0.65362
    0.86444
   -0.74217
   Terms:
     'y' (column 0),
 DesignMatrix with shape (8, 3)
   Intercept        x1        x2
           1   1.76405  -0.10322
           1   0.40016   0.41060
           1   0.97874   0.14404
           1   2.24089   1.45427
           1   1.86756   0.76104
           1  -0.97728   0.12168
           1   0.95009   0.44386
           1  -0.15136   0.33367
   Terms:
     'Intercept' (column 0)
     'x1' (column 1)
     'x2' (column 2))

In [16]:
# Fit y ~ x1 + x2 by ordinary least squares directly on patsy's design matrices.
outcome, predictors = dmatrices('y ~ x1 + x2', data=data)
# lstsq returns a tuple whose first element is the coefficient solution; it has
# one column per response column, so flatten it to a 1-D vector of betas.
lstsq_solution = np.linalg.lstsq(predictors, outcome, rcond=None)
betas = lstsq_solution[0].ravel()

In [20]:
# Pair each fitted coefficient with the design-matrix column it belongs to;
# the column names come from the metadata patsy attaches to the matrix.
coefficient_labels = predictors.design_info.column_names
for name, beta in zip(coefficient_labels, betas):
    print("%s: %s" % (name, beta))

Intercept: 0.5796623441231169
x1: 0.08859919035535568
x2: -1.7647920555065002


In [22]:
# One-sided formula (no "~"): dmatrix builds only a predictor matrix.
# Assigned rather than displayed, so this cell produces no output.
d = dmatrix("x1 + x2", data=data)

In [34]:
# "-1" removes the Intercept column that patsy would otherwise add, leaving a
# two-column matrix holding just x1 and x2.
dmatrix("-1 + x1 + x2", data=data)

DesignMatrix with shape (8, 2)
        x1        x2
   1.76405  -0.10322
   0.40016   0.41060
   0.97874   0.14404
   2.24089   1.45427
   1.86756   0.76104
  -0.97728   0.12168
   0.95009   0.44386
  -0.15136   0.33367
  Terms:
    'x1' (column 0)
    'x2' (column 1)

In [36]:
# A term can be an arbitrary Python expression evaluated on the data columns.
# x2 contains a negative value, so it is shifted by 5 before taking the log.
# patsy normalises the expression's spacing when naming the output column.
dmatrix("x1 + np.log(x2+5)", data=data)

DesignMatrix with shape (8, 3)
  Intercept        x1  np.log(x2 + 5)
          1   1.76405         1.58858
          1   0.40016         1.68836
          1   0.97874         1.63784
          1   2.24089         1.86474
          1   1.86756         1.75112
          1  -0.97728         1.63348
          1   0.95009         1.69449
          1  -0.15136         1.67404
  Terms:
    'Intercept' (column 0)
    'x1' (column 1)
    'np.log(x2 + 5)' (column 2)

In [37]:
# center() and standardize() are formula helpers shipped in patsy.builtins, so
# they can be used inside a formula without being imported here. center(x1)
# subtracts the column mean; standardize(x2) additionally rescales the column
# (see the patsy documentation for the exact scaling).
dmatrix("center(x1) + standardize(x2)", data=data)

DesignMatrix with shape (8, 3)
  Intercept  center(x1)  standardize(x2)
          1     0.87995         -1.21701
          1    -0.48395         -0.07791
          1     0.09463         -0.66885
          1     1.35679          2.23584
          1     0.98345          0.69899
          1    -1.86138         -0.71844
          1     0.06598         -0.00417
          1    -1.03546         -0.24845
  Terms:
    'Intercept' (column 0)
    'center(x1)' (column 1)
    'standardize(x2)' (column 2)

In [41]:
# List every attribute of patsy.builtins, the module that supplies the helpers
# used inside formulas above (e.g. center, standardize). dir() also
# reports module dunders and the test_I / test_Q helpers, so not every name in
# the output is a formula function.
dir(patsy.builtins)

['C',
 'ContrastMatrix',
 'Diff',
 'Helmert',
 'I',
 'Poly',
 'Q',
 'Sum',
 'Treatment',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'bs',
 'cc',
 'center',
 'cr',
 'scale',
 'standardize',
 'te',
 'test_I',
 'test_Q']

In [43]:
# "+ 0" suppresses the intercept. With no intercept to absorb a reference
# level, the categorical `a` is expanded into one indicator column per level
# (a[a1] and a[a2]).
dmatrix("a + 0", data=data)

DesignMatrix with shape (8, 2)
  a[a1]  a[a2]
      1      0
      1      0
      0      1
      0      1
      1      0
      1      0
      0      1
      0      1
  Terms:
    'a' (columns 0:2)

In [44]:
# Parse the formula into its term structure only: no data is passed, so names
# such as `x` do not need to exist in `data`. In the displayed result the
# empty Term([]) on the right-hand side is the implicit intercept, and `a:b`
# becomes a single term containing both factors.
patsy.ModelDesc.from_formula("y ~ a + a:b + np.log(x)")

ModelDesc(lhs_termlist=[Term([EvalFactor('y')])],
          rhs_termlist=[Term([]),
                        Term([EvalFactor('a')]),
                        Term([EvalFactor('a'), EvalFactor('b')]),
                        Term([EvalFactor('np.log(x)')])])

In [45]:
# Interaction-only right-hand side: the parsed model holds the implicit
# intercept term plus one two-factor term for a:b; no separate main-effect
# terms for `a` or `b` are added.
patsy.ModelDesc.from_formula("y ~ a:b")

ModelDesc(lhs_termlist=[Term([EvalFactor('y')])],
          rhs_termlist=[Term([]), Term([EvalFactor('a'), EvalFactor('b')])])

In [51]:
# Re-display the demo dict (same values as when it was created) so the
# categorical columns 'a' and 'b' are visible for reference at this point.
data

{'a': ['a1', 'a1', 'a2', 'a2', 'a1', 'a1', 'a2', 'a2'],
 'b': ['b1', 'b2', 'b1', 'b2', 'b1', 'b2', 'b1', 'b2'],
 'x1': array([ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,  1.86755799,
        -0.97727788,  0.95008842, -0.15135721]),
 'x2': array([-0.10321885,  0.4105985 ,  0.14404357,  1.45427351,  0.76103773,
         0.12167502,  0.44386323,  0.33367433]),
 'y': array([ 1.49407907, -0.20515826,  0.3130677 , -0.85409574, -2.55298982,
         0.6536186 ,  0.8644362 , -0.74216502]),
 'z': array([ 2.26975462, -1.45436567,  0.04575852, -0.18718385,  1.53277921,
         1.46935877,  0.15494743,  0.37816252]),
 'z column': array([-0.88778575, -1.98079647, -0.34791215,  0.15634897,  1.23029068,
         1.20237985, -0.38732682, -0.30230275])}

In [54]:
# Main effects for both categoricals plus their interaction. Because an
# intercept is present, each factor is coded against its first level (the
# a[T.a2] / b[T.b2] columns), so every term contributes a single column and
# the interaction column is 1 only where a == 'a2' and b == 'b2'.
dmatrix("a + b + a:b", data=data)

DesignMatrix with shape (8, 4)
  Intercept  a[T.a2]  b[T.b2]  a[T.a2]:b[T.b2]
          1        0        0                0
          1        0        1                0
          1        1        0                0
          1        1        1                1
          1        0        0                0
          1        0        1                0
          1        1        0                0
          1        1        1                1
  Terms:
    'Intercept' (column 0)
    'a' (column 1)
    'b' (column 2)
    'a:b' (column 3)