In [1]:
import numpy as np
from patsy import dmatrices, dmatrix

In [2]:
# Toy dataset: three numeric columns and one two-level categorical column.
data = dict(
    income=[10, 15, 20, 25, 30, 100],
    edu=[9, 9, 12, 12, 15, 18],
    age=[19, 22, 35, 27, 42, 36],
    gender=['m', 'f', 'm', 'f', 'm', 'f'],
)

In [3]:
# Left of '~' becomes the response matrix, right of '~' the design matrix.
outcome, predictors = dmatrices('income ~ edu + age', data=data)

In [4]:
# Show the response matrix (the 'income' column from the formula's left-hand side).
outcome

DesignMatrix with shape (6, 1)
  income
      10
      15
      20
      25
      30
     100
  Terms:
    'income' (column 0)

In [5]:
# Show the design matrix built from the formula's right-hand side terms.
predictors

DesignMatrix with shape (6, 3)
  Intercept  edu  age
          1    9   19
          1    9   22
          1   12   35
          1   12   27
          1   15   42
          1   18   36
  Terms:
    'Intercept' (column 0)
    'edu' (column 1)
    'age' (column 2)

## get the least-squares solution to a linear matrix equation

In [6]:
# Least-squares fit of outcome on predictors.
# lstsq returns (solution, residuals, rank, singular_values); element [0] is
# the coefficient array, flattened here so it pairs up with the column names.
# rcond=None is passed explicitly: omitting it triggers a FutureWarning on
# NumPy 1.14-1.x and selects a different cutoff depending on NumPy version.
betas = np.linalg.lstsq(predictors, outcome, rcond=None)[0].ravel()
for name, beta in zip(predictors.design_info.column_names, betas):
    print("{:10}: {:.4}".format(name, beta))

Intercept : -60.33
edu       : 14.06
age       : -2.719


## patsy's built-in transformation functions: center, standardize, ...

In [7]:
# center() and standardize() are applied to the columns inside the formula.
dmatrix("center(age) + standardize(edu)", data=data)

DesignMatrix with shape (6, 3)
  Intercept  center(age)  standardize(edu)
          1    -11.16667          -1.09322
          1     -8.16667          -1.09322
          1      4.83333          -0.15617
          1     -3.16667          -0.15617
          1     11.83333           0.78087
          1      5.83333           1.71791
  Terms:
    'Intercept' (column 0)
    'center(age)' (column 1)
    'standardize(edu)' (column 2)

## We can also use other Python functions, e.g. np.log()

In [8]:
# np.log is called on the 'age' column as part of evaluating the formula.
dmatrix('np.log(age) + edu', data=data)

DesignMatrix with shape (6, 3)
  Intercept  np.log(age)  edu
          1      2.94444    9
          1      3.09104    9
          1      3.55535   12
          1      3.29584   12
          1      3.73767   15
          1      3.58352   18
  Terms:
    'Intercept' (column 0)
    'np.log(age)' (column 1)
    'edu' (column 2)

## Categorical variable: gender

In [9]:
# 'gender' holds strings, so it is encoded as an indicator column (gender[T.m]).
dmatrix('age + gender', data=data)

DesignMatrix with shape (6, 3)
  Intercept  gender[T.m]  age
          1            1   19
          1            0   22
          1            1   35
          1            0   27
          1            1   42
          1            0   36
  Terms:
    'Intercept' (column 0)
    'gender' (column 1)
    'age' (column 2)

## interaction term

In [10]:
# 'gender:age' adds only the interaction column, without a gender main effect.
# Note: this rebinds outcome/predictors from the earlier edu + age model.
outcome, predictors = dmatrices('income ~ age + gender:age', data=data)
predictors

DesignMatrix with shape (6, 3)
  Intercept  age  gender[T.m]:age
          1   19               19
          1   22                0
          1   35               35
          1   27                0
          1   42               42
          1   36                0
  Terms:
    'Intercept' (column 0)
    'age' (column 1)
    'gender:age' (column 2)

In [11]:
# Least-squares fit of outcome on the interaction-model predictors.
# lstsq returns (solution, residuals, rank, singular_values); element [0] is
# the coefficient array, flattened here so it pairs up with the column names.
# rcond=None is passed explicitly: omitting it triggers a FutureWarning on
# NumPy 1.14-1.x and selects a different cutoff depending on NumPy version.
betas = np.linalg.lstsq(predictors, outcome, rcond=None)[0].ravel()
for name, beta in zip(predictors.design_info.column_names, betas):
    print("{:10}: {:.4}".format(name, beta))

Intercept : -47.71
age       : 3.452
gender[T.m]:age: -1.443
