This is a port of https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/jupyter_notebooks/Linear_Mixed_Effects_Models.ipynb

In [1]:
%matplotlib inline
import pymc3 as pm
import numpy as np
import pandas as pd
import theano.tensor as tt
import theano

import matplotlib.pyplot as plt
plt.style.use('seaborn-darkgrid')

  from ._conv import register_converters as _register_converters


Load data

In [2]:
url = ('https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/'
         'lme4/InstEval.csv')
data = pd.read_csv(url)
data = data.drop(columns=['Unnamed: 0'])
data = data.rename(columns={'s': 'students',
                            'd': 'instructors',
                            'dept': 'departments',
                            'y': 'ratings'})
data['students'] -= 1  # start index by 0
# Remap categories to start from 0 and end at max(category).
data['instructors'] = data['instructors'].astype('category').cat.codes
data['departments'] = data['departments'].astype('category').cat.codes

train = data.sample(frac=0.8)
test = data.drop(train.index)

train.head()

Unnamed: 0,students,instructors,studage,lectage,service,departments,ratings
38188,1552,150,2,1,1,8,4
68160,2775,685,6,3,0,11,2
7830,287,716,4,1,0,12,2
32317,1321,354,8,2,1,3,5
36971,1502,829,8,6,0,6,4


Set up training and testing set

In [3]:
X_stud = theano.shared(train['students'].values)
X_inst = theano.shared(train['instructors'].values)
X_dept = theano.shared(train['departments'].values)
X_serv = theano.shared(train['service'].values)
Y_rate = theano.shared(train['ratings'].values)

In [4]:
X_serv.eval()

array([1, 0, 0, ..., 1, 1, 0])

In [5]:
num_students = max(data['students']) + 1
num_instructors = max(data['instructors']) + 1
num_departments = max(data['departments']) + 1
num_observations = train.shape[0]

print('Number of students:', num_students)
print('Number of instructors:', num_instructors)
print('Number of departments:', num_departments)
print('Number of observations:', num_observations)

Number of students: 2972
Number of instructors: 1128
Number of departments: 14
Number of observations: 58737


set up model in PyMC3

In [6]:
with pm.Model() as lmm:
    # Set up fixed effects and other parameters.
    intercept = pm.Flat('intercept')  # alpha in eq
    effect_service = pm.Flat('effect_service')  # beta in eq
    stddev_students = pm.Flat('stddev_unconstrained_students')  # sigma in eq
    stddev_instructors = pm.Flat(
        'stddev_unconstrained_instructors')  # sigma in eq
    stddev_departments = pm.Flat(
        'stddev_unconstrained_departments')  # sigma in eq

    # Set up random effects
    effect_students = pm.Normal('effect_students', 0., tt.exp(
        stddev_students), shape=num_students)
    effect_instructors = pm.Normal('effect_instructors', 0., tt.exp(
        stddev_instructors), shape=num_instructors)
    effect_departments = pm.Normal('effect_departments', 0., tt.exp(
        stddev_departments), shape=num_departments)

    ratings = pm.Normal('ratings',
                        mu=(effect_service * Y_rate +
                            effect_students[X_stud] +
                            effect_instructors[X_inst] +
                            effect_departments[X_dept] +
                            intercept),
                        sd=1.,
                        observed=Y_rate
                        )

Gibbs sampling is one version of generalization of Expectation maximization. We can do some compound step magic to do EM in PyMC3

In [7]:
lmm.free_RVs

[intercept,
 effect_service,
 stddev_unconstrained_students,
 stddev_unconstrained_instructors,
 stddev_unconstrained_departments,
 effect_students,
 effect_instructors,
 effect_departments]

In [8]:
lmm.free_RVs[0:5]

[intercept,
 effect_service,
 stddev_unconstrained_students,
 stddev_unconstrained_instructors,
 stddev_unconstrained_departments]

In [9]:
lmm.free_RVs[5:]

[effect_students, effect_instructors, effect_departments]

In [10]:
grad_vars = lmm.free_RVs[0:5]
logp_dlogp_func = lmm.logp_dlogp_function(grad_vars)

In [11]:
point = lmm.test_point
point

{'effect_departments': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'effect_instructors': array([0., 0., 0., ..., 0., 0., 0.]),
 'effect_service': array(0.),
 'effect_students': array([0., 0., 0., ..., 0., 0., 0.]),
 'intercept': array(0.),
 'stddev_unconstrained_departments': array(0.),
 'stddev_unconstrained_instructors': array(0.),
 'stddev_unconstrained_students': array(0.)}

In [12]:
logp_dlogp_func.set_extra_values(point)
array = logp_dlogp_func.dict_to_array(point)
array

array([0., 0., 0., 0., 0.])

In [13]:
logp_dlogp_func.get_extra_values()

{'effect_departments': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'effect_instructors': array([0., 0., 0., ..., 0., 0., 0.]),
 'effect_students': array([0., 0., 0., ..., 0., 0., 0.])}

In [14]:
logp_dlogp_func(array)

(array(-410739.2057508),
 array([ 1.87958e+05,  7.05966e+05, -2.97200e+03, -1.12800e+03,
        -1.40000e+01]))

In [15]:
logp_dlogp_func._cost_joined

__logp

In [16]:
logp_dlogp_func._vars_joined.tag.test_value

array([0., 0., 0., 0., 0.])

In [17]:
optimizer = pm.adam(learning_rate=0.01)

In [18]:
inputs = theano.shared(array, 'to_max')
cost_replace = theano.clone(-logp_dlogp_func._cost_joined, {logp_dlogp_func._vars_joined: inputs})
updates = optimizer(cost_replace, [inputs])

In [19]:
logp_dlogp_func._vars_joined.tag.test_value

array([0., 0., 0., 0., 0.])

In [20]:
updates[inputs].tag.test_value

array([ 0.01,  0.01, -0.01, -0.01, -0.01])

In [21]:
extra_rvs = lmm.free_RVs[5:]
train = theano.function(
    inputs=extra_rvs,
    outputs=[cost_replace],
    updates=optimizer(cost_replace, [inputs]))

In [22]:
extra_point = logp_dlogp_func.get_extra_values()
input_array = [extra_point[var.name] for var in extra_rvs]
input_array

[array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])]

In [23]:
train(*input_array)

[array(410739.2057508)]

In [24]:
logp_dlogp_func.array_to_full_dict(inputs.eval())

{'effect_departments': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'effect_instructors': array([0., 0., 0., ..., 0., 0., 0.]),
 'effect_service': array(0.01),
 'effect_students': array([0., 0., 0., ..., 0., 0., 0.]),
 'intercept': array(0.01),
 'stddev_unconstrained_departments': array(-0.01),
 'stddev_unconstrained_instructors': array(-0.01),
 'stddev_unconstrained_students': array(-0.01)}

Set up maximization

In [36]:
from pymc3.step_methods.arraystep import BlockedStep
from pymc3.model import modelcontext


class MaxFunc(BlockedStep):
    """
    Step method that take the maximization of RVs

    Parameters
    ----------
    vars : list
        List of variables to take maximization.
    model: pymc3 model
    """

    def __init__(self, vars, optimizer=pm.adam(learning_rate=0.01), model=None):
        model = modelcontext(model)

        self.generates_stats = True
        stats_dtypes = [{
            'loss': np.float64,
        }]

        self.vars = vars
        self.m = model
        self.optimizer = optimizer

        varnames = [var.name for var in vars]
        extra_vars = [
            var for var in model.free_RVs if var.name not in varnames
        ]
        self.extra_vars = extra_vars
        self._logp_dlogp_func = model.logp_dlogp_function(self.vars)

        point = lmm.test_point
        self._logp_dlogp_func.set_extra_values(point)
        np_array = self._logp_dlogp_func.dict_to_array(point)

        # TODO: accept scipy optimizer to directly optimize
        # self._logp_dlogp_func(np_array) in numpy
        inputs = theano.shared(np_array, 'to_max')
        cost_replace = theano.clone(self._logp_dlogp_func._cost_joined,
                                    {self._logp_dlogp_func._vars_joined: inputs})

        self.train_fn = theano.function(
            inputs=extra_vars,
            outputs=[-cost_replace],
            updates=optimizer(cost_replace, [inputs]))

    def step(self, point):
        input_array = [point[var.name] for var in self.extra_vars]
        loss = self.train_fn(*input_array)
        point = self._logp_dlogp_func.array_to_full_dict(inputs.eval())
        stats = {
            'loss': loss,
        }
        return point, [stats]

In [37]:
with lmm:
    step1 = pm.HamiltonianMC(vars=lmm.free_RVs[:5])
    step2 = MaxFunc(vars=lmm.free_RVs[5:])
    trace = pm.sample(1000, tune=1000, step=[step2, step1])

AttributeError: 'MaxFunc' object has no attribute 'stats_dtypes'