In [1]:
import os
import datetime
import numpy as np
import xarray as xa
import pprint

import logging
import warnings

logging.getLogger("tensorflow").setLevel(logging.INFO)
logging.getLogger("batchglm").setLevel(logging.INFO)

  return f(*args, **kwds)
  return f(*args, **kwds)


## Import batchglm

In [2]:
import batchglm.api as glm

In [3]:
# just to ignore some tensorflow warnings; just ignore this line
warnings.filterwarnings("ignore", category=DeprecationWarning, module="tensorflow")

# Example 1: easy

## Simulate data

In this example, we have 4 biological replicates (animals, patients, cell culture replicates etc.) in a treatment experiment: 2 in each condition (treated, untreated). Accordingly, there is perfect confounding at this level. We circumvent this by constraining the biological replicate coefficients to not model mean trends. 

### Define design matrices

In [80]:
ncells = 2000
dmat = np.zeros([ncells, 6])
dmat[:,0] = 1
dmat[:500,1] = 1 # bio rep 1
dmat[500:1000,2] = 1 # bio rep 2
dmat[1000:1500,3] = 1 # bio rep 3
dmat[1500:2000,4] = 1 # bio rep 4
dmat[1000:2000,5] = 1 # condition effect
print(np.unique(dmat, axis=0))

[[1. 0. 0. 0. 1. 1.]
 [1. 0. 0. 1. 0. 1.]
 [1. 0. 1. 0. 0. 0.]
 [1. 1. 0. 0. 0. 0.]]


In [5]:
sim = glm.models.nb_glm.Simulator(num_features=100)

In [6]:
sim.parse_dmat_loc(dmat = dmat)
sim.parse_dmat_scale(dmat = dmat)
sim.generate_params()
sim.generate_data()

### Simulated model data:

In [7]:
sim.X

<xarray.DataArray 'X' (observations: 2000, features: 100)>
array([[12903,  5849,  2604, ...,  7251, 18090,  5080],
       [11761, 27807,  3453, ...,  4643,    17,  6729],
       [11733, 14188,  4125, ...,  7424, 34598,  8651],
       ...,
       [40679, 10016, 18212, ...,  8702,  2372,  5319],
       [22378, 12701, 11424, ..., 15495,  6123,  6795],
       [12572, 11356, 14371, ..., 17543,  2710, 10276]])
Dimensions without coordinates: observations, features

In [8]:
np.unique(sim.design_loc, axis=0)

array([[1., 0., 0., 0., 1., 1.],
       [1., 0., 0., 1., 0., 1.],
       [1., 0., 1., 0., 0., 0.],
       [1., 1., 0., 0., 0., 0.]])

### The parameters used to generate this data:

In [9]:
sim.par_link_loc

<xarray.DataArray 'a' (design_loc_params: 6, features: 100)>
array([[ 9.115424,  8.908089,  8.774674, ...,  8.347528,  9.112151,  8.613704],
       [ 0.225722,  0.574323, -0.406029, ...,  0.549426,  0.531398,  0.139959],
       [ 0.109343, -0.060766,  0.674088, ..., -0.21946 ,  0.310577,  0.634328],
       [ 0.09099 ,  0.252674,  0.591868, ...,  0.515935, -0.072529, -0.446625],
       [ 0.385514,  0.531545,  0.475988, ...,  0.689591, -0.200923, -0.188396],
       [ 0.635883, -0.027357,  0.455295, ...,  0.570734, -0.238338,  0.67039 ]])
Coordinates:
  * design_loc_params  (design_loc_params) <U2 'p0' 'p1' 'p2' 'p3' 'p4' 'p5'
Dimensions without coordinates: features

In [10]:
sim.par_link_scale

<xarray.DataArray 'b' (design_scale_params: 6, features: 100)>
array([[ 2.079442,  1.94591 ,  1.94591 , ...,  2.197225,  0.693147,  2.197225],
       [ 0.509976,  0.043864, -0.239517, ...,  0.331443, -0.535629,  0.68757 ],
       [-0.173803,  0.553164, -0.214653, ..., -0.133374,  0.591388,  0.538071],
       [ 0.366421,  0.431785,  0.573761, ...,  0.296076, -0.037804,  0.578232],
       [ 0.571573,  0.610246,  0.020735, ..., -0.418196,  0.554592,  0.54537 ],
       [-0.567676,  0.643797,  0.428963, ...,  0.588652,  0.241874,  0.157571]])
Coordinates:
  * design_scale_params  (design_scale_params) <U2 'p0' 'p1' 'p2' 'p3' 'p4' 'p5'
Dimensions without coordinates: features

## Constraints for model

In [11]:
dmat_est_loc = sim.design_loc

In [12]:
dmat_est_scale = sim.design_scale

Build constraints based on sets of parameters that have to sum to zero. Each of these constraints is enforced by binding one of these parameters to the rest of the set. Such a constraint is encoded by assigning a 1 to each parameter in the set and a -1 to to the dependent parameter.

In [82]:
constraints_loc = np.zeros([2, dmat_est_loc.shape[1]])
# Constraint 0: Account for perfect confouding at biological replicate and treatment level 
# by constraining biological replicate coefficients not to produce mean effects across conditions.
constraints_loc[0,3] = -1
constraints_loc[0,4:5] = 1
# Constraint 1: Account for fact that first level of biological replicates was not absorbed into offset.
constraints_loc[1,1] = -1
constraints_loc[1,2:5] = 1
constraints_loc

array([[ 0.,  0.,  0., -1.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0., -1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.]])

In [15]:
constraints_scale = constraints_loc.copy()

array([[ 0.,  0.,  0., -1.,  1.,  0.],
       [ 0., -1.,  1.,  1.,  1.,  0.]])

In [16]:
from numpy.linalg import matrix_rank
constraints_loc_mod = constraints_loc.copy()
constraints_loc_mod[constraints_loc_mod==-1] = 1
print(np.vstack([np.unique(dmat_est_loc, axis=0), constraints_loc_mod]))
print("rank deficiency without constraints: "+ str(dmat_est_loc.shape[1] - matrix_rank(np.vstack([np.unique(dmat_est_loc, axis=0)]))))
print("rank deficiency with constraints: "+ str(dmat_est_loc.shape[1] - matrix_rank(np.vstack([np.unique(dmat_est_loc, axis=0), constraints_loc_mod]))))

[[1. 0. 0. 0. 1. 1.]
 [1. 0. 0. 1. 0. 1.]
 [1. 0. 1. 0. 0. 0.]
 [1. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 1. 0.]
 [0. 1. 1. 1. 1. 0.]]
rank deficiency without constraints: 2
rank deficiency with constraints: 0


## Estimate the model

In [17]:
X = sim.X
design_loc = dmat_est_loc
design_scale = dmat_est_scale

# input data
input_data = glm.models.nb_glm.InputData.new(
    data=X, 
    design_loc=design_loc,
    design_scale=design_scale)
input_data.constraints_loc = constraints_loc
input_data.constraints_scale = constraints_scale

### Set up estimator:

In [18]:
estimator = glm.models.nb_glm.Estimator(input_data, quick_scale=False)
estimator.initialize()

Using closed-form MLE initialization for mean
Should train mu: False
Using closed-form MME initialization for dispersion
Should train r: True


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Graph was finalized.
Running local_init_op.
Done running local_init_op.


### Train

Now start the training sequence and let the estimator choose automatically the best training strategy:

In [19]:
estimator.train_sequence('QUICK')

training strategy:
[{'convergence_criteria': 't_test',
  'learning_rate': 0.01,
  'loss_window_size': 10,
  'optim_algo': 'ADAM',
  'stop_at_loss_change': 0.25,
  'use_batching': False}]
Beginning with training sequence #1
Step: 1	loss: 909.982321
Step: 2	loss: 909.973624
Step: 3	loss: 909.968350
Step: 4	loss: 909.965493
Step: 5	loss: 909.963875
Step: 6	loss: 909.962728
Step: 7	loss: 909.961821
Step: 8	loss: 909.961120
Step: 9	loss: 909.960557
Step: 10	loss: 909.960035
Step: 11	loss: 909.959539
Step: 12	loss: 909.959123
Step: 13	loss: 909.958834
Step: 14	loss: 909.958649
Step: 15	loss: 909.958509
Step: 16	loss: 909.958381
Step: 17	loss: 909.958267
Step: 18	loss: 909.958179
Step: 19	loss: 909.958110
Step: 20	loss: 909.958046
pval: 0.003388
Step: 21	loss: 909.957982
Step: 22	loss: 909.957925
Step: 23	loss: 909.957883
Step: 24	loss: 909.957854
Step: 25	loss: 909.957831
Step: 26	loss: 909.957807
Step: 27	loss: 909.957778
Step: 28	loss: 909.957743
Step: 29	loss: 909.957701
Step: 30	loss: 90

## Obtaining the results

The fitted parameters can be retrieved by calling the corresponding parameters of `estimator`:

In [20]:
estimator.par_link_loc

<xarray.DataArray (design_loc_params: 6, features: 100)>
array([[ 9.278052,  9.165176,  8.917734, ...,  8.526134,  9.486837,  9.003132],
       [ 0.061182,  0.322709, -0.542116, ...,  0.393476,  0.106263, -0.243964],
       [-0.061182, -0.322709,  0.542116, ..., -0.393476, -0.106263,  0.243964],
       [-0.166838, -0.14927 ,  0.051688, ..., -0.081269,  0.066566, -0.127305],
       [ 0.166838,  0.14927 , -0.051688, ...,  0.081269, -0.066566,  0.127305],
       [ 0.698859,  0.102646,  0.83754 , ...,  0.983851, -0.75716 , -0.03016 ]])
Coordinates:
  * design_loc_params  (design_loc_params) <U2 'p0' 'p1' 'p2' 'p3' 'p4' 'p5'
    feature_allzero    (features) bool False False False False False False ...
  * features           (features) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ...

In [21]:
estimator.par_link_scale

<xarray.DataArray (design_scale_params: 6, features: 100)>
array([[ 2.199805,  2.191249,  1.744115, ...,  2.309823,  0.703358,  2.806439],
       [ 0.396867, -0.195051,  0.07298 , ...,  0.258389, -0.475605,  0.123256],
       [-0.396867,  0.195051, -0.07298 , ..., -0.258389,  0.475605, -0.123256],
       [-0.063983, -0.151285,  0.275507, ...,  0.382301, -0.267185, -0.075441],
       [ 0.063983,  0.151285, -0.275507, ..., -0.382301,  0.267185,  0.075441],
       [-0.27461 ,  0.906192,  0.847079, ...,  0.429621,  0.480646,  0.145633]])
Coordinates:
  * design_scale_params  (design_scale_params) <U2 'p0' 'p1' 'p2' 'p3' 'p4' 'p5'
    feature_allzero      (features) bool False False False False False False ...
  * features             (features) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ...

### Check that constraints were met

These parameter sets should sum to zero for each gene.

In [22]:
np.max(estimator.par_link_loc[1,:]+np.sum(estimator.par_link_loc[2:5,:], axis=0))

<xarray.DataArray ()>
array(5.551115e-17)
Coordinates:
    design_loc_params  <U2 'p1'

In [24]:
np.max(np.sum(estimator.par_link_loc[1:3,:], axis=0)+np.sum(estimator.par_link_loc[3:5,:], axis=0))

<xarray.DataArray ()>
array(5.551115e-17)

## Comparing the results with the simulated data:

Linear model output:

In [25]:
locdiff = glm.utils.stats.rmsd(np.matmul(estimator.design_loc, estimator.par_link_loc), 
                               np.matmul(sim.design_loc, sim.par_link_loc))
print("Root mean squared deviation of location: %.2f" % locdiff)

scalediff = glm.utils.stats.rmsd(np.matmul(estimator.design_scale, estimator.par_link_scale), 
                                 np.matmul(sim.design_scale, sim.par_link_scale))
print("Root mean squared deviation of scale:    %.2f" % scalediff)

Root mean squared deviation of location: 0.02
Root mean squared deviation of scale:    0.06


# Example 2: advanced

## Simulate some data

In this example, we have 4 biological replicates (animals, patients, cell culture replicates etc.) in a treatment experiment: 2 in each condition (treated, untreated). Accordingly, there is perfect confounding at this level already. We circumvent this by constraining the biological replicate coefficients to not model mean trends (constraints 0,1). Secondly, there a are technical replicates which contain cells from one biological replicate from each condition. Each biological replicate was assigned to one treated-untreated sample pair and each pair split into two technical replicates. Again, we correct perfect confouding by constrainig the techincal replicate coefficients not to model mean effects by constraints 2,3.

### Define design matrices

In [33]:
ncells = 2000
dmat = np.zeros([ncells, 10])
dmat[:,0] = 1
dmat[:500,1] = 1 # bio rep 1
dmat[500:1000,2] = 1 # bio rep 2
dmat[1000:1500,3] = 1 # bio rep 3
dmat[1500:2000,4] = 1 # bio rep 4
dmat[0:250,5] = 1 # tech rep 1
dmat[1000:1250,5] = 1 # tech rep 1
dmat[250:500,6] = 1 # tech rep 2
dmat[1250:1500,6] = 1 # tech rep 2
dmat[500:750,7] = 1 # tech rep 3
dmat[1500:1750,7] = 1 # tech rep 3
dmat[750:1000,8] = 1 # tech rep 4
dmat[1750:2000,8] = 1 # tech rep 4
dmat[1000:2000,9] = 1 # condition effect
print(np.unique(dmat, axis=0))

[[1. 0. 0. 0. 1. 0. 0. 0. 1. 1.]
 [1. 0. 0. 0. 1. 0. 0. 1. 0. 1.]
 [1. 0. 0. 1. 0. 0. 1. 0. 0. 1.]
 [1. 0. 0. 1. 0. 1. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 1. 0. 0. 0. 0. 1. 0. 0.]
 [1. 1. 0. 0. 0. 0. 1. 0. 0. 0.]
 [1. 1. 0. 0. 0. 1. 0. 0. 0. 0.]]


In [34]:
sim = glm.models.nb_glm.Simulator(num_features=100)

In [35]:
sim.parse_dmat_loc(dmat = dmat)
sim.parse_dmat_scale(dmat = dmat)
sim.generate_params()
sim.generate_data()

### Simulated model data:

In [36]:
sim.X

<xarray.DataArray 'X' (observations: 2000, features: 100)>
array([[ 5784,   164,  4191, ...,  6312,  1274, 10346],
       [ 4503,   221,  3810, ...,  2482,  4323, 14221],
       [ 3289,   705,  6224, ...,  7159,  2031,  6204],
       ...,
       [ 3396,  3531,  1152, ..., 15260,  4269,  2373],
       [ 2603,  2649,   741, ...,  4346,   900,  2629],
       [ 1385,  3452,   762, ...,  4697,  5049,  3041]])
Dimensions without coordinates: observations, features

## Constraints for model

In [38]:
dmat_est_loc = sim.design_loc

In [39]:
dmat_est_scale = sim.design_scale

Build constraints based on sets of parameters that have to sum to zero. Each of these constraints is enforced by binding one of these parameters to the rest of the set. Such a constraint is encoded by assigning a 1 to each parameter in the set and a -1 to to the dependent parameter.

In [60]:
np.unique(dmat_est_loc, axis=0)

array([[1., 0., 0., 0., 1., 0., 0., 0., 1., 1.],
       [1., 0., 0., 0., 1., 0., 0., 1., 0., 1.],
       [1., 0., 0., 1., 0., 0., 1., 0., 0., 1.],
       [1., 0., 0., 1., 0., 1., 0., 0., 0., 1.],
       [1., 0., 1., 0., 0., 0., 0., 0., 1., 0.],
       [1., 0., 1., 0., 0., 0., 0., 1., 0., 0.],
       [1., 1., 0., 0., 0., 0., 1., 0., 0., 0.],
       [1., 1., 0., 0., 0., 1., 0., 0., 0., 0.]])

In [84]:
constraints_loc = np.zeros([4, dmat_est_loc.shape[1]])
# Constraint 0: Account for perfect confouding at biological replicate and treatment level 
# by constraining biological replicate coefficients not to produce mean effects across conditions.
constraints_loc[0,3] = -1
constraints_loc[0,4:5] = 1
# Constraint 1: Account for fact that first level of biological replicates was not absorbed into offset. 
constraints_loc[1,1] = -1
constraints_loc[1,2:5] = 1
# Constraint 2: Account for fact that first level of technical replicates was not absorbed into offset. 
constraints_loc[2,5] = -1
constraints_loc[2,6:9] = 1
# Constraint 3: Account for perfect confouding at biological replicate and technical replicate 
# by constraining technical replicate coefficients not to produce mean effects across biological replicates.
constraints_loc[3,7] = -1
constraints_loc[3,8:9] = 1

constraints_loc

array([[ 0.,  0.,  0., -1.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0., -1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0., -1.,  1.,  1.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0., -1.,  1.,  0.]])

In [70]:
constraints_scale = constraints_loc.copy()

array([[ 0.,  0.,  0., -1.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0., -1.,  1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0., -1.,  1.,  1.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0., -1.,  1.,  0.]])

In [71]:
from numpy.linalg import matrix_rank
constraints_loc_mod = constraints_loc.copy()
constraints_loc_mod[constraints_loc_mod==-1] = 1
print(np.vstack([np.unique(dmat_est_loc, axis=0), constraints_loc_mod]))
print("rank deficiency without constraints: "+ str(dmat_est_loc.shape[1] - matrix_rank(np.vstack([np.unique(dmat_est_loc, axis=0)]))))
print("rank deficiency with constraints: "+ str(dmat_est_loc.shape[1] - matrix_rank(np.vstack([np.unique(dmat_est_loc, axis=0), constraints_loc_mod]))))

[[1. 0. 0. 0. 1. 0. 0. 0. 1. 1.]
 [1. 0. 0. 0. 1. 0. 0. 1. 0. 1.]
 [1. 0. 0. 1. 0. 0. 1. 0. 0. 1.]
 [1. 0. 0. 1. 0. 1. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 1. 0. 0. 0. 0. 1. 0. 0.]
 [1. 1. 0. 0. 0. 0. 1. 0. 0. 0.]
 [1. 1. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 1. 0. 0. 0. 0. 0.]
 [0. 1. 1. 1. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 1. 1. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 1. 0.]]
rank deficiency without constraints: 4
rank deficiency with constraints: 0


## Estimate the model

In [72]:
X = sim.X
design_loc = dmat_est_loc
design_scale = dmat_est_scale

# input data
input_data = glm.models.nb_glm.InputData.new(
    data=X, 
    design_loc=design_loc,
    design_scale=design_scale)
input_data.constraints_loc = constraints_loc
input_data.constraints_scale = constraints_scale

### Set up estimator:

In [73]:
estimator = glm.models.nb_glm.Estimator(input_data, quick_scale=False)
estimator.initialize()

Using closed-form MLE initialization for mean
Should train mu: True
Using closed-form MME initialization for dispersion
Should train r: True


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Graph was finalized.
Running local_init_op.
Done running local_init_op.


### Train

Now start the training sequence and let the estimator choose automatically the best training strategy:

In [74]:
estimator.train_sequence('QUICK')

training strategy:
[{'convergence_criteria': 't_test',
  'learning_rate': 0.1,
  'loss_window_size': 100,
  'optim_algo': 'ADAM',
  'stop_at_loss_change': 0.05,
  'use_batching': True},
 {'convergence_criteria': 't_test',
  'learning_rate': 0.05,
  'loss_window_size': 10,
  'optim_algo': 'ADAM',
  'stop_at_loss_change': 0.05,
  'use_batching': False}]
Beginning with training sequence #1
Step: 1	loss: 901.681688
Step: 2	loss: 913.664792
Step: 3	loss: 905.085027
Step: 4	loss: 903.485653
Step: 5	loss: 900.701931
Step: 6	loss: 905.045980
Step: 7	loss: 904.836149
Step: 8	loss: 902.916081
Step: 9	loss: 896.496285
Step: 10	loss: 900.040830
Step: 11	loss: 902.491017
Step: 12	loss: 901.849604
Step: 13	loss: 894.050507
Step: 14	loss: 899.251237
Step: 15	loss: 900.766051
Step: 16	loss: 899.668476
Step: 17	loss: 893.412154
Step: 18	loss: 896.617505
Step: 19	loss: 898.626475
Step: 20	loss: 899.040557
Step: 21	loss: 892.611549
Step: 22	loss: 896.447203
Step: 23	loss: 897.852252
Step: 24	loss: 897.91

Step: 294	loss: 895.587162
Step: 295	loss: 897.837425
Step: 296	loss: 896.752155
Step: 297	loss: 890.293085
Step: 298	loss: 895.965920
Step: 299	loss: 897.445376
Step: 300	loss: 897.462147
pval: 0.679089
Training sequence #1 complete
Beginning with training sequence #2
Step: 301	loss: 894.823153
Step: 302	loss: 899.268276
Step: 303	loss: 895.076122
Step: 304	loss: 895.257103
Step: 305	loss: 896.624632
Step: 306	loss: 896.308158
Step: 307	loss: 895.225080
Step: 308	loss: 894.690255
Step: 309	loss: 894.962803
Step: 310	loss: 895.325616
Step: 311	loss: 895.216138
Step: 312	loss: 894.820048
Step: 313	loss: 894.561062
Step: 314	loss: 894.568433
Step: 315	loss: 894.659313
Step: 316	loss: 894.648754
Step: 317	loss: 894.539861
Step: 318	loss: 894.442858
Step: 319	loss: 894.414304
Step: 320	loss: 894.422377
pval: 0.012077
Step: 321	loss: 894.418280
Step: 322	loss: 894.385701
Step: 323	loss: 894.338637
Step: 324	loss: 894.303665
Step: 325	loss: 894.291808
Step: 326	loss: 894.287617
Step: 327	los

## Obtaining the results

### Check that constraints were met

These parameter sets should sum to zero for each gene.

In [75]:
np.max(estimator.par_link_loc[1,:]+np.sum(estimator.par_link_loc[2:5,:], axis=0))

<xarray.DataArray ()>
array(1.110223e-16)
Coordinates:
    design_loc_params  <U2 'p1'

In [76]:
np.max(np.sum(estimator.par_link_loc[1:3,:], axis=0)+np.sum(estimator.par_link_loc[3:5,:], axis=0))

<xarray.DataArray ()>
array(1.110223e-16)

## Comparing the results with the simulated data:

Linear model output:

In [77]:
locdiff = glm.utils.stats.rmsd(np.matmul(estimator.design_loc, estimator.par_link_loc), 
                               np.matmul(sim.design_loc, sim.par_link_loc))
print("Root mean squared deviation of location: %.2f" % locdiff)

scalediff = glm.utils.stats.rmsd(np.matmul(estimator.design_scale, estimator.par_link_scale), 
                                 np.matmul(sim.design_scale, sim.par_link_scale))
print("Root mean squared deviation of scale:    %.2f" % scalediff)

Root mean squared deviation of location: 0.03
Root mean squared deviation of scale:    0.07
