In [1]:
import os
import datetime
import numpy as np
import pprint

import logging
import warnings

# Set the level of the "tensorflow" and "batchglm" loggers to INFO so that
# their INFO-level messages are not filtered out by these loggers.
logging.getLogger("tensorflow").setLevel(logging.INFO)
logging.getLogger("batchglm").setLevel(logging.INFO)

## Import batchglm

In [2]:
import batchglm.api as glm

In [3]:
# Suppress DeprecationWarning messages that originate from modules whose
# fully qualified name starts with "tensorflow"; this only reduces output noise.
# NOTE(review): this filter is installed after `batchglm.api` has been imported,
# so warnings raised during that import are not affected - confirm whether the
# filter should be set up before the import instead.
warnings.filterwarnings("ignore", category=DeprecationWarning, module="tensorflow")

## Simulate some data

In [4]:
# Create an `nb_glm` simulator with 100 features and let it generate its data.
# NOTE(review): no random seed is set before this point, so the generated
# values presumably differ between runs - confirm, and seed if the outputs
# shown in this notebook need to be reproducible.
sim = glm.models.nb_glm.Simulator(num_features=100)
sim.generate()

### Simulated model data:

In [5]:
# Display the simulated data matrix (observations x features).
sim.X

<xarray.DataArray 'X' (observations: 2000, features: 100)>
array([[ 4591,  6286,  4754, ...,  5106,   370,   539],
       [ 5503, 18603,  7617, ..., 14718,   458,  1923],
       [11337,  4280,  3251, ...,  8953,   643,   691],
       ..., 
       [ 7733, 11835,  4322, ..., 14312,   458,  1787],
       [ 3621,  2372,  9816, ..., 12259,   775,   643],
       [ 4161, 16194, 13938, ...,  3701,   438,  2283]])
Dimensions without coordinates: observations, features

In [6]:
# Show the distinct rows of the location design matrix; with axis=0,
# np.unique compares whole rows instead of flattening the array.
np.unique(sim.design_loc, axis=0)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  1.],
       [ 1.,  0.,  0.,  1.,  0.],
       [ 1.,  0.,  0.,  1.,  1.],
       [ 1.,  0.,  1.,  0.,  0.],
       [ 1.,  0.,  1.,  0.,  1.],
       [ 1.,  1.,  0.,  0.,  0.],
       [ 1.,  1.,  0.,  0.,  1.]])

In [7]:
# Show the distinct rows of the scale design matrix; with axis=0,
# np.unique compares whole rows instead of flattening the array.
np.unique(sim.design_scale, axis=0)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  1.],
       [ 1.,  0.,  0.,  1.,  0.],
       [ 1.,  0.,  0.,  1.,  1.],
       [ 1.,  0.,  1.,  0.,  0.],
       [ 1.,  0.,  1.,  0.,  1.],
       [ 1.,  1.,  0.,  0.,  0.],
       [ 1.,  1.,  0.,  0.,  1.]])

### The parameters used to generate this data:

In [8]:
# Display the simulator's location parameters
# (one row per location design parameter, one column per feature).
sim.par_link_loc

<xarray.DataArray 'a' (design_loc_params: 5, features: 100)>
array([[  8.913801e+00,   8.977684e+00,   8.618311e+00, ...,   9.051508e+00,
          6.624763e+00,   6.779789e+00],
       [  5.277409e-01,   1.234578e-01,   2.199390e-01, ...,   2.866304e-01,
          4.977616e-01,   2.566364e-01],
       [ -5.909113e-01,   5.375138e-01,  -3.491470e-01, ...,  -1.098316e-01,
         -2.146866e-01,   1.921012e-02],
       [  1.137895e-01,   8.568040e-02,   3.509631e-01, ...,   2.903533e-01,
         -3.090637e-03,   2.251633e-01],
       [ -3.579814e-01,   4.854707e-01,   2.406968e-01, ...,   5.728669e-01,
         -6.259994e-01,   3.608064e-01]])
Coordinates:
  * design_loc_params  (design_loc_params) <U14 'Intercept' 'batch[T.1]' ...
Dimensions without coordinates: features

In [9]:
# Display the simulator's scale parameters
# (one row per scale design parameter, one column per feature).
sim.par_link_scale

<xarray.DataArray 'b' (design_scale_params: 5, features: 100)>
array([[ 1.609438,  1.609438,  1.791759, ...,  1.098612,  1.098612,  1.791759],
       [ 0.269036, -0.299555,  0.42504 , ...,  0.489682,  0.255016,  0.16679 ],
       [ 0.21587 ,  0.162142, -0.659762, ...,  0.565868,  0.684866, -0.677622],
       [-0.377317,  0.008908, -0.596742, ...,  0.452542, -0.418559,  0.050641],
       [-0.493644, -0.050117, -0.602287, ..., -0.555146,  0.370324, -0.098433]])
Coordinates:
  * design_scale_params  (design_scale_params) <U14 'Intercept' 'batch[T.1]' ...
Dimensions without coordinates: features

## Estimate the model

In [10]:
# Take the data matrix and both design matrices from the simulator.
X, design_loc, design_scale = sim.X, sim.design_loc, sim.design_scale

# Bundle data and design matrices into the input object for the estimator.
input_data = glm.models.nb_glm.InputData.new(
    data=X,
    design_loc=design_loc,
    design_scale=design_scale,
)

### set up estimator:

In [11]:
# Create an estimator for the prepared input data, then initialize it;
# training itself is started separately further below.
estimator = glm.models.nb_glm.Estimator(input_data)
estimator.initialize()

Using closed-form MLE initialization for mean
Using closed-form MME initialization for dispersion
Graph was finalized.
Running local_init_op.
Done running local_init_op.


### Now train:

There are multiple possible training strategies:

In [12]:
# Print the name of every available training strategy, one per line.
for strategy in estimator.TrainingStrategy:
    print(strategy.name)

AUTO
DEFAULT
EXACT
QUICK
PRE_INITIALIZED


Each strategy corresponds to a list of training option dictionaries; each dictionary in the list is passed as keyword arguments to one call of `estimator.train()`:

In [13]:
# Pretty-print the list of training options stored as the value of the DEFAULT strategy.
pprint.pprint(estimator.TrainingStrategy.DEFAULT.value)

[{'convergence_criteria': 't_test',
  'learning_rate': 0.1,
  'loss_window_size': 100,
  'optim_algo': 'ADAM',
  'stop_at_loss_change': 0.05,
  'use_batching': True},
 {'convergence_criteria': 't_test',
  'learning_rate': 0.05,
  'loss_window_size': 10,
  'optim_algo': 'GD',
  'stop_at_loss_change': 0.05,
  'use_batching': False}]


Therefore, when choosing the training strategy "DEFAULT", the following call:
```python
estimator.train_sequence("DEFAULT")
```
is equivalent to:
```python
estimator.train_sequence(estimator.TrainingStrategy.DEFAULT)
```
which in turn is equivalent to:
```python
estimator.train(
    convergence_criteria = 't_test',
    learning_rate = 0.1,
    loss_window_size = 100,
    optim_algo = 'ADAM',
    stop_at_loss_change = 0.05,
    use_batching = True
)
estimator.train(
    convergence_criteria = 't_test',
    learning_rate = 0.05,
    loss_window_size = 10,
    optim_algo = 'GD',
    stop_at_loss_change = 0.05,
    use_batching = False
)
```

Now start the training sequence and let the estimator automatically choose the best training strategy:

In [14]:
# Train with the "AUTO" strategy; the chosen list of training options is
# logged at the start of the output before the individual steps.
estimator.train_sequence("AUTO")

training strategy:
[{'convergence_criteria': 't_test',
  'learning_rate': 0.1,
  'loss_window_size': 100,
  'optim_algo': 'ADAM',
  'stop_at_loss_change': 0.05,
  'use_batching': True},
 {'convergence_criteria': 't_test',
  'learning_rate': 0.05,
  'loss_window_size': 10,
  'optim_algo': 'GD',
  'stop_at_loss_change': 0.05,
  'use_batching': False}]
Beginning with training sequence #1
Step: 1	loss: 874.227234
Step: 2	loss: 891.925110
Step: 3	loss: 880.911133
Step: 4	loss: 882.574097
Step: 5	loss: 879.281921
Step: 6	loss: 882.160706
Step: 7	loss: 881.002991
Step: 8	loss: 881.263123
Step: 9	loss: 877.321106
Step: 10	loss: 879.381836
Step: 11	loss: 880.998474
Step: 12	loss: 880.275818
Step: 13	loss: 875.146912
Step: 14	loss: 879.836731
Step: 15	loss: 878.588379
Step: 16	loss: 878.498474
Step: 17	loss: 875.313782
Step: 18	loss: 877.923645
Step: 19	loss: 878.182068
Step: 20	loss: 878.936157
Step: 21	loss: 874.566223
Step: 22	loss: 877.987732
Step: 23	loss: 877.975769
Step: 24	loss: 878.0649

Step: 294	loss: 877.174500
Step: 295	loss: 876.600830
Step: 296	loss: 878.241516
Step: 297	loss: 874.371460
Step: 298	loss: 876.768555
Step: 299	loss: 877.739746
Step: 300	loss: 877.787537
pval: 0.623752
Training sequence #1 complete
Beginning with training sequence #2
Step: 301	loss: 876.474548
Step: 302	loss: 876.286011
Step: 303	loss: 876.257385
Step: 304	loss: 876.251953
Step: 305	loss: 876.242798
Step: 306	loss: 876.235779
Step: 307	loss: 876.231445
Step: 308	loss: 876.224365
Step: 309	loss: 876.226074
Step: 310	loss: 876.219666
Step: 311	loss: 876.215759
Step: 312	loss: 876.215149
Step: 313	loss: 876.214722
Step: 314	loss: 876.215515
Step: 315	loss: 876.213440
Step: 316	loss: 876.214905
Step: 317	loss: 876.204346
Step: 318	loss: 876.213867
Step: 319	loss: 876.204346
Step: 320	loss: 876.217102
pval: 0.024419
Step: 321	loss: 876.202820
Step: 322	loss: 876.211304
Step: 323	loss: 876.202026
Step: 324	loss: 876.200195
Step: 325	loss: 876.195984
Step: 326	loss: 876.202087
Step: 327	los

## Obtaining the results

The fitted parameters can be retrieved by accessing the corresponding attributes of `estimator`:

In [15]:
# Display the fitted location parameters
# (one row per location design parameter, one column per feature).
estimator.par_link_loc

<xarray.DataArray (design_loc_params: 5, features: 100)>
array([[  8.896719e+00,   8.968782e+00,   8.630840e+00, ...,   9.043062e+00,
          6.607889e+00,   6.775296e+00],
       [  5.645462e-01,   1.241246e-01,   2.213196e-01, ...,   2.810317e-01,
          4.911180e-01,   2.649933e-01],
       [ -5.622654e-01,   5.468256e-01,  -3.109781e-01, ...,  -6.594287e-02,
         -2.112963e-01,   3.426668e-03],
       [  1.686629e-01,   1.246395e-01,   3.659815e-01, ...,   3.122060e-01,
          2.498589e-02,   2.529985e-01],
       [ -3.719978e-01,   4.811932e-01,   2.587149e-01, ...,   5.567902e-01,
         -6.043478e-01,   3.910311e-01]], dtype=float32)
Coordinates:
  * design_loc_params  (design_loc_params) <U14 'Intercept' 'batch[T.1]' ...
    feature_allzero    (features) bool False False False False False False ...
  * features           (features) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ...

In [16]:
# Display the fitted scale parameters
# (one row per scale design parameter, one column per feature).
estimator.par_link_scale

<xarray.DataArray (design_scale_params: 5, features: 100)>
array([[ 1.538816,  1.700791,  1.759085, ...,  1.132266,  0.982784,  1.759065],
       [ 0.258989, -0.418664,  0.513659, ...,  0.464592,  0.323254,  0.236866],
       [ 0.271992,  0.069093, -0.720841, ...,  0.636362,  0.721407, -0.598967],
       [-0.376817, -0.147543, -0.497577, ...,  0.398896, -0.26284 ,  0.095415],
       [-0.407618, -0.126005, -0.580147, ..., -0.578385,  0.373495, -0.049214]], dtype=float32)
Coordinates:
  * design_scale_params  (design_scale_params) <U14 'Intercept' 'batch[T.1]' ...
    feature_allzero      (features) bool False False False False False False ...
  * features             (features) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ...

## Comparing the results with the simulated data:

In [17]:
# Root mean squared deviation between the fitted and the simulated parameters,
# computed separately for the location and the scale model.
locdiff = glm.utils.stats.rmsd(estimator.par_link_loc, sim.par_link_loc)
scalediff = glm.utils.stats.rmsd(estimator.par_link_scale, sim.par_link_scale)

# Report both deviations with two decimal places.
print("Root mean squared deviation of location: %.2f" % locdiff)
print("Root mean squared deviation of scale:    %.2f" % scalediff)

Root mean squared deviation of location: 0.03
Root mean squared deviation of scale:    0.08
