# Tests for the normal distribution model (glm_norm)

In [1]:
import os
import datetime
import numpy as np
import pprint

import logging
import warnings

# Load the autoreload extension so edits to imported modules are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import batchglm.api as glm

# Silence all Python warnings for this session.
# `np.warnings` was only an accidental re-export of the standard-library
# `warnings` module and is no longer available in NumPy >= 1.24, so the
# stdlib module is called directly (identical effect on older NumPy).
warnings.filterwarnings("ignore")
# Redundant while the blanket filter above is active; kept so TensorFlow
# deprecation warnings stay hidden if that filter is ever removed.
warnings.filterwarnings("ignore", category=DeprecationWarning, module="tensorflow")

# Let INFO-level messages through on each of these library loggers.
logging.getLogger("tensorflow").setLevel(logging.INFO)
logging.getLogger("batchglm").setLevel(logging.INFO)
logging.getLogger("diffxpy").setLevel(logging.INFO)

## Simulate some Data

In [3]:
# Create a glm_norm simulator with its default settings (no arguments passed).
sim = glm.models.glm_norm.Simulator()

In [4]:
# Fix the seed so the simulated data, and every comparison against it further
# down, is the same on each Restart & Run All.
# NOTE(review): assumes the simulator draws from NumPy's global RNG - confirm.
np.random.seed(0)
# Populate the simulator's data and parameters, which are inspected below.
sim.generate()

In [5]:
# Inspect the simulated data matrix X (observations x features).
sim.X

<xarray.DataArray 'X' (observations: 1000, features: 100)>
array([[153530.668053, 184471.534877, 169294.048184, ..., 124758.218499,
        163299.072128, 168342.198344],
       [153727.148687, 184660.252114, 169413.785819, ..., 124985.662782,
        163416.764229, 168525.694527],
       [153524.265736, 184464.054546, 169294.235636, ..., 124763.473351,
        163296.463545, 168330.185236],
       ...,
       [153823.158262, 185136.831761, 169525.750169, ..., 124938.104856,
        163703.131904, 168728.535786],
       [153620.064816, 184690.47005 , 169416.598402, ..., 124924.478733,
        163443.696657, 168461.014046],
       [153870.35622 , 184581.061892, 169478.678999, ..., 125108.260219,
        163420.133359, 168725.511263]])
Dimensions without coordinates: observations, features

In [6]:
# Inspect the simulator's `sd` array (observations x features).
# NOTE(review): presumably the per-entry standard deviations used to draw X - confirm.
sim.sd

<xarray.DataArray (observations: 1000, features: 100)>
array([[  3.75385 ,   8.645385,   7.135442, ...,   8.487027,   3.7549  ,
          6.458461],
       [ 10.768345,  33.85133 ,  36.596781, ...,  82.445376,  29.838425,
          9.863909],
       [  3.75385 ,   8.645385,   7.135442, ...,   8.487027,   3.7549  ,
          6.458461],
       ...,
       [ 35.960157, 246.471101,  90.036937, ..., 130.847114, 155.863109,
         59.458407],
       [ 12.535728,  62.946941,  17.554913, ...,  13.469561,  19.613986,
         38.930793],
       [ 35.960157, 246.471101,  90.036937, ..., 130.847114, 155.863109,
         59.458407]])
Dimensions without coordinates: observations, features

In [7]:
# Inspect the simulated location-model parameters (design_loc_params x features).
# NOTE(review): the name suggests values on the link scale - confirm.
sim.par_link_loc

<xarray.DataArray (design_loc_params: 5, features: 100)>
array([[1.535276e+05, 1.844713e+05, 1.692924e+05, ..., 1.247599e+05,
        1.632980e+05, 1.683406e+05],
       [1.941580e+02, 1.618886e+02, 1.078499e+02, ..., 1.376593e+02,
        1.037169e+02, 1.901242e+02],
       [1.113831e+02, 1.690494e+02, 1.244814e+02, ..., 1.450850e+02,
        1.374363e+02, 1.623521e+02],
       [1.941130e+02, 1.235625e+02, 1.801599e+02, ..., 1.154509e+02,
        1.729885e+02, 1.536259e+02],
       [1.067593e+02, 1.453621e+02, 1.029423e+02, ..., 1.707639e+02,
        1.541521e+02, 1.270179e+02]])
Coordinates:
  * design_loc_params  (design_loc_params) object 'Intercept' ... 'batch[T.3]'
Dimensions without coordinates: features

In [8]:
# Inspect the simulated scale-model parameters (design_scale_params x features).
# NOTE(review): the name suggests values on the link scale - confirm.
sim.par_link_scale

<xarray.DataArray (design_scale_params: 5, features: 100)>
array([[1.322782, 2.157026, 1.965074, ..., 2.138539, 1.323062, 1.865391],
       [1.053829, 1.364953, 1.634886, ..., 2.273597, 2.072735, 0.423491],
       [2.103463, 2.022464, 1.513653, ..., 0.87566 , 2.288657, 1.114935],
       [2.225734, 0.831343, 2.051622, ..., 0.626995, 1.506541, 1.975494],
       [1.205801, 1.985266, 0.90026 , ..., 0.461894, 1.653181, 1.796394]])
Coordinates:
  * design_scale_params  (design_scale_params) object 'Intercept' ... 'batch[T.3]'
Dimensions without coordinates: features

## Set up Estimator

In [9]:
# Take the simulated observations and the location/scale designs from the simulator.
X, design_loc, design_scale = sim.X, sim.design_loc, sim.design_scale

# Bundle the data and both designs into the InputData object passed to the estimator.
input_data = glm.models.glm_norm.InputData.new(
    data=X,
    design_loc=design_loc,
    design_scale=design_scale,
)

In [13]:
# Build the estimator with "standard" initialisation for both a and b.
# gd/adam/adagrad/rmsprop are provided; the nr* and irls* optimizers and the
# Hessian are switched off.
estimator = glm.models.glm_norm.Estimator(
    input_data,
    init_a="standard",
    init_b="standard",
    provide_optimizers={
        **dict.fromkeys(("gd", "adam", "adagrad", "rmsprop"), True),
        **dict.fromkeys(
            ("nr", "nr_tr", "irls", "irls_gd", "irls_tr", "irls_gd_tr"), False
        ),
    },
    provide_hessian=False,
)

In [14]:
# Initialize the estimator before training (the recorded log shows the
# TensorFlow graph being finalized and local_init_op being run).
estimator.initialize()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [17]:
# One training stage: the "gd" optimizer without batching, using the
# "all_converged" criterion with a 1e-6 stopping threshold.
# NOTE(review): the recorded log shows the same loss at step 0 and step 1 with
# all 100 models reported as converged - check that this stage moves the
# parameters at all.
estimator.train_sequence(
    training_strategy=[
        dict(
            learning_rate=1,
            convergence_criteria="all_converged",
            stopping_criteria=1e-6,
            use_batching=False,
            optim_algo="gd",
        ),
    ]
)

INFO:tensorflow:Step: 0 loss: 615.682423 models converged 0
INFO:tensorflow:Step: 1 loss: 615.682423, converged 100 in 0.227 sec., updated 100, {f: 100, g: 100, x: 100}


In [18]:
# Finalize the estimator and keep the returned object; its fitted parameters
# (`store.a`, `store.b`) are compared against the simulation below.
store = estimator.finalize()

In [20]:
# Element-wise difference between the fitted and the simulated location
# parameters `a` (design_loc_params x features); values near 0 mean a good fit.
store.a - sim.a

<xarray.DataArray (design_loc_params: 5, features: 100)>
array([[ 2.079207e-02,  5.072105e+00,  5.357422e+00, ...,  7.132302e+00,
         3.804407e+00,  5.184096e-01],
       [-3.651440e-01, -9.410049e+00, -1.232361e+01, ...,  3.187043e+00,
        -8.003614e+00, -6.045565e-01],
       [-1.111495e+01,  6.376660e+00,  4.579032e+00, ..., -6.709791e+00,
        -8.973671e+00,  2.515338e+00],
       [ 5.207923e+00, -3.547651e+00, -2.312268e+01, ..., -1.051617e+01,
        -3.800159e+00,  1.252344e+00],
       [ 4.002172e+00, -9.691013e+00, -8.613539e-01, ..., -1.567684e+01,
         2.253166e-01, -5.545119e+00]])
Coordinates:
  * design_loc_params  (design_loc_params) object 'Intercept' ... 'batch[T.3]'
Dimensions without coordinates: features

In [None]:
# Mean signed difference between the fitted and the simulated `b` parameters.
# Positive and negative errors cancel here, so this measures bias, not error size.
# NOTE(review): `b` is presumably the scale-model parameter set - confirm.
np.mean(store.b - sim.b)

In [None]:
# Fitted `a` parameters as held by the finalized store.
store.a

In [None]:
# Simulated `a` parameters, the reference the fitted values are compared against.
sim.a

In [None]:
# Element-wise difference between the fitted and the simulated `b` parameters.
store.b - sim.b

In [None]:
# Simulated `b` parameters, the reference the fitted values are compared against.
sim.b