# Tests for the normal distribution model (glm_norm)

In [1]:
import os
import datetime
import numpy as np
import pprint

import logging
import warnings

%load_ext autoreload
%autoreload 2

In [2]:
import batchglm.api as glm

# Silence all Python warnings for this session.
# `np.warnings` was only an accidental re-export of the stdlib `warnings`
# module and no longer exists in newer NumPy releases (1.24+), where it raises
# AttributeError. Calling the stdlib module (imported in the first cell)
# installs exactly the same filter.
warnings.filterwarnings("ignore")
# Explicit TensorFlow deprecation filter; already covered by the blanket
# filter above, kept so it survives if that one is ever narrowed.
warnings.filterwarnings("ignore", category=DeprecationWarning, module="tensorflow")

# Emit INFO-level log records from the libraries used in this notebook.
logging.getLogger("tensorflow").setLevel(logging.INFO)
logging.getLogger("batchglm").setLevel(logging.INFO)
logging.getLogger("diffxpy").setLevel(logging.INFO)

## Import batchglm

In [3]:
# NOTE(review): batchglm is already imported as `glm` in an earlier cell, so
# this cell is a no-op; the disabled import is kept only for reference.
#import batchglm.api as glm

## Sample from Normal Distribution

In [4]:
# Build a sampler object from batchglm's random utilities.
# NOTE(review): the recorded samples below cluster around 500, so the
# positional arguments are presumably (mean, sd) -- confirm against batchglm.
norm = glm.utils.random.Normal(500, 1)

In [5]:
# Draw 10 values and display them as the cell output.
# NOTE(review): no random seed is set in the visible cells, so the recorded
# numbers will presumably not reproduce exactly on re-run.
norm.sample(10)

array([501.25935817, 498.68799571, 499.31995862, 498.29183471,
       499.11170989, 501.35713775, 501.46349582, 500.31657626,
       502.07566594, 501.7752767 ])

## Simulate some Data

In [6]:
# Create a simulator with its default settings; the recorded output of
# `sim.X` below shows 1000 observations x 100 features.
sim = glm.models.glm_norm.Simulator()

In [7]:
# Run the simulation; presumably this draws the parameters and data that the
# following cells inspect (sim.X, sim.sd, sim.par_link_loc, ...).
# NOTE(review): no seed is set in the visible cells -- results likely vary per run.
sim.generate()

In [8]:
# Display the simulated data matrix (observations x features per the repr below).
sim.X

<xarray.DataArray 'X' (observations: 1000, features: 100)>
array([[184684.792063, 146479.581112, 177765.222345, ..., 187925.811221,
        108955.104953, 103362.985605],
       [184851.63215 , 146656.764463, 177912.245487, ..., 188046.511744,
        109048.40107 , 103491.650847],
       [184696.466702, 146479.42617 , 177757.726847, ..., 187908.938225,
        108946.922523, 103365.457247],
       ...,
       [184464.339317, 147032.941101, 178067.154883, ..., 187907.035463,
        109245.753269, 103565.152714],
       [184813.295841, 146605.080131, 177938.192729, ..., 188060.918785,
        109127.556121, 103663.173937],
       [184210.207117, 146885.742137, 178073.142143, ..., 188149.333242,
        109135.261876, 103606.18264 ]])
Dimensions without coordinates: observations, features

In [9]:
# Display `sim.sd`; it has the same (observations x features) shape as `sim.X`.
# Presumably the per-entry standard deviation of the model -- TODO confirm.
sim.sd

<xarray.DataArray (observations: 1000, features: 100)>
array([[  9.257123,   4.552421,   4.042198, ...,   7.539886,   8.90448 ,
          8.680762],
       [ 50.241062,  25.479648,  10.437355, ...,  58.996728,  58.666484,
         59.985968],
       [  9.257123,   4.552421,   4.042198, ...,   7.539886,   8.90448 ,
          8.680762],
       ...,
       [400.148629,  72.955467,  27.265876, ..., 249.02139 , 255.458282,
        482.595757],
       [ 73.729038,  13.034873,  10.559578, ...,  31.825372,  38.773809,
         69.837982],
       [400.148629,  72.955467,  27.265876, ..., 249.02139 , 255.458282,
        482.595757]])
Dimensions without coordinates: observations, features

In [10]:
# Display the simulated location parameters: one row per location design
# parameter ('Intercept' ... 'batch[T.3]' in the recorded output), one column per feature.
sim.par_link_loc

<xarray.DataArray (design_loc_params: 5, features: 100)>
array([[1.846841e+05, 1.464818e+05, 1.777597e+05, ..., 1.879199e+05,
        1.089524e+05, 1.033692e+05],
       [1.975500e+02, 1.633729e+02, 1.521928e+02, ..., 1.072455e+02,
        1.107797e+02, 1.718046e+02],
       [1.182931e+02, 1.770706e+02, 1.211661e+02, ..., 1.027303e+02,
        1.368892e+02, 1.564950e+02],
       [1.122457e+02, 1.579796e+02, 1.801606e+02, ..., 1.290159e+02,
        1.921865e+02, 1.971535e+02],
       [1.803929e+02, 1.350850e+02, 1.875733e+02, ..., 1.196119e+02,
        1.468283e+02, 1.790403e+02]])
Coordinates:
  * design_loc_params  (design_loc_params) object 'Intercept' ... 'batch[T.3]'
Dimensions without coordinates: features

In [11]:
# Display the simulated scale parameters: one row per scale design
# parameter ('Intercept' ... 'batch[T.3]' in the recorded output), one column per feature.
sim.par_link_scale

<xarray.DataArray (design_scale_params: 5, features: 100)>
array([[2.225393, 1.515659, 1.396789, ..., 2.020207, 2.186554, 2.161109],
       [1.691439, 1.722221, 0.948603, ..., 2.057275, 1.885314, 1.933001],
       [2.2181  , 1.962733, 0.832975, ..., 1.220109, 0.665833, 2.037248],
       [1.719584, 1.26691 , 1.777269, ..., 0.453733, 1.334934, 1.30424 ],
       [2.075003, 1.051969, 0.960245, ..., 1.440057, 1.471191, 2.085069]])
Coordinates:
  * design_scale_params  (design_scale_params) object 'Intercept' ... 'batch[T.3]'
Dimensions without coordinates: features

## Set up Estimator

In [12]:
# Pull the simulated observations and the location/scale designs off the simulator.
X, design_loc, design_scale = sim.X, sim.design_loc, sim.design_scale

# Bundle them into the input container that the estimator consumes.
input_data = glm.models.glm_norm.InputData.new(
    data=X,
    design_loc=design_loc,
    design_scale=design_scale,
)

In [21]:
# Estimator on the simulated input, with both parameter groups initialised via
# "closed_form". The optimizer map switches on the gradient-based and
# "nr"/"nr_tr" optimizers and switches off every "irls*" variant.
estimator = glm.models.glm_norm.Estimator(
    input_data,
    init_a="closed_form",
    init_b="closed_form",
    provide_optimizers={
        **dict.fromkeys(("gd", "adam", "adagrad", "rmsprop", "nr", "nr_tr"), True),
        **dict.fromkeys(("irls", "irls_gd", "irls_tr", "irls_gd_tr"), False),
    },
)

In [22]:
# Initialise the estimator; the recorded log shows the TensorFlow graph being
# finalized and `local_init_op` being run.
estimator.initialize()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [23]:
# One training phase: the "nr_tr" optimizer without batching, stopping on the
# "all_converged_ll" criterion at a threshold of 1e-6.
nr_tr_strategy = {
    "learning_rate": 1,
    "convergence_criteria": "all_converged_ll",
    "stopping_criteria": 1e-6,
    "use_batching": False,
    "optim_algo": "nr_tr",
}
estimator.train_sequence(training_strategy=[nr_tr_strategy])

INFO:tensorflow:Step: 0 loss: 518.852340 models converged 0
INFO:tensorflow:Step: 1 loss: 517.642441, converged 54 in 0.635 sec., updated 95, {f: 0, g: 54, x: 0}
INFO:tensorflow:Step: 2 loss: 517.123870, converged 56 in 0.137 sec., updated 43, {f: 0, g: 2, x: 0}
INFO:tensorflow:Step: 3 loss: 516.835763, converged 62 in 0.148 sec., updated 41, {f: 6, g: 6, x: 0}
INFO:tensorflow:Step: 4 loss: 516.792731, converged 77 in 0.147 sec., updated 35, {f: 14, g: 14, x: 0}
INFO:tensorflow:Step: 5 loss: 516.791437, converged 88 in 0.141 sec., updated 20, {f: 11, g: 11, x: 0}
INFO:tensorflow:Step: 6 loss: 516.791434, converged 94 in 0.14 sec., updated 9, {f: 6, g: 5, x: 0}
INFO:tensorflow:Step: 7 loss: 516.791434, converged 97 in 0.138 sec., updated 3, {f: 3, g: 3, x: 0}
INFO:tensorflow:Step: 8 loss: 516.791434, converged 100 in 0.133 sec., updated 0, {f: 0, g: 0, x: 3}


In [24]:
# Finalize the estimator and keep the returned object; its `a` and `b`
# attributes are compared against the simulated parameters in the cells below.
store = estimator.finalize()

In [25]:
# Mean signed difference between fitted and simulated `a` parameters.
# NOTE(review): positive and negative errors cancel in a signed mean, so this
# measures bias rather than error magnitude; consider np.mean(np.abs(...)) too.
np.mean(store.a - sim.a)

<xarray.DataArray ()>
array(1.308458)

In [26]:
# Mean signed difference between fitted and simulated `b` parameters.
# NOTE(review): positive and negative errors cancel in a signed mean, so this
# measures bias rather than error magnitude; consider np.mean(np.abs(...)) too.
np.mean(store.b - sim.b)

<xarray.DataArray ()>
array(0.001505)

In [27]:
# Display the simulated `a` parameters for visual comparison with `store.a`.
# The recorded values match those shown for `sim.par_link_loc` earlier.
sim.a

<xarray.DataArray (design_loc_params: 5, features: 100)>
array([[1.846841e+05, 1.464818e+05, 1.777597e+05, ..., 1.879199e+05,
        1.089524e+05, 1.033692e+05],
       [1.975500e+02, 1.633729e+02, 1.521928e+02, ..., 1.072455e+02,
        1.107797e+02, 1.718046e+02],
       [1.182931e+02, 1.770706e+02, 1.211661e+02, ..., 1.027303e+02,
        1.368892e+02, 1.564950e+02],
       [1.122457e+02, 1.579796e+02, 1.801606e+02, ..., 1.290159e+02,
        1.921865e+02, 1.971535e+02],
       [1.803929e+02, 1.350850e+02, 1.875733e+02, ..., 1.196119e+02,
        1.468283e+02, 1.790403e+02]])
Coordinates:
  * design_loc_params  (design_loc_params) object 'Intercept' ... 'batch[T.3]'
Dimensions without coordinates: features

In [28]:
# Display the fitted `a` parameters; the recorded output is a plain array repr
# without the xarray header that `sim.a` has.
# NOTE(review): this prints the array in full -- consider slicing,
# e.g. store.a[:, :5], to keep the output readable.
store.a

array([[1.84684035e+05, 1.46482134e+05, 1.77759396e+05, 1.29542873e+05,
        1.86638163e+05, 1.27327587e+05, 1.62138700e+05, 1.51307242e+05,
        1.60869682e+05, 1.82674336e+05, 1.52276368e+05, 1.09860426e+05,
        1.34065479e+05, 1.97825525e+05, 1.35473652e+05, 1.48197361e+05,
        1.10791870e+05, 1.21283012e+05, 1.18428972e+05, 1.18438355e+05,
        1.54764642e+05, 1.83225889e+05, 1.05420172e+05, 1.01247749e+05,
        1.97850781e+05, 1.87338352e+05, 1.93633939e+05, 1.82969872e+05,
        1.42452074e+05, 1.43964745e+05, 1.15945111e+05, 1.03576433e+05,
        1.63179291e+05, 1.75457417e+05, 1.57364061e+05, 1.93780721e+05,
        1.47416919e+05, 1.11934950e+05, 1.08922098e+05, 1.71383180e+05,
        1.85360257e+05, 1.02213524e+05, 1.25128348e+05, 1.90141984e+05,
        1.31254054e+05, 1.31255813e+05, 1.95511658e+05, 1.76724776e+05,
        1.75441530e+05, 1.57668023e+05, 1.04387613e+05, 1.20758852e+05,
        1.71224249e+05, 1.53764800e+05, 1.11338186e+05, 1.021114