# Tests for the Normal distribution (glm_norm)

In [1]:
import os
import datetime
import numpy as np
import pprint

import logging
import warnings

%load_ext autoreload
%autoreload 2

In [2]:
import batchglm.api as glm

# Silence all Python warnings for this notebook. `np.warnings` was only an
# accidental re-export of the stdlib `warnings` module and is missing from
# recent NumPy releases, so use the stdlib module (imported in the first cell).
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning, module="tensorflow")

# Keep INFO-level log records so the training progress lines are displayed.
logging.getLogger("tensorflow").setLevel(logging.INFO)
logging.getLogger("batchglm").setLevel(logging.INFO)
logging.getLogger("diffxpy").setLevel(logging.INFO)

## Import batchglm

In [3]:
# batchglm is already imported as `glm` in the setup cell above; nothing to do here.

## Sample from Normal Distribution

In [4]:
# Build a Normal sampler from batchglm's random utilities.
# NOTE(review): the positional arguments look like (mean=500, sd=0.7) -- the
# saved samples below cluster tightly around 500 -- confirm against the
# batchglm signature and consider passing them by keyword.
norm = glm.utils.random.Normal(500, 0.7)

In [5]:
norm.sample(10)

array([499.81300155, 499.67698884, 501.14545915, 500.03680682,
       499.91719565, 500.55032355, 500.09589318, 499.8435251 ,
       499.59043686, 500.5751665 ])

## Simulate some Data

In [6]:
sim = glm.models.glm_norm.Simulator()

In [7]:
sim.generate()

In [8]:
sim.X

<xarray.DataArray 'X' (observations: 1000, features: 100)>
array([[1825.736317, 1717.151464, 2058.068455, ..., 1338.386091, 1553.415151,
        1473.683407],
       [1824.984823, 1716.314222, 2053.891666, ..., 1338.511869, 1552.190071,
        1473.468847],
       [1822.978204, 1715.913483, 2055.33396 , ..., 1339.48997 , 1552.839886,
        1473.543676],
       ...,
       [1824.026528, 1715.670734, 2057.29308 , ..., 1338.608833, 1553.574349,
        1474.881435],
       [1823.777335, 1717.562966, 2055.145852, ..., 1338.979296, 1553.043821,
        1476.517067],
       [1825.012217, 1715.266316, 2057.753804, ..., 1339.927605, 1553.114601,
        1475.45076 ]])
Dimensions without coordinates: observations, features

In [9]:
sim.mean

<xarray.DataArray (observations: 1000, features: 100)>
array([[1824.072639, 1716.272203, 2056.038906, ..., 1338.973189, 1553.245036,
        1474.503634],
       [1824.072639, 1716.272203, 2056.038906, ..., 1338.973189, 1553.245036,
        1474.503634],
       [1824.072639, 1716.272203, 2056.038906, ..., 1338.973189, 1553.245036,
        1474.503634],
       ...,
       [1824.072639, 1716.272203, 2056.038906, ..., 1338.973189, 1553.245036,
        1474.503634],
       [1824.072639, 1716.272203, 2056.038906, ..., 1338.973189, 1553.245036,
        1474.503634],
       [1824.072639, 1716.272203, 2056.038906, ..., 1338.973189, 1553.245036,
        1474.503634]])
Dimensions without coordinates: observations, features

In [10]:
sim.par_link_loc

<xarray.DataArray (design_loc_params: 5, features: 100)>
array([[1.824073e+03, 1.716272e+03, 2.056039e+03, ..., 1.338973e+03,
        1.553245e+03, 1.474504e+03],
       [1.000000e-08, 1.000000e-08, 1.000000e-08, ..., 1.000000e-08,
        1.000000e-08, 1.000000e-08],
       [1.000000e-08, 1.000000e-08, 1.000000e-08, ..., 1.000000e-08,
        1.000000e-08, 1.000000e-08],
       [1.000000e-08, 1.000000e-08, 1.000000e-08, ..., 1.000000e-08,
        1.000000e-08, 1.000000e-08],
       [1.000000e-08, 1.000000e-08, 1.000000e-08, ..., 1.000000e-08,
        1.000000e-08, 1.000000e-08]])
Coordinates:
  * design_loc_params  (design_loc_params) object 'Intercept' ... 'batch[T.3]'
Dimensions without coordinates: features

In [11]:
sim.par_link_scale

<xarray.DataArray (design_scale_params: 5, features: 100)>
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])
Coordinates:
  * design_scale_params  (design_scale_params) object 'Intercept' ... 'batch[T.3]'
Dimensions without coordinates: features

## Set up Estimator

In [12]:
# Pull the simulated observations and both design matrices out of the simulator.
X, design_loc, design_scale = sim.X, sim.design_loc, sim.design_scale

# Bundle them into the input container that is handed to the estimator.
input_data = glm.models.glm_norm.InputData.new(
    data=X,
    design_loc=design_loc,
    design_scale=design_scale,
)

In [13]:
# Build the estimator on the simulated input. The first six optimizers
# ("gd" ... "nr_tr") are made available; all four "irls*" variants are switched off.
estimator = glm.models.glm_norm.Estimator(
    input_data,
    init_a="closed_form",
    init_b="standard",
    provide_optimizers={
        "gd": True,
        "adam": True,
        "adagrad": True,
        "rmsprop": True,
        "nr": True,
        "nr_tr": True,
        "irls": False,
        "irls_gd": False,
        "irls_tr": False,
        "irls_gd_tr": False,
    },
)

In [14]:
estimator.initialize()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [15]:
# Single training phase: no batching, "nr_tr" optimizer, stop once every model
# meets the "all_converged_ll" criterion at the 1e-6 threshold.
estimator.train_sequence(
    training_strategy=[
        {
            "learning_rate": 1,
            "convergence_criteria": "all_converged_ll",
            "stopping_criteria": 1e-6,
            "use_batching": False,
            "optim_algo": "nr_tr",
        },
    ]
)

INFO:tensorflow:Step: 0 loss: 74444.007192 models converged 0
INFO:tensorflow:Step: 1 loss: 74444.007192, converged 100 in 0.797 sec., updated 0, {f: 0, g: 100, x: 0}


In [16]:
store = estimator.finalize()

In [22]:
store.a[0,:]

array([283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91308516,
       283.91308516, 283.91308516, 283.91308516, 283.91

In [18]:
sim.a[0, :]

<xarray.DataArray (features: 100)>
array([1824.072639, 1716.272203, 2056.038906, 2120.103698, 1532.846973,
       1528.466331, 1261.464591, 1777.739713, 1967.850575, 1292.692008,
       1581.014622, 1482.277993, 1019.19021 , 1278.826588, 1346.757696,
       1634.915703, 1886.123402, 1599.484147, 1411.332042, 1212.843183,
       1123.296401, 1280.61181 , 1002.096508, 1735.527243, 2094.811979,
       1060.903049, 1513.869454, 1707.829402, 1142.400485, 1898.505965,
       1458.644519, 1481.455326, 1221.431544, 1609.599   , 1682.243177,
       1566.522661, 1719.416092, 1195.099899, 1864.987071, 1612.083425,
       1394.067084, 1622.161249, 1742.31738 , 1109.616013, 1112.524298,
       2001.788263, 1634.033024, 1349.785632, 1466.259668, 1626.582973,
       1407.379542, 1904.402993, 2026.989203, 1775.926046, 1638.616197,
       1195.298279, 1873.293301, 1434.340039, 2008.238807, 1348.614909,
       1752.291047, 1976.446388, 1782.418839, 1299.910167, 1023.587471,
       1892.333133, 1021.1487

In [19]:
# Per-feature gap between the simulated and the fitted row 0 of `a`
# (presumably the 'Intercept' row -- it is the first location coordinate shown above).
# NOTE(review): in the saved outputs the fitted row is a constant 283.913 for
# every feature while the simulated values range over roughly 1000-2100, so the
# gaps below are on the order of 1e3 rather than ~0. The `store.a` cell also
# carries execution count 22 (out of sequence) -- re-run the notebook top to
# bottom on a fresh kernel and confirm whether the estimator really fails to
# recover the simulated parameters.
sim.a[0, :] - store.a[0, :]

<xarray.DataArray (features: 100)>
array([1540.159554, 1432.359118, 1772.125821, 1836.190613, 1248.933888,
       1244.553246,  977.551506, 1493.826628, 1683.93749 , 1008.778923,
       1297.101536, 1198.364908,  735.277125,  994.913503, 1062.844611,
       1351.002618, 1602.210317, 1315.571062, 1127.418956,  928.930098,
        839.383316,  996.698724,  718.183423, 1451.614157, 1810.898894,
        776.989964, 1229.956369, 1423.916317,  858.4874  , 1614.592879,
       1174.731434, 1197.542241,  937.518459, 1325.685915, 1398.330092,
       1282.609576, 1435.503007,  911.186814, 1581.073985, 1328.170339,
       1110.153999, 1338.248164, 1458.404295,  825.702928,  828.611213,
       1717.875178, 1350.119939, 1065.872547, 1182.346583, 1342.669888,
       1123.466457, 1620.489908, 1743.076118, 1492.012961, 1354.703112,
        911.385194, 1589.380216, 1150.426954, 1724.325722, 1064.701824,
       1468.377962, 1692.533303, 1498.505754, 1015.997082,  739.674386,
       1608.420048,  737.2357

In [20]:
sim.X.mean()

<xarray.DataArray 'X' ()>
array(1569.870755)

In [21]:
sim.a_var

<xarray.DataArray 'a_var' (loc_params: 5, features: 100)>
array([[1.824073e+03, 1.716272e+03, 2.056039e+03, ..., 1.338973e+03,
        1.553245e+03, 1.474504e+03],
       [1.000000e-08, 1.000000e-08, 1.000000e-08, ..., 1.000000e-08,
        1.000000e-08, 1.000000e-08],
       [1.000000e-08, 1.000000e-08, 1.000000e-08, ..., 1.000000e-08,
        1.000000e-08, 1.000000e-08],
       [1.000000e-08, 1.000000e-08, 1.000000e-08, ..., 1.000000e-08,
        1.000000e-08, 1.000000e-08],
       [1.000000e-08, 1.000000e-08, 1.000000e-08, ..., 1.000000e-08,
        1.000000e-08, 1.000000e-08]])
Coordinates:
  * loc_params  (loc_params) <U14 'Intercept' 'condition[T.1]' ... 'batch[T.3]'
Dimensions without coordinates: features