-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
test_model_helpers.py
154 lines (129 loc) · 6.23 KB
/
test_model_helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# Copyright 2020 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import numpy.ma as ma
import numpy.testing as npt
import pandas as pd
import pymc3 as pm
import scipy.sparse as sps
import theano
import theano.tensor as tt
import theano.sparse as sparse
class TestHelperFunc:
    """Tests for the ``pm.model.pandas_to_array`` and ``pm.model.as_tensor`` helpers."""

    def test_pandas_to_array(self):
        """
        Exercise ``pandas_to_array`` with dense, pandas, sparse, masked,
        Theano-graph and generator inputs, checking the type (and, where
        applicable, the shape and contents) of what comes back.
        """
        # Function under test
        convert = pm.model.pandas_to_array

        # ----- Fixtures -----
        var_name = 'input_variable'
        dense = np.arange(9).reshape((3, 3))
        frame = pd.DataFrame(dense)
        sparse_eye = sps.csr_matrix(np.eye(3))
        graph_var = tt.as_tensor(dense, name=var_name)

        # Masked version of `dense`: every even entry is masked out
        even_mask = np.mod(dense, 2) == 0
        masked = ma.array(dense, mask=even_mask)

        # Same data as `dense`, but with NaN in place of each even number
        frame_with_nans = pd.DataFrame(
            np.array(
                [
                    [np.nan, 1, np.nan],
                    [3, np.nan, 5],
                    [np.nan, 7, np.nan],
                ]
            )
        )

        # Generator fixture; per the original author's note, it apparently
        # needs to yield numpy arrays.
        squares = (np.array([i**2], dtype=int) for i in range(100))

        # ----- Complete data: dense array and pandas frame -----
        for complete in (dense, frame):
            converted = convert(complete)
            assert isinstance(converted, np.ndarray)
            assert converted.shape == complete.shape
            npt.assert_allclose(converted, dense)

        # ----- Sparse matrix -----
        sparse_result = convert(sparse_eye)
        assert sps.issparse(sparse_result)
        assert sparse_result.shape == sparse_eye.shape
        npt.assert_allclose(sparse_result.toarray(), sparse_eye.toarray())

        # ----- Missing data: masked array and pandas frame with NaNs -----
        for incomplete in (masked, frame_with_nans):
            converted = convert(incomplete)
            assert isinstance(converted, ma.core.MaskedArray)
            assert converted.shape == incomplete.shape
            npt.assert_allclose(converted, masked)

        # ----- Theano graph variable -----
        graph_result = convert(graph_var)
        assert isinstance(graph_result, theano.gof.graph.Variable)
        assert graph_result.owner.inputs[0].name == var_name

        # ----- Generator -----
        gen_result = convert(squares)
        # The output is wrapped with `pm.floatX`; step back to the wrapped
        # variable through the owner's first input.
        inner = gen_result.owner.inputs[0]
        # The wrapped object must expose .set_gen and .set_default ...
        assert hasattr(inner, "set_gen")
        assert hasattr(inner, "set_default")
        # ... and must be a Theano TensorVariable
        assert isinstance(inner, tt.TensorVariable)

    def test_as_tensor(self):
        """
        Check what ``as_tensor()`` returns for known dense, sparse and masked
        inputs: an ndarray should give a TensorConstant, a sparse matrix
        should give a sparse Theano variable, and a masked array should give
        an object whose ``missing_values`` is set.
        """
        # ----- Fixtures -----
        var_name = 'testing_inputs'
        dense = np.arange(9).reshape((3, 3))
        sparse_eye = sps.csr_matrix(np.eye(3))
        masked = ma.array(dense, mask=(np.mod(dense, 2) == 0))

        # Throwaway model and distribution to hand to the function
        model = pm.Model()
        with model:
            dist = pm.Normal.dist(mu=0, sigma=1)
            # The testval attribute is created purely for the sake of the test
            dist.testval = None

        # Function under test
        convert = pm.model.as_tensor

        # Run the function on each input, in order: dense, sparse, masked
        dense_out, sparse_out, masked_out = [
            convert(data, var_name, model, dist)
            for data in (dense, sparse_eye, masked)
        ]
        unmasked_outputs = (dense_out, sparse_out)

        # Inputs without a mask must report no missing values
        for result in unmasked_outputs:
            assert result.missing_values is None

        # The Theano variable names must match the name passed in. Outputs for
        # masked inputs do not carry that name, so they are not checked here.
        for result in unmasked_outputs:
            assert result.name == var_name

        # Returned objects must be of the expected kinds
        assert isinstance(dense_out, tt.TensorConstant)
        assert sparse.basic._is_sparse_variable(sparse_out)

        # The concrete type of the masked output is not pinned down here;
        # only check that it records missing values.
        assert masked_out.missing_values is not None