From 5297c64137da2d6f2ecdf99da8d21de81545afd9 Mon Sep 17 00:00:00 2001 From: Ari Hartikainen Date: Thu, 9 Sep 2021 11:34:48 +0300 Subject: [PATCH 1/6] fix shaping issues --- cmdstanpy/stanfit.py | 46 ++++++++++++++++++++++------------------ docsrc/conf.py | 4 +++- test/test_optimize.py | 10 +++++---- test/test_variational.py | 4 ++-- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/cmdstanpy/stanfit.py b/cmdstanpy/stanfit.py index d4a95bd5..d3a500d4 100644 --- a/cmdstanpy/stanfit.py +++ b/cmdstanpy/stanfit.py @@ -1074,7 +1074,6 @@ def draws_xr( 0, self.draws(inc_warmup=inc_warmup), ) - return xr.Dataset(data, coords=coordinates, attrs=attrs).transpose( 'chain', 'draw', ... ) @@ -1373,7 +1372,7 @@ def stan_variable( inc_iterations: bool = False, warn: bool = True, name: Optional[str] = None, - ) -> np.ndarray: + ) -> Union[np.ndarray, float]: """ Return a numpy.ndarray which contains the estimates for the for the named Stan program variable where the dimensions of the @@ -1416,13 +1415,12 @@ def stan_variable( 'Invalid estimate, optimization failed to converge.' ) - col_idxs = self._metadata.stan_vars_cols[var] + col_idxs = list(self._metadata.stan_vars_cols[var]) if inc_iterations and self._save_iterations: num_rows = self._all_iters.shape[0] else: num_rows = 1 - - if len(col_idxs) > 0: # container var + if len(col_idxs) > 1: # container var dims = (num_rows,) + self._metadata.stan_vars_dims[var] # pylint: disable=redundant-keyword-arg if num_rows > 1: @@ -1430,19 +1428,16 @@ def stan_variable( dims, order='F' ) else: - mle = np.expand_dims(self._mle, axis=0) # hack for col indexing - result = ( - mle[0, col_idxs] - .reshape(dims, order='F') # type: ignore - .squeeze(axis=0) - ) + result = self._mle[col_idxs].reshape(dims[1:], order="F") else: # scalar var + col_idx = col_idxs[0] if num_rows > 1: - result = self._all_iters[:, col_idxs] + result = self._all_iters[:, col_idx] else: - result = np.atleast_1d(mle[0, col_idxs]) - - assert isinstance(result, np.ndarray) # make the typechecker happy + result = float(self._mle[col_idx]) + assert isinstance( + result, (np.ndarray, float) + ) # make the typechecker happy return result def stan_variables( @@ -2143,7 +2138,7 @@ def metadata(self) -> InferenceMetadata: def stan_variable( self, var: Optional[str] = None, *, name: Optional[str] = None - ) -> np.ndarray: + ) -> Union[np.ndarray, float]: """ Return a numpy.ndarray which contains the estimates for the for the named Stan program variable where the dimensions of the @@ -2172,12 +2167,16 @@ def stan_variable( if var not in self._metadata.stan_vars_dims: raise ValueError('Unknown variable name: {}'.format(var)) col_idxs = list(self._metadata.stan_vars_cols[var]) - vals = list(self._variational_mean) - xs = [vals[x] for x in col_idxs] shape: Tuple[int, ...] = () - if len(col_idxs) > 0: + if len(col_idxs) > 1: shape = self._metadata.stan_vars_dims[var] - return np.array(xs).reshape(shape) + result = np.asarray(self._variational_mean)[col_idxs].reshape( + shape, order="F" + ) + else: + result = float(self._variational_mean[col_idxs[0]]) + assert isinstance(result, (np.ndarray, float)) + return result def stan_variables(self) -> Dict[str, np.ndarray]: """ @@ -2424,7 +2423,12 @@ def build_xarray_data( var_dims: Tuple[str, ...] = ('draw', 'chain') if dims: var_dims += tuple(f"{var_name}_dim_{i}" for i in range(len(dims))) - data[var_name] = (var_dims, drawset[start_row:, :, col_idxs]) + data[var_name] = ( + var_dims, + drawset[start_row:, :, col_idxs].reshape( + *drawset.shape[:2], *dims, order="F" + ), + ) else: data[var_name] = ( var_dims, diff --git a/docsrc/conf.py b/docsrc/conf.py index 667634d7..f55c6d67 100644 --- a/docsrc/conf.py +++ b/docsrc/conf.py @@ -441,5 +441,7 @@ def emit(self, record): # } # Makes the copying behavior on code examples cleaner by removing things like In [10]: from the text to be copied -copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " +copybutton_prompt_text = ( + r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " +) copybutton_prompt_is_regexp = True diff --git a/test/test_optimize.py b/test/test_optimize.py index 4e246a8e..2ec47ea3 100644 --- a/test/test_optimize.py +++ b/test/test_optimize.py @@ -208,13 +208,15 @@ def test_variable_bern(self): self.assertTrue('theta' in bern_mle.metadata.stan_vars_dims) self.assertEqual(bern_mle.metadata.stan_vars_dims['theta'], ()) theta = bern_mle.stan_variable(var='theta') - self.assertEqual(theta.shape, ()) + self.assertTrue(isinstance(theta, float)) with self.assertRaises(ValueError): bern_mle.stan_variable(var='eta') with self.assertRaises(ValueError): bern_mle.stan_variable(var='lp__') with LogCapture() as log: - self.assertEqual(bern_mle.stan_variable(name='theta').shape, ()) + self.assertTrue( + isinstance(bern_mle.stan_variable(name='theta'), float) + ) log.check_present( ( 'cmdstanpy', @@ -250,7 +252,7 @@ def test_variables_3d(self): var_beta = multidim_mle.stan_variable(var='beta') self.assertEqual(var_beta.shape, (2,)) # 1-element tuple var_frac_60 = multidim_mle.stan_variable(var='frac_60') - self.assertEqual(var_frac_60.shape, ()) + self.assertTrue(isinstance(var_frac_60, float)) vars = multidim_mle.stan_variables() self.assertEqual(len(vars), len(multidim_mle.metadata.stan_vars_dims)) self.assertTrue('y_rep' in vars) @@ -258,7 +260,7 @@ def test_variables_3d(self): self.assertTrue('beta' in vars) self.assertEqual(vars['beta'].shape, (2,)) self.assertTrue('frac_60' in vars) - self.assertEqual(vars['frac_60'].shape, ()) + self.assertTrue(isinstance(vars['frac_60'], float)) multidim_mle_iters = multidim_model.optimize( data=jdata, diff --git a/test/test_variational.py b/test/test_variational.py index 581a01d6..58c4c953 100644 --- a/test/test_variational.py +++ b/test/test_variational.py @@ -126,7 +126,7 @@ def test_variables_3d(self): var_beta = multidim_variational.stan_variable(var='beta') self.assertEqual(var_beta.shape, (2,)) # 1-element tuple var_frac_60 = multidim_variational.stan_variable(var='frac_60') - self.assertEqual(var_frac_60.shape, ()) + self.assertTrue(isinstance(var_frac_60, float)) vars = multidim_variational.stan_variables() self.assertEqual( len(vars), len(multidim_variational.metadata.stan_vars_dims) @@ -136,7 +136,7 @@ def test_variables_3d(self): self.assertTrue('beta' in vars) self.assertEqual(vars['beta'].shape, (2,)) self.assertTrue('frac_60' in vars) - self.assertEqual(vars['frac_60'].shape, ()) + self.assertTrue(isinstance(vars['frac_60'], float)) with self.assertRaises(ValueError): multidim_variational.stan_variable(var='beta', name='yrep') with LogCapture() as log: From fcb73a84d61d00674d8e81000c52b2ab8551d77c Mon Sep 17 00:00:00 2001 From: Ari Hartikainen Date: Thu, 9 Sep 2021 11:50:35 +0300 Subject: [PATCH 2/6] fix mypy --- cmdstanpy/stanfit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmdstanpy/stanfit.py b/cmdstanpy/stanfit.py index d3a500d4..83367fd3 100644 --- a/cmdstanpy/stanfit.py +++ b/cmdstanpy/stanfit.py @@ -1442,7 +1442,7 @@ def stan_variable( def stan_variables( self, inc_iterations: bool = False - ) -> Dict[str, np.ndarray]: + ) -> Dict[str, Union[np.ndarray, float]]: """ Return a dictionary mapping Stan program variables names to the corresponding numpy.ndarray containing the inferred values. @@ -2178,7 +2178,7 @@ def stan_variable( assert isinstance(result, (np.ndarray, float)) return result - def stan_variables(self) -> Dict[str, np.ndarray]: + def stan_variables(self) -> Dict[str, Union[np.ndarray, float]]: """ Return a dictionary mapping Stan program variables names to the corresponding numpy.ndarray containing the inferred values. From 101960cc32e17a7c59676dfcaa316578389eb168 Mon Sep 17 00:00:00 2001 From: Ari Hartikainen Date: Thu, 9 Sep 2021 14:57:09 +0300 Subject: [PATCH 3/6] add shape test --- test/data/shape.stan | 30 ++++++++++++++++++++++++++++++ test/test_optimize.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 test/data/shape.stan diff --git a/test/data/shape.stan b/test/data/shape.stan new file mode 100644 index 00000000..06bba24f --- /dev/null +++ b/test/data/shape.stan @@ -0,0 +1,30 @@ +parameters { + real z; +} +model { + z ~ normal(0,1); +} +generated quantities { + int x = 1; + real a = 1; + real b[2]; + real c[2,3]; + vector[2] d; + vector[3] e[2]; + matrix[2,3] f[4]; + matrix[2,3] g[4,5]; + for (n in 1:2) { + b[n] = 1; + d[n] = 1; + for (m in 1:3) { + c[n,m] = n; + e[n,m] = n; + for (k in 1:4) { + f[k, n, m] = n; + for (j in 1:4) { + g[k, j, n, m] = n; + } + } + } + } +} diff --git a/test/test_optimize.py b/test/test_optimize.py index 2ec47ea3..5e609366 100644 --- a/test/test_optimize.py +++ b/test/test_optimize.py @@ -287,6 +287,36 @@ def test_variables_3d(self): self.assertTrue('frac_60' in vars_iters) self.assertEqual(vars_iters['frac_60'].shape, (8,)) + def test_variables_shape(self): + stan = os.path.join(DATAFILES_PATH, 'shape.stan') + model = CmdStanModel(stan_file=stan) + no_data = {} + mle = model.optimize( + seed=1239812093, + algorithm='LBFGS', + init_alpha=0.001, + iter=100, + tol_obj=1e-12, + tol_rel_obj=1e4, + tol_grad=1e-8, + tol_rel_grad=1e7, + tol_param=1e-8, + history_size=5, + ) + for var, shape in { + "x": float, + "a": float, + "b": (2,), + "c": (2,3), + "d": (2,), + "e":(2,3), + "f": (4,2,3), + "g":(4,5,2,3), + }.items(): + if isinstance(shape, tuple): + assert mle.stan_variable(var).shape == shape + else: + assert isinstance(mle.stan_variable(var), shape) class OptimizeTest(unittest.TestCase): def test_optimize_good(self): From e533811d37c048895ed57303acc9b63045a442ee Mon Sep 17 00:00:00 2001 From: Mitzi Morris Date: Thu, 9 Sep 2021 22:25:08 -0400 Subject: [PATCH 4/6] more unit tests; fixed gq bug; different test model --- cmdstanpy/stanfit.py | 22 ++++++++++++----- test/data/matrix_var.stan | 19 +++++++++++++++ test/test_generate_quantities.py | 22 +++++++++++++++++ test/test_optimize.py | 41 +++++++++----------------------- test/test_sample.py | 13 ++++++++++ test/test_variational.py | 11 +++++++++ 6 files changed, 92 insertions(+), 36 deletions(-) create mode 100644 test/data/matrix_var.stan diff --git a/cmdstanpy/stanfit.py b/cmdstanpy/stanfit.py index 83367fd3..c2ed9c9c 100644 --- a/cmdstanpy/stanfit.py +++ b/cmdstanpy/stanfit.py @@ -1983,16 +1983,26 @@ def stan_variable( return self.mcmc_sample.stan_variable(var, inc_warmup=inc_warmup) else: # is gq variable self._assemble_generated_quantities() - col_idxs = self._metadata.stan_vars_cols[var] + draw1 = 0 if ( not inc_warmup and self.mcmc_sample.metadata.cmdstan_config['save_warmup'] ): - draw1 = self.mcmc_sample.num_draws_warmup * self.chains - return flatten_chains(self._draws)[ # type: ignore - draw1:, col_idxs - ] - return flatten_chains(self._draws)[:, col_idxs] # type: ignore + draw1 = self.mcmc_sample.num_draws_warmup + num_draws = self.mcmc_sample.num_draws_sampling + if ( + inc_warmup + and self.mcmc_sample.metadata.cmdstan_config['save_warmup'] + ): + num_draws += self.mcmc_sample.num_draws_warmup + dims = [num_draws * self.chains] + col_idxs = self._metadata.stan_vars_cols[var] + if len(col_idxs) > 0: + dims.extend(self._metadata.stan_vars_dims[var]) + # pylint: disable=redundant-keyword-arg + return self._draws[draw1:, :, col_idxs].reshape( # type: ignore + dims, order='F' + ) def stan_variables(self, inc_warmup: bool = False) -> Dict[str, np.ndarray]: """ diff --git a/test/data/matrix_var.stan b/test/data/matrix_var.stan new file mode 100644 index 00000000..327a9976 --- /dev/null +++ b/test/data/matrix_var.stan @@ -0,0 +1,19 @@ +transformed data { + int y[10] = {0,1,0,0,0,0,0,0,0,1}; +} +parameters { + real theta; +} +model { + theta ~ beta(1,1); // uniform prior on interval 0,1 + y ~ bernoulli(theta); +} +generated quantities { + # x is a 4 x 3 matrix where i,j entry == rownum + matrix[4, 3] z; + for (row_num in 1:4) { + for (col_num in 1:3) { + z[row_num, col_num] = row_num; + } + } +} diff --git a/test/test_generate_quantities.py b/test/test_generate_quantities.py index 6e015e82..5d0ef224 100644 --- a/test/test_generate_quantities.py +++ b/test/test_generate_quantities.py @@ -426,6 +426,28 @@ def test_no_xarray(self): with self.assertRaises(RuntimeError): bern_gqs.draws_xr() + def test_bug_455(self): + stan = os.path.join(DATAFILES_PATH, 'bernoulli.stan') + bern_model = CmdStanModel(stan_file=stan) + jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json') + bern_fit = bern_model.sample( + data=jdata, + chains=1, + seed=12345, + iter_sampling=1, + ) + stan = os.path.join(DATAFILES_PATH, 'matrix_var.stan') + model = CmdStanModel(stan_file=stan) + gqs = model.generate_quantities(mcmc_sample=bern_fit) + z_as_ndarray = gqs.stan_variable(var="z") + self.assertEqual(z_as_ndarray.shape, (1, 4, 3)) # flattens chains + z_as_xr = gqs.draws_xr(vars="z") + self.assertEqual(z_as_xr.z.data.shape, (1, 1, 4, 3)) # keeps chains + for i in range(4): + for j in range(3): + self.assertEqual(int(z_as_ndarray[0, i, j]), i + 1) + self.assertEqual(int(z_as_xr.z.data[0, 0, i, j]), i + 1) + if __name__ == '__main__': unittest.main() diff --git a/test/test_optimize.py b/test/test_optimize.py index 5e609366..f50070e1 100644 --- a/test/test_optimize.py +++ b/test/test_optimize.py @@ -287,36 +287,6 @@ def test_variables_3d(self): self.assertTrue('frac_60' in vars_iters) self.assertEqual(vars_iters['frac_60'].shape, (8,)) - def test_variables_shape(self): - stan = os.path.join(DATAFILES_PATH, 'shape.stan') - model = CmdStanModel(stan_file=stan) - no_data = {} - mle = model.optimize( - seed=1239812093, - algorithm='LBFGS', - init_alpha=0.001, - iter=100, - tol_obj=1e-12, - tol_rel_obj=1e4, - tol_grad=1e-8, - tol_rel_grad=1e7, - tol_param=1e-8, - history_size=5, - ) - for var, shape in { - "x": float, - "a": float, - "b": (2,), - "c": (2,3), - "d": (2,), - "e":(2,3), - "f": (4,2,3), - "g":(4,5,2,3), - }.items(): - if isinstance(shape, tuple): - assert mle.stan_variable(var).shape == shape - else: - assert isinstance(mle.stan_variable(var), shape) class OptimizeTest(unittest.TestCase): def test_optimize_good(self): @@ -611,6 +581,17 @@ def test_optimize_bad(self): data=no_data, seed=1239812093, inits=None, algorithm='BFGS' ) + def test_bug_455(self): + stan = os.path.join(DATAFILES_PATH, 'matrix_var.stan') + model = CmdStanModel(stan_file=stan) + mle = model.optimize() + self.assertTrue(isinstance(mle.stan_variable('theta'), float)) + z_as_ndarray = mle.stan_variable(var="z") + self.assertEqual(z_as_ndarray.shape, (4, 3)) + for i in range(4): + for j in range(3): + self.assertEqual(int(z_as_ndarray[i, j]), i + 1) + if __name__ == '__main__': unittest.main() diff --git a/test/test_sample.py b/test/test_sample.py index 7af5e139..49ee5a13 100644 --- a/test/test_sample.py +++ b/test/test_sample.py @@ -1668,6 +1668,19 @@ def test_no_xarray(self): with self.assertRaises(RuntimeError): bern_fit.draws_xr() + def test_bug_455(self): + stan = os.path.join(DATAFILES_PATH, 'matrix_var.stan') + bug_455_model = CmdStanModel(stan_file=stan) + bug_455_fit = bug_455_model.sample(iter_sampling=1, chains=1) + z_as_ndarray = bug_455_fit.stan_variable(var="z") + self.assertEqual(z_as_ndarray.shape, (1, 4, 3)) # flattens chains + z_as_xr = bug_455_fit.draws_xr(vars="z") + self.assertEqual(z_as_xr.z.data.shape, (1, 1, 4, 3)) # keeps chains + for i in range(4): + for j in range(3): + self.assertEqual(int(z_as_ndarray[0, i, j]), i + 1) + self.assertEqual(int(z_as_xr.z.data[0, 0, i, j]), i + 1) + if __name__ == '__main__': unittest.main() diff --git a/test/test_variational.py b/test/test_variational.py index 58c4c953..9209c1f5 100644 --- a/test/test_variational.py +++ b/test/test_variational.py @@ -253,6 +253,17 @@ def test_variational_eta_fail(self): ) ) + def test_bug_455(self): + stan = os.path.join(DATAFILES_PATH, 'matrix_var.stan') + model = CmdStanModel(stan_file=stan) + vb_fit = model.variational() + self.assertTrue(isinstance(vb_fit.stan_variable('theta'), float)) + z_as_ndarray = vb_fit.stan_variable(var="z") + self.assertEqual(z_as_ndarray.shape, (4, 3)) + for i in range(4): + for j in range(3): + self.assertEqual(int(z_as_ndarray[i, j]), i + 1) + if __name__ == '__main__': unittest.main() From 2aa17055e5d0bec5bbea3f03495eaec77e5206fd Mon Sep 17 00:00:00 2001 From: Mitzi Morris Date: Thu, 9 Sep 2021 22:26:43 -0400 Subject: [PATCH 5/6] code cleanup --- test/data/shape.stan | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 test/data/shape.stan diff --git a/test/data/shape.stan b/test/data/shape.stan deleted file mode 100644 index 06bba24f..00000000 --- a/test/data/shape.stan +++ /dev/null @@ -1,30 +0,0 @@ -parameters { - real z; -} -model { - z ~ normal(0,1); -} -generated quantities { - int x = 1; - real a = 1; - real b[2]; - real c[2,3]; - vector[2] d; - vector[3] e[2]; - matrix[2,3] f[4]; - matrix[2,3] g[4,5]; - for (n in 1:2) { - b[n] = 1; - d[n] = 1; - for (m in 1:3) { - c[n,m] = n; - e[n,m] = n; - for (k in 1:4) { - f[k, n, m] = n; - for (j in 1:4) { - g[k, j, n, m] = n; - } - } - } - } -} From 55fbc05c15d1d8b325096fd08f1a671013797be8 Mon Sep 17 00:00:00 2001 From: Mitzi Morris Date: Fri, 10 Sep 2021 11:45:53 -0400 Subject: [PATCH 6/6] changes per code review --- test/test_generate_quantities.py | 2 +- test/test_optimize.py | 2 +- test/test_sample.py | 10 +++++----- test/test_variational.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_generate_quantities.py b/test/test_generate_quantities.py index 5d0ef224..16fd36a9 100644 --- a/test/test_generate_quantities.py +++ b/test/test_generate_quantities.py @@ -426,7 +426,7 @@ def test_no_xarray(self): with self.assertRaises(RuntimeError): bern_gqs.draws_xr() - def test_bug_455(self): + def test_single_row_csv(self): stan = os.path.join(DATAFILES_PATH, 'bernoulli.stan') bern_model = CmdStanModel(stan_file=stan) jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json') diff --git a/test/test_optimize.py b/test/test_optimize.py index f50070e1..87dcc88b 100644 --- a/test/test_optimize.py +++ b/test/test_optimize.py @@ -581,7 +581,7 @@ def test_optimize_bad(self): data=no_data, seed=1239812093, inits=None, algorithm='BFGS' ) - def test_bug_455(self): + def test_single_row_csv(self): stan = os.path.join(DATAFILES_PATH, 'matrix_var.stan') model = CmdStanModel(stan_file=stan) mle = model.optimize() diff --git a/test/test_sample.py b/test/test_sample.py index 49ee5a13..202ef742 100644 --- a/test/test_sample.py +++ b/test/test_sample.py @@ -1668,13 +1668,13 @@ def test_no_xarray(self): with self.assertRaises(RuntimeError): bern_fit.draws_xr() - def test_bug_455(self): + def test_single_row_csv(self): stan = os.path.join(DATAFILES_PATH, 'matrix_var.stan') - bug_455_model = CmdStanModel(stan_file=stan) - bug_455_fit = bug_455_model.sample(iter_sampling=1, chains=1) - z_as_ndarray = bug_455_fit.stan_variable(var="z") + model = CmdStanModel(stan_file=stan) + fit = model.sample(iter_sampling=1, chains=1) + z_as_ndarray = fit.stan_variable(var="z") self.assertEqual(z_as_ndarray.shape, (1, 4, 3)) # flattens chains - z_as_xr = bug_455_fit.draws_xr(vars="z") + z_as_xr = fit.draws_xr(vars="z") self.assertEqual(z_as_xr.z.data.shape, (1, 1, 4, 3)) # keeps chains for i in range(4): for j in range(3): diff --git a/test/test_variational.py b/test/test_variational.py index 9209c1f5..d11c0ab2 100644 --- a/test/test_variational.py +++ b/test/test_variational.py @@ -253,7 +253,7 @@ def test_variational_eta_fail(self): ) ) - def test_bug_455(self): + def test_single_row_csv(self): stan = os.path.join(DATAFILES_PATH, 'matrix_var.stan') model = CmdStanModel(stan_file=stan) vb_fit = model.variational()