stan-dev · ahartikainen · Sep 14, 2020 · Sep 4, 2020 · Sep 4, 2020 · Sep 5, 2020
diff --git a/cmdstanpy/stanfit.py b/cmdstanpy/stanfit.py
@@ -702,7 +702,7 @@ def draws_as_dataframe(
             the output, i.e., the sampler was run with ``save_warmup=True``,
             then the warmup draws are included.  Default value is ``False``.
         """
-        pnames_base = [name.split('.')[0] for name in self.column_names]
+        pnames_base = [name.split('[')[0] for name in self.column_names]
         if params is not None:
             for param in params:
                 if not (param in self._column_names or param in pnames_base):
@@ -723,51 +723,47 @@ def draws_as_dataframe(
         mask = []
         params = set(params)
         for name in self.column_names:
-            if any(item in params for item in (name, name.split('.')[0])):
+            if any(item in params for item in (name, name.split('[')[0])):
                 mask.append(name)
         return self._draws_as_df[mask]
 
-    def stan_variable(self, name: str) -> np.ndarray:
+    def stan_variable(self, name: str) -> pd.DataFrame:
         """
-        Return a new ndarray which contains the set of post-warmup draws
+        Return a new DataFrame which contains the set of post-warmup draws
         for the named Stan program variable.  Flattens the chains.
         Underlyingly draws are in chain order, i.e., for a sample
         consisting of N chains of M draws each, the first M array
         elements are from chain 1, the next M are from chain 2,
         and the last M elements are from chain N.
 
-        * If the variable is a scalar variable, this returns a 1-d array,
-          length(draws X chains).
-        * If the variable is a vector, this is a 2-d array,
-          shape ( draws X chains, len(vector))
-        * If the variable is a matrix, this is a 3-d array,
-          shape ( draws X chains, matrix nrows, matrix ncols ).
-        * If the variable is an array with N dimensions, this is an N+1-d array,
-          shape ( draws X chains, size(dim 1), ... size(dim N)).
+        * If the variable is a scalar variable, the shape of the DataFrame is
+          ( draws X chains, 1 ).
+        * If the variable is a vector, the shape of the DataFrame is
+          ( draws X chains, len(vector) ).
+        * If the variable is a matrix, the shape of the DataFrame is
+          ( draws X chains, size(dim 1) X size(dim 2) ).
+        * If the variable is an array with N dimensions, the shape of the
+          DataFrame is ( draws X chains, size(dim 1) X ... X size(dim N) ).
 
         :param name: variable name
         """
         if name not in self._stan_variable_dims:
             raise ValueError('unknown name: {}'.format(name))
         self._assemble_draws()
         dim0 = self.num_draws * self.runset.chains
-        dims = self._stan_variable_dims[name]
-        if dims == 1:
-            idx = self.column_names.index(name)
-            return self._draws[self._draws_warmup :, :, idx].reshape(
-                (dim0,), order='A'
-            )
-        else:
-            idxs = [
-                x[0]
-                for x in enumerate(self.column_names)
-                if x[1].startswith(name + '.')
-            ]
-            var_dims = [dim0]
-            var_dims.extend(dims)
-            return self._draws[
-                self._draws_warmup :, :, idxs[0] : idxs[-1] + 1
-            ].reshape(tuple(var_dims), order='A')
+        # anchored match: the bare name (scalar) or name[i,j,...] elements
+        pattern = re.compile(r'^{}(\[[\d,]+\])?$'.format(re.escape(name)))
+        names, idxs = [], []
+        for i, column_name in enumerate(self.column_names):
+            if pattern.match(column_name):
+                names.append(column_name)
+                idxs.append(i)
+        # explicit order='F' stacks whole chains one after another (as the
+        # docstring promises) regardless of the selected copy's memory layout
+        draws = self._draws[self._draws_warmup :, :, idxs]
+        return pd.DataFrame(
+            draws.reshape((dim0, len(idxs)), order='F'), columns=names
+        )
 
     def stan_variables(self) -> Dict:
         """

diff --git a/cmdstanpy/utils.py b/cmdstanpy/utils.py
@@ -636,11 +636,19 @@ def scan_column_names(fd: TextIO, config_dict: Dict, lineno: int) -> int:
     line = fd.readline().strip()
     lineno += 1
     names = line.split(',')
-    config_dict['column_names'] = tuple(names)
+    config_dict['column_names'] = tuple(_rename_columns(names))
     config_dict['num_params'] = len(names) - 1
     return lineno
 
 
+def _rename_columns(names: List) -> List:
+    """Convert Stan CSV dotted indices to brackets: 'x.1.2' -> 'x[1,2]'."""
+    return [
+        re.sub(r',([\d,]+)$', r'[\1]', raw_name.replace('.', ','))
+        for raw_name in names
+    ]
+
+
 def parse_var_dims(names: Tuple[str, ...]) -> Dict:
     """
     Use Stan CSV file column names to get variable names, dimensions.
@@ -653,14 +661,14 @@ def parse_var_dims(names: Tuple[str, ...]) -> Dict:
     while idx < len(names):
         if names[idx].endswith('__'):
             pass
-        elif '.' not in names[idx]:
+        elif '[' not in names[idx]:
             vars_dict[names[idx]] = 1
         else:
-            vs = names[idx].split('.')
-            if idx < len(names) - 1 and names[idx + 1].split('.')[0] == vs[0]:
+            vs = names[idx].split('[')
+            if idx < len(names) - 1 and names[idx + 1].split('[')[0] == vs[0]:
                 idx += 1
                 continue
-            dims = [int(vs[x]) for x in range(1, len(vs))]
+            dims = [int(x) for x in vs[1][:-1].split(',')]
             vars_dict[vs[0]] = tuple(dims)
         idx += 1
     return vars_dict

diff --git a/test/test_generate_quantities.py b/test/test_generate_quantities.py
@@ -37,16 +37,16 @@ def test_gen_quantities_csv_files(self):
             csv_file = bern_gqs.runset.csv_files[i]
             self.assertTrue(os.path.exists(csv_file))
         column_names = [
-            'y_rep.1',
-            'y_rep.2',
-            'y_rep.3',
-            'y_rep.4',
-            'y_rep.5',
-            'y_rep.6',
-            'y_rep.7',
-            'y_rep.8',
-            'y_rep.9',
-            'y_rep.10',
+            'y_rep[1]',
+            'y_rep[2]',
+            'y_rep[3]',
+            'y_rep[4]',
+            'y_rep[5]',
+            'y_rep[6]',
+            'y_rep[7]',
+            'y_rep[8]',
+            'y_rep[9]',
+            'y_rep[10]',
         ]
         self.assertEqual(bern_gqs.column_names, tuple(column_names))
         self.assertEqual(
@@ -104,16 +104,16 @@ def test_gen_quanties_mcmc_sample(self):
             csv_file = bern_gqs.runset.csv_files[i]
             self.assertTrue(os.path.exists(csv_file))
         column_names = [
-            'y_rep.1',
-            'y_rep.2',
-            'y_rep.3',
-            'y_rep.4',
-            'y_rep.5',
-            'y_rep.6',
-            'y_rep.7',
-            'y_rep.8',
-            'y_rep.9',
-            'y_rep.10',
+            'y_rep[1]',
+            'y_rep[2]',
+            'y_rep[3]',
+            'y_rep[4]',
+            'y_rep[5]',
+            'y_rep[6]',
+            'y_rep[7]',
+            'y_rep[8]',
+            'y_rep[9]',
+            'y_rep[10]',
         ]
         self.assertEqual(bern_gqs.column_names, tuple(column_names))
         self.assertEqual(