index exception when ANALYZING table with one column #31

fsaad · 2015-06-05T13:57:02Z

I create a CSV file with one column of data (named "c0") and 20 values. When I run ANALYZE, an exception is thrown.

Here is a reproducible example of the procedure:

import bayeslite
import bayeslite.crosscat
import numpy as np
import math
import random
import sys

from crosscat.MultiprocessingEngine import MultiprocessingEngine
from bayeslite.shell.pretty import pp_cursor

def pprint(cursor):
    """Pass ``cursor`` to ``pp_cursor`` with ``sys.stdout`` as the output stream.

    Returns whatever ``pp_cursor`` returns.
    """
    out = sys.stdout
    return pp_cursor(out, cursor)

if __name__ == '__main__':
    # Reproduction script: build a one-column table, model it with crosscat,
    # then INITIALIZE / ANALYZE / SIMULATE against it.

    # create one column of data, save to data.csv, with header c0
    t = 20
    data = np.random.rand(t)
    # Reshape the flat array to (t, 1) so savetxt writes one value per row.
    data = data.reshape(len(data),1)
    # comments='' keeps numpy from prefixing the header line with '# '.
    np.savetxt('data.csv', data, header='c0', comments='')

    # Table and generator names both embed the row count.
    btable = "table{}".format(t)
    generator = "table{}_cc".format(t)

    # Open a BayesDB handle and register a crosscat metamodel that wraps a
    # MultiprocessingEngine.
    bdb = bayeslite.bayesdb_open()
    engine = bayeslite.crosscat.CrosscatMetamodel(
        MultiprocessingEngine())
    bayeslite.bayesdb_register_metamodel(bdb, engine)
    # Read data.csv into the table named by btable (header=True, create=True).
    bayeslite.bayesdb_read_csv_file(bdb, btable, "data.csv",
                                    header=True, create=True)

    # Print the table contents to confirm what was loaded.
    bql = '''
    SELECT * FROM {}
    '''.format(btable)
    c = bdb.execute(bql)
    pprint(c)


    # Declare a crosscat generator over the single numerical column c0.
    bql = '''
    CREATE GENERATOR {} FOR {}
        USING crosscat (
           c0 NUMERICAL
        );
    '''.format(generator, btable)
    bdb.execute(bql)

    # Initialize 10 models for the generator.
    bql = '''
    INITIALIZE {} MODELS FOR {};
    '''.format(10, generator)
    bdb.execute(bql)

    # The IndexError is raised by this ANALYZE call: the stack trace that
    # accompanies this script points at the execute of the ANALYZE phrase,
    # not at INITIALIZE.
    bql = '''
    ANALYZE {} for {} ITERATIONS WAIT;
    '''.format(generator, 10)
    bdb.execute(bql)

    # Not reached while ANALYZE fails: simulate 15 rows of c0 into a temp table.
    bql = '''
    CREATE TEMP TABLE simres AS
        SIMULATE c0 FROM {}
        LIMIT {};
    '''.format(generator, 15)
    bdb.execute(bql)

    # Fetch the simulated rows into a numpy array inside a savepoint.
    bql = 'SELECT * FROM simres;'
    simdata = None
    with bdb.savepoint():
        c = bdb.execute(bql)
        simdata = np.array(c.fetchall())

And here is the stack trace:

In [19]: run one_col.py
             c0
---------------
  0.21819395493
 0.930373567089
 0.725379439808
 0.691447842751
 0.261562572085
 0.948943970262
  0.46605176487
0.0151432877238
 0.441854759811
 0.665655889346
0.0765081395686
 0.447978645136
 0.825578309208
 0.500403070452
 0.658746843184
 0.843358329166
 0.248048357726
  0.79623218477
 0.526216988005
 0.875729646947
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
/home/fsaad/Documents/pcp/bayeslite/experiments/one_col.py in <module>()
     55     ANALYZE {} for {} ITERATIONS WAIT;
     56     '''.format(generator, 10)
---> 57     bdb.execute(bql)
     58 
     59     bql = '''

/usr/local/lib/python2.7/dist-packages/bayeslite-0.1.dev-py2.7.egg/bayeslite/bayesdb.pyc in execute(self, string, bindings)
    149         if more:
    150             raise ValueError('>1 phrase in string')
--> 151         return bql.execute_phrase(self, phrase, bindings)
    152 
    153     def sql_execute(self, string, bindings=None):

/usr/local/lib/python2.7/dist-packages/bayeslite-0.1.dev-py2.7.egg/bayeslite/bql.pyc in execute_phrase(bdb, phrase, bindings)
    553             max_seconds=phrase.seconds,
    554             ckpt_iterations=phrase.ckpt_iterations,
--> 555             ckpt_seconds=phrase.ckpt_seconds)
    556         return empty_cursor(bdb)
    557 

/usr/local/lib/python2.7/dist-packages/bayeslite-0.1.dev-py2.7.egg/bayeslite/crosscat.pyc in analyze_models(self, bdb, generator_id, modelnos, iterations, max_seconds, ckpt_iterations, ckpt_seconds)
    588                         X_L=X_L_list,
    589                         X_D=X_D_list,
--> 590                         n_steps=n_steps,
    591                     )
    592                     if iterations is not None:

/usr/local/lib/python2.7/dist-packages/CrossCat-0.1.8-py2.7-linux-x86_64.egg/crosscat/LocalEngine.pyc in analyze(self, M_c, T, X_L, X_D, kernel_list, n_steps, c, r, max_iterations, max_time, do_diagnostics, diagnostics_every_N, ROW_CRP_ALPHA_GRID, COLUMN_CRP_ALPHA_GRID, S_GRID, MU_GRID, N_GRID, do_timing, CT_KERNEL)
    267             diagnostics_dict = munge_diagnostics(diagnostics_dict_list)
    268             if reprocess_diagnostics_func is not None:
--> 269                 diagnostics_dict = reprocess_diagnostics_func(diagnostics_dict)
    270             ret_tuple = ret_tuple + (diagnostics_dict, )
    271         if do_timing:

/usr/local/lib/python2.7/dist-packages/CrossCat-0.1.8-py2.7-linux-x86_64.egg/crosscat/utils/diagnostic_utils.pyc in default_reprocess_diagnostics_func(diagnostics_arr_dict)
     50     # column_paritition_assignments are column, iter, chain
     51     D = column_partition_assignments.shape[0] - 1
---> 52     f_z_statistic_0_1 = column_partition_assignments_to_f_z_statistic(column_partition_assignments, 1, 0)
     53     f_z_statistic_0_D = column_partition_assignments_to_f_z_statistic(column_partition_assignments, D, 0)
     54     diagnostics_arr_dict['f_z[0, 1]'] = f_z_statistic_0_1

/usr/local/lib/python2.7/dist-packages/CrossCat-0.1.8-py2.7-linux-x86_64.egg/crosscat/utils/diagnostic_utils.pyc in column_partition_assignments_to_f_z_statistic(column_partition_assignments, j, i)
     43     iter_column_chain_arr = column_partition_assignments.transpose((1, 0, 2))
     44     helper = lambda column_chain_arr: column_chain_to_ratio(column_chain_arr, j, i)
---> 45     as_list = map(helper, iter_column_chain_arr)
     46     return numpy.array(as_list)[:, numpy.newaxis]
     47 

/usr/local/lib/python2.7/dist-packages/CrossCat-0.1.8-py2.7-linux-x86_64.egg/crosscat/utils/diagnostic_utils.pyc in <lambda>(column_chain_arr)
     42         j, i=0):
     43     iter_column_chain_arr = column_partition_assignments.transpose((1, 0, 2))
---> 44     helper = lambda column_chain_arr: column_chain_to_ratio(column_chain_arr, j, i)
     45     as_list = map(helper, iter_column_chain_arr)
     46     return numpy.array(as_list)[:, numpy.newaxis]

/usr/local/lib/python2.7/dist-packages/CrossCat-0.1.8-py2.7-linux-x86_64.egg/crosscat/utils/diagnostic_utils.pyc in column_chain_to_ratio(column_chain_arr, j, i)
     32 
     33 def column_chain_to_ratio(column_chain_arr, j, i=0):
---> 34     chain_i_j = column_chain_arr[[i, j], :]
     35     is_same = numpy.diff(chain_i_j, axis=0)[0] == 0
     36     n_chains = len(is_same)

IndexError: index 1 is out of bounds for axis 0 with size 1

The text was updated successfully, but these errors were encountered:

riastradh-probcomp · 2015-06-05T18:12:42Z

Little aside: Please don't substitute strings directly into SQL/BQL queries! In most cases, you should be using query parameters:

# The two ? placeholders are filled from the tuple, in order, instead of being formatted into the query text.
cursor = bdb.execute('SIMULATE x, y, z FROM t GIVEN w = ? LIMIT ?', ('zot', 42))

If you absolutely must do string substitution, e.g. because you need to substitute a table name, use sqlite3_quote_name from bayeslite.sqlite3_util:

# Quote identifiers before interpolating them into the query text.
qt = sqlite3_quote_name(table_name)
qcn = sqlite3_quote_name(column_name)
# The column name fills the SELECT slot and the table name fills the FROM slot.
bql = 'SELECT %s FROM %s' % (qcn, qt)

(This should be exposed by the bayeslite module -- that it is not is an API mistake.)

Fixes Github issue #31. Requires Crosscat 0.1.9. Dynamically ascertaining whether these tests should fail based on the Crosscat version was too much trouble to implement.

riastradh-probcomp · 2015-06-05T20:17:33Z

Fixed in 9e87fc5.

riastradh-probcomp added a commit that referenced this issue Jun 5, 2015

No more xfail for one-column tests.

9e87fc5

Fixes Github issue #31. Requires Crosscat 0.1.9. Dynamically ascertaining whether these tests should fail based on the Crosscat version was too much trouble to implement.

riastradh-probcomp closed this as completed Jun 5, 2015

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

index exception when ANALYZING table with one column #31

index exception when ANALYZING table with one column #31

fsaad commented Jun 5, 2015

riastradh-probcomp commented Jun 5, 2015

riastradh-probcomp commented Jun 5, 2015

index exception when ANALYZING table with one column #31

index exception when ANALYZING table with one column #31

Comments

fsaad commented Jun 5, 2015

riastradh-probcomp commented Jun 5, 2015

riastradh-probcomp commented Jun 5, 2015