In [1]:
%matplotlib inline
from bdbcontrib import quickstart

In [2]:
# Print quickstart's docstring, which describes its arguments
# (name, csv_path, df, bdb_path, ...).
help(quickstart)

Help on function quickstart in module bdbcontrib:

quickstart(*args, **kwargs)
    Create a Population object, wrapping a bayeslite.BayesDB.
    
    name : str  REQUIRED.
        A name for the population, should use letters and underscores only.
        This will also be used as a table name in the bdb, and %t in queries
        will expand to this name. %g in queries will expand to the current
        population metamodel, also based on this name.
    csv_path : str
        The path to a comma-separated values file. If specified, will be used
        to populate the bdb. It must exist and be both readable and non-empty.
    df : pandas.DataFrame
        If specified, these data will be used to populate the bdb, superseding
        any csv_path. It must not be empty.
    bdb_path : str
        If specified, store data and analysis results here. If no other data
        source (csv or df) is specified, then it must already have been
        populated. If not specified, we will use a v

In [None]:
# This call does not pass session_capture_name; the output recorded for this
# cell is an error asking for that option to be set.
# NOTE(review): the bdb_path here ("examples/satellites/satellites.bdb") differs
# from the one in the next quickstart call ("satellites.bdb") -- confirm which
# location is intended.
s = quickstart("satellites", bdb_path="examples/satellites/satellites.bdb")

ERROR: 
Please set session_capture_name option to quickstart
  to either opt-in or opt-out of sending details of your usage of
  this software to the MIT Probabilistic Computing Group.

If you see this in one of our example notebooks,
  return to the starting page, the Index.ipynb, to
  make that choice.


In [61]:
# Create the "satellites" population from a .bdb file. Neither csv_path nor df
# is given, so per quickstart's help text the bdb must already be populated.
# session_capture_name is supplied because quickstart reports an error when it
# is missing (see the previous cell's recorded output).
s = quickstart("satellites", bdb_path="satellites.bdb", session_capture_name="Demo")

In [5]:
# Print the documentation of the Population object returned by quickstart.
help(s)

Help on Population in module bdbcontrib.population object:

class Population(__builtin__.object)
 |  Generative Population Model, wraps a BayesDB, and tracks one population.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, name, csv_path=None, bdb_path=None, df=None, logger=None, session_capture_name=None)
 |      Create a Population object, wrapping a bayeslite.BayesDB.
 |      
 |      name : str  REQUIRED.
 |          A name for the population, should use letters and underscores only.
 |          This will also be used as a table name in the bdb, and %t in queries
 |          will expand to this name. %g in queries will expand to the current
 |          population metamodel, also based on this name.
 |      csv_path : str
 |          The path to a comma-separated values file. If specified, will be used
 |          to populate the bdb. It must exist and be both readable and non-empty.
 |      df : pandas.DataFrame
 |          If specified, these data will be used to populate th

In [8]:
# population_method is the decorator that creates a method in the Population
# class from a plain function; print its documentation.
from bdbcontrib.population_method import population_method
help(population_method)

Help on function population_method in module bdbcontrib.population_method:

population_method(**argspec_transforms)
    Create a method in the Population class to encapsulate this function.
    
    Fill in documentation holes, but otherwise leave the function itself
    untouched for library use.
    
    For the types below, "arg" is a zero-indexed positional argument number
    or a keyword argument name as a string. For keyword arguments, if the
    caller provides a value, we leave it untouched. Positional arguments may
    either be kept and transformed, or hidden from the argument list. If
    the transform takes both a population and an arg, then it is kept and the
    arg is transformed. If it takes only a population, then it is hidden (there
    is no corresponding positional argument in the method, the documentation
    line is removed, and other positional arguments shift to accommodate).
    
    __PT_DOC__
    
    In your method's docstring, use the above names with unde

In [12]:
from bdbcontrib.population_method import PT_DOC
# PT_DOC lists the transform names accepted as keyword arguments by
# population_method (population, interpret_bql, specifier_to_df, ...).
# NOTE(review): presumably this is the text the __PT_DOC__ placeholder in
# population_method's docstring stands for -- confirm.
# The parenthesized form prints identically on Python 2 and also runs on
# Python 3, where the bare print statement is a SyntaxError.
print(PT_DOC)

population: 
	Passes on the population object itself.
interpret_bql: [argspec]
	Fills in %t and %g in a BQL query.
specifier_to_df: [argspec]
	Queries to create a dataframe from BQL if need be.
population_to_bdb: 
	Provides the population's BayesDB instance.
population_name: 
	Provides the population's name.
generator_name: 
	Provides the population's generative population model's name.
logger: 
	Provides a logger to which one can .info, .warn, .plot, etc.


In [9]:
@population_method(population=0)
def my_name(self):
    # Look up the `name` attribute of the object passed in and hand it back.
    # (No docstring on purpose: the population_method decorator rewrites
    # docstrings, so notes live in comments.)
    population_name = self.name
    return population_name
# The decorated function is callable as a method on the population with no
# explicit arguments; the recorded result is 'satellites'.
s.my_name()

'satellites'

In [18]:
@population_method(population_name='myname')
def name_length_plus_k(k, myname="name"):
    # Result is `k` plus the number of characters in `myname`.
    # `myname` falls back to the literal "name" when the caller omits it.
    name_size = len(myname)
    return k + name_size
# Compare the method form with the bare function. Per the recorded output, the
# method form fills `myname` with the population's name (100 + len("satellites")
# == 110) unless the caller overrides it, while the bare function keeps its own
# default ("name").
{'method with default:': s.name_length_plus_k(100), 
 'method with override:': s.name_length_plus_k(100, myname="foo"), 
 'bare function with default:': name_length_plus_k(100),
 'bare function with override:': name_length_plus_k(100, myname="Asher Lev")}

{'bare function with default:': 104,
 'bare function with override:': 109,
 'method with default:': 110,
 'method with override:': 103}

In [22]:
@population_method(specifier_to_df=0)
def only(df):
    # Return the single value held by a one-row, one-column dataframe.
    #
    # df: a pandas.DataFrame whose shape must be exactly (1, 1).
    # Raises AssertionError when the shape is anything else.
    #
    # (No docstring on purpose: the population_method decorator rewrites
    # docstrings, so notes live in comments.)
    assert (1, 1) == df.shape, (
        "only() expects exactly one row and one column, got shape %r"
        % (df.shape,))
    # Purely positional lookup. The previous `.ix[0, 0]` is deprecated (and
    # removed in pandas 1.0) and is label-based on integer indexes, so it
    # failed for a one-cell frame whose row label was not 0.
    return df.iloc[0, 0]
# Through the population, a BQL query string is accepted: per PT_DOC,
# specifier_to_df queries to create the dataframe if need be, and %t expands
# to the population's table name.
s.only('SELECT COUNT(*) FROM %t')

1167.0

In [23]:
# Deliberate failure: the bare function receives the query string unchanged,
# so the recorded output is AttributeError ('str' object has no attribute
# 'shape').
only('SELECT COUNT(*) FROM %t')

AttributeError: 'str' object has no attribute 'shape'

In [24]:
# The bare function works when handed a query result instead of a string.
# NOTE(review): s.q presumably runs the query and returns a pandas.DataFrame
# -- confirm against the Population documentation.
only(s.q('SELECT COUNT(*) FROM %t'))

1167.0

In [26]:
# The method form also accepts an already-computed query result in place of a
# BQL string; the recorded result is the same 1167.0.
df = s.q('SELECT COUNT(*) FROM %t')
s.only(df)

1167.0

In [29]:
# Deliberate failure: `no_such_hook` is not one of the transform names listed
# in PT_DOC, and the recorded output is KeyError: 'no_such_hook'.
@population_method(no_such_hook=0)
def foo():
    return 42

KeyError: 'no_such_hook'

In [30]:
# Deliberate failure: positional specifiers are zero-indexed and foo has only
# one positional argument, so index 1 is invalid. The recorded output is
# IndexError: Invalid positional specifier for foo: 1.
@population_method(population_name=1)
def foo(bar):
    return 42

IndexError: Invalid positional specifier for foo: 1

In [31]:
# Deliberate failure: a string specifier names a keyword argument, and foo has
# no keyword argument called "pub". The recorded output is
# IndexError: No such optional argument for foo: pub.
@population_method(population_name="pub")
def foo(bar):
    return 42

IndexError: No such optional argument for foo: pub

In [32]:
# Deliberate failure: three positional arguments where the method accepts one.
# The recorded output is a TypeError ("takes at most 1 arguments (3 given)").
s.only(3, 4, 5)

TypeError: __main__.only() takes at most 1 arguments (3 given)

In [33]:
# Deliberate failure: the required positional argument is missing.
# The recorded output is a TypeError ("takes at least 1 arguments (0 given)").
s.only()

TypeError: __main__.only() takes at least 1 arguments (0 given)

In [35]:
# Deliberate failure: an unexpected keyword argument and no positional one.
# The recorded TypeError text ("takes at least 1 arguments (1 given)") reads
# oddly, but it is the message Python gives for similar errors on regular
# functions.
s.only(bar="five")

TypeError: __main__.only() takes at least 1 arguments (1 given)

In [57]:
@population_method(population_name=0, specifier_to_df=[1, 2])
def quux(name, df1, df2, number):
    """This is documentation!
    
    name: __population_name__
    df1: __specifier_to_df__
      The first thing.
    df2: __specifier_to_df__
      The second thing.
    number: some numerical thing.
    """
    # The docstring above is left exactly as written: its double-underscore
    # placeholders are filled in by the population_method decorator.
    # Render each argument with str() and separate the pieces by a blank line.
    parts = (name, df1, df2, number)
    return '\n\n'.join(map(str, parts))

# Help for the method form: per the recorded output, the `name` line is gone
# (that argument is supplied by the population) and the __specifier_to_df__
# placeholders read "a pandas.DataFrame or BQL query."
help(s.quux)

Help on method as_population_method in module bdbcontrib.population_method:

as_population_method(self, *args, **kwargs) method of bdbcontrib.population.Population instance
    This is documentation!
    
    df1: a pandas.DataFrame or BQL query.
      The first thing.
    df2: a pandas.DataFrame or BQL query.
      The second thing.
    number: some numerical thing.



In [58]:
# Help for the bare function: per the recorded output, `name` stays in the
# signature and the placeholders are filled with library-use wording
# ("a pandas.DataFrame.").
help(quux)

Help on function quux in module __main__:

quux(name, df1, df2, number)
    This is documentation!
    
    name: str
        the name of the relevant table in the bdb.
    df1: a pandas.DataFrame.
      The first thing.
    df2: a pandas.DataFrame.
      The second thing.
    number: some numerical thing.



In [59]:
# Call the method form with df1, df2 and number only; `name` is hidden by the
# decorator (population_name=0) and filled with the population's name. Both BQL
# strings are turned into dataframes before quux sees them (%t and %g expand to
# the table and generator names).
# The parenthesized form prints identically on Python 2 and also runs on
# Python 3, where the bare print statement is a SyntaxError.
print(s.quux('SELECT COUNT(*) FROM %t',
             'ESTIMATE DEPENDENCE PROBABILITY FROM PAIRWISE COLUMNS OF %g FOR "Apogee_km", "Perigee_km"',
             42))

satellites

   "COUNT"(*)
0        1167

   generator_id       name0       name1  value
0             1  Perigee_km  Perigee_km      1
1             1  Perigee_km   Apogee_km      1
2             1   Apogee_km  Perigee_km      1
3             1   Apogee_km   Apogee_km      1

42
