## Code Snippets
From crosscat/tests/component_model_extensions/MultinomialComponentModel.py

In [15]:
import math
import random
import numpy

In [22]:
# Example inputs: seven equally weighted categories and a symmetric
# Dirichlet prior over the same number of categories.
example_data_parameters = {'weights': (numpy.ones(7) / 7.).tolist()}
example_hyperparams = {'dirichlet_alpha': .5, 'K': 7}

In [27]:
def counts_to_data(counts):
    """
    Expands per-category counts into a shuffled column of category indices.
    Inputs:
        counts: a list or numpy.ndarray; counts[k] is the number of times
            category k should appear in the output
    Returns:
        a float numpy array holding one single-element row [k] per
        occurrence, in an order randomized by random.shuffle
    """
    assert type(counts) is list or type(counts) is numpy.ndarray
    total = int(sum(counts))

    rows = []
    for category, target in enumerate(counts):
        emitted = 0
        while emitted < target:
            rows.append([category])
            emitted += 1

        # fails for counts that are negative or not whole numbers
        assert emitted == target

    assert len(rows) == total

    # shuffle the Python list (global `random` state) before converting
    random.shuffle(rows)
    return numpy.array(rows, dtype=float)

def check_model_parameters_dict(params):
    """
    Validates a multinomial model-parameters dict.
    Inputs:
        params: a dict whose only key is 'weights', mapped to a flat list of
            numbers that sums to 1.0 (within 1e-8)
    Raises:
        TypeError: if params is not a dict, if 'weights' is not a list, or
            if any entry of 'weights' is itself a list
        KeyError: if 'weights' is missing or any other key is present
        ValueError: if the weights do not sum to 1.0 (this includes an
            empty list)
    """
    if not isinstance(params, dict):
        raise TypeError("params should be a dict")

    keys = ['weights']

    for key in keys:
        if key not in params:
            raise KeyError("model_parameters_dict should have key %s" % key)

    # items() instead of iteritems(): available on both Python 2 and 3
    for key, value in params.items():
        if key == "weights":
            if not isinstance(value, list):
                raise TypeError("model parameters dict key 'weights' should be a list")
            # look at every entry rather than value[0] only, so an empty list
            # does not raise IndexError and a nested list anywhere is caught
            if any(isinstance(weight, list) for weight in value):
                raise TypeError("weights should not be a list of lists, should be a list of floats")
            if math.fabs(sum(value) - 1.0) > .00000001:
                raise ValueError("model parameters dict key 'weights' should sum to 1.0")
        else:
            raise KeyError("invalid key, %s, for model parameters dict" % key)
            
def check_hyperparameters_dict(hyperparameters_dict):
    """
    Validates a multinomial hyperparameters dict.
    Inputs:
        hyperparameters_dict: a dict with entries 'dirichlet_alpha' (a float,
            numpy.float64 or int greater than 0) and 'K' (an int greater
            than 0); an optional 'fixed' entry is accepted and not inspected
    Raises:
        KeyError: if 'dirichlet_alpha' or 'K' is missing, or a key other than
            'dirichlet_alpha', 'K' and 'fixed' is present
        TypeError: if 'K' or 'dirichlet_alpha' has an unsupported type
        ValueError: if 'K' is less than 1 or 'dirichlet_alpha' is not positive
    """
    # 'fixed' key is not necessary for user-defined hyperparameters
    keys = ['dirichlet_alpha', 'K']

    for key in keys:
        if key not in hyperparameters_dict:
            raise KeyError("hyperparameters_dict should have key %s" % key)

    # items() instead of iteritems(): available on both Python 2 and 3
    for key, value in hyperparameters_dict.items():
        if key == "K":
            # exact type match on purpose: bool and float values are rejected
            if type(value) is not int:
                raise TypeError("hyperparameters dict entry K should be an int")

            if value < 1:
                raise ValueError("hyperparameters dict entry K should be greater than 0")
        elif key == "dirichlet_alpha":
            if type(value) not in (float, numpy.float64, int):
                raise TypeError("hyperparameters dict entry dirichlet_alpha should be a float or int")

            if value <= 0.0:
                raise ValueError("hyperparameters dict entry dirichlet_alpha should be greater than 0")

        elif key == "fixed":
            pass
        else:
            raise KeyError("invalid key, %s, for hyperparameters dict" % key)

def check_data_vs_k(X, K):
    """
    Checks that the data hold no more than K distinct values.
    Inputs:
        X: a numpy.ndarray of any shape, or a flat iterable of hashable
            values
        K: the largest number of distinct values allowed
    Raises:
        ValueError: if X contains more than K distinct values
    """
    if type(X) is numpy.ndarray:
        # Only the set of values matters, so element order is irrelevant.
        # The default flatten() replaces flatten(1): an integer order flag
        # is not accepted by newer numpy releases.
        X = X.flatten().tolist()
    K_data = len(set(X))
    if K_data > K:
        raise ValueError("the number of items in the data is greater than K")

In [28]:
def generate_data_from_parameters(params, N, gen_seed=0):
    """
    Generates N data points from a multinomial with the given weights.
    Inputs:
        params: a dict with entries 'weights'
        N: number of data points
        gen_seed: integer used to seed the rng
    Returns:
        the shuffled float array of category indices that counts_to_data
        builds from the multinomial counts; it has N rows
    Raises:
        TypeError: if N or gen_seed is not an int, or params is not a dict
        ValueError: if N is not greater than 0
        plus anything raised by check_model_parameters_dict(params)
    """
    if type(N) is not int:
        raise TypeError("N should be an int")

    if N <= 0:
        raise ValueError("N should be greater than 0")

    if type(gen_seed) is not int:
        raise TypeError("gen_seed should be an int")

    if type(params) is not dict:
        raise TypeError("params should be a dict")

    check_model_parameters_dict(params)

    # Seed both generators so that a given gen_seed reproduces the same data:
    # numpy.random drives the multinomial draw and counts_to_data shuffles
    # with the stdlib `random` module.
    random.seed(gen_seed)
    numpy.random.seed(gen_seed)

    # multinomial draw
    counts = numpy.array(numpy.random.multinomial(N, params['weights']), dtype=int)

    X = counts_to_data(counts)

    assert len(X) == N

    return X

def sample_parameters_given_hyper(hypers, gen_seed=0):
    """
    Draws multinomial weights from a symmetric Dirichlet prior.
    Inputs:
        hypers: a dict with entries 'dirichlet_alpha' (the concentration
            used for every category) and 'K' (the number of categories)
        gen_seed: integer used to seed the rng
    Returns:
        a dict with a single entry 'weights', a list of K floats
    """
    if type(gen_seed) is not int:
        raise TypeError("gen_seed should be an int")

    # seed the stdlib generator first, then numpy's
    for seed_rng in (random.seed, numpy.random.seed):
        seed_rng(gen_seed)

    # one identical concentration value per category
    concentration = numpy.array([hypers['dirichlet_alpha']] * int(hypers['K']))

    return {'weights': numpy.random.dirichlet(concentration).tolist()}

In [23]:
# Run both validators on the example dicts; each raises if its dict is
# malformed.
check_model_parameters_dict(params=example_data_parameters)
check_hyperparameters_dict(hyperparameters_dict=example_hyperparams)

In [29]:
# Draw ten data points from the example weights, then sample a fresh set of
# weights from the example hyperparameters (the last expression is the one
# the notebook displays).
generate_data_from_parameters(example_data_parameters, N=10)
sample_parameters_given_hyper(hypers=example_hyperparams)

{'weights': [0.13413503453963307,
  0.1669051803480647,
  0.07919362146775934,
  0.08448803005892792,
  0.38811048820902566,
  0.14494114049910156,
  0.0022265048774877667]}