Skip to content

Commit

Permalink
Schema for serialized Crosscat model
Browse files Browse the repository at this point in the history
  • Loading branch information
Madeleine Thompson committed Feb 8, 2016
1 parent 6472fcc commit 86b758d
Showing 1 changed file with 264 additions and 0 deletions.
264 changes: 264 additions & 0 deletions src/metamodels/crosscat_theta.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
{
"title": "schema for a serialized crosscat model",
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "This schema specifies the structure of a single serialized model from the 'crosscat' generator in bayeslite. Such serialized models are stored in the theta_json column of the bayesdb_crosscat_theta table of a .bdb file.",
"type": "object",
"additionalProperties": false,
"required": ["X_D", "X_L", "model_config", "iterations"],
"properties": {

"iterations": {
"description": "The number of iterations ANALYZE has been run for on this model.",
"type": "integer",
"minimum": 0
},

"model_config": {
"description": "Metainformation about the model, independent of its current state. It is always the same value.",
"type": "object",
"additionalProperties": false,
"required": ["initialization", "kernel_list", "row_initialization"],
"properties": {
"initialization": {
"enum": ["from_the_prior"]
},
"row_initialization": {
"enum": ["from_the_prior"]
},
"kernel_list": {
"type": "array",
"maxItems": 0
}
}
},

"X_D": {
"description": "X_D specifies the mapping of data rows into categories for each view. It is an array with an element for each view. So, if there were three data rows and two views, X_D could be [[0, 0, 0], [0, 1, 1]]. The first view would assign all rows to a single category. The second view would assign the first row to one category and last two rows to a second category.",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer",
"minimum": 0
}
}
},

"X_L": {
"description": "X_L specifies the latent state of the model.",
"type": "object",
"additionalProperties": false,
"required": ["column_partition", "column_hypers", "view_state"],
"properties": {
"col_ensure": {
"description": "Optional declaration of (in)dependence constraints",
"type": "object",
"additionalProperties": false,
"properties": {
"dependent": {"$ref": "#/definitions/dependenceContstraints"},
"independent": {"$ref": "#/definitions/dependenceContstraints"}
}
},
"column_partition": {
"description": "Describes the partitioning of variables into views.",
"type": "object",
"additionalProperties": false,
"required": ["assignments", "counts", "hypers"],
"properties": {
"assignments": {
"description": "An array of view assignments. Element k has value v if variable k is in view v.",
"type": "array",
"items": {
"type": "integer",
"minimum": 0
}
},
"counts": {
"description": "Element v has value n if view v has n variables in it. This is redundant with assignments. This array has the same length as X_D.",
"type": "array",
"items": {
"type": "integer",
"minimum": 0
}
},
"hypers": {
"type": "object",
"additionalProperties": false,
"required": ["alpha"],
"properties": {
"alpha": {
"description": "Parameter for the CRP that generates the views.",
"type": "number",
"minimum": 0.0
}
}
}
}
},
"column_hypers": {
"description": "Hyperparameters for a column. Model parameters within a category are drawn from the distribution implied by these parameters. Elements correspond to columns.",
"type": "array",
"items": {
"oneOf": [
{"$ref": "#/definitions/numericHypers"},
{"$ref": "#/definitions/categoricalHypers"},
{"$ref": "#/definitions/cyclicHypers"}
]
}
},
"view_state": {
"description": "Each element corresponds to a view and contains all information about that view.",
"type": "array",
"items": {
"type": "object",
"additionalProperties": false,
"required": ["column_names", "column_component_suffstats", "row_partition_model"],
"properties" : {
"column_names": {
"description": "An array of the column names in the view. If X_L.view_state[2].column_names[4] is 'S', and the fifth element of X_L.column_partition.assignments with value 2 has index k, then column k has name 'S'.",
"type": "array",
"items": {"type": "string"}
},
"column_component_suffstats": {
"description": "An array of sufficient statistics of the categories of this view. There is one element for each column in the view. This element is itself an array with one element for each category.",
"type": "array",
"items": {
"description": "Array of sufficient-statistics objects for one category of this view.",
"type": "array",
"items": {
"oneOf": [
{"$ref": "#/definitions/normalStats"},
{"$ref": "#/definitions/multinomialStats"},
{"$ref": "#/definitions/vonMisesStats"},
{"$ref": "#/definitions/emptyStats"}
]
}
}
},
"row_partition_model": {
"description": "Latent state for assignment of variables to categories in this view.",
"type": "object",
"additionalProperties": false,
"required": ["counts", "hypers"],
"properties": {
"counts": {
"description": "Index is category, value is number of variables in that category.",
"type": "array",
"items": {"type": "integer"}
},
"hypers": {
"type": "object",
"additionalProperties": false,
"required": ["alpha"],
"properties": {
"alpha": {
"description": "Dirichlet parameter for assignment of variables to categories in this view.",
"type": "number",
"minimum": 0,
"exclusiveMinimum": true
}
}
}
}
}
}
}
}
}
}
},

"definitions": {
"numericHypers": {
"description": "Hyperparameters for a normal-inverse-gamma distribution in column_hypers",
"type": "object",
"additionalProperties": false,
"required": ["fixed"],
"properties": {
"fixed": {"$ref": "#/definitions/fixed"},
"mu": {"type": "number"},
"r": {"type": "number"},
"s": {"type": "number"},
"nu": {"type": "number"}
}
},
"categoricalHypers": {
"description": "Hyperparameters for a Dirichlet distribution in column_hypers. dirichlet_alpha is a single number because the model only allows for Dirichlets with uniform priors; {'dirichlet_alpha': 0.5, 'K': 2} is the Beta(0.5,0.5) distribution.",
"type": "object",
"additionalProperties": false,
"required": ["fixed", "dirichlet_alpha", "K"],
"properties": {
"fixed": {"$ref": "#/definitions/fixed"},
"dirichlet_alpha": {"type": "number"},
"K": {"type": "number"}
}
},
"cyclicHypers": {
"description": "Hyperparameters for a von Mises distribution in column_hypers",
"type": "object",
"additionalProperties": false,
"required": ["fixed", "a", "b", "kappa"],
"properties": {
"fixed": {"$ref": "#/definitions/fixed"},
"a": {"type": "number"},
"b": {"type": "number"},
"kappa": {"type": "number"}
}
},
"fixed": {
"description": "Hyperparameter 'fixed'. Nonzero in theory if Crosscat cannot do hyperparameter inference. Always zero.",
"type": "number",
"minimum": 0.0,
"maximum": 0.0
},
"normalStats": {
"description": "Sufficient statistics for a normal category",
"type": "object",
"additionalProperties": false,
"required": ["N", "sum_x", "sum_x_squared"],
"properties": {
"N": {"type": "number"},
"sum_x": {"type": "number"},
"sum_x_squared": {"type": "number"}
}
},
"multinomialStats": {
"description": "Sufficient statistics for a multinomial category. '0', '1', etc. properties are counts.",
"type": "object",
"additionalProperties": false,
"required": ["N"],
"patternProperties": {
"[0-9]+": {"type": "number"}
},
"properties": {
"N": {"type": "number"}
}
},
"vonMisesStats": {
"description": "Sufficient statistics for a von Mises category.",
"type": "object",
"additionalProperties": false,
"required": ["N", "sum_cos_x", "sum_sin_x"],
"properties": {
"N": {"type": "number"},
"sum_cos_x": {"type": "number"},
"sum_sin_x": {"type": "number"}
}
},
"emptyStats": {
"type": "object",
"additionalProperties": false
},
"dependenceContstraints": {
"description": "Specifies a collection of dependence or independence constraints. Maps a column index to the list of columns on which it is (in)dependent.",
"type": "object",
"additionalProperties": false,
"patternProperties": {
"[0-9]+": {
"type": "array",
"items": {"type": "integer"}
}
}
}
}
}

0 comments on commit 86b758d

Please sign in to comment.