In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import mb_modelbase as mb
import pandas as pd
import numpy as np

In [3]:
from mb_modelbase.models_core.gspnmodel import GSPNModel
from mb_modelbase.models_core.empirical_model import EmpiricalModel

## learn models from data

In [4]:
# Load the iris data set from CSV. The path is relative, so it is resolved
# against the directory the notebook kernel is running in.
iris_data = pd.read_csv("../doc/data/iris.csv")

In [5]:
# Specification of the models to fit, keyed by model name. Each value is a
# zero-argument callable that returns the settings for that model: the model
# class, the training data and the options handed to fitting.
iris_spec = {
    'gspn_iris': lambda: {
        'class': GSPNModel,
        'data': iris_data,
        'fitopts': {'empirical_model_name': 'emp_iris'},
    },
    'emp_iris': lambda: {
        'class': EmpiricalModel,
        'data': iris_data,
        'fitopts': {'empirical_model_name': 'emp_iris'},
    },
}

In [6]:
# Fit every model described in iris_spec. The result is indexed further down
# as models[<model name>]['model'] to get at the fitted model objects.
models = mb.fit_models(iris_spec)

19:23:10.068 INFO :: Fitted 2 models in total: {'gspn_iris', 'emp_iris'}


[([0, 1, 2, 3, 4],      sepal_length  sepal_width  petal_length  petal_width  species
0             5.1          3.5           1.4          0.2        0
1             4.9          3.0           1.4          0.2        0
2             4.7          3.2           1.3          0.2        0
3             4.6          3.1           1.5          0.2        0
4             5.0          3.6           1.4          0.2        0
5             5.4          3.9           1.7          0.4        0
6             4.6          3.4           1.4          0.3        0
7             5.0          3.4           1.5          0.2        0
8             4.4          2.9           1.4          0.2        0
9             4.9          3.1           1.5          0.1        0
10            5.4          3.7           1.5          0.2        0
11            4.8          3.4           1.6          0.2        0
12            4.8          3.0           1.4          0.1        0
13            4.3          3.0           1.

In [7]:
# Optional: uncomment the following line to save the fitted models.
#mb.save_models(models, '../../models_ppl')

## run density query on marginal model

In [8]:
# Pull both fitted model objects (GSPN and empirical) out of the fit result.
gspn_iris, emp_iris = (
    models[name]['model'] for name in ('gspn_iris', 'emp_iris')
)

In [9]:
# Look up the model's fields (dimensions) by name. The resulting objects are
# passed to mb.Probability, mb.Split and mb.Aggregation in the queries below.
(
    sepal_length,
    sepal_width,
    petal_length,
    petal_width,
    species,
) = gspn_iris.byname([
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
])
# Looking up a single name presumably works as well -- TODO confirm:
#sepal_length = gspn_iris.byname('sepal_length')

Below are a couple of queries that should work. If you run the same query on the empirical model instead (see the commented-out line in each cell), you will see that it does return results.

In [10]:
# Marginal density along sepal_length: split on sepal_length and request the
# probability for each resulting value.
gspn_iris.predict(
    predict=['sepal_length', mb.Probability(sepal_length)],
    splitby=mb.Split(sepal_length),
)
# Same query against the empirical model, for comparison:
#emp_iris.predict(predict=['sepal_length', mb.Probability(sepal_length)], splitby=mb.Split(sepal_length))

Unnamed: 0,sepal_length,@probability(['sepal_length'])
0,4.372,0.0
1,4.516,0.0
2,4.66,0.0
3,4.804,0.0
4,4.948,0.0
5,5.092,0.0
6,5.236,0.0
7,5.38,0.0
8,5.524,0.0
9,5.668,0.0


In [11]:
# query bivariate probability over sepal_length and species
# NOTE(review): the recorded run of this cell on the GSPN model failed with
# "NameError: name 'colname' is not defined".
gspn_iris.predict(
    predict=['sepal_length', 'species', mb.Probability([sepal_length, species])],
    splitby=[mb.Split(sepal_length), mb.Split(species)]
)
# Same query against the empirical model, for comparison:
#emp_iris.predict(
#    predict=['sepal_length', 'species', mb.Probability([sepal_length, species])],
#    splitby=[mb.Split(sepal_length), mb.Split(species)]
#)

NameError: name 'colname' is not defined

In [12]:
# predicts value of petal_width and marginalizes everything else
# NOTE(review): the recorded run of this cell on the GSPN model failed with
# "ValueError: Your model does not provide the requested aggregation: 'maximum'".
# No aggregation method is passed here, so 'maximum' is presumably the default
# of mb.Aggregation -- TODO confirm.
gspn_iris.predict(predict=[mb.Aggregation(petal_width)])
# Same query against the empirical model, for comparison:
#emp_iris.predict(predict=[mb.Aggregation(petal_width)])

ValueError: Your model does not provide the requested aggregation: 'maximum'

In [13]:
# Draw 50 samples of petal_length and petal_width. The model is copied before
# marginalizing, presumably so that gspn_iris itself keeps all of its fields
# -- TODO confirm.
(
    gspn_iris
    .copy()
    .marginalize(keep=['petal_length', 'petal_width'])
    .sample(50)
)
# Same query against the empirical model, for comparison:
#emp_iris.copy().marginalize(keep=['petal_length', 'petal_width']).sample(50)

NotImplementedError: Will do this later