In [12]:
import pickle
import numpy as np

from sklearn.mixture import GaussianMixture

In [2]:
def get_values_from_trace(model, trace, thin=1, burn=0):
    """
    :param model: pymc3 model
    :param trace: pymc3 trace object
    :param thin: int
    :param burn: int, number of steps to exclude
    :return: dict: varname --> ndarray
    """
    varnames = [var.name for var in model.vars]
    trace_values = {var: trace.get_values(var, thin=thin, burn=burn) for var in varnames}
    return trace_values


In [35]:
class GaussMix(object):
    def __init__(self, n_components, covariance_type="diag"):
        self._n_components = n_components
        self._vars = []
        self._gm = GaussianMixture(n_components=self._n_components, covariance_type=covariance_type)
    
    def fit(self, sample_dict):
        """
        :param sample_dict: dict, var --> 1d array
        """
        self._vars = list(sample_dict.keys())
        X_train = self._dict_to_array(sample_dict)
        self._gm.fit(X_train)
        return self
    
    def score_samples(self, sample_dict):
        X = self._dict_to_array(sample_dict)
        logp = self._gm.score_samples(X)
        return logp
    
    def sample(self, n_samples=1):
        X = self._gm.sample(n_samples=n_samples)
        X_dict = {}
        for i, v in enumerate(self._vars):
            X_dict[v] = X[:, i]
        return X_dict
    
    def get_vars(self):
        return self._vars
    
    def get_model(self):
        return self._gm
    
    def get_gm_fited_params(self):
        weights = self._gm.weights_
        means = self._gm.means_
        covariances = self._gm.covariances_
        
        results = {}
        for i, v in enumerate(self._vars):
            results[v] = {}
            results[v]["weights"] = weights
            results[v]["means"] = [means[j][i] for j in range(self.n_components)]
            results[v]["sigmas"] = [np.sqrt(covariances[j][i]) for j in range(self.n_components)]
        return results
    
    def get_n_components(self):
        return self._n_components
    
    def _dict_to_array(self, sample_dict):
        X = [sample_dict[v] for v in self._vars]
        X = np.stack(X, axis=1)
        return X

In [6]:
model = pickle.load(open("data/pm_model.pickle", "rb"), encoding="latin1")
trace = pickle.load(open("data/trace_obj.pickle", "rb"), encoding="latin1")

In [7]:
sample = get_values_from_trace(model, trace, thin=10, burn=1000)

In [8]:
sample.keys()

dict_keys(['P0_interval__', 'Ls_log__', 'rho_interval__', 'DeltaG1_interval__', 'DeltaDeltaG_interval__', 'DeltaH1_interval__', 'DeltaH2_interval__', 'DeltaH_0_interval__', 'log_sigma_interval__'])

In [9]:
vars_redun = ["DeltaDeltaG_interval__", "DeltaH2_interval__", "rho_interval__"]
sample_redun = {v: sample[v] for v in vars_redun}

In [36]:
gm = GaussMix(n_components=2)
gm.fit(sample_redun)

<__main__.GaussMix at 0x1c269e07f0>

In [37]:
gm.get_vars()

['DeltaDeltaG_interval__', 'DeltaH2_interval__', 'rho_interval__']

In [38]:
gm.get_gm_fited_params()

[{'weight': 0.6158551014743028,
  'mean': -2.0988126610978113,
  'sigma': array([0.0113226 , 0.0040884 , 0.05062079])},
 {'weight': 0.3841448985256964,
  'mean': -2.0727776419495987,
  'sigma': array([0.01297718, 0.00521766, 0.06248262])}]

In [39]:
gm_model = gm.get_model()

In [43]:
gm_model.weights_

array([0.6158551, 0.3841449])

In [41]:
gm_model.means_

array([[-2.09881266, -0.036     , -1.68769259],
       [-2.07277764, -0.04358964, -1.8077247 ]])

In [42]:
gm_model.covariances_

array([[1.28201278e-04, 1.67150236e-05, 2.56246430e-03],
       [1.68407083e-04, 2.72239836e-05, 3.90407796e-03]])