In [None]:
from sklearn.preprocessing import StandardScaler
from visualization_fct import *

import itertools
from bokeh.io import output_notebook
output_notebook()
%matplotlib notebook

from bokeh.plotting import show  # output_file, save
from sklearn.mixture import GaussianMixture

import matplotlib.pyplot as plt  # , mpld3

# --- Load the table and drop the columns that are not used as features ------
data = pd.read_csv("../asm_data/asm_data_for_ml.txt", sep='\t')
data = data.drop(['MJD', 'error', 'errorA', 'errorB', 'errorC'], axis=1)
data['rateCA'] = data.rateC / data.rateA
data_thr = mask(data, 'orbit')  # rm too large values except for 'orbit'

# --- Reproducibility --------------------------------------------------------
# The global seed is kept for any later code that draws from np.random; the
# mixture model additionally receives the seed explicitly so that its fit does
# not depend on how much global random state was consumed before this cell.
RANDOM_SEED = 1
np.random.seed(RANDOM_SEED)

# --- Feature matrix ---------------------------------------------------------
# Column order matters: later cells address features by position
# (0=orbit, 1=rate, 2=rateA, 3=rateB, 4=rateC, 5=rateCA).
feature_names = ['orbit', 'rate', 'rateA', 'rateB', 'rateC', 'rateCA']
X_raw = data_thr[feature_names].values

scaler = StandardScaler()
X = scaler.fit_transform(X_raw)

# --- Gaussian mixture fit ---------------------------------------------------
gmm = GaussianMixture(n_components=3, covariance_type='full', n_init=1,
                      random_state=RANDOM_SEED)
gmm.fit(X)  # , weights=w) not implemented in sklearn yet
preds = gmm.predict(X)
probs = gmm.predict_proba(X)

# Attach the labels positionally. A bare pd.Series(preds) carries a fresh
# 0..n-1 index and would be aligned on labels, which mis-assigns (or NaNs)
# predictions whenever data_thr's index is not exactly 0..n-1 (e.g. if mask()
# dropped rows without resetting the index).
data_thr = data_thr.assign(preds=pd.Categorical(preds))

color_key = ["red", "blue", "yellow", "grey", "black", "purple", "pink",
             "brown", "green", "orange"]  # Spectral9
# One colour per mixture component plus one spare. Sized from the model rather
# than from the labels actually observed, so every possible label stays a
# valid index even if some component receives no samples.
color_key = color_key[:gmm.n_components + 1]

# --- Express the fitted components in the original (unscaled) units ---------
# Standardisation is x_std = (x - mean) / scale, hence
#   cov_orig = diag(scale) @ cov_std @ diag(scale)
# which is computed here for all components at once by broadcasting.
scale = scaler.scale_
covs = scale[:, np.newaxis] * (gmm.covariances_ * scale[np.newaxis, :])

# Shape (n_components, n_features, 1): each mean is kept as a column vector,
# the layout the following cells index into.
means = scaler.inverse_transform(gmm.means_)[:, :, np.newaxis]

In [None]:
# Single plot of rateCA vs rate with predicted classes and ellipses.
# x and y are column positions in the feature matrix the mixture was fitted
# on (5 -> rateCA, 1 -> rate).
x = 5
y = 1
plane = [x, y]

# Restrict every component to the chosen plane: the 2x2 covariance sub-matrix
# and the matching two entries of the mean.
covs_xy = [cov[np.ix_(plane, plane)] for cov in covs]
means_xy = [mean[plane] for mean in means]

p = interactive_transition_probability(
    data_thr,
    covs=covs_xy,
    means=means_xy,
    percent10=False,
)

In [None]:
# Render the object returned by interactive_transition_probability() in the
# notebook output (output_notebook() was called in the first cell).
show(p)

In [None]:
# --- Plot configuration shared by the cells below ---------------------------
x_name = 'rateCA'
y_name = 'rate'
pred_name = 'preds'
plot_width = 900
plot_height = 300
title = None
TOOLS = "wheel_zoom,box_zoom,reset,box_select,pan"  # ,lasso_select,save"

# Axis limits: 0.1th to 99th percentile of the full (unmasked) table, so a few
# extreme values do not dictate the initial view.
xmin_p, xmax_p = np.percentile(data[x_name], [0.1, 99])
ymin_p, ymax_p = np.percentile(data[y_name], [0.1, 99])

fig = Figure(x_range=(xmin_p, xmax_p),
             y_range=(ymin_p, ymax_p),
             plot_width=plot_width,
             plot_height=plot_height,
             title=title,
             tools=TOOLS)

# --- Pair every observation with its successor ------------------------------
# x_next / y_next hold the coordinates of the following row; the last row
# wraps around to the first. np.roll works on the raw values, i.e. strictly by
# position. The previous `series[0]` was a label lookup and therefore raised
# KeyError (or picked the wrong row) whenever the index of data_thr does not
# start at 0, e.g. after rows were filtered out.
data_extended = data_thr.copy(deep=True)
data_extended['x_next'] = np.roll(data_thr[x_name].values, -1)
data_extended['y_next'] = np.roll(data_thr[y_name].values, -1)



In [None]:
# Bokeh data source backing both scatter plots.
source = ColumnDataSource(data_extended)

# Colour of every observation, looked up from its predicted component.
colors = [color_key[label] for label in data_extended[pred_name]]

# Colour of every observation's successor: the same list rotated by one, with
# the first colour wrapping to the end (mirrors x_next / y_next).
# The previous `colors_next += color_key[...]` extended the list with a
# *string*, appending one element per character ('r', 'e', 'd'), so the list
# no longer had one entry per row.
colors_next = colors[1:] + colors[:1]

n_samples = len(data_extended)



In [None]:
# Scatter of the observations, coloured by predicted component.
# The per-point colours are stored as a column of `source` and referenced by
# name: when an explicit `source` is given, Bokeh (1.0+) raises RuntimeError
# for glyph arguments passed as literal Python sequences.
source.add(colors, name='color_current')
fig.circle(x_name, y_name, source=source, color='color_current')
show(fig)

In [None]:
# Sanity check of the data source: list each column with its length instead of
# dumping every value. All lengths must be equal for the glyphs to render.
{column: len(values) for column, values in source.data.items()}

In [None]:
# Second figure for the "next observation" scatter. It is built with the same
# ranges, size, title and tools as `fig`, so the two views are comparable.
fig2 = Figure(
    title=title,
    tools=TOOLS,
    plot_width=plot_width,
    plot_height=plot_height,
    x_range=(xmin_p, xmax_p),
    y_range=(ymin_p, ymax_p),
)

In [None]:
# Scatter of each observation's successor (x_next, y_next), coloured by the
# successor's predicted component.
# As with any glyph drawn from an explicit `source`, the colours are added to
# the source as a column and referenced by name; Bokeh (1.0+) raises
# RuntimeError for literal sequences passed alongside `source`.
source.add(colors_next, name='color_next')
fig2.circle('x_next', 'y_next', source=source, color='color_next')
show(fig2)