In [1]:
import pathos.multiprocessing as multiprocessing
import dill as pickle
from mpl_toolkits.mplot3d import Axes3D

%matplotlib notebook
import matplotlib.pyplot as plt

from py_analyze_tools.sklearn import BasisFunctionConfiguration, ReaDDyElasticNetEstimator

from scipy.integrate import odeint
import py_analyze_tools.tools as pat
import numpy as np
from pprint import pprint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit

# simple sklearn estimator integration 

In [2]:
# Load the trajectory and truncate its counts to the leading frames before
# calling update() on it.
# NOTE(review): the meaning of the two factors 45 and 500 is not visible here — confirm.
n_frames_kept = 45 * 500
traj = pat.Trajectory("../generate/simple_trajectory_.h5")
traj.counts = traj.counts[:n_frames_kept]
traj.update()

max counts = 6000.0, min nonzero counts = 1.0


In [3]:
# Register the candidate reactions on the basis-function configuration.
# NOTE(review): the integer arguments are presumably species indices — confirm
# their order against BasisFunctionConfiguration.
bfc = BasisFunctionConfiguration(traj)
bfc.add_fusion(0, 1, 2)
bfc.add_fission(2, 0, 1)
# Conversions are registered in the same order as before: (0, 1), (0, 3), (3, 0).
for species_from, species_to in [(0, 1), (0, 3), (3, 0)]:
    bfc.add_conversion(species_from, species_to)

In [4]:
# Scale factor handed to the estimator: the reciprocal of twice the number of
# (species, time step) entries of the trajectory.
normalization = 2. * traj.n_species * traj.n_time_steps
scale = 1. / normalization
estimator = ReaDDyElasticNetEstimator(
    traj, bfc, scale,
    alpha=200,
    l1_ratio=0.5,
)

In [5]:
# Finite-difference derivative of the counts along axis 0, divided by the
# trajectory's time step.
# NOTE(review): dcounts_dt is not referenced again in the cells visible here —
# confirm whether it is still needed.
dcounts_dt = np.gradient(traj.counts, axis=0) / traj.time_step
# Fit the estimator; both positional arguments are passed as None.
# NOTE(review): presumably the estimator takes its data from the trajectory
# given at construction — confirm against ReaDDyElasticNetEstimator.fit.
estimator.fit(None, None)

ReaDDyElasticNetEstimator(alpha=200,
             basis_function_configuration=<py_analyze_tools.sklearn.BasisFunctionConfiguration object at 0x7f3762823a90>,
             l1_ratio=0.5, scale=5.555555555555556e-06,
             trajs=[<py_analyze_tools.tools.Trajectory object at 0x7f3797ea2128>])

In [6]:
def fun(data, _):
    """Right-hand side passed to odeint, built from the fitted estimator.

    Every function in ``bfc.functions`` is evaluated on ``data`` and the results
    are combined with ``estimator.coefficients_`` via ``np.matmul``. The second
    argument (supplied by odeint) is ignored.
    """
    basis_values = [basis_fn(data) for basis_fn in bfc.functions]
    return np.matmul(estimator.coefficients_, np.array(basis_values))


# One time axis, reused for the integration and for both curves of every panel.
# NOTE(review): the step .01 is hard-coded — confirm it matches traj.time_step.
time_grid = np.arange(0, traj.n_time_steps*.01, .01)

f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
f.suptitle("least squares fit for full trajectory (not well-mixed in the last time steps)")
# Integrate the fitted system starting from the first recorded counts.
num_solution = odeint(fun, traj.counts[0], time_grid)
axes = [ax1, ax2, ax3, ax4]
labels = ["A", "B", "C", "D"]
for i in range(traj.n_species):
    panel = axes[i]
    # Recorded counts first, integrated solution second.
    panel.plot(time_grid, traj.counts[:, i])
    panel.plot(time_grid, num_solution[:, i])
    panel.set_title("Concentration of %s particles over time" % labels[i])
plt.show()

<IPython.core.display.Javascript object>

# grid search

In [3]:
# Rebuild trajectory, basis functions and estimator for the grid search.
traj = pat.Trajectory("../generate/simple_trajectory_.h5")
traj.counts = traj.counts[:45*500]
traj.update()

bfc = BasisFunctionConfiguration(traj)
bfc.add_fusion(0, 1, 2)
bfc.add_fission(2, 0, 1)
# Conversions are registered in the same order as before: (0, 1), (0, 3), (3, 0).
for species_from, species_to in ((0, 1), (0, 3), (3, 0)):
    bfc.add_conversion(species_from, species_to)

# No alpha / l1_ratio are passed here; the grid search sets them per candidate.
grid_search_scale = 1. / (2. * traj.n_species * traj.n_time_steps)
estimator = ReaDDyElasticNetEstimator(traj, bfc, grid_search_scale)

max counts = 6000.0, min nonzero counts = 1.0


In [21]:
# One single-point grid per (alpha, l1_ratio) pair, each with its own five
# random initial guesses for the coefficients.
# NOTE(review): the guesses come from the global NumPy RNG and no seed is set in
# the visible cells, so the grid differs between runs.
# A plain dict grid over alpha and l1_ratio alone would not vary init_xi.
parameter_grid = [
    {
        'alpha': [alpha],
        'l1_ratio': [l1_ratio],
        'init_xi': [np.random.random(bfc.n_basis_functions) for _ in range(5)],
    }
    for alpha in np.arange(0, 300, 50)
    for l1_ratio in np.arange(0, 1, .2)
]
# Three time-ordered splits, evaluated with eight parallel jobs.
cv = TimeSeriesSplit(3)
gs = GridSearchCV(estimator, parameter_grid, n_jobs=8, cv=cv)

In [22]:
# Run the cross-validated grid search: X is the range of time-step indices,
# y is the trajectory's dcounts_dt.
gs.fit(range(0, traj.n_time_steps), y=traj.dcounts_dt)

GridSearchCV(cv=TimeSeriesSplit(n_splits=3), error_score='raise',
       estimator=ReaDDyElasticNetEstimator(alpha=1.0,
             basis_function_configuration=<py_analyze_tools.sklearn.BasisFunctionConfiguration object at 0x7fb49b8a6240>,
             init_xi=array([ 0.5,  0.5,  0.5,  0.5,  0.5]), l1_ratio=1.0,
             scale=5.555555555555556e-06,
             trajs=[<py_analyze_tools.tools.Trajectory object at 0x7fb49b7e6470>]),
       fit_params={}, iid=True, n_jobs=8,
       param_grid=[{'alpha': [0], 'l1_ratio': [0.0], 'init_xi': [array([ 0.07017,  0.50571,  0.28999,  0.90417,  0.60532]), array([ 0.99447,  0.0092 ,  0.79705,  0.48453,  0.62191]), array([ 0.65046,  0.0733 ,  0.33178,  0.14676,  0.33236]), array([ 0.53697,  0.9073 ,  0.89192,  0.44083,  0.99908]), array(....85547,  0.88715,  0.00873,  0.49422]), array([ 0.32151,  0.64288,  0.23186,  0.70475,  0.26287])]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [23]:
# Keep the estimator that the grid search refit with its best parameter set.
best_est = gs.best_estimator_

In [24]:
# Show the alpha of the best estimator.
best_est.alpha

50

In [25]:
# Show the l1_ratio of the best estimator.
best_est.l1_ratio

0.0

In [26]:
def fun(data, _):
    """Right-hand side passed to odeint, built from the grid-search winner.

    Every function in ``bfc.functions`` is evaluated on ``data`` and the results
    are combined with ``best_est.coefficients_`` via ``np.matmul``. The second
    argument (supplied by odeint) is ignored.
    """
    theta = np.array([f(data) for f in bfc.functions])
    return np.matmul(best_est.coefficients_, theta)


# Build the time axis once instead of re-creating it for every plot call.
# NOTE(review): the step .01 is hard-coded — confirm it matches traj.time_step.
time_grid = np.arange(0, traj.n_time_steps*.01, .01)

f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
# NOTE(review): this figure title is the same text as in the earlier plain-fit
# plot — confirm it still describes the grid-search estimator.
f.suptitle("least squares fit for full trajectory (not well-mixed in the last time steps)")
# Integrate the fitted system starting from the first recorded counts.
num_solution = odeint(fun, traj.counts[0], time_grid)
axes = [ax1, ax2, ax3, ax4]
labels = ["A", "B", "C", "D"]
for i in range(traj.n_species):
    # Recorded counts first, integrated solution second.
    axes[i].plot(time_grid, traj.counts[:, i])
    axes[i].plot(time_grid, num_solution[:, i])
    # Fixed: the title literal previously had a stray code fragment pasted into
    # the word "particles".
    axes[i].set_title("Concentration of %s particles over time" % labels[i])
plt.show()

<IPython.core.display.Javascript object>

In [27]:
# Show the fitted coefficients of the best estimator.
best_est.coefficients_

array([  1.92812974e-06,   7.83267660e-05,   0.00000000e+00,
         1.02698180e-02,   4.41846555e-05])

In [28]:
import functools
import operator
def rate_info(xi, diffusion_coefficient=.2, microscopic_rate=.05, reaction_radius=.7,
              box_size=(15, 15, 15)):
    """Print and return a reference rate next to the fitted rate(s) ``xi``.

    The reference value (labelled "erban chapman rate" in the output) is
    ``4*pi*D*R*(1 - tanh(x)/x)`` with ``x = R*sqrt(k/D)``, where ``D`` is
    ``diffusion_coefficient``, ``k`` is ``microscopic_rate`` and ``R`` is
    ``reaction_radius``. The fitted rate is converted to a per-volume value by
    multiplying it with the product of ``box_size``.

    :param xi: fitted rate(s) per counts; scalar or array, returned unchanged
    :param diffusion_coefficient: ``D`` in the expression above
    :param microscopic_rate: ``k`` in the expression above
    :param reaction_radius: ``R`` in the expression above
    :param box_size: edge lengths whose product is the volume factor; the
        default reproduces the previously hard-coded 15 x 15 x 15 box
    :return: tuple ``(rate_chapman, xi, rate_per_volume)``
    """
    tmp = np.sqrt(microscopic_rate / diffusion_coefficient) * reaction_radius
    if np.ndim(tmp) == 0 and tmp == 0:
        # tanh(x)/x -> 1 as x -> 0, so the expression vanishes; evaluating it
        # directly would give 0/0 = nan.
        rate_chapman = 0.
    else:
        rate_chapman = 4. * np.pi * diffusion_coefficient * reaction_radius * (1. - np.tanh(tmp) / tmp)
    volume = functools.reduce(operator.mul, box_size, 1)
    rate_per_volume = xi * volume

    print("erban chapman rate (per volume): {}".format(rate_chapman))
    print("lasso fitted rate (per counts): {}".format(xi))
    print("lasso fitted rate (per volume): {}".format(rate_per_volume))

    return rate_chapman, xi, rate_per_volume

In [29]:
# Report rates for the best estimator's coefficients, using rate_info's
# default parameters.
rate_info(best_est.coefficients_)

erban chapman rate (per volume): 0.06848398371542153
lasso fitted rate (per counts): [  1.92812974e-06   7.83267660e-05   0.00000000e+00   1.02698180e-02
   4.41846555e-05]
lasso fitted rate (per volume): [  6.50743787e-03   2.64352835e-01   0.00000000e+00   3.46606358e+01
   1.49123212e-01]


(0.068483983715421526,
 array([  1.92812974e-06,   7.83267660e-05,   0.00000000e+00,
          1.02698180e-02,   4.41846555e-05]),
 array([  6.50743787e-03,   2.64352835e-01,   0.00000000e+00,
          3.46606358e+01,   1.49123212e-01]))

In [17]:
# Show the best parameter combination found by the grid search.
# NOTE(review): this cell's execution count (17) is lower than that of the
# gs.fit cell (22), so the stored output may come from an earlier run — re-run
# to confirm.
gs.best_params_

{'alpha': 50,
 'init_xi': array([ 0.50307034,  0.41345567,  0.19904549,  0.15864206,  0.08438181]),
 'l1_ratio': 0.80000000000000004}

In [36]:
%matplotlib notebook
# 3D bar chart of the grid-search scores over the (alpha, l1_ratio) grid.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

mean_scores = gs.cv_results_['mean_test_score']
lowest_score = np.min(mean_scores)

# Collect, per (alpha, l1_ratio) pair, the mean test scores of all its
# candidates, shifted so that the lowest score overall becomes zero.
res = {}
for idx, param_set in enumerate(gs.cv_results_['params']):
    key = (param_set['alpha'], param_set['l1_ratio'])
    res.setdefault(key, []).append(mean_scores[idx] - lowest_score)

# Bar positions come from the keys; bar heights are the smallest shifted score
# of each pair.
x = np.array([pair[0] for pair in res])
y = np.array([pair[1] for pair in res])
z = np.array([np.min(shifted) for shifted in res.values()])

ax.set_xlabel("alpha")
ax.set_ylabel("l1_ratio")
ax.set_zlabel("relative mean test score")
# Bars start at height 0, with footprint 10 (alpha) by .1 (l1_ratio).
ax.bar3d(x, y, np.zeros(len(z)), 10, .1, z)
plt.show()

<IPython.core.display.Javascript object>