### PyEmma Featurizer Support

In [1]:
import openpathsampling as paths
import numpy as np

In [2]:
#! lazy
import pyemma.coordinates as coor

In [3]:
#! lazy
# Open the existing reference file in read mode; later cells take the first
# trajectory and the snapshot topology from it.
ref_storage = paths.Storage('engine_store_test.nc', mode='r')

28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Open existing netCDF file 'engine_store_test.nc' for reading - reading from existing file


In [4]:
#! lazy
# Create a fresh output file in write mode and copy the first trajectory of
# the reference storage into it. The save() call is kept as the last
# expression so that its return value remains the cell's output.
storage = paths.Storage('delete.nc', mode='w')
first_trajectory = ref_storage.trajectories[0]
storage.trajectories.save(first_trajectory)

28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Create new netCDF file 'delete.nc' for writing - deleting existing file
28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Setup netCDF file and create variables
28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Initializing store 'trajectories'
28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Initializing store 'topologies'
28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Initializing store 'cvs'
28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Initializing store 'snapshots'
28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Initializing store 'samples'
28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Initializing store 'samplesets'
28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Initializing store 'pathmovechanges'
28-07-16 08:59:31 openpathsampling.netcdfplus.netcdfplus INFO     Initializ

UUID('8b0aba33-4832-11e6-88ff-000000000014')

Import a PyEmma Coordinates Module

Using PyEMMA featurizers, or other complex code in general, requires a little trick to make it storable. Since storing code only works if it does not depend on the context (scope), we need to wrap the construction of our featurizer in a function that receives everything it needs from the global scope as parameters.

In [5]:
def pyemma_generator(f):
    """Configure the featurizer ``f`` with backbone inverse distances.

    The function is deliberately self-contained: everything it needs comes
    in through its parameter, so it does not depend on the surrounding scope.

    Parameters
    ----------
    f : object
        Featurizer-like object providing ``select_Backbone()``, ``pairs()``
        and ``add_inverse_distances()``.

    Returns
    -------
    None
        The result of ``add_inverse_distances`` is not returned.
    """
    backbone_selection = f.select_Backbone()
    backbone_pairs = f.pairs(backbone_selection)
    f.add_inverse_distances(backbone_pairs)

In [6]:
# Build the collective variable named 'pyemma' from the generator function
# and the topology of the first reference snapshot, then take the
# disk-cached variant returned by with_diskcache().
template_topology = ref_storage.snapshots[0].topology
pyemma_cv = paths.collectivevariable.CV_PyEMMA_Featurizer(
    'pyemma',
    pyemma_generator,
    topology=template_topology
)
cv = pyemma_cv.with_diskcache()

Now use this featurizer-generating function to build a collective variable. All we need is a name (as usual), the generating function, and the list of parameters: here only the topology, and ideally also a test snapshot to serve as a template.

In [7]:
# Evaluate the CV on the first trajectory of the reference storage; the
# trailing ';' suppresses the cell's output.
cv(ref_storage.trajectories[0]);

Let's save it to the storage

In [8]:
#! lazy
# Save the collective variable and print whatever save() returns.
# The single-argument call form print(...) prints the same text on Python 2
# and Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print(storage.save(cv))

(store.cvs[CollectiveVariable], 2, UUID('d4f16b4c-5490-11e6-891f-0000000000a8'))


Then apply the featurizer to a trajectory.

In [9]:
# Evaluate the CV on the first trajectory held in the new storage; the
# trailing ';' suppresses the cell's output.
cv(storage.trajectories[0]);

Sync to make sure the cache is written to the netCDF file.

In [10]:
# Evaluate the CV on everything returned by storage.snapshots.all(); the
# trailing ';' suppresses the cell's output.
cv(storage.snapshots.all());

In [11]:
# Look the CV up in the storage under the name it was created with ('pyemma').
py_cv = storage.cvs['pyemma']

In [12]:
# The store for this CV is registered under 'cv<idx>', where <idx> is the
# CV's index in the storage; its 'value' variable is inspected below.
cv_store_name = 'cv%d' % storage.idx(py_cv)
store = storage.stores[cv_store_name]
nc_var = store.variables['value']

In [13]:
# The second dimension of the stored 'value' variable must be 15.
# The assertion carries a message so a failure reports the actual size, and
# print(...) is written in the form that works on both Python 2 and 3.
assert nc_var.shape[1] == 15, \
    "nc_var.shape[1] is %r, expected 15" % (nc_var.shape[1],)
print(nc_var.shape[1])

15


In [14]:
# The stored 'value' variable must report the type string 'numpy.float32'.
# The assertion carries a message so a failure reports the actual type, and
# print(...) is written in the form that works on both Python 2 and 3.
assert nc_var.var_type == 'numpy.float32', \
    "nc_var.var_type is %r, expected 'numpy.float32'" % (nc_var.var_type,)
print(nc_var.var_type)

numpy.float32


In [15]:
#! ignore
# Show the full contents of the 'cvs_json' storage variable.
# print(...) with a single argument is valid on both Python 2 and 3.
print(storage.variables['cvs_json'][:])

[ u'{"_cls": "CV_PyEMMA_Featurizer", "_dict": {"topology": {"_obj": "topologies", "_uuid": "8b0aba33-4832-11e6-88ff-000000000002"}, "name": "pyemma", "featurizer": {"_marshal": "YwEAAAABAAAAAwAAAEMAAABzIAAAAHwAAGoAAHwAAGoBAHwAAGoCAIMAAIMBAIMBAAFkAABTKAEAAABOKAMAAAB0FQAAAGFkZF9pbnZlcnNlX2Rpc3RhbmNlc3QFAAAAcGFpcnN0DwAAAHNlbGVjdF9CYWNrYm9uZSgBAAAAdAEAAABmKAAAAAAoAAAAAHMeAAAAPGlweXRob24taW5wdXQtNS00Nzc0ZDhlZGRkMDA+dBAAAABweWVtbWFfZ2VuZXJhdG9yAQAAAHMCAAAAAAE=", "_module_vars": [], "_global_vars": []}, "kwargs": {}}}']


In [16]:
# Index of the CV inside the storage.
py_cv_idx = storage.idx(py_cv)
# print(...) with a single argument is valid on both Python 2 and 3.
print(py_cv_idx)
# Fetch the 'cvs_json' entry at that index through storage.vars.
# NOTE(review): the result is called like a CV in the next cell, so
# storage.vars presumably yields the deserialized object rather than the raw
# JSON string -- confirm.
py_emma_feat = storage.vars['cvs_json'][py_cv_idx]

0


In [17]:
# Apply the object fetched from 'cvs_json' to the storage's snapshots and keep
# the result; it is compared against the reloaded CV at the end of the notebook.
erg = py_emma_feat(storage.snapshots);

In [18]:
#! lazy
# Print every 5th row of the result, restricted to columns 2 and 3.
# print(...) with a single argument is valid on both Python 2 and 3.
print(erg[0::5, 2:4])

[[ 2.68972969  2.06547379]
 [ 2.61396813  2.00944018]
 [ 2.73086929  2.06900215]
 [ 2.79462218  2.09850192]]


In [19]:
# Close both files; 'delete.nc' is reopened in the next cell.
storage.close()
ref_storage.close()

In [20]:
# Reopen the file written above, this time in read mode ('r').
storage = paths.Storage('delete.nc', 'r')

28-07-16 08:59:32 openpathsampling.netcdfplus.netcdfplus INFO     Open existing netCDF file 'delete.nc' for reading - reading from existing file


In [21]:
# Rebind `cv` to the CV at index 0 of the reopened file (replaces the object
# created earlier in the notebook).
cv = storage.cvs[0]

In [22]:
# Evaluate the reloaded CV on the stored snapshots; as the cell's last
# expression, the resulting array is displayed as the cell output.
cv(storage.snapshots)

array([[ 7.49063683,  4.10780048,  2.68972969,  2.06547379,  1.62878013,
         6.90130949,  4.08113956,  2.72602296,  2.0472281 ,  6.57030106,
         4.110569  ,  2.63028264,  7.49063683,  4.10780001,  6.90130711],
       [ 7.49063683,  4.10780048,  2.68972969,  2.06547379,  1.62878013,
         6.90130949,  4.08113956,  2.72602296,  2.0472281 ,  6.57030106,
         4.110569  ,  2.63028264,  7.49063683,  4.10780001,  6.90130711],
       [ 7.55804491,  4.04861212,  2.66780806,  2.04628849,  1.60359669,
         6.81927395,  4.0386076 ,  2.70586848,  2.00875568,  6.44432783,
         4.0713129 ,  2.58127213,  7.47397041,  3.9945395 ,  6.79377174],
       [ 7.55804491,  4.04861212,  2.66780806,  2.04628849,  1.60359669,
         6.81927395,  4.0386076 ,  2.70586848,  2.00875568,  6.44432783,
         4.0713129 ,  2.58127213,  7.47397041,  3.9945395 ,  6.79377174],
       [ 7.39766407,  3.91530514,  2.61396813,  2.00944018,  1.57548916,
         6.78701973,  3.98001122,  2.67948079, 

Make sure that we get the same result

In [23]:
# Values from the reloaded CV must match (within np.allclose's default
# tolerances) the values computed before the file was closed.
reloaded_values = cv(storage.snapshots)
assert np.allclose(erg, reloaded_values)

In [24]:
# Close the reopened file.
storage.close()