### PyEmma Featurizer Support

In [1]:
import openpathsampling as paths
import numpy as np

from __future__ import print_function

In [2]:
#! lazy
import pyemma.coordinates as coor

You are still using msmtools from the deprecated Omnia channel. Please switch to conda-forge to catch future updates.
In order to do so please set conda-forge channel to highest priority by:

    conda config --add channels conda-forge

and update this package by:

    conda update msmtools




In [3]:
#! lazy
# Open the reference file read-only; its first trajectory and the topology of
# its first snapshot are reused by later cells.
ref_storage = paths.Storage('engine_store_test.nc', mode='r')

18-06-18 22:02:18 openpathsampling.netcdfplus.netcdfplus INFO     Open existing netCDF file 'engine_store_test.nc' for reading - reading from existing file
18-06-18 22:02:18 openpathsampling.netcdfplus.netcdfplus INFO     Loading different netcdf version. Installed version is 0.9.4 and loaded version is 0.9.2
18-06-18 22:02:18 openpathsampling.netcdfplus.netcdfplus INFO     Loaded version is older. Should be no problem other then missing features and information
18-06-18 22:02:18 openpathsampling.storage.storage INFO     Loading different OPS storage version. Installed version is 0.9.4 and loaded version is 0.9.2.dev-e35c4fe
18-06-18 22:02:18 openpathsampling.storage.storage INFO     Loaded version is older. Should be no problem other then missing features and information
18-06-18 22:02:18 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover'] and instatiated with ['replica', 'trajectory



In [4]:
#! lazy
# Create 'delete.nc' in write mode and store the first reference trajectory in it.
# The save call is the last expression, so its return value is shown as cell output.
storage = paths.Storage('delete.nc', 'w')
storage.trajectories.save(ref_storage.trajectories[0])

18-06-18 22:02:18 openpathsampling.netcdfplus.netcdfplus INFO     Create new netCDF file 'delete.nc' for writing - deleting existing file
18-06-18 22:02:18 openpathsampling.netcdfplus.netcdfplus INFO     Setup netCDF file and create variables
18-06-18 22:02:18 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover'] and instatiated with ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover']
18-06-18 22:02:18 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['samples', 'movepath'] and instatiated with ['samples', 'movepath']
18-06-18 22:02:18 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['simulation', 'mccycle', 'previous', 'active', 'change'] and instatiated with ['simulation', 'mccycle', 'previous', 'active', 'change']
18-06-18 22:02:18 openpathsampling.netcdfplus.netcdfplus INFO     Initializi

41651733812648293111110397822037393428

Import a PyEmma Coordinates Module

Using PyEMMA featurizers, or other complex code in general, requires a little trick to make them storable. Since code can only be stored if it does not depend on its context (scope), we need to wrap the construction of our featurizer in a function that receives everything it needs from the global scope as a parameter.

In [5]:
def pyemma_generator(f):
    """Add inverse-distance features for all backbone pairs to ``f``.

    ``f`` is the featurizer object handed in by the caller. It is modified in
    place and nothing is returned. The function only uses its argument and no
    global names, which keeps it independent of the surrounding scope.
    """
    backbone_selection = f.select_Backbone()
    backbone_pairs = f.pairs(backbone_selection)
    f.add_inverse_distances(backbone_pairs)

In [6]:
# Build the collective variable step by step instead of in one chained expression.
cv = paths.collectivevariable.PyEMMAFeaturizerCV(
    featurizer=pyemma_generator,
    topology=ref_storage.snapshots[0].topology
)
# Turn on disk caching for the CV.
cv = cv.with_diskcache()
# Name it 'pyemma'; a later cell looks it up via storage.cvs['pyemma'].
cv = cv.named('pyemma')

Now use this featurizer-generating function to build a collective variable. All we need for that is a name (as usual), the generating function, and the list of parameters – here only the topology – and, ideally, a test snapshot (a template).

In [7]:
# Evaluate the CV on the first trajectory of the reference storage.
# The trailing ';' keeps the notebook from echoing the returned values.
cv(ref_storage.trajectories[0]);

Let's save it to the storage

In [8]:
#! lazy
# Save the CV into the writable storage and print whatever save() returns.
print(storage.save(cv))

(store.attributes[PseudoAttribute] : 1 object(s), 20, 169829974798395533320853678154322018600)


Then apply the featurizer to a trajectory.

In [9]:
# Evaluate the CV on the first trajectory as read from the new storage;
# the trailing ';' suppresses the cell output.
cv(storage.trajectories[0]);

Evaluate the CV on all snapshots in the storage to make sure the cached values are written to the netCDF file (note that no explicit sync call is made here).

In [10]:
# Evaluate the CV on storage.snapshots.all() (presumably every stored snapshot);
# the trailing ';' suppresses the cell output.
cv(storage.snapshots.all());

In [11]:
# Look the CV up in the storage by the name it was given ('pyemma').
py_cv = storage.cvs['pyemma']

In [12]:
# Pick the store named 'cv<idx>', where <idx> is the index the storage reports
# for the CV, and take its 'value' variable for inspection.
store = storage.stores['cv%d' % storage.idx(py_cv)]
nc_var = store.variables['value']

In [13]:
# The second axis of the stored 'value' variable must have length 15.
# `assert` is a statement, not a function: the call-style parentheses are
# dropped (they silently become an always-true tuple once a message is added),
# and the message reports the actual shape when the check fails.
assert nc_var.shape[1] == 15, 'unexpected shape %r' % (nc_var.shape,)
print(nc_var.shape[1])

15


In [14]:
# The stored values must be declared with the type string 'numpy.float32'.
# Written as an `assert` statement (no call-style parentheses) with a message
# that reports the actual type string on failure.
assert nc_var.var_type == 'numpy.float32', 'unexpected var_type %r' % (nc_var.var_type,)
print(nc_var.var_type)

numpy.float32


In [15]:
#! ignore
# Dump the raw contents of the 'attributes_json' variable; the recorded output
# shows the JSON-serialized CV definition.
print(storage.variables['attributes_json'][:])

[ '{"_cls":"PyEMMAFeaturizerCV","_dict":{"featurizer":{"_marshal":"4wEAAAAAAAAAAQAAAAMAAABDAAAAcxgAAAB8AGoAfABqAXwAagKDAIMBgwEBAGQAUwApAU4pA9oVYWRkX2ludmVyc2VfZGlzdGFuY2Vz2gVwYWlyc9oPc2VsZWN0X0JhY2tib25lKQHaAWapAHIFAAAA+h48aXB5dGhvbi1pbnB1dC01LTQ3NzRkOGVkZGQwMD7aEHB5ZW1tYV9nZW5lcmF0b3IBAAAAcwIAAAAAAQ==","_global_vars":[],"_module_vars":[]},"topology":{"_hex_uuid":"0x1f55d507081211e7aa35000000000002","_store":"topologies"},"kwargs":{}}}']


In [16]:
# Index the storage reports for the CV (printed for reference).
py_cv_idx = storage.idx(py_cv)
print(py_cv_idx)
# Fetch the entry at that index from 'attributes_json' through storage.vars;
# the result is called like a CV in the next cell.
py_emma_feat = storage.vars['attributes_json'][py_cv_idx]

0


In [17]:
# Apply the loaded object to the storage's snapshots and keep the result; it is
# compared against the reloaded CV after the file is reopened.
# The trailing ';' suppresses the cell output.
erg = py_emma_feat(storage.snapshots);

In [18]:
#! lazy
# Show columns 2 and 3 of the result for all rows.
print(erg[:,2:4])

[[ 2.68972969  2.06547379]
 [ 2.66780782  2.04628825]
 [ 2.61396813  2.00944018]
 [ 2.61284065  2.00203276]
 [ 2.68295574  2.04546618]
 [ 2.73086905  2.06900191]
 [ 2.79744911  2.09469581]
 [ 2.79462266  2.09850168]
 [ 2.74107432  2.08592987]
 [ 2.75102949  2.09090185]]


In [19]:
# Close both files; 'delete.nc' is reopened read-only in the next cell.
storage.close()
ref_storage.close()

In [20]:
#! lazy
# Reopen the file written earlier, this time in read mode.
storage = paths.Storage('delete.nc', 'r')

18-06-18 22:02:20 openpathsampling.netcdfplus.netcdfplus INFO     Open existing netCDF file 'delete.nc' for reading - reading from existing file
18-06-18 22:02:20 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover'] and instatiated with ['replica', 'trajectory', 'ensemble', 'bias', 'parent', 'mover']
18-06-18 22:02:20 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['samples', 'movepath'] and instatiated with ['samples', 'movepath']
18-06-18 22:02:20 openpathsampling.netcdfplus.stores.variable INFO     Creates VariableStore with variables ['simulation', 'mccycle', 'previous', 'active', 'change'] and instatiated with ['simulation', 'mccycle', 'previous', 'active', 'change']


In [21]:
# Load the first stored CV by index from the reopened file.
cv = storage.cvs[0]

Make sure that we get the same result

In [22]:
# The CV loaded from the reopened file must reproduce the values computed
# before the file was closed (np.allclose compares within its default tolerances).
assert np.allclose(erg, cv(storage.snapshots))

In [23]:
# Close the storage at the end of the notebook.
storage.close()