In [None]:
import os
import random as rndm

import pyemma
pyemma.__version__

In [None]:
import matplotlib as mpltlib
import matplotlib.pylab as plt
import numpy as np
#import nglview as nv
%pylab inline
import mdtraj
import numpy as np

In [None]:
import pyemma.coordinates as coor
import pyemma.msm as msm
import pyemma.plots as mplt

In [None]:
def average_by_state(dtraj, x, nstates):
    """Average an observable over the frames assigned to each discrete state.

    Parameters
    ----------
    dtraj : array-like of int
        Discrete state index of every frame.
    x : array-like
        Observable value of every frame; must have the same length as `dtraj`.
    nstates : int
        Number of states; averages are computed for states 0 .. nstates-1.

    Returns
    -------
    numpy.ndarray of shape (nstates,)
        Mean of `x` over the frames of each state. States that never occur
        in `dtraj` are reported as NaN.
    """
    # Coerce to arrays so that plain Python lists work too: with a list,
    # `dtraj == i` would be a single scalar comparison instead of a mask.
    dtraj = np.asarray(dtraj)
    x = np.asarray(x)
    assert len(dtraj) == len(x)
    res = np.zeros(nstates)
    for i in range(nstates):
        I = np.argwhere(dtraj == i)[:, 0]
        # Set NaN explicitly for unvisited states rather than relying on
        # np.mean of an empty slice, which also emits a RuntimeWarning.
        res[i] = np.mean(x[I]) if I.size else np.nan
    return res

def avg_by_set(x, sets):
    """Return the mean of `x` over the indices contained in each set.

    The means are computed per set, in the order given by `sets`. This is
    useful because, for technical reasons, the set order in a coarse-grained
    TPT object can differ from the input order.
    """
    n_sets = len(sets)
    avg = np.empty(n_sets)  # every entry is overwritten below
    for k, members in enumerate(sets):
        member_idx = list(members)
        avg[k] = np.mean(x[member_idx])
    return avg

def save_figure(name, do_save=True, fig_dir=''):
    """Save the current matplotlib figure with a tight bounding box.

    Parameters
    ----------
    name : str
        File name of the figure (including extension).
    do_save : bool, optional
        Set to False to turn the call into a no-op, e.g. while exploring.
    fig_dir : str, optional
        Directory to write into; the default '' writes `name` as given.
    """
    if not do_save:
        return
    # os.path.join('', name) == name, so the default matches the old behaviour.
    savefig(os.path.join(fig_dir, name), bbox_inches='tight')

def plot_sampled_function(xall, yall, zall, ax=None, nbins=100, nlevels=20, cmap=cm.bwr, cbar=True, cbar_label=None):
    """Contour-plot the bin-wise average of a sampled function z(x, y).

    The samples are histogrammed on an nbins x nbins grid spanning the data
    range; `zall` is averaged inside every bin and drawn with `contourf`.

    Parameters
    ----------
    xall, yall : array-like
        Sample coordinates.
    zall : array-like
        Function value of every sample.
    ax : matplotlib axes, optional
        Axes to draw into; the current axes are used when None.
    nbins : int
        Number of bin edges per dimension.
    nlevels : int
        Accepted for interface compatibility but currently unused; the
        number of contour levels is fixed at 100.
    cmap : matplotlib colormap
        Colormap of the filled contours.
    cbar : bool
        Whether to add a colorbar.
    cbar_label : str, optional
        Label of the colorbar.

    Returns
    -------
    ax : the axes that were drawn into.
    """
    # histogram data
    xmin = np.min(xall)
    xmax = np.max(xall)
    dx = (xmax - xmin) / float(nbins)
    ymin = np.min(yall)
    ymax = np.max(yall)
    dy = (ymax - ymin) / float(nbins)
    # bin data
    xbins = np.linspace(xmin - 0.5*dx, xmax + 0.5*dx, num=nbins)
    ybins = np.linspace(ymin - 0.5*dy, ymax + 0.5*dy, num=nbins)
    xI = np.digitize(xall, xbins)
    yI = np.digitize(yall, ybins)
    # accumulate sums and counts per bin; np.add.at handles repeated indices
    z = np.zeros((nbins, nbins))
    N = np.zeros((nbins, nbins))
    np.add.at(z, (xI, yI), zall)
    np.add.at(N, (xI, yI), 1.0)
    # empty bins give 0/0 = NaN on purpose; silence the warning
    with np.errstate(divide='ignore', invalid='ignore'):
        z /= N
    # do a contour plot
    extent = [xmin, xmax, ymin, ymax]
    if ax is None:
        ax = plt.gca()
    contours = ax.contourf(z.T, 100, extent=extent, cmap=cmap)
    if cbar:
        # Bind the colorbar to this contour set and these axes, so it also
        # works when `ax` is not the current axes.
        colorbar = plt.colorbar(contours, ax=ax)
        if cbar_label is not None:
            colorbar.ax.set_ylabel(cbar_label)

    return ax

def plot_sampled_density(xall, yall, zall, ax=None, nbins=100, cmap=cm.Blues, cbar=True, cbar_label=None):
    """Call plot_sampled_function with the given arguments (default colormap: Blues)."""
    options = dict(ax=ax, nbins=nbins, cmap=cmap, cbar=cbar, cbar_label=cbar_label)
    return plot_sampled_function(xall, yall, zall, **options)

def griddata(x, y, z, binsize=0.01, retbin=True, retloc=True):
    """Bin scattered samples z(x, y) onto a regular grid using the median.

    Parameters
    ----------
    x, y : array-like
        Sample coordinates.
    z : array-like
        Sample values.
    binsize : float
        Edge length of the square bins; grid nodes are spaced by `binsize`
        and a sample belongs to a node if it lies within binsize/2 of it in
        both x and y.
    retbin : bool
        Also return the number of samples per bin.
    retloc : bool
        Also return, per bin, the indices of the samples that fell into it.

    Returns
    -------
    grid : ndarray
        Median of `z` per bin, NaN for empty bins.
    bins : ndarray, only if retbin
        Sample count per bin.
    wherebin : nested list of index arrays, only if retloc
        wherebin[row][col] holds the sample indices of that bin.
    """
    # accept plain sequences as well as arrays
    x = np.asarray(x)
    y = np.asarray(y)
    z = np.asarray(z)

    # get extrema values.
    xmin, xmax = x.min(), x.max()
    ymin, ymax = y.min(), y.max()

    # make coordinate arrays.
    xi = np.arange(xmin, xmax + binsize, binsize)
    yi = np.arange(ymin, ymax + binsize, binsize)
    xi, yi = np.meshgrid(xi, yi)

    # make the grid. Empty bins are filled with NaN, so the grid must be a
    # floating-point array even when the coordinates are integers; float
    # coordinate dtypes are kept as before.
    grid_dtype = x.dtype if np.issubdtype(x.dtype, np.inexact) else np.float64
    grid = np.zeros(xi.shape, dtype=grid_dtype)
    nrow, ncol = grid.shape
    if retbin:
        bins = np.copy(grid)

    # nested list in the same shape as the grid to store sample indices
    if retloc:
        wherebin = [[None] * ncol for _ in range(nrow)]

    half_bin = binsize / 2.

    # fill in the grid.
    for row in range(nrow):
        for col in range(ncol):
            xc = xi[row, col]    # x coordinate.
            yc = yi[row, col]    # y coordinate.

            # select the samples that fall into the bin centred on (xc, yc).
            in_bin = np.logical_and(np.abs(x - xc) < half_bin,
                                    np.abs(y - yc) < half_bin)
            ind = np.where(in_bin)[0]

            # fill the bin.
            values = z[in_bin]
            if retloc:
                wherebin[row][col] = ind
            if retbin:
                bins[row, col] = values.size
            if values.size != 0:
                grid[row, col] = np.median(values)
            else:
                grid[row, col] = np.nan   # fill empty bins with nans.

    # return the grid
    if retbin:
        if retloc:
            return grid, bins, wherebin
        else:
            return grid, bins
    else:
        if retloc:
            return grid, wherebin
        else:
            return grid

In [None]:
# Input location: topology file and directory holding the NetCDF trajectories.
indir = '.'
topfile =  'hdim-oct.inpcrd.pdb'

# os.listdir returns entries in arbitrary order; sort them so the trajectory
# order (and everything indexed by it) is the same on every run and machine.
traj_list = sorted(
    os.path.join(indir, filename)
    for filename in os.listdir(indir)
    if filename.endswith('.nc')
)

print(topfile)
print(traj_list)

In [None]:
# System layout constants.
# NOTE(review): presumably atoms per molecule and number of molecules; neither
# name is read in the cells shown here -- confirm where they are used.
atom_mol = 145
num_mol = 2

# Topology of the structure stored in `topfile`, loaded with mdtraj.
topology = mdtraj.load(topfile).topology

In [None]:
## Intermolecular Ca distance as the Feature

cainter_feat = coor.featurizer(topfile)

# Register a user-supplied feature function; the second argument is passed as
# the feature dimension.
# NOTE(review): dist_intermol and dim_permute_sortinter are not defined in any
# cell shown here -- they must already exist in the kernel when this cell runs.
cainter_feat.add_custom_func(dist_intermol, dim_permute_sortinter)

# print(cainter_feat.describe()[:5])

# `features=` is passed by keyword, consistent with the other feature cells.
cainter_inp = coor.load(traj_list, features=cainter_feat)
#print('traj dim =', cainter_inp.dimension())

labels = ['Inter\nCa dist']

# score_cainter = coor.vamp(data=cainter_inp,dim=4,lag=100).score(score_method='VAMP2')
# print('VAMP2-score Intermolecy:',score_cainter)

In [None]:
## Intramolecular Ca distance as the Feature

caintra_feat = coor.featurizer(topfile)

# Register a user-supplied feature function; the second argument is passed as
# the feature dimension.
# NOTE(review): dist_intramol and dim_permute_sortintra are not defined in any
# cell shown here -- they must already exist in the kernel when this cell runs.
caintra_feat.add_custom_func(dist_intramol, dim_permute_sortintra)

# Featurize all trajectories in traj_list with this featurizer.
caintra_inp = coor.load(traj_list, features=caintra_feat)
#print('traj dim =', caintra_inp.dimension())

# NOTE(review): `labels` is reassigned by every feature cell and is not read in
# the cells shown here.
labels = ['Intra\nCa dist']

In [None]:
## Intermolecular BB distance as the Feature

bbinter_feat = coor.featurizer(topfile)

# NOTE(review): this registers the same dist_intermol / dim_permute_sortinter
# pair as the intermolecular Ca cell. Unless those names are redefined between
# the two cells, both compute the same feature -- confirm this is intended.
bbinter_feat.add_custom_func(dist_intermol, dim_permute_sortinter)

# Featurize all trajectories in traj_list with this featurizer.
bbinter_inp = coor.load(traj_list, features=bbinter_feat)
#print('traj dim =', cainter_inp.dimension())

# NOTE(review): `labels` is reassigned by every feature cell and is not read in
# the cells shown here.
labels = ['Inter\nBB dist']

In [None]:
## Intramolecular BB distance as the Feature

bbintra_feat = coor.featurizer(topfile)

# NOTE(review): this registers the same dist_intramol / dim_permute_sortintra
# pair as the intramolecular Ca cell. Unless those names are redefined between
# the two cells, both compute the same feature -- confirm this is intended.
bbintra_feat.add_custom_func(dist_intramol, dim_permute_sortintra)

# Featurize all trajectories in traj_list with this featurizer.
bbintra_inp = coor.load(traj_list, features=bbintra_feat)
#print('traj dim =', caintra_inp.dimension())

# NOTE(review): `labels` is reassigned by every feature cell and is not read in
# the cells shown here.
labels = ['Intra\nBB dist']

In [None]:
## Backbone Torsion as the Feature

torsionfeat = coor.featurizer(topfile)

# Backbone torsion angles, requested in degrees (deg=True).
# NOTE(review): the angles are used as raw values here; if periodicity matters
# for the downstream VAMP scoring, consider the cos/sin option -- TODO confirm.
torsionfeat.add_backbone_torsions(deg=True)

# Featurize all trajectories in traj_list with this featurizer.
torsion_inp = coor.load(traj_list, features=torsionfeat)

# NOTE(review): `labels` is reassigned by every feature cell and is not read in
# the cells shown here.
labels = ['BB\nTorsion']

In [None]:
## Backbone Atoms as the Feature

bb_atoms_feat = coor.featurizer(topfile)

# Use the atoms returned by the featurizer's backbone selection as features.
bb_atoms_feat.add_selection(bb_atoms_feat.select_Backbone())

# print(bb_atoms_feat.describe()[:5])

# Featurize all trajectories in traj_list with this featurizer.
bb_inp = coor.load(traj_list, features=bb_atoms_feat)
#print('traj dim =', bb_inp.dimension())

# NOTE(review): `labels` is reassigned by every feature cell and is not read in
# the cells shown here.
labels = ['Backbone\nAtoms']

In [None]:
def score_cv(data, dim, lag, number_of_splits=10, validation_fraction=0.2):
    """Compute a cross-validated VAMP2 score.

    We randomly split the list of independent trajectories into
    a training and a validation set, compute the VAMP2 score,
    and repeat this process several times.

    Parameters
    ----------
    data : list
        One featurized trajectory per entry.
    dim : int
        Number of VAMP dimensions, passed to pyemma.coordinates.vamp.
    lag : int
        Lag time, passed to pyemma.coordinates.vamp.
    number_of_splits : int
        Number of random training/validation splits.
    validation_fraction : float
        Fraction of the trajectories held out for validation in each split.

    Returns
    -------
    numpy.ndarray of shape (number_of_splits,)
        Validation score of each split, from the estimator's default
        scoring method.

    Raises
    ------
    ValueError
        If the split would leave the training or the validation set empty.

    Notes
    -----
    Splits are drawn from NumPy's global random state; call
    ``np.random.seed`` beforehand for reproducible scores.
    """
    # progress-bar suppression is currently disabled:
    #with pyemma.util.contexts.settings(show_progress_bars=False):
    ntraj = len(data)
    nval = int(ntraj * validation_fraction)
    # Fail early with a clear message rather than letting an empty training or
    # validation set surface as an obscure estimator error.
    if nval < 1 or nval >= ntraj:
        raise ValueError(
            'validation_fraction=%r with %d trajectories gives %d validation '
            'trajectories; need at least 1 for validation and 1 for training'
            % (validation_fraction, ntraj, nval))
    scores = np.zeros(number_of_splits)
    for n in range(number_of_splits):
        ival = set(np.random.choice(ntraj, size=nval, replace=False).tolist())
        training = [d for i, d in enumerate(data) if i not in ival]
        validation = [d for i, d in enumerate(data) if i in ival]
        vamp = pyemma.coordinates.vamp(training, lag=lag, dim=dim)
        scores[n] = vamp.score(validation)
    return scores

In [None]:
# VAMP settings shared by every feature set scored below.
dim =2
lag =100

# Each block cross-validates one feature set with score_cv and stores the mean
# score in a one-element list; the bar-plot cell reads those lists.
torsion_scores = score_cv(torsion_inp, lag=lag, dim=dim)
bb_tr = [torsion_scores.mean()]
# errors1 = [torsion_scores.std()]

# NOTE(review): sdtr_inp is not created in any cell shown here -- on a fresh
# kernel this line raises NameError unless it is defined elsewhere.
sdtr_scores = score_cv(sdtr_inp, lag=lag, dim=dim)
sc_tr = [sdtr_scores.mean()]
# errors2 = [sdtr_scores.std()]

# NOTE(review): ca_inp is not created in any cell shown here either.
ca_scores = score_cv(ca_inp, lag=lag, dim=dim)
ca_pos = [ca_scores.mean()]
# errors3 = [ca_scores.std()]

# allatom_scores = score_cv(all_inp, lag=lag, dim=dim)
# all_pos = [allatom_scores.mean()]
# # errors += [allatom_scores.std()]

backbone_scores = score_cv(bb_inp, lag=lag, dim=dim)
bb_pos = [backbone_scores.mean()]
# errors += [backbone_scores.std()]

# cainter_scores = score_cv(cainter_inp, lag=lag, dim=dim)
# dist_inter_ca = [cainter_scores.mean()]
# # errors += [min_scores.std()]

# cainterinv_scores = score_cv(cainterinv_inp, lag=lag, dim=dim)
# distinv_inter_ca = [cainterinv_scores.mean()]
# # errors += [min_scores.std()]

# caintra_scores = score_cv(caintra_inp, lag=lag, dim=dim)
# dist_intra = [caintra_scores.mean()]
# # errors += [min_scores.std()]

# caintrainv_scores = score_cv(caintrainv_inp, lag=lag, dim=dim)
# distinv_intra = [caintrainv_scores.mean()]
# # errors += [min_scores.std()]

bbinter_scores = score_cv(bbinter_inp, lag=lag, dim=dim)
dist_inter_bb = [bbinter_scores.mean()]
# errors += [min_scores.std()]

bbintra_scores = score_cv(bbintra_inp, lag=lag, dim=dim)
dist_intra_bb = [bbintra_scores.mean()]
# errors += [min_scores.std()]

In [None]:
# Bar chart of the mean cross-validated score of every feature set, with the
# bars ordered from the lowest to the highest score.
fig, ax = plt.subplots(figsize=(10, 7))
score_mapping = {
    'BB_position': bb_pos,
    'Ca_position': ca_pos,
    'BB_torsion': bb_tr,
    'SC_torsion': sc_tr,
    'Inter_BB_distance': dist_inter_bb,
    'Intra_BB_distance': dist_intra_bb,
}
lbl = []
# One ax.bar call per feature set, so each bar gets its own colour from the cycle.
for i, (key, value) in enumerate(sorted(score_mapping.items(), key=lambda item: item[1])):
    lbl.append(key)
    ax.bar(i, height=value)
ax.set_xticks(np.arange(len(score_mapping)))
ax.set_xticklabels(lbl)
ax.set_ylabel("VAMP-2 Score")
fig.tight_layout()