In [7]:
import os
import numpy as np
import torch
import torch.autograd.profiler as profiler
from torch_batch_svd import svd
import MDAnalysis as md
from numba import jit
from shapeGMM import _traj_tools as traj_tools
from torch_shapeGMM import ShapeGMMTorch
from shapeGMM import gmm_shapes

In [16]:
delta = 10
# read trajectory
data_path = '../../DESRES-Trajectory_pnas2012-2f4k-360K-protein/pnas2012-2f4k-360K-protein/'
selection = "name CA and not resid 42 76"
#selection = "bynum 5:204"
#selection = "all"
# LOAD DATA
prmtopFileName =  data_path + 'pnas2012-2f4k-360K-protein.pdb'
trajFiles = [data_path + files for files in sorted(os.listdir(data_path)) if files.endswith('.dcd')]
coord = md.Universe(prmtopFileName,trajFiles)
sel = coord.select_atoms(selection)
print("Number of atoms in trajectory:", coord.atoms.n_atoms)
print("Number of frames in trajectory:",coord.trajectory.n_frames)
print("Number of atoms being analyzed:",sel.n_atoms)
print("Number of frames being analyzed:",coord.trajectory.n_frames//delta+1)
traj = np.empty((coord.trajectory.n_frames//delta+1,sel.n_atoms,3),dtype=float)
count = 0
for ts in coord.trajectory[::delta]:
    traj[count,:,:] = sel.positions#-sel.center_of_geometry()
    count += 1

Number of atoms in trajectory: 577
Number of frames in trajectory: 1526041
Number of atoms being analyzed: 33
Number of frames being analyzed: 152605


In [3]:
sgmm = ShapeGMMTorch(n_clusters=2,verbose=True,init_cluster_method="chunk",dtype=torch.float64)
sgmm.fit(traj)

Number of frames being analyzed: 15261
Number of particles being analyzed: 33
Number of dimensions (must be 3): 3
Initializing clustering using method: chunk
Weights from initial clusters in fit: [0.50003276 0.49996724]
1 [0.658 0.342] -121.787
2 [0.631 0.369] -73.975
3 [0.603 0.397] -69.322
4 [0.595 0.405] -68.808
5 [0.594 0.406] -68.781
6 [0.593 0.407] -68.779
7 [0.593 0.407] -68.779
Total elapsed time: 3035.619
Time to send data: 542.514 17.872
Expectation time: 944.214 31.105
Gamma time: 1.315 0.043
Maximization time: 1392.511 45.872


In [9]:
def torch_fit():
    sgmm = ShapeGMMTorch(n_clusters=2,verbose=False,init_cluster_method="chunk",dtype=torch.float64)
    sgmm.fit(traj)

In [10]:
def cpu_fit():
    sgmm_old = gmm_shapes.ShapeGMM(n_clusters=2,verbose=False,init_cluster_method="uniform")
    fit_traj = sgmm_old.fit_uniform(traj)

In [17]:
# other svd
%timeit torch_fit()

Total elapsed time: 26587.172
Time to send data: 3384.757 12.731
Expectation time: 8502.767 31.981
Gamma time: 1.93 0.007
Maximization time: 13192.504 49.62
Total elapsed time: 26533.645
Time to send data: 3357.66 12.654
Expectation time: 8503.037 32.046
Gamma time: 1.926 0.007
Maximization time: 13192.475 49.72
Total elapsed time: 26688.717
Time to send data: 3366.956 12.616
Expectation time: 8557.621 32.065
Gamma time: 1.942 0.007
Maximization time: 13272.986 49.733
Total elapsed time: 26723.457
Time to send data: 3382.693 12.658
Expectation time: 8566.479 32.056
Gamma time: 1.936 0.007
Maximization time: 13290.84 49.735
Total elapsed time: 26716.871
Time to send data: 3382.978 12.662
Expectation time: 8566.23 32.063
Gamma time: 1.931 0.007
Maximization time: 13291.849 49.751
Total elapsed time: 26726.959
Time to send data: 3383.325 12.659
Expectation time: 8566.071 32.05
Gamma time: 1.932 0.007
Maximization time: 13291.621 49.731
Total elapsed time: 26730.078
Time to send data: 3382

In [None]:
%timeit cpu_fit()

In [15]:
# torch.linalg.svd
%timeit torch_fit()

Total elapsed time: 2878.192
Time to send data: 395.105 13.728
Expectation time: 937.551 32.574
Gamma time: 1.272 0.044
Maximization time: 1381.551 48.001
Total elapsed time: 2847.587
Time to send data: 372.465 13.08
Expectation time: 937.379 32.918
Gamma time: 1.261 0.044
Maximization time: 1381.32 48.508
Total elapsed time: 2874.535
Time to send data: 373.529 12.994
Expectation time: 942.303 32.781
Gamma time: 1.271 0.044
Maximization time: 1402.756 48.799
Total elapsed time: 2866.681
Time to send data: 375.319 13.092
Expectation time: 944.507 32.948
Gamma time: 1.269 0.044
Maximization time: 1391.733 48.549
Total elapsed time: 2865.1
Time to send data: 375.182 13.095
Expectation time: 944.263 32.957
Gamma time: 1.274 0.044
Maximization time: 1391.479 48.566
Total elapsed time: 2866.259
Time to send data: 375.157 13.089
Expectation time: 944.457 32.951
Gamma time: 1.258 0.044
Maximization time: 1391.236 48.538
Total elapsed time: 2866.052
Time to send data: 375.14 13.089
Expectation 

In [14]:
%timeit cpu_fit()

11 s ± 10.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
import torch_align
traj_tensor = torch.tensor(traj,dtype=torch.float64,device="cuda:0")
torch_align.torch_remove_center_of_geometry(traj_tensor)
traj_data = traj_tensor.cpu().numpy().astype(np.float64)

In [10]:
from shapeGMM import _gmm_shapes_uniform_library as uniform_lib
n_frames = traj.shape[0]
n_atoms = traj.shape[1]
clusters = np.zeros(n_frames).astype(int)
cpu_log_lik = uniform_lib.uniform_sgmm_log_likelihood(traj_data,clusters)
print(cpu_log_lik)

-1878676.1226401418


In [7]:
from shapeGMM import _gmm_shapes_uniform_library as uniform_lib
import torch_uniform_lib
n_frames = traj.shape[0]
n_atoms = traj.shape[1]
clusters = np.ones(n_frames).astype(int)
clusters[:n_frames//2] -= 1
cpu_log_lik = uniform_lib.uniform_sgmm_log_likelihood(traj_data,clusters)
torch_log_lik = torch_uniform_lib.uniform_sgmm_log_likelihood(traj_tensor,clusters)
print("Torch:", torch_log_lik)
print("Cpu:", cpu_log_lik)

Torch: tensor(-1858678.8339, device='cuda:0', dtype=torch.float64)
Cpu: -1858677.5374847776


In [4]:
from shapeGMM import _gmm_shapes_uniform_library as uniform_lib

In [7]:
print(traj_tensor[1])

tensor([[  6.8477,   4.7297,   4.7206],
        [  3.0619,   3.5255,   4.9044],
        [  3.3194,   3.3377,   1.0083],
        [  6.1238,   0.6801,   0.6809],
        [  4.9286,  -2.9690,   0.5469],
        [  8.2410,  -4.1646,   2.3025],
        [  7.2443,  -2.0381,   5.2875],
        [  3.5406,  -3.4833,   5.4977],
        [  2.5575,  -0.6423,   7.7899],
        [ -0.0694,   1.2573,   5.6587],
        [ -2.4256,  -1.6055,   6.4778],
        [ -2.5465,  -0.0371,  10.1318],
        [ -5.2052,   2.2669,   8.3972],
        [ -7.4261,  -0.7279,   7.8922],
        [-10.7499,   0.4367,   9.4302],
        [-13.4596,   0.7661,   6.7651],
        [-10.7013,   0.1048,   4.2755],
        [-10.8844,  -1.8480,   0.9678],
        [ -7.1411,  -2.5058,   0.1368],
        [ -3.7194,  -1.7842,   1.7905],
        [ -2.2321,   1.7227,   0.9927],
        [  0.8930,  -0.1173,  -0.2661],
        [ -1.2593,  -1.9534,  -2.9012],
        [ -2.3790,  -0.5522,  -6.2151],
        [  0.9711,   1.3475,  -6.2008],


In [16]:
print(traj_tensor[1])

tensor([[ -8.6059,  -3.9422,  -1.3943],
        [ -5.5889,  -3.6376,   1.1788],
        [ -3.4849,  -2.6729,  -1.9714],
        [ -5.3743,   0.5394,  -3.0420],
        [ -4.0941,   3.8045,  -1.4727],
        [ -7.7171,   5.3351,  -1.5944],
        [ -8.8158,   2.5236,   0.7109],
        [ -5.8646,   3.0718,   3.3263],
        [ -6.6122,  -0.2919,   4.8815],
        [ -3.3654,  -2.2792,   4.1335],
        [ -1.7649,  -0.1962,   6.8753],
        [ -3.9265,  -2.4093,   9.3764],
        [ -0.9146,  -4.8654,   8.8452],
        [  1.3670,  -2.3788,  10.5089],
        [  3.0578,  -4.4987,  13.2323],
        [  6.7893,  -4.8990,  12.5468],
        [  6.0819,  -3.2051,   9.2491],
        [  8.3089,  -0.7538,   7.2949],
        [  5.8301,   0.8383,   4.7540],
        [  2.0535,   0.5739,   3.9594],
        [  1.0975,  -2.3176,   1.5362],
        [ -0.5539,   0.3545,  -0.6704],
        [  2.8533,   2.1441,  -1.0395],
        [  5.6177,   1.1804,  -3.4117],
        [  2.7889,   0.0784,  -5.7814],


In [6]:
import torch_uniform_lib
import torch_align
clusters = np.zeros(n_frames).astype(int)
traj_data = traj_tensor.cpu().numpy().astype(np.float64)
torch_log_lik = torch_uniform_lib.uniform_sgmm_log_likelihood(traj_tensor,clusters)
cpu_log_lik = uniform_lib.uniform_sgmm_log_likelihood(traj_data.astype(np.float64),clusters)
print("Torch:", torch_log_lik)
print("CPU:", cpu_log_lik)

Torch: tensor(-1878676.1219, device='cuda:0', dtype=torch.float64)
CPU: -1878676.1226401418


In [8]:
traj_tensor, torch_avg, torch_var = torch_align.torch_iterative_align_uniform(traj_tensor,verbose=True,dtype=torch.float64,thresh=1e-3)

-140.6965287253666
-123.27849525863395
-123.1051094426547
-123.10310916013485
-123.10308154852453


In [19]:
c_mats = torch.matmul(torch_avg.T,traj_tensor)
u, s, v = svd(c_mats)
print(s)

tensor([[3056.5600,  329.3635,  127.4152],
        [1286.9537,  771.7097,   43.2465],
        [1272.0988,  751.8033,   71.6444],
        ...,
        [1545.7034,  929.0017,  127.7904],
        [1432.5021,  943.6318,  139.7874],
        [1515.1137,  912.2363,  148.8823]], device='cuda:0',
       dtype=torch.float64)


In [20]:
prod_dets = torch.linalg.det(u)*torch.linalg.det(v)

In [21]:
%timeit u[:,:,-1] *= prod_dets.view(-1,1)

19.8 µs ± 70.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [18]:
def mult(u,prod_dets):
    u[:,0,-1] *= prod_dets
    u[:,1,-1] *= prod_dets
    u[:,2,-1] *= prod_dets
%timeit mult(u,prod_dets)

51.4 µs ± 137 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [16]:
n_frames = c_mats.shape[0]
m_mats = torch.empty((n_frames,4,4),dtype=torch.float64,device="cuda:0")
m_mats[:,0,0] = c_mats[:,0,0] + c_mats[:,1,1] + c_mats[:,2,2]
m_mats[:,1,0] = c_mats[:,1,2] - c_mats[:,2,1] 
m_mats[:,1,1] = c_mats[:,0,0] - c_mats[:,1,1] - c_mats[:,2,2]
m_mats[:,2,0] = c_mats[:,2,0] - c_mats[:,0,2]
m_mats[:,2,1] = c_mats[:,0,1] + c_mats[:,1,0]
m_mats[:,2,2] = -c_mats[:,0,0] + c_mats[:,1,1] - c_mats[:,2,2]
m_mats[:,3,0] = c_mats[:,0,1] - c_mats[:,1,0]
m_mats[:,3,1] = c_mats[:,0,2] - c_mats[:,2,0]
m_mats[:,3,2] = c_mats[:,1,2] + c_mats[:,2,1]
m_mats[:,3,3] = -c_mats[:,0,0] - c_mats[:,1,1] + c_mats[:,2,2]
e, v = torch.linalg.eigh(m_mats)
print(e)

tensor([[-3453.9513, -1400.3969,  1341.0095,  3513.3387],
        [-2104.1921,  -407.2036,   495.9789,  2015.4168],
        [-2097.0689,  -411.7150,   556.5262,  1952.2577],
        ...,
        [-2350.0777,  -436.7064,   184.2885,  2602.4955],
        [-2238.6947,  -411.5551,   134.3286,  2515.9212],
        [-2282.8416,  -465.1869,   171.7962,  2576.2323]], device='cuda:0',
       dtype=torch.float64)


In [18]:
%timeit e, v = torch.linalg.eigh(m_mats)

873 ms ± 1.94 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [19]:
%timeit u, s, v = svd(c_mats)

58.6 ms ± 15.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [20]:
%timeit u, s, v = torch.linalg.svd(c_mats)

84.2 ms ± 93.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [17]:
disp = traj_tensor - torch_avg
n_atoms = disp.shape[1]
# compute framewise variance
mvG = torch.matmul(disp.view(-1,1,n_atoms*3),disp.view(-1,n_atoms*3,1))
print(mvG.shape)

torch.Size([15261, 1, 1])


In [9]:
disp = traj_tensor - torch_avg
mvg = torch_uniform_lib.ln_spherical_gaussian_pdf(disp,torch_var).reshape(1,n_frames)
torch_log_likelihood = torch.logsumexp(mvg,0)
print(torch.sum(torch_log_likelihood))

tensor(-1878676.1275, device='cuda:0', dtype=torch.float64)


In [79]:
avg_numpy = avg.cpu().numpy()
mvG = uniform_lib.ln_spherical_gaussian_pdf(traj_data.reshape(n_frames,n_atoms*3), center.reshape(n_atoms*3), var).reshape(1,n_frames)
log_likelihood = 0
for i in range(n_frames):
    log_likelihood += uniform_lib.logsumexp(mvG[:,i])
print(mvG)

[[-602.29209913 -209.18311537 -232.3707049  ...  -82.05303052
   -82.51625549  -81.54216565]]


In [86]:
disp = traj_tensor - torch_avg
var_x = torch.matmul(disp[:,:,0].reshape(n_frames,1,n_atoms),disp[:,:,0].reshape(n_frames,n_atoms,1))
var_y = torch.matmul(disp[:,:,1].reshape(n_frames,1,n_atoms),disp[:,:,1].reshape(n_frames,n_atoms,1))
var_z = torch.matmul(disp[:,:,2].reshape(n_frames,1,n_atoms),disp[:,:,2].reshape(n_frames,n_atoms,1))
var_sum = var_x + var_y + var_z
lnnorm = -1.5*(n_atoms-1)*torch.log(torch_var)
multiplier = -0.5/torch_var
var_sum *= multiplier
var_sum += lnnorm
print(torch.sum(var_sum))

tensor(-9516086.9248, device='cuda:0', dtype=torch.float64)


In [74]:
# meta data from inputs
n_frames = traj_data.shape[0]
n_clusters = np.amax(clusters) + 1
print(n_clusters)
n_atoms = traj_data.shape[1]
n_dim = traj_data.shape[2]
n_features = n_atoms*n_dim
# declare arrays 
cluster_frame_ln_likelihoods = np.empty((n_clusters,n_frames),dtype=np.float64)
ln_weights = np.empty(n_clusters,dtype=np.float64)
# compute likelihood of each frame at each Gaussian
for k in range(n_clusters):
    indeces = np.argwhere(clusters == k).flatten()
    center, var = traj_tools.traj_iterative_average_var(traj_data[indeces])
    # initialize weights as populations of clusters
    ln_weights[k] = np.log(indeces.size/n_frames)
    # align the entire trajectory to each cluster mean if requested
    traj_data = traj_tools.traj_align(traj_data,center)
    cluster_frame_ln_likelihoods[k,:] = ln_spherical_gaussian_pdf(traj_data.reshape(n_frames,n_features), center.reshape(n_features), var)
# compute log likelihood
log_likelihood = 0.0
for i in range(n_frames):
    log_likelihood += logsumexp(cluster_frame_ln_likelihoods[:,i]+ln_weights[k])

1


In [77]:
print(cluster_frame_ln_likelihoods)

[[-602.29209913 -209.18311537 -232.3707049  ...  -82.05303052
   -82.51625549  -81.54216565]]


In [75]:
print(var, torch_var )

4.781302754332612 tensor(4.7810, device='cuda:0', dtype=torch.float64)


In [48]:
def logsumexp(x):
    c = x.max()
    return c + np.log(np.sum(np.exp(x - c)))
def ln_spherical_gaussian_pdf(x, mu, sigma):
    n_samples = x.shape[0]
    n_dim = x.shape[1]-3
#    lnnorm = -0.5*n_dim*(np.log(2.0*np.pi*sigma))
    lnnorm = -0.5*n_dim*(np.log(sigma))
    mvG = np.empty(n_samples,dtype=np.float64)
    multiplier = -0.5/sigma
    for i in range(n_samples):
        diffV = x[i] - mu
        mvG[i] = multiplier*np.dot(diffV,diffV) + lnnorm
    return mvG
def uniform_sgmm_log_likelihood(traj_data,clusters):
    # meta data from inputs
    n_frames = traj_data.shape[0]
    n_clusters = np.amax(clusters) + 1
    n_atoms = traj_data.shape[1]
    n_dim = traj_data.shape[2]
    n_features = n_atoms*n_dim
    # declare arrays 
    cluster_frame_ln_likelihoods = np.empty((n_clusters,n_frames),dtype=np.float64)
    ln_weights = np.empty(n_clusters,dtype=np.float64)
    # compute likelihood of each frame at each Gaussian
    for k in range(n_clusters):
        indeces = np.argwhere(clusters == k).flatten()
        center, var = traj_tools.traj_iterative_average_var(traj_data[indeces])
        # initialize weights as populations of clusters
        ln_weights[k] = np.log(indeces.size/n_frames)
        # align the entire trajectory to each cluster mean if requested
        traj_data = traj_tools.traj_align(traj_data,center)
        cluster_frame_ln_likelihoods[k,:] = ln_spherical_gaussian_pdf(traj_data.reshape(n_frames,n_features), center.reshape(n_features), var)
    # compute log likelihood
    log_likelihood = 0.0
    for i in range(n_frames):
        log_likelihood += logsumexp(cluster_frame_ln_likelihoods[:,i]+ln_weights[k])
    return log_likelihood

In [8]:
from shapeGMM import gmm_shapes
sgmm_old = gmm_shapes.ShapeGMM(n_clusters=2,verbose=True,init_cluster_method="uniform")
fit_traj = sgmm_old.fit_uniform(traj)

Number of frames being analyzed: 15261
Number of particles being analyzed: 33
Number of dimensions (must be 3): 3
Initializing clustering using method: uniform
Weights from initial clusters in fit_uniform: [0.50003276 0.49996724]
0 [0.65797721 0.34202279] -1858579.5053968055
1 [0.6306054 0.3693946] -1128964.6520612452
2 [0.60295421 0.39704579] -1057938.8030235204
3 [0.59539528 0.40460472] -1050076.1681569528
4 [0.59372018 0.40627982] -1049660.0410772387
5 [0.59328434 0.40671566] -1049640.9193107646
6 [0.59316424 0.40683576] -1049639.637364138
7 [0.59313113 0.40686887] -1049639.5406551328
8 [0.59312202 0.40687798] -1049639.5333279734
9 [0.59311951 0.40688049] -1049639.5327734153


In [18]:
fit_traj = sgmm_old.fit_uniform(traj)

Weights from initial clusters in fit_uniform: [0.59275277 0.40724723]
0 [0.62444435 0.37555565] -3403343.4878362184
1 [0.66948927 0.33051073] -3323079.034704934
2 [0.68303397 0.31696603] -3257122.596708434
3 [0.64723401 0.35276599] -3149249.517208174
4 [0.62892598 0.37107402] -3144556.3241802976
5 [0.6172524 0.3827476] -3143619.8291211957
6 [0.60969269 0.39030731] -3143249.9492966174
7 [0.60452411 0.39547589] -3143094.7077788482
8 [0.60105919 0.39894081] -3143021.919000625
9 [0.59885202 0.40114798] -3142989.944946185
10 [0.5974765 0.4025235] -3142977.1653555883
11 [0.59662226 0.40337774] -3142972.2266322062
12 [0.59609107 0.40390893] -3142970.323638196
13 [0.59576016 0.40423984] -3142969.5876060715
14 [0.59555375 0.40444625] -3142969.3018288827
15 [0.59542486 0.40457514] -3142969.190572466
16 [0.59534434 0.40465566] -3142969.1471800175
17 [0.59529402 0.40470598] -3142969.130235106
18 [0.59526255 0.40473745] -3142969.123614055
19 [0.59524288 0.40475712] -3142969.1210251553
20 [0.5952305

In [9]:
print(sgmm_old.var[0], sgmm_old.var[1])
print(sgmm.vars[0], sgmm.vars[1])

0.877569553617483 10.437978021037393
0.877451743765751 10.437371041473902


In [10]:
traj_tools.rmsd_kabsch(sgmm_old.centers[0],sgmm.centers[0].astype(np.float64))

0.0003145757821771825

In [11]:
traj_tools.rmsd_kabsch(sgmm_old.centers[1],sgmm.centers[1].astype(np.float64))

0.006805439149254265

In [28]:
traj_tools.rmsd_kabsch(sgmm_old.centers[1],sgmm.centers[0].astype(np.float64))

0.14050713439442097

In [None]:

def torch_sd(traj_tensor, ref_tensor, dtype=torch.float32, device=torch.device("cuda:0")):
    # meta data
    n_frames = traj_tensor.shape[0]
    # compute correlation matrices using batched matmul
    c_mats = torch.matmul(ref_tensor.T,traj_tensor)
    m_mats = torch.empty((n_frames,4,4),dtype=dtype,device=device)
    m_mats[:,0,0] = c_mats[:,0,0] + c_mats[:,1,1] + c_mats[:,2,2]
    m_mats[:,1,0] = c_mats[:,1,2] - c_mats[:,2,1] 
    m_mats[:,1,1] = c_mats[:,0,0] - c_mats[:,1,1] - c_mats[:,2,2]
    m_mats[:,2,0] = c_mats[:,2,0] - c_mats[:,0,2]
    m_mats[:,2,1] = c_mats[:,0,1] + c_mats[:,1,0]
    m_mats[:,2,2] = -c_mats[:,0,0] + c_mats[:,1,1] - c_mats[:,2,2]
    m_mats[:,3,0] = c_mats[:,0,1] - c_mats[:,1,0]
    m_mats[:,3,1] = c_mats[:,0,2] - c_mats[:,2,0]
    m_mats[:,3,2] = c_mats[:,1,2] + c_mats[:,2,1]
    m_mats[:,3,3] = -c_mats[:,0,0] - c_mats[:,1,1] + c_mats[:,2,2]
    e, v = torch.linalg.eigh(m_mats)
    return e[:,0]
    # free up local variables 
    del c_mats
    del m_mats
    del e
    del v
    torch.cuda.empty_cache()    
    