In [None]:
from cclib.io import ccread
import numpy as np

from tscode.clustered_csearch import clustered_csearch
from tscode.utils import write_xyz

# Input structure for the conformational search.
# NOTE(review): the paths below are absolute and machine-specific; the notebook
# will not run elsewhere without editing them.
# Alternative input, currently disabled:
# data = ccread(r'C:\Users\Nik\Desktop\miller\pep\pep.xyz')
data = ccread(r'C:\Users\Nik\Desktop\complete\cat_salt.xyz')
# Start the search from the first entry of data.atomcoords; n and n_out are
# forwarded unchanged to clustered_csearch, which defines their meaning.
confs = clustered_csearch(data.atomcoords[0], data.atomnos, n=20, n_out=10)

# Alternative run, currently disabled: same call on ts1.xyz, additionally
# passing constrained_indexes as an array of three index pairs.
# data = ccread(r'C:\Users\Nik\Desktop\complete\ts1.xyz')
# confs = clustered_csearch(data.atomcoords[0], data.atomnos, constrained_indexes=np.array([[133, 168], [23, 151], [61, 130]]), n=20)

In [None]:
# Write every structure returned by the search into a single file, calling
# write_xyz once per structure on the same open handle.
with open('most_diverse_confs.xyz', 'w') as f:
    for c in confs:
        write_xyz(c, data.atomnos, f)
# Last expression of the cell: display the shape of the conformer array.
confs.shape

In [None]:
from sklearn.cluster import KMeans
from tscode.hypermolecule_class import align_structures

def most_diverse_conformers(n, structures, energies=None, random_state=None):
    '''
    Return up to n representative structures chosen to be as diverse as possible.

    The structures are aligned, flattened to one feature vector each and
    partitioned into n clusters with KMeans. One structure is then taken
    from every non-empty cluster:
    - If the energy list is given, the one with the lowest energy.
    - If it is not, the one with the largest summed distance from the
      centers of all the other clusters.

    n: number of clusters to build.
    structures: 3D array whose first axis indexes the structures
        (presumably shaped (n_structures, n_atoms, 3) - TODO confirm).
    energies: optional sequence of energies, paired with structures by position.
    random_state: optional seed forwarded to KMeans; the default (None)
        keeps the clustering unseeded.

    Returns an np.array holding one structure per non-empty cluster.
    '''
    structures = align_structures(structures)
    # One row per structure, all coordinates flattened into a single feature vector.
    features = structures.reshape((structures.shape[0], -1))

    kmeans = KMeans(n_clusters=n, random_state=random_state)
    kmeans.fit(features)
    labels = kmeans.labels_

    if energies is not None:
        clusters = [[] for _ in range(n)]
        for coords, energy, label in zip(structures, energies, labels):
            clusters[label].append((coords, energy))

        # Lowest-energy member of each cluster; min() keeps the first one on
        # ties. Empty clusters are skipped instead of raising.
        output = [min(group, key=lambda pair: pair[1])[0] for group in clusters if group]

    else: # afford the most distant structure from each cluster
        centers = kmeans.cluster_centers_.reshape((n, *structures.shape[1:3]))

        clusters = [[] for _ in range(n)]
        for coords, label in zip(structures, labels):
            clusters[label].append(coords)

        cluster_ids = np.arange(n)
        output = []
        for k, cluster in enumerate(clusters):
            if not cluster:
                continue

            # Exclude this cluster's own center: the mask must be built from
            # the cluster index, not from the position of a structure inside it.
            other_centers = centers[cluster_ids != k]
            cumdists = [np.sum(np.linalg.norm(other_centers - ref, axis=2)) for ref in cluster]
            output.append(cluster[int(np.argmax(cumdists))])

    return np.array(output)

# Keep ten representatives of the searched conformers (no energies supplied).
mdc = most_diverse_conformers(n=10, structures=confs)
mdc.shape

In [None]:
# Write the structures selected by most_diverse_conformers to one file,
# calling write_xyz once per structure on the same open handle.
with open('most_diverse.xyz', 'w') as f:
    for c in mdc:
        write_xyz(c, data.atomnos, f)

In [None]:
import numpy as np
from numba import njit
from math import sqrt

# Disabled: `rmp` is not defined in the visible cells.
# rmp_j = njit()(rmp)
# Sample 3-component vector used as input by the timing cells.
t = np.array([3.45,4.55,8])

@njit
def norm_of(v):
    '''
    Return the Euclidean norm of v computed from its first three
    components only (v[0], v[1], v[2]); any further components are ignored.
    '''
    # Written out component-wise on purpose: this exact expression is what
    # gets timed against np.linalg.norm.
    return sqrt((v[0]**2 + v[1]**2 + v[2]**2))

In [None]:
# Baseline timing: NumPy's general-purpose norm on the 3-vector t.
%timeit np.linalg.norm(t)

In [None]:
# Same input, timed with the njit-compiled, hand-written norm_of.
%timeit norm_of(t)

In [None]:
from math import sqrt

# njit is not imported in this cell; it comes from the `from numba import njit`
# of an earlier cell, which must have been run first.
@njit
def norm1(v):
    '''
    Return v divided by the Euclidean length of its first three components,
    i.e. the normalized vector when v has exactly three components.
    The squares are written with the ** operator; norm2 is the same
    computation with explicit multiplications.
    '''
    return v / sqrt((v[0]**2 + v[1]**2 + v[2]**2))

@njit
def norm2(v): #fastest
    '''
    Return v divided by the Euclidean length of its first three components,
    i.e. the normalized vector when v has exactly three components.
    The squares are written as explicit multiplications (x*x), which is the
    only difference from norm1.
    '''
    return v / sqrt((v[0]*v[0] + v[1]*v[1] + v[2]*v[2]))

In [None]:
# Time the normalization variant that squares with the ** operator.
%timeit norm1(t)

In [None]:
# Time the normalization variant that squares with explicit multiplications.
%timeit norm2(t)

# NOTE(review): `com` is not defined in any visible cell and `a` is only
# assigned in a later cell, so this line presumably relies on leftover
# kernel state - confirm where `com` comes from or remove the line.
%timeit com(c, a)

In [4]:
from scipy.spatial.transform import Rotation as R
import numpy as np
from numba import njit

# Source pair (a, b) and target pair (c, d) shared by the rotation-fitting cells.
a, b = np.array([1, 0, 0]), np.array([0, 1, 0])
c, d = np.array([1, 1, 0]), np.array([0, 1, 1])

# SciPy reference result: best-fit rotation taking a towards c and b towards d.
# align_vectors returns the Rotation as its first item; keep only its matrix.
vanilla = R.align_vectors(
    (c, d),
    (a, b),
)[0].as_matrix()
vanilla

array([[ 0.78867513, -0.21132487,  0.57735027],
       [ 0.57735027,  0.57735027, -0.57735027],
       [-0.21132487,  0.78867513,  0.57735027]])

In [17]:
def align_vectors_v2(a, b):
    '''
    Return the 3x3 proper rotation matrix (determinant +1) that best maps
    the vectors in b onto the corresponding vectors in a, Kabsch-style.

    a: sequence of target 3-vectors.
    b: sequence of source 3-vectors, paired with a by position (b[j] -> a[j]).

    Works for any number of vector pairs; len(a) sets how many are used.
    '''
    n_pairs = len(a)

    # Correlation matrix B = sum_j outer(a[j], b[j]). Kept as explicit
    # scalar loops, presumably so the function stays njit-compilable with
    # tuple arguments, as it is used elsewhere in this notebook.
    B = np.zeros((3,3))
    for i in range(3):
        for k in range(3):
            tot = 0
            for j in range(n_pairs):
                tot += a[j][i]*b[j][k]
            B[i,k] = tot

    # The singular values themselves are not needed, only the two bases.
    u, _, vh = np.linalg.svd(B)

    # Correct improper rotation if necessary (as in Kabsch algorithm):
    # flipping the last column of u turns a reflection into a rotation.
    if np.linalg.det(u @ vh) < 0:
        u[:, -1] = -u[:, -1]

    return np.dot(u, vh)

# Same arguments as the SciPy reference call, so the returned matrix can be
# compared directly with `vanilla`.
align_vectors_v2((c, d),(a, b))

array([[ 0.78867513, -0.21132487,  0.57735027],
       [ 0.57735027,  0.57735027, -0.57735027],
       [-0.21132487,  0.78867513,  0.57735027]])

In [None]:
def align_vectors_v3(a, b):
    '''
    Return the 3x3 proper rotation matrix (determinant +1) that best maps
    the vectors in b onto the corresponding vectors in a, Kabsch-style.

    a: sequence of target 3-vectors.
    b: sequence of source 3-vectors, paired with a by position (b[j] -> a[j]).

    Works for any number of vector pairs; len(a) sets how many are used.
    '''
    n_pairs = len(a)

    # Correlation matrix B = sum_j outer(a[j], b[j]), accumulated with
    # explicit scalar loops (presumably to remain njit-friendly).
    B = np.zeros((3,3))
    for i in range(3):
        for k in range(3):
            tot = 0
            for j in range(n_pairs):
                tot += a[j][i]*b[j][k]
            B[i,k] = tot

    # The singular values themselves are not needed, only the two bases.
    u, _, vh = np.linalg.svd(B)

    # Correct improper rotation if necessary (as in Kabsch algorithm):
    # flipping the last column of u turns a reflection into a rotation.
    if np.linalg.det(u @ vh) < 0:
        u[:, -1] = -u[:, -1]

    return np.dot(u, vh)

# Exercise the function defined in this cell (the call previously targeted
# align_vectors_v2, leaving align_vectors_v3 unused).
align_vectors_v3((c, d),(a, b))

In [18]:
# Numba-compiled wrapper of the pure-Python implementation, for the timing cells.
# NOTE(review): despite the name av3, this wraps align_vectors_v2, not
# align_vectors_v3 - confirm that this is intended.
av3 = njit()(align_vectors_v2)

In [6]:
# Timing 1 of 3: SciPy reference implementation.
%timeit R.align_vectors((c, d),(a, b))[0].as_matrix()

138 µs ± 2.34 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [7]:
# Timing 2 of 3: hand-written version, run as plain Python.
%timeit align_vectors_v2((c, d),(a, b))

68.1 µs ± 6.05 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [20]:
# Timing 3 of 3: the same hand-written version after njit compilation.
%timeit av3((c, d),(a, b))

9.76 µs ± 281 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [27]:
from concurrent.futures import ProcessPoolExecutor
import time

class DataClass:
    '''Minimal container that records every value handed to append().'''

    def __init__(self):
        # Nothing recorded yet.
        self.data = list()

    def append(self, n):
        '''Announce n on stdout, then store it at the end of self.data.'''
        print('Added ', n)
        self.data += [n]

# NOTE(review): this rebinds `data`, which the first cell assigned from ccread;
# the earlier cells that read data.atomnos will fail if re-run after this one.
data = DataClass()

# Experiment: ask worker processes to call the bound method data.append.
# NOTE(review): the recorded result of this cell is a BrokenProcessPool error,
# presumably because a class defined in the interactive session cannot be
# re-imported by spawned workers - TODO confirm. Even with a working pool,
# each worker would presumably append to its own pickled copy of `data`,
# leaving data.data in this process unchanged - verify before relying on it.
with ProcessPoolExecutor() as ppe:
    gen = ppe.map(data.append, (2,3,4,5))

# Drain the result iterator; the results themselves are discarded.
for g in gen:
    pass

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.