In [1]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from ase.io import read, write
from elf.water import *
import elf
from ase.visualize import view
from mbpol_calculator import reconnect_monomers
%pylab inline
from ase import Atoms

Populating the interactive namespace from numpy and matplotlib


In [4]:
atoms = read('../data/revPBE/64/64.traj', ':')

In [5]:
for i, a in enumerate(atoms):
    atoms[i] = reconnect_monomers(a)

In [285]:
r_lp = 0.7
rc_hb = 2.0

def is_hbonded(coords, ucell):
    
    if not coords.shape == (2,3,3):
        raise Exception('Must provide two molecules in np.array of shape (2,3,3)')
    
    coords = elf.geom.fold_back_coords(0, coords, ucell).reshape(-1,3,3)
    oh1 = coords[:,1,:] - coords[:,0,:]
    oh2 = coords[:,2,:] - coords[:,0,:]
    oh1 = oh1/norm(oh1, axis = -1).reshape(-1,1)
    oh2 = oh2/norm(oh2, axis = -1).reshape(-1,1)
    ohangle = np.arccos(np.einsum('ik,ik -> i',oh1,oh2))
    bisec = (oh1 + oh2)
    bisec = bisec/norm(bisec, axis = -1).reshape(-1,1)
    ortho = np.cross(oh1,oh2, axis = -1)
    lpangle = np.zeros_like(ohangle)
    lpangle[ohangle < .5 * np.pi] = (2*np.arccos(1/np.sqrt(2-1/(2-1/np.cos(ohangle/2)**2))))[ohangle < .5 * np.pi]
    lpangle[ohangle >= .5 * np.pi] = np.pi
    lpangle = lpangle.reshape(-1,1)
    lp1 = (-bisec*np.cos(lpangle/2) + ortho*np.sin(lpangle/2))*r_lp + coords[:,0,:]
    lp2 = (-bisec*np.cos(lpangle/2) - ortho*np.sin(lpangle/2))*r_lp + coords[:,0,:]

    coords = np.concatenate([coords,lp1.reshape(-1,1,3),lp2.reshape(-1,1,3)], axis = 1)
    
    look_at = {0: [], 1 : [3,4], 2: [3, 4], 3 : [1, 2], 4: [1, 2]}
    
    for i, c1 in enumerate(coords[0]):
        for c2 in coords[1,look_at[i]]:
            if np.linalg.norm(c1 - c2) <= rc_hb:
                return True
            
    return False
    
def sample_cluster(i, coords, k,  ucell, n_hb= -1):
    if n_hb == -1: n_hb = k -1
    
    if n_hb > k-1: 
        n_hb = k - 1
        print('n_hb too large, setting to {}'.format(k-1))

    if not coords.ndim == 3 or not coords.shape[1:] == (3,3):
        raise Exception('coords.shape must be (?, 3, 3)')
    
    coords = elf.geom.fold_back_coords(i, coords, ucell).reshape(-1,3,3)
    coords_o = coords[:,0]
    seed_O = coords_o[i]
    
    nn = NearestNeighbors(2*k)
    nn.fit(coords_o)
    dist, ind = nn.kneighbors([seed_O])
    ind = ind[0]
    
    hbond_ind = []
    non_hbond_ind = []
    
    for u in ind[1:]:
        if is_hbonded(coords[[i,u]],ucell):
            hbond_ind.append(u)
        else:
            non_hbond_ind.append(u)
    if len(hbond_ind) < n_hb:
        raise RuntimeError('Desired number of h-bonds not obtained')
        
    indices = [i] + hbond_ind[:n_hb]
    if len(indices) != k:
        indices += non_hbond_ind[:k-len(indices)]
    
    return indices

In [330]:
k = 6

all_clusters = []
for hb in [2, 3, 4, 5]:
    clusters = []
    for j, _ in enumerate(atoms[:]):
        coords = atoms[j].get_positions().reshape(-1,3,3)
        for i,_ in enumerate(coords):
            try:
                clusters.append([j,sample_cluster(i,coords,k ,atoms[j].get_cell(), hb)])
            except RuntimeError:
                pass
    all_clusters.append(clusters)


    



In [331]:
n_select = [500,1600,840,60]
clusters_select = []

for n, clusters in zip(n_select, all_clusters):
    np.random.shuffle(clusters)
    clusters_select += clusters[:n]

In [332]:
clusters_select

[[26, [34, 5, 11, 14, 13, 36]],
 [27, [17, 25, 11, 49, 14, 36]],
 [35, [62, 4, 43, 18, 12, 49]],
 [8, [2, 3, 44, 55, 56, 37]],
 [0, [37, 30, 7, 62, 45, 20]],
 [19, [32, 29, 18, 37, 28, 58]],
 [12, [30, 37, 52, 49, 41, 33]],
 [36, [17, 11, 60, 36, 24, 31]],
 [24, [6, 26, 4, 53, 12, 62]],
 [33, [44, 40, 56, 10, 19, 15]],
 [40, [24, 35, 7, 17, 54, 53]],
 [11, [42, 25, 19, 43, 60, 6]],
 [21, [13, 9, 40, 7, 2, 50]],
 [19, [9, 47, 19, 26, 42, 6]],
 [26, [24, 33, 7, 35, 41, 31]],
 [12, [10, 11, 49, 43, 16, 15]],
 [31, [9, 13, 47, 26, 11, 46]],
 [31, [63, 15, 48, 59, 27, 28]],
 [20, [41, 35, 55, 12, 7, 20]],
 [20, [42, 60, 25, 33, 10, 31]],
 [6, [61, 58, 59, 38, 1, 22]],
 [40, [50, 43, 40, 62, 13, 6]],
 [36, [12, 7, 16, 62, 41, 20]],
 [39, [34, 36, 8, 11, 7, 14]],
 [16, [3, 45, 13, 55, 12, 44]],
 [22, [10, 19, 44, 49, 42, 40]],
 [25, [52, 49, 53, 27, 17, 23]],
 [34, [15, 46, 59, 42, 47, 44]],
 [8, [39, 20, 4, 53, 7, 26]],
 [21, [11, 34, 36, 5, 26, 19]],
 [27, [22, 8, 45, 34, 57, 3]],
 [16, [17

In [304]:
viz_atoms = []
for clu in all_clusters[1]:
    coords = atoms[clu[0]].get_positions().reshape(-1,3,3)
    a = Atoms('{}(OHH)'.format(k),
                          positions = coords[clu[1]].reshape(-1,3),
                          pbc = True,
                          cell = atoms[0].get_cell())
    a.set_positions(a.get_positions(wrap = True))
    a.set_positions(elf.geom.fold_back_coords(0, a.get_positions(),a.get_cell()))
    viz_atoms.append(a)
view(viz_atoms)

In [333]:
systems = []
for clu in clusters_select:
    coords = atoms[clu[0]].get_positions().reshape(-1,3,3)
    new_order = np.array(clu[1] + np.delete(np.arange(0,len(coords)), clu[1]).tolist())
    systems.append(coords[new_order].reshape(-1,3))


In [334]:
len(systems)

3000

In [336]:
import pandas as pd

In [337]:
pd.DataFrame(np.array(systems).reshape(-1,3)).to_csv('hexamers_embedded.csv', index = None , header = None)

In [167]:
a = Atoms('6(OHH)', positions = sample_cluster(1,coords,6,atoms[0].get_cell()), cell = atoms[0].get_cell(), pbc= True)
# a.set_positions(a.get_positions(wrap=True))
a = reconnect_monomers(a)



In [None]:
systems_select = []
n_select = [500,1500,868,65]
for system, n in zip(systems, n_select):
    clusters_select.append(np.array(clusters)[np.random.randint(0,len(clusters),n)])

In [168]:
view(a)

In [24]:
re_ordered = [a]
for a in atoms:
    coords = a.get_positions().reshape(-1,3,3)
    a_old = -1
    b_old = -1
    for roo in np.arange(2.5,3.3,0.05):
        a,b = sample_dimers(coords, roo, epsilon = 0.05)
        if not (a,b) == (-1,-1) and not (a_old, b_old) == (a,b) and not (a_old,b_old) == (b,a):
            new_order = np.array([a,b] + np.delete(np.arange(0,len(coords)), [a,b]).tolist())
            re_ordered.append(coords[new_order].reshape(-1,3))
            print(len(coords[new_order]))
            a_old, b_old = a, b

2.4719984268148183
64
2.5452530683103016
64
2.6121641783779577
64
2.68110384232821
64
2.6540896923209036
64
2.7120935991688175
64
2.7711144178010443
64
2.8321614549629
64
2.8555311679357422
64
2.909358123321507
64
3.0135126735559306
64
3.0317496798779975
64
3.0515754148895904
64
3.1344351658409098
64
3.2445304788932257
64
3.2445304788932257
2.454595565656158
64
2.5984498857814926
64
2.6064659335525238
64
2.637868309296451
64
2.6691554608802806
64
2.799693941962908
64
2.797809660473459
64
2.849102260864186
64
2.8522985316188443
64
2.941534723904531
64
2.9776866292039585
64
3.090867235014371
64
3.0706287869396216
64
3.105026416976167
64
3.1593989120593418
64
3.231554371532708
64
2.4896052896574434
64
2.560532818291145
64
2.607125559317458
64
2.611689168620597
64
2.660678558991198
64
2.7990899753881378
64
2.8140235695797746
64
2.8879009101741464
64
2.918055926464869
64
2.925480374969411
64
2.9590188623370133
64
3.0769644536817227
64
3.136860340491394
64
3.171357473839782
64
3.171357473839

2.5914479240966526
64
2.5914479240966526
2.688424362259682
64
2.732268002555685
64
2.7795385729148263
64
2.791690871422682
64
2.8717177929266415
64
2.9144814758812045
64
2.933659589943564
64
3.0143132769421794
64
3.0710966792661565
64
3.0793447793593045
64
3.1221564442649203
64
3.2349418055324013
64
3.2349418055324013
2.5487900125363696
64
2.5487900125363696
2.6241901947168427
64
2.674898126231388
64
2.7115300073933386
64
2.719445509145454
64
2.764170720403352
64
2.8808995520482927
64
2.885960449772505
64
2.929411542286937
64
3.0085289317108637
64
3.035855396400774
64
3.1053911660296727
64
3.1053911660296727
3.2880058718997556
64
2.540654075719445
64
2.540654075719445
2.643123379146083
64
2.6357738154236743
64
2.715547753999795
64
2.75411888009624
64
2.78390812676153
64
2.826690499600688
64
2.8602642823958475
64
2.976943126731113
64
2.976943126731113
3.0166963733357086
64
3.0531613334644723
64
3.1778759139167216
64
3.2066432216487346
64
3.2066432216487346
2.5228342201894405
64
2.592365

In [15]:
re_ordered = []
for a in atoms:
    coords = a.get_positions().reshape(-1,3,3)
    old_seeds = []
    for roo in range(20):
        ind = sample_k_neighbors(coords, 3)
        if not ind[0] in old_seeds:
            print(ind)
            new_order = np.array(ind.tolist() + np.delete(np.arange(0,len(coords)), ind.tolist()).tolist())
            re_ordered.append(coords[new_order].reshape(-1,3))
            old_seeds.append(ind[0])

[53 51 36]
[18 43 62]
[30  1 37]
[ 4  7 12]
[10 49 44]
[20 39 23]
[37 30 56]
[58 61 35]
[45 22 61]
[59 61 38]
[48 63 27]
[22 45  8]
[29 58 38]
[26 49 10]
[52 23 30]
[34 13 41]
[ 8 22 55]
[40 38 43]
[32 63 29]
[59  1 61]
[ 7  4 12]
[ 5  3 13]
[ 0 54 38]
[42 21 28]
[12  7  4]
[61 28 30]
[43 60 14]
[38  0 59]
[33 28 52]
[16 52 12]
[52 23 30]
[25 31 14]
[19 25 31]
[14 36 63]
[52 23 33]
[27 48 57]
[57 27 47]
[ 3  5 37]
[ 6 23 39]
[10 49 11]
[40 38 43]
[58 61 29]
[53 46 51]
[13 34 55]
[11 10 17]
[43 60 14]
[28 61 33]
[47 63 31]
[33 52 28]
[63 47 14]
[ 8 22 55]
[30 61 37]
[38 59 22]
[22 38  8]
[41 55 13]
[46 53 56]
[58 35 61]
[19 47 31]
[15 46 51]
[18 62 43]
[27 48 57]
[17 25 11]
[10 11 44]
[11 10 17]
[42 21 33]
[34 13 55]
[62 18 43]
[50 13 44]
[35 24 58]
[60  0 43]
[16 52 58]
[29 58 32]
[ 9 54 40]
[45  3 22]
[44  2 10]
[54  0  9]
[35 24 58]
[46 56 23]
[ 7  4 37]
[18 62 43]
[58 35 61]
[32 63 59]
[14 36 59]
[59 61 38]
[40  9 32]
[42 21 28]
[43 60 18]
[62 18 43]
[21 48 42]
[58 61 35]
[17 11 25]

In [21]:
import pandas as pd

In [18]:
pd.DataFrame(np.array(re_ordered).reshape(-1,3)).to_csv('trimers_embedded.csv', index= None, header = None)

In [3]:
hexamers = read('../data/PBE_dz/hexamers/hexamers.xyz',':')

In [16]:
for i,_ in enumerate(hexamers):
    hexamers[i].set_pbc(True)
    hexamers[i].set_cell([12.43]*3)
    hexamers[i].set_positions(elf.geom.fold_back_coords(0, hexamers[i].get_positions(), hexamers[i].get_cell()))
    hexamers[i].set_positions(hexamers[i].get_positions() - hexamers[i].get_positions()[0])

In [17]:
view(hexamers)

In [9]:
elf.geom.fold_back_coords?

In [18]:
write('../data/PBE_dz/hexamers/hexamers_connected.xyz', hexamers)

In [22]:
pos = []
for h in hexamers:
    pos.append(h.get_positions())
pd.DataFrame(np.array(pos).reshape(-1,3)).to_csv('../data/PBE_dz/hexamers/hexamers_connected.csv', index = None, header = None)