In [6]:
import numpy as np
import glob
import csv
from rapidfuzz.string_metric import normalized_levenshtein

In [50]:
def pair_from_row(row):
    pair={}
    pair["mass"] = float(row[1])
    pair["epsilon"] = float(row[2])
    pair["sigma"] = float(row[3])
    pair["m"] = float(row[4])
    pair["cut"] = float(row[5])
    pair["charge"] = 0.0
    return pair

def pair_of_h():
    pair={}
    pair["mass"] = 1.0
    pair["epsilon"] = 0.0
    pair["sigma"] = 1.0
    pair["m"] = 0
    pair["cut"] = 0.0
    pair["charge"] = 0.0
    return pair

def get_charge(row,pair_dict):
    keys = list(pair_dict.keys())
    dummy = np.zeros(len(keys))
    for i,key in enumerate(keys):
        dummy[i] = normalized_levenshtein(row[0][1:],key)
        #print(i, dummy[i])
    pointer = np.squeeze(np.where( dummy == np.amax(dummy)))
    charge = float(row[2])
    return keys[pointer],charge
    
def read_pair_potentials(path):
    keylist = ["!","#","models:","types","References"]
    pair_dict = {}
    with open(path) as csvfile:
        spamreader = csv.reader(csvfile,delimiter=" ")
        for row in spamreader:
            row = [ x for x in row if x]
            if row:
                if "VdW-site" in row:
                    flag=1
                elif "coulomb-site" in row:
                    flag=2
                elif row[0] in keylist:
                    break            
                elif flag == 1 and row:
                    pair_dict[row[0]] = pair_from_row(row)
                elif flag == 2 and row:
                    print(row)
                    if row[0].split("_")[0] == "cH":
                        pair_dict[row[0]] = pair_of_h()
                        pair_dict[row[0]]["charge"] = float(row[2])
                    else:
                        p,ch = get_charge(row,pair_dict) 
                        print(p,ch)
                        pair_dict[p]["charge"] = ch
    return pair_dict



In [51]:
path = glob.glob("*/pair_potentials")[0]
print(path)
print(read_pair_potentials(path))

UA_devel/pair_potentials
['cH_alcohol', '0.0', '0.404']
['cO_alcohol', '0.0', '-0.65']
OH_alcohol -0.65
['cC_alcohol', '0.0', '0.246']
CH2_alcohol 0.246
['cH_EOH', '0.0000', '+0.415']
['cO_EOH', '0.0000', '-0.667']
OH_EOH -0.667
['cC_EOH', '0.0000', '+0.252']
CH2_EOH 0.252
{'OH_alcohol': {'mass': 17.007, 'epsilon': 84.23, 'sigma': 3.035, 'm': 12.0, 'cut': 14.0, 'charge': -0.65}, 'CH2_alkane': {'mass': 14.027, 'epsilon': 52.9133, 'sigma': 4.04, 'm': 14.0, 'cut': 14.0, 'charge': 0.0}, 'CH2_alcohol': {'mass': 14.027, 'epsilon': 84.23, 'sigma': 3.842, 'm': 14.0, 'cut': 14.0, 'charge': 0.246}, 'CH2_EOH': {'mass': 14.027, 'epsilon': 76.265, 'sigma': 3.908, 'm': 14.0, 'cut': 14.0, 'charge': 0.252}, 'OH_EOH': {'mass': 17.007, 'epsilon': 76.265, 'sigma': 3.087, 'm': 12.0, 'cut': 14.0, 'charge': -0.667}, 'cH_alcohol': {'mass': 1.0, 'epsilon': 0.0, 'sigma': 1.0, 'm': 0, 'cut': 0.0, 'charge': 0.404}, 'cH_EOH': {'mass': 1.0, 'epsilon': 0.0, 'sigma': 1.0, 'm': 0, 'cut': 0.0, 'charge': 0.415}}


In [52]:
class molecule():    
    def __init__(self,mol):
        self.molecule = np.array(mol)
        self.f = np.zeros(len(mol))
        self.n = -1*np.ones(len(mol))
        self.i = np.arange(len(mol))
        n=0
        for i,m in enumerate(mol):
            if m[0]=="b":
                self.f[i] = int(m[1:])
            else:
                self.n[i] = n
                n+=1
        return
    def get_distance(self,nos):
        n_min = np.min(nos)
        n_max = np.max(nos)
        i_min = np.squeeze(np.where(self.n==n_min))
        i_max = np.squeeze(np.where(self.n==n_max))
        print(self.molecule[i_min],self.molecule[i_max])
        seq=[]
        i_lastmol = i_min
        lastaction = 0
        flag=0
        #print(np.arange(i_min,i_max))
        for i in np.arange(i_min,i_max+1):
            if self.f[i]==0:
                if flag==0:
                    seq.append(self.molecule[i])
                    i_lastmol = i
                    lastaction = 0
                else:
                    flag-=1
            elif flag==0:
                n_end = self.n[i_lastmol]+self.f[i]+lastaction
                i_end = np.squeeze(np.where(self.n==n_end))
                if i_end < i_max:
                    flag = self.f[i]
                    lastaction += self.f[i]
                print(i,flag,i_end,i_max)

        
            
            
        return len(seq)-1,seq

In [53]:
#mol = np.array(["cH_alcohol","OH_alcohol","CH_alcohol","b3","CH2_alkane","b5","CH2_alkane","CH3_alkane","CH3_alkane","CH2_alkane","CH2_alkane","CH3_alkane"])
mol = np.array(["A0","A1","A2","b5","B1","B2","b2","C1","C2","B3","b2","D1","D2","b2","E1","E2","b2","F1","F2","A3","A4","A5"])

m = molecule(mol)

In [54]:
print(m.molecule,len(m.molecule))
print(m.f,len(m.f))
print(m.n,len(m.n))
print(m.i,len(m.i))


#print(m.get_distance((1,3)))
print("#################")
print(m.get_distance((1,4)))
print("#################")
print(m.get_distance((1,6)))
print("#################")
print(m.get_distance((1,7)))
print("#################")
print(m.get_distance((1,9)))
print("#################")
print(m.get_distance((1,max(m.n) )))
print("#################")
print(m.get_distance((1,9 )))
print("#################")
print(m.get_distance((1,11 )))
print("#################")
print(m.get_distance((1,13 )))

['A0' 'A1' 'A2' 'b5' 'B1' 'B2' 'b2' 'C1' 'C2' 'B3' 'b2' 'D1' 'D2' 'b2'
 'E1' 'E2' 'b2' 'F1' 'F2' 'A3' 'A4' 'A5'] 22
[0. 0. 0. 5. 0. 0. 2. 0. 0. 0. 2. 0. 0. 2. 0. 0. 2. 0. 0. 0. 0. 0.] 22
[ 0.  1.  2. -1.  3.  4. -1.  5.  6.  7. -1.  8.  9. -1. 10. 11. -1. 12.
 13. 14. 15. 16.] 22
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21] 22
#################
A1 B2
3 0 9 5
(3, ['A1', 'A2', 'B1', 'B2'])
#################
A1 C2
3 0 9 8
6 0 8 8
(5, ['A1', 'A2', 'B1', 'B2', 'C1', 'C2'])
#################
A1 B3
3 0 9 9
6 2.0 8 9
(4, ['A1', 'A2', 'B1', 'B2', 'B3'])
#################
A1 D2
3 5.0 9 12
10 0.0 12 12
(3, ['A1', 'A2', 'D1', 'D2'])
#################
A1 A5
3 5.0 9 21
10 2.0 12 21
13 2.0 15 21
16 2.0 18 21
(4, ['A1', 'A2', 'A3', 'A4', 'A5'])
#################
A1 D2
3 5.0 9 12
10 0.0 12 12
(3, ['A1', 'A2', 'D1', 'D2'])
#################
A1 E2
3 5.0 9 15
10 2.0 12 15
13 0.0 15 15
(3, ['A1', 'A2', 'E1', 'E2'])
#################
A1 F2
3 5.0 9 18
10 2.0 12 18
13 2.0 15 18
16 0.0 1

In [55]:
np.squeeze(np.where(m.n==4))
np.arange(1,5)

array([1, 2, 3, 4])

In [56]:

    
def read_xyz(path):
    data = []
    with open(path) as csvfile:
        spamreader = csv.reader(csvfile,delimiter=" ")
        for row in spamreader:
            data.append([r for r in row if r])
    n = int(data[0][0])
    xyz = {}
    for i,d in enumerate(data[2:n+2]):
        xyz[i] = {}
        xyz[i]["atom"] = d[0]
        xyz[i]["xyz"] = np.array([float(x) for x in d[1:4]])          
            
    return xyz

def assign_CHx(xyz):
    xyz_CHx = {}
    xkeys = sorted(xyz.keys())
    ii=0
    x=np.min(xkeys)
    flag=0
    while x <= np.max(xkeys):
        if flag==0 and xyz[x]["atom"] == "C":
            coos = np.array(xyz[x]["xyz"])
            flag=1
            x+=1
        elif flag>0:
            if xyz[x]["atom"] == "H":
                coos = np.column_stack((coos, xyz[x]["xyz"]))
                x+=1
                flag+=1
            else:
                no=flag-1
                xyz_CHx[ii] = {}
                xyz_CHx[ii]["atom"] = "CH"+str(no)
                xyz_CHx[ii]["xyz"] = np.sum(coos*np.array([15]+[1]*no),axis=1)/(15+1*no)
                print(xyz_CHx[ii]["xyz"])
                coos = []
                flag=0
                ii+=1
        else:
            xyz_CHx[ii] = xyz[x]
            x+=1
            ii+=1
    return xyz_CHx
        
def to_xyz(xyz,path):
    f = open(path, "w")
    f.write(str(len(xyz))+"\n")
    f.write("\n")
    for x in xyz:
        line = "    ".join([xyz[x]["atom"][0]]+[str(y) for y in xyz[x]["xyz"]])
        f.write(line+"\n")
    f.close()

In [57]:
path = "tors_2.00.xyz"
xyz =read_xyz(path)
xxyz = assign_CHx(xyz)
print(len(xxyz))
for xx in xxyz:
    print(xxyz[xx])
    
to_xyz(xxyz,"test.xyz")

[ 0.01203285  0.75397809 -0.8163303 ]
[-0.01203267  0.7539781   0.8163303 ]
6
{'atom': 'H', 'xyz': array([-7.5080000e-04, -5.8666370e-01, -2.2385073e+00])}
{'atom': 'O', 'xyz': array([-0.0063283, -0.6365049, -1.277139 ])}
{'atom': 'CH2', 'xyz': array([ 0.01203285,  0.75397809, -0.8163303 ])}
{'atom': 'CH2', 'xyz': array([-0.01203267,  0.7539781 ,  0.8163303 ])}
{'atom': 'O', 'xyz': array([ 0.006329 , -0.6365049,  1.277139 ])}
{'atom': 'H', 'xyz': array([ 7.5100000e-04, -5.8666370e-01,  2.2385073e+00])}


In [58]:
[15]+[1]*3

[15, 1, 1, 1]

In [59]:
def read_bond_potentials(path):
    keylist = ["!","#","models:","types"]
    bond_dict = {}
    flag=0
    with open(path) as csvfile:
        spamreader = csv.reader(csvfile,delimiter=" ")
        for row in spamreader:
            row = [ x for x in row if x]
            if row:
                if row[0] == "model":
                    flag=1
                elif flag==1 and row[0] in keylist:
                    break
                elif flag==1:
                    name = "_".join(row[1:3])
                    bond_dict[name] = {}
                    bond_dict[name]["list"] = row[1:3]
                    bond_dict[name]["type"] = int(row[0]) 
                    #bond_dict[name]["len"] = float(row[3]) 
                    #bond_dict[name]["spring"] = float(row[4]) 
                    angle_dict[name]["p"] = [float(r) for r in row[3:]] 
                
    return bond_dict
                

In [60]:
path = glob.glob("*/bond_potentials")[0]
print(path)
print(read_bond_potentials(path))

UA_devel/bond_potentials
{'CH2_alkane_CH2_alkane': {'list': ['CH2_alkane', 'CH2_alkane'], 'type': 1, 'len': 1.54, 'spring': 0.0}, 'CH2_alcohol_CH2_alcohol': {'list': ['CH2_alcohol', 'CH2_alcohol'], 'type': 1, 'len': 1.514, 'spring': 0.0}, 'CH2_EOH_CH2_EOH': {'list': ['CH2_EOH', 'CH2_EOH'], 'type': 1, 'len': 1.514, 'spring': 0.0}, 'OH_EOH_cH_EOH': {'list': ['OH_EOH', 'cH_EOH'], 'type': 1, 'len': 0.97, 'spring': 0.0}, 'CH2_EOH_OH_EOH': {'list': ['CH2_EOH', 'OH_EOH'], 'type': 1, 'len': 1.42, 'spring': 0.0}, 'OH_alcohol_cH_alcohol': {'list': ['OH_alcohol', 'cH_alcohol'], 'type': 1, 'len': 0.97, 'spring': 0.0}, 'CH2_alcohol_OH_alcohol': {'list': ['CH2_alcohol', 'OH_alcohol'], 'type': 1, 'len': 1.42, 'spring': 0.0}, 'CH2_alkane_CH2_alcohol': {'list': ['CH2_alkane', 'CH2_alcohol'], 'type': 1, 'len': 1.514, 'spring': 0.0}}


In [61]:
def read_angle_potentials(path):
    keylist = ["!","#","models:","types"]
    angle_dict = {}
    flag=0
    with open(path) as csvfile:
        spamreader = csv.reader(csvfile,delimiter=" ")
        for row in spamreader:
            row = [ x for x in row if x]
            if row:
                if row[0] == "model":
                    flag=1
                elif flag==1 and row[0] in keylist:
                    break
                elif flag==1:
                    name = "_".join(row[1:4])
                    angle_dict[name] = {}
                    angle_dict[name]["list"] = row[1:4]
                    angle_dict[name]["type"] = int(row[0]) 
                    #angle_dict[name]["angle"] = float(row[4]) 
                    #angle_dict[name]["p"] = float(row[5]) 
                    angle_dict[name]["p"] = [float(r) for r in row[4:]] 
                
    return angle_dict

In [62]:
path = glob.glob("*/angle_potentials")[0]
print(path)
print(read_angle_potentials(path))

UA_devel/angle_potentials
{'CHxx_alkane_CH2_alkane_CHxx_alkane': {'list': ['CHxx_alkane', 'CH2_alkane', 'CHxx_alkane'], 'type': 2, 'angle': 114.0, 'p': 62500.0}, 'CH2_alcohol_OH_alcohol_cH_alcohol': {'list': ['CH2_alcohol', 'OH_alcohol', 'cH_alcohol'], 'type': 2, 'angle': 107.4, 'p': 45960.0}, 'CH2_alkane_CH2_alcohol_OH_alcohol': {'list': ['CH2_alkane', 'CH2_alcohol', 'OH_alcohol'], 'type': 2, 'angle': 113.5, 'p': 62250.0}, 'CH2_alcohol_CH2_alcohol_OH_alcohol': {'list': ['CH2_alcohol', 'CH2_alcohol', 'OH_alcohol'], 'type': 2, 'angle': 113.5, 'p': 62250.0}, 'CHxx_alkane_CH2_alkane_CH2_alcohol': {'list': ['CHxx_alkane', 'CH2_alkane', 'CH2_alcohol'], 'type': 2, 'angle': 114.0, 'p': 62500.0}, 'CH2_alcohol_CH2_alkane_CH2_alcohol': {'list': ['CH2_alcohol', 'CH2_alkane', 'CH2_alcohol'], 'type': 2, 'angle': 114.0, 'p': 62500.0}, 'CH2_EOH_OH_EOH_cH_EOH': {'list': ['CH2_EOH', 'OH_EOH', 'cH_EOH'], 'type': 2, 'angle': 107.4, 'p': 45960.0}, 'CH2_EOH_CH2_EOH_OH_EOH': {'list': ['CH2_EOH', 'CH2_EOH', 

In [65]:
def read_torsion_potentials(path):
    keylist = ["!","#","models:","types","Note:"]
    torsion_dict = {}
    flag=0
    with open(path) as csvfile:
        spamreader = csv.reader(csvfile,delimiter=" ")
        for row in spamreader:
            row = [ x for x in row if x]
            if row:
                if row[0] == "model":
                    flag=1
                elif flag==1 and row[0] in keylist:
                    break
                elif flag==1:
                    name = "_".join(row[1:5])
                    torsion_dict[name] = {}
                    torsion_dict[name]["list"] = row[1:5]
                    torsion_dict[name]["type"] = int(row[0]) 
                    torsion_dict[name]["p"] = [float(r) for r in row[5:]] 
                
    return torsion_dict

In [66]:
path = glob.glob("*/torsion_potentials")[0]
print(path)
print(read_torsion_potentials(path))

UA_devel/torsion_potentials
{'CHxx_alkane_CH2_alkane_CH2_alkane_CHxx_alkane': {'list': ['CHxx_alkane', 'CH2_alkane', 'CH2_alkane', 'CHxx_alkane'], 'type': 1, 'p': [0.0, 355.03, -68.19, 791.32, 0.0]}, 'CH2_alkane_CH2_alcohol_OH_alcohol_cH_alcohol': {'list': ['CH2_alkane', 'CH2_alcohol', 'OH_alcohol', 'cH_alcohol'], 'type': 1, 'p': [-184.99, 82.0, 36.89, 303.85, 0.0]}, 'CH2_alcohol_CH2_alcohol_OH_alcohol_cH_alcohol': {'list': ['CH2_alcohol', 'CH2_alcohol', 'OH_alcohol', 'cH_alcohol'], 'type': 1, 'p': [-184.99, 82.0, 36.89, 303.85, 0.0]}, 'CHxx_alkane_CH2_alkane_CH2_alcohol_OH_alcohol': {'list': ['CHxx_alkane', 'CH2_alkane', 'CH2_alcohol', 'OH_alcohol'], 'type': 1, 'p': [0.0, 206.45, -222.56, 1085.08, 0.0]}, 'CH2_alcohol_CH2_alkane_CH2_alcohol_OH_alcohol': {'list': ['CH2_alcohol', 'CH2_alkane', 'CH2_alcohol', 'OH_alcohol'], 'type': 1, 'p': [0.0, 206.45, -222.56, 1085.08, 0.0]}, 'CHxx_alkane_CH2_alkane_CH2_alkane_CH2_alcohol': {'list': ['CHxx_alkane', 'CH2_alkane', 'CH2_alkane', 'CH2_alcoh