In [1]:
%config Application.log_level="INFO"

In [2]:
from fitESPconstrained import *
import ase.io
import parmed as pmd
from parmed import gromacs
from insertHbyList import insertHbyList
import warnings
import pandas as pd
import logging
import sys

In [3]:
pd.set_option('precision', 3)

In [4]:
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

In [5]:
logging.info("Started")

INFO:root:Started


In [6]:
implicitHbondingPartners = {'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2}

infile_pdb = 'sandbox/system100.pdb'
infile_top = 'sandbox/system100.lean.top'

In [7]:
ua_ase_struct = ase.io.read(infile_pdb)
ua_pmd_struct = pmd.load_file(infile_pdb)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    ua_pmd_top = gromacs.GromacsTopologyFile(infile_top,parametrize=False)
# throws some warnings on angle types, does not matter for bonding info
# if error thrown, just try to "reduce" .top as far as possible
# warnings supressed as shown on
# https://docs.python.org/2/library/warnings.html

ua_pmd_top.strip(':SOL,CL') # strip water and electrolyte from system (if not yet done in .top)
ua_pmd_top.box = ua_pmd_struct.box # Needed because .pdb contains box info
ua_pmd_top.positions = ua_pmd_struct.positions

ua_names = [ a.name for a in ua_pmd_top.atoms ]
ua_residues = [ a.residue.name for a in ua_pmd_top.atoms ]

aa_ase_struct, aa_pmd_struct, aa_names, aa_residues = \
    insertHbyList(ua_ase_struct,ua_pmd_top,
    implicitHbondingPartners,1.0)

ua_count = len(ua_ase_struct)     # united atoms structure
aa_count = len(aa_ase_struct) # all atoms structure

ua_ase_index = np.arange(ua_count)
aa_ase_index = np.arange(aa_count)

aa_atom_residue_list = list(zip(aa_names,aa_residues))
aa_ase_index = range(aa_count)
aa_ase2pmd = dict(zip(aa_ase_index,aa_atom_residue_list))
aa_pmd2ase = dict(zip(aa_atom_residue_list,aa_ase_index))

ua_atom_residue_list = list(zip(ua_names,ua_residues))
ua_ase_index = range(ua_count)
ua_ase2pmd = dict(zip(ua_ase_index,ua_atom_residue_list))
ua_pmd2ase = dict(zip(ua_atom_residue_list,ua_ase_index))

INFO:root:Adding 1 H-atoms to CD3 (#8)...
INFO:root:bondingPartners [ 9 21]
INFO:root:Atom CD3 already has bonding partners CD4, CB1
INFO:root:Adding H-atom 1CD3 at position [ 23.911579364757202, 25.466622991293512, 12.349999999999998 ]
INFO:root:Adding 1 H-atoms to CD4 (#9)...
INFO:root:bondingPartners [10 12]
INFO:root:Atom CD4 already has bonding partners CD5, CA1
INFO:root:Adding H-atom 1CD4 at position [ 25.312471957818314, 25.072450829115194, 10.489569761035874 ]
INFO:root:Adding 2 H-atoms to CA2 (#15)...
INFO:root:bondingPartners [16]
INFO:root:Atom CA2 already has bonding partners CA3
INFO:root:Adding H-atom 1CA2 at position [ 28.74261935828807, 23.557590675857806, 11.104589188842278 ]
INFO:root:bondingPartners [ 16 105]
INFO:root:Atom CA2 already has bonding partners CA3, 1CA2
INFO:root:Adding H-atom 2CA2 at position [ 28.85271690173004, 23.702220854918263, 9.389826475843604 ]
INFO:root:Adding 2 H-atoms to CA3 (#16)...
INFO:root:bondingPartners [17]
INFO:root:Atom CA3 already 

In [8]:
ua_pmd_struct.atoms[0]

<Atom CE1 [0]; In ter 0>

In [9]:
ua_pmd_top.atoms[0]

<Atom CE1 [0]; In terB 0>

In [10]:
ua_pmd_top.save('test.top',overwrite=True)

In [11]:
ua_pmd2ase

{('CA1', 'OXO0'): 41,
 ('CA1', 'terA'): 83,
 ('CA1', 'terB'): 12,
 ('CA2', 'OXO0'): 44,
 ('CA2', 'terA'): 86,
 ('CA2', 'terB'): 15,
 ('CA3', 'OXO0'): 45,
 ('CA3', 'terA'): 87,
 ('CA3', 'terB'): 16,
 ('CB1', 'OXO0'): 50,
 ('CB1', 'terA'): 92,
 ('CB1', 'terB'): 21,
 ('CB2', 'OXO0'): 53,
 ('CB2', 'terA'): 95,
 ('CB2', 'terB'): 24,
 ('CB3', 'OXO0'): 54,
 ('CB3', 'terA'): 96,
 ('CB3', 'terB'): 25,
 ('CC1', 'terA'): 61,
 ('CC2', 'terA'): 63,
 ('CC3', 'terA'): 64,
 ('CC4', 'terA'): 66,
 ('CC5', 'terA'): 68,
 ('CC6', 'terA'): 70,
 ('CC7', 'terA'): 72,
 ('CD1', 'OXO0'): 32,
 ('CD1', 'terA'): 74,
 ('CD1', 'terB'): 3,
 ('CD2', 'OXO0'): 34,
 ('CD2', 'terA'): 76,
 ('CD2', 'terB'): 5,
 ('CD3', 'OXO0'): 37,
 ('CD3', 'terA'): 79,
 ('CD3', 'terB'): 8,
 ('CD4', 'OXO0'): 38,
 ('CD4', 'terA'): 80,
 ('CD4', 'terB'): 9,
 ('CD5', 'OXO0'): 39,
 ('CD5', 'terA'): 81,
 ('CD5', 'terB'): 10,
 ('CD6', 'OXO0'): 59,
 ('CD6', 'terA'): 101,
 ('CD6', 'terB'): 30,
 ('CE1', 'terB'): 0,
 ('HA1', 'OXO0'): 47,
 ('HA1', 'terA

## United-Atom fit

In [8]:
A_horton, B_horton, C_horton, N_horton = read_horton_cost_function(
    file_name = 'sandbox/system100.cost_ua.h5')

In [9]:
### Charge Groups:
# read in all charge groups and construct the corresponding constraints
cg2ase, cg2cgtype, ncgtypes = read_AtomName_ChargeGroup(
    file_name = 'sandbox/atoms_in_charge_group.csv',ase2pmd=ua_ase2pmd)

In [10]:
cg2ase

[array([0, 1, 2]),
 array([3, 4]),
 array([ 5,  6,  7,  8,  9, 10, 11]),
 array([12, 13, 14, 15]),
 array([16, 17, 18, 19, 20]),
 array([21, 22, 23, 24]),
 array([25, 26, 27, 28, 29]),
 array([30, 31]),
 array([32, 33]),
 array([34, 35, 36, 37, 38, 39, 40]),
 array([41, 42, 43, 44]),
 array([45, 46, 47, 48, 49]),
 array([50, 51, 52, 53]),
 array([54, 55, 56, 57, 58]),
 array([59, 60]),
 array([74, 75]),
 array([76, 77, 78, 79, 80, 81, 82]),
 array([83, 84, 85, 86]),
 array([87, 88, 89, 90, 91]),
 array([92, 93, 94, 95]),
 array([ 96,  97,  98,  99, 100]),
 array([101, 102]),
 array([61, 62]),
 array([63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73])]

In [11]:
cg2cgtype

[1, 2, 3, 4, 5, 6, 7, 8, 2, 3, 4, 5, 6, 7, 8, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [12]:
ncgtypes

10

In [13]:
cg_q = read_ChargeGroup_TotalCharge(
    file_name = 'sandbox/charge_group_total_charge.csv')

In [14]:
cg_q

{1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 0, 7: 1, 8: 0, 9: 0, 10: 0}

In [15]:
#loop over set of charge groups (each charge group occures only ones)
charges  = [ cg_q[cg] for cg in cg2cgtype ]

In [16]:
charges

[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0]

In [17]:
D_matrix_cg, q_vector_cg = constructChargegroupConstraints(
    chargeGroups = cg2ase, N = N_horton, q = charges, debug=True)

DEBUG:root:
DEBUG:root:### constructChargegroupConstraints ###
DEBUG:root:
DEBUG:root:103 unknowns, 24 pairwise equality constraints
DEBUG:root:D_matrix ((24, 103)):
[[ 1.  1.  1. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  1.  1.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
DEBUG:root:q_vector ((24,)):
[ 0.  0.  0.  0.  1.  0.  1.  0.  0.  0.  0.  1.  0.  1.  0.  0.  0.  0.
  1.  0.  1.  0.  0.  0.]


In [18]:
D_matrix_cg.shape

(24, 103)

In [19]:
q_vector_cg.shape

(24,)

In [20]:
### Same Charged Atoms
sym2ase = read_SameChargedAtoms(
    file_name='sandbox/atoms_of_same_charge.csv',
    ase2pmd=ua_ase2pmd)

DEBUG:root:
DEBUG:root:### read_SameChargedAtoms ###
DEBUG:root:
DEBUG:root:Constructing implict symmetries due to atom types.
DEBUG:root:Apparently, atom type CE1 only occurs once at ASE index 0. No symmetry constraint created.
DEBUG:root:Apparently, atom type HE1 only occurs once at ASE index 1. No symmetry constraint created.
DEBUG:root:Apparently, atom type HE2 only occurs once at ASE index 2. No symmetry constraint created.
DEBUG:root:Add symmetry constraint for atom type CD1 at ASE indices [ 3 32 74].
DEBUG:root:Add symmetry constraint for atom type HD1 at ASE indices [ 4 33 75].
DEBUG:root:Add symmetry constraint for atom type CD2 at ASE indices [ 5 34 76].
DEBUG:root:Add symmetry constraint for atom type HD2 at ASE indices [ 6 35 77].
DEBUG:root:Add symmetry constraint for atom type OD1 at ASE indices [ 7 36 78].
DEBUG:root:Add symmetry constraint for atom type CD3 at ASE indices [ 8 37 79].
DEBUG:root:Add symmetry constraint for atom type CD4 at ASE indices [ 9 38 80].
DEBUG:r

In [21]:
sym2ase

[array([ 3, 32, 74]),
 array([ 4, 33, 75]),
 array([ 5, 34, 76]),
 array([ 6, 35, 77]),
 array([ 7, 36, 78]),
 array([ 8, 37, 79]),
 array([ 9, 38, 80]),
 array([10, 39, 81]),
 array([11, 40, 82]),
 array([12, 41, 83]),
 array([13, 42, 84]),
 array([14, 43, 85]),
 array([15, 44, 86]),
 array([16, 45, 87]),
 array([17, 46, 88]),
 array([18, 47, 89]),
 array([19, 48, 90]),
 array([20, 49, 91]),
 array([21, 50, 92]),
 array([22, 51, 93]),
 array([23, 52, 94]),
 array([24, 53, 95]),
 array([25, 54, 96]),
 array([26, 55, 97]),
 array([27, 56, 98]),
 array([28, 57, 99]),
 array([ 29,  58, 100]),
 array([ 30,  59, 101]),
 array([ 31,  60, 102]),
 array([  3,  30,  32,  59,  74, 101]),
 array([  4,  31,  33,  60,  75, 102]),
 array([12, 21, 41, 50, 83, 92]),
 array([13, 22, 42, 51, 84, 93]),
 array([14, 23, 43, 52, 85, 94]),
 array([15, 24, 44, 53, 86, 95]),
 array([16, 25, 45, 54, 87, 96]),
 array([17, 26, 46, 55, 88, 97]),
 array([18, 27, 47, 56, 89, 98]),
 array([19, 28, 48, 57, 90, 99]),
 

In [22]:
D_matrix_sym, q_vector_sym = constructPairwiseSymmetryConstraints(
    charges = sym2ase, N = N_horton, symmetry = 1.0, debug = True)

DEBUG:root:
DEBUG:root:### constructPairwiseSymmetryConstraints ###
DEBUG:root:
DEBUG:root:charge list ((40,)):
[array([ 3, 32, 74]) array([ 4, 33, 75]) array([ 5, 34, 76])
 array([ 6, 35, 77]) array([ 7, 36, 78]) array([ 8, 37, 79])
 array([ 9, 38, 80]) array([10, 39, 81]) array([11, 40, 82])
 array([12, 41, 83]) array([13, 42, 84]) array([14, 43, 85])
 array([15, 44, 86]) array([16, 45, 87]) array([17, 46, 88])
 array([18, 47, 89]) array([19, 48, 90]) array([20, 49, 91])
 array([21, 50, 92]) array([22, 51, 93]) array([23, 52, 94])
 array([24, 53, 95]) array([25, 54, 96]) array([26, 55, 97])
 array([27, 56, 98]) array([28, 57, 99]) array([ 29,  58, 100])
 array([ 30,  59, 101]) array([ 31,  60, 102])
 array([  3,  30,  32,  59,  74, 101])
 array([  4,  31,  33,  60,  75, 102]) array([12, 21, 41, 50, 83, 92])
 array([13, 22, 42, 51, 84, 93]) array([14, 23, 43, 52, 85, 94])
 array([15, 24, 44, 53, 86, 95]) array([16, 25, 45, 54, 87, 96])
 array([17, 26, 46, 55, 88, 97]) array([18, 27, 4

In [35]:
for i, r in enumerate(D_matrix_sym):
    #r[r == -1]
    print(i, ": =1    ", np.nonzero(r == 1))
    print(i, ": =-1   ", np.nonzero(r == -1))
    #print(np.nonzero(r == -1))

0 : =1     (array([ 3, 32, 74]),)
0 : =-1    (array([ 4, 33, 75]),)
1 : =1     (array([ 3, 32, 74]),)
1 : =-1    (array([ 5, 34, 76]),)
2 : =1     (array([ 3, 32, 74]),)
2 : =-1    (array([ 6, 35, 77]),)
3 : =1     (array([ 3, 32, 74]),)
3 : =-1    (array([ 7, 36, 78]),)
4 : =1     (array([ 3, 32, 74]),)
4 : =-1    (array([ 8, 37, 79]),)
5 : =1     (array([ 3, 32, 74]),)
5 : =-1    (array([ 9, 38, 80]),)
6 : =1     (array([ 3, 32, 74]),)
6 : =-1    (array([10, 39, 81]),)
7 : =1     (array([ 3, 32, 74]),)
7 : =-1    (array([11, 40, 82]),)
8 : =1     (array([ 3, 32, 74]),)
8 : =-1    (array([12, 41, 83]),)
9 : =1     (array([ 3, 32, 74]),)
9 : =-1    (array([13, 42, 84]),)
10 : =1     (array([ 3, 32, 74]),)
10 : =-1    (array([14, 43, 85]),)
11 : =1     (array([ 3, 32, 74]),)
11 : =-1    (array([15, 44, 86]),)
12 : =1     (array([ 3, 32, 74]),)
12 : =-1    (array([16, 45, 87]),)
13 : =1     (array([ 3, 32, 74]),)
13 : =-1    (array([17, 46, 88]),)
14 : =1     (array([ 3, 32, 74]),)
14 : 

In [27]:
D_matrix_qtot, q_vector_qtot = constructTotalChargeConstraint(charge = 6.0,
                                                             N = N_horton)

In [28]:
D_matrix_qtot.shape

(1, 103)

In [29]:
q_vector_qtot.shape

(1,)

In [30]:
D_matrix_all, q_vector_all = concatenated_constraints(
    D_matrices = [D_matrix_cg,D_matrix_sym,D_matrix_qtot],
    q_vectors = [q_vector_cg,q_vector_sym,q_vector_qtot])

In [31]:
D_matrix_all.shape

(64, 103)

In [32]:
np.linalg.matrix_rank(D_matrix_all)

46

In [33]:
D_matrix_all.shape

(64, 103)

In [34]:
D_matrix_all[0].shape

(103,)

In [35]:
#from numpy.linalg import matrix_rank
def construct_D_of_full_rank(D,q):
    D_LI=[D[0]]
    q_LI=[q[0]]
    for i in range(D.shape[0]):
        tmp=[]
        for r in D_LI:
            tmp.append(r)
        tmp.append(D[i])                #set tmp=LI+[M[i]]
        if np.linalg.matrix_rank(tmp)>len(D_LI):    #test if M[i] is linearly independent from all (row) vectors in LI
            D_LI.append(D[i])              #note that matrix_rank does not need to take in a square matrix
            q_LI.append(q[i])
    return np.array(D_LI), np.array(q_LI)   #return set of linearly independent (row) vectors


In [36]:
### Unconstrained Minimization
X_unconstrained, A_unconstrained, B_unconstrained = \
    unconstrainedMinimize(A_matrix = A_horton,
                        b_vector = B_horton,
                        C_scalar = C_horton,
                        debug    = True)

INFO:root:A (103, 103): 
 [[ 25.81521958  26.36708177  25.49818127 ...,  13.19587666  21.76259358
   23.25413605]
 [ 26.36708177  27.57943425  25.74304031 ...,  13.62434719  22.76181361
   24.40528713]
 [ 25.49818127  25.74304031  25.80015774 ...,  12.58072792  20.53727967
   21.90803567]
 ..., 
 [ 13.19587666  13.62434719  12.58072792 ...,  19.35307546  16.35189398
   16.16557229]
 [ 21.76259358  22.76181361  20.53727967 ...,  16.35189398  25.31905065
   26.01560492]
 [ 23.25413605  24.40528713  21.90803567 ...,  16.16557229  26.01560492
   27.39118379]]
INFO:root:B (103,): 
 [-95.16064083 -95.83362046 -93.7010925  -95.62978958 -97.39621294
 -93.46460824 -92.42181304 -94.41882547 -91.44431764 -90.58716094
 -93.19348427 -93.13734817 -87.45715276 -86.87903727 -84.86069889
 -80.45149767 -77.00489104 -73.44396411 -71.64926468 -70.07622805
 -75.29067119 -88.93726292 -86.86825192 -88.26861414 -84.35733258
 -80.6948192  -80.38326605 -80.04683188 -83.91563131 -76.77720194
 -94.24050405 -92.73

In [37]:
A_unconstrained.shape

(103, 103)

In [38]:
X_unconstrained.shape

(103,)

In [39]:
A_horton.shape

(103, 103)

In [40]:
### Constrained Minimization
X_qtot_constraint, A_qtot_constraint, B_qtot_constraint = \
    constrainedMinimize(A_matrix = A_horton,
                        b_vector = B_horton,
                        C_scalar = C_horton,
                        D_matrix = D_matrix_qtot,
                        q_vector = q_vector_qtot,
                        debug    = True)

INFO:root:103 unknowns, 1 equality constraints
INFO:root:A (103, 103): 
 [[ 25.81521958  26.36708177  25.49818127 ...,  13.19587666  21.76259358
   23.25413605]
 [ 26.36708177  27.57943425  25.74304031 ...,  13.62434719  22.76181361
   24.40528713]
 [ 25.49818127  25.74304031  25.80015774 ...,  12.58072792  20.53727967
   21.90803567]
 ..., 
 [ 13.19587666  13.62434719  12.58072792 ...,  19.35307546  16.35189398
   16.16557229]
 [ 21.76259358  22.76181361  20.53727967 ...,  16.35189398  25.31905065
   26.01560492]
 [ 23.25413605  24.40528713  21.90803567 ...,  16.16557229  26.01560492
   27.39118379]]
INFO:root:B (103,): 
 [-95.16064083 -95.83362046 -93.7010925  -95.62978958 -97.39621294
 -93.46460824 -92.42181304 -94.41882547 -91.44431764 -90.58716094
 -93.19348427 -93.13734817 -87.45715276 -86.87903727 -84.86069889
 -80.45149767 -77.00489104 -73.44396411 -71.64926468 -70.07622805
 -75.29067119 -88.93726292 -86.86825192 -88.26861414 -84.35733258
 -80.6948192  -80.38326605 -80.04683188

In [41]:
D_matrix_all_fr, q_vector_all_fr = construct_D_of_full_rank(D_matrix_all,q_vector_all)

In [42]:
D_matrix_all_fr.shape

(46, 103)

In [43]:
### Constrained Minimization
X, A, B = constrainedMinimize(A_matrix = A_horton,
                        b_vector = B_horton,
                        C_scalar = C_horton,
                        D_matrix = D_matrix_all_fr,
                        q_vector = q_vector_all_fr,
                        debug    = True)

INFO:root:103 unknowns, 46 equality constraints
INFO:root:A (103, 103): 
 [[ 25.81521958  26.36708177  25.49818127 ...,  13.19587666  21.76259358
   23.25413605]
 [ 26.36708177  27.57943425  25.74304031 ...,  13.62434719  22.76181361
   24.40528713]
 [ 25.49818127  25.74304031  25.80015774 ...,  12.58072792  20.53727967
   21.90803567]
 ..., 
 [ 13.19587666  13.62434719  12.58072792 ...,  19.35307546  16.35189398
   16.16557229]
 [ 21.76259358  22.76181361  20.53727967 ...,  16.35189398  25.31905065
   26.01560492]
 [ 23.25413605  24.40528713  21.90803567 ...,  16.16557229  26.01560492
   27.39118379]]
INFO:root:B (103,): 
 [-95.16064083 -95.83362046 -93.7010925  -95.62978958 -97.39621294
 -93.46460824 -92.42181304 -94.41882547 -91.44431764 -90.58716094
 -93.19348427 -93.13734817 -87.45715276 -86.87903727 -84.86069889
 -80.45149767 -77.00489104 -73.44396411 -71.64926468 -70.07622805
 -75.29067119 -88.93726292 -86.86825192 -88.26861414 -84.35733258
 -80.6948192  -80.38326605 -80.0468318

In [44]:
X

array([ -7.70197156e+00,   2.38557828e+00,   5.31639329e+00,
         1.43266694e+00,  -1.43266694e+00,   7.53200104e+00,
        -2.89989949e-01,  -3.21255372e+00,  -8.01245677e+00,
         4.66540427e+00,   1.09430055e+00,  -1.77670542e+00,
         8.43489276e-01,   6.44832440e-01,  -6.16161535e+00,
         4.67329363e+00,   2.26192735e-01,  -1.46050584e+01,
         5.91194862e+00,   5.39582250e+00,   4.07109459e+00,
         3.13430178e-01,  -4.21109131e-01,   2.15838726e+00,
        -2.05070831e+00,   1.41934027e+00,   8.82090249e+00,
        -3.97932634e+00,  -6.26756365e+00,   1.00664723e+00,
        -1.31828299e+00,   1.31828299e+00,  -1.85163784e+00,
         1.85163784e+00,  -3.42816713e+00,   1.65490594e+00,
         2.00958147e+00,   8.50357759e-01,  -1.07209592e+00,
        -8.77273936e-01,   8.62691811e-01,  -9.61502238e+00,
         3.90443856e+00,   1.32535781e+01,  -7.54299433e+00,
         9.47402173e-01,   8.89513422e+00,  -1.01305601e+01,
        -8.94039171e-01,

In [45]:
D_matrix_cg.shape

(24, 103)

In [46]:
D_matrix_all.shape

(64, 103)

In [47]:
D_matrix_all_fr.shape

(46, 103)

In [48]:
A.shape

(149, 149)

In [49]:
X.shape

(149,)

In [50]:
logging.info('Results:')
#prevent scientific notation and make the prints mor readable
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)

logging.info('unconstrained charges {}:\n {}\ncharge sum = {}\n'.format( X_unconstrained[:N_horton].T.shape,
                                                    X_unconstrained[:N_horton].T,
                                                    X_unconstrained[:N_horton].T.sum() ))

logging.info('qtot constraint charges {}:\n {}\ncharge sum = {}\n'.format( X_qtot_constraint[:N_horton].T.shape,
                                                    X_qtot_constraint[:N_horton].T,
                                                    X_qtot_constraint[:N_horton].T.sum() ))

logging.info('constrained charges {}:\n {}\ncharge sum = {}\n'.format( X[:N_horton].T.shape,
                                                    X[:N_horton].T,
                                                    X[:N_horton].T.sum() ))
logging.info('Lagrange multipliers {}:\n {}'.format( X[N_horton:].T.shape,
                                              X[N_horton:].T ) )


### test the results
### test the results
logging.info("value of cost function, unconstrained: {}".format(
    (np.dot(X_unconstrained.T, np.dot(A_unconstrained, X_unconstrained)) - 2*np.dot(B_unconstrained.T, X_unconstrained) - C_horton) ) )
logging.info("value of cost function, qtot constrained: {}".format(
    (np.dot(X_qtot_constraint.T, np.dot(A_qtot_constraint, X_qtot_constraint)) - 2*np.dot(B_qtot_constraint.T, X_qtot_constraint) - C_horton) ) )
logging.info("value of cost function, fully constrained: {}".format(
    (np.dot(X.T, np.dot(A, X)) - 2*np.dot(B.T, X) - C_horton) ) )

INFO:root:Results:
INFO:root:unconstrained charges (103,):
 [ 0.33  -0.137 -0.146  0.089 -0.145 -0.249 -0.06   0.377 -0.154 -0.16
 -0.014 -0.125 -0.518  0.399  0.579 -0.473 -0.251  0.406 -0.246 -0.162
 -0.418 -0.378  0.308  0.547 -0.427 -0.34   0.564 -0.382 -0.365 -0.399
  0.307 -0.199  0.189 -0.128 -0.342 -0.034  0.512 -0.085 -0.055 -0.438
  0.023 -0.434  0.355  0.464 -0.234 -0.386  0.407 -0.311 -0.333 -0.316
 -0.526  0.357  0.593 -0.382 -0.468  0.65  -0.398 -0.376 -0.35   0.031
 -0.081  0.178 -0.203 -0.2    0.192 -0.141  0.012 -0.052  0.174 -0.204
  0.097 -0.241  0.206 -0.16   0.369 -0.179  0.027 -0.158  0.405 -0.141
 -0.112 -0.763  0.14  -0.356  0.425  0.481 -0.357 -0.421  0.516 -0.382
 -0.24  -0.373 -0.541  0.489  0.522 -0.363 -0.544  0.561 -0.126 -0.364
 -0.317  0.456 -0.134]
charge sum = -5.764207402342874

INFO:root:qtot constraint charges (103,):
 [  0.605  -0.594   0.539   4.833   0.006 -15.368   4.875   6.532   2.89
   7.816 -16.126   2.597  -0.241   1.28   -5.706   3.052   4

In [51]:
ua_ase2pmd_df = pd.DataFrame(ua_ase2pmd).T
ua_ase2pmd_df[2] = X_unconstrained
ua_ase2pmd_df[3] = X_qtot_constraint[:N_horton]
ua_ase2pmd_df[4] = X[:N_horton]
ua_ase2pmd_df.columns = ['atom','resudue','q_unconstrained','q_total_charge_constrained', 'q_fully_constrained']
ua_ase2pmd_df

Unnamed: 0,atom,resudue,q_unconstrained,q_total_charge_constrained,q_fully_constrained
0,CE1,terB,0.330,0.605,-7.702
1,HE1,terB,-0.137,-0.594,2.386
2,HE2,terB,-0.146,0.539,5.316
3,CD1,terB,0.089,4.833,1.433
4,HD1,terB,-0.145,0.006,-1.433
5,CD2,terB,-0.249,-15.368,7.532
6,HD2,terB,-0.060,4.875,-0.290
7,OD1,terB,0.377,6.532,-3.213
8,CD3,terB,-0.154,2.890,-8.012
9,CD4,terB,-0.160,7.816,4.665


In [52]:
ua_ase2pmd_df.iloc[sym2ase[0]]

Unnamed: 0,atom,resudue,q_unconstrained,q_total_charge_constrained,q_fully_constrained
3,CD1,terB,0.089,4.833,1.433
32,CD1,OXO0,0.189,-8.896,-1.852
74,CD1,terA,0.369,-8.065,0.419


## all-atom cost function & fit

In [53]:
A_horton_aa, B_horton_aa, C_horton_aa, N_horton_aa = read_horton_cost_function(
    file_name = 'sandbox/system100.cost_aa.h5')

In [54]:
N_horton_aa

133

In [55]:
cg_aa, cgtypes_aa, ncgtypes_aa = read_AtomName_ChargeGroup(
    file_name = 'sandbox/atoms_in_charge_group.csv',ase2pmd = aa_ase2pmd)


In [56]:
cg_aa

[array([0, 1, 2]),
 array([3, 4]),
 array([ 5,  6,  7,  8,  9, 10, 11]),
 array([12, 13, 14, 15]),
 array([16, 17, 18, 19, 20]),
 array([21, 22, 23, 24]),
 array([25, 26, 27, 28, 29]),
 array([30, 31]),
 array([32, 33]),
 array([34, 35, 36, 37, 38, 39, 40]),
 array([41, 42, 43, 44]),
 array([45, 46, 47, 48, 49]),
 array([50, 51, 52, 53]),
 array([54, 55, 56, 57, 58]),
 array([59, 60]),
 array([74, 75]),
 array([76, 77, 78, 79, 80, 81, 82]),
 array([83, 84, 85, 86]),
 array([87, 88, 89, 90, 91]),
 array([92, 93, 94, 95]),
 array([ 96,  97,  98,  99, 100]),
 array([101, 102]),
 array([61, 62]),
 array([63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73])]

In [128]:
len(cgtypes_aa)

24

In [118]:
ncgtypes_aa

10

In [119]:
cg_q_aa = read_ChargeGroup_TotalCharge(
    file_name = 'sandbox/charge_group_total_charge.csv')

In [120]:
cg_q_aa

{1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 0, 7: 1, 8: 0, 9: 0, 10: 0}

In [129]:
charges_aa = [ cg_q_aa[cg] for cg in cgtypes_aa ]

In [130]:
charges_aa

[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0]

In [131]:
len(charges)

24

In [132]:
### Same Charged Atoms
sym2ase_aa = read_SameChargedAtoms(
    file_name='sandbox/atoms_of_same_charge.csv',
    ase2pmd=aa_ase2pmd)

In [133]:
sym2ase_aa

[array([  3,  30,  32,  59,  74, 101]),
 array([  4,  31,  33,  60,  75, 102]),
 array([12, 21, 41, 50, 83, 92]),
 array([13, 22, 42, 51, 84, 93]),
 array([14, 23, 43, 52, 85, 94]),
 array([15, 24, 44, 53, 86, 95]),
 array([16, 25, 45, 54, 87, 96]),
 array([17, 26, 46, 55, 88, 97]),
 array([18, 27, 47, 56, 89, 98]),
 array([19, 28, 48, 57, 90, 99]),
 array([ 20,  29,  49,  58,  91, 100])]

In [134]:
sym2ase_aa

[array([  3,  30,  32,  59,  74, 101]),
 array([  4,  31,  33,  60,  75, 102]),
 array([12, 21, 41, 50, 83, 92]),
 array([13, 22, 42, 51, 84, 93]),
 array([14, 23, 43, 52, 85, 94]),
 array([15, 24, 44, 53, 86, 95]),
 array([16, 25, 45, 54, 87, 96]),
 array([17, 26, 46, 55, 88, 97]),
 array([18, 27, 47, 56, 89, 98]),
 array([19, 28, 48, 57, 90, 99]),
 array([ 20,  29,  49,  58,  91, 100])]

In [136]:
qtot_aa = 6.0

In [137]:
D_matrix_cg_aa, q_vector_cg_aa = constructChargegroupConstraints(
    chargeGroups = cg_aa, N = N_horton_aa, q = charges_aa, debug=True)

D_matrix_qtot_aa, q_vector_qtot_aa = constructTotalChargeConstraint(charge = qtot_aa,
                                                             N = N_horton_aa)

D_matrix_sym_aa, q_vector_sym_aa = constructPairwiseSymmetryConstraints(
    charges = sym2ase_aa, N = N_horton_aa, symmetry = 1.0, debug = True)

133 unknowns, 24 pairwise equality constraints
D_matrix ((24, 133)):
[[ 1.  1.  1. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
q_vector ((24,)):
[ 0.  0.  0.  0.  1.  0.  1.  0.  0.  0.  0.  1.  0.  1.  0.  0.  0.  0.
  1.  0.  1.  0.  0.  0.]
133 unknowns, 5 pairwise equality constraints
symmetry list ((5,)):
[ 1.  1.  1.  1.  1.]
D_single ((5, 133)):
[[ 0.  0.  0.  1. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0. -1.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  

D_single ((5, 133)):
[[ 0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0. -1.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  1.
   0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.  0.  0.
   0.  0. -1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0

In [138]:
D_matrix_all_aa, q_vector_all_aa = concatenated_constraints(
    D_matrices = [D_matrix_cg_aa,D_matrix_sym_aa,D_matrix_qtot_aa],
    q_vectors = [q_vector_cg_aa,q_vector_sym_aa,q_vector_qtot_aa])

In [139]:
D_matrix_all_aa.shape

(80, 133)

In [140]:
### Unconstrained Minimization
X_unconstrained_aa, A_unconstrained_aa, B_unconstrained_aa = \
    unconstrainedMinimize(A_matrix = A_horton_aa,
                        b_vector = B_horton_aa,
                        C_scalar = C_horton_aa,
                        debug    = True)

A (133, 133): 
 [[ 25.815  26.367  25.498 ...,  15.055  15.234  15.448]
 [ 26.367  27.579  25.743 ...,  15.569  15.771  16.048]
 [ 25.498  25.743  25.8   ...,  14.342  14.476  14.692]
 ..., 
 [ 15.055  15.569  14.342 ...,  21.568  19.681  20.16 ]
 [ 15.234  15.771  14.476 ...,  19.681  21.95   20.969]
 [ 15.448  16.048  14.692 ...,  20.16   20.969  21.751]]
B (133,): 
 [-95.161 -95.834 -93.701 -95.63  -97.396 -93.465 -92.422 -94.419 -91.444
 -90.587 -93.193 -93.137 -87.457 -86.879 -84.861 -80.451 -77.005 -73.444
 -71.649 -70.076 -75.291 -88.937 -86.868 -88.269 -84.357 -80.695 -80.383
 -80.047 -83.916 -76.777 -94.241 -92.734 -95.722 -95.333 -96.809 -97.43
 -98.159 -95.249 -94.89  -96.707 -97.201 -93.342 -92.693 -91.249 -89.558
 -84.894 -82.053 -84.636 -78.415 -80.928 -93.905 -94.014 -91.538 -87.599
 -83.503 -81.055 -77.273 -83.694 -80.088 -96.047 -95.628 -92.206 -90.122
 -90.709 -92.782 -96.338 -89.992 -91.542 -84.881 -82.169 -82.99  -78.876
 -86.126 -84.669 -95.083 -94.009 -93.903 -93.

In [141]:
### Constrained Minimization
X_qtot_constraint_aa, A_qtot_constraint_aa, B_qtot_constraint_aa = \
    constrainedMinimize(A_matrix = A_horton_aa,
                        b_vector = B_horton_aa,
                        C_scalar = C_horton_aa,
                        D_matrix = D_matrix_qtot_aa,
                        q_vector = q_vector_qtot_aa,
                        debug    = True)

133 unknowns, 1 equality constraints
A (133, 133): 
 [[ 25.815  26.367  25.498 ...,  15.055  15.234  15.448]
 [ 26.367  27.579  25.743 ...,  15.569  15.771  16.048]
 [ 25.498  25.743  25.8   ...,  14.342  14.476  14.692]
 ..., 
 [ 15.055  15.569  14.342 ...,  21.568  19.681  20.16 ]
 [ 15.234  15.771  14.476 ...,  19.681  21.95   20.969]
 [ 15.448  16.048  14.692 ...,  20.16   20.969  21.751]]
B (133,): 
 [-95.161 -95.834 -93.701 -95.63  -97.396 -93.465 -92.422 -94.419 -91.444
 -90.587 -93.193 -93.137 -87.457 -86.879 -84.861 -80.451 -77.005 -73.444
 -71.649 -70.076 -75.291 -88.937 -86.868 -88.269 -84.357 -80.695 -80.383
 -80.047 -83.916 -76.777 -94.241 -92.734 -95.722 -95.333 -96.809 -97.43
 -98.159 -95.249 -94.89  -96.707 -97.201 -93.342 -92.693 -91.249 -89.558
 -84.894 -82.053 -84.636 -78.415 -80.928 -93.905 -94.014 -91.538 -87.599
 -83.503 -81.055 -77.273 -83.694 -80.088 -96.047 -95.628 -92.206 -90.122
 -90.709 -92.782 -96.338 -89.992 -91.542 -84.881 -82.169 -82.99  -78.876
 -86.126

In [142]:
### Constrained Minimization
X_aa, A_aa, B_aa = constrainedMinimize(A_matrix = A_horton_aa,
                        b_vector = B_horton_aa,
                        C_scalar = C_horton_aa,
                        D_matrix = D_matrix_all_aa,
                        q_vector = q_vector_all_aa,
                        debug    = True)

133 unknowns, 80 equality constraints
A (133, 133): 
 [[ 25.815  26.367  25.498 ...,  15.055  15.234  15.448]
 [ 26.367  27.579  25.743 ...,  15.569  15.771  16.048]
 [ 25.498  25.743  25.8   ...,  14.342  14.476  14.692]
 ..., 
 [ 15.055  15.569  14.342 ...,  21.568  19.681  20.16 ]
 [ 15.234  15.771  14.476 ...,  19.681  21.95   20.969]
 [ 15.448  16.048  14.692 ...,  20.16   20.969  21.751]]
B (133,): 
 [-95.161 -95.834 -93.701 -95.63  -97.396 -93.465 -92.422 -94.419 -91.444
 -90.587 -93.193 -93.137 -87.457 -86.879 -84.861 -80.451 -77.005 -73.444
 -71.649 -70.076 -75.291 -88.937 -86.868 -88.269 -84.357 -80.695 -80.383
 -80.047 -83.916 -76.777 -94.241 -92.734 -95.722 -95.333 -96.809 -97.43
 -98.159 -95.249 -94.89  -96.707 -97.201 -93.342 -92.693 -91.249 -89.558
 -84.894 -82.053 -84.636 -78.415 -80.928 -93.905 -94.014 -91.538 -87.599
 -83.503 -81.055 -77.273 -83.694 -80.088 -96.047 -95.628 -92.206 -90.122
 -90.709 -92.782 -96.338 -89.992 -91.542 -84.881 -82.169 -82.99  -78.876
 -86.12

In [52]:
logging.info('Results:')
#prevent scientific notation and make the prints mor readable
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)

logging.info('unconstrained charges {}:\n {}\ncharge sum = {}\n'.format( X_unconstrained_aa[:N_horton_aa].T.shape,
                                                    X_unconstrained_aa[:N_horton_aa].T,
                                                    X_unconstrained_aa[:N_horton_aa].T.sum() ))

logging.info('qtot constraint charges {}:\n {}\ncharge sum = {}\n'.format( X_qtot_constraint_aa[:N_horton_aa].T.shape,
                                                    X_qtot_constraint_aa[:N_horton_aa].T,
                                                    X_qtot_constraint_aa[:N_horton_aa].T.sum() ))

logging.info('constrained charges {}:\n {}\ncharge sum = {}\n'.format( X_aa[:N_horton_aa].T.shape,
                                                    X_aa[:N_horton_aa].T,
                                                    X_aa[:N_horton_aa].T.sum() ))
logging.info('Lagrange multipliers {}:\n {}'.format( X_aa[N_horton_aa:].T.shape,
                                              X_aa[N_horton_aa:].T ) )


### test the results
### test the results
logging.info( 'value of cost function, unconstrained: ',
    (np.dot(X_unconstrained_aa.T, np.dot(A_unconstrained_aa, X_unconstrained_aa)) - 2*np.dot(B_unconstrained_aa.T, X_unconstrained_aa) - C_horton_aa) )
logging.info( 'value of cost function, qtot constrained: ',
    (np.dot(X_qtot_constraint_aa.T, np.dot(A_qtot_constraint_aa, X_qtot_constraint_aa)) - 2*np.dot(B_qtot_constraint_aa.T, X_qtot_constraint_aa) - C_horton_aa) )    
logging.info( 'value of cost function, fully constrained: ',
    (np.dot(X_aa.T, np.dot(A_aa, X_aa)) - 2*np.dot(B_aa.T, X_aa) - C_horton_aa) )

INFO:root:Results:


NameError: name 'X_unconstrained_aa' is not defined

In [154]:
aa_ase2pmd_df = pd.DataFrame(aa_ase2pmd).T
aa_ase2pmd_df[2] = X_unconstrained_aa
aa_ase2pmd_df[3] = X_qtot_constraint_aa[:N_horton_aa]
aa_ase2pmd_df[4] = X_aa[:N_horton_aa]
aa_ase2pmd_df.columns = ['atom','resudue','q_unconstrained','q_total_charge_constrained', 'q_fully_constrained']
aa_ase2pmd_df

Unnamed: 0,atom,resudue,q_unconstrained,q_total_charge_constrained,q_fully_constrained
0,CE1,terB,0.230243,3.534036,1.830561e+16
1,HE1,terB,-0.128340,-0.795746,-3.282677e+15
2,HE2,terB,-0.100878,-1.392453,-1.423551e+16
3,CD1,terB,0.241373,1.290919,-8.894556e+12
4,HD1,terB,-0.186210,1.000645,8.894556e+12
5,CD2,terB,-0.292640,-13.752472,-1.613788e+17
6,HD2,terB,-0.122140,5.453614,9.702581e+16
7,OD1,terB,0.368866,5.744112,9.364880e+16
8,CD3,terB,0.302960,-7.570491,5.430573e+16
9,CD4,terB,0.303974,12.188240,2.199793e+17


In [158]:
aa_ase2pmd_df.iloc[sym2ase_aa[0]]

Unnamed: 0,atom,resudue,q_unconstrained,q_total_charge_constrained,q_fully_constrained
3,CD1,terB,0.241373,1.290919,-8894556000000.0
30,CD6,terB,0.345829,2.102949,-3.472308e+16
32,CD1,OXO0,0.195356,-6.436158,7.378852e+16
59,CD6,OXO0,-0.056741,0.843752,-8295569000000000.0
74,CD1,terA,0.248251,-8.212348,-5819088000000000.0
101,CD6,terA,0.393603,-1.494914,-2.493299e+16


In [160]:
ua_ase2pmd_df.iloc[sym2ase[0]]

Unnamed: 0,atom,resudue,q_unconstrained,q_total_charge_constrained,q_fully_constrained
3,CD1,terB,0.088691,4.832883,473255000000000.0
30,CD6,terB,0.306762,6.644481,2416099000000000.0
32,CD1,OXO0,0.18902,-8.895789,1.152518e+16
59,CD6,OXO0,0.031134,0.188399,-6242506000000000.0
74,CD1,terA,0.369452,-8.065001,47451540000000.0
101,CD6,terA,0.455664,1.780866,-8692731000000000.0


## Summarize whole process

In [14]:
#implicitHbondingPartners = {'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2}
#infile_pdb = 'sandbox/system100.pdb'
#infile_top = 'sandbox/system100.lean.top'

In [15]:
def printResults(X,A,B,C,N):
    #prevent scientific notation and make the prints mor readable
    np.set_printoptions(precision=3)
    np.set_printoptions(suppress=True)

    logging.info('charges {}:\n {}\ncharge sum = {}\n'.format( X[:N].T.shape,
                                                        X[:N].T,
                                                        X[:N].T.sum() ))
    logging.info('Lagrange multipliers {}:\n {}'.format( X[N:].T.shape,
                                                  X[N:].T ) )

    ### test the results
    logging.info( 'value of cost function: {}'.format(
        (np.dot(X.T, np.dot(A, X)) - 2*np.dot(B.T, X) - C) ) )

In [16]:
def fitESPconstrained(infile_pdb, infile_top, infile_cost_h5, 
    infile_atoms_in_cg_csv, infile_cg_charges_csv, infile_atoms_of_same_charge_csv,
    qtot = 0.0, strip_string=':SOL,CL', 
    implicitHbondingPartners = {'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2},
    debug=False):

    # A: construct all-atom representation from united-atom structure and topology:
    ua_ase_struct = ase.io.read(infile_pdb)
    ua_pmd_struct = pmd.load_file(infile_pdb)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        ua_pmd_top = gromacs.GromacsTopologyFile(infile_top,parametrize=False)
    # throws some warnings on angle types, does not matter for bonding info
    # if error thrown, just try to "reduce" .top as far as possible
    # warnings supressed as shown on
    # https://docs.python.org/2/library/warnings.html

    ua_pmd_top.strip(strip_string) # strip water and electrolyte from system (if not yet done in .top)
    ua_pmd_top.box = ua_pmd_struct.box # Needed because .pdb contains box info
    ua_pmd_top.positions = ua_pmd_struct.positions

    ua_names = [ a.name for a in ua_pmd_top.atoms ]
    ua_residues = [ a.residue.name for a in ua_pmd_top.atoms ]

    aa_ase_struct, aa_pmd_struct, aa_names, aa_residues = \
        insertHbyList(ua_ase_struct,ua_pmd_top,
        implicitHbondingPartners,1.0)

    ua_count = len(ua_ase_struct)     # united atoms structure
    aa_count = len(aa_ase_struct) # all atoms structure

    ua_ase_index = np.arange(ua_count)
    aa_ase_index = np.arange(aa_count)

    aa_atom_residue_list = list(zip(aa_names,aa_residues))
    aa_ase_index = range(aa_count)
    aa_ase2pmd = dict(zip(aa_ase_index,aa_atom_residue_list))
    aa_pmd2ase = dict(zip(aa_atom_residue_list,aa_ase_index))

    ua_atom_residue_list = list(zip(ua_names,ua_residues))
    ua_ase_index = range(ua_count)
    ua_ase2pmd = dict(zip(ua_ase_index,ua_atom_residue_list))
    ua_pmd2ase = dict(zip(ua_atom_residue_list,ua_ase_index))
    
    # TODO: distinction for ua and aa fitting:
    ase2pmd = ua_ase2pmd
    
    # B: read cost function
    
    A_horton, B_horton, C_horton, N_horton = \
        read_horton_cost_function(file_name = infile_cost_h5)
        
    # C: read constraints files
    
    ### Charge Groups:
    # read in all charge groups and construct the corresponding constraints
    cg2ase, cg2cgtype, ncgtypes = read_AtomName_ChargeGroup(
        file_name = infile_atoms_in_cg_csv, ase2pmd = ase2pmd)
    
    cg_q = read_ChargeGroup_TotalCharge(file_name = infile_cg_charges_csv)
    
    cg2q  = [ cg_q[cg] for cg in cg2cgtype ]
    
    ### Same Charged Atoms
    sym2ase = read_SameChargedAtoms(
        file_name = infile_atoms_of_same_charge_csv, ase2pmd = ase2pmd)
    
    # D: construct constraints matrices
    D_matrix_cg, q_vector_cg = constructChargegroupConstraints(
        chargeGroups = cg2ase, N = N_horton, q = cg2q, debug = debug)
    
    D_matrix_sym, q_vector_sym = constructPairwiseSymmetryConstraints(
        charges = sym2ase, N = N_horton, symmetry = 1.0, debug = False)
    
    D_matrix_qtot, q_vector_qtot = constructTotalChargeConstraint(
        charge = qtot, N = N_horton)
    
    D_matrix_all, q_vector_all = concatenated_constraints(
        D_matrices = [D_matrix_cg,D_matrix_sym,D_matrix_qtot],
        q_vectors = [q_vector_cg,q_vector_sym,q_vector_qtot])    
    
    # E: Minimization 
    
    ### Constrained minimization
    X, A, B = constrainedMinimize(A_matrix = A_horton,
                        b_vector = B_horton,
                        C_scalar = C_horton,
                        D_matrix = D_matrix_all,
                        q_vector = q_vector_all,
                        debug    = debug)
    
    ase2pmd_df = pd.DataFrame(ase2pmd).T
    ase2pmd_df.columns = ['atom','residue']
    ase2pmd_df['q'] = X[:N_horton]

    # additional debug cases
    if debug:     
        ### Unconstrained minimization
        X_unconstrained, A_unconstrained, B_unconstrained = \
            unconstrainedMinimize(A_matrix = A_horton,
                        b_vector = B_horton,
                        C_scalar = C_horton,
                        debug    = debug)
        
        ### Total charge constraint minimization
        X_qtot_constraint, A_qtot_constraint, B_qtot_constraint = \
            constrainedMinimize(A_matrix = A_horton,
                        b_vector = B_horton,
                        C_scalar = C_horton,
                        D_matrix = D_matrix_qtot,
                        q_vector = q_vector_qtot,
                        debug    = debug)
        
        ### Charge group & total charge constraint minimization
        D_matrix_cg_qtot, q_vector_cg_qtot = concatenated_constraints(
            D_matrices = [D_matrix_cg,D_matrix_qtot],
            q_vectors = [q_vector_cg,q_vector_qtot])    
        
        X_cg_qtot, A_cg_qtot, B_cg_qtot = \
            constrainedMinimize(A_matrix = A_horton,
                        b_vector = B_horton,
                        C_scalar = C_horton,
                        D_matrix = D_matrix_cg_qtot,
                        q_vector = q_vector_cg_qtot,
                        debug    = debug)
        
        logging.info("")
        logging.info("")
        logging.info("")
        logging.info("#################################")    
        logging.info("RESULTS FOR DIFFERENT CONSTRAINTS")            
        logging.info("#################################")    
        logging.info("")
        logging.info("")
        logging.info("### UNCONSTRAINED ###")
        printResults(X_unconstrained,A_unconstrained,B_unconstrained,C_horton,N_horton)
        
        logging.info("")    
        logging.info("")
        logging.info("### QTOT CONSTRAINED ###")
        printResults(X_qtot_constraint,A_qtot_constraint,B_qtot_constraint,C_horton,N_horton)
        
        logging.info("")
        logging.info("")
        logging.info("### QTOT & CG CONSTRAINED ###")
        printResults(X_cg_qtot,A_cg_qtot,B_cg_qtot,C_horton,N_horton)

        logging.info("")
        logging.info("")
        logging.info("### FULLY CONSTRAINED ###")
        printResults(X,A,B,C_horton,N_horton)
        
        #ase2pmd_df.columns.append(['q_unconstrained', 'q_qtot_constrained', 'q_qtot_cg_constrained'])
        ase2pmd_df['q_unconstrained'] = X_unconstrained
        ase2pmd_df['q_qtot_constrained'] = X_qtot_constraint[:N_horton]
        ase2pmd_df['q_cg_qtot_constrained'] = X_cg_qtot[:N_horton]
        
        checkChargeGroups(ase2pmd_df,cg2ase,cg2cgtype,cg2q)
        checkSymmetries(ase2pmd_df,sym2ase)

    return X[:N_horton], X[N_horton:], ase2pmd_df, cg2ase, cg2cgtype, cg2q, sym2ase

In [17]:
# check charge group constraints:
def checkChargeGroups( df, cg2ase, cg2cgtype, cg2q,
    q_cols = ['q','q_unconstrained','q_qtot_constrained','q_cg_qtot_constrained']):
    
    logging.info("")
    logging.info("")
    logging.info("##############################")    
    logging.info("CHARGE GROUP CONSTRAINTS CHECK")            
    logging.info("##############################")    
    logging.info("")
    logging.info("atoms grouped together by their ASE indices:")
    logging.info("{}".format(cg2ase))    
    logging.info("")
    logging.info("desired charge of each group:")
    logging.info("{}".format(cg2q))
    
    for cg_index, ase_indices_in_cg in enumerate(cg2ase):
        logging.info("cg {:d}, type {:d}:".format(cg_index,cg2cgtype[cg_index]))
        for q_col in q_cols:
            q_cg = df.iloc[ase_indices_in_cg][q_col].sum() # select first charge group
            logging.info("    {:>30}:{:8.4f}    absolute error:{:12.4e}".format(q_col,q_cg,q_cg-cg2q[cg_index]))

In [18]:
# check symmetry constraints:
def checkSymmetries( df, sym2ase, 
    q_cols = ['q','q_unconstrained','q_qtot_constrained','q_cg_qtot_constrained']):
    
    logging.info("")    
    logging.info("")
    logging.info("##########################")    
    logging.info("SYMMETRY CONSTRAINTS CHECK")            
    logging.info("##########################")    
    logging.info("")
    logging.info("groups of equally charged atoms by their ASE indices:")
    logging.info("{}".format(sym2ase))
    
    for sym_index, ase_indices_in_sym in enumerate(sym2ase):
        #logging.info("cg {:d}, type {:d}:".format(cg_index,cg2cgtype[cg_index]))
        msg = []
        for ase_index in ase_indices_in_sym:
                msg.append("({}, {})".format(
                    df.iloc[ase_index]['atom'], 
                    df.iloc[ase_index]['residue']))
                           
        logging.info("sym {:d}: {}".format(sym_index,"; ".join(msg)))
              
        for q_col in q_cols:
            msg = []
            for ase_index in ase_indices_in_sym:
                msg.append("{:.3f}".format(df.iloc[ase_index][q_col]))
            logging.info("{:>30}: {}".format(q_col,",".join(msg)))    
            
        logging.info("")


In [19]:
q, lagrange_multiplier, info_df, cg2ase, cg2cgtype, cg2q, sym2ase = \
    fitESPconstrained(infile_pdb = 'sandbox/system100.pdb', 
                  infile_top = 'sandbox/system100.lean.top', 
                  infile_cost_h5 = 'sandbox/system100.cost_ua.h5', 
                  infile_atoms_in_cg_csv = 'sandbox/atoms_in_charge_group.csv', 
                  infile_cg_charges_csv = 'sandbox/charge_group_total_charge.csv', 
                  infile_atoms_of_same_charge_csv = 'sandbox/atoms_of_same_charge.csv',
                  qtot = 6.0, strip_string=':SOL,CL', 
                  implicitHbondingPartners = {'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2},
                  debug=True)

INFO:root:Adding 1 H-atoms to CD3 (#8)...
INFO:root:bondingPartners [ 9 21]
INFO:root:Atom CD3 already has bonding partners CD4, CB1
INFO:root:Adding H-atom 1CD3 at position [ 23.911579364757202, 25.466622991293512, 12.349999999999998 ]
INFO:root:Adding 1 H-atoms to CD4 (#9)...
INFO:root:bondingPartners [10 12]
INFO:root:Atom CD4 already has bonding partners CD5, CA1
INFO:root:Adding H-atom 1CD4 at position [ 25.312471957818314, 25.072450829115194, 10.489569761035874 ]
INFO:root:Adding 2 H-atoms to CA2 (#15)...
INFO:root:bondingPartners [16]
INFO:root:Atom CA2 already has bonding partners CA3
INFO:root:Adding H-atom 1CA2 at position [ 28.74261935828807, 23.557590675857806, 11.104589188842278 ]
INFO:root:bondingPartners [ 16 105]
INFO:root:Atom CA2 already has bonding partners CA3, 1CA2
INFO:root:Adding H-atom 2CA2 at position [ 28.85271690173004, 23.702220854918263, 9.389826475843604 ]
INFO:root:Adding 2 H-atoms to CA3 (#16)...
INFO:root:bondingPartners [17]
INFO:root:Atom CA3 already 

INFO:root:103 unknowns, 80 equality constraints
INFO:root:A (103, 103): 
 [[ 25.815  26.367  25.498 ...,  13.196  21.763  23.254]
 [ 26.367  27.579  25.743 ...,  13.624  22.762  24.405]
 [ 25.498  25.743  25.8   ...,  12.581  20.537  21.908]
 ..., 
 [ 13.196  13.624  12.581 ...,  19.353  16.352  16.166]
 [ 21.763  22.762  20.537 ...,  16.352  25.319  26.016]
 [ 23.254  24.405  21.908 ...,  16.166  26.016  27.391]]
INFO:root:B (103,): 
 [-95.161 -95.834 -93.701 -95.63  -97.396 -93.465 -92.422 -94.419 -91.444
 -90.587 -93.193 -93.137 -87.457 -86.879 -84.861 -80.451 -77.005 -73.444
 -71.649 -70.076 -75.291 -88.937 -86.868 -88.269 -84.357 -80.695 -80.383
 -80.047 -83.916 -76.777 -94.241 -92.734 -95.722 -95.333 -96.809 -97.43
 -98.159 -95.249 -94.89  -96.707 -97.201 -93.342 -92.693 -91.249 -89.558
 -84.894 -82.053 -84.636 -78.415 -80.928 -93.905 -94.014 -91.538 -87.599
 -83.503 -81.055 -77.273 -83.694 -80.088 -96.047 -95.628 -92.206 -90.122
 -90.709 -92.782 -96.338 -89.992 -91.542 -84.881 -

INFO:root:103 unknowns, 25 equality constraints
INFO:root:A (103, 103): 
 [[ 25.815  26.367  25.498 ...,  13.196  21.763  23.254]
 [ 26.367  27.579  25.743 ...,  13.624  22.762  24.405]
 [ 25.498  25.743  25.8   ...,  12.581  20.537  21.908]
 ..., 
 [ 13.196  13.624  12.581 ...,  19.353  16.352  16.166]
 [ 21.763  22.762  20.537 ...,  16.352  25.319  26.016]
 [ 23.254  24.405  21.908 ...,  16.166  26.016  27.391]]
INFO:root:B (103,): 
 [-95.161 -95.834 -93.701 -95.63  -97.396 -93.465 -92.422 -94.419 -91.444
 -90.587 -93.193 -93.137 -87.457 -86.879 -84.861 -80.451 -77.005 -73.444
 -71.649 -70.076 -75.291 -88.937 -86.868 -88.269 -84.357 -80.695 -80.383
 -80.047 -83.916 -76.777 -94.241 -92.734 -95.722 -95.333 -96.809 -97.43
 -98.159 -95.249 -94.89  -96.707 -97.201 -93.342 -92.693 -91.249 -89.558
 -84.894 -82.053 -84.636 -78.415 -80.928 -93.905 -94.014 -91.538 -87.599
 -83.503 -81.055 -77.273 -83.694 -80.088 -96.047 -95.628 -92.206 -90.122
 -90.709 -92.782 -96.338 -89.992 -91.542 -84.881 -

INFO:root:value of cost function: 1327.7049248291387
INFO:root:
INFO:root:
INFO:root:##############################
INFO:root:CHARGE GROUP CONSTRAINTS CHECK
INFO:root:##############################
INFO:root:
INFO:root:atoms grouped together by their ASE indices:
INFO:root:[array([0, 1, 2]), array([3, 4]), array([ 5,  6,  7,  8,  9, 10, 11]), array([12, 13, 14, 15]), array([16, 17, 18, 19, 20]), array([21, 22, 23, 24]), array([25, 26, 27, 28, 29]), array([30, 31]), array([32, 33]), array([34, 35, 36, 37, 38, 39, 40]), array([41, 42, 43, 44]), array([45, 46, 47, 48, 49]), array([50, 51, 52, 53]), array([54, 55, 56, 57, 58]), array([59, 60]), array([74, 75]), array([76, 77, 78, 79, 80, 81, 82]), array([83, 84, 85, 86]), array([87, 88, 89, 90, 91]), array([92, 93, 94, 95]), array([ 96,  97,  98,  99, 100]), array([101, 102]), array([61, 62]), array([63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73])]
INFO:root:
INFO:root:desired charge of each group:
INFO:root:[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 

INFO:root:             q_cg_qtot_constrained:  0.0000    absolute error:  0.0000e+00
INFO:root:cg 20, type 7:
INFO:root:                                 q:  1.0000    absolute error:  4.4764e-13
INFO:root:                   q_unconstrained: -0.7899    absolute error: -1.7899e+00
INFO:root:                q_qtot_constrained:  0.7302    absolute error: -2.6978e-01
INFO:root:             q_cg_qtot_constrained:  1.0000    absolute error: -1.0658e-14
INFO:root:cg 21, type 8:
INFO:root:                                 q:  0.0000    absolute error:  2.5291e-13
INFO:root:                   q_unconstrained:  0.3217    absolute error:  3.2174e-01
INFO:root:                q_qtot_constrained:  0.4165    absolute error:  4.1654e-01
INFO:root:             q_cg_qtot_constrained: -0.0000    absolute error: -2.4425e-15
INFO:root:cg 22, type 9:
INFO:root:                                 q: -0.0000    absolute error: -1.0747e-12
INFO:root:                   q_unconstrained: -0.0251    absolute error: -2

In [21]:
q

array([  2.99 ,   0.27 ,  -3.261,  -1.044,   1.044,  -6.068,   2.931,
         1.813,   0.383,   3.689,  -1.889,  -0.859,   0.315,  -0.836,
        -5.479,   6.   ,   1.035, -21.194,   7.207,   7.334,   6.618,
         0.315,  -0.836,  -5.479,   6.   ,   1.035, -21.194,   7.207,
         7.334,   6.618,  -1.044,   1.044,  -1.044,   1.044,   6.547,
        -1.141,  -4.809,   1.298,  -7.999,   5.222,   0.882,   0.315,
        -0.836,  -5.479,   6.   ,   1.035, -21.194,   7.207,   7.334,
         6.618,   0.315,  -0.836,  -5.479,   6.   ,   1.035, -21.194,
         7.207,   7.334,   6.618,  -1.044,   1.044,  -0.759,   0.759,
        -0.488,   4.238,  -3.511,   4.137,  -3.562,  -5.447,   3.71 ,
        -2.692,   4.52 ,  -1.127,   0.223,  -1.044,   1.044,   0.345,
         1.117,   0.524,  -1.349,  -1.584,   0.285,   0.662,   0.315,
        -0.836,  -5.479,   6.   ,   1.035, -21.194,   7.207,   7.334,
         6.618,   0.315,  -0.836,  -5.479,   6.   ,   1.035, -21.194,
         7.207,   7.

In [23]:
q_neg, *_ = fitESPconstrained(infile_pdb = 'sandbox/system100.pdb', 
                  infile_top = 'sandbox/system100.lean.top', 
                  infile_cost_h5 = 'sandbox/system100.cost_ua.h5', 
                  infile_atoms_in_cg_csv = 'sandbox/atoms_in_charge_group.csv', 
                  infile_cg_charges_csv = 'sandbox/charge_group_total_charge_negative.csv', 
                  infile_atoms_of_same_charge_csv = 'sandbox/atoms_of_same_charge.csv',
                  qtot = -6.0, strip_string=':SOL,CL', 
                  implicitHbondingPartners = {'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2},
                  debug=True)

INFO:root:Adding 1 H-atoms to CD3 (#8)...
INFO:root:bondingPartners [ 9 21]
INFO:root:Atom CD3 already has bonding partners CD4, CB1
INFO:root:Adding H-atom 1CD3 at position [ 23.911579364757202, 25.466622991293512, 12.349999999999998 ]
INFO:root:Adding 1 H-atoms to CD4 (#9)...
INFO:root:bondingPartners [10 12]
INFO:root:Atom CD4 already has bonding partners CD5, CA1
INFO:root:Adding H-atom 1CD4 at position [ 25.312471957818314, 25.072450829115194, 10.489569761035874 ]
INFO:root:Adding 2 H-atoms to CA2 (#15)...
INFO:root:bondingPartners [16]
INFO:root:Atom CA2 already has bonding partners CA3
INFO:root:Adding H-atom 1CA2 at position [ 28.74261935828807, 23.557590675857806, 11.104589188842278 ]
INFO:root:bondingPartners [ 16 105]
INFO:root:Atom CA2 already has bonding partners CA3, 1CA2
INFO:root:Adding H-atom 2CA2 at position [ 28.85271690173004, 23.702220854918263, 9.389826475843604 ]
INFO:root:Adding 2 H-atoms to CA3 (#16)...
INFO:root:bondingPartners [17]
INFO:root:Atom CA3 already 

INFO:root:103 unknowns, 80 equality constraints
INFO:root:A (103, 103): 
 [[ 25.815  26.367  25.498 ...,  13.196  21.763  23.254]
 [ 26.367  27.579  25.743 ...,  13.624  22.762  24.405]
 [ 25.498  25.743  25.8   ...,  12.581  20.537  21.908]
 ..., 
 [ 13.196  13.624  12.581 ...,  19.353  16.352  16.166]
 [ 21.763  22.762  20.537 ...,  16.352  25.319  26.016]
 [ 23.254  24.405  21.908 ...,  16.166  26.016  27.391]]
INFO:root:B (103,): 
 [-95.161 -95.834 -93.701 -95.63  -97.396 -93.465 -92.422 -94.419 -91.444
 -90.587 -93.193 -93.137 -87.457 -86.879 -84.861 -80.451 -77.005 -73.444
 -71.649 -70.076 -75.291 -88.937 -86.868 -88.269 -84.357 -80.695 -80.383
 -80.047 -83.916 -76.777 -94.241 -92.734 -95.722 -95.333 -96.809 -97.43
 -98.159 -95.249 -94.89  -96.707 -97.201 -93.342 -92.693 -91.249 -89.558
 -84.894 -82.053 -84.636 -78.415 -80.928 -93.905 -94.014 -91.538 -87.599
 -83.503 -81.055 -77.273 -83.694 -80.088 -96.047 -95.628 -92.206 -90.122
 -90.709 -92.782 -96.338 -89.992 -91.542 -84.881 -

INFO:root:103 unknowns, 25 equality constraints
INFO:root:A (103, 103): 
 [[ 25.815  26.367  25.498 ...,  13.196  21.763  23.254]
 [ 26.367  27.579  25.743 ...,  13.624  22.762  24.405]
 [ 25.498  25.743  25.8   ...,  12.581  20.537  21.908]
 ..., 
 [ 13.196  13.624  12.581 ...,  19.353  16.352  16.166]
 [ 21.763  22.762  20.537 ...,  16.352  25.319  26.016]
 [ 23.254  24.405  21.908 ...,  16.166  26.016  27.391]]
INFO:root:B (103,): 
 [-95.161 -95.834 -93.701 -95.63  -97.396 -93.465 -92.422 -94.419 -91.444
 -90.587 -93.193 -93.137 -87.457 -86.879 -84.861 -80.451 -77.005 -73.444
 -71.649 -70.076 -75.291 -88.937 -86.868 -88.269 -84.357 -80.695 -80.383
 -80.047 -83.916 -76.777 -94.241 -92.734 -95.722 -95.333 -96.809 -97.43
 -98.159 -95.249 -94.89  -96.707 -97.201 -93.342 -92.693 -91.249 -89.558
 -84.894 -82.053 -84.636 -78.415 -80.928 -93.905 -94.014 -91.538 -87.599
 -83.503 -81.055 -77.273 -83.694 -80.088 -96.047 -95.628 -92.206 -90.122
 -90.709 -92.782 -96.338 -89.992 -91.542 -84.881 -

INFO:root:
INFO:root:atoms grouped together by their ASE indices:
INFO:root:[array([0, 1, 2]), array([3, 4]), array([ 5,  6,  7,  8,  9, 10, 11]), array([12, 13, 14, 15]), array([16, 17, 18, 19, 20]), array([21, 22, 23, 24]), array([25, 26, 27, 28, 29]), array([30, 31]), array([32, 33]), array([34, 35, 36, 37, 38, 39, 40]), array([41, 42, 43, 44]), array([45, 46, 47, 48, 49]), array([50, 51, 52, 53]), array([54, 55, 56, 57, 58]), array([59, 60]), array([74, 75]), array([76, 77, 78, 79, 80, 81, 82]), array([83, 84, 85, 86]), array([87, 88, 89, 90, 91]), array([92, 93, 94, 95]), array([ 96,  97,  98,  99, 100]), array([101, 102]), array([61, 62]), array([63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73])]
INFO:root:
INFO:root:desired charge of each group:
INFO:root:[0, 0, 0, 0, -1, 0, -1, 0, 0, 0, 0, -1, 0, -1, 0, 0, 0, 0, -1, 0, -1, 0, 0, 0]
INFO:root:cg 0, type 1:
INFO:root:                                 q: -0.0000    absolute error: -1.3600e-14
INFO:root:                   q_unconstrained

INFO:root:                   q_unconstrained: -0.7899    absolute error:  2.1010e-01
INFO:root:                q_qtot_constrained: -0.8204    absolute error:  1.7963e-01
INFO:root:             q_cg_qtot_constrained: -1.0000    absolute error:  8.8818e-16
INFO:root:cg 21, type 8:
INFO:root:                                 q: -0.0000    absolute error: -1.0797e-14
INFO:root:                   q_unconstrained:  0.3217    absolute error:  3.2174e-01
INFO:root:                q_qtot_constrained:  0.3198    absolute error:  3.1984e-01
INFO:root:             q_cg_qtot_constrained:  0.0000    absolute error:  0.0000e+00
INFO:root:cg 22, type 9:
INFO:root:                                 q: -0.0000    absolute error: -4.3632e-14
INFO:root:                   q_unconstrained: -0.0251    absolute error: -2.5104e-02
INFO:root:                q_qtot_constrained: -0.0085    absolute error: -8.5147e-03
INFO:root:             q_cg_qtot_constrained: -0.0000    absolute error: -4.4409e-16
INFO:root:cg 23

In [24]:
q_neg # ok, but all charges inverted

array([ 0.524, -0.196, -0.328,  0.241, -0.241, -0.843,  0.008,  0.63 ,
        0.285,  0.169, -0.121, -0.127, -0.799,  0.484,  0.546, -0.231,
       -0.584,  1.059, -0.505, -0.42 , -0.551, -0.799,  0.484,  0.546,
       -0.231, -0.584,  1.059, -0.505, -0.42 , -0.551,  0.241, -0.241,
        0.241, -0.241,  0.163, -0.299,  0.252, -0.193,  0.224,  0.091,
       -0.238, -0.799,  0.484,  0.546, -0.231, -0.584,  1.059, -0.505,
       -0.42 , -0.551, -0.799,  0.484,  0.546, -0.231, -0.584,  1.059,
       -0.505, -0.42 , -0.551,  0.241, -0.241,  0.271, -0.271,  0.986,
       -1.051,  0.395,  0.51 , -0.029, -0.239, -0.223,  0.86 , -0.445,
       -0.81 ,  0.045,  0.241, -0.241, -0.257, -0.129,  0.631,  0.411,
        0.359, -1.329,  0.314, -0.799,  0.484,  0.546, -0.231, -0.584,
        1.059, -0.505, -0.42 , -0.551, -0.799,  0.484,  0.546, -0.231,
       -0.584,  1.059, -0.505, -0.42 , -0.551,  0.241, -0.241])

In [26]:
#q, lagrange_multiplier, info_df, cg2ase, cg2cgtype, cg2q, sym2ase
q_cost_neg, lagrange_multiplier_cost_neg, info_df_cost_neg, cg2ase_cost_neg, \
    cg2cgtype_cost_neg, cg2q_cost_neg, sym2ase_cost_neg = \
    fitESPconstrained(infile_pdb = 'sandbox/system100.pdb', 
                  infile_top = 'sandbox/system100.lean.top', 
                  infile_cost_h5 = 'sandbox/system100.cost_ua_neg.h5', 
                  infile_atoms_in_cg_csv = 'sandbox/atoms_in_charge_group.csv', 
                  infile_cg_charges_csv = 'sandbox/charge_group_total_charge.csv', 
                  infile_atoms_of_same_charge_csv = 'sandbox/atoms_of_same_charge.csv',
                  qtot = 6.0, strip_string=':SOL,CL', 
                  implicitHbondingPartners = {'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2},
                  debug=True)

INFO:root:Adding 1 H-atoms to CD3 (#8)...
INFO:root:bondingPartners [ 9 21]
INFO:root:Atom CD3 already has bonding partners CD4, CB1
INFO:root:Adding H-atom 1CD3 at position [ 23.911579364757202, 25.466622991293512, 12.349999999999998 ]
INFO:root:Adding 1 H-atoms to CD4 (#9)...
INFO:root:bondingPartners [10 12]
INFO:root:Atom CD4 already has bonding partners CD5, CA1
INFO:root:Adding H-atom 1CD4 at position [ 25.312471957818314, 25.072450829115194, 10.489569761035874 ]
INFO:root:Adding 2 H-atoms to CA2 (#15)...
INFO:root:bondingPartners [16]
INFO:root:Atom CA2 already has bonding partners CA3
INFO:root:Adding H-atom 1CA2 at position [ 28.74261935828807, 23.557590675857806, 11.104589188842278 ]
INFO:root:bondingPartners [ 16 105]
INFO:root:Atom CA2 already has bonding partners CA3, 1CA2
INFO:root:Adding H-atom 2CA2 at position [ 28.85271690173004, 23.702220854918263, 9.389826475843604 ]
INFO:root:Adding 2 H-atoms to CA3 (#16)...
INFO:root:bondingPartners [17]
INFO:root:Atom CA3 already 

INFO:root:103 unknowns, 80 equality constraints
INFO:root:A (103, 103): 
 [[ 25.815  26.367  25.498 ...,  13.196  21.763  23.254]
 [ 26.367  27.579  25.743 ...,  13.624  22.762  24.405]
 [ 25.498  25.743  25.8   ...,  12.581  20.537  21.908]
 ..., 
 [ 13.196  13.624  12.581 ...,  19.353  16.352  16.166]
 [ 21.763  22.762  20.537 ...,  16.352  25.319  26.016]
 [ 23.254  24.405  21.908 ...,  16.166  26.016  27.391]]
INFO:root:B (103,): 
 [ 95.161  95.834  93.701  95.63   97.396  93.465  92.422  94.419  91.444
  90.587  93.193  93.137  87.457  86.879  84.861  80.451  77.005  73.444
  71.649  70.076  75.291  88.937  86.868  88.269  84.357  80.695  80.383
  80.047  83.916  76.777  94.241  92.734  95.722  95.333  96.809  97.43
  98.159  95.249  94.89   96.707  97.201  93.342  92.693  91.249  89.558
  84.894  82.053  84.636  78.415  80.928  93.905  94.014  91.538  87.599
  83.503  81.055  77.273  83.694  80.088  96.047  95.628  92.206  90.122
  90.709  92.782  96.338  89.992  91.542  84.881  

INFO:root:103 unknowns, 25 equality constraints
INFO:root:A (103, 103): 
 [[ 25.815  26.367  25.498 ...,  13.196  21.763  23.254]
 [ 26.367  27.579  25.743 ...,  13.624  22.762  24.405]
 [ 25.498  25.743  25.8   ...,  12.581  20.537  21.908]
 ..., 
 [ 13.196  13.624  12.581 ...,  19.353  16.352  16.166]
 [ 21.763  22.762  20.537 ...,  16.352  25.319  26.016]
 [ 23.254  24.405  21.908 ...,  16.166  26.016  27.391]]
INFO:root:B (103,): 
 [ 95.161  95.834  93.701  95.63   97.396  93.465  92.422  94.419  91.444
  90.587  93.193  93.137  87.457  86.879  84.861  80.451  77.005  73.444
  71.649  70.076  75.291  88.937  86.868  88.269  84.357  80.695  80.383
  80.047  83.916  76.777  94.241  92.734  95.722  95.333  96.809  97.43
  98.159  95.249  94.89   96.707  97.201  93.342  92.693  91.249  89.558
  84.894  82.053  84.636  78.415  80.928  93.905  94.014  91.538  87.599
  83.503  81.055  77.273  83.694  80.088  96.047  95.628  92.206  90.122
  90.709  92.782  96.338  89.992  91.542  84.881  

INFO:root:
INFO:root:atoms grouped together by their ASE indices:
INFO:root:[array([0, 1, 2]), array([3, 4]), array([ 5,  6,  7,  8,  9, 10, 11]), array([12, 13, 14, 15]), array([16, 17, 18, 19, 20]), array([21, 22, 23, 24]), array([25, 26, 27, 28, 29]), array([30, 31]), array([32, 33]), array([34, 35, 36, 37, 38, 39, 40]), array([41, 42, 43, 44]), array([45, 46, 47, 48, 49]), array([50, 51, 52, 53]), array([54, 55, 56, 57, 58]), array([59, 60]), array([74, 75]), array([76, 77, 78, 79, 80, 81, 82]), array([83, 84, 85, 86]), array([87, 88, 89, 90, 91]), array([92, 93, 94, 95]), array([ 96,  97,  98,  99, 100]), array([101, 102]), array([61, 62]), array([63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73])]
INFO:root:
INFO:root:desired charge of each group:
INFO:root:[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0]
INFO:root:cg 0, type 1:
INFO:root:                                 q:  0.0000    absolute error:  1.3600e-14
INFO:root:                   q_unconstrained: -0.0

INFO:root:                   q_unconstrained:  0.7899    absolute error: -2.1010e-01
INFO:root:                q_qtot_constrained:  0.8204    absolute error: -1.7963e-01
INFO:root:             q_cg_qtot_constrained:  1.0000    absolute error: -8.8818e-16
INFO:root:cg 21, type 8:
INFO:root:                                 q:  0.0000    absolute error:  1.0797e-14
INFO:root:                   q_unconstrained: -0.3217    absolute error: -3.2174e-01
INFO:root:                q_qtot_constrained: -0.3198    absolute error: -3.1984e-01
INFO:root:             q_cg_qtot_constrained:  0.0000    absolute error:  0.0000e+00
INFO:root:cg 22, type 9:
INFO:root:                                 q:  0.0000    absolute error:  4.3632e-14
INFO:root:                   q_unconstrained:  0.0251    absolute error:  2.5104e-02
INFO:root:                q_qtot_constrained:  0.0085    absolute error:  8.5147e-03
INFO:root:             q_cg_qtot_constrained:  0.0000    absolute error:  4.4409e-16
INFO:root:cg 23

In [28]:
q_neg

array([ 0.524, -0.196, -0.328,  0.241, -0.241, -0.843,  0.008,  0.63 ,
        0.285,  0.169, -0.121, -0.127, -0.799,  0.484,  0.546, -0.231,
       -0.584,  1.059, -0.505, -0.42 , -0.551, -0.799,  0.484,  0.546,
       -0.231, -0.584,  1.059, -0.505, -0.42 , -0.551,  0.241, -0.241,
        0.241, -0.241,  0.163, -0.299,  0.252, -0.193,  0.224,  0.091,
       -0.238, -0.799,  0.484,  0.546, -0.231, -0.584,  1.059, -0.505,
       -0.42 , -0.551, -0.799,  0.484,  0.546, -0.231, -0.584,  1.059,
       -0.505, -0.42 , -0.551,  0.241, -0.241,  0.271, -0.271,  0.986,
       -1.051,  0.395,  0.51 , -0.029, -0.239, -0.223,  0.86 , -0.445,
       -0.81 ,  0.045,  0.241, -0.241, -0.257, -0.129,  0.631,  0.411,
        0.359, -1.329,  0.314, -0.799,  0.484,  0.546, -0.231, -0.584,
        1.059, -0.505, -0.42 , -0.551, -0.799,  0.484,  0.546, -0.231,
       -0.584,  1.059, -0.505, -0.42 , -0.551,  0.241, -0.241])

In [29]:
q_cost_neg

array([-0.524,  0.196,  0.328, -0.241,  0.241,  0.843, -0.008, -0.63 ,
       -0.285, -0.169,  0.121,  0.127,  0.799, -0.484, -0.546,  0.231,
        0.584, -1.059,  0.505,  0.42 ,  0.551,  0.799, -0.484, -0.546,
        0.231,  0.584, -1.059,  0.505,  0.42 ,  0.551, -0.241,  0.241,
       -0.241,  0.241, -0.163,  0.299, -0.252,  0.193, -0.224, -0.091,
        0.238,  0.799, -0.484, -0.546,  0.231,  0.584, -1.059,  0.505,
        0.42 ,  0.551,  0.799, -0.484, -0.546,  0.231,  0.584, -1.059,
        0.505,  0.42 ,  0.551, -0.241,  0.241, -0.271,  0.271, -0.986,
        1.051, -0.395, -0.51 ,  0.029,  0.239,  0.223, -0.86 ,  0.445,
        0.81 , -0.045, -0.241,  0.241,  0.257,  0.129, -0.631, -0.411,
       -0.359,  1.329, -0.314,  0.799, -0.484, -0.546,  0.231,  0.584,
       -1.059,  0.505,  0.42 ,  0.551,  0.799, -0.484, -0.546,  0.231,
        0.584, -1.059,  0.505,  0.42 ,  0.551, -0.241,  0.241])

In [32]:
lagrange_multiplier_cost_neg

array([  -9.189, -185.534,   -9.203,   56.143,  -52.699,  -19.233,
         33.006,   79.511,  194.711,   -9.281,   -9.99 ,  -42.738,
        -21.089,    7.76 ,  -40.747, -360.835,   -9.237,  -37.421,
         35.95 ,  -24.132,  -36.879,  257.437,   -9.142,   -9.097,
         88.743,  203.989,  -31.459, -351.581,  266.645,   88.703,
        204.014,  -31.428, -351.61 ,  266.667,   -9.958,   -0.729,
        -11.911,  -28.073,  -14.716,   -9.937,   -0.815,  -12.003,
        -27.958,  -14.534,  -10.014,   -0.741,  -11.952,  -28.162,
        -14.738,  -10.076,   -0.77 ,  -12.202,  -28.144,  -14.484,
         42.123,  -33.712,   16.519,   44.987,  -27.105,   41.864,
        -33.861,   16.526,   44.936,  -26.8  ,   41.749,  -33.737,
         16.358,   44.738,  -26.421,   41.897,  -34.128,   16.639,
         45.061,  -26.947,   41.769,  -33.809,   16.581,   44.975,
        -26.807,    4.599])

In [33]:
info_df_cost_neg.iloc[cg2ase_cost_neg[0]] # select first charge group

Unnamed: 0,atom,residue,q,q_unconstrained,q_qtot_constrained,q_cg_qtot_constrained
0,CE1,terB,-0.524109,-0.329952,-0.324445,-0.30701
1,HE1,terB,0.19582,0.137402,0.128249,0.162811
2,HE2,terB,0.328289,0.146113,0.159843,0.144199


In [35]:
info_df_cost_neg.iloc[cg2ase_cost_neg[0]]['q'].sum()

1.3600232051658168e-14

In [34]:
info_df_cost_neg.iloc[sym2ase_cost_neg[0]] # select first symmetry group

Unnamed: 0,atom,residue,q,q_unconstrained,q_qtot_constrained,q_cg_qtot_constrained
3,CD1,terB,-0.241387,-0.088691,0.006398,-0.40941
30,CD6,terB,-0.241387,-0.306762,-0.179734,-0.233661
32,CD1,OXO0,-0.241387,-0.18902,-0.371108,-0.153185
59,CD6,OXO0,-0.241387,-0.031134,-0.027982,-0.177481
74,CD1,terA,-0.241387,-0.369452,-0.538505,-0.179503
101,CD6,terA,-0.241387,-0.455664,-0.429103,-0.138428


## double check

### total charge of system

In [213]:
from ase.io.cube import read_cube_data
from ase.units import Bohr

In [None]:
cube_data, cube_atoms = read_cube_data("sandbox/system100.rho.cube")

In [214]:
unit_cell = cube_atoms.cell.diagonal() / cube_data.shape
unit_volume = np.prod(unit_cell)
q_el = cube_data.sum()*unit_volume/Bohr**3
q_core_total = 0
for a in cube_atoms:
    q_core_total += a.number

In [227]:
q_core_total

494

In [226]:
q_el

488.00318759870044

In [228]:
q_core_total - q_el

5.9968124012995645