In [912]:
import parmed as pmd
import pandas as pd
import biopandas
import numpy as np
import matplotlib.pyplot as plt
from biopandas.mol2 import PandasMol2
from biopandas.pdb import PandasPdb 
from IPython.core.display import HTML

In [913]:
def multi_table(table_list):
    ''' Acceps a list of IpyTable objects and returns a table which contains each IpyTable in a cell
    '''
    return HTML(
        '<table><tr style="background-color:white;">' + 
        ''.join(['<td>' + table._repr_html_() + '</td>' for table in table_list]) +
        '</tr></table>'
    )

<h2>Determine # of copies in each ligand, and length of each copy<h2>

In [949]:
%cd /Users/yyk_lab/Downloads/ligands10

##Load in .txt (converted from .pdb) for ligand##
BCR = pd.read_table('BCR.txt', sep='\s+', header=None)
PHO = pd.read_table('PHO.txt', sep='\s+', header=None)
SQD = pd.read_table('SQD.txt', sep='\s+', header=None)
DGD = pd.read_table('DGD.txt', sep='\s+', header=None)
PL9 = pd.read_table('PL9.txt', sep='\s+', header=None)
HEM = pd.read_table('HEM.txt', sep='\s+', header=None)
LHG = pd.read_table('LHG.txt', sep='\s+', header=None)
LMG = pd.read_table('LMG.txt', sep='\s+', header=None)
CLA = pd.read_table('CLA.txt', sep='\s+', header=None)

##Check df to see if read in was okay##
#BCR
#PHO
#SQD
#DGD
#PL9
#HEM
#LHG
#LMG
#CLA

##Val= residue number, count=number of atoms##

print("Val      Count")
#BCR.iloc[: ,4].value_counts()
PHO.iloc[: ,4].value_counts()
#SQD.iloc[: ,5].value_counts()
#DGD.iloc[: ,5].value_counts()
#PL9.iloc[: ,4].value_counts()
#HEM.iloc[: ,4].value_counts()
#LHG.iloc[: ,5].value_counts()
#LMG.iloc[: ,5].value_counts()
#CLA.iloc[: ,4].value_counts()

/Users/yyk_lab/Downloads/ligands10
Val      Count


609    128
608    128
Name: 4, dtype: int64

<h2>Investigate .mol2<h2>

In [915]:
####Investigating .mol2 ####
pmol = PandasMol2().read_mol2('3ARC_PHO.mol2')
print('Molecule ID: %s' % pmol.code)
pmol.df
#pmol.df.loc[pmol.df['atom_name'] == 'H21']


Molecule ID: PHO


Unnamed: 0,atom_id,atom_name,x,y,z,atom_type,subst_id,subst_name,charge
0,1,CHA,-1.982,0.840,135.243,ce,608,PHO,0.127598
1,2,CHB,-2.569,1.627,139.964,ce,608,PHO,-0.374547
2,3,CHC,-5.078,5.711,138.836,ce,608,PHO,-0.122356
3,4,CHD,-4.655,4.861,134.008,ce,608,PHO,-0.168692
4,5,,-2.373,1.465,137.582,nf,608,PHO,-0.259638
...,...,...,...,...,...,...,...,...,...
133,134,H2,4.559,-1.693,140.891,ha,608,PHO,0.086859
134,135,H18,-0.368,3.351,148.205,hc,608,PHO,0.086859
135,136,H13,3.582,1.526,146.164,hc,608,PHO,0.086859
136,137,H12,4.930,-3.562,138.542,h1,608,PHO,0.086859


<h2>Generate .gro file<h2>

In [916]:
##Generting .gro file##
%cd /Users/yyk_lab/Downloads/processligand
#amber = pmd.load_file('PHO-9.prmtop', 'PHO-9.inpcrd')
#Saving them out as GROMACS compatible topology and coordinate files
#amber.save('PHO-9-2.top')


/Users/yyk_lab/Downloads/processligand


<h2>Begin to look for mismatches in naming conventions, make sure atom number is identical<h2>

In [917]:
%cd /Users/yyk_lab/Downloads/processligand/PHO

PL9_saka = pd.read_table('PHO_1_saka.txt', sep='\s+', header=None)
PL9_me = pd.read_table('PHO_1_H.txt', sep='\s+', header=None)

count_row_saka = PL9_saka.shape[0]
count_row_me = PL9_me.shape[0]

##Check that # of atoms is the same##
print ('Number of atoms in saka:',  count_row_saka)
print ('Number of atoms in mine:', count_row_me)
#PL9_saka

/Users/yyk_lab/Downloads/processligand/PHO
Number of atoms in saka: 138
Number of atoms in mine: 138


<h2>Figure out which atom names in saka mol2 need to be replaced<h2>

In [918]:
saka_name= PL9_saka.iloc[:,2]
my_name= PL9_me.iloc[:,2]



def diff(list1, list2):
    c = set(list1).union(set(list2))  # or c = set(list1) | set(list2)
    d = set(list1).intersection(set(list2))  # or d = set(list1) & set(list2)
    return list(c - d)

##Print all differences##
a=diff(my_name, saka_name)
mismatch= (len(a)/2)

#PL9_saka
saka_change=[]
for i in range(len(a)):
    for j in range (len(saka_name)):
        if (a[i]==saka_name[j]):
            saka_change.append(a[i])
           

print ('Number of mismatches between me and sakashita:', len(saka_change))
saka_change   #These are the atom names you need to change in .mol2


Number of mismatches between me and sakashita: 15


['HAA1',
 'H101',
 'H71',
 'H11',
 'HNB',
 'H161',
 'H61',
 'HND',
 'HBA1',
 'HAC1',
 'H111',
 'H51',
 'H171',
 'H121',
 'H151']

<h2>Start to match naming conventions<h2>

In [919]:
PL9_me_code= pd.read_table('PHO_1_H_code.txt', sep='\s+', header=None)
PL9_saka_code= pd.read_table('PHO_1_saka_code.txt', sep='\s+', header=None)

for x in range (len(PL9_saka_code)):
    y=PL9_saka_code.loc[x, 1]
    z=PL9_saka_code.loc[x, 2]
    PL9_saka_code.loc[x,3]=PL9_saka.loc[y-1, 2]
    PL9_saka_code.loc[x,4]=PL9_saka.loc[z-1, 2]

PL9_saka_new_code= pd.concat([PL9_saka_code.iloc[:,1],PL9_saka_code.iloc[:,3], PL9_saka_code.iloc[:,2], PL9_saka_code.iloc[:,4]] , axis=1)
PL9_saka_new_code.columns = ['a1', 'saka_name','a2','d']

for x in range (len(PL9_me_code)):
    y=PL9_me_code.loc[x, 1]
    z=PL9_me_code.loc[x, 2]
    PL9_me_code.loc[x,3]=PL9_me.loc[y-1, 2]
    PL9_me_code.loc[x,4]=PL9_me.loc[z-1, 2]

PL9_me_new_code= pd.concat([PL9_me_code.iloc[:,1],PL9_me_code.iloc[:,3], PL9_me_code.iloc[:,2], PL9_me_code.iloc[:,4]] , axis=1)
PL9_me_new_code.columns = ['a1', 'myname','a2','d']
mycode=PL9_me_new_code.sort_values('d')
sakacode=PL9_saka_new_code.sort_values('d')
multi_table ([mycode, sakacode])

Unnamed: 0_level_0,a1,myname,a2,d
Unnamed: 0_level_1,a1,saka_name,a2,d
4,65,H12,3,C1
7,66,H13,3,C1
0,67,H102,4,C10
1,68,H103,4,C10
2,69,H112,5,C11
...,...,...,...,...
71,121,HMD1,57,CMD
72,122,HMD2,57,CMD
73,123,HMD3,57,CMD
46,102,HB,1,NB

Unnamed: 0,a1,myname,a2,d
4,65,H12,3,C1
7,66,H13,3,C1
0,67,H102,4,C10
1,68,H103,4,C10
2,69,H112,5,C11
...,...,...,...,...
71,121,HMD1,57,CMD
72,122,HMD2,57,CMD
73,123,HMD3,57,CMD
46,102,HB,1,NB

Unnamed: 0,a1,saka_name,a2,d
25,138,H11,45,C1
28,137,H12,45,C1
24,110,H102,54,C10
23,111,H101,54,C10
26,109,H111,55,C11
...,...,...,...,...
0,45,C1,15,O2A
17,41,CGD,43,O2D
15,44,CED,43,O2D
1,6,C1A,5,


<h2>Making replacement<h2>

In [920]:
replace= pd.DataFrame()
bond=[]
for y in range(len(saka_change)):
    if ((sakacode['saka_name']== saka_change[y])).any(): 
       # print((sakacode.loc[sakacode['saka_name'] == saka_change[y]])) 
        replace=replace.append(sakacode.loc[sakacode['saka_name'] == saka_change[y]])
        bond.append(replace.iloc[y, 3])
        #print (replace.iloc[y, 3])
    else: 
         #print((sakacode.loc[sakacode['d'] == saka_change[y]]))
        replace=replace.append(sakacode.loc[sakacode['d'] == saka_change[y]])
        bond.append(replace.iloc[y, 1])
        print ("check in d for", saka_change[y])
        
print ("the rest are in saka_name")

len(bond)
#len(replace)


the rest are in saka_name


15

In [921]:
replacement= pd.DataFrame()

for y in range(len(bond)):
    if ((mycode['myname']== bond[y]).any()): 
        #print ('hi')
        #print(mycode.loc[mycode['d'] == saka_change[y]])
        replacement=replacement.append(mycode.loc[mycode['myname'] == bond[y]])
    else: 
        #print ('oh')
        #print(mycode.loc[mycode['myname'] == saka_change[y]])
        replacement= replacement.append(mycode.loc[mycode['d'] == bond[y]])
        
multi_table([replace, replacement])


Unnamed: 0_level_0,a1,saka_name,a2,d
Unnamed: 0_level_1,a1,myname,a2,d
64,90,HAA1,11.0,CAA
23,111,H101,54.0,C10
58,124,H71,51.0,C7
25,138,H11,45.0,C1
95,113,HNB,16.0,NB
37,100,H161,60.0,C16
56,126,H61,50.0,C6
96,112,HND,32.0,ND
69,86,HBA1,12.0,CBA
67,88,HAC1,30.0,CAC

Unnamed: 0,a1,saka_name,a2,d
64,90,HAA1,11,CAA
23,111,H101,54,C10
58,124,H71,51,C7
25,138,H11,45,C1
95,113,HNB,16,NB
37,100,H161,60,C16
56,126,H61,50,C6
96,112,HND,32,ND
69,86,HBA1,12,CBA
67,88,HAC1,30,CAC

Unnamed: 0,a1,myname,a2,d
41,91,HAA2,39,CAA
42,92,HAA3,39,CAA
0,67,H102,4,C10
1,68,H103,4,C10
35,88,H72,36,C7
36,89,H73,36,C7
4,65,H12,3,C1
7,66,H13,3,C1
46,102,HB,1,NB
14,76,H162,10,C16


<h2>Generating final dataframe to implement replacement. Double checks to see if replacement is already in code. <h2>

In [922]:
swaps=pd.DataFrame(columns=['Sakaname', 'bond' , 'Newname'])
swaps["Sakaname"]=saka_change
swaps["bond"]=bond
p=0

for y in range(len(bond)):
    num=bond[y]
    for z in range(len(replacement)):
        if ((replacement['d']==num).any()):
            index=(replacement.loc[replacement['d']==num]).index[0]
            rep=PL9_me_new_code.iloc[index,1]
       
            if ((sakacode['saka_name']==rep).any()):
                rep2=PL9_me_new_code.iloc[index+1,1]
       
                if ((sakacode['saka_name'] ==rep2).any()):
                    p=1
                  
                else: 
                    if (PL9_me_new_code.iloc[index+1,3] == num):
                        swaps.loc[y, "Newname"]= PL9_me_new_code.iloc[index+1,1]      
            else:        
                swaps.loc[y, "Newname"]= PL9_me_new_code.iloc[index,1]
swaps

Unnamed: 0,Sakaname,bond,Newname
0,HAA1,CAA,HAA3
1,H101,C10,H103
2,H71,C7,H73
3,H11,C1,
4,HNB,NB,HB
5,H161,C16,H163
6,H61,C6,H63
7,HND,ND,HD
8,HBA1,CBA,HBA3
9,HAC1,CAC,HAC3


In [947]:
pmol2= pmol.df
for x in range (len(swaps)):
    replaceme=swaps.iloc[x,0]
    if ((pmol.df['atom_name']==replaceme).any()):
        index=(pmol.df['atom_name']==replaceme).index[0]
        new= swaps.iloc[x, 2]
        #print(pmol.df.iloc[index, 'atom_name'])
        pmol2 = pmol2.replace([replaceme], new)

#check if replacement has been made         
print((pmol2.loc[pmol2['atom_name'] == 'H53']))        