In [1]:
import __main__
__main__.pymol_argv = [ 'pymol', '-qc'] 

import sys, time, os
import itertools
import pymol
import numpy as np

In [2]:
# pdbin  : structure file that is read (and later mutated) by this notebook.
# pdbout : file the mutated structure is written to.
pdbin, pdbout = (
    '../3_reassemble_chains/5weo_reassembled.pdb',
    './5weo_final_homology_model.pdb',
)

In [3]:
# P42261|GRIA1_HUMAN Glutamate receptor sequence from UniProt
seq_ampa_full = 'MQHIFAFFCTGFLGAVVGANFPNNIQIGGLFPNQQSQEHAAFRFALSQLTEPPKLLPQIDIVNISDSFEMTYRFCSQFSKGVYAIFGFYERRTVNMLTSFCGALHVCFITPSFPVDTSNQFVLQLRPELQDALISIIDHYKWQKFVYIYDADRGLSVLQKVLDTAAEKNWQVTAVNILTTTEEGYRMLFQDLEKKKERLVVVDCESERLNAILGQIIKLEKNGIGYHYILANLGFMDIDLNKFKESGANVTGFQLVNYTDTIPAKIMQQWKNSDARDHTRVDWKRPKYTSALTYDGVKVMAEAFQSLRRQRIDISRRGNAGDCLANPAVPWGQGIDIQRALQQVRFEGLTGNVQFNEKGRRTNYTLHVIEMKHDGIRKIGYWNEDDKFVPAATDAQAGGDNSSVQNRTYIVTTILEDPYVMLKKNANQFEGNDRYEGYCVELAAEIAKHVGYSYRLEIVSDGKYGARDPDTKAWNGMVGELVYGRADVAVAPLTITLVREEVIDFSKPFMSLGISIMIKKPQKSKPGVFSFLDPLAYEIWMCIVFAYIGVSVVLFLVSRFSPYEWHSEEFEEGRDQTTSDQSNEFGIFNSLWFSLGAFMQQGCDISPRSLSGRIVGGVWWFFTLIIISSYTANLAAFLTVERMVSPIESAEDLAKQTEIAYGTLEAGSTKEFFRRSKIAVFEKMWTYMKSAEPSVFVRTTEEGMIRVRKSKGKYAYLLESTMNEYIEQRKPCDTMKVGGNLDSKGYGIATPKGSALRNPVNLAVLKLNEQGLLDKLKNKWWYDKGECGSGGGDSKDKTSALSLSNVAGVFYILIGGLGLAMLVALIEFCYKSRSESKRMKGFCLIPQQSINEAIRTSTLPRNSGAGASSGGSGENGRVVSHDFPKSMQSIPCMSHSSGMPLGATGL'

# Trim the beginning and the end of the full sequence: keep the 0-based
# indices 22..833, i.e. drop the first 22 residues and everything from
# index 834 onward.
# NOTE(review): the bounds 22 and 834 are hard-coded; presumably they were
# chosen to match the residue range present in the structure - confirm.
seq_ampa = seq_ampa_full[22:834]

In [4]:
# Three-letter -> one-letter residue codes for the 20 amino acids handled
# by this notebook, listed as (three_letter, one_letter) pairs.
three2one = dict([
    ('CYS', 'C'), ('ASP', 'D'), ('SER', 'S'), ('GLN', 'Q'), ('LYS', 'K'),
    ('ILE', 'I'), ('PRO', 'P'), ('THR', 'T'), ('PHE', 'F'), ('ASN', 'N'),
    ('GLY', 'G'), ('HIS', 'H'), ('LEU', 'L'), ('ARG', 'R'), ('TRP', 'W'),
    ('ALA', 'A'), ('VAL', 'V'), ('GLU', 'E'), ('TYR', 'Y'), ('MET', 'M'),
])

# Reverse lookup: one-letter -> three-letter code.
one2three = dict(zip(three2one.values(), three2one.keys()))

def seq_in_chains(pdb, chains=('A', 'B', 'C', 'D')):
    """Read the one-letter sequence of the requested chains from a PDB file.

    Only lines starting with ``ATOM`` are used. Each line is split on
    whitespace and fields 3..5 are taken as residue name, chain id and
    residue number. When chain id and residue number are fused into one
    field (e.g. ``A1000``), the first character is taken as the chain and
    the rest as the residue number.

    A residue is appended to a chain's sequence whenever the residue number
    differs from the previous one seen *for that chain*, so restarts or
    decreasing residue numbers inside a chain no longer drop residues.

    Parameters
    ----------
    pdb : str
        Path of the PDB file to read.
    chains : iterable of str
        Single-character chain ids to extract (a string such as ``'ABCD'``
        works as well).

    Returns
    -------
    dict
        Chain id -> one-letter sequence, with keys in the order given by
        ``chains``. Chains without any ATOM line map to ``''``.

    Raises
    ------
    KeyError
        If a residue name is not a key of ``three2one``.
    ValueError
        If an ATOM line has too few fields or a non-integer residue number.
    """
    result = {chain_id: '' for chain_id in chains}
    # Residue number most recently appended, tracked separately per chain.
    last_res_id = {}
    with open(pdb, 'r') as file:
        for line in file:
            if line[:4] != 'ATOM':
                continue
            res, chain, res_id = line.split()[3:6]
            if len(chain) > 1:
                # Chain id and residue number are fused, e.g. 'A1000'.
                res_id = chain[1:]
                chain = chain[0]
            if chain not in result:
                continue
            res_id = int(res_id)
            # First atom of a new residue in this chain -> extend the sequence.
            if last_res_id.get(chain) != res_id:
                last_res_id[chain] = res_id
                result[chain] += three2one[res]
    return result

def diff_seq(seq1,seq2):
    """List the positions where two sequences disagree.

    The sequences are compared element by element up to the length of the
    shorter one. Each mismatch is reported as ``[index, item1, item2]``
    with a 0-based ``index``.
    """
    return [
        [pos, left, right]
        for pos, (left, right) in enumerate(zip(seq1, seq2))
        if left != right
    ]

def print_diff_seq(seq1, seq2, i0=1):
    """Print a table of the positions where ``seq1`` and ``seq2`` differ.

    One row is printed per mismatch found by ``diff_seq``; the reported
    residue number is the 0-based mismatch index plus ``i0``. Both residues
    are shown as three-letter code followed by the one-letter code.
    """
    row_template = '{},\t{}({}) !=  {}({})'
    print('Resid\tseq1       seq2')
    for pos, left, right in diff_seq(seq1, seq2):
        row = row_template.format(pos + i0, one2three[left], left, one2three[right], right)
        print(row)

In [5]:
def mutate_protein_pdb(pdbIn,pdbOut,arr_iRes,arr_mutRes,arr_chain=('A',)):
    """Mutate residues of a PDB structure with PyMOL's mutagenesis wizard.

    The structure is loaded from ``pdbIn``, every requested mutation is
    applied in order, and the result is saved to ``pdbOut``. All PyMOL
    objects are deleted afterwards, also when an error occurs.

    Parameters
    ----------
    pdbIn : str
        Path of the input PDB file. The PyMOL object name is the file name
        up to its first dot.
    pdbOut : str
        Path the mutated structure is saved to.
    arr_iRes : sequence
        Residue identifiers to mutate (used in the selection string).
    arr_mutRes : sequence
        Target residue types, one per entry of ``arr_iRes`` (passed to the
        wizard's ``set_mode`` as a string).
    arr_chain : sequence of str
        Chain id for each mutation. A single-element sequence (the default
        ``('A',)``) is applied to every mutation.

    Raises
    ------
    ValueError
        If ``arr_iRes``, ``arr_mutRes`` and ``arr_chain`` do not describe
        the same number of mutations. Previously such input was silently
        truncated to the shortest sequence.
    """
    residues = list(arr_iRes)
    targets = list(arr_mutRes)
    chains = list(arr_chain)
    # A single chain id applies to every mutation; without this, zip()
    # would stop after the first residue when the default is used.
    if len(chains) == 1:
        chains = chains * len(residues)
    if not (len(residues) == len(targets) == len(chains)):
        raise ValueError(
            'arr_iRes, arr_mutRes and arr_chain must have the same length '
            '(got {}, {} and {})'.format(len(residues), len(targets), len(chains)))

    # Name of the pdb file ( without path and .pdb)
    pdbName = pdbIn.split('/')[-1].split('.')[0]

    try:
        # Load the structure
        pymol.cmd.load(pdbIn, pdbName)
        pymol.cmd.wizard('mutagenesis')
        try:
            for iRes, mutRes, chain in zip(residues, targets, chains):
                print(iRes, mutRes, chain)

                # Reset the wizard state before each mutation
                pymol.cmd.do('refresh_wizard')

                # Set target residue type
                pymol.cmd.get_wizard().set_mode(str(mutRes))

                # Select residue
                selection = '/{}//{}/{}'.format(pdbName,chain,iRes)

                # Notify the wizard about the selection
                pymol.cmd.get_wizard().do_select(selection)

                # Select frame 1
                pymol.cmd.frame('1')

                # Apply the mutation
                pymol.cmd.get_wizard().apply()
        finally:
            # Close the wizard so it does not stay active after this call
            pymol.cmd.set_wizard()

        # Write the mutated structure
        pymol.cmd.save(pdbOut)
    finally:
        # Always leave the PyMOL session empty
        pymol.cmd.delete('all')

In [6]:
# Per-chain one-letter sequences of the input structure.
ABCD = seq_in_chains(pdbin)
# Displayed result: True when chains A, B, C and D all have the same sequence.
len({ABCD[label] for label in 'ABCD'}) == 1

True

In [7]:
# Collect, for every chain, the mutations needed to turn the structure's
# sequence into seq_ampa. The three lists are parallel: residue number,
# target residue (three-letter code) and chain id of each mutation.
arr_iRes,arr_mutRes,arr_chain =[], [], []

for chain in 'ABCD':
    # Entries are [0-based index, residue in structure, residue in seq_ampa].
    hih = diff_seq(ABCD[chain],seq_ampa)
    
    for iRes, _, mutRes in hih:
        
        # Convert the 0-based sequence index to a 1-based residue number.
        # NOTE(review): assumes residues in the PDB file are numbered
        # contiguously starting at 1 - confirm against the input file.
        arr_iRes.append(iRes+1)
        arr_mutRes.append(one2three[mutRes])
        arr_chain.append(chain)
    
    

In [14]:
# Fraction of chain-A positions that need no mutation: the number of collected
# mutations averaged over the four chains (A-D), divided by the length of
# chain A, subtracted from 1.
1-(len(arr_iRes)/4)/len(ABCD['A'])

0.7389162561576355

In [None]:
# Apply every collected mutation to the structure in pdbin and save the
# result to pdbout (prints one progress line per mutation).
mutate_protein_pdb(pdbin,pdbout,arr_iRes,arr_mutRes,arr_chain)

 PyMOL not running, entering library mode (experimental)
2 ASN A
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 8/9, strain=41.43
11 ASN A
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 3/9, strain=12.91
12 GLN A
Selected!
 Mutagenesis: 15 rotamers loaded.
 Rotamer 15/15, strain=25.78
13 GLN A
Selected!
 Mutagenesis: 14 rotamers loaded.
 Rotamer 2/14, strain=30.50
14 SER A
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 1/3, strain=33.06
17 HIS A
Selected!
 Mutagenesis: 8 rotamers loaded.
 Rotamer 4/8, strain=37.54
18 ALA A
Selected!
 Mutagenesis: no rotamers found in library.
22 PHE A
Selected!
 Mutagenesis: 4 rotamers loaded.
 Rotamer 3/4, strain=33.84
23 ALA A
Selected!
 Mutagenesis: no rotamers found in library.
24 LEU A
Selected!
 Mutagenesis: 4 rotamers loaded.
 Rotamer 2/4, strain=24.22
25 SER A
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 1/3, strain=72.63
27 LEU A
Selected!
 Mutagenesis: 2 rotamers loaded.
 Rotamer 1/2, strain=48.57
28 THR A
Selected!
 Mut

241 PRO A
Selected!
 Mutagenesis: 2 rotamers loaded.
 Rotamer 2/2, strain=47.34
242 ALA A
Selected!
 Mutagenesis: no rotamers found in library.
244 ILE A
Selected!
 Mutagenesis: 4 rotamers loaded.
 Rotamer 2/4, strain=58.86
245 MET A
Selected!
 Mutagenesis: 13 rotamers loaded.
 Rotamer 9/13, strain=36.78
246 GLN A
Selected!
 Mutagenesis: 20 rotamers loaded.
 Rotamer 13/20, strain=30.00
247 GLN A
Selected!
 Mutagenesis: 16 rotamers loaded.
 Rotamer 14/16, strain=31.81
249 LYS A
Selected!
 Mutagenesis: 19 rotamers loaded.
 Rotamer 2/19, strain=29.90
250 ASN A
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 9/9, strain=21.81
251 SER A
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 2/3, strain=22.32
252 ASP A
Selected!
 Mutagenesis: 8 rotamers loaded.
 Rotamer 7/8, strain=16.69
253 ALA A
Selected!
 Mutagenesis: no rotamers found in library.
254 ARG A
Selected!
 Mutagenesis: 27 rotamers loaded.
 Rotamer 2/27, strain=28.80
255 ASP A
Selected!
 Mutagenesis: 7 rotamers loaded.
 Rotame

 Mutagenesis: no rotamers found in library.
771 ASP A
Selected!
 Mutagenesis: 7 rotamers loaded.
 Rotamer 3/7, strain=19.51
774 ASP A
Selected!
 Mutagenesis: 6 rotamers loaded.
 Rotamer 6/6, strain=31.13
792 ILE A
Selected!
 Mutagenesis: 4 rotamers loaded.
 Rotamer 1/4, strain=17.11
812 SER A
Selected!
 Mutagenesis: no phi/psi, using backbone-independent rotamers.
 Mutagenesis: 3 rotamers loaded.
 Rotamer 2/3, strain=12.59
2 ASN B
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 8/9, strain=40.05
11 ASN B
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 3/9, strain=12.93
12 GLN B
Selected!
 Mutagenesis: 15 rotamers loaded.
 Rotamer 15/15, strain=25.86
13 GLN B
Selected!
 Mutagenesis: 14 rotamers loaded.
 Rotamer 2/14, strain=30.49
14 SER B
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 1/3, strain=33.05
17 HIS B
Selected!
 Mutagenesis: 8 rotamers loaded.
 Rotamer 4/8, strain=37.50
18 ALA B
Selected!
 Mutagenesis: no rotamers found in library.
22 PHE B
Selected!
 Mutagenesis:

 Mutagenesis: 3 rotamers loaded.
 Rotamer 3/3, strain=25.78
235 ASN B
Selected!
 Mutagenesis: 11 rotamers loaded.
 Rotamer 5/11, strain=22.34
237 THR B
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 1/3, strain=26.85
239 THR B
Selected!
 Mutagenesis: 2 rotamers loaded.
 Rotamer 2/2, strain=20.13
240 ILE B
Selected!
 Mutagenesis: 5 rotamers loaded.
 Rotamer 3/5, strain=20.16
241 PRO B
Selected!
 Mutagenesis: 2 rotamers loaded.
 Rotamer 2/2, strain=47.35
242 ALA B
Selected!
 Mutagenesis: no rotamers found in library.
244 ILE B
Selected!
 Mutagenesis: 4 rotamers loaded.
 Rotamer 2/4, strain=58.86
245 MET B
Selected!
 Mutagenesis: 13 rotamers loaded.
 Rotamer 9/13, strain=36.79
246 GLN B
Selected!
 Mutagenesis: 20 rotamers loaded.
 Rotamer 13/20, strain=29.98
247 GLN B
Selected!
 Mutagenesis: 16 rotamers loaded.
 Rotamer 14/16, strain=31.80
249 LYS B
Selected!
 Mutagenesis: 19 rotamers loaded.
 Rotamer 2/19, strain=29.91
250 ASN B
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 9/

 Mutagenesis: 11 rotamers loaded.
 Rotamer 7/11, strain=25.41
750 LEU B
Selected!
 Mutagenesis: 4 rotamers loaded.
 Rotamer 2/4, strain=20.53
767 SER B
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 2/3, strain=42.31
768 GLY B
Selected!
 Mutagenesis: no rotamers found in library.
769 GLY B
Selected!
 Mutagenesis: no rotamers found in library.
770 GLY B
Selected!
 Mutagenesis: no rotamers found in library.
771 ASP B
Selected!
 Mutagenesis: 8 rotamers loaded.
 Rotamer 5/8, strain=44.57
774 ASP B
Selected!
 Mutagenesis: 8 rotamers loaded.
 Rotamer 3/8, strain=21.14
792 ILE B
Selected!
 Mutagenesis: 6 rotamers loaded.
 Rotamer 1/6, strain=20.85
812 SER B
Selected!
 Mutagenesis: no phi/psi, using backbone-independent rotamers.
 Mutagenesis: 3 rotamers loaded.
 Rotamer 2/3, strain=25.68
2 ASN C
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 8/9, strain=40.16
11 ASN C
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 3/9, strain=12.88
12 GLN C
Selected!
 Mutagenesis: 15 rotamers l

 Rotamer 16/19, strain=30.84
223 GLU C
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 3/9, strain=38.31
224 SER C
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 3/3, strain=91.30
227 ASN C
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 8/9, strain=28.90
229 THR C
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 3/3, strain=37.23
233 LEU C
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 3/3, strain=25.77
235 ASN C
Selected!
 Mutagenesis: 11 rotamers loaded.
 Rotamer 5/11, strain=22.35
237 THR C
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 1/3, strain=26.85
239 THR C
Selected!
 Mutagenesis: 2 rotamers loaded.
 Rotamer 2/2, strain=20.14
240 ILE C
Selected!
 Mutagenesis: 5 rotamers loaded.
 Rotamer 3/5, strain=20.16
241 PRO C
Selected!
 Mutagenesis: 2 rotamers loaded.
 Rotamer 2/2, strain=47.34
242 ALA C
Selected!
 Mutagenesis: no rotamers found in library.
244 ILE C
Selected!
 Mutagenesis: 4 rotamers loaded.
 Rotamer 2/4, strain=58.88
245 MET C
Selected!
 Muta

 Mutagenesis: 12 rotamers loaded.
 Rotamer 3/12, strain=27.33
683 ILE C
Selected!
 Mutagenesis: 5 rotamers loaded.
 Rotamer 2/5, strain=17.34
733 ALA C
Selected!
 Mutagenesis: no rotamers found in library.
735 ARG C
Selected!
 Mutagenesis: 22 rotamers loaded.
 Rotamer 3/22, strain=24.80
736 ASN C
Selected!
 Mutagenesis: 11 rotamers loaded.
 Rotamer 6/11, strain=16.15
746 ASN C
Selected!
 Mutagenesis: 11 rotamers loaded.
 Rotamer 7/11, strain=26.14
750 LEU C
Selected!
 Mutagenesis: 4 rotamers loaded.
 Rotamer 2/4, strain=20.53
767 SER C
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 3/3, strain=20.97
768 GLY C
Selected!
 Mutagenesis: no rotamers found in library.
769 GLY C
Selected!
 Mutagenesis: no rotamers found in library.
770 GLY C
Selected!
 Mutagenesis: no rotamers found in library.
771 ASP C
Selected!
 Mutagenesis: 7 rotamers loaded.
 Rotamer 3/7, strain=19.52
774 ASP C
Selected!
 Mutagenesis: 6 rotamers loaded.
 Rotamer 6/6, strain=28.95
792 ILE C
Selected!
 Mutagenesis: 4 

 Mutagenesis: 11 rotamers loaded.
 Rotamer 9/11, strain=30.91
216 ILE D
Selected!
 Mutagenesis: 5 rotamers loaded.
 Rotamer 4/5, strain=39.56
219 ASN D
Selected!
 Mutagenesis: 10 rotamers loaded.
 Rotamer 9/10, strain=68.86
221 PHE D
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 3/3, strain=46.93
222 LYS D
Selected!
 Mutagenesis: 19 rotamers loaded.
 Rotamer 16/19, strain=44.03
223 GLU D
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 9/9, strain=52.71
224 SER D
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 3/3, strain=97.71
227 ASN D
Selected!
 Mutagenesis: 9 rotamers loaded.
 Rotamer 8/9, strain=28.62
229 THR D
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 3/3, strain=37.22
233 LEU D
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 3/3, strain=25.76
235 ASN D
Selected!
 Mutagenesis: 11 rotamers loaded.
 Rotamer 5/11, strain=22.35
237 THR D
Selected!
 Mutagenesis: 3 rotamers loaded.
 Rotamer 1/3, strain=26.85
239 THR D
Selected!
 Mutagenesis: 2 rotamers loaded

In [11]:
# Re-read the sequences from the mutated structure and list, chain by chain,
# every residue that still differs from seq_ampa (header only = no difference).
ABCD_new = seq_in_chains(pdbout, chains='ABCD')

i0 = 1  # residue number reported for sequence index 0
for chain, chain_seq in ABCD_new.items():
    print('\n\nChain:', chain)
    print_diff_seq(chain_seq, seq_ampa, i0=i0)
    




Chain: A
Resid	seq1       seq2


Chain: B
Resid	seq1       seq2


Chain: C
Resid	seq1       seq2


Chain: D
Resid	seq1       seq2


In [12]:
# For reference: differences between the ORIGINAL structure's sequences and
# seq_ampa, i.e. the mutations that were requested for each chain.
i0 = 1  # residue number reported for sequence index 0
for chain, chain_seq in ABCD.items():
    print('\n\nChain:', chain)
    print_diff_seq(chain_seq, seq_ampa, i0=i0)
    



Chain: A
Resid	seq1       seq2
2,	SER(S) !=  ASN(N)
11,	ARG(R) !=  ASN(N)
12,	GLY(G) !=  GLN(Q)
13,	ALA(A) !=  GLN(Q)
14,	ASP(D) !=  SER(S)
17,	TYR(Y) !=  HIS(H)
18,	SER(S) !=  ALA(A)
22,	VAL(V) !=  PHE(F)
23,	GLY(G) !=  ALA(A)
24,	MET(M) !=  LEU(L)
25,	VAL(V) !=  SER(S)
27,	PHE(F) !=  LEU(L)
28,	SER(S) !=  THR(T)
29,	THR(T) !=  GLU(E)
30,	GLU(E) !=  PRO(P)
31,	PHE(F) !=  PRO(P)
32,	ARG(R) !=  LYS(K)
34,	THR(T) !=  LEU(L)
36,	HIS(H) !=  GLN(Q)
39,	ASN(N) !=  ILE(I)
40,	LEU(L) !=  VAL(V)
41,	GLU(E) !=  ASN(N)
42,	VAL(V) !=  ILE(I)
43,	ALA(A) !=  SER(S)
44,	ASN(N) !=  ASP(D)
47,	ALA(A) !=  GLU(E)
48,	VAL(V) !=  MET(M)
50,	ASN(N) !=  TYR(Y)
51,	ALA(A) !=  ARG(R)
58,	ARG(R) !=  LYS(K)
68,	ASP(D) !=  GLU(E)
69,	LYS(K) !=  ARG(R)
70,	LYS(K) !=  ARG(R)
71,	SER(S) !=  THR(T)
74,	THR(T) !=  MET(M)
75,	ILE(I) !=  LEU(L)
81,	THR(T) !=  ALA(A)
85,	SER(S) !=  CYS(C)
93,	THR(T) !=  VAL(V)
95,	GLY(G) !=  THR(T)
96,	THR(T) !=  SER(S)
97,	HIS(H) !=  ASN(N)
98,	PRO(P) !=  GLN(Q)
101,	ILE(I) !=  LEU(L)