## Demo: how to map the positions of post-translational modifications to 3D structures
The dataset below is a processed version of the data from:

Site-specific mapping and quantification of protein S-sulphenylation in cells.
Yang J, Gupta V, Carroll KS, Liebler DC (2014) Nat Commun. 5:4776.
[DOI: 10.1038/ncomms5776](https://doi.org/10.1038/ncomms5776)

In [1]:
import warnings
warnings.filterwarnings("ignore") # numpy version issue?
import pandas as pd
import numpy as np
from ipywidgets import interact, IntSlider
import py3Dmol

In [2]:
# Load the processed dataset from the parquet file shipped with the repository.
df = pd.read_parquet('../../data/ncomms5776-s3-m_pdb.parquet')
# add column with PTM type; a scalar value is broadcast to every row
df = df.assign(ptms='S-sulphenylation')

In [3]:
# Preview the first rows, including the newly added `ptms` column.
df.head()

Unnamed: 0,structureChainId,start,end,_c0,GeneName,ProteinDescription,ModifiedSite,Modification,PeptideSequence,ModifiedPeptideSequence,modPeptide,pdbSeq,modPosition,pdbResNum,pdbSeqNum,uniprotId,uniprotNum,ptms
0,1JHB.A,77,97,305,glutaredoxin (thioltransferase),glutaredoxin-1,C79,333@C79,DCIGGCSDLVSLQQSGELLTR,DC#IGGCSDLVSLQQSGELLTR,"D(C,333)IGGCSDLVSLQQSGELLTR",K.DCIGGCSDLVSLQQSGELLTR.L,79,79,79,P35754,79,S-sulphenylation
1,3J8C.A,158,164,227,"eukaryotic translation initiation factor 3, su...",eukaryotic translation initiation factor 3 sub...,C160,333@C160,QCLDLLR,QC#LDLLR,"Q(C,333)LDLLR",R.QCLDLLR.N,160,160,160,Q14152,160,S-sulphenylation
2,3J8B.A,75,83,226,"eukaryotic translation initiation factor 3, su...",eukaryotic translation initiation factor 3 sub...,C78,333@C78,NICQQVNIK,NIC#QQVNIK,"NI(C,333)QQVNIK",K.NICQQVNIK.S,78,78,78,Q14152,78,S-sulphenylation
3,2P0W.B,275,284,336,histone acetyltransferase 1,histone acetyltransferase type B catalytic sub...,C299,333@C299,LCQDLPCFSR,LCQDLPC#FSR,"LCQDLP(C,333)FSR",K.LCQDLPCFSR.E,282,299,282,O14929,299,S-sulphenylation
4,5N5N.D,216,240,881,"tubulin, beta 4A class IVa",tubulin beta-4A chain,C239,333@C239,LTTPTYGDLNHLVSATMSGVTTCLR,LTTPTYGDLNHLVSATMSGVTTC#LR,"LTTPTYGDLNHLVSATMSGVTT(C,333)LR",K.LTTPTYGDLNHLVSATMSGVTTCLR.F,239,241,239,P07437,239,S-sulphenylation


In [4]:
def view_modifications(df, *args):
    """Interactively browse modified residues on their 3D structures.

    Builds an ipywidgets control panel (two checkboxes and a slider) and, for
    the selected row of ``df``, renders the structure with py3Dmol,
    highlighting the modified residue and the residues surrounding it.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per modification. The columns ``structureChainId``
        (formatted as ``"<pdbId>.<chainId>"``), ``pdbResNum`` and ``ptms``
        are read for the selected row.
    *args : str
        Names of additional columns of ``df`` whose values are printed above
        the viewer for the selected row.

    Returns
    -------
    The object returned by ``ipywidgets.interact``.

    Raises
    ------
    ValueError
        If ``df`` has no rows.
    """
    if len(df) == 0:
        # A slider over zero rows would have max < min; fail with a clear message instead.
        raise ValueError("df must contain at least one row to display")

    def view3d(show_bio_assembly=False, show_surface=False, i=0):
        """Render row ``i`` of ``df``; called by ``interact`` on every widget change."""
        row = df.iloc[i]  # look the row up once instead of once per field
        pdb_id, chain_id = row['structureChainId'].split('.')
        res_num = str(row['pdbResNum'])
        label = row['ptms']

        print(pdb_id, chain_id, res_num)
        for a in args:
            # f-string formatting so non-string columns (e.g. integer positions) print too
            print(f"{a}: {row[a]}")

        mod_res = {'resi': res_num, 'chain': chain_id}
        # select by distance
        selection = {'chain': chain_id, 'resi': res_num, 'byres': True, 'expand': 6}

        viewer = py3Dmol.view(query='pdb:' + pdb_id, options={'doAssembly': show_bio_assembly})

        # set styles: cartoon for everything, thin sticks for the neighbourhood,
        # thick sticks plus a translucent sphere for the modified residue itself
        viewer.setStyle({'cartoon': {'color': 'spectrum', 'width': 0.6, 'opacity': 0.8}})
        viewer.addStyle(selection, {'stick': {'colorscheme': 'orangeCarbon', 'radius': 0.15}})
        viewer.addStyle(mod_res, {'stick': {'colorscheme': 'redCarbon', 'radius': 0.4}})
        viewer.addStyle(mod_res, {'sphere': {'colorscheme': 'gray', 'opacity': 0.7}})
        viewer.addLabel(res_num + ": " + label,
                        {'fontColor': 'black', 'fontSize': 8, 'backgroundColor': 'lightgray'},
                        mod_res)

        viewer.zoomTo(selection)

        if show_surface:
            viewer.addSurface(py3Dmol.SES, {'opacity': 0.8, 'color': 'lightblue'})

        return viewer.show()

    # One slider position per row; only redraw when the slider is released.
    s_widget = IntSlider(min=0, max=len(df) - 1, description='Structure', continuous_update=False)

    return interact(view3d, show_bio_assembly=False, show_surface=False, i=s_widget)

In [5]:
# Launch the interactive viewer; the extra column names are printed for the selected row.
# The trailing semicolon suppresses the repr of the returned object.
view_modifications(df, 'uniprotId', 'GeneName', 'modPeptide');

interactive(children=(Checkbox(value=False, description='show_bio_assembly'), Checkbox(value=False, descriptio…