### Normalisation of phosphosite signal against 3D motif background.

In [32]:
# Autoreload 
%load_ext autoreload
%autoreload 2

from pathlib import Path 
import pandas as pd 
import numpy as np
import os 
import re 
import gzip 
import shutil
import Bio.PDB.MMCIF2Dict
from typing import Union, List, Tuple, Dict, Optional
from pathlib import Path

pd.options.mode.chained_assignment = None  # default='warn'

from phosphosite import DATA_DIR
from phosphosite.utils import aa1to3, aa3to1


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
# Analysis parameters. In the cells shown here only `radius` is referenced
# (it is interpolated into plot titles as "R={radius}Å"); the other settings
# are presumably consumed elsewhere — TODO confirm.
to_process = "all"
residue_type = "STY"  # presumably the site residues of interest (Ser/Thr/Tyr) — verify
radius = 6.0  # echoed in plot titles with an Å unit
residue_adjacent = 2
next_nearest = 3
ref_atom = "ca"  # presumably the reference atom (C-alpha) used for distances — TODO confirm


In [34]:
# Pull in the background table exposed by the project's load_data module and
# show how many rows it holds.
from phosphosite.load_data import background_df
len(background_df)

974971

In [35]:

# Discard every row with a missing value in "1_res" (the column later passed
# as `nearest_col`) or in either sequence-neighbour column ("-1", "+1").
df = background_df.dropna(subset=["1_res", "-1", "+1"])

In [36]:
from phosphosite.motif.processing import make_count_df, make_motif_df

# Build the motif table for serine sites only.
# NOTE: this rebinds `df` from the filtered site table to the motif table, so
# the cell cannot be re-run without first re-running the filtering cell.
df = make_motif_df(
    df.loc[df["site_res"] == "S"],
    nearest_col="1_res",
    prev_col="-1",
    next_col="+1",
)

In [37]:
# Display the motif table built from the serine ("S") rows.
df 

nearest_res,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
motif,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A-A,265,42,94,169,66,114,51,102,105,234,55,73,36,126,124,183,128,130,28,57
A-C,53,36,27,49,21,53,21,36,32,81,13,21,16,30,33,60,51,61,1,20
A-D,80,24,56,100,49,70,21,63,56,142,19,32,36,55,60,98,63,76,19,29
A-E,184,44,88,164,48,95,49,79,110,211,39,42,36,90,103,128,74,98,20,36
A-F,93,31,73,95,62,88,51,59,88,149,17,53,25,75,98,116,71,74,9,47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Y-S,75,22,58,67,64,69,25,73,61,178,33,37,45,42,51,92,48,64,21,59
Y-T,47,11,40,45,37,47,22,38,47,86,27,27,23,36,32,51,41,58,10,38
Y-V,67,25,44,113,36,67,33,47,70,100,33,39,26,49,76,68,88,89,14,26
Y-W,19,5,9,21,10,9,8,14,16,31,5,7,9,16,17,30,15,18,2,10


In [39]:
# Add 1 to every entry of the motif table (presumably a pseudocount so that
# no cell is zero in a later normalisation — TODO confirm).
# NOTE(review): `df` is rebound to its own incremented copy, so each re-run of
# this cell adds another 1; re-run the cell that builds `df` first.
df = df + 1

In [40]:
# Display the motif table after the +1 increment.
df

nearest_res,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
motif,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A-A,266,43,95,170,67,115,52,103,106,235,56,74,37,127,125,184,129,131,29,58
A-C,54,37,28,50,22,54,22,37,33,82,14,22,17,31,34,61,52,62,2,21
A-D,81,25,57,101,50,71,22,64,57,143,20,33,37,56,61,99,64,77,20,30
A-E,185,45,89,165,49,96,50,80,111,212,40,43,37,91,104,129,75,99,21,37
A-F,94,32,74,96,63,89,52,60,89,150,18,54,26,76,99,117,72,75,10,48
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Y-S,76,23,59,68,65,70,26,74,62,179,34,38,46,43,52,93,49,65,22,60
Y-T,48,12,41,46,38,48,23,39,48,87,28,28,24,37,33,52,42,59,11,39
Y-V,68,26,45,114,37,68,34,48,71,101,34,40,27,50,77,69,89,90,15,27
Y-W,20,6,10,22,11,10,9,15,17,32,6,8,10,17,18,31,16,19,3,11


In [18]:
from phosphosite.motif.visualisation import plot_heatmap

# Rebuild the filtered site table from the background data: keep only rows
# where "1_res", "-1" and "+1" are all present. (`df` was rebound to a motif
# table by earlier cells, so it has to be derived again here.)
df = background_df.dropna(subset=["1_res", "-1", "+1"])

# Deliberately disabled: change the guard to render, for each of S/T/Y, one
# heatmap over all sites and one over phosphosites only.
if False:
    for residue in "STY":
        is_residue = df["site_res"] == residue
        panels = (
            (
                df[is_residue],
                f"All {aa1to3[residue].capitalize()} residues in human proteome: next-nearest spatial neighbour (R={radius}Å)",
            ),
            (
                df[is_residue & (df["phosphosite"] == True)],
                f"{aa1to3[residue].capitalize()} PHOSPHOSITES: next-nearest spatial neighbour (R={radius}Å)",
            ),
        )
        for rows, title in panels:
            motif_df = make_motif_df(
                rows,
                nearest_col="1_res",
                prev_col="-1",
                next_col="+1",
            )
            plot_heatmap(motif_df, title=title)

In [21]:
# Layout of the saved heatmaps. "vertical" keeps the motif table as built
# (motifs as rows, nearest residue as columns) in a tall figure; "horizontal"
# transposes each table so motifs run along the columns of a shorter figure.
placement = "vertical"

# Output directory for the HTML heatmaps. Create it up front so the first
# save cannot fail on a missing directory (harmless if plot_heatmap already
# creates it).
plots = Path("./norm_3d")
plots.mkdir(parents=True, exist_ok=True)

# No normalisation is applied in this cell; the value is only echoed in the
# plot titles.
normalisation = None

# Figure geometry depends only on the placement, so decide it once.
aspect = "auto"
height = 1000 if placement == "horizontal" else 8000

for site in "STY":

    # Split the rows for this site residue by phosphosite status.
    dff = df[df["site_res"] == site]
    psites = dff[dff["phosphosite"] == True]
    not_psites = dff[dff["phosphosite"] == False]

    # (file-name tag, title label, rows to tabulate). The three heatmaps are
    # produced in the same order as before: phosphosites, non-phosphosites,
    # then every site of this residue.
    subsets = (
        ("PHOSPHOSITE", "phosphosites", psites),
        ("NOT_PHOSPHOSITE", "NOT phosphosites", not_psites),
        # The title label used to be a copy-paste of "NOT phosphosites".
        ("ALL", "ALL sites", dff),
    )

    for tag, label, rows in subsets:
        motif_df = make_motif_df(
            rows,
            prev_col="-1",
            next_col="+1",
            nearest_col="1_res",
        )
        # Transpose only after the table for THIS subset exists; the old code
        # transposed a stale `motif_df` before recomputing it, so the
        # horizontal layout never took effect (and raised NameError on a
        # fresh kernel).
        if placement == "horizontal":
            motif_df = motif_df.T

        filepath = plots / f"{site}_{tag}_1-NN_heatmap.html"
        plot_heatmap(
            motif_df,
            title=f"{site} {label}: frequency of spatially-nearest residue for sequence-adjacent pairs (R={radius}Å) normalisation={normalisation}",
            aspect=aspect,
            height=height,
            filepath=filepath,
        )