In [5]:
# Autoreload 
%load_ext autoreload
%autoreload 2

from pathlib import Path 
import pandas as pd 
import numpy as np
import os 
import re 
import gzip 
import shutil
import Bio.PDB.MMCIF2Dict
from typing import Union, List, Tuple, Dict, Optional
from pathlib import Path
from tqdm import tqdm

# Silence pandas' chained-assignment warning (SettingWithCopyWarning) for the whole kernel.
# NOTE(review): this also hides genuine chained-assignment mistakes in later cells;
# prefer explicit `.loc[...]` / `.copy()` over a global opt-out.
pd.options.mode.chained_assignment = None  # default='warn'

from phosphosite.utils import aa1to3, aa3to1
from phosphosite import GAMMA_OXYGEN_CODES


from phosphosite.domain import map_site_to_domain, download_uniprot_data
from phosphosite import UNIPROT_DATA_DIR 
# NOTE(review): the boolean returned here is discarded — this is not the cell's last
# expression, so nothing is displayed and a missing directory goes unnoticed.
UNIPROT_DATA_DIR.is_dir()

from phosphosite.bubble.data import result_df as df
from phosphosite.motif.processing import make_motif_df


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Select the residue type and the reference atom to analyse.
ref_atom = "oxygen"  # supported values: "CA" or "oxygen"
residue = "S"

# Use "site_res" when the table has that column, otherwise fall back to "res".
res_col = "site_res" if "site_res" in df.columns else "res"
dff = df[df[res_col] == residue]

if ref_atom == "CA":
    dff = dff[dff["ref_atom"] == ref_atom]
elif ref_atom == "oxygen":
    # "oxygen" is shorthand for any atom code listed in GAMMA_OXYGEN_CODES.
    dff = dff[dff["ref_atom"].isin(GAMMA_OXYGEN_CODES)]
else:
    # Fail loudly: an unrecognised value would otherwise skip the atom filter
    # and pool rows for every reference atom into the downstream counts.
    raise ValueError(
        f"Unsupported ref_atom {ref_atom!r}; expected 'CA' or 'oxygen'."
    )

In [12]:
# Column-name arguments shared by the `make_motif_df` calls in this notebook;
# `orient` and `sep` are passed explicitly at each call site.
kwargs = {
    "prev_col": "prev",
    "next_col": "next",
    "nearest_col": "nn_res",
}


In [14]:
# Triplet-oriented motif table: in the displayed output the index holds motifs
# such as "ASA" and the columns are the nearest residues (A..Y).
motif_df = make_motif_df(dff, **kwargs, orient="triplet", sep=residue)
motif_df

nearest_res,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
motif,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
ASA,254,54,224,321,87,96,142,101,163,239,71,116,141,170,202,269,158,134,48,73
ASC,47,58,74,73,23,44,74,30,45,67,24,34,44,50,70,110,55,42,16,25
ASD,74,28,132,189,45,62,49,64,122,147,16,69,48,106,138,154,109,90,14,63
ASE,169,32,171,241,70,90,81,100,199,212,53,92,93,158,159,193,101,140,25,50
ASF,74,27,154,139,52,77,68,69,106,169,24,69,51,98,125,137,98,64,24,83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YSS,64,30,136,168,67,78,61,45,106,125,28,83,66,86,108,146,85,69,25,101
YST,40,8,83,88,28,46,40,37,65,83,12,49,36,57,78,87,52,44,20,54
YSV,65,25,86,121,45,53,45,52,97,76,23,38,44,66,94,109,96,53,18,40
YSW,10,4,23,33,10,9,10,13,22,31,7,14,13,18,20,28,17,12,4,15


In [19]:
# Sum, per nearest residue, every row whose motif starts with "A" followed by the
# selected residue (e.g. "AS*"). In the recorded outputs these totals equal the
# "AS" row of the orient="prev" table, so this acts as a cross-check.
# The prefix is built from `residue` (the value passed as `sep`) rather than a
# hard-coded "AS", so the check keeps working when `residue` is changed.
motif_prefix = f"A{residue}"
motif_df[motif_df.index.str.startswith(motif_prefix)].sum()

nearest_res
A    2645
C     677
D    3087
E    3917
F    1343
G    1615
H    1509
I    1379
K    2266
L    3419
M     774
N    1719
P    1688
Q    2377
R    2734
S    3613
T    2107
V    1974
W     576
Y    1219
dtype: int64

In [13]:
# Prev-oriented motif table: in the displayed output the index holds motifs such
# as "AS" and the columns are the nearest residues (A..Y).
# NOTE(review): this rebinds `motif_df`, replacing the triplet table built earlier.
motif_df = make_motif_df(dff, **kwargs, orient="prev", sep=residue)
motif_df

nearest_res,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
motif,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AS,2645,677,3087,3917,1343,1615,1509,1379,2266,3419,774,1719,1688,2377,2734,3613,2107,1974,576,1219
CS,733,801,1401,1526,611,609,710,564,966,1202,292,742,592,1343,1069,1405,914,679,321,585
DS,1336,456,2456,2753,1031,1161,1188,967,2049,2225,479,1366,1265,1588,2364,2584,1554,1215,435,1065
ES,1581,487,2505,3413,1055,1162,1045,1167,2296,2567,596,1428,1213,1811,2213,2603,1649,1373,469,1016
FS,1377,440,2474,3481,1113,1142,1312,1091,1856,2223,536,1640,954,1808,1967,2553,1708,1371,489,1191
GS,2069,719,3135,3514,1245,2086,1388,1201,2241,2874,709,1737,1960,1966,2860,3461,2133,1734,564,1291
HS,745,305,1342,1564,632,635,761,623,1084,1320,313,748,657,1140,1446,1352,861,709,248,732
IS,1421,463,2347,3176,1106,1091,1170,1357,2066,2325,648,1577,915,1951,1874,2503,1874,1529,482,1078
KS,1307,458,2717,3672,1129,895,1042,1278,2380,2282,567,1442,918,1699,2034,2093,1540,1392,409,974
LS,3514,1090,5127,7403,2138,2323,2568,2283,4125,5258,1295,3140,2188,4494,4285,5414,3436,3165,924,2036
