# Data

> Load various kinase-related datasets

## Setup

In [None]:
#| default_exp data

In [None]:
#| export
import pandas as pd
from functools import lru_cache

In [None]:
#| hide
from nbdev import show_doc

In [None]:
#| hide
# Keep rendered tables compact: only a handful of rows are printed per frame.
pd.options.display.max_rows = 5
# Print up to 100 columns; frames wider than that are elided with "...".
pd.options.display.max_columns = 100

```python
from katlas.data import *
```

## Dataset

This section shows how to load the kinase information data and the phosphorylation site data.

In [None]:
#| export
class Data:
    """
    A class for fetching various datasets.

    Every `get_*` loader builds a URL under `BASE_URL`. Files read through
    `fetch_data` / `fetch_csv` are downloaded once per process (memoised per URL),
    and the loaders hand out an independent copy of the cached table, so modifying
    a returned DataFrame never affects the result of a later call.
    """

    BASE_URL = "https://github.com/sky1ove/katlas/raw/main/"

    #--------------------------- Low-level readers ---------------------------
    @staticmethod
    @lru_cache(maxsize=None)
    def fetch_data(url: str) -> pd.DataFrame:
        """
        Read the parquet file at `url` and return it as a DataFrame.
        Renames 'Unnamed: 0' column to 'kinase' if present.

        The result is memoised per URL, so repeated calls return the *same*
        object; treat it as read-only (the `get_*` loaders return copies).
        """
        df = pd.read_parquet(url)
        if "Unnamed: 0" in df.columns:
            df = df.rename(columns={"Unnamed: 0": "kinase"})
        return df

    @staticmethod
    @lru_cache(maxsize=None)
    def fetch_csv(url: str) -> pd.DataFrame:
        """
        Read the CSV file at `url` and return it as a DataFrame.
        Renames 'Unnamed: 0' column to 'kinase' if present.

        The result is memoised per URL, so repeated calls return the *same*
        object; treat it as read-only (the `get_*` loaders return copies).
        """
        df = pd.read_csv(url)
        if "Unnamed: 0" in df.columns:
            df = df.rename(columns={"Unnamed: 0": "kinase"})
        return df

    @staticmethod
    def _load_parquet(url: str) -> pd.DataFrame:
        """Return an independent copy of the cached parquet table at `url`."""
        # Copying keeps caller-side edits from leaking into the lru_cache entry.
        return Data.fetch_data(url).copy()

    @staticmethod
    def _load_csv(url: str) -> pd.DataFrame:
        """Return an independent copy of the cached CSV table at `url`."""
        # Copying keeps caller-side edits from leaking into the lru_cache entry.
        return Data.fetch_csv(url).copy()

    @staticmethod
    def _convert_numeric_columns(df: pd.DataFrame) -> pd.DataFrame:
        """
        Convert column names that are integer-like strings (e.g. '-5', '0', '3')
        into integers; every other label is left untouched.

        The columns of `df` are replaced in place and `df` itself is returned.
        """
        def _as_int(label):
            if isinstance(label, str):
                # Allow at most one leading minus sign. `isdecimal` only accepts
                # characters that `int()` can parse, so labels such as '--5' or
                # '²' are kept as strings instead of raising ValueError.
                digits = label[1:] if label.startswith('-') else label
                if digits.isdecimal():
                    return int(label)
            return label

        df.columns = [_as_int(col) for col in df.columns]
        return df

    #--------------------------- Kinase and PSPA ---------------------------
    @staticmethod
    def get_kinase_info() -> pd.DataFrame:
        """
        Get information of 523 human kinases on kinome tree. 
        Group, family, and subfamily classifications are sourced from Coral; 
        full protein sequences are retrieved using UniProt IDs; 
        kinase domain sequences are obtained from KinaseDomain.com; 
        and cellular localization data is extracted from published literature.
        """
        url = f"{Data.BASE_URL}dataset/kinase_info.csv"
        return Data._load_csv(url)

    @staticmethod
    def get_kinase_uniprot() -> pd.DataFrame:
        """
        Get information of 672 uniprot human kinases, which were retrieved from UniProt by filtering all human protein entries using the keyword 'kinase'. 
        It includes additional pseudokinases and lipid kinases.
        """
        url = f"{Data.BASE_URL}dataset/uniprot_human_keyword_kinase.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_pspa_tyr_norm() -> pd.DataFrame:
        """Get PSPA normalized data of tyrosine kinases."""
        url = f"{Data.BASE_URL}dataset/PSPA/pspa_tyr_norm.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_pspa_st_norm() -> pd.DataFrame:
        """Get PSPA normalized data of serine/threonine kinases."""
        url = f"{Data.BASE_URL}dataset/PSPA/pspa_st_norm.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_pspa_all_norm() -> pd.DataFrame:
        """Get PSPA normalized data of serine/threonine and tyrosine kinases."""
        url = f"{Data.BASE_URL}dataset/PSPA/pspa_all_norm.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_pspa_st_pct() -> pd.DataFrame:
        """Get PSPA reference score to calculate percentile for serine/threonine kinases."""
        url = f"{Data.BASE_URL}dataset/PSPA/pspa_pct_st.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_pspa_tyr_pct() -> pd.DataFrame:
        """Get PSPA reference score to calculate percentile for tyrosine kinases."""
        url = f"{Data.BASE_URL}dataset/PSPA/pspa_pct_tyr.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_num_dict() -> dict:
        """Get a dictionary mapping kinase to number of random amino acids in PSPA."""
        url = f"{Data.BASE_URL}dataset/PSPA/pspa_divide_num.csv"
        # Read directly rather than through the cached `fetch_csv`, so the file is
        # re-read on every call and no 'Unnamed: 0' renaming is applied.
        num = pd.read_csv(url)
        return num.set_index("kinase")["num_random_aa"].to_dict()

    #--------------------------- CDDM ---------------------------
    @staticmethod
    def get_ks_dataset(add_kinase_info=True) -> pd.DataFrame:
        """
        Get kinase substrate dataset collected from public resources, with the option of adding kinase info.

        Integer-like column names are converted to ints and, when a
        'substrate_phosphoseq' column exists, an upper-cased 'substrate_sequence'
        column is added.

        With `add_kinase_info=True` the following columns are appended, matched on
        the UniProt ID of 'kinase_uniprot' with any isoform suffix ('-N') removed:
        'kinase_on_tree' (1 if the ID is listed in `get_kinase_info`, else 0),
        'kinase_genes' (from `get_kinase_uniprot`), 'kinase_group', 'kinase_family',
        'kinase_pspa_big' and 'kinase_pspa_small' (from `get_kinase_info`).
        """
        url = f"{Data.BASE_URL}dataset/CDDM/ks_datasets_20250407.parquet"
        # Work on a private copy: the columns added below must not end up in the
        # cached table, otherwise a later call with add_kinase_info=False would
        # still return them.
        df = Data._convert_numeric_columns(Data._load_parquet(url))
        if 'substrate_phosphoseq' in df.columns:
            df['substrate_sequence'] = df['substrate_phosphoseq'].str.upper()

        if add_kinase_info:
            # Remove pseudokinase duplicates by UniProt ID, keep only one entry per kinase
            info = Data.get_kinase_info().sort_values('kinase').drop_duplicates('uniprot')
            info_indexed = info.set_index('uniprot')

            # UniProt ID without isoform suffix, kept as a local Series so no
            # temporary column has to be added to (and dropped from) the frame.
            uniprot_clean = df['kinase_uniprot'].str.split('-').str[0]

            df['kinase_on_tree'] = uniprot_clean.isin(info['uniprot']).astype(int)

            kinase_gene_map = Data.get_kinase_uniprot().set_index('Entry')['Gene Names']
            df['kinase_genes'] = uniprot_clean.map(kinase_gene_map)

            df['kinase_group'] = uniprot_clean.map(info_indexed['group'])
            df['kinase_family'] = uniprot_clean.map(info_indexed['family'])
            df['kinase_pspa_big'] = uniprot_clean.map(info_indexed['pspa_category_big'])
            df['kinase_pspa_small'] = uniprot_clean.map(info_indexed['pspa_category_small'])
        return df

    @staticmethod
    def get_ks_unique() -> pd.DataFrame:
        """Get kinase substrate dataset with unique site sequence (most phosphorylated version)."""
        url = f"{Data.BASE_URL}dataset/CDDM/ks_datasets_seq_unique_20250407.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_cddm() -> pd.DataFrame:
        """Get the primary CDDM dataset."""
        url = f"{Data.BASE_URL}dataset/CDDM/ks_main.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_cddm_upper() -> pd.DataFrame:
        """Get the CDDM dataset for all uppercase."""
        url = f"{Data.BASE_URL}dataset/CDDM/ks_main_upper.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_cddm_others() -> pd.DataFrame:
        """Get CDDM data for other kinases with mutations."""
        url = f"{Data.BASE_URL}dataset/CDDM/ks_others.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_cddm_others_info() -> pd.DataFrame:
        """Get additional information for CDDM 'others' dataset."""
        url = f"{Data.BASE_URL}dataset/CDDM/ks_others_info.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_combine() -> pd.DataFrame:
        """Get the combined PSPA and CDDM dataset."""
        url = f"{Data.BASE_URL}dataset/combine_main.parquet"
        return Data._load_parquet(url)

    #--------------------------- Amino Acid ---------------------------
    @staticmethod
    def get_aa_info() -> pd.DataFrame:
        """Get amino acid information."""
        url = f"{Data.BASE_URL}dataset/amino_acids/aa_info.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_aa_rdkit() -> pd.DataFrame:
        """Get RDKit representations of amino acids."""
        url = f"{Data.BASE_URL}dataset/amino_acids/aa_rdkit.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_aa_morgan() -> pd.DataFrame:
        """Get Morgan fingerprint representations of amino acids."""
        url = f"{Data.BASE_URL}dataset/amino_acids/aa_morgan.parquet"
        return Data._load_parquet(url)

    #--------------------------- Phosphoproteomics ---------------------------
    @staticmethod
    def get_cptac_ensembl_site() -> pd.DataFrame:
        """Get CPTAC dataset with unique EnsemblProteinID+site."""
        url = f"{Data.BASE_URL}dataset/phosphosites/linkedOmicsKB_ref_pan.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_cptac_unique_site() -> pd.DataFrame:
        """Get CPTAC dataset with unique site sequences."""
        url = f"{Data.BASE_URL}dataset/phosphosites/cptac_unique_site.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_cptac_gene_site() -> pd.DataFrame:
        """Get CPTAC dataset with unique Gene+site."""
        url = f"{Data.BASE_URL}dataset/phosphosites/linkedOmics_ref_pan.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_psp_human_site() -> pd.DataFrame:
        """Get PhosphoSitePlus human dataset (Gene+site)."""
        url = f"{Data.BASE_URL}dataset/phosphosites/psp_human.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_ochoa_site() -> pd.DataFrame:
        """Get phosphoproteomics dataset from Ochoa et al."""
        url = f"{Data.BASE_URL}dataset/phosphosites/ochoa_site.parquet"
        return Data._load_parquet(url)

    @staticmethod
    def get_combine_site_psp_ochoa() -> pd.DataFrame:
        """
        Get the combined dataset from Ochoa and PhosphoSitePlus.
        Integer-like column names are converted to ints.
        """
        url = f"{Data.BASE_URL}dataset/phosphosites/combine_site_psp_ochoa.parquet"
        return Data._convert_numeric_columns(Data._load_parquet(url))

    @staticmethod
    def get_combine_site_phosphorylated() -> pd.DataFrame:
        """
        Get the combined phosphorylated dataset from Ochoa and PhosphoSitePlus.
        Integer-like column names are converted to ints.
        """
        url = f"{Data.BASE_URL}dataset/phosphosites/phosphorylated_combine_site.parquet"
        return Data._convert_numeric_columns(Data._load_parquet(url))

    @staticmethod
    def get_human_site() -> pd.DataFrame:
        """
        Get the combined phosphorylated dataset from Ochoa and PhosphoSitePlus (20-length version).
        Integer-like column names are converted to ints.
        """
        url = f"{Data.BASE_URL}dataset/phosphosites/phosphorylated_combine_site20.parquet"
        return Data._convert_numeric_columns(Data._load_parquet(url))

Datasets used in this study can be accessed through the `Data` class.

In [None]:
#| echo: false
show_doc(Data.get_kinase_info)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L54){target="_blank" style="float:right; font-size:smaller"}

### Data.get_kinase_info

>      Data.get_kinase_info ()

*Get information of 523 human kinases on kinome tree. 
Group, family, and subfamily classifications are sourced from Coral; 
full protein sequences are retrieved using UniProt IDs; 
kinase domain sequences are obtained from KinaseDomain.com; 
and cellular localization data is extracted from published literature.*

To load kinase information data:

In [None]:
Data.get_kinase_info()

Unnamed: 0,kinase,ID_coral,uniprot,ID_HGNC,group,family,subfamily_coral,subfamily,in_ST_paper,in_Tyr_paper,in_cddm,pseudo,pspa_category_small,pspa_category_big,cddm_big,cddm_small,length,human_uniprot_sequence,kinasecom_domain,nucleus,cytosol,cytoskeleton,plasma membrane,mitochondrion,Golgi apparatus,endoplasmic reticulum,vesicle,centrosome,aggresome,main_location
0,AAK1,AAK1,Q2M2I8,AAK1,Other,NAK,,NAK,1,0,0,0,NAK,NAK,,,339,MKKFFDSRREQGGSGLGSGSSGGGGSTSGLGSGYIGRVFGIGRQQV...,VTVDEVLAEGGFAIVFLVRTSNGMKCALKRMFVNNEHDLQVCKREI...,,,,,,,,,,,
1,ABL1,ABL1,P00519,ABL1,TK,Abl,,Abl,0,1,1,0,ABL,ABL,1.0,2.0,1130,MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAA...,ITMKHKLGGGQYGEVYEGVWKKYSLTVAVKTLKEDTMEVEEFLKEA...,,6.0,,4.0,,,,,,,cytosol
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
521,EEF2K,eEF2K,O00418,EEF2K,Atypical,Alpha,eEF2K,eEF2K,1,0,1,0,ALPHA/MLK,ALPHA/MLK,2.0,7.0,725,MADEDLIFRLEGVDGGQSPRAGHDGDSDGDSDDEEGYFICPITDDP...,VTGEWLDDEVLIKMASQPFGRGAMRECFRTKKLSNFLHAQQWKGAS...,,9.0,,1.0,,,,,,,cytosol
522,FAM20C,FAM20C,Q8IXL6,FAM20C,Atypical,FAM20C,,FAM20C,1,0,0,0,FAM20C,acidophilic,,,562,MKMMLVRRFRVLILMVFLVACALHIALDLLPRLERRGARPSGEPGC...,FISPANNICFYGECSYYCSTEHALCGKPDQIEGSLAAFLPDLSLAK...,,2.0,,,,7.0,1.0,,,,Golgi apparatus


In [None]:
#| echo: false
show_doc(Data.get_kinase_uniprot)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L66){target="_blank" style="float:right; font-size:smaller"}

### Data.get_kinase_uniprot

>      Data.get_kinase_uniprot ()

*Get information of 672 uniprot human kinases, which were retrieved from UniProt by filtering all human protein entries using the keyword 'kinase'. 
It includes additional pseudokinases and lipid kinases.*

In [None]:
Data.get_kinase_uniprot()

Unnamed: 0,Entry,Entry Name,Protein names,Gene Names,uniprot_keyword_kinase,on_tree,Organism,Keywords,Sequence
0,A2RU49,HYKK_HUMAN,Hydroxylysine kinase (5-hydroxy-L-lysine kinas...,HYKK AGPHD1,1,0,Homo sapiens (Human),Alternative splicing;Cytoplasm;Kinase;Proteomi...,MSSGNYQQSEALSKPTFSEEQASALVESVFGLKVSKVRPLPSYDDQ...
1,A4D2B8,PM2P1_HUMAN,Putative postmeiotic segregation increased 2-l...,PMS2P1 PMS2L1 PMS2L13 PMS2L6 PMS2L8 PMS3 PMS8 ...,1,0,Homo sapiens (Human),Alternative splicing;Kinase;Reference proteome...,MVTMCGGHRPENFLHQVLTEFGEELAGEGKSEVGGGAPRSYLQVAS...
...,...,...,...,...,...,...,...,...,...
670,Q8NCB2,CAMKV_HUMAN,CaM kinase-like vesicle-associated protein,CAMKV,0,1,Homo sapiens (Human),Alternative splicing;Calmodulin-binding;Cell m...,MPFGCVTLGDKKNYNQPSEVTDRYDLGQVIKTEEFCEIFRAKDKTT...
671,Q8IV63,VRK3_HUMAN,Serine/threonine-protein kinase VRK3 (EC 2.7.1...,VRK3,0,1,Homo sapiens (Human),3D-structure;Alternative splicing;Cytoplasm;Nu...,MISFCPDCGKSIQAAFKFCPYCGNSLPVEEHVGSQTFVNPHVSSFQ...


In [None]:
#| echo: false
show_doc(Data.get_pspa_tyr_norm)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L75){target="_blank" style="float:right; font-size:smaller"}

### Data.get_pspa_tyr_norm

>      Data.get_pspa_tyr_norm ()

*Get PSPA normalized data of tyrosine kinase.*

In [None]:
Data.get_pspa_tyr_norm()

Unnamed: 0_level_0,-5P,-5G,-5A,-5C,-5S,-5T,-5V,-5I,-5L,-5M,-5F,-5Y,-5W,-5H,-5K,-5R,-5Q,-5N,-5D,-5E,-5s,-5t,-5y,-4P,-4G,-4A,-4C,-4S,-4T,-4V,-4I,-4L,-4M,-4F,-4Y,-4W,-4H,-4K,-4R,-4Q,-4N,-4D,-4E,-4s,-4t,-4y,-3P,-3G,-3A,-3C,...,4A,4C,4S,4T,4V,4I,4L,4M,4F,4Y,4W,4H,4K,4R,4Q,4N,4D,4E,4s,4t,4y,5P,5G,5A,5C,5S,5T,5V,5I,5L,5M,5F,5Y,5W,5H,5K,5R,5Q,5N,5D,5E,5s,5t,5y,0S,0T,0Y,0s,0t,0y
kinase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
ABL1,0.0668,0.0689,0.0646,0.0520,0.0564,0.0539,0.0485,0.0448,0.0520,0.0536,0.0454,0.0454,0.0283,0.0597,0.0600,0.0662,0.0594,0.0606,0.0575,0.0535,0.0507,0.0507,0.0658,0.0680,0.0758,0.0687,0.0518,0.0478,0.0442,0.0561,0.0443,0.0547,0.0491,0.0390,0.0390,0.0345,0.0496,0.0485,0.0530,0.0655,0.0502,0.0667,0.0843,0.0618,0.0618,0.0925,0.0634,0.0642,0.0568,0.0527,...,0.0526,0.0584,0.0563,0.0522,0.0514,0.0429,0.0510,0.0664,0.0645,0.0645,0.0600,0.0647,0.0647,0.0757,0.0607,0.0499,0.0322,0.0342,0.0217,0.0217,0.0306,0.0769,0.0707,0.0624,0.0493,0.0591,0.0661,0.0553,0.0378,0.0548,0.0603,0.0392,0.0392,0.0413,0.0613,0.0652,0.0756,0.0526,0.0512,0.0362,0.0339,0.0254,0.0254,0.0337,0,0,1,0,0,1
TNK2,0.0679,0.0818,0.0627,0.0617,0.0529,0.0528,0.0419,0.0463,0.0437,0.0453,0.0539,0.0539,0.0598,0.0583,0.0624,0.0727,0.0537,0.0553,0.0451,0.0435,0.0430,0.0430,0.0555,0.0723,0.0682,0.0665,0.0567,0.0458,0.0423,0.0436,0.0426,0.0433,0.0495,0.0584,0.0584,0.0697,0.0556,0.0664,0.0757,0.0647,0.0532,0.0409,0.0413,0.0398,0.0398,0.0493,0.0755,0.0653,0.0572,0.0544,...,0.0580,0.0648,0.0670,0.0571,0.0470,0.0493,0.0412,0.0568,0.0516,0.0516,0.0499,0.0559,0.0430,0.0553,0.0485,0.0502,0.0416,0.0464,0.0452,0.0452,0.0533,0.0644,0.0599,0.0609,0.0629,0.0564,0.0634,0.0527,0.0502,0.0641,0.0539,0.0679,0.0679,0.0680,0.0499,0.0385,0.0302,0.0531,0.0465,0.0630,0.0572,0.0364,0.0364,0.0572,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YES1,0.0677,0.0571,0.0537,0.0530,0.0527,0.0505,0.0435,0.0375,0.0400,0.0463,0.0478,0.0478,0.0419,0.0564,0.0681,0.0647,0.0610,0.0752,0.0748,0.0610,0.0652,0.0652,0.0668,0.0625,0.0695,0.0524,0.0530,0.0470,0.0404,0.0476,0.0416,0.0528,0.0529,0.0406,0.0406,0.0424,0.0566,0.0549,0.0561,0.0607,0.0559,0.0786,0.0875,0.0596,0.0596,0.0705,0.0564,0.0580,0.0456,0.0572,...,0.0464,0.0747,0.0521,0.0561,0.0468,0.0452,0.0481,0.0716,0.0689,0.0689,0.0619,0.0623,0.0587,0.0757,0.0652,0.0499,0.0421,0.0492,0.0371,0.0371,0.0467,0.0762,0.0532,0.0533,0.0610,0.0596,0.0558,0.0416,0.0375,0.0467,0.0518,0.0627,0.0627,0.0456,0.0593,0.0662,0.0840,0.0559,0.0604,0.0422,0.0482,0.0374,0.0374,0.0411,0,0,1,0,0,1
ZAP70,0.0602,0.0880,0.0623,0.0496,0.0471,0.0514,0.0465,0.0380,0.0307,0.0526,0.0479,0.0479,0.0347,0.0641,0.0471,0.0452,0.0492,0.0703,0.0870,0.0777,0.1622,0.1622,0.1208,0.0977,0.0792,0.0865,0.0556,0.0487,0.0367,0.0375,0.0312,0.0317,0.0344,0.0274,0.0274,0.0347,0.0474,0.0338,0.0380,0.0571,0.0457,0.0929,0.1392,0.1432,0.1432,0.1685,0.0588,0.0610,0.0537,0.0581,...,0.0491,0.0520,0.0583,0.0530,0.0504,0.0304,0.0424,0.0554,0.0393,0.0393,0.0539,0.0671,0.0562,0.0557,0.0712,0.0406,0.0597,0.0558,0.0440,0.0440,0.0318,0.1269,0.0904,0.0737,0.0457,0.0638,0.0692,0.0343,0.0235,0.0305,0.0461,0.0343,0.0343,0.0344,0.0484,0.0477,0.0290,0.0520,0.0537,0.0709,0.0710,0.0862,0.0862,0.0605,0,0,1,0,0,1


In [None]:
#| echo: false
show_doc(Data.get_pspa_st_norm)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L81){target="_blank" style="float:right; font-size:smaller"}

### Data.get_pspa_st_norm

>      Data.get_pspa_st_norm ()

*Get PSPA normalized data of serine/threonine kinase.*

In [None]:
Data.get_pspa_st_norm()

Unnamed: 0_level_0,-5P,-5G,-5A,-5C,-5S,-5T,-5V,-5I,-5L,-5M,-5F,-5Y,-5W,-5H,-5K,-5R,-5Q,-5N,-5D,-5E,-5s,-5t,-5y,-4P,-4G,-4A,-4C,-4S,-4T,-4V,-4I,-4L,-4M,-4F,-4Y,-4W,-4H,-4K,-4R,-4Q,-4N,-4D,-4E,-4s,-4t,-4y,-3P,-3G,-3A,-3C,...,3A,3C,3S,3T,3V,3I,3L,3M,3F,3Y,3W,3H,3K,3R,3Q,3N,3D,3E,3s,3t,3y,4P,4G,4A,4C,4S,4T,4V,4I,4L,4M,4F,4Y,4W,4H,4K,4R,4Q,4N,4D,4E,4s,4t,4y,0s,0t,0y,0S,0T,0Y
kinase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
AAK1,0.0720,0.0245,0.0284,0.0456,0.0425,0.0425,0.0951,0.1554,0.0993,0.0864,0.0425,0.0952,0.0315,0.0331,0.0262,0.0956,0.0560,0.0275,0.0160,0.0153,0.0201,0.0201,0.0611,0.0534,0.0642,0.0706,0.0560,0.0619,0.0619,0.0619,0.0621,0.0742,0.0693,0.0520,0.0534,0.0403,0.0514,0.0809,0.0715,0.0627,0.0429,0.0332,0.0560,0.0332,0.0332,0.0339,0.1084,0.0512,0.1119,0.0655,...,0.0582,0.0742,0.0582,0.0582,0.0610,0.0388,0.0489,0.0437,0.0430,0.0533,0.0481,0.0674,0.0739,0.0901,0.0623,0.0735,0.0405,0.0371,0.0335,0.0335,0.0359,0.0628,0.0702,0.0646,0.0603,0.0560,0.0560,0.0422,0.0415,0.0461,0.0464,0.0523,0.0521,0.0826,0.0560,0.0831,0.0928,0.0635,0.0592,0.0389,0.0457,0.0251,0.0251,0.0270,0.1013,1.0,0.0,0.1013,1.0,0.0
ACVR2A,0.0415,0.0481,0.0584,0.0489,0.0578,0.0578,0.0598,0.0625,0.0596,0.0521,0.0600,0.0578,0.0803,0.0570,0.0510,0.0475,0.0430,0.0536,0.0888,0.0789,0.0783,0.0783,0.0760,0.0466,0.0549,0.0555,0.0551,0.0549,0.0549,0.0543,0.0526,0.0520,0.0576,0.0619,0.0586,0.0754,0.0551,0.0406,0.0457,0.0482,0.0501,0.1040,0.0869,0.0809,0.0809,0.0681,0.0502,0.0653,0.0537,0.0588,...,0.0529,0.0481,0.0590,0.0590,0.0567,0.0554,0.0612,0.0589,0.0644,0.0654,0.0635,0.0590,0.0462,0.0387,0.0499,0.0524,0.0622,0.0870,0.0519,0.0519,0.0815,0.0758,0.0544,0.0498,0.0517,0.0563,0.0563,0.0516,0.0563,0.0512,0.0662,0.0523,0.0579,0.0800,0.0573,0.0527,0.0491,0.0616,0.0556,0.0640,0.0640,0.0703,0.0703,0.0589,0.9833,1.0,0.0,0.9833,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YSK4,0.0593,0.0728,0.0744,0.0734,0.0597,0.0597,0.0517,0.0400,0.0433,0.0512,0.0636,0.0600,0.0755,0.0664,0.0525,0.0597,0.0481,0.0616,0.0692,0.0508,0.0703,0.0703,0.0474,0.0622,0.0683,0.0618,0.0652,0.0618,0.0618,0.0477,0.0412,0.0519,0.0570,0.0610,0.0620,0.0714,0.0626,0.0491,0.0523,0.0551,0.0649,0.0671,0.0645,0.0663,0.0663,0.0534,0.0561,0.0683,0.0571,0.0636,...,0.0531,0.0545,0.0578,0.0578,0.0540,0.0436,0.0481,0.0473,0.0573,0.0578,0.0625,0.0645,0.0774,0.0697,0.0580,0.0720,0.0471,0.0450,0.0642,0.0642,0.0397,0.0790,0.0721,0.0573,0.0557,0.0573,0.0573,0.0445,0.0471,0.0481,0.0507,0.0461,0.0493,0.0539,0.0657,0.0814,0.0618,0.0741,0.0620,0.0585,0.0484,0.0634,0.0634,0.0389,0.7907,1.0,0.0,0.7907,1.0,0.0
ZAK,0.0604,0.0641,0.0659,0.0631,0.0597,0.0597,0.0454,0.0431,0.0477,0.0484,0.0544,0.0597,0.0673,0.0650,0.0815,0.0669,0.0538,0.0653,0.0591,0.0520,0.0716,0.0716,0.0611,0.0627,0.0682,0.0537,0.0621,0.0627,0.0627,0.0447,0.0423,0.0433,0.0500,0.0526,0.0564,0.0630,0.0663,0.0634,0.0653,0.0548,0.0701,0.0759,0.0673,0.0703,0.0703,0.0469,0.0723,0.0694,0.0593,0.0680,...,0.0489,0.0588,0.0582,0.0582,0.0579,0.0636,0.0630,0.0572,0.0652,0.0710,0.0738,0.0664,0.0851,0.0833,0.0527,0.0484,0.0281,0.0319,0.0459,0.0459,0.0423,0.0684,0.0623,0.0561,0.0519,0.0556,0.0556,0.0406,0.0429,0.0395,0.0481,0.0424,0.0526,0.0698,0.0672,0.1207,0.1012,0.0614,0.0556,0.0342,0.0370,0.0390,0.0390,0.0408,0.6135,1.0,0.0,0.6135,1.0,0.0


In [None]:
#| echo: false
show_doc(Data.get_pspa_all_norm)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L87){target="_blank" style="float:right; font-size:smaller"}

### Data.get_pspa_all_norm

>      Data.get_pspa_all_norm ()

*Get PSPA normalized data of serine/threonine and tyrosine kinases.*

In [None]:
Data.get_pspa_all_norm()

Unnamed: 0_level_0,-5P,-5G,-5A,-5C,-5S,-5T,-5V,-5I,-5L,-5M,-5F,-5Y,-5W,-5H,-5K,-5R,-5Q,-5N,-5D,-5E,-5s,-5t,-5y,-4P,-4G,-4A,-4C,-4S,-4T,-4V,-4I,-4L,-4M,-4F,-4Y,-4W,-4H,-4K,-4R,-4Q,-4N,-4D,-4E,-4s,-4t,-4y,-3P,-3G,-3A,-3C,...,4A,4C,4S,4T,4V,4I,4L,4M,4F,4Y,4W,4H,4K,4R,4Q,4N,4D,4E,4s,4t,4y,0s,0t,0y,0S,0T,0Y,5P,5G,5A,5C,5S,5T,5V,5I,5L,5M,5F,5Y,5W,5H,5K,5R,5Q,5N,5D,5E,5s,5t,5y
kinase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
AAK1,0.0720,0.0245,0.0284,0.0456,0.0425,0.0425,0.0951,0.1554,0.0993,0.0864,0.0425,0.0952,0.0315,0.0331,0.0262,0.0956,0.0560,0.0275,0.0160,0.0153,0.0201,0.0201,0.0611,0.0534,0.0642,0.0706,0.0560,0.0619,0.0619,0.0619,0.0621,0.0742,0.0693,0.0520,0.0534,0.0403,0.0514,0.0809,0.0715,0.0627,0.0429,0.0332,0.0560,0.0332,0.0332,0.0339,0.1084,0.0512,0.1119,0.0655,...,0.0646,0.0603,0.0560,0.0560,0.0422,0.0415,0.0461,0.0464,0.0523,0.0521,0.0826,0.0560,0.0831,0.0928,0.0635,0.0592,0.0389,0.0457,0.0251,0.0251,0.0270,0.1013,1.0,0.0,0.1013,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,
ACVR2A,0.0415,0.0481,0.0584,0.0489,0.0578,0.0578,0.0598,0.0625,0.0596,0.0521,0.0600,0.0578,0.0803,0.0570,0.0510,0.0475,0.0430,0.0536,0.0888,0.0789,0.0783,0.0783,0.0760,0.0466,0.0549,0.0555,0.0551,0.0549,0.0549,0.0543,0.0526,0.0520,0.0576,0.0619,0.0586,0.0754,0.0551,0.0406,0.0457,0.0482,0.0501,0.1040,0.0869,0.0809,0.0809,0.0681,0.0502,0.0653,0.0537,0.0588,...,0.0498,0.0517,0.0563,0.0563,0.0516,0.0563,0.0512,0.0662,0.0523,0.0579,0.0800,0.0573,0.0527,0.0491,0.0616,0.0556,0.0640,0.0640,0.0703,0.0703,0.0589,0.9833,1.0,0.0,0.9833,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YES1,0.0677,0.0571,0.0537,0.0530,0.0527,0.0505,0.0435,0.0375,0.0400,0.0463,0.0478,0.0478,0.0419,0.0564,0.0681,0.0647,0.0610,0.0752,0.0748,0.0610,0.0652,0.0652,0.0668,0.0625,0.0695,0.0524,0.0530,0.0470,0.0404,0.0476,0.0416,0.0528,0.0529,0.0406,0.0406,0.0424,0.0566,0.0549,0.0561,0.0607,0.0559,0.0786,0.0875,0.0596,0.0596,0.0705,0.0564,0.0580,0.0456,0.0572,...,0.0464,0.0747,0.0521,0.0561,0.0468,0.0452,0.0481,0.0716,0.0689,0.0689,0.0619,0.0623,0.0587,0.0757,0.0652,0.0499,0.0421,0.0492,0.0371,0.0371,0.0467,0.0000,0.0,1.0,0.0000,0.0,1.0,0.0762,0.0532,0.0533,0.0610,0.0596,0.0558,0.0416,0.0375,0.0467,0.0518,0.0627,0.0627,0.0456,0.0593,0.0662,0.084,0.0559,0.0604,0.0422,0.0482,0.0374,0.0374,0.0411
ZAP70,0.0602,0.0880,0.0623,0.0496,0.0471,0.0514,0.0465,0.0380,0.0307,0.0526,0.0479,0.0479,0.0347,0.0641,0.0471,0.0452,0.0492,0.0703,0.0870,0.0777,0.1622,0.1622,0.1208,0.0977,0.0792,0.0865,0.0556,0.0487,0.0367,0.0375,0.0312,0.0317,0.0344,0.0274,0.0274,0.0347,0.0474,0.0338,0.0380,0.0571,0.0457,0.0929,0.1392,0.1432,0.1432,0.1685,0.0588,0.0610,0.0537,0.0581,...,0.0491,0.0520,0.0583,0.0530,0.0504,0.0304,0.0424,0.0554,0.0393,0.0393,0.0539,0.0671,0.0562,0.0557,0.0712,0.0406,0.0597,0.0558,0.0440,0.0440,0.0318,0.0000,0.0,1.0,0.0000,0.0,1.0,0.1269,0.0904,0.0737,0.0457,0.0638,0.0692,0.0343,0.0235,0.0305,0.0461,0.0343,0.0343,0.0344,0.0484,0.0477,0.029,0.0520,0.0537,0.0709,0.0710,0.0862,0.0862,0.0605


In [None]:
#| echo: false
show_doc(Data.get_pspa_st_pct)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L93){target="_blank" style="float:right; font-size:smaller"}

### Data.get_pspa_st_pct

>      Data.get_pspa_st_pct ()

*Get PSPA reference score to calculate percentile for serine/threonine kinases.*

In [None]:
Data.get_pspa_st_pct()

kinase,AAK1,ACVR2A,ACVR2B,AKT1,AKT2,AKT3,ALK2,ALK4,ALPHAK3,AMPKA1,AMPKA2,ANKRD3,ASK1,ATM,ATR,AURA,AURB,AURC,BCKDK,BIKE,BMPR1A,BMPR1B,BMPR2,BRAF,BRSK1,BRSK2,BUB1,CAMK1A,CAMK1B,CAMK1D,CAMK1G,CAMK2A,CAMK2B,CAMK2D,CAMK2G,CAMK4,CAMKK1,CAMKK2,CAMLCK,CDC7,CDK1,CDK10,CDK12,CDK13,CDK14,CDK16,CDK17,CDK18,CDK19,CDK2,...,RIPK1,RIPK2,RIPK3,ROCK1,ROCK2,RSK2,RSK3,RSK4,SBK,SGK1,SGK3,SIK,SKMLCK,SLK,SMG1,SMMLCK,SNRK,SRPK1,SRPK2,SRPK3,SSTK,STK33,STLK3,TAK1,TAO1,TAO2,TAO3,TBK1,TGFBR1,TGFBR2,TLK1,TLK2,TNIK,TSSK1,TSSK2,TTBK1,TTBK2,TTK,ULK1,ULK2,VRK1,VRK2,WNK1,WNK3,WNK4,YANK2,YANK3,YSK1,YSK4,ZAK
0,-10.960,-0.581,0.329,-3.891,-3.591,-5.312,0.814,-0.559,-0.933,-2.607,-3.167,-0.764,-6.366,2.533,0.010,-1.164,-4.296,-4.302,1.695,-7.684,1.713,1.965,0.183,0.018,-0.379,-3.219,-9.801,-3.750,0.149,-0.610,-2.746,4.377,4.940,0.902,2.957,-2.297,-3.196,-3.469,-1.716,-0.232,-1.378,-9.497,-5.727,-4.521,-8.698,-7.186,-5.666,-5.802,-4.029,-4.039,...,-3.605,-5.763,-5.047,-6.065,-4.406,-1.298,-3.001,-0.837,-2.256,-3.190,-3.763,-0.262,-0.226,-3.190,-1.971,-3.519,-4.495,-3.007,-2.348,-3.560,-4.580,-6.610,-6.038,-1.211,-4.498,-2.911,0.463,-1.287,0.680,-3.707,1.479,3.406,-3.950,-2.172,-2.109,-7.773,-4.765,-5.262,-3.549,-2.822,-4.682,-2.854,-1.669,-1.527,-2.965,-2.877,-1.792,-6.283,-1.715,-3.204
1,-6.788,-0.166,0.307,-5.886,-4.786,-6.576,1.561,-0.865,-3.399,-3.261,-3.464,-4.366,-7.176,0.019,-0.697,-1.581,-4.628,-4.533,-1.092,-5.161,1.202,1.525,-0.386,-3.377,-1.858,-3.819,-5.141,-5.537,-1.294,-3.311,-5.862,-0.315,0.517,-0.535,0.877,-3.692,-1.182,-1.239,-1.285,2.578,-4.304,-7.431,-4.146,-3.498,-6.062,-5.981,-5.925,-5.136,-4.122,-3.122,...,-2.358,-5.160,-3.075,-7.793,-6.160,-1.518,-2.279,-2.877,-2.437,-5.513,-6.566,-4.297,-0.846,-7.367,1.751,-4.481,-5.585,-4.549,-5.719,-5.101,-2.420,-6.698,-8.190,-4.619,-8.130,-4.908,-5.247,-1.650,1.537,-4.228,-5.881,-4.427,-7.145,-2.825,-0.935,-4.638,-3.624,-9.189,-4.698,-3.656,-5.670,-2.817,-4.071,-3.394,-5.097,-1.874,-1.480,-8.709,-3.708,-6.093
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89782,-3.753,1.451,1.883,-5.583,-5.253,-7.164,1.226,-0.399,3.341,-5.932,-7.009,-1.346,-1.257,-1.930,-1.086,-5.300,-6.268,-6.415,-6.027,-2.674,2.403,3.097,0.315,0.331,-8.989,-9.727,-0.962,-6.574,-2.149,-5.753,-6.502,-3.379,-3.798,-4.577,-2.381,-5.040,-0.197,-0.772,-1.802,-0.784,-1.720,-6.366,-3.784,-2.870,-4.482,-2.392,-2.803,-2.889,-3.985,-2.087,...,-5.479,-5.107,-5.028,-4.316,-3.410,-4.957,-5.244,-5.152,-5.324,-5.636,-5.262,-5.684,-3.825,-1.824,-3.824,-4.159,-6.882,-4.014,-4.454,-3.755,-8.207,-5.581,-2.655,1.240,-2.303,-0.338,0.146,-3.978,0.138,-3.302,-3.075,-1.497,-1.209,-6.844,-5.322,-4.496,-2.852,-1.356,-3.828,-3.049,-1.930,-1.420,-5.949,-4.854,-5.401,-1.853,-2.068,-2.824,-0.340,-1.326
89783,-1.540,-2.180,-2.014,-2.416,-0.592,-1.364,-3.320,-0.826,-4.438,-1.393,-2.058,-2.101,-2.838,-3.597,-0.664,-4.098,-1.564,-2.006,-5.130,-2.428,-3.192,-2.013,-0.868,-2.795,-5.958,-6.211,-0.309,-1.495,0.020,-1.378,-1.652,-2.380,-3.275,-3.026,-2.647,-1.958,-5.024,-3.868,0.578,-2.915,-3.348,-4.227,-5.020,-5.209,-1.570,-2.480,-2.677,-2.574,-5.737,-5.213,...,-1.942,-6.158,-0.930,-1.559,-0.154,0.361,-0.754,-0.276,-3.035,-1.083,-0.965,-3.652,1.355,-0.824,-3.360,-0.030,-5.503,-2.102,-4.456,-3.284,-2.001,-3.563,-4.212,-4.696,-4.132,-2.436,-2.292,-3.454,-2.188,-2.436,-2.339,-1.988,-1.042,-0.358,-0.074,-2.837,-1.059,0.434,-5.814,-4.250,-1.979,-0.661,-2.586,-4.076,-2.832,-0.575,-0.859,-2.415,-2.999,-2.550


In [None]:
#| echo: false
# Render the API reference entry (source link, signature, docstring) for Data.get_pspa_tyr_pct.
show_doc(Data.get_pspa_tyr_pct)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L99){target="_blank" style="float:right; font-size:smaller"}

### Data.get_pspa_tyr_pct

>      Data.get_pspa_tyr_pct ()

*Get PSPA reference scores used to calculate percentiles for tyrosine kinases.*

In [None]:
# Preview the tyrosine reference-score table (one column per kinase);
# the output is truncated by the pandas display options set in Setup.
Data.get_pspa_tyr_pct()

kinase,ABL1,TNK2,ALK,ABL2,AXL,BLK,BMPR2_TYR,PTK6,BTK,CSF1R,CSK,MATK,DDR1,DDR2,EGFR,EPHA1,EPHA2,EPHA3,EPHA4,EPHA5,EPHA6,EPHA7,EPHA8,EPHB1,EPHB2,EPHB3,EPHB4,BMX,PTK2,FER,FES,FGFR1,FGFR2,FGFR3,FGFR4,FGR,FLT3,FRK,FYN,HCK,ERBB2,ERBB4,IGF1R,INSR,INSRR,ITK,JAK1,JAK2,JAK3,KIT,LCK,LIMK1_TYR,LIMK2_TYR,LTK,LYN,MERTK,MET,MAP2K4_TYR,MAP2K6_TYR,MAP2K7_TYR,MST1R,MUSK,PKMYT1_TYR,NEK10_TYR,PDGFRA,PDGFRB,PDHK1_TYR,PDHK3_TYR,PDHK4_TYR,PINK1_TYR,PTK2B,RET,ROS1,SRC,SRMS,SYK,TEC,TESK1_TYR,TEK,TNK1,TNNI3K_TYR,NTRK1,NTRK2,NTRK3,TXK,TYK2,TYRO3,FLT1,KDR,FLT4,WEE1_TYR,YES1,ZAP70
0,-0.709617,-3.624831,-2.136338,-0.022776,-0.737589,2.345905,0.504821,2.417165,-0.121611,-1.205218,1.576014,1.917812,-2.449589,-3.806540,1.005984,-2.925415,-0.609664,-1.256237,0.491093,0.494654,-1.348782,-1.938472,0.221994,-0.805070,1.130282,0.111910,-0.253774,0.221683,2.233854,2.295975,-0.025152,-1.738376,-0.986921,-0.375908,1.631896,2.154664,-1.214864,-1.185009,6.124421,1.940066,-1.020647,-0.292182,0.761946,-0.992736,-0.599703,0.298515,-4.146873,-2.339013,-2.479359,-1.082389,1.758755,-3.027934,-3.701494,-0.480120,1.346726,1.124192,-1.205155,-0.094138,1.031112,-0.287930,-2.589829,-3.379800,-1.341062,-3.607123,-4.385381,-1.978546,1.390759,0.259191,0.378106,-0.094980,0.908537,-0.618981,-2.562691,4.377146,1.855049,2.017083,-0.572871,-2.295149,-2.833113,-3.811383,-6.262204,0.683287,-0.626250,-0.368491,1.187208,-1.601712,-1.143748,-0.891566,-1.888643,-1.758264,-1.610344,4.545175,0.280174
1,0.986158,-1.645273,-1.183920,0.553010,-1.098784,-1.245678,-0.276461,-0.156496,-1.322652,-0.684989,0.447463,0.054841,-0.295641,-2.374194,0.261968,-0.444003,0.620075,-0.918899,-0.266802,-1.466189,0.181707,-0.884474,-0.829816,-1.039152,-1.332577,-1.553626,-1.557679,-1.041167,-1.465569,-0.573358,-1.402839,-1.119166,-0.006615,-0.690700,0.057757,-1.329587,-0.752020,-1.421231,-1.119607,-0.361730,-0.067540,-1.488971,-1.959423,-1.198227,-1.250944,-1.559043,-1.742221,-0.297813,-0.737524,0.101179,-0.785122,-0.803239,-0.227134,-0.226996,-1.061925,-0.749321,-0.324825,0.101380,-0.753605,-0.186826,-0.078232,-1.533730,-0.949200,1.335091,-0.953302,-1.471499,-0.039170,0.376709,-0.653393,-0.690604,-0.187336,0.372630,-1.753648,-0.822064,0.291210,-1.380433,-0.652058,-0.974885,-2.822036,-1.223497,-2.903235,-0.909811,-1.158577,-0.777541,-0.385554,-0.624216,-0.737089,-0.315447,-1.293708,-1.182827,-1.891533,-0.456570,-2.465316
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7313,0.737694,-0.477689,-0.646850,0.928066,0.187149,-1.000041,-0.283551,-3.053869,-0.750475,0.132043,0.069439,0.778468,-0.572751,-0.877774,0.279656,-1.905299,-2.542936,-0.629488,-1.433432,-1.319311,-0.959149,-1.904233,-1.836303,-0.665941,-0.500875,-1.140260,-0.324883,-0.138589,-1.634647,-1.463951,-2.856428,1.338733,1.308337,1.210296,0.236992,-1.117447,0.508686,-1.152320,-0.680328,-1.311162,-1.278938,-0.529947,-0.707274,-0.296606,0.170371,-0.406315,0.053109,-0.184986,1.285550,-1.175831,-0.657466,-0.059045,-0.307151,-0.027580,-1.925899,0.415170,0.535199,-0.446664,0.591918,0.316993,0.457042,0.960955,-1.222173,0.783990,-0.043431,1.226039,0.035492,0.102904,0.600526,0.732808,-1.478030,1.261671,0.460373,-1.074805,-1.293450,-1.402257,-0.780591,-0.107376,-0.479018,1.326369,-1.912418,-0.017800,0.585871,-0.122134,-1.275022,-0.020350,0.483620,-0.060204,1.378042,0.573273,-2.383657,-0.246005,1.174693
7314,2.115113,0.153795,0.356357,1.846239,-0.856035,-0.422296,-0.985140,0.554181,0.381133,-1.666383,-0.728353,-0.254733,-0.999583,0.117162,-1.622541,-1.882815,-3.830351,-1.601071,-1.750483,-2.748548,-0.900263,-1.661417,-2.349142,-0.825704,-1.211989,-1.668258,-0.242232,0.727625,-2.099673,0.240011,-0.659603,-1.569382,-1.979445,-2.605414,-1.726131,0.175403,-2.030979,-0.537023,-0.303194,-0.134410,-1.497615,-2.364148,-3.279693,-1.919277,-2.157962,0.598978,-0.792918,-0.965011,-1.728287,-1.616595,-0.084054,-0.968384,-0.373329,-0.234884,-1.088861,-1.129566,-0.358743,0.154190,-0.787745,-0.482053,-1.581810,-1.865008,-0.707663,-0.049274,-1.190498,-1.403224,-0.849104,-0.212996,-0.159123,-0.885066,0.447393,-0.121666,-0.543588,0.170109,0.044321,-1.576300,1.325049,-1.122959,-2.962076,-0.786136,-1.090242,-1.402034,-1.365117,-1.670434,1.684176,-0.508297,-0.304215,-2.045909,-1.629804,-2.227050,-2.294855,0.428825,-1.789086


In [None]:
#| echo: false
# Render the API reference entry (source link, signature, docstring) for Data.get_num_dict.
show_doc(Data.get_num_dict)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L105){target="_blank" style="float:right; font-size:smaller"}

### Data.get_num_dict

>      Data.get_num_dict ()

*Get a dictionary mapping kinase to number of random amino acids in PSPA.*

In [None]:
# Fetch the kinase -> count mapping once, then display only the first five
# (kinase, count) pairs rather than the whole dictionary.
num_dict = Data.get_num_dict()
list(num_dict.items())[:5]

[('SYK', 18), ('PTK2', 18), ('ZAP70', 18), ('ERBB2', 18), ('CSK', 18)]

In [None]:
#| echo: false
# Render the API reference entry (source link, signature, docstring) for Data.get_ks_dataset.
show_doc(Data.get_ks_dataset)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L113){target="_blank" style="float:right; font-size:smaller"}

### Data.get_ks_dataset

>      Data.get_ks_dataset (add_kinase_info=True)

*Get kinase substrate dataset collected from public resources, with the option of adding kinase info.*

In [None]:
# Preview the kinase-substrate table. Called with the default add_kinase_info=True,
# so the kinase annotation columns (e.g. kinase_group, kinase_family) are included.
Data.get_ks_dataset()

Unnamed: 0,kin_sub_site,kinase_uniprot,substrate_uniprot,site,source,substrate_genes,substrate_phosphoseq,position,site_seq,sub_site,substrate_sequence,kinase_on_tree,kinase_genes,kinase_group,kinase_family,kinase_pspa_big,kinase_pspa_small
0,O00141_A4FU28_S140,O00141,A4FU28,S140,Sugiyama,CTAGE9,MEEPGATPQPYLGLVLEELGRVVAALPESMRPDENPYGFPSELVVC...,140,AAAEEARSLEATCEKLSRsNsELEDEILCLEKDLKEEKSKH,A4FU28_S140,MEEPGATPQPYLGLVLEELGRVVAALPESMRPDENPYGFPSELVVC...,1,SGK1 SGK,AGC,SGK,basophilic,AKT/ROCK
1,O00141_O00141_S252,O00141,O00141,S252,Sugiyama,SGK1 SGK,MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...,252,SQGHIVLTDFGLCKENIEHNsTtstFCGtPEyLAPEVLHKQ,O00141_S252,MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNS...,1,SGK1 SGK,AGC,SGK,basophilic,AKT/ROCK
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187064,Q9Y6R4_Q9Y6R4_T1494,Q9Y6R4,Q9Y6R4,T1494,SIGNOR|EPSD|PSP,MAP3K4 KIAA0213 MAPKKK4 MEKK4 MTK1,MREAAAALVPPPAFAVTPAAAMEEPPPPPPPPPPPPEPETESEPEC...,1494,SGLIKLGDFGCSVKLKNNAQtMPGEVNSTLGTAAYMAPEVI,Q9Y6R4_T1494,MREAAAALVPPPAFAVTPAAAMEEPPPPPPPPPPPPEPETESEPEC...,1,MAP3K4 KIAA0213 MAPKKK4 MEKK4 MTK1,STE,STE11,,
187065,Q9Y6R4_Q9Y6R4_Y1328,Q9Y6R4,Q9Y6R4,Y1328,Sugiyama,MAP3K4 KIAA0213 MAPKKK4 MEKK4 MTK1,MREAAAALVPPPAFAVTPAAAMEEPPPPPPPPPPPPEPETESEPEC...,1328,RYREMRRKNIIGQVCDtPKSyDNVMHVGLRKVTFKWQRGNK,Q9Y6R4_Y1328,MREAAAALVPPPAFAVTPAAAMEEPPPPPPPPPPPPEPETESEPEC...,1,MAP3K4 KIAA0213 MAPKKK4 MEKK4 MTK1,STE,STE11,,


In [None]:
#| echo: false
# Render the API reference entry (source link, signature, docstring) for Data.get_ks_unique.
show_doc(Data.get_ks_unique)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L148){target="_blank" style="float:right; font-size:smaller"}

### Data.get_ks_unique

>      Data.get_ks_unique ()

*Get kinase substrate dataset with unique site sequence (most phosphorylated version).*

In [None]:
# Preview the unique-site table: site metadata columns followed by one
# <uniprot>_<gene> column per kinase (values are 0/1 in the rows shown).
Data.get_ks_unique()

Unnamed: 0,site_seq,site_source_all,substrate_gene,sub_site,O00141_SGK1,O00238_BMPR1B,O00311_CDC7,O00329_PIK3CD,O00418_EEF2K,O00443_PIK3C2A,O00444_PLK4,O00506_STK25,O14578_CIT,O14730_RIOK3,O14733_MAP2K7,O14757_CHEK1,O14874_BCKDK,O14920_IKBKB,O14936_CASK,O14965_AURKA,O14976_GAK,O15021_MAST4,O15075_DCLK1,O15111_CHUK,O15146_MUSK,O15264_MAPK13,O15530_PDPK1,O43283_MAP3K13,O43293_DAPK3,O43318_MAP3K7,O43353_RIPK2,O43683_BUB1,O43781_DYRK3,O60285_NUAK1,O60331_PIP5K1C,O60566_BUB1B,O60674_JAK2,O60885_BRD4,O75116_ROCK2,O75385_ULK1,O75460_ERN1,O75582_RPS6KA5,O75676_RPS6KA4,O75716_STK16,O75914_PAK3,O76039_CDKL5,O94768_STK17B,O94804_STK10,O94806_PRKD3,O94921_CDK14,...,Q9HBY8_SGK2,Q9HC98_NEK6,Q9HCP0_CSNK1G1,Q9NQU5_PAK6,Q9NR20_DYRK4,Q9NRA0_SPHK2,Q9NRM7_LATS2,Q9NSY1_BMP2K,Q9NWZ3_IRAK4,Q9NYA1_SPHK1,Q9NYL2_MAP3K20,Q9NYV4_CDK12,Q9NYY3_PLK2,Q9NZJ5_EIF2AK3,Q9P0L2_MARK1,Q9P1W9_PIM2,Q9P286_PAK5,Q9P289_STK26,Q9P2K8_EIF2AK4,Q9UBE8_NLK,Q9UBS0_RPS6KB2,Q9UEE5_STK17A,Q9UEW8_STK39,Q9UF33_EPHA6,Q9UHD2_TBK1,Q9UIG0_BAZ1B,Q9UIK4_DAPK2,Q9UK32_RPS6KA6,Q9UKE5_TNIK,Q9UKI8_TLK1,Q9UL54_TAOK2,Q9UM73_ALK,Q9UPE1_SRPK3,Q9UPZ9_CILK1,Q9UQ07_MOK,Q9UQ88_CDK11A,Q9UQB9_AURKC,Q9UQM7_CAMK2A,Q9Y243_AKT3,Q9Y2H1_STK38L,Q9Y2K2_SIK3,Q9Y2U5_MAP3K2,Q9Y3S1_WNK2,Q9Y463_DYRK1B,Q9Y4K4_MAP4K5,Q9Y572_RIPK3,Q9Y5S2_CDC42BPB,Q9Y6E0_STK24,Q9Y6M4_CSNK1G3,Q9Y6R4_MAP3K4
0,AAAAAAAAAVAAPPTAVGSLsGAEGVPVSsQPLPSQPW___,SIGNOR|human_phosphoproteome|PSP|iPTMNet,MAZ,P56270_S460,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,AAAAAAASGGAQQRsHHAPMsPGssGGGGQPLARtPQPssP,PSP|human_phosphoproteome|EPSD|Sugiyama,ARID1A,O14497_S363,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29154,___________________MtSSyGHVLERQPALGGRLDsP,Sugiyama,PRRX1,P54821_T2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
29155,___________________MttsQKHRDFVAEPMGEKPVGS,SIGNOR|human_phosphoproteome|EPSD|PSP|GPS6,BANF1,O75531_T2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
#| echo: false
# Render the API reference entry (source link, signature, docstring) for Data.get_cddm.
show_doc(Data.get_cddm)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L154){target="_blank" style="float:right; font-size:smaller"}

### Data.get_cddm

>      Data.get_cddm ()

*Get the primary CDDM dataset.*

In [None]:
# Preview the primary CDDM matrix: rows are kinases, columns are
# position+residue labels (e.g. -7P), including lowercase s/t/y columns.
Data.get_cddm()

substrate,-7P,-7G,-7A,-7C,-7S,-7T,-7V,-7I,-7L,-7M,-7F,-7Y,-7W,-7H,-7K,-7R,-7Q,-7N,-7D,-7E,-7s,-7t,-7y,-6P,-6G,-6A,-6C,-6S,-6T,-6V,-6I,-6L,-6M,-6F,-6Y,-6W,-6H,-6K,-6R,-6Q,-6N,-6D,-6E,-6s,-6t,-6y,-5P,-5G,-5A,-5C,...,6A,6C,6S,6T,6V,6I,6L,6M,6F,6Y,6W,6H,6K,6R,6Q,6N,6D,6E,6s,6t,6y,7P,7G,7A,7C,7S,7T,7V,7I,7L,7M,7F,7Y,7W,7H,7K,7R,7Q,7N,7D,7E,7s,7t,7y,0s,0t,0y,0S,0T,0Y
kinase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
SRC,0.055749,0.064895,0.060105,0.010017,0.045732,0.033101,0.049216,0.037892,0.080139,0.020035,0.032230,0.012195,0.009146,0.017422,0.091899,0.077091,0.044861,0.040941,0.073606,0.086237,0.025697,0.013066,0.018728,0.058747,0.069626,0.061358,0.010879,0.038729,0.033943,0.053525,0.038294,0.067885,0.019147,0.034378,0.007833,0.004787,0.020453,0.084421,0.062663,0.051784,0.037424,0.081375,0.100522,0.023934,0.015666,0.022628,0.051193,0.068547,0.067679,0.015184,...,0.073614,0.013304,0.040355,0.026608,0.051885,0.039911,0.077162,0.016408,0.028381,0.011973,0.006208,0.024390,0.082040,0.068293,0.045233,0.033259,0.072284,0.083814,0.029268,0.015078,0.023503,0.061552,0.070027,0.058876,0.013381,0.045495,0.030330,0.054862,0.038359,0.078947,0.026316,0.032560,0.007136,0.011151,0.022302,0.082070,0.063782,0.045941,0.036574,0.074487,0.085192,0.029438,0.013381,0.017841,0.038927,0.034602,0.926471,0.038927,0.034602,0.926471
EPHA3,0.042881,0.075316,0.068169,0.013194,0.039582,0.031336,0.048378,0.043430,0.079714,0.021440,0.026938,0.012644,0.013744,0.024739,0.096756,0.085761,0.038483,0.035734,0.073117,0.073667,0.025289,0.011545,0.018142,0.065349,0.070291,0.056013,0.015925,0.041735,0.030752,0.047227,0.038990,0.064250,0.019220,0.026359,0.008237,0.007688,0.020868,0.099396,0.071389,0.044481,0.040637,0.074684,0.103789,0.014278,0.019769,0.018671,0.045430,0.068966,0.063492,0.012042,...,0.073513,0.014590,0.025253,0.021324,0.065095,0.049944,0.078563,0.011785,0.029181,0.006734,0.005051,0.023569,0.080247,0.067340,0.040965,0.038159,0.076880,0.092593,0.031425,0.020202,0.023569,0.047968,0.067720,0.061512,0.010158,0.034424,0.027088,0.058126,0.037246,0.079007,0.023702,0.027652,0.011287,0.011287,0.019752,0.091422,0.072235,0.046275,0.046840,0.073928,0.092551,0.024266,0.013544,0.022009,0.054526,0.035442,0.910033,0.054526,0.035442,0.910033
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MAP2K3,0.045977,0.057471,0.114943,0.000000,0.045977,0.045977,0.022989,0.022989,0.022989,0.011494,0.011494,0.000000,0.000000,0.034483,0.057471,0.264368,0.068966,0.022989,0.045977,0.080460,0.022989,0.000000,0.000000,0.068182,0.056818,0.056818,0.022727,0.068182,0.011364,0.045455,0.034091,0.022727,0.011364,0.011364,0.000000,0.000000,0.034091,0.068182,0.136364,0.068182,0.034091,0.102273,0.068182,0.056818,0.011364,0.011364,0.034091,0.079545,0.159091,0.000000,...,0.085366,0.012195,0.048780,0.012195,0.060976,0.073171,0.073171,0.000000,0.012195,0.036585,0.000000,0.000000,0.060976,0.073171,0.048780,0.012195,0.097561,0.060976,0.073171,0.036585,0.012195,0.073171,0.024390,0.085366,0.012195,0.060976,0.000000,0.012195,0.024390,0.048780,0.000000,0.012195,0.012195,0.073171,0.024390,0.036585,0.109756,0.024390,0.060976,0.073171,0.109756,0.085366,0.036585,0.000000,0.528090,0.191011,0.280899,0.528090,0.191011,0.280899
GRK1,0.060241,0.072289,0.084337,0.000000,0.048193,0.036145,0.024096,0.060241,0.012048,0.012048,0.024096,0.012048,0.000000,0.000000,0.084337,0.096386,0.036145,0.012048,0.060241,0.144578,0.036145,0.060241,0.024096,0.048193,0.060241,0.060241,0.000000,0.024096,0.000000,0.072289,0.072289,0.048193,0.036145,0.000000,0.000000,0.000000,0.000000,0.108434,0.060241,0.036145,0.048193,0.084337,0.144578,0.036145,0.012048,0.048193,0.036145,0.060241,0.156627,0.012048,...,0.103896,0.038961,0.012987,0.025974,0.038961,0.064935,0.090909,0.000000,0.051948,0.000000,0.000000,0.000000,0.142857,0.064935,0.038961,0.000000,0.103896,0.116883,0.025974,0.025974,0.000000,0.039474,0.065789,0.052632,0.000000,0.026316,0.000000,0.039474,0.000000,0.039474,0.052632,0.052632,0.000000,0.013158,0.013158,0.092105,0.105263,0.013158,0.026316,0.118421,0.197368,0.039474,0.000000,0.013158,0.831325,0.156627,0.012048,0.831325,0.156627,0.012048


In [None]:
#| echo: false
# Render the API reference entry (source link, signature, docstring) for Data.get_cddm_upper.
show_doc(Data.get_cddm_upper)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L160){target="_blank" style="float:right; font-size:smaller"}

### Data.get_cddm_upper

>      Data.get_cddm_upper ()

*Get the CDDM dataset with flanking residues in uppercase (phosphorylated s/t/y are merged into S/T/Y).*

In [None]:
# Preview the uppercase CDDM variant: in the columns shown, flanking positions
# carry only uppercase residue labels (no -7s/-7t/-7y style columns).
Data.get_cddm_upper()

substrate,-7P,-7G,-7A,-7C,-7S,-7T,-7V,-7I,-7L,-7M,-7F,-7Y,-7W,-7H,-7K,-7R,-7Q,-7N,-7D,-7E,-6P,-6G,-6A,-6C,-6S,-6T,-6V,-6I,-6L,-6M,-6F,-6Y,-6W,-6H,-6K,-6R,-6Q,-6N,-6D,-6E,-5P,-5G,-5A,-5C,-5S,-5T,-5V,-5I,-5L,-5M,...,5Q,5N,5D,5E,6P,6G,6A,6C,6S,6T,6V,6I,6L,6M,6F,6Y,6W,6H,6K,6R,6Q,6N,6D,6E,7P,7G,7A,7C,7S,7T,7V,7I,7L,7M,7F,7Y,7W,7H,7K,7R,7Q,7N,7D,7E,0S,0T,0Y,0s,0t,0y
kinase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
SRC,0.055749,0.064895,0.060105,0.010017,0.071429,0.046167,0.049216,0.037892,0.080139,0.020035,0.032230,0.030923,0.009146,0.017422,0.091899,0.077091,0.044861,0.040941,0.073606,0.086237,0.058747,0.069626,0.061358,0.010879,0.062663,0.049608,0.053525,0.038294,0.067885,0.019147,0.034378,0.030461,0.004787,0.020453,0.084421,0.062663,0.051784,0.037424,0.081375,0.100522,0.051193,0.068547,0.067679,0.015184,0.062473,0.048590,0.050325,0.044685,0.068980,0.018221,...,0.046276,0.033495,0.070956,0.089467,0.063415,0.073614,0.073614,0.013304,0.069623,0.041685,0.051885,0.039911,0.077162,0.016408,0.028381,0.035477,0.006208,0.024390,0.082040,0.068293,0.045233,0.033259,0.072284,0.083814,0.061552,0.070027,0.058876,0.013381,0.074933,0.043711,0.054862,0.038359,0.078947,0.026316,0.032560,0.024978,0.011151,0.022302,0.082070,0.063782,0.045941,0.036574,0.074487,0.085192,0.038927,0.034602,0.926471,0.038927,0.034602,0.926471
EPHA3,0.042881,0.075316,0.068169,0.013194,0.064871,0.042881,0.048378,0.043430,0.079714,0.021440,0.026938,0.030786,0.013744,0.024739,0.096756,0.085761,0.038483,0.035734,0.073117,0.073667,0.065349,0.070291,0.056013,0.015925,0.056013,0.050522,0.047227,0.038990,0.064250,0.019220,0.026359,0.026908,0.007688,0.020868,0.099396,0.071389,0.044481,0.040637,0.074684,0.103789,0.045430,0.068966,0.063492,0.012042,0.058566,0.042146,0.049261,0.036672,0.057471,0.016420,...,0.052807,0.041690,0.085603,0.106726,0.054433,0.069585,0.073513,0.014590,0.056678,0.041526,0.065095,0.049944,0.078563,0.011785,0.029181,0.030303,0.005051,0.023569,0.080247,0.067340,0.040965,0.038159,0.076880,0.092593,0.047968,0.067720,0.061512,0.010158,0.058691,0.040632,0.058126,0.037246,0.079007,0.023702,0.027652,0.033296,0.011287,0.019752,0.091422,0.072235,0.046275,0.046840,0.073928,0.092551,0.054526,0.035442,0.910033,0.054526,0.035442,0.910033
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MAP2K3,0.045977,0.057471,0.114943,0.000000,0.068966,0.045977,0.022989,0.022989,0.022989,0.011494,0.011494,0.000000,0.000000,0.034483,0.057471,0.264368,0.068966,0.022989,0.045977,0.080460,0.068182,0.056818,0.056818,0.022727,0.125000,0.022727,0.045455,0.034091,0.022727,0.011364,0.011364,0.011364,0.000000,0.034091,0.068182,0.136364,0.068182,0.034091,0.102273,0.068182,0.034091,0.079545,0.159091,0.000000,0.056818,0.079545,0.022727,0.034091,0.102273,0.000000,...,0.048193,0.048193,0.096386,0.084337,0.085366,0.024390,0.085366,0.012195,0.121951,0.048780,0.060976,0.073171,0.073171,0.000000,0.012195,0.048780,0.000000,0.000000,0.060976,0.073171,0.048780,0.012195,0.097561,0.060976,0.073171,0.024390,0.085366,0.012195,0.146341,0.036585,0.012195,0.024390,0.048780,0.000000,0.012195,0.012195,0.073171,0.024390,0.036585,0.109756,0.024390,0.060976,0.073171,0.109756,0.528090,0.191011,0.280899,0.528090,0.191011,0.280899
GRK1,0.060241,0.072289,0.084337,0.000000,0.084337,0.096386,0.024096,0.060241,0.012048,0.012048,0.024096,0.036145,0.000000,0.000000,0.084337,0.096386,0.036145,0.012048,0.060241,0.144578,0.048193,0.060241,0.060241,0.000000,0.060241,0.012048,0.072289,0.072289,0.048193,0.036145,0.000000,0.048193,0.000000,0.000000,0.108434,0.060241,0.036145,0.048193,0.084337,0.144578,0.036145,0.060241,0.156627,0.012048,0.048193,0.036145,0.048193,0.060241,0.084337,0.000000,...,0.000000,0.037975,0.113924,0.113924,0.038961,0.012987,0.103896,0.038961,0.038961,0.051948,0.038961,0.064935,0.090909,0.000000,0.051948,0.000000,0.000000,0.000000,0.142857,0.064935,0.038961,0.000000,0.103896,0.116883,0.039474,0.065789,0.052632,0.000000,0.065789,0.000000,0.039474,0.000000,0.039474,0.052632,0.052632,0.013158,0.013158,0.013158,0.092105,0.105263,0.013158,0.026316,0.118421,0.197368,0.831325,0.156627,0.012048,0.831325,0.156627,0.012048


In [None]:
#| echo: false
# Render the API reference entry (source link, signature, docstring) for Data.get_cddm_others.
show_doc(Data.get_cddm_others)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L166){target="_blank" style="float:right; font-size:smaller"}

### Data.get_cddm_others

>      Data.get_cddm_others ()

*Get CDDM data for other kinases with mutations.*

In [None]:
# Preview the CDDM matrix for the additional kinase entries
# (rows shown include e.g. LYNb and ABL1[T315I]).
Data.get_cddm_others()

substrate,-7P,-7G,-7A,-7C,-7S,-7T,-7V,-7I,-7L,-7M,-7F,-7Y,-7W,-7H,-7K,-7R,-7Q,-7N,-7D,-7E,-7s,-7t,-7y,-6P,-6G,-6A,-6C,-6S,-6T,-6V,-6I,-6L,-6M,-6F,-6Y,-6W,-6H,-6K,-6R,-6Q,-6N,-6D,-6E,-6s,-6t,-6y,-5P,-5G,-5A,-5C,...,5y,6P,6G,6A,6C,6S,6T,6V,6I,6L,6M,6F,6Y,6W,6H,6K,6R,6Q,6N,6D,6E,6s,6t,6y,7P,7G,7A,7C,7S,7T,7V,7I,7L,7M,7F,7Y,7W,7H,7K,7R,7Q,7N,7D,7E,7s,7t,7y,0s,0t,0y
kinase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
LYNb,0.045929,0.068894,0.061935,0.013222,0.034795,0.029923,0.050800,0.045233,0.083507,0.022269,0.032011,0.006959,0.011830,0.016701,0.096729,0.082811,0.041754,0.044537,0.067502,0.089770,0.024356,0.010438,0.018093,0.059722,0.068750,0.055556,0.011111,0.034722,0.030556,0.056250,0.031250,0.063194,0.013194,0.029167,0.005556,0.006944,0.018750,0.098611,0.066667,0.045833,0.038194,0.083333,0.117361,0.025000,0.022222,0.018056,0.050554,0.060942,0.067867,0.018698,...,0.024683,0.060284,0.075177,0.074468,0.014184,0.029078,0.026241,0.063830,0.045390,0.080851,0.016312,0.024113,0.011348,0.003546,0.021986,0.081560,0.073050,0.039007,0.034043,0.070922,0.082270,0.027660,0.019149,0.025532,0.047177,0.074339,0.055039,0.015011,0.040743,0.027162,0.048606,0.036455,0.075768,0.026447,0.027162,0.010722,0.011437,0.027877,0.088635,0.085061,0.045032,0.036455,0.074339,0.082202,0.027162,0.019299,0.017870,0.038010,0.035245,0.926745
ABL1[T315I],0.046140,0.074534,0.066548,0.010648,0.039042,0.023957,0.055013,0.037267,0.075421,0.021295,0.030169,0.008873,0.011535,0.020408,0.092280,0.089618,0.040816,0.040816,0.074534,0.084295,0.029281,0.014197,0.013310,0.069149,0.070922,0.066489,0.015071,0.037234,0.027482,0.045213,0.027482,0.060284,0.009752,0.021277,0.001773,0.007979,0.015071,0.091312,0.064716,0.045213,0.053191,0.088652,0.112589,0.028369,0.022163,0.018617,0.053097,0.069912,0.078761,0.007080,...,0.018650,0.055556,0.079749,0.080645,0.015233,0.034050,0.020609,0.060932,0.057348,0.091398,0.013441,0.026882,0.009857,0.004480,0.025986,0.077061,0.073477,0.038530,0.034050,0.068996,0.068100,0.026882,0.020609,0.016129,0.045086,0.076646,0.051398,0.011722,0.043282,0.027051,0.047791,0.040577,0.086564,0.027051,0.025248,0.009017,0.012624,0.023445,0.094680,0.080252,0.048693,0.035167,0.064022,0.079351,0.027953,0.022543,0.019838,0.085613,0.045013,0.869373
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
BRAF,0.029412,0.117647,0.029412,0.000000,0.058824,0.000000,0.088235,0.029412,0.088235,0.088235,0.000000,0.029412,0.000000,0.000000,0.088235,0.088235,0.029412,0.000000,0.000000,0.147059,0.029412,0.029412,0.029412,0.057143,0.057143,0.057143,0.000000,0.028571,0.028571,0.085714,0.085714,0.028571,0.028571,0.057143,0.000000,0.000000,0.085714,0.028571,0.142857,0.057143,0.000000,0.028571,0.085714,0.028571,0.028571,0.000000,0.000000,0.142857,0.000000,0.000000,...,0.000000,0.057143,0.085714,0.000000,0.000000,0.085714,0.000000,0.114286,0.142857,0.057143,0.028571,0.028571,0.028571,0.028571,0.000000,0.085714,0.000000,0.028571,0.028571,0.028571,0.057143,0.057143,0.000000,0.057143,0.028571,0.085714,0.085714,0.000000,0.057143,0.000000,0.000000,0.057143,0.028571,0.028571,0.057143,0.085714,0.000000,0.028571,0.057143,0.114286,0.000000,0.028571,0.085714,0.085714,0.028571,0.057143,0.000000,0.800000,0.142857,0.057143
PKG1,0.065934,0.087912,0.095238,0.003663,0.062271,0.040293,0.032967,0.025641,0.047619,0.029304,0.007326,0.010989,0.010989,0.014652,0.076923,0.190476,0.029304,0.021978,0.036630,0.065934,0.040293,0.000000,0.003663,0.065455,0.080000,0.101818,0.010909,0.072727,0.043636,0.043636,0.032727,0.058182,0.010909,0.007273,0.007273,0.003636,0.036364,0.069091,0.101818,0.050909,0.047273,0.036364,0.072727,0.029091,0.014545,0.003636,0.072727,0.058182,0.061818,0.007273,...,0.021739,0.062500,0.080882,0.069853,0.007353,0.047794,0.022059,0.058824,0.047794,0.073529,0.003676,0.040441,0.018382,0.011029,0.014706,0.014706,0.055147,0.033088,0.029412,0.084559,0.121324,0.047794,0.025735,0.029412,0.069853,0.055147,0.095588,0.003676,0.062500,0.014706,0.047794,0.033088,0.080882,0.003676,0.018382,0.011029,0.022059,0.022059,0.080882,0.069853,0.014706,0.014706,0.077206,0.099265,0.058824,0.033088,0.011029,0.851449,0.134058,0.014493


In [None]:
#| echo: false
show_doc(Data.get_cddm_others_info)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L172){target="_blank" style="float:right; font-size:smaller"}

### Data.get_cddm_others_info

>      Data.get_cddm_others_info ()

*Get additional information (per-kinase counts) for the CDDM 'others' dataset.*

In [None]:
Data.get_cddm_others_info()

Unnamed: 0,kinase,count
0,ALK,1889
1,ABL1,1837
...,...,...
34,SPHK2,123
35,BRAF,41


In [None]:
#| echo: false
show_doc(Data.get_combine)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L178){target="_blank" style="float:right; font-size:smaller"}

### Data.get_combine

>      Data.get_combine ()

*Get the combined PSPA and CDDM dataset.*

In [None]:
Data.get_combine()

Unnamed: 0_level_0,-5P,-5G,-5A,-5C,-5S,-5T,-5V,-5I,-5L,-5M,-5F,-5Y,-5W,-5H,-5K,-5R,-5Q,-5N,-5D,-5E,-5s,-5t,-5y,-4P,-4G,-4A,-4C,-4S,-4T,-4V,-4I,-4L,-4M,-4F,-4Y,-4W,-4H,-4K,-4R,-4Q,-4N,-4D,-4E,-4s,-4t,-4y,-3P,-3G,-3A,-3C,...,3A,3C,3S,3T,3V,3I,3L,3M,3F,3Y,3W,3H,3K,3R,3Q,3N,3D,3E,3s,3t,3y,4P,4G,4A,4C,4S,4T,4V,4I,4L,4M,4F,4Y,4W,4H,4K,4R,4Q,4N,4D,4E,4s,4t,4y,0s,0t,0y,0S,0T,0Y
kinase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
CK1A,0.029499,0.106195,0.058997,0.008850,0.029499,0.020649,0.035398,0.029499,0.085546,0.061947,0.020649,0.005900,0.000000,0.023599,0.035398,0.050147,0.032448,0.070796,0.076696,0.100295,0.091445,0.020649,0.005900,0.023599,0.103245,0.058997,0.000000,0.017699,0.020649,0.035398,0.029499,0.050147,0.011799,0.017699,0.005900,0.017699,0.011799,0.035398,0.067847,0.047198,0.023599,0.126844,0.126844,0.094395,0.050147,0.023599,0.035294,0.070588,0.047059,0.014706,...,0.058824,0.011765,0.020588,0.005882,0.047059,0.020588,0.041176,0.005882,0.026471,0.000000,0.011765,0.011765,0.026471,0.017647,0.041176,0.023529,0.126471,0.141176,0.141176,0.008824,0.041176,0.035608,0.068249,0.062315,0.008902,0.029674,0.011869,0.056380,0.035608,0.077151,0.008902,0.053412,0.002967,0.002967,0.017804,0.062315,0.053412,0.035608,0.026706,0.097923,0.124629,0.074184,0.029674,0.023739,0.800587,0.129032,0.070381,0.800587,0.129032,0.070381
CK1D,0.047619,0.084942,0.082368,0.011583,0.029601,0.023166,0.052767,0.034749,0.047619,0.024453,0.034749,0.009009,0.001287,0.011583,0.069498,0.050193,0.015444,0.050193,0.087516,0.123552,0.061776,0.029601,0.016731,0.029601,0.068211,0.077220,0.010296,0.036036,0.014157,0.046332,0.023166,0.046332,0.016731,0.032175,0.002574,0.005148,0.020592,0.069498,0.054054,0.036036,0.033462,0.129987,0.114543,0.074646,0.038610,0.020592,0.019305,0.077220,0.032175,0.007722,...,0.055627,0.018111,0.015524,0.001294,0.025873,0.028461,0.065977,0.009056,0.023286,0.001294,0.007762,0.015524,0.041397,0.031048,0.047865,0.036223,0.131953,0.125485,0.098318,0.029754,0.015524,0.036316,0.077821,0.079118,0.009079,0.018158,0.006485,0.053178,0.050584,0.067445,0.025940,0.042802,0.006485,0.002594,0.012970,0.029831,0.038911,0.031128,0.042802,0.106355,0.143969,0.067445,0.018158,0.032425,0.745174,0.213642,0.041184,0.745174,0.213642,0.041184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YANK3,0.045607,0.056626,0.047212,0.043637,0.039769,0.039769,0.036632,0.039186,0.040937,0.039623,0.039696,0.039769,0.040937,0.036996,0.045023,0.061223,0.033421,0.037872,0.050861,0.038091,0.050569,0.050569,0.045972,0.046653,0.058438,0.045328,0.041004,0.038145,0.038145,0.036192,0.033264,0.030962,0.038563,0.031172,0.033961,0.035286,0.031311,0.039470,0.056485,0.034379,0.038145,0.056695,0.050976,0.070711,0.070711,0.044003,0.029846,0.028145,0.027027,0.025520,...,0.023833,0.032567,0.029989,0.029989,0.041248,0.046667,0.029989,0.029200,0.033409,0.036460,0.037512,0.033935,0.032620,0.038196,0.028411,0.025885,0.021361,0.029621,0.101331,0.101331,0.178618,0.035920,0.026927,0.029201,0.022936,0.029201,0.029201,0.023694,0.024856,0.024704,0.026473,0.022027,0.026725,0.031272,0.030363,0.035314,0.037031,0.031929,0.029504,0.025715,0.043549,0.060827,0.060827,0.291806,0.526732,0.473268,0.000000,0.526732,0.473268,0.000000
YSK4,0.042943,0.052719,0.053878,0.053154,0.043233,0.043233,0.037439,0.028967,0.031356,0.037077,0.046057,0.043450,0.054674,0.048085,0.038019,0.043233,0.034832,0.044609,0.050112,0.036788,0.050909,0.050909,0.034325,0.045240,0.049676,0.044949,0.047422,0.044949,0.044949,0.034693,0.029966,0.037748,0.041458,0.044367,0.045094,0.051931,0.045531,0.035712,0.038039,0.040076,0.047203,0.048804,0.046913,0.048222,0.048222,0.038839,0.040943,0.049847,0.041673,0.046417,...,0.039677,0.040723,0.043189,0.043189,0.040350,0.032579,0.035941,0.035343,0.042816,0.043189,0.046701,0.048195,0.057835,0.052081,0.043339,0.053800,0.035194,0.033625,0.047971,0.047971,0.029664,0.059132,0.053967,0.042889,0.041692,0.042889,0.042889,0.033308,0.035254,0.036003,0.037949,0.034506,0.036901,0.040344,0.049177,0.060928,0.046257,0.055464,0.046407,0.043787,0.036228,0.047455,0.047455,0.029117,0.441559,0.558441,0.000000,0.441559,0.558441,0.000000


In [None]:
#| echo: false
show_doc(Data.get_aa_info)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L185){target="_blank" style="float:right; font-size:smaller"}

### Data.get_aa_info

>      Data.get_aa_info ()

*Get amino acid information.*

In [None]:
Data.get_aa_info()

Unnamed: 0_level_0,Name,SMILES,MW,pKa1,pKb2,pKx3,pl4,H,VSC,P1,P2,SASA,NCISC,phospho
aa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
A,Alanine,C[C@@H](C(=O)O)N,89.10,2.34,9.69,,6.00,0.62,27.5,8.1,0.046,1.181,0.007187,0
C,Cysteine,C([C@@H](C(=O)O)N)S,121.16,1.96,10.28,8.18,5.07,0.29,44.6,5.5,0.128,1.461,-0.036610,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Kac,Acetyllysine,CC(=O)NCCCC[C@H](N)C(=O)O,188.23,,,,,,,,,,,0
Kme3,Trimethyllysine,C[N+](C)(C)CCCC[C@H](N)C(=O)O,189.28,,,,,,,,,,,0


In [None]:
#| echo: false
show_doc(Data.get_aa_rdkit)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L191){target="_blank" style="float:right; font-size:smaller"}

### Data.get_aa_rdkit

>      Data.get_aa_rdkit ()

*Get RDKit representations of amino acids.*

In [None]:
Data.get_aa_rdkit()

Unnamed: 0_level_0,MaxAbsEStateIndex,MinAbsEStateIndex,MinEStateIndex,qed,MolWt,MinPartialCharge,MaxAbsPartialCharge,FpDensityMorgan1,FpDensityMorgan2,FpDensityMorgan3,BCUT2D_MWHI,BCUT2D_MWLOW,BCUT2D_CHGHI,BCUT2D_CHGLO,BCUT2D_LOGPHI,BCUT2D_LOGPLOW,BCUT2D_MRLOW,AvgIpc,BalabanJ,BertzCT,Chi0n,Chi0v,Chi1,Chi1v,Chi2n,Chi2v,Chi3n,Chi3v,Chi4n,Chi4v,HallKierAlpha,Ipc,Kappa1,Kappa2,Kappa3,PEOE_VSA1,PEOE_VSA10,PEOE_VSA11,PEOE_VSA12,PEOE_VSA14,PEOE_VSA2,PEOE_VSA3,PEOE_VSA4,PEOE_VSA6,PEOE_VSA7,PEOE_VSA8,PEOE_VSA9,SMR_VSA1,SMR_VSA10,SMR_VSA3,...,SlogP_VSA2,SlogP_VSA3,SlogP_VSA4,SlogP_VSA5,SlogP_VSA8,TPSA,EState_VSA1,EState_VSA10,EState_VSA2,EState_VSA3,EState_VSA4,EState_VSA5,EState_VSA6,EState_VSA7,EState_VSA8,EState_VSA9,VSA_EState10,VSA_EState2,VSA_EState3,VSA_EState4,VSA_EState5,VSA_EState6,VSA_EState7,VSA_EState8,VSA_EState9,FractionCSP3,NHOHCount,NOCount,NumAliphaticHeterocycles,NumAromaticCarbocycles,NumAromaticHeterocycles,NumAromaticRings,NumHAcceptors,NumHDonors,NumHeteroatoms,NumRotatableBonds,RingCount,MolLogP,fr_Al_COO,fr_Al_OH,fr_Ar_N,fr_C_O,fr_NH0,fr_NH1,fr_NH2,fr_SH,fr_imidazole,fr_priamide,fr_sulfide,fr_unbrch_alkane
aa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
A,9.574074,0.731481,-0.962963,0.451352,89.094,-0.480094,0.480094,2.000000,2.166667,2.166667,16.367425,10.385341,1.990289,-1.897069,1.676610,-2.250004,-0.137563,1.360964,3.257586,59.813538,3.510162,3.510162,2.642734,1.627090,1.126913,1.126913,0.389528,0.389528,0.000000,0.000000,-0.57,13.609640,5.43,1.767634,1.721545,10.840195,6.041841,0.0,0.00000,5.969305,4.794537,0.0,0.000000,0.0,6.923737,0.000000,0.000000,9.901065,5.969305,0.000000,...,17.117674,4.794537,0.0,6.923737,0.0,63.32,12.011146,4.794537,0.000000,0.000000,0.000000,6.923737,0.0,0.0,0.000000,10.840195,0.000000,9.574074,7.865741,4.835648,-0.962963,-0.731481,0.000000,1.418981,0.000000,0.666667,3.0,3.0,0.0,0.0,0.0,0.0,2.0,2.0,3.0,1.0,0.0,-0.5818,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
C,9.756435,0.189815,-1.004630,0.424382,121.161,-0.480064,0.480064,2.000000,2.428571,2.428571,32.116867,10.376545,2.056705,-1.960761,1.795249,-2.309520,-0.137419,1.645711,3.343417,75.335159,3.664483,4.558910,3.180739,2.406671,1.127305,1.492453,0.513894,0.907286,0.078093,0.234278,-0.22,27.977093,6.78,2.872925,2.472042,10.840195,6.041841,0.0,0.00000,5.969305,4.794537,0.0,12.628789,0.0,0.000000,5.752854,0.000000,9.901065,18.598094,0.000000,...,22.870527,4.794537,0.0,0.000000,0.0,63.32,12.011146,4.794537,5.752854,0.000000,0.000000,0.000000,0.0,0.0,12.628789,10.840195,3.649043,9.756435,8.008102,4.939815,-0.814815,-0.816358,0.000000,0.000000,0.000000,0.666667,3.0,3.0,0.0,0.0,0.0,0.0,3.0,3.0,4.0,2.0,0.0,-0.6719,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Kac,10.419084,0.067099,-0.972660,0.499039,188.227,-0.480086,0.480086,1.461538,2.076923,2.538462,16.367568,10.120530,2.091144,-2.051830,1.973906,-2.297932,-0.138286,2.022803,3.255326,182.227953,7.746838,7.746838,6.036581,4.265665,2.923476,2.923476,1.633048,1.633048,0.877145,0.877145,-1.10,519.860388,11.90,6.419740,7.266972,16.156983,6.041841,0.0,5.90718,5.969305,9.589074,0.0,0.000000,0.0,19.262465,13.468494,0.000000,14.695602,11.876485,5.316789,...,29.569610,9.589074,0.0,26.186202,0.0,92.42,12.011146,9.589074,5.907180,19.386400,6.420822,6.923737,0.0,0.0,5.316789,10.840195,0.000000,20.695587,11.053315,5.271623,-1.039759,-0.780252,1.933755,2.032398,0.000000,0.750000,4.0,5.0,0.0,0.0,0.0,0.0,3.0,3.0,5.0,6.0,0.0,-0.2953,1.0,0.0,0.0,2.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0
Kme3,10.361469,0.578704,-0.897918,0.465620,189.279,-0.480086,0.480086,1.384615,1.923077,2.307692,16.367548,10.113915,2.114811,-2.232682,2.021932,-2.354302,-0.870130,1.951693,3.414940,163.487862,8.785803,8.785803,5.827186,4.615856,4.454444,4.454444,1.911260,1.911260,1.073871,1.073871,-0.61,396.193584,12.39,5.191317,7.741185,15.323226,6.041841,0.0,0.00000,5.969305,4.794537,0.0,0.000000,0.0,19.262465,0.000000,27.687772,14.384095,5.969305,0.000000,...,49.288477,4.794537,0.0,19.262465,0.0,63.32,12.011146,4.794537,0.000000,6.420822,23.869431,0.000000,0.0,0.0,21.143016,10.840195,0.000000,10.361469,8.512288,5.361669,-0.897918,-0.687529,2.502778,1.061844,6.366441,0.888889,3.0,4.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,6.0,0.0,0.2748,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


In [None]:
#| echo: false
show_doc(Data.get_aa_morgan)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L197){target="_blank" style="float:right; font-size:smaller"}

### Data.get_aa_morgan

>      Data.get_aa_morgan ()

*Get Morgan fingerprint representations of amino acids.*

In [None]:
Data.get_aa_morgan()

Unnamed: 0_level_0,morgan_1,morgan_11,morgan_24,morgan_27,morgan_70,morgan_74,morgan_79,morgan_80,morgan_82,morgan_116,morgan_118,morgan_119,morgan_132,morgan_140,morgan_172,morgan_192,morgan_197,morgan_210,morgan_222,morgan_227,morgan_229,morgan_245,morgan_280,morgan_283,morgan_294,morgan_295,morgan_305,morgan_310,morgan_319,morgan_321,morgan_322,morgan_328,morgan_362,morgan_364,morgan_376,morgan_378,morgan_394,morgan_412,morgan_414,morgan_425,morgan_429,morgan_473,morgan_482,morgan_486,morgan_545,morgan_550,morgan_553,morgan_575,morgan_592,morgan_623,...,morgan_1431,morgan_1451,morgan_1452,morgan_1456,morgan_1459,morgan_1507,morgan_1517,morgan_1544,morgan_1558,morgan_1564,morgan_1573,morgan_1595,morgan_1602,morgan_1607,morgan_1633,morgan_1644,morgan_1685,morgan_1693,morgan_1716,morgan_1719,morgan_1736,morgan_1737,morgan_1750,morgan_1751,morgan_1752,morgan_1754,morgan_1758,morgan_1773,morgan_1778,morgan_1783,morgan_1785,morgan_1791,morgan_1819,morgan_1838,morgan_1840,morgan_1844,morgan_1847,morgan_1849,morgan_1873,morgan_1876,morgan_1879,morgan_1882,morgan_1898,morgan_1911,morgan_1912,morgan_1926,morgan_1937,morgan_1942,morgan_1946,morgan_1970
aa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
A,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
C,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Kac,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0
Kme3,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0


In [None]:
#| echo: false
show_doc(Data.get_cptac_ensembl_site)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L204){target="_blank" style="float:right; font-size:smaller"}

### Data.get_cptac_ensembl_site

>      Data.get_cptac_ensembl_site ()

*Get CPTAC dataset with unique EnsemblProteinID+site.*

In [None]:
Data.get_cptac_ensembl_site()

Unnamed: 0,gene,site,site_seq,protein,gene_name,gene_site,protein_site
0,ENSG00000003056.8,S267,DDQLGEESEERDDHL,ENSP00000000412.3,M6PR,M6PR_S267,ENSP00000000412_S267
1,ENSG00000003056.8,S267,DDQLGEESEERDDHL,ENSP00000440488.2,M6PR,M6PR_S267,ENSP00000440488_S267
...,...,...,...,...,...,...,...
488584,ENSG00000143631.11,S648,ASRNHHGSAQEQSRD,ENSP00000357789.1,FLG,FLG_S648,ENSP00000357789_S648
488585,ENSG00000143520.6,S2310,DTTRHGHSGYGQSTQ,ENSP00000373370.4,FLG2,FLG2_S2310,ENSP00000373370_S2310


In [None]:
#| echo: false
show_doc(Data.get_cptac_unique_site)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L210){target="_blank" style="float:right; font-size:smaller"}

### Data.get_cptac_unique_site

>      Data.get_cptac_unique_site ()

*Get CPTAC dataset with unique site sequences.*

In [None]:
Data.get_cptac_unique_site()

Unnamed: 0,site_seq,gene_site,num_site,acceptor
0,AAAAAAASFPWSAFG,ZBTB7A_S182,1,S
1,AAAAAAASGAAGGGG,INTS3_S16,1,S
...,...,...,...,...
125474,______MYPAGPPAG,TIGD5_Y2,1,Y
125475,_______SPASLPLA,RFLNB_S1,1,S


In [None]:
#| echo: false
show_doc(Data.get_cptac_gene_site)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L216){target="_blank" style="float:right; font-size:smaller"}

### Data.get_cptac_gene_site

>      Data.get_cptac_gene_site ()

*Get CPTAC dataset with unique Gene+site.*

In [None]:
Data.get_cptac_gene_site()

Unnamed: 0,gene,site,site_seq,protein,gene_name,gene_site,protein_site
0,ENSG00000003056.8,S267,DDQLGEESEERDDHL,ENSP00000000412.3,M6PR,M6PR_S267,ENSP00000000412_S267
1,ENSG00000048028.11,S1053,PPTIRPNSPYDLCSR,ENSP00000003302.4,USP28,USP28_S1053,ENSP00000003302_S1053
...,...,...,...,...,...,...,...
126223,ENSG00000143631.11,S648,ASRNHHGSAQEQSRD,ENSP00000357789.1,FLG,FLG_S648,ENSP00000357789_S648
126224,ENSG00000143520.6,S2310,DTTRHGHSGYGQSTQ,ENSP00000373370.4,FLG2,FLG2_S2310,ENSP00000373370_S2310


In [None]:
#| echo: false
show_doc(Data.get_psp_human_site)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L222){target="_blank" style="float:right; font-size:smaller"}

### Data.get_psp_human_site

>      Data.get_psp_human_site ()

*Get PhosphoSitePlus human dataset (Gene+site).*

In [None]:
Data.get_psp_human_site()

Unnamed: 0,gene,protein,uniprot,site,gene_site,SITE_GRP_ID,species,site_seq,LT_LIT,MS_LIT,MS_CST,CST_CAT#,Ambiguous_Site
0,YWHAB,14-3-3 beta,P31946,T2,YWHAB_T2,15718712,human,______MtMDksELV,,3.0,1.0,,0
1,YWHAB,14-3-3 beta,P31946,S6,YWHAB_S6,15718709,human,__MtMDksELVQkAk,,8.0,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
240009,ZZZ3,ZZZ3,Q8IYH5,S677,ZZZ3_S677,23077721,human,yPPEEVEsRRWQKIA,,,1.0,,0
240010,ZZZ3,ZZZ3,Q8IYH5,S777,ZZZ3_S777,41455930,human,NTAVEDAsDDESIPI,,2.0,,,0


In [None]:
#| echo: false
show_doc(Data.get_ochoa_site)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L228){target="_blank" style="float:right; font-size:smaller"}

### Data.get_ochoa_site

>      Data.get_ochoa_site ()

*Get phosphoproteomics dataset from Ochoa et al.*

In [None]:
Data.get_ochoa_site()

Unnamed: 0,uniprot,position,residue,is_disopred,disopred_score,log10_hotspot_pval_min,isHotspot,uniprot_position,functional_score,current_uniprot,name,gene,Sequence,is_valid,site_seq,gene_site
0,A0A075B6Q4,24,S,True,0.91,6.839384,True,A0A075B6Q4_24,0.149257,A0A075B6Q4,A0A075B6Q4_HUMAN,,MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT...,True,VDDEKGDSNDDYDSA,A0A075B6Q4_S24
1,A0A075B6Q4,35,S,True,0.87,9.192622,False,A0A075B6Q4_35,0.136966,A0A075B6Q4,A0A075B6Q4_HUMAN,,MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT...,True,YDSAGLLSDEDCMSV,A0A075B6Q4_S35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112279,V9GYY5,134,T,True,0.83,2.055830,False,V9GYY5_134,0.187417,V9GYY5,V9GYY5_HUMAN,,KRDGDDRRPRLVLSFDEEKRREYLTGFHKRKVERKKAAIEEIKQRL...,True,SEEEASSTEKPTKAL,V9GYY5_T134
112280,V9GYY5,138,T,True,0.82,0.726611,False,V9GYY5_138,0.121025,V9GYY5,V9GYY5_HUMAN,,KRDGDDRRPRLVLSFDEEKRREYLTGFHKRKVERKKAAIEEIKQRL...,True,ASSTEKPTKALPRKS,V9GYY5_T138


In [None]:
#| echo: false
show_doc(Data.get_combine_site_psp_ochoa)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L234){target="_blank" style="float:right; font-size:smaller"}

### Data.get_combine_site_psp_ochoa

>      Data.get_combine_site_psp_ochoa ()

*Get the combined dataset from Ochoa and PhosphoSitePlus.*

In [None]:
Data.get_combine_site_psp_ochoa()

Unnamed: 0,uniprot,gene,site,site_seq,source,AM_pathogenicity,CDDM_upper,CDDM_max_score
0,A0A024R4G9,C19orf48,S20,ITGSRLLSMVPGPAR,psp,,"PRKX,AKT1,PKG1,P90RSK,HIPK4,AKT3,HIPK1,PKACB,H...",2.407041
1,A0A075B6Q4,,S24,VDDEKGDSNDDYDSA,ochoa,,"CK2A2,CK2A1,GRK7,GRK5,CK1G1,CK1A,IKKA,CK1G2,CA...",2.295654
...,...,...,...,...,...,...,...,...
121417,V9GYY5,,T134,SEEEASSTEKPTKAL,ochoa,,"ASK1,PERK,EEF2K,MAP2K4,MEKK2,MST1,BMPR1B,OSR1,...",1.832532
121418,V9GYY5,,T138,ASSTEKPTKALPRKS,ochoa,,"ASK1,MEK2,MPSK1,TNIK,PBK,MST2,MINK,NEK4,LKB1,MEK5",1.807565


In [None]:
#| echo: false
show_doc(Data.get_combine_site_phosphorylated)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L243){target="_blank" style="float:right; font-size:smaller"}

### Data.get_combine_site_phosphorylated

>      Data.get_combine_site_phosphorylated ()

*Get the combined phosphorylated dataset from Ochoa and PhosphoSitePlus.*

In [None]:
Data.get_combine_site_phosphorylated()

Unnamed: 0,uniprot,gene,site,site_seq,source,AM_pathogenicity,CDDM,PSPA,CDDM_max_score,PSPA_max_score
0,A0A024R4G9,C19orf48,S20,ITGSRLLsMVPGPAR,psp,,"PRKX,PKG1,AKT1,AKT3,HIPK4,P90RSK,PKACB,PKACA,P...","MAPKAPK5,AKT1,RSK3,P70S6K,MAPKAPK3,AKT2,DYRK1A...",2.339278,3.726109
1,A0A075B6Q4,,S24,VDDEKGDsNDDYDSA,ochoa,,"CK2A2,CK2A1,GRK7,GRK5,CK1G1,IKKA,CAMK1D,MARK2,...","CAMK2B,CK2A2,CAMK2A,CK2A1,GRK7,TLK2,FAM20C,CAM...",2.253027,4.940056
...,...,...,...,...,...,...,...,...,...,...
120102,V9GYY5,,T134,sEEEAsstEKPtKAL,ochoa,,"PERK,ASK1,EEF2K,MST1,BMPR1B,PBK,MEKK2,OSR1,MST...","CK1G2,GSK3A,ALPHAK3,GRK1,GRK7,GSK3B,BMPR1B,BMP...",1.723089,7.009429
120103,V9GYY5,,T138,AsstEKPtKALPRKS,ochoa,,"ASK1,PBK,TNIK,MPSK1,MINK,MST2,NEK4,MEK2,MST1,BUB1","CK1G3,CK1G2,CK1A2,CK1D,CK1A,GRK3,PASK,GRK2,CK1...",1.651888,4.350109


In [None]:
#| echo: false
show_doc(Data.get_human_site)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L252){target="_blank" style="float:right; font-size:smaller"}

### Data.get_human_site

>      Data.get_human_site ()

*Get the combined phosphorylated dataset from Ochoa and PhosphoSitePlus (extended version: `site_seq` spans 20 residues on each side of the site).*

In [None]:
Data.get_human_site()

Unnamed: 0,substrate_uniprot,substrate_genes,site,source,AM_pathogenicity,substrate_sequence,substrate_species,sub_site,substrate_phosphoseq,position,site_seq
0,A0A024R4G9,C19orf48 MGC13170 hCG_2008493,S20,psp,,MTVLEAVLEIQAITGSRLLSMVPGPARPPGSCWDPTQCTRTWLLSH...,Homo sapiens (Human),A0A024R4G9_S20,MTVLEAVLEIQAITGSRLLsMVPGPARPPGSCWDPTQCTRTWLLSH...,20,_MTVLEAVLEIQAITGSRLLsMVPGPARPPGSCWDPTQCTR
1,A0A075B6Q4,,S24,ochoa,,MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT...,Homo sapiens (Human),A0A075B6Q4_S24,MDIQKSENEDDSEWEDVDDEKGDsNDDYDSAGLLsDEDCMSVPGKT...,24,QKSENEDDSEWEDVDDEKGDsNDDYDSAGLLsDEDCMSVPG
...,...,...,...,...,...,...,...,...,...,...,...
121330,V9GYY5,,T134,ochoa,,KRDGDDRRPRLVLSFDEEKRREYLTGFHKRKVERKKAAIEEIKQRL...,Homo sapiens (Human),V9GYY5_T134,KRDGDDRRPRLVLSFDEEKRREYLTGFHKRKVERKKAAIEEIKQRL...,134,LGLtPPEGGAGDRsEEEAsstEKPtKALPRKSRDPLLSQRI
121331,V9GYY5,,T138,ochoa,,KRDGDDRRPRLVLSFDEEKRREYLTGFHKRKVERKKAAIEEIKQRL...,Homo sapiens (Human),V9GYY5_T138,KRDGDDRRPRLVLSFDEEKRREYLTGFHKRKVERKKAAIEEIKQRL...,138,PPEGGAGDRsEEEAsstEKPtKALPRKSRDPLLSQRISSLT


## CPTAC

In [None]:
#| export
class CPTAC:
    "A class for fetching CPTAC phosphoproteomics data."

    @staticmethod
    def _fetch_data(cancer: str, # cancer type CPTAC
                    is_Tumor: bool=True, # tumor tissue or normal
                    is_KB: bool=False, # whether it is for LinkedOmicsKB or LinkedOmics
                   ):
        """
        Download the phosphosite table of one CPTAC cancer type and map its IDs.

        Returns a DataFrame with columns `gene`, `site`, `site_seq`, `protein`,
        `gene_name`, `gene_site` and `protein_site`, de-duplicated on
        `protein_site` when `is_KB` is True and on `gene_site` otherwise.
        If the phosphoproteomics file cannot be loaded, the error is printed
        and `None` is returned (best effort); a failure to load the ID mapping
        file is not caught and propagates to the caller.
        """
        # URL of ID and data
        sample_type = "Tumor" if is_Tumor else "Normal"
        ID_URL = f"https://zenodo.org/records/8196130/files/bcm-{cancer.lower()}-mapping-gencode.v34.basic.annotation-mapping.txt.gz"
        DATA_URL = f"https://cptac-pancancer-data.s3.us-west-2.amazonaws.com/data_freeze_v1.2_reorganized/{cancer.upper()}/{cancer.upper()}_phospho_site_abundance_log2_reference_intensity_normalized_{sample_type}.txt"

        # Load ID data: one row per unique (protein, gene, gene_name) combination
        ref = (pd.read_csv(ID_URL, compression='gzip', sep='\t')[['protein','gene','gene_name']]
               .drop_duplicates()
               .reset_index(drop=True))

        # Load CPTAC phosphoproteomics data; report and bail out if unavailable
        try:
            raw = pd.read_csv(DATA_URL, sep='\t')
        except Exception as e:
            print(f'{cancer} has {e}')
            return None

        # `idx` is '|'-separated; fields 0, 2 and 3 hold gene, site and site sequence.
        # Split once and reuse the pieces instead of re-splitting per column.
        parts = raw['idx'].str.split('|')
        info = pd.DataFrame({'gene': parts.str[0],
                             'site': parts.str[2],
                             'site_seq': parts.str[3]})
        print(f'the {cancer} dataset length is: {info.shape[0]}')

        # Merge ensembl ID with gene name (joins on the shared `gene` column;
        # a gene with several proteins yields several rows per site)
        info = info.merge(ref, how='left')
        print(f'after id mapping, the length is {info.shape[0]}')

        print(f'{info.gene_name.isna().sum()} sites does not have a mapped gene name')

        info['gene_site'] = info['gene_name'] + '_' + info['site']
        # Drop the version suffix after '.' from the protein ID
        info['protein_site'] = info['protein'].str.split('.').str[0] + '_' + info['site']

        # NOTE(review): rows without a mapped gene_name get a NaN gene_site, and
        # drop_duplicates treats NaN values as equal, so with is_KB=False such rows
        # collapse into a single row - confirm this is intended.
        dedup_key = "protein_site" if is_KB else "gene_site"
        info = info.drop_duplicates(subset=dedup_key).reset_index(drop=True)
        # Report the key that was actually used rather than always 'protein_site'
        print(f'after removing duplicates of {dedup_key}, the length is {info.shape[0]}')

        return info


    @staticmethod
    def list_cancer():
        "List available CPTAC cancer types"
        return ['HNSCC','GBM','COAD','CCRCC','LSCC','BRCA','UCEC','LUAD','PDAC','OV']

    @staticmethod
    def get_id(cancer_type: str, # cancer type (any letter case), see CPTAC.list_cancer()
               is_Tumor: bool=True, # tumor tissue or normal
               is_KB: bool=False, # whether it is for LinkedOmicsKB or LinkedOmics
              ):
        """
        Get CPTAC phosphorylation sites information given a cancer type.

        Raises AssertionError if `cancer_type` is not one of `CPTAC.list_cancer()`.
        Returns the site DataFrame, or `None` if the data file could not be loaded.
        """
        # Accept any letter case: the download URLs are case-normalised anyway
        is_known = isinstance(cancer_type, str) and cancer_type.upper() in CPTAC.list_cancer()
        assert is_known, "cancer type is not included, check available cancer types from CPTAC.list_cancer()"
        return CPTAC._fetch_data(cancer_type.upper(), is_Tumor, is_KB)

In [None]:
show_doc(CPTAC.list_cancer)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L307){target="_blank" style="float:right; font-size:smaller"}

### CPTAC.list_cancer

>      CPTAC.list_cancer ()

*List available CPTAC cancer type*

In [None]:
CPTAC.list_cancer()

['HNSCC', 'GBM', 'COAD', 'CCRCC', 'LSCC', 'BRCA', 'UCEC', 'LUAD', 'PDAC', 'OV']

In [None]:
show_doc(CPTAC.get_id)

---

[source](https://github.com/sky1ove/katlas/blob/main/katlas/data.py#L312){target="_blank" style="float:right; font-size:smaller"}

### CPTAC.get_id

>      CPTAC.get_id (cancer_type:str, is_Tumor:bool=True, is_KB:bool=False)

*Get CPTAC phosphorylation sites information given a cancer type*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| cancer_type | str |  |  |
| is_Tumor | bool | True | tumor tissue or normal |
| is_KB | bool | False | whether it is for LinkedOmicsKB or LinkedOmics |

Use `CPTAC.get_id()` to load CPTAC phosphorylation site information. Fold changes for various conditions can be obtained from [LinkedOmics](https://www.linkedomics.org/login.php) or [LinkedOmicsKB](https://kb.linkedomics.org/). Set `is_KB=True` to get site identifiers for LinkedOmicsKB (one row per `protein_site`), or leave `is_KB=False` for LinkedOmics (one row per `gene_site`).

In [None]:
# Normal tissue (is_Tumor=False); is_KB=True de-duplicates on `protein_site`,
# i.e. the site IDs intended for LinkedOmicsKB.
CPTAC.get_id('CCRCC',is_KB=True, is_Tumor=False)

the CCRCC dataset length is: 53152
after id mapping, the length is 209188
0 sites does not have a mapped gene name
after removing duplicates of protein_site, the length is 208298


Unnamed: 0,gene,site,site_seq,protein,gene_name,gene_site,protein_site
0,ENSG00000003056.8,S267,DDQLGEESEERDDHL,ENSP00000000412.3,M6PR,M6PR_S267,ENSP00000000412_S267
1,ENSG00000003056.8,S267,DDQLGEESEERDDHL,ENSP00000440488.2,M6PR,M6PR_S267,ENSP00000440488_S267
...,...,...,...,...,...,...,...
208296,ENSG00000145362.20,Y1455,LNITLPIYTKESESD,ENSP00000500947.1,ANK2,ANK2_Y1455,ENSP00000500947_Y1455
208297,ENSG00000145362.20,Y1455,LNITLPIYTKESESD,ENSP00000500883.1,ANK2,ANK2_Y1455,ENSP00000500883_Y1455


In [None]:
# Tumor tissue (is_Tumor=True); is_KB=True de-duplicates on `protein_site`,
# i.e. the site IDs intended for LinkedOmicsKB.
CPTAC.get_id('CCRCC',is_KB=True, is_Tumor=True)

the CCRCC dataset length is: 54238
after id mapping, the length is 213737
0 sites does not have a mapped gene name
after removing duplicates of protein_site, the length is 212814


Unnamed: 0,gene,site,site_seq,protein,gene_name,gene_site,protein_site
0,ENSG00000003056.8,S267,DDQLGEESEERDDHL,ENSP00000000412.3,M6PR,M6PR_S267,ENSP00000000412_S267
1,ENSG00000003056.8,S267,DDQLGEESEERDDHL,ENSP00000440488.2,M6PR,M6PR_S267,ENSP00000440488_S267
...,...,...,...,...,...,...,...
212812,ENSG00000145362.20,Y2928,DPQITSPYENVPSQS,ENSP00000500947.1,ANK2,ANK2_Y2928,ENSP00000500947_Y2928
212813,ENSG00000145362.20,Y2928,DPQITSPYENVPSQS,ENSP00000500883.1,ANK2,ANK2_Y2928,ENSP00000500883_Y2928


## End

In [None]:
#| hide
# Export the cells marked `#| export` in this notebook to the library module
# selected by `#| default_exp data`.
import nbdev; nbdev.nbdev_export()