# Find all human kinases in ChEMBL

This notebook maps the UniProt IDs of human kinases to ChEMBL target IDs and writes the result to a helper CSV file (`data/human_kinases_and_chembl_targets.csv`) that is useful in other notebooks.

In [7]:
from pathlib import Path

import pandas as pd
from chembl_webresource_client.new_client import new_client as chembl
from chembl_webresource_client.settings import Settings
# Set the ChEMBL client's CACHE setting to False on its Settings singleton.
# NOTE(review): presumably this makes every query hit the live web service
# instead of a local result cache — confirm against the client documentation.
Settings.Instance().CACHE = False

In [2]:
# `_dh` is IPython's directory history; its last entry is the current working
# directory. The repository root is taken to be that directory's parent.
REPO = Path(_dh[-1]).resolve().parent
DATA = REPO.joinpath('data')

Load the list of human kinases, as obtained from http://kinhub.org/kinases.html.

In [4]:
# Read the KinHub kinase table from the data directory and display it.
kinases = pd.read_csv(DATA.joinpath("KinHubKinaseList.csv"))
kinases

Unnamed: 0,xName,Manning Name,HGNC Name,Kinase Name,Group,Family,SubFamily,UniprotID
0,ABL1,ABL,ABL1,Tyrosine-protein kinase ABL1,TK,Abl,,P00519
1,ACK,ACK,TNK2,Activated CDC42 kinase 1,TK,Ack,,Q07912
2,ACTR2,ACTR2,ACVR2A,Activin receptor type-2A,TKL,STKR,STKR2,P27037
3,ACTR2B,ACTR2B,ACVR2B,Activin receptor type-2B,TKL,STKR,STKR2,Q13705
4,ADCK4,ADCK4,ADCK4,Uncharacterized aarF domain-containing protein...,Atypical,ABC1,ABC1-A,Q96D53
...,...,...,...,...,...,...,...,...
531,GTF2F1,GTF2F1,,,Atypical,GTF2F1,,Q6IBK5
532,Col4A3BP,Col4A3BP,COL4A3BP,Collagen type IV alpha-3-binding protein,Atypical,Col4A3BP,,Q9Y5P4
533,BLVRA,BLVRA,BLVRA,Biliverdin reductase A,Atypical,BLVRA,,P53004
534,BAZ1A,BAZ1A,BAZ1A,Bromodomain adjacent to zinc finger domain pro...,Atypical,BAZ,,Q9NRL2


In [5]:
def get_target_chembl_ids_for_column(uniprot_ids):
    """
    Look up the ChEMBL target IDs associated with each UniProt accession.

    Every distinct accession is queried once against the ChEMBL ``target``
    resource, restricted to ``organism="Homo sapiens"`` and
    ``target_type="SINGLE PROTEIN"``. The output is aligned with the input,
    so it can be used with ``pd.DataFrame.assign(...)`` (typically followed
    by ``.explode(...)``).

    Parameters
    ----------
    uniprot_ids : iterable of str
        UniProt accessions, one per row. Missing entries (``None`` / ``NaN``)
        are tolerated and are not sent to the web service.

    Returns
    -------
    list of list of str
        One list of ``target_chembl_id`` values per input entry, in input
        order. The inner list is empty when the accession is missing or the
        query returned no targets.
    """
    # Memoise per accession: each lookup is a web request, so accessions that
    # occur on several rows should only be fetched once per call.
    targets_by_accession = {}
    results = []
    for uniprot_id in uniprot_ids:
        if pd.isna(uniprot_id):
            # No accession to look up; keep the row aligned with an empty hit list.
            results.append([])
            continue
        if uniprot_id not in targets_by_accession:
            targets_by_accession[uniprot_id] = [
                t['target_chembl_id'] for t in chembl.target.filter(
                    target_components__accession=uniprot_id,
                    organism="Homo sapiens",
                    target_type="SINGLE PROTEIN"
                )
            ]
        # Hand out a fresh list per row so mutating one row's result cannot
        # affect another row that shares the same accession.
        results.append(list(targets_by_accession[uniprot_id]))
    return results

In [8]:
# Attach the per-kinase list of ChEMBL target IDs, give each target its own
# row, and discard kinases for which the lookup returned no target.
df = (
    kinases
    .assign(
        chembl_targets=lambda frame: get_target_chembl_ids_for_column(
            frame["UniprotID"].tolist()
        )
    )
    .explode("chembl_targets")
    .dropna(subset=["chembl_targets"])
)
# Persist the mapping so it can be loaded elsewhere without re-querying.
df.to_csv(DATA / "human_kinases_and_chembl_targets.csv")
df

Unnamed: 0,xName,Manning Name,HGNC Name,Kinase Name,Group,Family,SubFamily,UniprotID,chembl_targets
0,ABL1,ABL,ABL1,Tyrosine-protein kinase ABL1,TK,Abl,,P00519,CHEMBL1862
1,ACK,ACK,TNK2,Activated CDC42 kinase 1,TK,Ack,,Q07912,CHEMBL4599
2,ACTR2,ACTR2,ACVR2A,Activin receptor type-2A,TKL,STKR,STKR2,P27037,CHEMBL5616
3,ACTR2B,ACTR2B,ACVR2B,Activin receptor type-2B,TKL,STKR,STKR2,Q13705,CHEMBL5466
4,ADCK4,ADCK4,ADCK4,Uncharacterized aarF domain-containing protein...,Atypical,ABC1,ABC1-A,Q96D53,CHEMBL5753
...,...,...,...,...,...,...,...,...,...
527,TAF1L,TAF1L,TAF1L,Transcription initiation factor TFIID subunit ...,Atypical,TAF1,,Q8IZX4,CHEMBL3108641
528,BCR,BCR,BCR,Breakpoint cluster region protein,Atypical,BCR,,P11274,CHEMBL5146
532,Col4A3BP,Col4A3BP,COL4A3BP,Collagen type IV alpha-3-binding protein,Atypical,Col4A3BP,,Q9Y5P4,CHEMBL3399913
534,BAZ1A,BAZ1A,BAZ1A,Bromodomain adjacent to zinc finger domain pro...,Atypical,BAZ,,Q9NRL2,CHEMBL4105737
