---
badges: true
author: "Samdani Ansar"
categories:
- Kinome
date: '2023-04-23'
title: Mapping the Kinome
description: Creating common numbering scheme for the all kinases
toc: true
image: images/kinome.png

---

This notebook will provide function on how to create common kinase numbering using two different schemes KLIFS and DUNBARK.

# Creating master file

## KLIFS

### Using structure

In [41]:
import requests
import pandas as pd
from Bio import SeqIO
from Bio import pairwise2
from io import StringIO
from Bio.pairwise2 import format_alignment
#from Bio.Align.PairwiseAligner
pd.set_option("display.max_rows", 5)
pd.set_option('display.max_columns', 10)

class KLIFSparse:
    def __init__(self):
        self.KLIFS = None
        self.KLIFS_PDB = None
        self.KLIFS_pocket = None
        self.KLIFS_conformation = None

    def kinase_info(self):
        # 1. Parsing KLIFS annotation the informations
        # ~~ For Human
        response = requests.get('https://klifs.net/api_v2/kinase_information?species=HUMAN')
        KLIFS = pd.DataFrame(
            columns=['kinase_ID', 'name', 'gene_name', 'family', 'group', 'subfamily', 'species', 'full_name',
                     'uniprot', 'iuphar', 'pocket'])
        for f in response.json():
            TEMP_DICT = pd.DataFrame(f,index=[0])
            KLIFS = pd.concat([KLIFS,TEMP_DICT], ignore_index=True)
#             KLIFS = KLIFS.append(f, ignore_index=True)

        # ~~ For Mouse
        response = requests.get('https://klifs.net/api_v2/kinase_information?species=MOUSE')
        for f in response.json():
            TEMP_DICT = pd.DataFrame(f,index=[0])
            KLIFS = pd.concat([KLIFS,TEMP_DICT], ignore_index=True)
#             KLIFS = KLIFS.append(f, ignore_index=True)
        self.KLIFS = KLIFS

    def structure_info(self):
        KLIFS_PDB = pd.DataFrame(
            columns=['structure_ID', 'kinase', 'species', 'kinase_ID', 'pdb', 'alt', 'chain', 'rmsd1', 'rmsd2',
                     'pocket', 'resolution', 'quality_score', 'missing_residues', 'missing_atoms', 'ligand',
                     'allosteric_ligand', 'DFG', 'aC_helix', 'Grich_distance', 'Grich_angle', 'Grich_rotation', 'front',
                     'gate', 'back', 'fp_I', 'fp_II', 'bp_I_A', 'bp_I_B', 'bp_II_in', 'bp_II_A_in', 'bp_II_B_in',
                     'bp_II_out', 'bp_II_B', 'bp_III', 'bp_IV', 'bp_V'])
        for kinase_id in self.KLIFS['kinase_ID']:
            response = requests.get('https://klifs.net/api_v2/structures_list?kinase_ID=' + str(kinase_id))
            if response.status_code == 200:
                for f in response.json():
                    TEMP_DICT = pd.DataFrame(f,index=[0],dtype=object)
                    KLIFS_PDB = pd.concat([KLIFS_PDB,TEMP_DICT], ignore_index=True)
#                     KLIFS_PDB = KLIFS_PDB.append(f, ignore_index=True)
        self.KLIFS_PDB = KLIFS_PDB

    def pocket_mapping(self):
        KLIFS_pocket = pd.DataFrame(columns=['structure_ID', 'KLIFS_RES', 'RES_KLIFS'])
        # for i, cdetails in self.KLIFS_PDB:
        for sid in self.KLIFS_PDB['structure_ID']:
            # Add the x-ray number to KLIFS number mapping
            KLIFS_RES = {}  # KLIFS to Residue mapping dict
            RES_KLIFS = {}  # Residue to KLIFS mapping dict
            response = requests.get(
                'https://klifs.net/api_v2/interactions_match_residues?structure_ID=' + str(sid))
            for c in response.json():
                if c['index'] not in KLIFS_RES:
                    KLIFS_RES[c['index']] = 0
                if c['Xray_position'] not in RES_KLIFS:
                    if c['Xray_position'] == '_':
                        RES_KLIFS[c['Xray_position']] = []
                    else:
                        RES_KLIFS[c['Xray_position']] = 0
                if c['Xray_position'] == '_':
                    KLIFS_RES[c['index']] = 0
                elif c['Xray_position'] == None: #Some structure PDB's are not mapped well in KLIFS
                    KLIFS_RES[c['index']] = 0
                else:
                    KLIFS_RES[c['index']] = c['Xray_position']
                if c['Xray_position'] == '_':
                    RES_KLIFS[c['Xray_position']].append(c['index'])
                elif c['Xray_position'] == None:  #Some structure PDB's are not mapped well in KLIFS
                    pass
                else:
                    RES_KLIFS[c['Xray_position']] = c['index']
            # temp_dict.update({'KLIFS_RES': KLIFS_RES, 'RES_KLIFS': RES_KLIFS, 'structure_ID': cdetails['structure_ID'],
            #                   'DFG': cdetails['DFG'], 'aC_helix': cdetails['aC_helix'],
            #                   'Grich_distance': cdetails['Grich_distance'], 'Grich_angle': cdetails['Grich_angle'],
            #                   'Grich_rotation': cdetails['Grich_rotation'], 'front': cdetails['front'],
            #                   'gate': cdetails['gate'], 'back': cdetails['back'],
            #                   'allosteric_ligand_ID': cdetails['allosteric_ligand_ID'],
            #                   'curation_flag': cdetails['curation_flag'], 'ligand_ID': cdetails['ligand_ID']})
            TEMP_DICT = pd.DataFrame({'structure_ID': sid, 'KLIFS_RES': KLIFS_RES, 'RES_KLIFS': RES_KLIFS},index=[0])
            KLIFS_pocket = pd.concat([KLIFS_pocket,TEMP_DICT], ignore_index=True)  
#             KLIFS_pocket = KLIFS_pocket.append({'structure_ID': sid, 'KLIFS_RES': KLIFS_RES, 'RES_KLIFS': RES_KLIFS},ignore_index=True)
        self.KLIFS_pocket = KLIFS_pocket

    # def get_structure(self):
    #     response = requests.get('https://klifs.net/api_v2/structure_get_pdb_complex?structure_ID='+structure_id)
    def structure_conformation(self):
        KLIFS_conformation = pd.DataFrame(
            columns=['structure_ID', 'DFG', 'ac_helix', 'ploop_angle', 'ploop_distance', 'ploop_rotation',
                     'mobitz_dihedral', 'reference_distance', 'distance_67_82', 'distance_67_82_out', 'dfg_angle_d_f',
                     'dfg_d_rotation', 'dfg_f_rotation', 'dfg_d_outer_rotation', 'dfg_f_outer_rotation',
                     'aloop_rotation', 'ac_helix_distance', 'salt_bridge_17_24'])
        for sid in self.KLIFS_PDB['structure_ID']:
            response = requests.get(
                'https://klifs.net/api_v2/structure_conformation?structure_ID=' + str(sid))
            for c in response.json():
                TEMP_DICT = pd.DataFrame(c,index=[0])
                KLIFS_conformation = pd.concat([KLIFS_conformation,TEMP_DICT], ignore_index=True)  
#                 KLIFS_conformation = KLIFS_conformation.append(c, ignore_index=True)
        self.KLIFS_conformation = KLIFS_conformation


In [None]:
def get_uniprot_data(gene):
    UID = get_uid(gene) #get uniprot id for gene name
    # parsing uniprot gene seq
    response = requests.get('http://www.uniprot.org/uniprot/'+UID+'.fasta')
    fastq_io = StringIO(response.text)
    fasta_data = SeqIO.parse(fastq_io,format='fasta')
    seq='' 
    for z in fasta_data:
        seq=str(z.seq)
    return UID,seq

def get_uid(gene,txid):
    import requests
    response = requests.get('https://biodbnet-abcc.ncifcrf.gov/webServices/rest.php/biodbnetRestApi.json?method=db2db&input=Gene%20symbol%20and%20synonyms&inputValues='+gene+'&outputs=UniProt%20Accession&taxonId='+txid)
    UNIPROT=''
    if len(response.json().get('0').get('outputs')) == 0:
        pass
    else:
        uids = response.json().get('0').get('outputs').get('UniProt Accession')
        for i in uids:
            response = requests.get('https://rest.uniprot.org/uniprotkb/search?query='+i)
            UID_RESULTS=response.json()
            if UID_RESULTS['results'][0]['entryType'] == 'UniProtKB reviewed (Swiss-Prot)':
                UNIPROT=UID_RESULTS['results'][0]['primaryAccession']
                break
    return UNIPROT

In [44]:
remote_data = KLIFSparse()

In [45]:
remote_data.kinase_info() #Get kinase information of mouse and human

In [46]:
remote_data.KLIFS #Dataframe of details about kinase

Unnamed: 0,kinase_ID,name,gene_name,family,group,...,species,full_name,uniprot,iuphar,pocket
0,1,AKT1,AKT1,Akt,AGC,...,Human,v-akt murine thymoma viral oncogene homolog 1,P31749,1479,KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME...
1,2,AKT2,AKT2,Akt,AGC,...,Human,v-akt murine thymoma viral oncogene homolog 2,P31751,1480,KLLGKGTFGKVILYAMKILHTVTESRVLQNTRPFLTALKYACFVME...
...,...,...,...,...,...,...,...,...,...,...,...
1125,1126,Plk5,Plk5,PLK,Other,...,Mouse,polo like kinase 5,Q4FZD7,0,0
1126,1127,Efna2,Efna2,Eph,TK,...,Mouse,ephrin A2,P52801,0,0


In [47]:
remote_data.KLIFS.columns

Index(['kinase_ID', 'name', 'gene_name', 'family', 'group', 'subfamily',
       'species', 'full_name', 'uniprot', 'iuphar', 'pocket'],
      dtype='object')

It will take some time ~10min since we have ~13400 structures in KLIFS database.

In [48]:
remote_data.structure_info() #Get structure information by kinase id from the kinase informations

In [49]:
remote_data.KLIFS_PDB #Dataframe of details about kinase structures

Unnamed: 0,structure_ID,kinase,species,kinase_ID,pdb,...,bp_IV,bp_V,curation_flag,ligand_ID,allosteric_ligand_ID
0,2538,AKT1,Human,1,3mvh,...,False,False,False,748,0
1,2539,AKT1,Human,1,4ekk,...,False,False,False,64,0
...,...,...,...,...,...,...,...,...,...,...,...
13415,14156,Pik3c2a,Mouse,1116,7bi4,...,False,False,False,0,0
13416,14155,Pik3c2a,Mouse,1116,7bi4,...,False,False,False,0,0


This step will also take some time ~5-10min for mapping the pocket residue of kinase with KLIFS residue nubmering

In [52]:
remote_data.pocket_mapping() #mapping the pocket residue and klifs residue

In [55]:
remote_data.KLIFS_pocket

Unnamed: 0,structure_ID,KLIFS_RES,RES_KLIFS
0,2538,,
1,2539,,
...,...,...,...
13415,14156,,
13416,14155,,


In [57]:
remote_data.structure_conformation() #Get structure conformation details

In [58]:
remote_data.KLIFS_conformation

Unnamed: 0,structure_ID,DFG,ac_helix,ploop_angle,ploop_distance,...,dfg_d_outer_rotation,dfg_f_outer_rotation,aloop_rotation,ac_helix_distance,salt_bridge_17_24
0,2538,in,in,59.5972,18.0522,...,0,2,161.401,17.4143,2.6197746
1,2539,in,in,55.3905,16.7899,...,342,358,155.097,17.5527,2.6652446
...,...,...,...,...,...,...,...,...,...,...,...
13415,14156,in,in,63.5557,18.9955,...,332,335,154.919,15.9778,5.579536
13416,14155,in,in,63.5557,18.9955,...,332,335,154.919,15.9778,5.579536


In [59]:
remote_data.KLIFS_PDB.columns

Index(['structure_ID', 'kinase', 'species', 'kinase_ID', 'pdb', 'alt', 'chain',
       'rmsd1', 'rmsd2', 'pocket', 'resolution', 'quality_score',
       'missing_residues', 'missing_atoms', 'ligand', 'allosteric_ligand',
       'DFG', 'aC_helix', 'Grich_distance', 'Grich_angle', 'Grich_rotation',
       'front', 'gate', 'back', 'fp_I', 'fp_II', 'bp_I_A', 'bp_I_B',
       'bp_II_in', 'bp_II_A_in', 'bp_II_B_in', 'bp_II_out', 'bp_II_B',
       'bp_III', 'bp_IV', 'bp_V', 'curation_flag', 'ligand_ID',
       'allosteric_ligand_ID'],
      dtype='object')

In [60]:
KLIFS_MASTER = pd.DataFrame(columns=['structure_ID', 'kinase', 'species', 'kinase_ID', 'pdb', 'alt', 'chain',
       'rmsd1', 'rmsd2', 'pocket', 'resolution', 'quality_score',
       'missing_residues', 'missing_atoms', 'ligand', 'allosteric_ligand',
       'DFG', 'aC_helix', 'Grich_distance', 'Grich_angle', 'Grich_rotation',
       'front', 'gate', 'back', 'fp_I', 'fp_II', 'bp_I_A', 'bp_I_B',
       'bp_II_in', 'bp_II_A_in', 'bp_II_B_in', 'bp_II_out', 'bp_II_B',
       'bp_III', 'bp_IV', 'bp_V', 'allosteric_ligand_ID', 'curation_flag',
       'ligand_ID','KLIFS_RES', 'RES_KLIFS','ploop_angle', 'ploop_distance',
       'ploop_rotation', 'mobitz_dihedral', 'reference_distance',
       'distance_67_82', 'distance_67_82_out', 'dfg_angle_d_f',
       'dfg_d_rotation', 'dfg_f_rotation', 'dfg_d_outer_rotation',
       'dfg_f_outer_rotation', 'aloop_rotation', 'ac_helix_distance',
       'salt_bridge_17_24','gene_name', 'family', 'group', 'subfamily','full_name',
        'uniprot', 'iuphar','mutation','modified_residue','covalent-link'])
for i, sdetails in remote_data.KLIFS_PDB[0:100].iterrows():
    temp_dict=sdetails.to_dict()
    #KLIFS Kinase family annotation details
    for k, pdetails in remote_data.KLIFS[remote_data.KLIFS['kinase_ID'] == sdetails['kinase_ID']].iterrows():
        temp_dict.update({'gene_name':pdetails['gene_name'], 'family':pdetails['family'],'group':pdetails['group'],
                         'subfamily':pdetails['subfamily'],'full_name':pdetails['full_name'],'uniprot':pdetails['uniprot'],
                         'iuphar':pdetails['iuphar']})

    #Structure conformation details
    for j, cdetails in remote_data.KLIFS_conformation[remote_data.KLIFS_conformation['structure_ID'] == sdetails['structure_ID']].iterrows():
        temp_dict.update({'ploop_angle':cdetails['ploop_angle'], 'ploop_distance':cdetails['ploop_distance'],
       'ploop_rotation':cdetails['ploop_rotation'], 'mobitz_dihedral':cdetails['mobitz_dihedral'], 'reference_distance':cdetails['reference_distance'],
       'distance_67_82':cdetails['distance_67_82'], 'distance_67_82_out':cdetails['distance_67_82_out'], 'dfg_angle_d_f':cdetails['dfg_angle_d_f'],
       'dfg_d_rotation':cdetails['dfg_d_rotation'], 'dfg_f_rotation':cdetails['dfg_f_rotation'], 'dfg_d_outer_rotation':cdetails['dfg_d_outer_rotation'],
       'dfg_f_outer_rotation':cdetails['dfg_f_outer_rotation'], 'aloop_rotation':cdetails['aloop_rotation'], 'ac_helix_distance':cdetails['ac_helix_distance'],
       'salt_bridge_17_24':cdetails['salt_bridge_17_24']})
    #KLIFS pocket mapped details
    for k, pdetails in remote_data.KLIFS_pocket[remote_data.KLIFS_pocket['structure_ID'] == sdetails['structure_ID']].iterrows():
        temp_dict.update({'KLIFS_RES':pdetails['KLIFS_RES'], 'RES_KLIFS':pdetails['RES_KLIFS']})
    # Get PDB header
    response = requests.get('https://files.rcsb.org/header/'+sdetails['pdb']+'.pdb')
    # Parsing mutation information from SEQRES
    SEQRES=[]
    LINK=[]
    MODRES=[]
    for line in response.text.split('\n'):
        if line[0:6].strip() == 'SEQADV':
            resName=line[12:15].strip() # Name of the PDB residue in conflict.
            chainID=line[16] # PDB  chain identifier.
            seqNum=line[18:22].strip() # PDB  sequence number.
            iCode=line[22] # PDB insertion code.
            if not iCode.strip():
                iCode='X'
            database=line[24:28].strip()     
            dbAccession=line[29:38].strip() # Sequence  database accession number.
            dbRes=line[39:42].strip() # Sequence database residue name.
            dbSeq=line[43:48].strip() # Sequence database sequence number.
            conflict=line[49:70] # Conflict comment.
            COMMENT = ''
            l=len(conflict.split())
            for z in conflict.split():
                l-=1
                if z:
                    COMMENT+=z
                if l != 0:
                    COMMENT+='-'
            if sdetails['chain'] == chainID: #including particular chain id and alt id specific annotation alone
                if sdetails['alt'].strip():
                    if sdetails['alt'] == iCode:
                        SEQRES.append(resName+'_'+chainID+'_'+seqNum+'_'+iCode+'_'+database+'_'+dbAccession+'_'+dbRes+'_'+dbSeq+'_'+COMMENT)
                else:
                    SEQRES.append(resName+'_'+chainID+'_'+seqNum+'_'+iCode+'_'+database+'_'+dbAccession+'_'+dbRes+'_'+dbSeq+'_'+COMMENT)
                        
    # Parsing covalent link information from LINK
        if line[0:6].strip() == 'LINK':
            name1=line[12:16].strip() # Atom name.
            altLoc1=line[16] # Alternate location indicator.
            if not altLoc1.strip():
                altLoc1='X'
            resName1=line[17:20].strip() # Residue  name.
            chainID1=line[21] # Chain identifier.
            resSeq1=line[22:26].strip() # Residue sequence number.
            iCode1=line[26] # Insertion code.
            if not iCode1.strip():
                iCode1='X'
            name2=line[42:46].strip() # Atom name.
            altLoc2=line[46]  # Alternate location indicator.
            if not altLoc2.strip():
                altLoc2='X'
            resName2=line[47:50].strip() # Residue name.
            chainID2=line[51] # Chain identifier.
            resSeq2=line[52:56].strip() # Residue sequence number.
            iCode2=line[56] # Insertion code.
            if not iCode2.strip():
                iCode2='X'
            Length=line[73:78].strip() # Link distance
            if sdetails['chain'] in [ chainID1, chainID2 ]: #including particular chain id and alt id specific annotation alone
                if sdetails['alt'].strip():
                    if sdetails['alt'] in [ iCode1, iCode2 ]:
                        LINK.append(name1+'_'+altLoc1+'_'+resName1+'_'+chainID1+'_'+resSeq1+'_'+iCode1+'_'+name2+'_'+altLoc2+'_'+resName2+'_'+chainID2+'_'+resSeq2+'_'+iCode2+'_'+Length)
                else:
                    LINK.append(name1+'_'+altLoc1+'_'+resName1+'_'+chainID1+'_'+resSeq1+'_'+iCode1+'_'+name2+'_'+altLoc2+'_'+resName2+'_'+chainID2+'_'+resSeq2+'_'+iCode2+'_'+Length)
    # Adding modified residue information eg: phophorylation
        if line[0:6].strip() == 'MODRES':
            resName=line[12:15].strip() # Residue name used in this entry.
            chainID=line[16] # Chain identifier.
            seqNum=line[18:22].strip() # Sequence number.
            iCode=line[22] # Insertion code.
            if not iCode.strip():
                iCode='X'
            stdRes=line[24:27].strip() # Standard residue name.
            comment=line[29:70] # Description of the residue modification.
            COMMENT = ''
            l=len(comment.split())
            for z in comment.split():
                l-=1
                if z:
                    COMMENT+=z
                if l != 0:
                    COMMENT+='-'
            if sdetails['chain'] == chainID: #including particular chain id and alt id specific annotation alone
                if sdetails['alt'].strip():
                    if sdetails['alt'] == iCode:
                         MODRES.append(resName+'_'+chainID+'_'+seqNum+'_'+iCode+'_'+stdRes+'_'+COMMENT)
                else:
                    MODRES.append(resName+'_'+chainID+'_'+seqNum+'_'+iCode+'_'+stdRes+'_'+COMMENT)
    if len(SEQRES) == 0:
        temp_dict.update({'mutation':0})
    else:
        temp_dict.update({'mutation':SEQRES}) #mutation
    if len(MODRES) == 0:
        temp_dict.update({'modified_residue':0})
    else:
        temp_dict.update({'modified_residue':MODRES}) #modified residue
    if len(LINK) == 0:
        temp_dict.update({'covalent-link':0})
    else:
        temp_dict.update({'covalent-link':LINK}) #covalent-link

    KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)

  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MA

  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MA

  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MA

  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MA

  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MA

  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MA

  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MASTER = KLIFS_MASTER.append(temp_dict,ignore_index=True)
  KLIFS_MA

In [61]:
KLIFS_MASTER

Unnamed: 0,structure_ID,kinase,species,kinase_ID,pdb,...,uniprot,iuphar,mutation,modified_residue,covalent-link
0,2538,AKT1,Human,1,3mvh,...,P31749,1479,0,0,0
1,2539,AKT1,Human,1,4ekk,...,P31749,1479,"[GLY_B_140_X_UNP_P31749___EXPRESSION-TAG, ALA_...",[TPO_B_308_X_THR_PHOSPHOTHREONINE],"[C_X_LYS_B_307_X_N_X_TPO_B_308_X_1.33, C_X_TPO..."
...,...,...,...,...,...,...,...,...,...,...,...
98,2698,ROCK1,Human,9,3ndm,...,Q13464,1503,"[GLY_B_1_X_UNP_Q13464___EXPRESSION-TAG, SER_B_...",0,0
99,2699,ROCK1,Human,9,3ncz,...,Q13464,1503,"[GLY_A_1_X_UNP_Q13464___EXPRESSION-TAG, SER_A_...",0,0


In [59]:
from datetime import date
KLIFS_MASTER.to_csv('KLIFS_master_'+str(date.today().strftime('%d-%m-%Y'))+'.csv',index=False)

### Using Sequence

Humand Mouse kinase sequence taken from

http://kinase.com/kinbase/FastaFiles/

In [4]:
remote_data = remote.KLIFSparse()

In [5]:
remote_data.kinase_info() #Get kinase information of mouse and human

In [6]:
KLIFS = remote_data.KLIFS
KLIFS

Unnamed: 0,kinase_ID,name,gene_name,family,group,...,species,full_name,uniprot,iuphar,pocket
0,1,AKT1,AKT1,Akt,AGC,...,Human,v-akt murine thymoma viral oncogene homolog 1,P31749,1479,KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME...
1,2,AKT2,AKT2,Akt,AGC,...,Human,v-akt murine thymoma viral oncogene homolog 2,P31751,1480,KLLGKGTFGKVILYAMKILHTVTESRVLQNTRPFLTALKYACFVME...
...,...,...,...,...,...,...,...,...,...,...,...
1125,1126,Plk5,Plk5,PLK,Other,...,Mouse,polo like kinase 5,Q4FZD7,0,0
1126,1127,Efna2,Efna2,Eph,TK,...,Mouse,ephrin A2,P52801,0,0


In [7]:
#Delete obsolete uniprot which has full name 'Deleted.'
KLIFS = KLIFS.drop(KLIFS[KLIFS['full_name']=='Deleted.'].index)

In [9]:
KLIFS[KLIFS['gene_name'] == ''].to_dict()

{'kinase_ID': {555: 529, 556: 530},
 'name': {555: 'A6', 556: 'A6r'},
 'gene_name': {555: '', 556: ''},
 'family': {555: 'A6', 556: 'A6'},
 'group': {555: 'Atypical', 556: 'Atypical'},
 'subfamily': {555: '', 556: ''},
 'species': {555: 'Mouse', 556: 'Mouse'},
 'full_name': {555: 0, 556: 0},
 'uniprot': {555: 0, 556: 0},
 'iuphar': {555: 0, 556: 0},
 'pocket': {555: '', 556: ''}}

In [42]:
KLIFS['RES_KLIFS'] = pd.Series(dtype='str')
for i,kid in KLIFS.iterrows():
    pocket_seq=kid['pocket']
    if pocket_seq:
        # parsing uniprot gene seq
        response = requests.get('http://www.uniprot.org/uniprot/'+kid['uniprot']+'.fasta')
        # If uniprot id not able to map the fasta seq, get gene name and parse the uniprot id and get the fasta sequence
        if response.text:
            fasta_io = StringIO(response.text)
        else:
            print(kid.to_dict())
            #some gene have error in uniprot the below section will map for correct uniprot-id or print error
            gene=kid['gene_name']
            if kid['species'] == 'Human':
                txid='9606'
            else:
                txid='10090'
            uid=get_uid(gene,txid)
            if uid:
                response = requests.get('http://www.uniprot.org/uniprot/'+uid+'.fasta')
                print(uid,gene,txid)
                fasta_io = StringIO(response.text)
            else:
                print('Trembl-id-error',kid.to_dict())
                KLIFS['RES_KLIFS'].loc[i] = 0
                continue
        uniprot = SeqIO.parse(fasta_io,format='fasta')
        gene_seq='' 
        for z in uniprot:
            gene_seq=str(z.seq)
        #Perform sequence alignment of pocket and gene seq
        alignments = pairwise2.align.globalms(gene_seq,pocket_seq,2, -1, -10, -0.5) #https://biopython.org/docs/1.76/api/Bio.pairwise2.html?highlight=pairwise2%20align%20globalxx#
        aligned_seq= [format_alignment(*alignments[0]).split('\n')[0],format_alignment(*alignments[0]).split('\n')[2]] # 0: Gene aligned seq, 2: Pocket aligned seq
        #mapping the gene seq number
        geneposition=1
        mapped_seq=[]
        for n in aligned_seq[1]: #loop through alignd pocket seq
            if n != '-':
                mapped_seq.append(str(geneposition)+'-'+n) #pocketnum-genenum-AA
            geneposition+=1
        #~~~~~~
        RES_KLIFS_mapped=[]
        map_num=0
        for z in range(0,85):
            if pocket_seq[z] == '-':
                RES_KLIFS_mapped.append(str(z+1)+'-X-X')
            elif pocket_seq[z] == mapped_seq[map_num].split('-')[1]:
                RES_KLIFS_mapped.append(str(z+1)+'-'+ mapped_seq[map_num])
                map_num+=1    
        KLIFS['RES_KLIFS'].loc[i] = RES_KLIFS_mapped
    else:
        KLIFS['RES_KLIFS'].loc[i] = 0

{'kinase_ID': 670, 'name': 'EPHA6', 'gene_name': 'Epha6', 'family': 'Eph', 'group': 'TK', 'subfamily': '', 'species': 'Mouse', 'full_name': 'Ephrin type-A receptor 6', 'uniprot': 'E9PUK8', 'iuphar': 0, 'pocket': 'RVIGAGEFGEVCSVAIKTLDFLREASIMGQFDPNIIRLEGVAIGVEAFCPSFLRAGFLNYLSDMGYVHRDLAARNILVVSDFGLS', 'RES_KLIFS': nan}
Q62413 Epha6 10090
{'kinase_ID': 797, 'name': 'MarkmA3', 'gene_name': 'Smok3a', 'family': 'CAMKL', 'group': 'CAMK', 'subfamily': 'MARK', 'species': 'Mouse', 'full_name': 'sperm motility kinase 3A', 'uniprot': 'Q9QYZ5', 'iuphar': 0, 'pocket': 'ETIGHGGCATVKLVAVKTIRVISEVELLMMADPNIISLLQVYLIMELCKGKSLYQHIRKYCHNQGIVHRDLKPDNIMVIIDFGLG', 'RES_KLIFS': nan}
C0HKC8 Smok3a 10090
{'kinase_ID': 798, 'name': 'MarkmA4', 'gene_name': 'Smok3c', 'family': 'CAMKL', 'group': 'CAMK', 'subfamily': 'MARK', 'species': 'Mouse', 'full_name': 'sperm motility kinase 3C', 'uniprot': 'D3Z0E1', 'iuphar': 0, 'pocket': 'ETIGHGGCATVKLVAVKTILIMSEVDLLMMADPNVISLLQVYLIMELCEGKSLYQHIRKYCHNQGIVHRDLKPDNIMVIIDFGLG', 

In [43]:
from datetime import date
KLIFS.to_csv('data/KLIFS_seq_master_'+str(date.today().strftime('%d-%m-%Y'))+'.csv',index=False)

## DUNBARK

In [3]:
#1. Parsing DUNBARK alignment file and annotation the informations
DUNBARK_REGION={"B1N":"1-4","B1C":"21-27","B2":"44-52","B3":"93-103","HC":"140-153","B4":"180-195","B5":"420-430","HD":"445-453","HE":"939-962","CL":"1008-1028","ALN":"1331-1351","ALC":"1904-1920","HF":"1953-1975","FL":"1993-1998","HG":"2049-2061","HH":"2175-2194","HI":"2209-2218"}
DUNBARK_pd=pd.DataFrame(columns=['Gene','Family','Uniprot-id','Uniprote-name','Position','Seq'])
for record in SeqIO.parse("data/Human-PK-alignment.fasta", "fasta"):
    if record.id != '0ANNOTATION/1-2208':
        DUNBARK_pd = DUNBARK_pd.append({'Gene':record.description.split()[2],'Family':record.description.split()[0].split('/')[0].split('_')[0],'Uniprot-id':record.description.split()[3],'Uniprote-name':record.description.split()[1],'Position':record.description.split()[0].split('/')[1],'Seq':str(record.seq)},ignore_index=True)
'''
#~~03-11-2022
#~~Too much dataframes and hence used as one dataframe and proceeded
#2.Check for multiple gene entry(few genes has multiple kinase domains)
MULTI_DOMAIN_KINASE={} #uniprot id and count
UNIPROT_ID=list(DUNBARK_pd['Uniprot-id'])
for f in UNIPROT_ID:
    if f not in MULTI_DOMAIN_KINASE:
        MULTI_DOMAIN_KINASE[f]=0
    MULTI_DOMAIN_KINASE[f]+=1
MULTI_KINASE_LIST={} #count and their uniprot id
for f in MULTI_DOMAIN_KINASE:
    if MULTI_DOMAIN_KINASE[f] not in MULTI_KINASE_LIST:
        MULTI_KINASE_LIST[MULTI_DOMAIN_KINASE[f]]=[]
    MULTI_KINASE_LIST[MULTI_DOMAIN_KINASE[f]].append(f)
DUNBARK_pd1=DUNBARK_pd[~DUNBARK_pd['Uniprot-id'].isin(MULTI_KINASE_LIST[2])] #Dataframe which has 1 gene entries
DUNBARK_pd2=DUNBARK_pd[DUNBARK_pd['Uniprot-id'].isin(MULTI_KINASE_LIST[2])] #Dataframe which has 2 gene entries

'''
#2. Parsing uniprot-gene
DUNBARK_mapped=pd.DataFrame(columns=['Gene', 'Family', 'Uniprot-id', 'Uniprote-name', 'Position', 'Seq', 'domain_seq', 'alignment', 'mapped_data'])
for i, row in DUNBARK_pd.iterrows():
    # parsing uniprot gene seq
    response = requests.get('http://www.uniprot.org/uniprot/'+row['Uniprot-id']+'.fasta')
    fastq_io = StringIO(response.text)
    uniprot = SeqIO.parse(fastq_io,format='fasta')
    seq='' 
    for z in uniprot:
        seq=str(z.seq)
    #3. Performing sequence alignement with domain region and pocket seq
    pocketseq = row['Seq']
    domain_seq = seq[int(row['Position'].split('-')[0])-1:int(row['Position'].split('-')[1])]
    alignments = pairwise2.align.globalxx(domain_seq,pocketseq.upper())
    aligned_seq= [format_alignment(*alignments[0]).split('\n')[0],format_alignment(*alignments[0]).split('\n')[2]] # 0: Gene aligned seq, 2: Pocket aligned seq
    #4. correlating the pocket aligned sequence zith the gene number for mapping
    '''
    Loop through the pocket seq and match with the kinase domain aligned seq AA and print the 
    pocket number, gene position and AA. If no AA present print pocket number-X-X.
    pocket_num-gene_num-AA
    '''
    domain_start_num=int(row['Position'].split('-')[0])
    geneposition=domain_start_num
    pocket_num=0
    mapped_seq=[]
    for n in aligned_seq[1]: #loop through alignd pocket seq
        if n != '-':
            if n == aligned_seq[0][pocket_num]:
                mapped_seq.append(str(pocket_num+1)+'-'+str(geneposition)+'-'+n) #pocketnum-genenum-AA
            geneposition+=1
        else:
            mapped_seq.append(str(pocket_num+1)+'-X-X')
        pocket_num+=1
    kindata=row.to_dict()
    kindata.update({'domain_seq':domain_seq,'alignment':aligned_seq,'mapped_data':mapped_seq})
    DUNBARK_mapped = DUNBARK_mapped.append(kindata,ignore_index=True)
    

In [4]:
DUNBARK_mapped

Unnamed: 0,Gene,Family,Uniprot-id,Uniprote-name,Position,Seq,domain_seq,alignment,mapped_data
0,AKT1,AGC,P31749,AKT1_HUMAN,150-408,FEYL----------------KLLGKGT----------------FGK...,FEYLKLLGKGTFGKVILVKEKATGRYYAMKILKKEVIVAKDEVAHT...,[FEYL----------------KLLGKGT----------------FG...,"[1-150-F, 2-151-E, 3-152-Y, 4-153-L, 5-X-X, 6-..."
1,AKT2,AGC,P31751,AKT2_HUMAN,152-409,FDYL----------------KLLGKGT----------------FGK...,FDYLKLLGKGTFGKVILVREKATGRYYAMKILRKEVIIAKDEVAHT...,[FDYL----------------KLLGKGT----------------FG...,"[1-152-F, 2-153-D, 3-154-Y, 4-155-L, 5-X-X, 6-..."
...,...,...,...,...,...,...,...,...,...
495,YES1,TYR,P07947,YES_HUMAN,277-528,LRLE----------------VKLGQGC----------------FGE...,LRLEVKLGQGCFGEVWMGTWNGTTKVAIKTLKPGTMMPEAFLQEAQ...,[LRLE----------------VKLGQGC----------------FG...,"[1-277-L, 2-278-R, 3-279-L, 4-280-E, 5-X-X, 6-..."
496,ZAP70,TYR,P43403,ZAP70_HUMAN,337-595,LLIA--d-------------IELGCGN----------------FGS...,LLIADIELGCGNFGSVRQGVYRMRKKQIDVAIKVLKQGTEKADTEE...,[LLIA--D-------------IELGCGN----------------FG...,"[1-337-L, 2-338-L, 3-339-I, 4-340-A, 5-X-X, 6-..."


In [5]:
DUNBARK_mapped.to_csv('data/DUNBARK_mapped.csv',index=False)

# Cross numbering comparison

## KLIFS

In [3]:
REFERENCE='KLIFS'

In [5]:
KLIFS = pd.read_csv('../../../scripts/KLIFS/data/KLIFS_seq_master_07-12-2022.csv')
#Delete mouse species data
KLIFS = KLIFS.drop(KLIFS[KLIFS['species']=='Mouse'].index)
KLIFS

Unnamed: 0,kinase_ID,name,gene_name,family,group,...,full_name,uniprot,iuphar,pocket,RES_KLIFS
0,1,AKT1,AKT1,Akt,AGC,...,v-akt murine thymoma viral oncogene homolog 1,P31749,1479,KLLGKGTFGKVILYAMKILHTLTENRVLQNSRPFLTALKYSCFVME...,"['1-154-K', '2-155-L', '3-156-L', '4-157-G', '..."
1,2,AKT2,AKT2,Akt,AGC,...,v-akt murine thymoma viral oncogene homolog 2,P31751,1480,KLLGKGTFGKVILYAMKILHTVTESRVLQNTRPFLTALKYACFVME...,"['1-156-K', '2-157-L', '3-158-L', '4-159-G', '..."
...,...,...,...,...,...,...,...,...,...,...,...
553,1109,PI4KAP1,PI4KAP1,PIK,Atypical,...,phosphatidylinositol 4-kinase alpha pseudogene 1,Q8N8J0,0,0,0
554,1110,EFNA2,EFNA2,Eph,TK,...,ephrin A2,O43921,4909,0,0


In [31]:
def kinase_to_gene(gene,pos):
    '''
    Easy pocket alignment based amino acid parsing between dunbark and klifs

    input gene, klifs number output gene, seq position, aa example:

    input: KLIFS_to_Gene('ROCK1',45)
    output: ROCK1, 153, M
    ~~+ input gene, dunbark number output gene, seq position, aa example:

    input: DUNBARK_to_Gene('ROCK1',1338)
    output: ROCK1, 216, D
    '''
    DETAILS=[]
    if REFERENCE == 'DUNBARK':
        if gene not in DUNBARK['Gene']: #parsing uid and mapping the gene_name
            gene = DUNBARK[DUNBARK['Uniprot-id'] == get_uid(gene)]['Gene'].iloc[0]
        for i,kid in DUNBARK[DUNBARK['Gene'] == gene].iterrows():
            for z in eval(kid['mapped_data']):
                if int(z.split('-')[0]) == pos:
                    gene_name = gene
                    align_num = z.split('-')[0]
                    gene_num = z.split('-')[1]
                    AA = z.split('-')[2]
                    DETAILS.extend([gene_name,align_num,gene_num,AA])
                    break
    elif REFERENCE == 'KLIFS':
        if gene not in KLIFS['name']:
            gene = KLIFS[KLIFS['uniprot'] == get_uid(gene)]['name'].iloc[0]
        #print(gene)
        for z in eval(KLIFS[KLIFS['name'] == gene]['RES_KLIFS'].iloc[0]):
            if int(z.split('-')[0]) == pos:
                gene_name = gene
                align_num = z.split('-')[0]
                gene_num = z.split('-')[1]
                AA = z.split('-')[2]
                DETAILS.extend([gene_name,align_num,gene_num,AA])
                break
    if len(DETAILS) == 0:
        DETAILS=[0,0,0,0]
    return DETAILS
kinase_to_gene('MEK1',45)

['MAP2K1', '45', '143', 'M']

In [30]:
def gene_to_kinase(gene,pos):
    '''
    input gene ,seq position output klifs position, aa example:

    input: Gene_to_KLIFS('ROCK1',153)
    output: 45, M
    input gene ,seq position output dunbark position, aa example:

    input: Gene_to_DUNBARK('ROCK1',216)
    output: 1338, D
    '''
    DETAILS=[]
    if REFERENCE == 'DUNBARK':
        if gene not in DUNBARK['Gene']: #parsing uid and mapping the gene_name
            gene = DUNBARK[DUNBARK['Uniprot-id'] == get_uid(gene)]['Gene'].iloc[0]
        for i,kid in DUNBARK[DUNBARK['Gene'] == gene].iterrows():
            for z in eval(kid['mapped_data']):
                if z.split('-')[1] != 'X':
                    if int(z.split('-')[1]) == pos:
                        gene_name = gene
                        align_num = z.split('-')[0]
                        gene_num = z.split('-')[1]
                        AA = z.split('-')[2]
                        DETAILS.extend([gene_name,align_num,gene_num,AA])
                        break
    elif REFERENCE == 'KLIFS':
        if gene not in KLIFS['name']:
            gene = KLIFS[KLIFS['uniprot'] == get_uid(gene)]['name'].iloc[0]
        #print(gene)
        for z in eval(KLIFS[KLIFS['name'] == gene]['RES_KLIFS'].iloc[0]):
            if int(z.split('-')[1]) == pos:
                gene_name = gene
                align_num = z.split('-')[0]
                gene_num = z.split('-')[1]
                AA = z.split('-')[2]
                DETAILS.extend([gene_name,align_num,gene_num,AA])
                break
    if len(DETAILS) == 0:
        DETAILS = [0,0,0,0]
    return DETAILS
gene_to_kinase('MEK1',207)

['MAP2K1', '80', '207', 'C']

In [64]:
def Kinase_grouping_parse(pos):
    '''
    input gene, seq position, output klifs/dunbark position and aa based kinase grouping example:

    input: Kinase_group_parse(45)
    output: { KLIFS : { 
                      'A': [ 'AKT1', 'PAK1']
                      'M' : [ 'ROCK1', 'PAK1' ]
                    },
            DUNBARK : {
                      'A': [ 'AKT1', 'PAK1']
                      'M' : [ 'ROCK1', 'PAK1' ]
                    }
          }
    '''
    AA_GROUP={'A':[],'C':[],'D':[],'E':[],'F':[],'G':[],'H':[],'I':[],'L':[],'K':[],'M':[],'N':[],'P':[],'Q':[],'R':[],'S':[],'T':[],'V':[],'W':[],'Y':[],'X':[]}
    if REFERENCE == 'DUNBARK':
        for i,kid in DUNBARK.iterrows():
            for z in eval(kid['mapped_data']):
                if int(z.split('-')[0]) == pos:
                    AA_GROUP[z.split('-')[2]].append(kid['Gene'])
                    break
    elif REFERENCE == 'KLIFS':
        for i,kid in KLIFS.iterrows():
            if eval(kid['RES_KLIFS']) != 0:
                for z in eval(kid['RES_KLIFS']):
                    if int(z.split('-')[0]) == pos:
                        AA_GROUP[z.split('-')[2]].append(kid['name'])
                        break
    return AA_GROUP
Kinase_grouping_parse(45)

{'A': ['SPEG-b', 'PINK1', 'TBCK'],
 'C': ['SgK494', 'MOK'],
 'D': [],
 'E': ['CLIK1', 'JAK1-b'],
 'F': ['RSKL1',
  'CaMK2a',
  'CaMK2b',
  'CaMK2d',
  'CaMK2g',
  'CASK',
  'MNK2',
  'MNK1',
  'TTN',
  'PHKg1',
  'PHKg2',
  'Trb1',
  'Trb2',
  'Trb3',
  'CCRK',
  'CDC2',
  'CDK2',
  'CDK3',
  'CDK4',
  'CDK6',
  'CDK5',
  'CDK7',
  'CDK8',
  'CDK11',
  'CDK9',
  'CHED',
  'CRK7',
  'PCTAIRE1',
  'PCTAIRE2',
  'PCTAIRE3',
  'PFTAIRE1',
  'PFTAIRE2',
  'CDKL5',
  'CDKL1',
  'CDKL2',
  'CDKL3',
  'CDKL4',
  'CK2a1',
  'CK2a2',
  'CLK1',
  'CLK2',
  'CLK3',
  'CLK4',
  'DYRK1A',
  'DYRK1B',
  'DYRK3',
  'DYRK2',
  'DYRK4',
  'HIPK3',
  'HIPK2',
  'HIPK4',
  'HIPK1',
  'PRP4',
  'Erk7',
  'MAK',
  'ICK',
  'SRPK1',
  'SRPK2',
  'CaMKK2',
  'CaMKK1',
  'Haspin',
  'LMR2',
  'MUSK',
  'FLT3',
  'ROR1',
  'ROR2',
  'ITK',
  'TRKA',
  'TRKB',
  'TRKC',
  'CSNK2A3',
  'PAN3'],
 'G': ['BUB1'],
 'H': ['BUBR1'],
 'I': ['PKCi',
  'PKCz',
  'ATR',
  'DNAPK',
  'FRAP',
  'SMG1',
  'IRE1',
  'HSER',
  

In [86]:
gene_to_kinase('CDK7',312)

[0, 0, 0, 0]

In [66]:
def Kinase_grouping_gene_position(gene,pos):
    '''
    input gene, seq position, output klifs/dunbark position and aa based kinase grouping example:

    input: Kinase_group_gene_position('ROCK1',216)
    output: { KLIFS : { 
                      'A': [ 'AKT1', 'PAK1']
                      'M' : [ 'ROCK1', 'PAK1' ]
                    },
            DUNBARK : {
                      'A': [ 'AKT1', 'PAK1']
                      'M' : [ 'ROCK1', 'PAK1' ]
                    }
          }
    '''
    AA_GROUP={'A':[],'C':[],'D':[],'E':[],'F':[],'G':[],'H':[],'I':[],'L':[],'K':[],'M':[],'N':[],'P':[],'Q':[],'R':[],'S':[],'T':[],'V':[],'W':[],'Y':[],'X':[]}
    if REFERENCE == 'DUNBARK':
        if gene not in DUNBARK['Gene']: #parsing uid and mapping the gene_name
            gene = DUNBARK[DUNBARK['Uniprot-id'] == get_uid(gene)]['Gene'].iloc[0]
    kpos = int(gene_to_kinase(gene,pos)[1])
    if kpos == 0:
        AA_GROUP = 'Position error'
    else:
        if REFERENCE == 'DUNBARK':
            for i,kid in DUNBARK.iterrows():
                for z in eval(kid['mapped_data']):
                    if int(z.split('-')[0]) == kpos:
                        AA_GROUP[z.split('-')[2]].append(kid['Gene'])
                        break
        if REFERENCE == 'KLIFS':
            for i,kid in KLIFS.iterrows():
                if eval(kid['RES_KLIFS']) != 0:
                    for z in eval(kid['RES_KLIFS']):
                        if int(z.split('-')[0]) == kpos:
                            AA_GROUP[z.split('-')[2]].append(kid['name'])
                            break
                        
    return AA_GROUP
Kinase_grouping_gene_position('MEK1',207)

{'A': ['DMPK1',
  'MRCKa',
  'MRCKb',
  'DMPK2',
  'ROCK1',
  'ROCK2',
  'PKCa',
  'PKCb',
  'PKCd',
  'PKCt',
  'PKCe',
  'PKCh',
  'PKN1',
  'PKN2',
  'PKN3',
  'CaMK4',
  'CaMK2a',
  'CaMK2b',
  'CaMK2d',
  'CaMK2g',
  'AMPKa1',
  'AMPKa2',
  'BRSK2',
  'BRSK1',
  'MARK2',
  'MARK1',
  'MARK3',
  'MARK4',
  'NuaK1',
  'NuaK2',
  'QIK',
  'QSK',
  'SIK',
  'DCLK3',
  'Trio',
  'VRK2',
  'VRK3',
  'CCRK',
  'CDC2',
  'CDK2',
  'CDK3',
  'CDK10',
  'CDK4',
  'CDK6',
  'CDK5',
  'CDK7',
  'CDK8',
  'CDK11',
  'CDK9',
  'CHED',
  'CRK7',
  'PCTAIRE1',
  'PCTAIRE2',
  'PCTAIRE3',
  'PFTAIRE1',
  'PFTAIRE2',
  'CLK3',
  'MAK',
  'ICK',
  'SRPK1',
  'SRPK2',
  'MSSK1',
  'AurA',
  'AurC',
  'AurB',
  'CaMKK2',
  'CaMKK1',
  'SBK',
  'PINK1',
  'CLIK1',
  'CLIK1L',
  'RNAseL',
  'PLK4',
  'TBCK',
  'ULK1',
  'ULK2',
  'ULK3',
  'MAP3K1',
  'OSR1',
  'STLK3',
  'GCK',
  'KHS2',
  'KHS1',
  'HPK1',
  'MST2',
  'MST1',
  'LOK',
  'SLK',
  'TAO3',
  'TAO1',
  'MST3',
  'YSK1',
  'MST4',
  'ABL1'

## DUNBARK

Kinome tree "Illustration reproduced courtesy of Cell Signaling Technology, Inc. (www.cellsignal.com)"