# Bioinformatics: Drug discovery on CaM-kinase kinase beta protein

## Data Collection

### Libraries Required

In [1]:
# install ChEMBL web service package to retrieve the biological data
! pip install -q chembl_webresource_client

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/55.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m51.2/55.2 kB[0m [31m6.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.2/55.2 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/61.4 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# import necessary libraries
import pandas as pd
from chembl_webresource_client.new_client import new_client

### Searching for the target protein

In [5]:
# Search the ChEMBL target endpoint for "CaMKK2" and tabulate the hits
# (one row per matching target) so the entry of interest can be picked.
target = new_client.target
target_query = target.search("CaMKK2")
targets = pd.DataFrame(target_query)
targets

Unnamed: 0,cross_references,organism,pref_name,score,species_group_flag,target_chembl_id,target_components,target_type,tax_id
0,"[{'xref_id': 'CAMKK2', 'xref_name': None, 'xre...",Homo sapiens,CaM-kinase kinase beta,12.0,False,CHEMBL5284,"[{'accession': 'Q96RR4', 'component_descriptio...",SINGLE PROTEIN,9606
1,"[{'xref_id': 'O88831', 'xref_name': None, 'xre...",Rattus norvegicus,Calcium/calmodulin-dependent protein kinase ki...,11.0,False,CHEMBL1795115,"[{'accession': 'O88831', 'component_descriptio...",SINGLE PROTEIN,10116
2,[],Mus musculus,Calcium/calmodulin-dependent protein kinase ki...,11.0,False,CHEMBL4295888,"[{'accession': 'Q8C078', 'component_descriptio...",SINGLE PROTEIN,10090


In [6]:
# Use the ChEMBL ID of the first search hit (row label 0, the
# CaM-kinase kinase beta entry) as the target for the bioactivity query.
selected_target = targets.loc[0, "target_chembl_id"]
selected_target

'CHEMBL5284'

In [7]:
# Build the bioactivity query: restrict to the selected target,
# then keep only records whose standard type is IC50.
activity = new_client.activity
res = (
    activity
    .filter(target_chembl_id=selected_target)
    .filter(standard_type="IC50")
)

In [8]:
# Load the query results into a DataFrame for inspection and cleaning.
df = pd.DataFrame(res)

In [10]:
# Preview the first three activity records.
df.head(n=3)

Unnamed: 0,action_type,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,assay_variant_accession,assay_variant_mutation,bao_endpoint,...,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value
0,,,2137123,[],CHEMBL940168,Inhibition of CaM-KKbeta,B,,,BAO_0000190,...,Homo sapiens,CaM-kinase kinase beta,9606,,,IC50,nM,UO_0000065,,200.0
1,,,2137359,[],CHEMBL940168,Inhibition of CaM-KKbeta,B,,,BAO_0000190,...,Homo sapiens,CaM-kinase kinase beta,9606,,,IC50,ng/ml,UO_0000274,,40.0
2,"{'action_type': 'ANTAGONIST', 'description': '...",Antagonist,2897276,[],CHEMBL1051267,Inhibition of CAMKKbeta in the presence of 20u...,B,,,BAO_0000190,...,Homo sapiens,CaM-kinase kinase beta,9606,,,IC50,uM,UO_0000065,,0.01


In [11]:
# Sanity check: 'IC50' should be the only standard type present.
df["standard_type"].unique()

array(['IC50'], dtype=object)

In [13]:
# standard_value is the potency measurement: the lower the value,
# the more potent the compound.
df["standard_value"].head(3)

Unnamed: 0,standard_value
0,200.0
1,0.04
2,10.0


## Data Pre-Processing