**Machine learning models for pi3k**

*This models are based on empirical potentials obtained with docking by means of Smina docking software.*

# Setup

First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:

In [1]:
# Clear all variables
%reset -f
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import pandas as pd
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

# Get the data

In [2]:
import pandas as pd

HOUSING_PATH = os.path.join('.', 'datasets')

def load_data(filename, housing_path=HOUSING_PATH):
    csv_path = os.path.join(housing_path, filename)
    return pd.DataFrame.from_csv(csv_path, sep='\t', header=None)

In [3]:
actives = load_data('actives_scores_pdb.txt')
decoys = load_data('decoys_scores_pdb.txt')
actives['Active'] = 1
decoys['Active'] = 0

  import sys


In [4]:
def merge_data(dat1, dat2):
    new_dat = pd.concat([dat1, dat2])
    new_dat = new_dat.sample(frac=1)
    return new_dat

In [5]:
merged_data = merge_data(actives, decoys)

# Prepare the data for Machine Learning algorithms

In [6]:
from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(merged_data, test_size=0.3, random_state=43)

In [7]:
def split_data_to_labels(dat):
    labels = dat["Active"].copy().as_matrix()
    new_dat = dat.drop("Active", axis=1)
    return labels, new_dat

In [8]:
test_labels, new_test_set = split_data_to_labels(test_set)
train_labels, new_train_set = split_data_to_labels(train_set)

### Transorm the data 

In [9]:
def get_transformer(train):
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.feature_selection import VarianceThreshold

    num_pipeline = Pipeline([
            ('VarianceThreshold', VarianceThreshold(threshold=0.0)),
            ('std_scaler', StandardScaler()),
        ])
    num_pipeline.fit(train)
    return num_pipeline

In [10]:
def transform_data(transformer, dat):
    dat_tr = transformer.transform(dat)
    return dat_tr

In [11]:
transformer = get_transformer(new_train_set)
train_set_tr = transform_data(transformer, new_train_set)
test_set_tr = transform_data(transformer, new_test_set)

# Deep neural network model

In [12]:
def get_dnn_clf(train, train_labels):
    import tensorflow as tf

    config = tf.contrib.learn.RunConfig(tf_random_seed=42) # not shown in the config

    feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(train)
    dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300,100], n_classes=2,
                                             feature_columns=feature_cols, dropout=0.45, config=config)
    dnn_clf = tf.contrib.learn.SKCompat(dnn_clf) # if TensorFlow >= 1.1
    dnn_clf.fit(train, train_labels, batch_size=50, steps=100000)
    
    return dnn_clf

In [13]:
def get_auc(clf, test, test_labels):
    from sklearn.metrics import roc_auc_score, log_loss, accuracy_score

    y_pred = clf.predict(test)
    roc_auc = (roc_auc_score(test_labels, y_pred['probabilities'][:,1]))
    # print(accuracy_score(test_labels, y_pred['classes']))
    return roc_auc

In [14]:
clf = get_dnn_clf(train_set_tr, train_labels)

INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000023322B46160>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': 42, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'C:\\Users\\vladi\\AppData\\Local\\Temp\\tmpzyqe3p_r'}
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Create Ch

INFO:tensorflow:loss = 0.0736559, step = 6601 (0.226 sec)
INFO:tensorflow:global_step/sec: 388.837
INFO:tensorflow:loss = 0.0844394, step = 6701 (0.256 sec)
INFO:tensorflow:global_step/sec: 465.789
INFO:tensorflow:loss = 0.293033, step = 6801 (0.216 sec)
INFO:tensorflow:global_step/sec: 417.385
INFO:tensorflow:loss = 0.0350056, step = 6901 (0.239 sec)
INFO:tensorflow:global_step/sec: 469.881
INFO:tensorflow:loss = 0.0973935, step = 7001 (0.214 sec)
INFO:tensorflow:global_step/sec: 420.367
INFO:tensorflow:loss = 0.143404, step = 7101 (0.237 sec)
INFO:tensorflow:global_step/sec: 419.977
INFO:tensorflow:loss = 0.0856152, step = 7201 (0.238 sec)
INFO:tensorflow:global_step/sec: 470.451
INFO:tensorflow:loss = 0.131055, step = 7301 (0.213 sec)
INFO:tensorflow:global_step/sec: 402.495
INFO:tensorflow:loss = 0.0887873, step = 7401 (0.248 sec)
INFO:tensorflow:global_step/sec: 460.679
INFO:tensorflow:loss = 0.0807551, step = 7501 (0.217 sec)
INFO:tensorflow:global_step/sec: 455.619
INFO:tensorfl

INFO:tensorflow:loss = 0.0212889, step = 14901 (0.219 sec)
INFO:tensorflow:global_step/sec: 460.605
INFO:tensorflow:loss = 0.0119374, step = 15001 (0.217 sec)
INFO:tensorflow:global_step/sec: 462.991
INFO:tensorflow:loss = 0.122084, step = 15101 (0.216 sec)
INFO:tensorflow:global_step/sec: 463.51
INFO:tensorflow:loss = 0.0559998, step = 15201 (0.217 sec)
INFO:tensorflow:global_step/sec: 458.053
INFO:tensorflow:loss = 0.0133383, step = 15301 (0.217 sec)
INFO:tensorflow:global_step/sec: 469.42
INFO:tensorflow:loss = 0.115815, step = 15401 (0.214 sec)
INFO:tensorflow:global_step/sec: 457.857
INFO:tensorflow:loss = 0.0665759, step = 15501 (0.217 sec)
INFO:tensorflow:global_step/sec: 461.542
INFO:tensorflow:loss = 0.113196, step = 15601 (0.218 sec)
INFO:tensorflow:global_step/sec: 465.351
INFO:tensorflow:loss = 0.028187, step = 15701 (0.214 sec)
INFO:tensorflow:global_step/sec: 466.201
INFO:tensorflow:loss = 0.101383, step = 15801 (0.215 sec)
INFO:tensorflow:global_step/sec: 467.537
INFO:te

INFO:tensorflow:global_step/sec: 451.87
INFO:tensorflow:loss = 0.153377, step = 23201 (0.221 sec)
INFO:tensorflow:global_step/sec: 444.955
INFO:tensorflow:loss = 0.01496, step = 23301 (0.225 sec)
INFO:tensorflow:global_step/sec: 452.356
INFO:tensorflow:loss = 0.110459, step = 23401 (0.222 sec)
INFO:tensorflow:global_step/sec: 441.388
INFO:tensorflow:loss = 0.0429601, step = 23501 (0.226 sec)
INFO:tensorflow:global_step/sec: 458.824
INFO:tensorflow:loss = 0.0752708, step = 23601 (0.217 sec)
INFO:tensorflow:global_step/sec: 456.701
INFO:tensorflow:loss = 0.0942464, step = 23701 (0.220 sec)
INFO:tensorflow:global_step/sec: 450.517
INFO:tensorflow:loss = 0.0126466, step = 23801 (0.221 sec)
INFO:tensorflow:global_step/sec: 450.745
INFO:tensorflow:loss = 0.00715493, step = 23901 (0.223 sec)
INFO:tensorflow:global_step/sec: 446.992
INFO:tensorflow:loss = 0.0239748, step = 24001 (0.223 sec)
INFO:tensorflow:global_step/sec: 460.488
INFO:tensorflow:loss = 0.111594, step = 24101 (0.217 sec)
INFO:

INFO:tensorflow:loss = 0.00126173, step = 31401 (0.220 sec)
INFO:tensorflow:global_step/sec: 454.624
INFO:tensorflow:loss = 0.0330492, step = 31501 (0.220 sec)
INFO:tensorflow:global_step/sec: 454.194
INFO:tensorflow:loss = 0.030475, step = 31601 (0.221 sec)
INFO:tensorflow:global_step/sec: 458.477
INFO:tensorflow:loss = 0.0296336, step = 31701 (0.218 sec)
INFO:tensorflow:global_step/sec: 447.613
INFO:tensorflow:loss = 0.132709, step = 31801 (0.223 sec)
INFO:tensorflow:global_step/sec: 455.306
INFO:tensorflow:loss = 0.0424509, step = 31901 (0.220 sec)
INFO:tensorflow:global_step/sec: 447.629
INFO:tensorflow:loss = 0.0467143, step = 32001 (0.224 sec)
INFO:tensorflow:global_step/sec: 458.965
INFO:tensorflow:loss = 0.0170046, step = 32101 (0.221 sec)
INFO:tensorflow:global_step/sec: 444.198
INFO:tensorflow:loss = 0.0566161, step = 32201 (0.222 sec)
INFO:tensorflow:global_step/sec: 449.171
INFO:tensorflow:loss = 0.0361585, step = 32301 (0.222 sec)
INFO:tensorflow:global_step/sec: 459.305
I

INFO:tensorflow:global_step/sec: 424.466
INFO:tensorflow:loss = 0.00761903, step = 39701 (0.236 sec)
INFO:tensorflow:global_step/sec: 433.459
INFO:tensorflow:loss = 0.0164293, step = 39801 (0.231 sec)
INFO:tensorflow:global_step/sec: 431.736
INFO:tensorflow:loss = 0.00634873, step = 39901 (0.232 sec)
INFO:tensorflow:global_step/sec: 439.592
INFO:tensorflow:loss = 0.0658547, step = 40001 (0.227 sec)
INFO:tensorflow:global_step/sec: 426.117
INFO:tensorflow:loss = 0.0253237, step = 40101 (0.235 sec)
INFO:tensorflow:global_step/sec: 439.861
INFO:tensorflow:loss = 0.0150627, step = 40201 (0.227 sec)
INFO:tensorflow:global_step/sec: 428.152
INFO:tensorflow:loss = 0.0325974, step = 40301 (0.234 sec)
INFO:tensorflow:global_step/sec: 442.222
INFO:tensorflow:loss = 0.0041492, step = 40401 (0.227 sec)
INFO:tensorflow:global_step/sec: 431.642
INFO:tensorflow:loss = 0.0195004, step = 40501 (0.231 sec)
INFO:tensorflow:global_step/sec: 433.889
INFO:tensorflow:loss = 0.0223546, step = 40601 (0.232 sec

INFO:tensorflow:global_step/sec: 437.327
INFO:tensorflow:loss = 0.0351328, step = 47901 (0.229 sec)
INFO:tensorflow:global_step/sec: 443.403
INFO:tensorflow:loss = 0.0155457, step = 48001 (0.225 sec)
INFO:tensorflow:global_step/sec: 439.472
INFO:tensorflow:loss = 0.0124724, step = 48101 (0.228 sec)
INFO:tensorflow:global_step/sec: 434.498
INFO:tensorflow:loss = 0.0395193, step = 48201 (0.231 sec)
INFO:tensorflow:global_step/sec: 437.547
INFO:tensorflow:loss = 0.00956261, step = 48301 (0.229 sec)
INFO:tensorflow:global_step/sec: 427.035
INFO:tensorflow:loss = 0.0400496, step = 48401 (0.233 sec)
INFO:tensorflow:global_step/sec: 442.513
INFO:tensorflow:loss = 0.0195476, step = 48501 (0.226 sec)
INFO:tensorflow:global_step/sec: 439.603
INFO:tensorflow:loss = 0.0192201, step = 48601 (0.228 sec)
INFO:tensorflow:global_step/sec: 433.479
INFO:tensorflow:loss = 0.0497698, step = 48701 (0.230 sec)
INFO:tensorflow:global_step/sec: 431.609
INFO:tensorflow:loss = 0.0157713, step = 48801 (0.232 sec)

INFO:tensorflow:global_step/sec: 436.003
INFO:tensorflow:loss = 0.00576189, step = 56101 (0.229 sec)
INFO:tensorflow:global_step/sec: 437.761
INFO:tensorflow:loss = 0.0207018, step = 56201 (0.228 sec)
INFO:tensorflow:global_step/sec: 387.953
INFO:tensorflow:loss = 0.0145016, step = 56301 (0.259 sec)
INFO:tensorflow:global_step/sec: 433.825
INFO:tensorflow:loss = 0.0231849, step = 56401 (0.230 sec)
INFO:tensorflow:global_step/sec: 432.476
INFO:tensorflow:loss = 0.00234764, step = 56501 (0.232 sec)
INFO:tensorflow:global_step/sec: 425.004
INFO:tensorflow:loss = 0.0246088, step = 56601 (0.235 sec)
INFO:tensorflow:global_step/sec: 437.537
INFO:tensorflow:loss = 0.00342278, step = 56701 (0.230 sec)
INFO:tensorflow:global_step/sec: 431.614
INFO:tensorflow:loss = 0.0625644, step = 56801 (0.232 sec)
INFO:tensorflow:global_step/sec: 432.141
INFO:tensorflow:loss = 0.0798171, step = 56901 (0.230 sec)
INFO:tensorflow:global_step/sec: 441.697
INFO:tensorflow:loss = 0.0358634, step = 57001 (0.227 se

INFO:tensorflow:global_step/sec: 447.99
INFO:tensorflow:loss = 0.0169362, step = 64301 (0.223 sec)
INFO:tensorflow:global_step/sec: 436.144
INFO:tensorflow:loss = 0.00416554, step = 64401 (0.229 sec)
INFO:tensorflow:global_step/sec: 440.145
INFO:tensorflow:loss = 0.000944561, step = 64501 (0.228 sec)
INFO:tensorflow:global_step/sec: 434.344
INFO:tensorflow:loss = 0.0124718, step = 64601 (0.231 sec)
INFO:tensorflow:global_step/sec: 435.599
INFO:tensorflow:loss = 0.0432933, step = 64701 (0.229 sec)
INFO:tensorflow:global_step/sec: 444.169
INFO:tensorflow:loss = 0.0028671, step = 64801 (0.226 sec)
INFO:tensorflow:global_step/sec: 444.643
INFO:tensorflow:loss = 0.00847467, step = 64901 (0.224 sec)
INFO:tensorflow:global_step/sec: 433.658
INFO:tensorflow:loss = 0.0702059, step = 65001 (0.230 sec)
INFO:tensorflow:global_step/sec: 433.731
INFO:tensorflow:loss = 0.0398495, step = 65101 (0.232 sec)
INFO:tensorflow:global_step/sec: 441.266
INFO:tensorflow:loss = 0.0220573, step = 65201 (0.227 se

INFO:tensorflow:global_step/sec: 426.681
INFO:tensorflow:loss = 0.00855361, step = 72501 (0.234 sec)
INFO:tensorflow:global_step/sec: 444.166
INFO:tensorflow:loss = 0.0303595, step = 72601 (0.225 sec)
INFO:tensorflow:global_step/sec: 427.383
INFO:tensorflow:loss = 0.0435918, step = 72701 (0.235 sec)
INFO:tensorflow:global_step/sec: 433.115
INFO:tensorflow:loss = 0.00851296, step = 72801 (0.230 sec)
INFO:tensorflow:global_step/sec: 440.785
INFO:tensorflow:loss = 0.00602571, step = 72901 (0.227 sec)
INFO:tensorflow:global_step/sec: 439.755
INFO:tensorflow:loss = 0.00766308, step = 73001 (0.227 sec)
INFO:tensorflow:global_step/sec: 438.61
INFO:tensorflow:loss = 0.0204306, step = 73101 (0.228 sec)
INFO:tensorflow:global_step/sec: 432.822
INFO:tensorflow:loss = 0.00480375, step = 73201 (0.231 sec)
INFO:tensorflow:global_step/sec: 438.502
INFO:tensorflow:loss = 0.00150942, step = 73301 (0.229 sec)
INFO:tensorflow:global_step/sec: 436.203
INFO:tensorflow:loss = 0.0165626, step = 73401 (0.229 

INFO:tensorflow:global_step/sec: 400.822
INFO:tensorflow:loss = 0.0408699, step = 80701 (0.249 sec)
INFO:tensorflow:global_step/sec: 412.07
INFO:tensorflow:loss = 0.0238829, step = 80801 (0.243 sec)
INFO:tensorflow:global_step/sec: 399.625
INFO:tensorflow:loss = 0.0297042, step = 80901 (0.250 sec)
INFO:tensorflow:global_step/sec: 405.28
INFO:tensorflow:loss = 0.0679926, step = 81001 (0.247 sec)
INFO:tensorflow:global_step/sec: 402.072
INFO:tensorflow:loss = 0.00289366, step = 81101 (0.249 sec)
INFO:tensorflow:global_step/sec: 390.685
INFO:tensorflow:loss = 0.0153765, step = 81201 (0.256 sec)
INFO:tensorflow:global_step/sec: 396.66
INFO:tensorflow:loss = 0.00959321, step = 81301 (0.252 sec)
INFO:tensorflow:global_step/sec: 374.311
INFO:tensorflow:loss = 0.0302637, step = 81401 (0.267 sec)
INFO:tensorflow:global_step/sec: 431.16
INFO:tensorflow:loss = 0.0460025, step = 81501 (0.231 sec)
INFO:tensorflow:global_step/sec: 442.521
INFO:tensorflow:loss = 0.0809611, step = 81601 (0.227 sec)
IN

INFO:tensorflow:global_step/sec: 388.5
INFO:tensorflow:loss = 0.0548921, step = 88901 (0.259 sec)
INFO:tensorflow:global_step/sec: 424.413
INFO:tensorflow:loss = 0.0667765, step = 89001 (0.234 sec)
INFO:tensorflow:global_step/sec: 412.028
INFO:tensorflow:loss = 0.00638102, step = 89101 (0.242 sec)
INFO:tensorflow:global_step/sec: 425.563
INFO:tensorflow:loss = 0.0530132, step = 89201 (0.235 sec)
INFO:tensorflow:global_step/sec: 423.238
INFO:tensorflow:loss = 0.0152001, step = 89301 (0.237 sec)
INFO:tensorflow:global_step/sec: 417.746
INFO:tensorflow:loss = 0.0120099, step = 89401 (0.239 sec)
INFO:tensorflow:global_step/sec: 429.189
INFO:tensorflow:loss = 0.0182912, step = 89501 (0.233 sec)
INFO:tensorflow:global_step/sec: 423.803
INFO:tensorflow:loss = 0.0164248, step = 89601 (0.235 sec)
INFO:tensorflow:global_step/sec: 423.008
INFO:tensorflow:loss = 0.0147337, step = 89701 (0.237 sec)
INFO:tensorflow:global_step/sec: 434.62
INFO:tensorflow:loss = 0.0644305, step = 89801 (0.229 sec)
IN

INFO:tensorflow:global_step/sec: 374.135
INFO:tensorflow:loss = 0.0305631, step = 97101 (0.265 sec)
INFO:tensorflow:global_step/sec: 437.454
INFO:tensorflow:loss = 0.0371087, step = 97201 (0.229 sec)
INFO:tensorflow:global_step/sec: 452.474
INFO:tensorflow:loss = 0.0402964, step = 97301 (0.221 sec)
INFO:tensorflow:global_step/sec: 403.437
INFO:tensorflow:loss = 0.000521435, step = 97401 (0.250 sec)
INFO:tensorflow:global_step/sec: 380.268
INFO:tensorflow:loss = 0.013856, step = 97501 (0.260 sec)
INFO:tensorflow:global_step/sec: 443.039
INFO:tensorflow:loss = 0.00381998, step = 97601 (0.226 sec)
INFO:tensorflow:global_step/sec: 316.259
INFO:tensorflow:loss = 0.00326873, step = 97701 (0.317 sec)
INFO:tensorflow:global_step/sec: 303.179
INFO:tensorflow:loss = 0.0200896, step = 97801 (0.331 sec)
INFO:tensorflow:global_step/sec: 408.413
INFO:tensorflow:loss = 0.0233696, step = 97901 (0.243 sec)
INFO:tensorflow:global_step/sec: 385.412
INFO:tensorflow:loss = 0.00842263, step = 98001 (0.259 s

In [15]:
print(get_auc(clf, test_set_tr, test_labels))

INFO:tensorflow:Restoring parameters from C:\Users\vladi\AppData\Local\Temp\tmpzyqe3p_r\model.ckpt-100000
0.989283076346


# Prediction for new data

In [16]:
# load new data
path = 'C:/Users/vladi/Home/validation/pdb_rescoring'
scores = load_data('results_rescoring_pdb.txt', path)

  import sys


In [17]:
scores

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,13,14,15,16,17,18,19,20,21,22
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BBABRN.xaa-ligand-3991_rescoring,3.01401,30.22427,42.53304,-433.61367,-25.01201,0.00000,4.18199,0.94196,1.54775,960.95384,...,2.0,0.08,0.06325,0.0,0.0,2.0,1.0,0.0,0.90,0.30
BCAARL.xaa-ligand-302_rescoring,2.07061,34.79490,55.79453,-463.37911,-16.10150,0.37630,2.14313,1.20715,0.00000,995.37908,...,1.0,0.02,0.04472,0.0,0.0,1.0,1.0,0.0,0.90,0.35
BCAARL.xaa-ligand-303_rescoring,2.07172,34.81494,55.80674,-463.39736,-16.11696,0.37737,2.14382,1.20778,0.00000,994.64472,...,1.0,0.02,0.04472,0.0,0.0,1.0,1.0,0.0,0.90,0.35
BCAARN.xaa-ligand-0688_rescoring,1.76154,43.90186,55.34020,-474.29098,-12.25059,0.00000,1.10691,0.58185,0.00000,981.80099,...,2.0,0.08,0.06325,0.0,0.0,2.0,1.0,0.0,0.90,0.35
BCAARN.xaa-ligand-3452_rescoring,2.41989,42.64010,52.15538,-450.68835,-19.30387,0.00259,2.74468,0.27431,0.63829,932.74429,...,1.0,0.02,0.04472,0.0,0.0,1.0,1.0,0.0,0.85,0.35
BCAARN.xab-ligand-0397_rescoring,2.95683,31.92573,55.31536,-476.84850,-22.02522,0.09362,3.85933,0.00000,1.30334,980.31131,...,3.0,0.18,0.07746,0.0,0.0,4.0,1.0,0.0,0.90,0.30
BCAARN.xab-ligand-9383_rescoring,2.12407,30.86094,50.65179,-460.69237,-12.77119,0.53665,1.95974,0.14492,1.02957,964.12564,...,1.0,0.02,0.04472,0.0,0.0,1.0,1.0,0.0,0.90,0.30
BCAARN.xab-ligand-9739_rescoring,2.68747,40.00753,55.52905,-449.80875,-18.75856,0.21363,3.05565,0.21514,0.63007,925.86691,...,1.0,0.02,0.04472,0.0,0.0,1.0,1.0,0.0,0.85,0.35
BCAARN.xab-ligand-9773_rescoring,3.88588,38.75960,66.59920,-484.41178,-32.23933,0.32986,5.33111,2.26377,0.83201,977.59211,...,4.0,0.32,0.08944,0.0,0.0,6.0,1.0,0.0,0.90,0.35
BCAARN.xac-ligand-1824_rescoring,3.32499,44.69922,58.28242,-451.22115,-21.85014,0.07304,3.64881,0.37128,0.47761,928.86403,...,1.0,0.02,0.04472,0.0,0.0,1.0,1.0,0.0,0.85,0.35


In [18]:
# transform new for predictions
scores_tr =  transform_data(transformer, scores)

In [19]:
scores_prediction = clf.predict(scores_tr)

INFO:tensorflow:Restoring parameters from C:\Users\vladi\AppData\Local\Temp\tmpzyqe3p_r\model.ckpt-100000


In [20]:
# list(iox_scores_prediction['probabilities'][:,1])
len([indx for indx, prob in enumerate(scores_prediction['probabilities'][:,1]) if prob > 0.99])

15784

In [23]:
selected = [scores.index[indx] for indx, prob in enumerate(scores_prediction['probabilities'][:,1]) if prob > 0.9]
len(selected)

21182

In [26]:
selected = [(scores.index[indx]) for indx, prob in enumerate(scores_prediction['probabilities'][:,1]) if prob > 0.9]
selected

['BCAARN.xaa-ligand-0688_rescoring',
 'BCAARN.xab-ligand-9739_rescoring',
 'BCAARN.xac-ligand-1824_rescoring',
 'BCABMN.xaa-ligand-1016_rescoring',
 'BCABRN.xaa-ligand-01358_rescoring',
 'BCABRN.xaa-ligand-06903_rescoring',
 'BCABRN.xab-ligand-0305_rescoring',
 'BDAARN.xaa-ligand-0074_rescoring',
 'BDAARN.xaa-ligand-2074_rescoring',
 'BDAARN.xaa-ligand-5897_rescoring',
 'BDAARN.xaa-ligand-9220_rescoring',
 'BDAARN.xab-ligand-0016_rescoring',
 'BDAARN.xab-ligand-7275_rescoring',
 'BDABMN.xaa-ligand-0930_rescoring',
 'BDABRN.xaa-ligand-3843_rescoring',
 'BDABRN.xaa-ligand-3871_rescoring',
 'BDABRN.xaa-ligand-7080_rescoring',
 'BDABRN.xab-ligand-1698_rescoring',
 'BDABRO.xaa-ligand-6163_rescoring',
 'BFAAMN.xaa-ligand-1037_rescoring',
 'BFAARN.xaa-ligand-02075_rescoring',
 'BFAARN.xaa-ligand-03017_rescoring',
 'BFAARN.xab-ligand-00169_rescoring',
 'BFAARN.xab-ligand-03524_rescoring',
 'BFAARN.xab-ligand-03571_rescoring',
 'BFAARN.xab-ligand-05383_rescoring',
 'BFAARN.xac-ligand-3031_resco

In [27]:
with open('pi3k_leaders_clean.txt', 'w') as fw:
    for name in selected:
        fw.write('%s\n' % name)