In [2]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
# from xgboost import XGBClassifier

import numpy as np
import pandas as pd
import itertools
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import copy

In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn as sk
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import itertools
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

pd.set_option('display.max_rows', None) # Show max rows/columns
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

In [10]:
def multilabel_log_loss(y_valid, y_pred):
    """Calculate the log-loss for the multilabel case."""
    N, M = y_valid.shape  # Create temp matrix to store values
    zero_mat = np.zeros((N, M))

    dummy_zero = 1 * 10 ** (-15)  # Compensate for 0's and 1's predictions
    y_pred.replace(0, dummy_zero, inplace=True)
    y_pred.replace(1, 1 - dummy_zero, inplace=True)

    for m in range(M):  # Calculate log-loss per index
        for n in range(N):
            y_true = y_valid.iloc[n, m]
            y_hat = y_pred.iloc[n, m]
            temp_log_loss = y_true * np.log(y_hat) + (1 - y_true) * np.log(1 - y_hat)
            zero_mat[n, m] = temp_log_loss

    log_loss_score = -zero_mat.mean(axis=0).mean()

    return log_loss_score

In [11]:
FEATURES_FILE="../input/train_features.csv"
TARGETS_FILE="../input/train_targets_scored.csv"

In [12]:
def preprocess_data():
    """Preprocess the data."""
    X = pd.read_csv(FEATURES_FILE)
    X.drop(X.columns[0], axis=1, inplace=True)
    y = pd.read_csv(TARGETS_FILE)
    y.drop(y.columns[0], axis=1, inplace=True)

    # Add hidden class
    zero_class_indices = y[y.iloc[:, 1:].apply(sum, axis=1) == 0].index
    y["hidden_class"] = 0
    y.loc[zero_class_indices, "hidden_class"] = 1

    class_counts = y.iloc[:, 1:].sum(axis=0)
    class_counts = class_counts.sort_values(ascending=False)
    class_counts_sub = class_counts.head(13)
    chosen_classes = class_counts_sub.index.values

    # Save the column names
    X_col_names = X.columns.tolist()
    cat_cols = ["cp_type", "cp_time", "cp_dose"]  # Identify categorical columns
    ohe = OneHotEncoder()  # Load OHE
    _ = ohe.fit_transform(X[cat_cols])
    ohe_names = ohe.get_feature_names(cat_cols)
    ohe_names = ohe_names.tolist()

    # Fix new column names to include OHE names and normal feature names
    X_col_names = [col for col in X_col_names if col not in cat_cols]
    ohe_names.extend(X_col_names)

    # Transform the data with OHE on the indices of the cat variables
    ct = ColumnTransformer(
        transformers=[("encoder", OneHotEncoder(), list(range(0, 3)))],
        remainder="passthrough",
    )
    X = pd.DataFrame(ct.fit_transform(X))
    X.columns = ohe_names

    train_idx_list = []
    valid_idx_list = []
    mskf = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    for train_index, valid_index in mskf.split(X, y):
        train_idx_list.append(train_index)
        valid_idx_list.append(valid_index)

    return X, y, train_idx_list, valid_idx_list, chosen_classes

In [13]:
def set_model_params(clf_name, params):
    """Set the parameters for a model during grid search."""
    if clf_name == "log_reg":
        model = LogisticRegression(
            penalty=params[0],
            C=params[1],
            random_state=0,
            max_iter=1e10,
        )
    elif clf_name == "svm":
        model = SVC(
            C=params[0], gamma=params[1], class_weight=params[2], probability=params[3]
        )
    elif clf_name == "rf":
        model = RandomForestClassifier(
            n_estimators=params[0],
            max_depth=params[1],
            min_samples_split=params[2],
            min_samples_leaf=params[3],
            max_features=params[4],
        )
    elif clf_name == "dt":
        model = DecisionTreeClassifier(
            max_depth=params[0],
            min_samples_split=params[1],
            min_samples_leaf=params[2],
            max_features=params[3],
        )
    elif clf_name == "knn":
        model = KNeighborsClassifier(n_neighbors=params[0], p=params[1])
    elif clf_name == "nb":
        model = GaussianNB()
#     elif clf_name == 'xgb':
#         model = XGBClassifier(
#             learning_rate=params[0],
#             gamma=params[1],
#             max_depth=params[2],
#             min_child_weight=params[3],
#             subsample=params[4],
#             colsample_bytree=params[5],
#             reg_lambda=params[6],
#             reg_alpha=params[7]
#         )

    return model

In [14]:
clf_list = ["log_reg", "svm", "rf", "dt", "knn", "nb"]

param_grid = {
    "log_reg": {
        "Penalty": ["l2"],
        "C": [0.001, 0.01, 0.1, 1],
    },
    "svm": {
        "C": [round((0.1) * ((0.1) ** (n - 1)), 5) for n in reversed(range(-1, 3))],
        "gamma": ["auto"],
        "class_weight": ["balanced"],
        "probability": [True],
    },
    "rf": {
        "n_estimators": [120],
        "max_depth": [5],
        "min_samples_split": [5],
        "min_samples_leaf": [1, 10],
        "max_features": ["log2"],
    },
    "dt": {
        "max_depth": [5, 25],
        "min_samples_split": [2, 10],
        "min_samples_leaf": [2, 10],
        "max_features": ["log2"],
    },
    "knn": {
        "n_neighbors": [round((2) * ((2) ** (n - 1)), 5) for n in range(1, 3)],
        "p": [2, 3],
    },
    "nb": {
        "dummy_param": [None, None]
    },
    'xgb': {
        'eta': [0.01, 0.015, 0.025, 0.05, 0.1],
        'gamma': [0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
        'max_depth': [3, 5, 7, 9, 12, 15, 17, 25],
        'min_child_weight': [1, 3, 5, 7],
        'subsample': [0.6, 0.7, 0.8, 0.9, 1.0],
        'colsample_bytree': [0.6, 0.7, 0.8, 0.9, 1.0],
        'lambda': [0.01, 0.1, 1.0],
        'alpha': [0, 0.1, 0.5, 1.0]
    }
}

Currently:
- loop through models
    - loop through parameters
        - do CV
        
Ideally:
- loop through $K$ classes
    - loop through models
        - loop through parameters
            - do CV
- find the best model+parameters per class
    - within each $k$'th class, decide which model+parameter combination has the best "log-loss"
        - for each model-param combo, calculate the avg. CV log-loss
            - output/print the model-param combo that performs best
            
Convenient to current coding:
- loop through models
    - loop through parameters
        - Rather than do regular CV, we can maybe save the output of the log-loss per binary class to some list. This can be averaged with CV.
        - e.g., for fold=0, do the train/valid split, then for each of the $K$ classes save the resulting fold 0's log-loss, repeat for all folds, then in the end average over the folds per $K$ class

In [15]:
# y_df=y_train
# chosen_class=chosen_class
def binary_msfk_fun(y_df, chosen_class):
    """Create a OVR binary vector for a set ofchosen classes."""
    y_df_copy = copy.deepcopy(y_df) # Create copy of target df
    y_df_copy.reset_index(drop=True, inplace=True) # Reset row indices
    c_indices = y_df_copy[y_df_copy.loc[:, chosen_class] == 1].loc[:, chosen_class]
    n = y_df.shape[0]
    zeros = [0] * n  # Can't do this actually

    for j in range(n):  # Loop through all rows
        # Check if the index should be one instead
        if j in c_indices:
            zeros[j] = 1

    binary_target = pd.DataFrame({chosen_class: zeros})
    
    return binary_target

In [16]:
def class_wise_log_loss(y_true, y_hat):
    """Calculate the log-loss for just a chosen class."""
    class_log_loss = y_true * np.log(y_hat) + (1 - y_true) * np.log(1 - y_hat)
    class_log_loss = -np.mean(class_log_loss)
    
    return class_log_loss

In [17]:
def run_cv(
    fold,
    X,
    y,
    train_idx_list,
    valid_idx_list,
    chosen_class,
    fold_log_loss_list,
    param_combo_,
    model,
):
    """Run the cross-validation."""
    train_idx = train_idx_list[fold]
    valid_idx = valid_idx_list[fold]

    ### These have shifted row names
    x_train = X.iloc[train_idx, :].values
    y_train = y.iloc[train_idx, :]
    x_valid = X.iloc[valid_idx, :].values
    y_valid = y.iloc[valid_idx, :]

    # Apply feature scaling to the numeric attributes
    sc = StandardScaler()
    x_train[:, 7:] = sc.fit_transform(x_train[:, 7:])
    x_valid[:, 7:] = sc.transform(x_valid[:, 7:])

    ### Need a non-scored df of dimensions equal to validation set
    non_scored_y_valid = copy.deepcopy(y_valid)
    non_scored_y_valid.replace(1, 0, inplace=True)

    y_temp = binary_msfk_fun(y_df=y_train, chosen_class=chosen_class)

    class_name = y_temp.columns[0]

    # Fit the model
    model.fit(x_train, y_temp.values.ravel())

    # Create predictions
    y_pred_probs = model.predict_proba(x_valid)[:, 1]

    # Calculate the class-log-loss and save it to the list
    class_log_loss_score = class_wise_log_loss(y_true=y_valid.loc[:,class_name], y_hat=y_pred_probs)
    fold_log_loss_list.append(class_log_loss_score)

In [18]:
# Note: This takes a few seconds
X, y, train_idx_list, valid_idx_list, chosen_classes = preprocess_data()



In [19]:
# chosen_classes = chosen_classes[0:3] # temp limit the chosen classes

How to store all the results?
- list of lists of lists:
    - outer list: $K$ classes
    - center list: $C$ classifiers
    - inner list: $P$ parameter combinations
    
- dictionary is more coherent, dict of dict of dict:
    - outer dict: $K$ classes
    - center dict: $C$ classifiers
    - inner dict: $P$ parameter combinations
    
- perhaps better is a `dict[class][clf][list]`

In [20]:
# This way just stored a dict[class][clf][list]
# it is less concise, but simpler to make for now
result_dict = {}
for class_ in chosen_classes:
    temp_clf_dict = {}
    for clf_ in clf_list:
        temp_clf_dict[clf_] = []
    
    result_dict[class_] = temp_clf_dict

In [21]:
# clf_list = ["log_reg", "rf"]
clf_list = ["dt", "knn"]

Notes:
- log_reg is fast, can try all params possibly
- rf works ok, can try a few params
- dt is fast, can try all params possibly
- knn works, but it is slow

In [22]:
chosen_classes

array(['hidden_class', 'nfkb_inhibitor', 'proteasome_inhibitor',
       'cyclooxygenase_inhibitor', 'dopamine_receptor_antagonist',
       'serotonin_receptor_antagonist', 'dna_inhibitor',
       'glutamate_receptor_antagonist', 'adrenergic_receptor_antagonist',
       'cdk_inhibitor', 'egfr_inhibitor', 'tubulin_inhibitor',
       'acetylcholine_receptor_antagonist'], dtype=object)

In [24]:
c = chosen_classes[3]

In [30]:
chosen_classes_list = chosen_classes.tolist()

In [32]:
chosen_classes_list.index(c)

3

In [33]:
chosen_classes_list = chosen_classes.tolist()
for class_idx in chosen_classes: # Loop through classes
    ith_class = chosen_classes_list.index(class_idx)
    print(f"Class: {class_idx}, index {ith_class + 1} out of {len(chosen_classes_list)}")
    for clf_idx in clf_list:  # Loop through models
        print(f"Classifier: {clf_idx}")
        
        # Create parameter combinations
        clf_param_grid = param_grid[clf_idx]
        param_names = [
            key for key in clf_param_grid.keys()
        ]
        param_combos = itertools.product(
            *(clf_param_grid[p_name] for p_name in param_names)
        )
        param_combos_list = list(param_combos)
        total_param_combos = len(param_combos_list)

        # 3) Loop through parameters
        for p_combo_idx in range(total_param_combos):  # Loop through parameters
            print(f"Parameter combination index: {p_combo_idx + 1} out of {total_param_combos}")
            param_combo_ = param_combos_list[p_combo_idx]
            model_ = set_model_params(clf_name=clf_idx, params=param_combo_)

            # 4) Run CV on the parameter
            # Calculate the log-loss for clf_idx-class_idx-p_combo_idx
            fold_log_loss_list_ = []
            for fold_ in range(5):
                run_cv(
                    fold=fold_,
                    X=X,
                    y=y,
                    train_idx_list=train_idx_list,
                    valid_idx_list=valid_idx_list,
                    chosen_class=class_idx,
                    fold_log_loss_list = fold_log_loss_list_,
                    param_combo_=param_combo_,
                    model=model_
                )

            mean_log_loss = np.mean(fold_log_loss_list_)
            result_dict[class_idx][clf_idx].append([mean_log_loss, param_combo_])

Class: hidden_class, index 1 out of 13
Class: nfkb_inhibitor, index 2 out of 13
Class: proteasome_inhibitor, index 3 out of 13
Class: cyclooxygenase_inhibitor, index 4 out of 13
Class: dopamine_receptor_antagonist, index 5 out of 13
Class: serotonin_receptor_antagonist, index 6 out of 13
Class: dna_inhibitor, index 7 out of 13
Class: glutamate_receptor_antagonist, index 8 out of 13
Class: adrenergic_receptor_antagonist, index 9 out of 13
Class: cdk_inhibitor, index 10 out of 13
Class: egfr_inhibitor, index 11 out of 13
Class: tubulin_inhibitor, index 12 out of 13
Class: acetylcholine_receptor_antagonist, index 13 out of 13


do a last change to make sure that the final printout includes the parameter combination and not just the relative index

In [None]:
# def result_stats(result_dict, clf_list, chosen_classes, param_grid):
# For each class print out 1) the best parameter per model 2) the best model/param combo overall

stat_dict = {}; best_dict = {} # Initialize dictionaries
for class_ in chosen_classes:
    temp_clf_dict = {}
    for clf_ in clf_list:
        temp_clf_dict[clf_] = []
    stat_dict[class_] = temp_clf_dict
    best_dict[class_] = []

# Fill the dictionaries showing best parameters per model (stat_dict)
# and best model per class (best_dict)
for class_idx in chosen_classes: # Loop through classes
    for clf_idx in clf_list: # Loop through classifiers
        # Find best parameter (index) per model
        temp_class_clf_list = result_dict[class_idx][clf_idx]
        cv_score_list = [cv_score[0] for cv_score in temp_class_clf_list]
        best_clf_score = min(cv_score_list)
        best_idx = cv_score_list.index(best_clf_score)
        stat_dict[class_idx][clf_idx] = [temp_class_clf_list[best_idx][1], best_clf_score]

    # Find best model/param combo per class
    # Reference: https://stackoverflow.com/questions/34249441/finding-minimum-value-in-dictionary
    best_clf = min(stat_dict['hidden_class'].items(), key=lambda x: x[1][1])
    best_dict[class_idx] = [best_clf]

# Reference: https://stackoverflow.com/questions/29771895/save-nested-dictionary-with-differing-number-of-dictionaries    
# Save results to csv
best_dict_list = [dict(class_name=i, clf_result=j) for i, j in best_dict.items()]
with open("../output/best_dict.csv", 'w') as f:
    fieldnames = ['class_name', 'clf_result']
    w = csv.DictWriter(f, fieldnames)
    w.writeheader()
    w.writerows(best_dict_list)
    
import csv
import itertools

nested_dict_keys = [] # Set fieldnames for stat_dict
for idx_i, idx_j in stat_dict.items():
    for idx_k, idx_l in idx_j.items():
        nested_dict_keys.append(idx_k)
nested_dict_keys = list(set(nested_dict_keys))
fieldnames = ['class_name'] + nested_dict_keys

# Reference: https://stackoverflow.com/questions/29400631/python-writing-nested-dictionary-to-csv
# Save results to csv
with open("../output/stat_dict.csv", "w") as f:
    w = csv.DictWriter(f, fieldnames)
    w.writeheader()
    for key in stat_dict:
        w.writerow({field: stat_dict[key].get(field) or key for field in fieldnames})

In [4]:
stat_dict_csv = pd.read_csv('../output/stat_dict.csv')
best_dict_csv = pd.read_csv('../output/best_dict.csv')

In [23]:
log_reg_results = stat_dict_csv.loc[:,'log_reg']
eta_list = []
for i in log_reg_results:
    eta_list.append(float(i.split(',')[1].split(')')[0].split(' ')[1]))

In [52]:
eta_list

[0.1,
 0.01,
 100.0,
 0.001,
 0.001,
 0.001,
 0.001,
 0.001,
 0.001,
 0.01,
 0.1,
 0.01,
 0.001]

In [36]:
best_dict_csv

Unnamed: 0,class_name,clf_result
0,hidden_class,"[('log_reg', [('l2', 0.1), 0.5705306695722291])]"
1,nfkb_inhibitor,"[('log_reg', [('l2', 0.1), 0.5705306695722291])]"
2,proteasome_inhibitor,"[('log_reg', [('l2', 0.1), 0.5705306695722291])]"
3,cyclooxygenase_inhibitor,"[('log_reg', [('l2', 0.1), 0.5705306695722291])]"
4,dopamine_receptor_antagonist,"[('log_reg', [('l2', 0.1), 0.5705306695722291])]"
5,serotonin_receptor_antagonist,"[('log_reg', [('l2', 0.1), 0.5705306695722291])]"
6,dna_inhibitor,"[('log_reg', [('l2', 0.1), 0.5705306695722291])]"
7,glutamate_receptor_antagonist,"[('log_reg', [('l2', 0.1), 0.5705306695722291])]"
8,adrenergic_receptor_antagonist,"[('log_reg', [('l2', 0.1), 0.5705306695722291])]"
9,cdk_inhibitor,"[('log_reg', [('l2', 0.1), 0.5705306695722291])]"


test tuned_ovr.py

In [38]:
def preprocess_data(num_retained_classes=12):
    """Preprocess the data."""
#     X = pd.read_csv(config.FEATURES_FILE)
    X = pd.read_csv("../input/train_features.csv")
    X.drop(X.columns[0], axis=1, inplace=True)
    X_test = pd.read_csv("../input/test_features.csv")
    X_test.drop(X_test.columns[0], axis=1, inplace=True)

    # Save the column names
    X_col_names = X.columns.tolist()
    cat_cols = ["cp_type", "cp_time", "cp_dose"]  # Identify categorical columns
    ohe = OneHotEncoder()  # Load OHE
    _ = ohe.fit_transform(X[cat_cols])
    ohe_names = ohe.get_feature_names(cat_cols)
    ohe_names = ohe_names.tolist()

    # Fix new column names to include OHE names and normal feature names
    X_col_names = [col for col in X_col_names if col not in cat_cols]
    ohe_names.extend(X_col_names)

    # Transform the data with OHE on the indices of the cat variables
    ct = ColumnTransformer(
        transformers=[("encoder", OneHotEncoder(), list(range(0, 3)))],
        remainder="passthrough",
    )
    X = pd.DataFrame(ct.fit_transform(X))
    X.columns = ohe_names
    X_test = pd.DataFrame(ct.transform(X_test))
    X_test.columns = ohe_names

    # Apply feature scaling to the numeric attributes
    sc = StandardScaler()
    X.iloc[:, 7:] = sc.fit_transform(X.iloc[:, 7:])
    X_test.iloc[:, 7:] = sc.transform(X_test.iloc[:, 7:])

    return X, X_test

In [39]:
X_train, X_test = preprocess_data(num_retained_classes=12)

In [42]:
def generate_OVR_targets(num_retained_classes=12):
    """Generate the list of binary OVR target vectors that will be tested."""
    y = pd.read_csv("../input/train_targets_scored.csv")

    # Add hidden class
    zero_class_indices = y[y.iloc[:, 1:].apply(sum, axis=1) == 0].index
    y["hidden_class"] = 0
    y["hidden_class"].iloc[zero_class_indices] = 1

    class_counts = y.iloc[:, 1:].sum(axis=0)
    class_counts = class_counts.sort_values(ascending=False)

    class_counts_sub = class_counts.head(num_retained_classes + 1)
    retained_classes = class_counts_sub.index.values
    y2 = y.iloc[:, 1:]

    ### The following creates 'c' binary target vectors saved in a list: 'binary_vector_list'
    class_index_list = []  # Save indices that contain the class
    for c in retained_classes:
        c_indices = y2.loc[:, c][y2.loc[:, c] == 1].index.values
        class_index_list.append([c, c_indices])

    binary_vector_list = []
    n = len(y)
    for i in class_index_list:  # Loop through class/index pairs
        zeros = [0] * n
        for j in range(n):  # Loop through all rows
            # Check if the index should be one instead
            if j in i[1]:
                zeros[j] = 1
        binary_vector_list.append(pd.DataFrame({i[0]: zeros}))

    return binary_vector_list

In [43]:
ovr_targets = generate_OVR_targets(num_retained_classes=12)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [48]:
for i in range(len(ovr_targets)):
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12


In [47]:
len(ovr_targets[0])

23814

In [50]:
submission = pd.read_csv('../output/submission.csv')

In [51]:
submission.head()

Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,adrenergic_receptor_agonist,adrenergic_receptor_antagonist,akt_inhibitor,aldehyde_dehydrogenase_inhibitor,alk_inhibitor,ampk_activator,analgesic,androgen_receptor_agonist,androgen_receptor_antagonist,anesthetic_-_local,angiogenesis_inhibitor,angiotensin_receptor_antagonist,anti-inflammatory,antiarrhythmic,antibiotic,anticonvulsant,antifungal,antihistamine,antimalarial,antioxidant,antiprotozoal,antiviral,apoptosis_stimulant,aromatase_inhibitor,atm_kinase_inhibitor,atp-sensitive_potassium_channel_antagonist,atp_synthase_inhibitor,atpase_inhibitor,atr_kinase_inhibitor,aurora_kinase_inhibitor,autotaxin_inhibitor,bacterial_30s_ribosomal_subunit_inhibitor,bacterial_50s_ribosomal_subunit_inhibitor,bacterial_antifolate,bacterial_cell_wall_synthesis_inhibitor,bacterial_dna_gyrase_inhibitor,bacterial_dna_inhibitor,bacterial_membrane_integrity_inhibitor,bcl_inhibitor,bcr-abl_inhibitor,benzodiazepine_receptor_agonist,beta_amyloid_inhibitor,bromodomain_inhibitor,btk_inhibitor,calcineurin_inhibitor,calcium_channel_blocker,cannabinoid_receptor_agonist,cannabinoid_receptor_antagonist,carbonic_anhydrase_inhibitor,casein_kinase_inhibitor,caspase_activator,catechol_o_methyltransferase_inhibitor,cc_chemokine_receptor_antagonist,cck_receptor_antagonist,cdk_inhibitor,chelating_agent,chk_inhibitor,chloride_channel_blocker,cholesterol_inhibitor,cholinergic_receptor_antagonist,coagulation_factor_inhibitor,corticosteroid_agonist,cyclooxygenase_inhibitor,cytochrome_p450_inhibitor,dihydrofolate_reductase_inhibitor,dipeptidyl_peptidase_inhibitor,diuretic,dna_alkylating_agent,dna_inhibitor,dopamine_receptor_agonist,dopamine_receptor_antagonist,egfr_inhibitor,elastase_inhibitor,erbb2_inhibitor,estrogen_receptor_agonist,estrogen_receptor_antagonist,faah_inhibitor,farnesyltransferase_inhibitor,fatty_acid_receptor_agonist,fgfr_inhibitor,flt3_inhibitor,focal_adhesion_kinase_inhibitor,free_radical_scavenger,fungal_squalene_epoxidase_inhibitor,gaba_receptor_agonist,gaba_receptor_antagonist,gamma_secretase_inhibitor,glucocorticoid_receptor_agonist,glutamate_inhibitor,glutamate_receptor_agonist,glutamate_receptor_antagonist,gonadotropin_receptor_agonist,gsk_inhibitor,hcv_inhibitor,hdac_inhibitor,histamine_receptor_agonist,histamine_receptor_antagonist,histone_lysine_demethylase_inhibitor,histone_lysine_methyltransferase_inhibitor,hiv_inhibitor,hmgcr_inhibitor,hsp_inhibitor,igf-1_inhibitor,ikk_inhibitor,imidazoline_receptor_agonist,immunosuppressant,insulin_secretagogue,insulin_sensitizer,integrin_inhibitor,jak_inhibitor,kit_inhibitor,laxative,leukotriene_inhibitor,leukotriene_receptor_antagonist,lipase_inhibitor,lipoxygenase_inhibitor,lxr_agonist,mdm_inhibitor,mek_inhibitor,membrane_integrity_inhibitor,mineralocorticoid_receptor_antagonist,monoacylglycerol_lipase_inhibitor,monoamine_oxidase_inhibitor,monopolar_spindle_1_kinase_inhibitor,mtor_inhibitor,mucolytic_agent,neuropeptide_receptor_antagonist,nfkb_inhibitor,nicotinic_receptor_agonist,nitric_oxide_donor,nitric_oxide_production_inhibitor,nitric_oxide_synthase_inhibitor,norepinephrine_reuptake_inhibitor,nrf2_activator,opioid_receptor_agonist,opioid_receptor_antagonist,orexin_receptor_antagonist,p38_mapk_inhibitor,p-glycoprotein_inhibitor,parp_inhibitor,pdgfr_inhibitor,pdk_inhibitor,phosphodiesterase_inhibitor,phospholipase_inhibitor,pi3k_inhibitor,pkc_inhibitor,potassium_channel_activator,potassium_channel_antagonist,ppar_receptor_agonist,ppar_receptor_antagonist,progesterone_receptor_agonist,progesterone_receptor_antagonist,prostaglandin_inhibitor,prostanoid_receptor_antagonist,proteasome_inhibitor,protein_kinase_inhibitor,protein_phosphatase_inhibitor,protein_synthesis_inhibitor,protein_tyrosine_kinase_inhibitor,radiopaque_medium,raf_inhibitor,ras_gtpase_inhibitor,retinoid_receptor_agonist,retinoid_receptor_antagonist,rho_associated_kinase_inhibitor,ribonucleoside_reductase_inhibitor,rna_polymerase_inhibitor,serotonin_receptor_agonist,serotonin_receptor_antagonist,serotonin_reuptake_inhibitor,sigma_receptor_agonist,sigma_receptor_antagonist,smoothened_receptor_antagonist,sodium_channel_inhibitor,sphingosine_receptor_agonist,src_inhibitor,steroid,syk_inhibitor,tachykinin_antagonist,tgf-beta_receptor_inhibitor,thrombin_inhibitor,thymidylate_synthase_inhibitor,tlr_agonist,tlr_antagonist,tnf_inhibitor,topoisomerase_inhibitor,transient_receptor_potential_channel_antagonist,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.0,0.0,0.0,0.0,0.016208,0.0,0.0,0.0,0.0,0.0,0.015518,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000414,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028453,0.0,0.0,0.0,0.0,0.0,0.019203,0.0,0.023427,2.4e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0203,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.370125e-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016112,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000967,0.0,0.0,0.0,0.0,0.0,0.0
1,id_001897cda,0.0,0.0,0.0,0.0,0.008534,0.0,0.0,0.0,0.0,0.0,0.015204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00124,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010341,0.0,0.0,0.0,0.0,0.0,0.005823,0.0,0.014819,5.2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008777,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.840568e-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017492,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000409,0.0,0.0,0.0,0.0,0.0,0.0
2,id_002429b5b,0.0,0.0,0.0,0.0,0.012487,0.0,0.0,0.0,0.0,0.0,0.011272,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000675,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01046,0.0,0.0,0.0,0.0,0.0,0.010947,0.0,0.008858,0.000303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001175,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.807055e-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012758,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009175,0.0,0.0,0.0,0.0,0.0,0.0
3,id_00276f245,0.0,0.0,0.0,0.0,0.007029,0.0,0.0,0.0,0.0,0.0,0.012448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002061,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013791,0.0,0.0,0.0,0.0,0.0,0.012946,0.0,0.015566,0.002475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001536,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.403353e-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014176,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008936,0.0,0.0,0.0,0.0,0.0,0.0
4,id_0027f1083,0.0,0.0,0.0,0.0,0.015483,0.0,0.0,0.0,0.0,0.0,0.017287,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018675,0.0,0.0,0.0,0.0,0.0,0.017725,0.0,0.016659,1.6e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014257,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006913,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.414738e-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014034,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002376,0.0,0.0,0.0,0.0,0.0,0.0
