# HyperOpt for CYP2D6 dataset
This notebook uses `cyp2d6_union_trainset_base_smiles.csv` from the old MoDaC data.
Goal: Generate RF, NN, and XGBoost models with different parameter settings and select the best model based on the `valid_r2` score.

In [19]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import os, time
import multiprocessing
from joblib import Parallel, delayed

from atomsci.ddm.pipeline import model_pipeline as mp
from atomsci.ddm.pipeline import parameter_parser as parse
from atomsci.ddm.pipeline import predict_from_model as pfm 
from atomsci.ddm.pipeline import compare_models as cm 

# Seaborn appearance applied to the figures drawn in this notebook.
sns.set_context("poster")
sns.set_style("whitegrid")
sns.set_palette("Set2")
pal = sns.color_palette()

# Caps on the number of DataFrame rows/columns pandas displays.
for option_name, option_value in (
    ('display.max_rows', 15),
    ('display.max_columns', 150),
):
    pd.set_option(option_name, option_value)

print("Imports Done.")

Imports Done.


In [20]:
# Target name; the data/output directory paths and the dataset file name are built from it.
target = "CYP2D6"

# load data and examine

In [21]:
# Input data directory and output directory for the HPO models, both derived
# from the target name. The trailing slash is kept on purpose: file names are
# appended to these paths by plain string concatenation.
data_dir = f'/mnt/projects/ATOM/sarkart4/Data/{target}-ampl-1.1.0_old/'
outdir = f'/mnt/projects/ATOM/sarkart4/Data/HPO_models_{target}_old/'

# exist_ok=True creates the directory (and parents) only when it is missing,
# replacing the separate check-then-create step.
os.makedirs(outdir, exist_ok=True)

In [22]:
# Training-set CSV for the target; the file name starts with the lower-cased target name.
#test_file = data_dir + "cyp2d6_union_testset_base_smiles.csv"
data_file = f"{data_dir}{target.lower()}_union_trainset_base_smiles.csv"
data = pd.read_csv(data_file)

# Show the first two rows as the cell output.
data.head(2)

Unnamed: 0,base_rdkit_smiles,compound_id,pIC50,relation,active
0,COCCOc1ccn2c(-c3ccc4cccc(OC5CCNCC5)c4n3)cnc2c1,CHEMBL3109340,5.0,,1
1,CN1CCC[C@@H](CC2c3ccccc3Sc3ccccc32)C1,MJFJKKXQDNNUJF-HNNXBMFYNA-N,6.50004,,1


In [23]:
# Sanity checks on the loaded table: duplicated SMILES / compound IDs / index
# values, missing SMILES, table shape, and the active/inactive balance.
n_dup_smiles = data.duplicated(subset="base_rdkit_smiles").sum()
n_dup_ids = data.duplicated(subset="compound_id").sum()
n_dup_index = data.index.duplicated().sum()
n_na_smiles = data.base_rdkit_smiles.isna().sum()
print("Duplicate SMILES:", n_dup_smiles, "\tDuplicate compound IDs:", n_dup_ids, "\tDuplicate indices:", n_dup_index, "\tNA SMILES:", n_na_smiles)
print("Shape:", data.shape)

# Label the counts with `target` rather than a hard-coded name so the output
# stays correct if the target is changed.
# The inactive count is the row count minus the sum of `active`
# (assumes `active` is a 0/1 flag -- TODO confirm).
n_active = data.active.sum()
print(f'{target} actives:', n_active)
print(f'{target} inactives:', len(data) - n_active)

Duplicate SMILES: 0 	Duplicate compound IDs: 0 	Duplicate indices: 0 	NA SMILES: 0
Shape: (10445, 5)
CYP2D6 actives: 6982
CYP2D6 inactives: 3463


In [24]:
## Split UUIDs saved for future reference.
## Exactly one `split_uuid` assignment below is active; the others are kept as a record.
# all are mordred_filtered
# /home/sarkart4/MoDaC/NCI_DOE_Archive/ATOM/Safety_Screen_Targets/CYP2D6-ampl-1.1.0/cyp2d6_union_trainset_base_smiles_train_valid_test_scaffold_c8c36365-b294-41a4-983a-57879bbde0c0.csv
# /home/sarkart4/MoDaC/NCI_DOE_Archive/ATOM/Safety_Screen_Targets/CYP2D6-ampl-1.1.0/cyp2d6_union_trainset_base_smiles_train_valid_test_scaffold_4cb49d5e-1882-474e-ae0b-781d77a4738a.csv
# Dataset split table saved to /mnt/projects/ATOM/sarkart4/Data/CYP2D6-ampl-1.1.0/cyp2d6_union_trainset_base_smiles_train_valid_test_scaffold_5fdd8051-5b67-4a13-a768-a8a702d0182b.csv

# CYP2D6_old FRCE
# Dataset split table saved to /mnt/projects/ATOM/sarkart4/Data/CYP2D6-ampl-1.1.0_old/cyp2d6_union_trainset_base_smiles_train_valid_test_scaffold_c391ef72-9cd5-4bb7-a88e-9c55be5012a9.csv
# NOTE(review): the active UUID is labelled "graphconv", while the RF grid-search
# params in this notebook use descriptor_type "rdkit_raw" and the "rdkit_raw"
# split is commented out -- confirm this is the intended split.
#split_uuid = 'c391ef72-9cd5-4bb7-a88e-9c55be5012a9' # scaffold, ecfp
split_uuid =  '4dc24a89-e3c9-4c91-b8b1-6244f5a76132'# scaffold, graphconv
#split_uuid =  '621c5873-bd9e-400e-a379-fd94996d547a' # scaffold, rdkit_raw
#split_uuid = 'c517f69d-dd98-46c9-a9ae-94e6488149fb' # scaffold, mordred
# Echo the chosen split UUID and dataset path as the cell output.
[split_uuid,data_file]

['4dc24a89-e3c9-4c91-b8b1-6244f5a76132',
 '/mnt/projects/ATOM/sarkart4/Data/CYP2D6-ampl-1.1.0_old/cyp2d6_union_trainset_base_smiles.csv']

In [None]:
#mf = pd.read_csv(data_dir+"scaled_descriptors/cyp2d6_union_trainset_base_smiles_with_mordred_filtered_descriptors.csv")
#mf.shape

# Grid Search - RF

In [13]:
# Settings passed to the AMPL parameter parser for the RF grid search.
# Boolean and numeric settings in this literal are given as strings.
params = {
    # "hyperparam": "True",
    "search_type": "user_specified",
    "save_results": "False",
    "collection_name": "CYP2D6",
    "rerun": "False",

    # Dataset location and column names
    "dataset_key": data_file,
    "datastore": "False",
    "response_cols": "pIC50",
    "id_col": "compound_id",
    "smiles_col": "base_rdkit_smiles",
    "result_dir": outdir,

    # Splitting: refers to an existing split through split_uuid
    "split_only": "False",
    "previously_split": "True",
    "split_uuid": split_uuid,
    "splitter": "scaffold",
    # "split_valid_frac": "0.15",
    # "split_test_frac": "0.002",

    # Featurization and model
    "prediction_type": "regression",
    "transformers": "True",
    "featurizer": "computed_descriptors",
    "descriptor_type": "rdkit_raw",
    # NOTE(review): max_epochs is presumably not used by RF models -- confirm
    "max_epochs": "100",
    "model_type": "RF",
    "uncertainty": "True",
    "verbose": "True",
}

In [14]:
import itertools

# Full grid: 7 estimator counts x 6 depths x 5 max-feature counts
# = 210 models per feature set.
# For a quick single-model run use instead:
#   estimator_choice = [512]; depth_choice = [128]; features_choice = [64]
estimator_choice = [16, 24, 32, 64, 128, 256, 512]
depth_choice = [16, 24, 32, 64, 128, 256]
# rf_max_features can only be smaller than the total number of features
features_choice = [16, 24, 32, 64, 128]

# One record per trained model, so the best model can be selected by valid_r2
# afterwards instead of being read off the printed log.
rf_grid_records = []

# itertools.product visits the combinations in the same order as nested loops
# over estimators, then depths, then max_features (last one varies fastest).
for rf_estimator, rf_depth, rf_max_feature in itertools.product(
    estimator_choice, depth_choice, features_choice
):
    # The shared params dict is updated in place, as before, so it still holds
    # the last combination once the loop finishes.
    params["rf_estimators"] = rf_estimator
    params["rf_max_depth"] = rf_depth
    params["rf_max_features"] = rf_max_feature

    # Parse the settings, build the pipeline and train one model.
    tp = parse.wrapper(params)
    pl = mp.ModelPipeline(tp)
    pl.train_model()

    # Read the validation-subset results and report the R2 score.
    pred_data = pl.model_wrapper.get_perf_data(subset="valid", epoch_label="best")
    pred_results = pred_data.get_prediction_results()
    valid_r2 = pred_results['r2_score']
    print(f"rf_estimators: {rf_estimator}, rf_max_depth: {rf_depth}, rf_max_features: {rf_max_feature}, valid_r2: {valid_r2}")

    rf_grid_records.append({
        "rf_estimators": rf_estimator,
        "rf_max_depth": rf_depth,
        "rf_max_features": rf_max_feature,
        "valid_r2": valid_r2,
    })

# Table of every combination tried; e.g. sort by "valid_r2" to pick the best.
rf_grid_results = pd.DataFrame(rf_grid_records)

INFO:ATOM:Created a dataset hash 'a0eb2e6092b3542373f67528e299e283' from dataset_key '/mnt/projects/ATOM/sarkart4/Data/CYP2D6-ampl-1.1.0_old/cyp2d6_union_trainset_base_smiles.csv'
DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/dbfc8da7-0711-4811-923a-08245daa1bb6/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.822, validation r2_score = 0.167, test r2

rf_estimators: 16, rf_max_depth: 16, rf_max_features: 16, valid_r2: 0.16726828648589243


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/9d0c9672-2d33-48f6-83fb-29f1244c1289/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.797, validation r2_score = 0.174, test r2_score = 0.051
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_9d0c9672-2d33-48f6-83fb-29f1244c1289.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 16, rf_max_depth: 16, rf_max_features: 24, valid_r2: 0.17367151791778357


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/a5b28328-4dee-43c7-bc6f-b3827c4f3589/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.816, validation r2_score = 0.156, test r2_score = -0.093
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_a5b28328-4dee-43c7-bc6f-b3827c4f3589.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 16, rf_max_features: 32, valid_r2: 0.15553468569967954


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/b98e084d-d294-41c5-8b26-a9a5f446f07f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.809, validation r2_score = 0.161, test r2_score = -0.021
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_b98e084d-d294-41c5-8b26-a9a5f446f07f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 16, rf_max_features: 64, valid_r2: 0.16067980209859134


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/bd7d6186-2f4b-479d-8d75-dd249a564b02/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.801, validation r2_score = 0.133, test r2_score = -0.056
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_bd7d6186-2f4b-479d-8d75-dd249a564b02.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 16, rf_max_features: 128, valid_r2: 0.13272950187940735


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/6fe37712-440b-435f-a0bc-0226e159758e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.893, validation r2_score = 0.155, test r2_score = -0.191
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_6fe37712-440b-435f-a0bc-0226e159758e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 24, rf_max_features: 16, valid_r2: 0.15453058475938486


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/cee5f5e7-ed82-48b0-9702-862b7f176c25/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.891, validation r2_score = 0.140, test r2_score = 0.149
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_cee5f5e7-ed82-48b0-9702-862b7f176c25.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 16, rf_max_depth: 24, rf_max_features: 24, valid_r2: 0.13968362222332498


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/b09bd674-77a6-426b-91e6-10dc021e74f1/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.890, validation r2_score = 0.173, test r2_score = -0.517
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_b09bd674-77a6-426b-91e6-10dc021e74f1.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 24, rf_max_features: 32, valid_r2: 0.1726822069455055


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/31f843d6-80ba-4d79-a9d8-273b295ce528/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.892, validation r2_score = 0.166, test r2_score = -0.271
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_31f843d6-80ba-4d79-a9d8-273b295ce528.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 24, rf_max_features: 64, valid_r2: 0.1657141825166757


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/fe904ee7-ff92-4f48-b0a5-873da745c29d/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.894, validation r2_score = 0.169, test r2_score = 0.044
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_fe904ee7-ff92-4f48-b0a5-873da745c29d.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 16, rf_max_depth: 24, rf_max_features: 128, valid_r2: 0.1688632941439715


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/99c022e0-79f1-4d8a-acd0-57ec528e39a1/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.899, validation r2_score = 0.132, test r2_score = -0.317
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_99c022e0-79f1-4d8a-acd0-57ec528e39a1.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 32, rf_max_features: 16, valid_r2: 0.1317033627697346


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/cf3cfd95-0d65-4337-b447-061a5601a666/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.901, validation r2_score = 0.146, test r2_score = 0.211
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_cf3cfd95-0d65-4337-b447-061a5601a666.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 16, rf_max_depth: 32, rf_max_features: 24, valid_r2: 0.1463455125346731


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/e76516ae-220f-44c9-ad21-0b05fa5b43d3/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.900, validation r2_score = 0.177, test r2_score = -0.400
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_e76516ae-220f-44c9-ad21-0b05fa5b43d3.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 32, rf_max_features: 32, valid_r2: 0.1766685014874364


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/4797c46c-2bbb-4444-9fbb-3dc20fde3eda/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.900, validation r2_score = 0.168, test r2_score = -0.220
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_4797c46c-2bbb-4444-9fbb-3dc20fde3eda.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 32, rf_max_features: 64, valid_r2: 0.16776716810313663


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2feee930-8cc4-4c7c-9bef-ccc738357436/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.903, validation r2_score = 0.149, test r2_score = -0.068
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2feee930-8cc4-4c7c-9bef-ccc738357436.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 32, rf_max_features: 128, valid_r2: 0.14887253258343525


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/b62f5060-83d9-42ed-b71f-69925b2ff03e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.902, validation r2_score = 0.145, test r2_score = -0.606
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_b62f5060-83d9-42ed-b71f-69925b2ff03e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 64, rf_max_features: 16, valid_r2: 0.14479827445670768


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/291d4f18-6782-4885-8585-c8a7923c491a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.900, validation r2_score = 0.151, test r2_score = -0.207
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_291d4f18-6782-4885-8585-c8a7923c491a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 64, rf_max_features: 24, valid_r2: 0.15129284762379314


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/80c8b568-7f58-41fa-b426-3c1f1563f43a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.899, validation r2_score = 0.152, test r2_score = -0.318
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_80c8b568-7f58-41fa-b426-3c1f1563f43a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 64, rf_max_features: 32, valid_r2: 0.15220961077388395


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/9d051d18-1880-4c41-9a45-a84e6bcbae58/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.904, validation r2_score = 0.151, test r2_score = -0.121
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_9d051d18-1880-4c41-9a45-a84e6bcbae58.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 64, rf_max_features: 64, valid_r2: 0.15103493652058686


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/365c3154-69a6-4f36-806b-2242b8b95c40/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.899, validation r2_score = 0.152, test r2_score = 0.037
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_365c3154-69a6-4f36-806b-2242b8b95c40.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 16, rf_max_depth: 64, rf_max_features: 128, valid_r2: 0.15183631663752817


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/db5f0a2f-3a75-45e6-bc78-eff339bd2996/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.900, validation r2_score = 0.148, test r2_score = -0.895
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_db5f0a2f-3a75-45e6-bc78-eff339bd2996.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 128, rf_max_features: 16, valid_r2: 0.14791461339366607


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/0ce25835-be24-427a-8d1a-6567f01d784c/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.903, validation r2_score = 0.157, test r2_score = 0.094
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_0ce25835-be24-427a-8d1a-6567f01d784c.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 16, rf_max_depth: 128, rf_max_features: 24, valid_r2: 0.15702960832655644


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/b8286f0b-764f-4e98-87b3-797a5e244762/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.900, validation r2_score = 0.138, test r2_score = -0.225
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_b8286f0b-764f-4e98-87b3-797a5e244762.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 128, rf_max_features: 32, valid_r2: 0.13848460400050933


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/774101da-8cca-4476-9363-985dded411bf/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.900, validation r2_score = 0.134, test r2_score = -0.231
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_774101da-8cca-4476-9363-985dded411bf.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 128, rf_max_features: 64, valid_r2: 0.13445770544683244


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/e1958159-5e65-43bb-b716-a7284bd2cd19/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.900, validation r2_score = 0.138, test r2_score = 0.018
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_e1958159-5e65-43bb-b716-a7284bd2cd19.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 16, rf_max_depth: 128, rf_max_features: 128, valid_r2: 0.13781710265127056


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/a6d37e08-69de-4f80-a9ea-1533167405b7/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.902, validation r2_score = 0.147, test r2_score = -0.349
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_a6d37e08-69de-4f80-a9ea-1533167405b7.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 256, rf_max_features: 16, valid_r2: 0.1471556010983317


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2486e4ec-e0c6-47a1-8d73-f17a8419a0f5/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.903, validation r2_score = 0.156, test r2_score = -0.465
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2486e4ec-e0c6-47a1-8d73-f17a8419a0f5.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 256, rf_max_features: 24, valid_r2: 0.1556513495429268


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/3c01f7d5-ab66-4c6f-90d0-7d4082535123/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.899, validation r2_score = 0.139, test r2_score = -0.482
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_3c01f7d5-ab66-4c6f-90d0-7d4082535123.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 256, rf_max_features: 32, valid_r2: 0.13869418706617198


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/6e1f845b-b05b-4421-b01e-74cb04391a5b/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.903, validation r2_score = 0.149, test r2_score = 0.120
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_6e1f845b-b05b-4421-b01e-74cb04391a5b.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 16, rf_max_depth: 256, rf_max_features: 64, valid_r2: 0.14883210823014537


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/70b7c835-6700-48ce-85f6-95366e9b0cfe/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.900, validation r2_score = 0.159, test r2_score = -0.032
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_70b7c835-6700-48ce-85f6-95366e9b0cfe.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 16, rf_max_depth: 256, rf_max_features: 128, valid_r2: 0.15923430333854438


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/f9dbd485-63b9-4e13-aa2f-624e4a4f487f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.822, validation r2_score = 0.179, test r2_score = 0.182
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_f9dbd485-63b9-4e13-aa2f-624e4a4f487f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 16, rf_max_features: 16, valid_r2: 0.17925714650001934


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/68edea65-1705-478d-baa7-163e6eae1871/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.814, validation r2_score = 0.164, test r2_score = -0.327
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_68edea65-1705-478d-baa7-163e6eae1871.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 16, rf_max_features: 24, valid_r2: 0.1641541575298684


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/ac07cc1f-dbad-40eb-bf8b-6999a35ad7de/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.814, validation r2_score = 0.171, test r2_score = -0.305
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_ac07cc1f-dbad-40eb-bf8b-6999a35ad7de.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 16, rf_max_features: 32, valid_r2: 0.1713445232363866


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/9de9afde-acf3-4ae5-9f6a-7a130326bf74/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.810, validation r2_score = 0.172, test r2_score = 0.054
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_9de9afde-acf3-4ae5-9f6a-7a130326bf74.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 16, rf_max_features: 64, valid_r2: 0.17153721757665608


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/067d0347-8400-45ff-bf89-60a1a47a19c5/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.805, validation r2_score = 0.179, test r2_score = 0.168
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_067d0347-8400-45ff-bf89-60a1a47a19c5.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 16, rf_max_features: 128, valid_r2: 0.17920714333396792


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/8fbcce9f-de7e-4449-911e-a761ac6d91e2/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.902, validation r2_score = 0.175, test r2_score = -0.148
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_8fbcce9f-de7e-4449-911e-a761ac6d91e2.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 24, rf_max_features: 16, valid_r2: 0.17495264398650645


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/eb4d4da6-f67c-4d17-a153-6cdb4cf8848a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.899, validation r2_score = 0.168, test r2_score = -0.393
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_eb4d4da6-f67c-4d17-a153-6cdb4cf8848a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 24, rf_max_features: 24, valid_r2: 0.16786274117612987


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/45658e71-1276-4e70-a4c9-e62ddd752490/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.901, validation r2_score = 0.172, test r2_score = -0.286
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_45658e71-1276-4e70-a4c9-e62ddd752490.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 24, rf_max_features: 32, valid_r2: 0.17186320474377959


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/0e25cf93-a231-414d-b83e-6955068a1fd2/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.898, validation r2_score = 0.155, test r2_score = -0.002
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_0e25cf93-a231-414d-b83e-6955068a1fd2.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 24, rf_max_features: 64, valid_r2: 0.1548334347913961


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/3ffcab0f-71c3-493d-801a-2cd76807f58c/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.897, validation r2_score = 0.159, test r2_score = -0.159
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_3ffcab0f-71c3-493d-801a-2cd76807f58c.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 24, rf_max_features: 128, valid_r2: 0.15858450253545975


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/1fdee03a-9565-4eb4-bc1c-5659923bbdd5/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.906, validation r2_score = 0.174, test r2_score = -0.178
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_1fdee03a-9565-4eb4-bc1c-5659923bbdd5.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 32, rf_max_features: 16, valid_r2: 0.1737884879265108


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/17ea1f02-2966-43eb-a82f-b6a24c35224a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.906, validation r2_score = 0.171, test r2_score = 0.092
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_17ea1f02-2966-43eb-a82f-b6a24c35224a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 32, rf_max_features: 24, valid_r2: 0.17093171347422043


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/fb40fc95-7a1d-4a6d-bf91-b157224e1fce/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.907, validation r2_score = 0.193, test r2_score = -0.256
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_fb40fc95-7a1d-4a6d-bf91-b157224e1fce.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 32, rf_max_features: 32, valid_r2: 0.1933757646916069


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/c4896644-b99a-4035-b0b2-63e78be9b355/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.907, validation r2_score = 0.198, test r2_score = -0.654
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_c4896644-b99a-4035-b0b2-63e78be9b355.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 32, rf_max_features: 64, valid_r2: 0.19761963029813734


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/7e43bdb2-0d2c-4506-bcac-21fac5aaa914/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.908, validation r2_score = 0.171, test r2_score = -0.215
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_7e43bdb2-0d2c-4506-bcac-21fac5aaa914.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 32, rf_max_features: 128, valid_r2: 0.17094996929613204


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/adfa1c54-56c9-4e7f-8670-d97fbcc71bbc/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.905, validation r2_score = 0.169, test r2_score = 0.064
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_adfa1c54-56c9-4e7f-8670-d97fbcc71bbc.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 64, rf_max_features: 16, valid_r2: 0.16932001595267876


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/c461c0de-5857-4b76-8afa-88646955c384/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.907, validation r2_score = 0.164, test r2_score = -0.003
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_c461c0de-5857-4b76-8afa-88646955c384.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 64, rf_max_features: 24, valid_r2: 0.1639071844203599


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/fc77e730-37aa-4630-938b-79143707916f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.908, validation r2_score = 0.190, test r2_score = 0.126
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_fc77e730-37aa-4630-938b-79143707916f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 64, rf_max_features: 32, valid_r2: 0.18984834990169153


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/868e381b-f447-4d5f-8d74-b937a712f65e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.907, validation r2_score = 0.170, test r2_score = 0.018
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_868e381b-f447-4d5f-8d74-b937a712f65e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 64, rf_max_features: 64, valid_r2: 0.1699369886980816


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/d1daeadf-24ac-47cd-be8d-8522ce79e569/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.909, validation r2_score = 0.168, test r2_score = 0.221
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_d1daeadf-24ac-47cd-be8d-8522ce79e569.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 64, rf_max_features: 128, valid_r2: 0.16764799095530014


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/525f0055-902e-4f1d-9f19-8391d79e955e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.908, validation r2_score = 0.173, test r2_score = -0.646
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_525f0055-902e-4f1d-9f19-8391d79e955e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 128, rf_max_features: 16, valid_r2: 0.17271044125618695


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/da775ba1-d667-4f42-8840-27cd3be59711/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.907, validation r2_score = 0.180, test r2_score = 0.165
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_da775ba1-d667-4f42-8840-27cd3be59711.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 128, rf_max_features: 24, valid_r2: 0.18022250419400043


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/552434c7-a8b5-467a-9564-1d266eab7811/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.907, validation r2_score = 0.167, test r2_score = -0.339
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_552434c7-a8b5-467a-9564-1d266eab7811.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 128, rf_max_features: 32, valid_r2: 0.16699762859146328


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/df7ca0ff-d7f7-4605-a594-8a0b23c850c7/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.906, validation r2_score = 0.186, test r2_score = -0.485
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_df7ca0ff-d7f7-4605-a594-8a0b23c850c7.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 128, rf_max_features: 64, valid_r2: 0.1856677333331238


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/908e6f27-358a-496c-891f-5fc625f795c4/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.908, validation r2_score = 0.195, test r2_score = -0.085
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_908e6f27-358a-496c-891f-5fc625f795c4.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 128, rf_max_features: 128, valid_r2: 0.19511592199484462


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/5dc76e99-2e82-4d52-93a0-c1e6097ad0c8/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.907, validation r2_score = 0.200, test r2_score = -0.375
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_5dc76e99-2e82-4d52-93a0-c1e6097ad0c8.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 256, rf_max_features: 16, valid_r2: 0.1999399672596881


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/d64b7f46-5985-4515-a582-de4849322e05/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.908, validation r2_score = 0.178, test r2_score = 0.082
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_d64b7f46-5985-4515-a582-de4849322e05.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 256, rf_max_features: 24, valid_r2: 0.17805304620602225


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/59aa1af8-de9b-4453-bc21-fca3627653b1/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.907, validation r2_score = 0.167, test r2_score = -0.313
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_59aa1af8-de9b-4453-bc21-fca3627653b1.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 256, rf_max_features: 32, valid_r2: 0.1669003032532288


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/105d31fe-862b-4b58-a364-add1ef009fca/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.908, validation r2_score = 0.170, test r2_score = 0.101
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_105d31fe-862b-4b58-a364-add1ef009fca.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 24, rf_max_depth: 256, rf_max_features: 64, valid_r2: 0.16986226974686214


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/e3d72412-148a-4011-9d7a-5cfddb4129c1/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.907, validation r2_score = 0.165, test r2_score = -0.078
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_e3d72412-148a-4011-9d7a-5cfddb4129c1.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 24, rf_max_depth: 256, rf_max_features: 128, valid_r2: 0.16451846701459394


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/1f513010-a1c8-4ca7-9418-933b5fffb09a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.825, validation r2_score = 0.167, test r2_score = -0.081
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_1f513010-a1c8-4ca7-9418-933b5fffb09a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 16, rf_max_features: 16, valid_r2: 0.16704678810592988


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/c710c221-5bbc-4a2f-acf0-6442ea07c864/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.828, validation r2_score = 0.167, test r2_score = 0.112
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_c710c221-5bbc-4a2f-acf0-6442ea07c864.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 32, rf_max_depth: 16, rf_max_features: 24, valid_r2: 0.16730470727110036


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/99d2eff5-3a17-407b-a7b4-2d328786db6a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.833, validation r2_score = 0.168, test r2_score = -0.121
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_99d2eff5-3a17-407b-a7b4-2d328786db6a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 16, rf_max_features: 32, valid_r2: 0.16801032864317733


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/8d890243-4539-4f29-9f4e-9c9d39ae9dcc/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.822, validation r2_score = 0.177, test r2_score = -0.035
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_8d890243-4539-4f29-9f4e-9c9d39ae9dcc.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 16, rf_max_features: 64, valid_r2: 0.17701741762056133


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/4f9285d5-6261-468d-9006-fc2e164c1fb0/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.808, validation r2_score = 0.165, test r2_score = -0.323
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_4f9285d5-6261-468d-9006-fc2e164c1fb0.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 16, rf_max_features: 128, valid_r2: 0.1649999029613376


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/4e4aa6fa-d050-45db-8141-9f9714a51c24/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.903, validation r2_score = 0.181, test r2_score = 0.011
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_4e4aa6fa-d050-45db-8141-9f9714a51c24.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 32, rf_max_depth: 24, rf_max_features: 16, valid_r2: 0.18084619684696945


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/f0d93b77-af12-4725-bb2f-a1ee979ccd19/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.905, validation r2_score = 0.177, test r2_score = -0.100
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_f0d93b77-af12-4725-bb2f-a1ee979ccd19.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 24, rf_max_features: 24, valid_r2: 0.17705903750324292


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/898e30d4-46f0-422c-bd02-c298b1ae0ba2/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.906, validation r2_score = 0.197, test r2_score = -0.136
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_898e30d4-46f0-422c-bd02-c298b1ae0ba2.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 24, rf_max_features: 32, valid_r2: 0.19708915520844095


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/3d292ed0-b60d-4a90-8ca7-aa09420fca27/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.902, validation r2_score = 0.182, test r2_score = -0.646
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_3d292ed0-b60d-4a90-8ca7-aa09420fca27.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 24, rf_max_features: 64, valid_r2: 0.18177669681051556


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/e160ec5b-7792-4c78-b67e-2ac4f44aa8ec/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.898, validation r2_score = 0.161, test r2_score = -0.002
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_e160ec5b-7792-4c78-b67e-2ac4f44aa8ec.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 24, rf_max_features: 128, valid_r2: 0.16088767530723114


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/35e169e1-cdc1-4795-9b61-b7d1093bbfd7/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.911, validation r2_score = 0.187, test r2_score = -0.231
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_35e169e1-cdc1-4795-9b61-b7d1093bbfd7.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 32, rf_max_features: 16, valid_r2: 0.1865591399118086


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/9df69585-9e2f-4a97-8dda-c9c67fbf6439/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.913, validation r2_score = 0.193, test r2_score = -0.559
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_9df69585-9e2f-4a97-8dda-c9c67fbf6439.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 32, rf_max_features: 24, valid_r2: 0.1930374388108148


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/cc7a34a3-2819-4a1b-984e-e4dd86a50c65/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.913, validation r2_score = 0.197, test r2_score = 0.068
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_cc7a34a3-2819-4a1b-984e-e4dd86a50c65.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 32, rf_max_depth: 32, rf_max_features: 32, valid_r2: 0.19712804175743548


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/66960052-331c-4b14-9b1b-7c5a64812357/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.910, validation r2_score = 0.183, test r2_score = -0.176
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_66960052-331c-4b14-9b1b-7c5a64812357.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 32, rf_max_features: 64, valid_r2: 0.18332898410435017


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2aeb51e6-e706-4971-9652-be4c9fcc47b9/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.912, validation r2_score = 0.169, test r2_score = -0.066
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2aeb51e6-e706-4971-9652-be4c9fcc47b9.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 32, rf_max_features: 128, valid_r2: 0.1694789388008332


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/31895b7f-5f92-45ce-89fc-cf8c47ccb98f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.911, validation r2_score = 0.171, test r2_score = -0.012
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_31895b7f-5f92-45ce-89fc-cf8c47ccb98f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 64, rf_max_features: 16, valid_r2: 0.17068146884186308


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/605e878d-ace7-4625-b553-fc8e7d2329f1/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.910, validation r2_score = 0.183, test r2_score = -0.151
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_605e878d-ace7-4625-b553-fc8e7d2329f1.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 64, rf_max_features: 24, valid_r2: 0.18280045943613588


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2886f2ac-b610-4322-8b92-96b460cc95c9/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.911, validation r2_score = 0.188, test r2_score = -0.318
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2886f2ac-b610-4322-8b92-96b460cc95c9.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 64, rf_max_features: 32, valid_r2: 0.18812098738699623


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/20f7108c-ba62-42a8-9d63-320e17d65a7e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.914, validation r2_score = 0.174, test r2_score = -0.220
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_20f7108c-ba62-42a8-9d63-320e17d65a7e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 64, rf_max_features: 64, valid_r2: 0.1737009371152618


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/27634ab7-ee5c-403b-960b-4c844c606a1e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.912, validation r2_score = 0.171, test r2_score = 0.061
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_27634ab7-ee5c-403b-960b-4c844c606a1e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 32, rf_max_depth: 64, rf_max_features: 128, valid_r2: 0.17144611450942981


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/47b4ef63-58c4-4e8c-a0c3-e5827f16ae20/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.912, validation r2_score = 0.185, test r2_score = -0.625
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_47b4ef63-58c4-4e8c-a0c3-e5827f16ae20.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 128, rf_max_features: 16, valid_r2: 0.184567030991294


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/91823b86-ce4b-4ca0-b97f-1caebd897779/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.913, validation r2_score = 0.194, test r2_score = -0.335
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_91823b86-ce4b-4ca0-b97f-1caebd897779.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 128, rf_max_features: 24, valid_r2: 0.19385790355493315


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/d7725f31-77eb-4d68-984c-d1dc2f08528f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.910, validation r2_score = 0.188, test r2_score = 0.092
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_d7725f31-77eb-4d68-984c-d1dc2f08528f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 32, rf_max_depth: 128, rf_max_features: 32, valid_r2: 0.18814822334386627


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/ce2db8ce-e138-4402-96a7-5041f76d2a6f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.910, validation r2_score = 0.211, test r2_score = -0.223
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_ce2db8ce-e138-4402-96a7-5041f76d2a6f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 128, rf_max_features: 64, valid_r2: 0.21119173263628532


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/4cdc9379-8e00-4fb3-b7d1-9adbf34dee87/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.912, validation r2_score = 0.175, test r2_score = -0.334
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_4cdc9379-8e00-4fb3-b7d1-9adbf34dee87.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 128, rf_max_features: 128, valid_r2: 0.17472261537188993


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/a7a43268-25ca-4de5-b3d3-7676deb43587/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.912, validation r2_score = 0.186, test r2_score = -0.442
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_a7a43268-25ca-4de5-b3d3-7676deb43587.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 256, rf_max_features: 16, valid_r2: 0.18578993357078077


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/bf7c0382-d393-43d2-a218-6c6fcc7de59d/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.910, validation r2_score = 0.193, test r2_score = -0.086
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_bf7c0382-d393-43d2-a218-6c6fcc7de59d.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 256, rf_max_features: 24, valid_r2: 0.19257497722969108


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/62454ccc-5973-4b00-9c0b-241c0beb8664/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.912, validation r2_score = 0.195, test r2_score = -0.063
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_62454ccc-5973-4b00-9c0b-241c0beb8664.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 256, rf_max_features: 32, valid_r2: 0.19542722911748056


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/3cbdcdc7-cb27-430f-b60e-6958fb9243e9/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.914, validation r2_score = 0.179, test r2_score = -0.190
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_3cbdcdc7-cb27-430f-b60e-6958fb9243e9.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 256, rf_max_features: 64, valid_r2: 0.17873646949727529


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/547b68a4-47f7-4632-aa16-aa473eaeeea1/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.911, validation r2_score = 0.189, test r2_score = -0.164
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_547b68a4-47f7-4632-aa16-aa473eaeeea1.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 32, rf_max_depth: 256, rf_max_features: 128, valid_r2: 0.18868399928866852


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/8e1ca40a-b501-4733-b918-9c26e422cd14/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.831, validation r2_score = 0.199, test r2_score = -0.054
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_8e1ca40a-b501-4733-b918-9c26e422cd14.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 16, rf_max_features: 16, valid_r2: 0.19858281049159965


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/1fcfc1e9-398b-4094-867a-9bc125a8ed4f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.836, validation r2_score = 0.196, test r2_score = -0.082
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_1fcfc1e9-398b-4094-867a-9bc125a8ed4f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 16, rf_max_features: 24, valid_r2: 0.196330433466396


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/1e7fc36b-e9b7-42ff-a29b-748775bf3e00/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.832, validation r2_score = 0.192, test r2_score = -0.227
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_1e7fc36b-e9b7-42ff-a29b-748775bf3e00.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 16, rf_max_features: 32, valid_r2: 0.19235036906523872


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/a4143791-dfea-4bfc-98cd-7013c0b32f9b/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.831, validation r2_score = 0.189, test r2_score = -0.008
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_a4143791-dfea-4bfc-98cd-7013c0b32f9b.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 16, rf_max_features: 64, valid_r2: 0.18893315636767372


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2652a639-ba54-46cf-92e2-4d03d2014465/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.821, validation r2_score = 0.182, test r2_score = -0.010
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2652a639-ba54-46cf-92e2-4d03d2014465.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 16, rf_max_features: 128, valid_r2: 0.18181856506885774


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/070b7938-63d2-415f-9b24-755c34a9e10e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.910, validation r2_score = 0.217, test r2_score = -0.232
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_070b7938-63d2-415f-9b24-755c34a9e10e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 24, rf_max_features: 16, valid_r2: 0.21747532002855552


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/9ee863db-8edd-4bf8-b141-650f64f72ed1/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.912, validation r2_score = 0.201, test r2_score = -0.296
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_9ee863db-8edd-4bf8-b141-650f64f72ed1.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 24, rf_max_features: 24, valid_r2: 0.20118998742317162


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/aef848b1-83b7-444f-a937-f61e60b759d2/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.911, validation r2_score = 0.219, test r2_score = -0.309
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_aef848b1-83b7-444f-a937-f61e60b759d2.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 24, rf_max_features: 32, valid_r2: 0.21882245419825364


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/1bc97dde-0ed0-4f58-b571-7dd95faa252e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.909, validation r2_score = 0.194, test r2_score = 0.101
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_1bc97dde-0ed0-4f58-b571-7dd95faa252e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 64, rf_max_depth: 24, rf_max_features: 64, valid_r2: 0.1941669278002478


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/328c769c-405b-40d2-8586-97d3387d471b/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.904, validation r2_score = 0.204, test r2_score = -0.026
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_328c769c-405b-40d2-8586-97d3387d471b.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 24, rf_max_features: 128, valid_r2: 0.20354900849578772


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/eaaa4be1-cc0d-43cb-a377-1330f842d14e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.199, test r2_score = -0.111
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_eaaa4be1-cc0d-43cb-a377-1330f842d14e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 32, rf_max_features: 16, valid_r2: 0.19929076674639312


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/73283829-a3f6-4179-b06d-0d0829453a67/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.916, validation r2_score = 0.200, test r2_score = -0.376
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_73283829-a3f6-4179-b06d-0d0829453a67.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 32, rf_max_features: 24, valid_r2: 0.20018437890870155


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/694aec0f-5ec6-4842-8ed3-02a87c9d039c/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.198, test r2_score = 0.165
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_694aec0f-5ec6-4842-8ed3-02a87c9d039c.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 64, rf_max_depth: 32, rf_max_features: 32, valid_r2: 0.1984147468336892


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/75065768-1900-485b-8011-0ea5847a25c0/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.916, validation r2_score = 0.210, test r2_score = -0.142
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_75065768-1900-485b-8011-0ea5847a25c0.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 32, rf_max_features: 64, valid_r2: 0.2095422825467106


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/25473b9c-391e-4c0d-b72e-b1e87ab8322e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.197, test r2_score = -0.271
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_25473b9c-391e-4c0d-b72e-b1e87ab8322e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 32, rf_max_features: 128, valid_r2: 0.19746365761550633


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/19b7de28-adf6-47e2-8c3e-4772a29e7879/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.916, validation r2_score = 0.208, test r2_score = -0.238
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_19b7de28-adf6-47e2-8c3e-4772a29e7879.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 64, rf_max_features: 16, valid_r2: 0.2077894326629881


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/6be1cdcd-ff65-4902-be48-3451919d62c7/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.206, test r2_score = 0.027
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_6be1cdcd-ff65-4902-be48-3451919d62c7.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 64, rf_max_depth: 64, rf_max_features: 24, valid_r2: 0.20560228950165116


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/cfb574dc-1450-493e-ad95-e21e7efb4141/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.202, test r2_score = -0.327
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_cfb574dc-1450-493e-ad95-e21e7efb4141.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 64, rf_max_features: 32, valid_r2: 0.20176342294644667


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/65e0d454-4b20-4995-bb74-f6c5a6c36b1a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.194, test r2_score = -0.121
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_65e0d454-4b20-4995-bb74-f6c5a6c36b1a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 64, rf_max_features: 64, valid_r2: 0.19368504134433173


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/576db375-b537-4e5c-bb03-e9ca69e58ea3/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.192, test r2_score = -0.345
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_576db375-b537-4e5c-bb03-e9ca69e58ea3.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 64, rf_max_features: 128, valid_r2: 0.192491880792534


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/9636438a-8daf-4c5e-b771-aa3b52b29c36/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.918, validation r2_score = 0.201, test r2_score = 0.024
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_9636438a-8daf-4c5e-b771-aa3b52b29c36.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 64, rf_max_depth: 128, rf_max_features: 16, valid_r2: 0.20116737330845036


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/6726dc00-124c-4fb5-b309-172384550373/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.918, validation r2_score = 0.197, test r2_score = -0.110
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_6726dc00-124c-4fb5-b309-172384550373.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 128, rf_max_features: 24, valid_r2: 0.19740942261186523


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2b423d01-e2d4-416f-976b-3a446b0cc175/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.199, test r2_score = -0.039
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2b423d01-e2d4-416f-976b-3a446b0cc175.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 128, rf_max_features: 32, valid_r2: 0.19905151840469082


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/3bfc93cb-bd78-48f3-a3fe-b096abebdf18/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.918, validation r2_score = 0.199, test r2_score = -0.167
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_3bfc93cb-bd78-48f3-a3fe-b096abebdf18.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 128, rf_max_features: 64, valid_r2: 0.1990988119903805


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/6bd490b9-af71-47c4-94d0-41378b331584/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.194, test r2_score = -0.206
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_6bd490b9-af71-47c4-94d0-41378b331584.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 128, rf_max_features: 128, valid_r2: 0.1944413766799783


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/e08feedd-dd30-4961-9db2-f2ab9a7af9ce/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.918, validation r2_score = 0.200, test r2_score = -0.185
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_e08feedd-dd30-4961-9db2-f2ab9a7af9ce.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 256, rf_max_features: 16, valid_r2: 0.1997187716347666


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/7bbcde3a-d0ba-415b-9ced-a76d25d0776f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.197, test r2_score = 0.043
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_7bbcde3a-d0ba-415b-9ced-a76d25d0776f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 64, rf_max_depth: 256, rf_max_features: 24, valid_r2: 0.19720714998288147


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/ef13f1e7-16b1-4870-afea-e58663e8085a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.918, validation r2_score = 0.201, test r2_score = -0.055
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_ef13f1e7-16b1-4870-afea-e58663e8085a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 256, rf_max_features: 32, valid_r2: 0.20144937410718644


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/376e1f19-5612-4404-9df5-75b920150d0f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.918, validation r2_score = 0.209, test r2_score = -0.084
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_376e1f19-5612-4404-9df5-75b920150d0f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 256, rf_max_features: 64, valid_r2: 0.20934666960362225


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/afb8772a-e85e-4634-9773-85f66be81e52/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.917, validation r2_score = 0.180, test r2_score = -0.009
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_afb8772a-e85e-4634-9773-85f66be81e52.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 64, rf_max_depth: 256, rf_max_features: 128, valid_r2: 0.180378932532129


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/1fc38a29-0155-4643-bf9d-5b294b27d10f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.833, validation r2_score = 0.197, test r2_score = -0.000
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_1fc38a29-0155-4643-bf9d-5b294b27d10f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 16, rf_max_features: 16, valid_r2: 0.19673260599417441


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/d362434a-f283-4de7-94ba-ff879161d897/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.831, validation r2_score = 0.198, test r2_score = -0.158
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_d362434a-f283-4de7-94ba-ff879161d897.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 16, rf_max_features: 24, valid_r2: 0.19769223616177256


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/9a0df294-2332-48e5-9c2d-d2ab617938be/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.837, validation r2_score = 0.202, test r2_score = -0.062
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_9a0df294-2332-48e5-9c2d-d2ab617938be.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 16, rf_max_features: 32, valid_r2: 0.20193564804385034


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/156a93ed-3e5e-4aeb-a807-04ea6eea500c/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.833, validation r2_score = 0.201, test r2_score = -0.047
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_156a93ed-3e5e-4aeb-a807-04ea6eea500c.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 16, rf_max_features: 64, valid_r2: 0.20147377042038594


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2b6362c3-d4b3-4b2d-be8a-a6e0e4e77708/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.814, validation r2_score = 0.191, test r2_score = 0.102
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2b6362c3-d4b3-4b2d-be8a-a6e0e4e77708.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 128, rf_max_depth: 16, rf_max_features: 128, valid_r2: 0.19057573438525177


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/4c6a1cb7-abe4-4db7-8b1b-305ab42f11a5/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.912, validation r2_score = 0.216, test r2_score = -0.278
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_4c6a1cb7-abe4-4db7-8b1b-305ab42f11a5.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 24, rf_max_features: 16, valid_r2: 0.21605429931455045


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/3b2dddae-4528-4743-b64b-6afa4750e4a5/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.912, validation r2_score = 0.218, test r2_score = -0.174
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_3b2dddae-4528-4743-b64b-6afa4750e4a5.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 24, rf_max_features: 24, valid_r2: 0.21824894952042728


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/237f8eed-2c37-4fed-8ca8-4ce70031c941/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.912, validation r2_score = 0.209, test r2_score = -0.115
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_237f8eed-2c37-4fed-8ca8-4ce70031c941.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 24, rf_max_features: 32, valid_r2: 0.20866637545495026


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/5b9053a0-0d8f-4f1d-822d-63259ad06164/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.911, validation r2_score = 0.203, test r2_score = -0.162
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_5b9053a0-0d8f-4f1d-822d-63259ad06164.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 24, rf_max_features: 64, valid_r2: 0.20343724460534207


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/e170cd82-f289-4c6b-a409-b659ba862090/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.909, validation r2_score = 0.207, test r2_score = 0.008
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_e170cd82-f289-4c6b-a409-b659ba862090.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 128, rf_max_depth: 24, rf_max_features: 128, valid_r2: 0.20726898634320834


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/a1a24715-47a0-4090-a3e5-63172668be18/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.919, validation r2_score = 0.204, test r2_score = 0.070
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_a1a24715-47a0-4090-a3e5-63172668be18.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 128, rf_max_depth: 32, rf_max_features: 16, valid_r2: 0.2042559915130201


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/5161b53d-8bf6-4aae-9290-a513cf0af6b0/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.919, validation r2_score = 0.209, test r2_score = -0.131
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_5161b53d-8bf6-4aae-9290-a513cf0af6b0.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 32, rf_max_features: 24, valid_r2: 0.20893597416787413


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/8f0b37b4-49a6-4b74-9cd3-8ceef2b1fe39/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.919, validation r2_score = 0.208, test r2_score = -0.006
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_8f0b37b4-49a6-4b74-9cd3-8ceef2b1fe39.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 32, rf_max_features: 32, valid_r2: 0.2079728457140464


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/0fceb10e-3707-47a1-8f24-4b41a697974d/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.920, validation r2_score = 0.210, test r2_score = -0.300
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_0fceb10e-3707-47a1-8f24-4b41a697974d.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 32, rf_max_features: 64, valid_r2: 0.21027561199232314


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/99e010e1-0e78-4cfb-b570-f68255c84dd4/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.919, validation r2_score = 0.198, test r2_score = 0.054
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_99e010e1-0e78-4cfb-b570-f68255c84dd4.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 128, rf_max_depth: 32, rf_max_features: 128, valid_r2: 0.19834110980858544


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/a039376b-e175-412f-a6cd-9d4a1c77ae8d/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.918, validation r2_score = 0.209, test r2_score = -0.014
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_a039376b-e175-412f-a6cd-9d4a1c77ae8d.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 64, rf_max_features: 16, valid_r2: 0.20865456133836913


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/dc09d250-7515-46bf-a56e-cba5e55bd047/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.920, validation r2_score = 0.205, test r2_score = -0.211
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_dc09d250-7515-46bf-a56e-cba5e55bd047.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 64, rf_max_features: 24, valid_r2: 0.20456471383421015


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2e1d8678-e458-46d9-ab0d-269455791e0c/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.919, validation r2_score = 0.202, test r2_score = -0.218
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2e1d8678-e458-46d9-ab0d-269455791e0c.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 64, rf_max_features: 32, valid_r2: 0.20155358481440366


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/de40018b-f4ec-467a-aa51-399f7e8a7517/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.919, validation r2_score = 0.205, test r2_score = -0.128
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_de40018b-f4ec-467a-aa51-399f7e8a7517.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 64, rf_max_features: 64, valid_r2: 0.20519162670837499


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/cbcbf61b-0bd3-4bc0-9a3d-c224bba4b6a0/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.208, test r2_score = -0.154
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_cbcbf61b-0bd3-4bc0-9a3d-c224bba4b6a0.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 64, rf_max_features: 128, valid_r2: 0.2082560958576406


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/653d0b11-6974-4310-9628-923fa9f1e532/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.919, validation r2_score = 0.208, test r2_score = -0.270
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_653d0b11-6974-4310-9628-923fa9f1e532.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 128, rf_max_features: 16, valid_r2: 0.20755654005860402


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/c8fd5b7e-88c6-4726-9d1b-499cf55633db/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.919, validation r2_score = 0.199, test r2_score = -0.188
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_c8fd5b7e-88c6-4726-9d1b-499cf55633db.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 128, rf_max_features: 24, valid_r2: 0.1991743429345073


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/a23c07e7-bf23-4c19-b5a9-2d2d4e1a2c0c/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.919, validation r2_score = 0.209, test r2_score = -0.251
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_a23c07e7-bf23-4c19-b5a9-2d2d4e1a2c0c.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 128, rf_max_features: 32, valid_r2: 0.2091957112916083


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/ad4fd2c6-1893-4e05-8dbd-6ca0ed829d64/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.208, test r2_score = -0.161
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_ad4fd2c6-1893-4e05-8dbd-6ca0ed829d64.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 128, rf_max_features: 64, valid_r2: 0.20807690281798263


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/75df0f6a-c669-43a7-9e38-27651bc5f3fb/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.208, test r2_score = -0.212
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_75df0f6a-c669-43a7-9e38-27651bc5f3fb.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 128, rf_max_features: 128, valid_r2: 0.2078420278863975


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/09b6bc3e-2cc8-4ccb-83ed-2d66228eeec3/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.919, validation r2_score = 0.205, test r2_score = -0.105
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_09b6bc3e-2cc8-4ccb-83ed-2d66228eeec3.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 256, rf_max_features: 16, valid_r2: 0.2052849152481233


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/0301da10-59cf-4b7c-b982-6b4c2bd26840/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.920, validation r2_score = 0.212, test r2_score = -0.065
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_0301da10-59cf-4b7c-b982-6b4c2bd26840.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 256, rf_max_features: 24, valid_r2: 0.21150765798341786


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/35b9bb83-30e8-4f90-a548-5e55bfc02b68/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.920, validation r2_score = 0.204, test r2_score = -0.346
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_35b9bb83-30e8-4f90-a548-5e55bfc02b68.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 128, rf_max_depth: 256, rf_max_features: 32, valid_r2: 0.20393495733424793


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/33262b80-57a0-4449-997b-3c78ae05570b/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.920, validation r2_score = 0.205, test r2_score = -0.135
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_33262b80-57a0-4449-997b-3c78ae05570b.tar.gz


rf_estimators: 128, rf_max_depth: 256, rf_max_features: 64, valid_r2: 0.2053416184422462


INFO:ATOM:Created a dataset hash 'a0eb2e6092b3542373f67528e299e283' from dataset_key '/mnt/projects/ATOM/sarkart4/Data/CYP2D6-ampl-1.1.0_old/cyp2d6_union_trainset_base_smiles.csv'
DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/9910fc23-f41a-4fe5-8716-bad20ff7abd6/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.920, validation r2_score = 0.207, test r2

rf_estimators: 128, rf_max_depth: 256, rf_max_features: 128, valid_r2: 0.20738348202652224


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/60567e4c-f492-4eed-a6a1-e94674624e47/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.835, validation r2_score = 0.201, test r2_score = -0.093
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_60567e4c-f492-4eed-a6a1-e94674624e47.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 16, rf_max_features: 16, valid_r2: 0.2012121112903017


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/cc33901d-7d0d-420e-b8bc-bddd57febbc1/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.833, validation r2_score = 0.197, test r2_score = -0.047
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_cc33901d-7d0d-420e-b8bc-bddd57febbc1.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 16, rf_max_features: 24, valid_r2: 0.19727895152935804


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/61984254-1330-4c44-b3ba-f256e1a74325/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.833, validation r2_score = 0.206, test r2_score = -0.114
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_61984254-1330-4c44-b3ba-f256e1a74325.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 16, rf_max_features: 32, valid_r2: 0.20597402539850163


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/3ee862da-0e05-4490-82da-63f0a1cecda2/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.834, validation r2_score = 0.200, test r2_score = -0.022
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_3ee862da-0e05-4490-82da-63f0a1cecda2.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 16, rf_max_features: 64, valid_r2: 0.2001051135973687


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/8b8de6b0-ccd3-45d7-8eb0-1b810326b74f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.821, validation r2_score = 0.194, test r2_score = 0.079
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_8b8de6b0-ccd3-45d7-8eb0-1b810326b74f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 256, rf_max_depth: 16, rf_max_features: 128, valid_r2: 0.19428865271073248


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/c13c07c7-53b5-420e-aa28-98821ec19ddb/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.915, validation r2_score = 0.211, test r2_score = -0.210
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_c13c07c7-53b5-420e-aa28-98821ec19ddb.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 24, rf_max_features: 16, valid_r2: 0.21136371772396711


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/d8087eae-041d-43c6-8e29-a936e2ef98ea/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.913, validation r2_score = 0.211, test r2_score = -0.236
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_d8087eae-041d-43c6-8e29-a936e2ef98ea.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 24, rf_max_features: 24, valid_r2: 0.21067914254963405


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/53e43a72-9de4-4a1d-9dbb-9b936f86b04b/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.913, validation r2_score = 0.214, test r2_score = -0.281
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_53e43a72-9de4-4a1d-9dbb-9b936f86b04b.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 24, rf_max_features: 32, valid_r2: 0.21374221743014143


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/7bb13046-0207-4a01-8a02-e0fb666fa37b/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.911, validation r2_score = 0.208, test r2_score = -0.184
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_7bb13046-0207-4a01-8a02-e0fb666fa37b.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 24, rf_max_features: 64, valid_r2: 0.20764558916777942


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/faba805b-ad7d-4106-a6a8-2597cab81a17/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.910, validation r2_score = 0.206, test r2_score = -0.013
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_faba805b-ad7d-4106-a6a8-2597cab81a17.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 24, rf_max_features: 128, valid_r2: 0.20602573244500866


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/e78419b6-483a-4f28-a379-00f3f57fec10/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.920, validation r2_score = 0.211, test r2_score = -0.190
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_e78419b6-483a-4f28-a379-00f3f57fec10.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 32, rf_max_features: 16, valid_r2: 0.2112948496552901


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/0720a581-5519-4c24-8294-edb1c2fb4c58/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.213, test r2_score = -0.037
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_0720a581-5519-4c24-8294-edb1c2fb4c58.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 32, rf_max_features: 24, valid_r2: 0.21255622136869645


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/55223aa2-b2f4-4c5b-b957-877bbca998b0/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.920, validation r2_score = 0.213, test r2_score = -0.091
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_55223aa2-b2f4-4c5b-b957-877bbca998b0.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 32, rf_max_features: 32, valid_r2: 0.21293085472342377


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/f2e2301a-7b85-441c-8e83-9baa776ab9a5/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.210, test r2_score = -0.077
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_f2e2301a-7b85-441c-8e83-9baa776ab9a5.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 32, rf_max_features: 64, valid_r2: 0.2096427104609745


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/651777ac-a292-4078-b1d6-6d027ce1e822/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.205, test r2_score = -0.005
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_651777ac-a292-4078-b1d6-6d027ce1e822.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 32, rf_max_features: 128, valid_r2: 0.20530658633380872


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/0d2627ab-68c4-4d99-8373-c604ab01b08e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.217, test r2_score = -0.145
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_0d2627ab-68c4-4d99-8373-c604ab01b08e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 64, rf_max_features: 16, valid_r2: 0.21700078395390243


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/09824b20-277f-44a7-bcd3-af3f0026e4b0/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.217, test r2_score = -0.138
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_09824b20-277f-44a7-bcd3-af3f0026e4b0.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 64, rf_max_features: 24, valid_r2: 0.21657479978861038


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/7dc184c7-acd2-4f1f-a8fe-28bcb4063de3/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.214, test r2_score = -0.130
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_7dc184c7-acd2-4f1f-a8fe-28bcb4063de3.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 64, rf_max_features: 32, valid_r2: 0.2137799120181363


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2fd4cf1a-2e89-4581-b047-41373a105088/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.215, test r2_score = -0.007
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2fd4cf1a-2e89-4581-b047-41373a105088.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 64, rf_max_features: 64, valid_r2: 0.21460720432388225


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/bf53e91e-d387-44ae-9dd7-5574a7f07f05/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.206, test r2_score = 0.035
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_bf53e91e-d387-44ae-9dd7-5574a7f07f05.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 256, rf_max_depth: 64, rf_max_features: 128, valid_r2: 0.20626851169971983


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2440cc6c-e112-4520-995d-04316de108c0/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.206, test r2_score = -0.131
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2440cc6c-e112-4520-995d-04316de108c0.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 128, rf_max_features: 16, valid_r2: 0.20564782364055223


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/2bfcf8fa-0300-4604-9313-23f26cdae2b9/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.219, test r2_score = -0.286
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_2bfcf8fa-0300-4604-9313-23f26cdae2b9.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 128, rf_max_features: 24, valid_r2: 0.21908964798428354


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/b29d0b15-1054-4ead-91ed-e899d22dd5d4/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.212, test r2_score = -0.096
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_b29d0b15-1054-4ead-91ed-e899d22dd5d4.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 128, rf_max_features: 32, valid_r2: 0.21182586882694432


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/e74af939-6a79-4c45-a6ee-402aa6f8199a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.216, test r2_score = -0.173
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_e74af939-6a79-4c45-a6ee-402aa6f8199a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 128, rf_max_features: 64, valid_r2: 0.21645288028606324


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/8e3d6e81-7806-49db-87e6-63924ecf3dfd/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.208, test r2_score = -0.034
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_8e3d6e81-7806-49db-87e6-63924ecf3dfd.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 128, rf_max_features: 128, valid_r2: 0.20805713655323088


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/b00e6165-0074-4091-bbce-f092ec062104/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.920, validation r2_score = 0.210, test r2_score = -0.184
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_b00e6165-0074-4091-bbce-f092ec062104.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 256, rf_max_features: 16, valid_r2: 0.20973638519265225


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/4affa40c-0e47-417a-96bf-9f95fddb69d8/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.217, test r2_score = 0.013
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_4affa40c-0e47-417a-96bf-9f95fddb69d8.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 256, rf_max_depth: 256, rf_max_features: 24, valid_r2: 0.2166140531870565


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/0769b2a2-1117-40b4-b541-1c4dc9d02d6c/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.214, test r2_score = -0.018
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_0769b2a2-1117-40b4-b541-1c4dc9d02d6c.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 256, rf_max_features: 32, valid_r2: 0.21433723059841248


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/ff85452a-35cf-4864-91f5-38553c422a1a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.214, test r2_score = -0.054
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_ff85452a-35cf-4864-91f5-38553c422a1a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 256, rf_max_features: 64, valid_r2: 0.21378008771643864


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/71624326-2c2c-49bd-9163-913e473a7e18/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.212, test r2_score = -0.124
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_71624326-2c2c-49bd-9163-913e473a7e18.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 256, rf_max_depth: 256, rf_max_features: 128, valid_r2: 0.21164933013281184


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/8bdff5b5-b2a9-44ae-8635-96ac74aff97c/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.835, validation r2_score = 0.205, test r2_score = -0.063
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_8bdff5b5-b2a9-44ae-8635-96ac74aff97c.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 16, rf_max_features: 16, valid_r2: 0.2047228100468288


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/e4c2734c-9ee4-41d6-bddc-56a62e1edb06/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.839, validation r2_score = 0.203, test r2_score = -0.083
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_e4c2734c-9ee4-41d6-bddc-56a62e1edb06.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 16, rf_max_features: 24, valid_r2: 0.20321597209231046


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/f8972aa1-d1d1-41ed-a445-040c98d89cee/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.837, validation r2_score = 0.204, test r2_score = 0.003
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_f8972aa1-d1d1-41ed-a445-040c98d89cee.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 512, rf_max_depth: 16, rf_max_features: 32, valid_r2: 0.20351529205018426


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/a04789b4-b82f-4987-ae88-cf06d24bfb69/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.832, validation r2_score = 0.202, test r2_score = -0.037
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_a04789b4-b82f-4987-ae88-cf06d24bfb69.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 16, rf_max_features: 64, valid_r2: 0.20206850220399397


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/4c5f8eae-2b61-4f43-bf89-b350e7e906f2/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.821, validation r2_score = 0.194, test r2_score = 0.050
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_4c5f8eae-2b61-4f43-bf89-b350e7e906f2.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 512, rf_max_depth: 16, rf_max_features: 128, valid_r2: 0.193509416096038


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/9b07fc50-29f0-4b9e-9e0e-19e1af41a283/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.915, validation r2_score = 0.214, test r2_score = -0.121
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_9b07fc50-29f0-4b9e-9e0e-19e1af41a283.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 24, rf_max_features: 16, valid_r2: 0.21393096314989857


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/e1a709bf-dfa4-4f2e-8cf5-97162a3f2b71/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.914, validation r2_score = 0.213, test r2_score = -0.179
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_e1a709bf-dfa4-4f2e-8cf5-97162a3f2b71.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 24, rf_max_features: 24, valid_r2: 0.21326818727117447


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/fc628b9e-f861-4e89-813c-4cc81407c8d4/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.914, validation r2_score = 0.212, test r2_score = -0.043
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_fc628b9e-f861-4e89-813c-4cc81407c8d4.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 24, rf_max_features: 32, valid_r2: 0.21219584224084453


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/a0277ce0-a214-44fe-9501-1b92ff933504/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.913, validation r2_score = 0.213, test r2_score = -0.100
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_a0277ce0-a214-44fe-9501-1b92ff933504.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 24, rf_max_features: 64, valid_r2: 0.21340765408073936


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/954d245f-48c6-4df8-9c1a-ac3d741aff25/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.911, validation r2_score = 0.208, test r2_score = -0.043
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_954d245f-48c6-4df8-9c1a-ac3d741aff25.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 24, rf_max_features: 128, valid_r2: 0.20844751600659017


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/011565be-81a9-4ae1-be0f-109de83bfdb7/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.214, test r2_score = -0.097
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_011565be-81a9-4ae1-be0f-109de83bfdb7.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 32, rf_max_features: 16, valid_r2: 0.2142707776142624


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/5f3c0ec9-38c1-4043-9064-32eda41a5fc2/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.213, test r2_score = -0.079
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_5f3c0ec9-38c1-4043-9064-32eda41a5fc2.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 32, rf_max_features: 24, valid_r2: 0.21332967049741258


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/5db61182-7f58-4d20-a932-8f40e74e343d/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.216, test r2_score = -0.188
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_5db61182-7f58-4d20-a932-8f40e74e343d.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 32, rf_max_features: 32, valid_r2: 0.21643468304040703


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/ed9ce2c7-329e-455e-af99-c6ddae4e5d63/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.213, test r2_score = -0.084
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_ed9ce2c7-329e-455e-af99-c6ddae4e5d63.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 32, rf_max_features: 64, valid_r2: 0.2134100397185229


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/15e9eaa4-269f-40b0-b025-c1f5c38ccd46/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.210, test r2_score = -0.018
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_15e9eaa4-269f-40b0-b025-c1f5c38ccd46.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 32, rf_max_features: 128, valid_r2: 0.20975055099351936


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/0552a70a-064c-4e95-8ca4-ea709d1e727d/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.210, test r2_score = -0.171
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_0552a70a-064c-4e95-8ca4-ea709d1e727d.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 64, rf_max_features: 16, valid_r2: 0.20952228591321753


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/0d4d9bc1-4354-4385-bc7d-d1004e3c0a2a/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.213, test r2_score = -0.148
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_0d4d9bc1-4354-4385-bc7d-d1004e3c0a2a.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 64, rf_max_features: 24, valid_r2: 0.213028425175935


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/f82de6c6-5130-4062-8ecc-25a5f555d129/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.214, test r2_score = -0.199
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_f82de6c6-5130-4062-8ecc-25a5f555d129.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 64, rf_max_features: 32, valid_r2: 0.21358046432020927


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/615e2ab5-81d4-438f-bdb5-b27e4339fcdb/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.215, test r2_score = -0.069
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_615e2ab5-81d4-438f-bdb5-b27e4339fcdb.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 64, rf_max_features: 64, valid_r2: 0.21472088639724551


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/778a1d16-54a1-44d0-865f-0d1c00adf464/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.212, test r2_score = -0.051
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_778a1d16-54a1-44d0-865f-0d1c00adf464.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 64, rf_max_features: 128, valid_r2: 0.21237599111264538


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/1b65fe8c-d870-4296-8e06-f59cd059b60f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.215, test r2_score = -0.064
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_1b65fe8c-d870-4296-8e06-f59cd059b60f.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 128, rf_max_features: 16, valid_r2: 0.21493967039709105


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/eed8ffcc-4b9a-47ab-817f-cb97cccce8b0/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.213, test r2_score = -0.055
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_eed8ffcc-4b9a-47ab-817f-cb97cccce8b0.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 128, rf_max_features: 24, valid_r2: 0.21258236988811618


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/6795109c-548d-4286-8af5-65f38d0d8d73/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.216, test r2_score = -0.144
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_6795109c-548d-4286-8af5-65f38d0d8d73.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 128, rf_max_features: 32, valid_r2: 0.21602417712656752


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/daa893ea-366f-4127-95d8-6fe1a237067e/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.215, test r2_score = -0.115
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_daa893ea-366f-4127-95d8-6fe1a237067e.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 128, rf_max_features: 64, valid_r2: 0.21544220692093563


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/9ff2f530-3e3c-492d-a5f5-4835dac327c7/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.211, test r2_score = 0.019
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_9ff2f530-3e3c-492d-a5f5-4835dac327c7.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e60

rf_estimators: 512, rf_max_depth: 128, rf_max_features: 128, valid_r2: 0.2111471488427027


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/1569dc23-ecc4-48d0-97ae-f267dc03b97c/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.215, test r2_score = -0.105
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_1569dc23-ecc4-48d0-97ae-f267dc03b97c.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 256, rf_max_features: 16, valid_r2: 0.21507337375154867


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/5f456672-d975-443c-be6f-359342e4c008/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.209, test r2_score = -0.228
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_5f456672-d975-443c-be6f-359342e4c008.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 256, rf_max_features: 24, valid_r2: 0.20902177578191572


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/f3f57bbf-9bcb-46a7-95d2-c9ab6c8d918c/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.921, validation r2_score = 0.214, test r2_score = -0.134
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_f3f57bbf-9bcb-46a7-95d2-c9ab6c8d918c.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 256, rf_max_features: 32, valid_r2: 0.21426767640283895


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/ded4095e-2e9e-42e4-a6e3-6e68fc33a6f5/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.217, test r2_score = -0.145
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_ded4095e-2e9e-42e4-a6e3-6e68fc33a6f5.tar.gz
INFO:ATOM:Created a dataset hash 'a0eb2e6

rf_estimators: 512, rf_max_depth: 256, rf_max_features: 64, valid_r2: 0.21678202451589546


DEBUG:ATOM:Got dataset, attempting to extract data
DEBUG:ATOM:Creating deepchem dataset
INFO:ATOM:Using prefeaturized data; number of features = 200
INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/RF_computed_descriptors_scaffold_regression/583bac7a-b8b4-4450-95df-957374523f7f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
  X = np.nan_to_num((X - self.X_means) * X_weight / self.X_stds)
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming feature data
INFO:ATOM:Fitting random forest model
INFO:ATOM:Fold 0: training r2_score = 0.922, validation r2_score = 0.210, test r2_score = -0.087
INFO:ATOM:Wrote model tarball to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles_model_583bac7a-b8b4-4450-95df-957374523f7f.tar.gz


rf_estimators: 512, rf_max_depth: 256, rf_max_features: 128, valid_r2: 0.20972982758172243


# Grid Search - RF (parallel)

In [None]:
# Display the number of CPUs reported by the machine (reference for picking n_jobs).
multiprocessing.cpu_count()

In [None]:
## 210 models per feature set (7 estimator x 6 depth x 5 max-feature choices)
estimator_choice = [16,24,32,64,128,256,512]
depth_choice = [16,24,32,64,128,256]
features_choice = [16,24,32,64,128] # can only be < total number of features

# List of params dicts: each element is an independent copy of the base RF
# `params` carrying one specific rf_estimators / rf_max_depth / rf_max_features
# combination. The copy is essential: appending `params` itself would store
# 210 references to the same dict, so every entry would hold only the last
# combination and the parallel run would train the same model repeatedly.
param_combos = []
for rf_estimator in estimator_choice:
    for rf_depth in depth_choice:
        for rf_max_feature in features_choice:
            combo = dict(params)  # shallow copy; only top-level keys are overridden
            combo["rf_estimators"] = rf_estimator
            combo["rf_max_depth"] = rf_depth
            combo["rf_max_features"] = rf_max_feature
            param_combos.append(combo)
print(" total models = ", len(param_combos))

def parallelrun(params_):
    """Train a single model from one parameter dictionary.

    Parses ``params_`` with ``parse.wrapper``, builds an ``mp.ModelPipeline``
    from the parsed result and calls its ``train_model()``. Returns nothing.
    """
    pipeline = mp.ModelPipeline(parse.wrapper(params_))
    pipeline.train_model()
# Wall-clock timer for the whole parallel sweep.
start = time.time()
# Number of joblib workers (fixed at 4 rather than multiprocessing.cpu_count()).
cores = 4
# Submit one parallelrun call per parameter dict in param_combos.
Parallel(n_jobs=cores, timeout=99999)(
    delayed(parallelrun)(params_) for params_ in param_combos
)
print(
    " Time taken to create ",
    len(param_combos),
    "RF models(min)= ",
    np.round((time.time() - start) / 60, 2),
)

# Grid Search - NN

In [26]:
# Base parameter dictionary for the graph-convolution NN models. The per-model
# "layer_sizes", "dropouts" and "learning_rate" entries are not set here; the
# grid-search loop adds them before each training run.
params = {
    # --- dataset ---
    "collection_name": "CYP2D6",
    "dataset_key": data_file,
    "datastore": "False",
    # --- featurization ---
    "featurizer": "graphconv",  # alternative: "computed_descriptors"
    # "descriptor_type": "rdkit_raw",  # or "mordred_filtered"
    # "hyperparam": "True",
    # --- model / task ---
    "id_col": "compound_id",
    "model_type": "NN",
    "prediction_type": "regression",
    # --- reuse the existing split identified by split_uuid ---
    "previously_split": "True",
    "split_uuid": split_uuid,
    "rerun": "False",
    "response_cols": "pIC50",
    # --- output ---
    "result_dir": outdir,
    "save_results": "False",
    "search_type": "user_specified",
    "smiles_col": "base_rdkit_smiles",
    "split_only": "False",
    # "split_test_frac": "0.002",
    # "split_valid_frac": "0.15",
    "splitter": "scaffold",
    "transformers": "True",
    "uncertainty": "True",
    # "dropouts": "0.10,0.10,0.10",
    # "layer_sizes": "64,64,64",
    # "learning_rate": "0.000753",
    "verbose": "True",
}

In [None]:
# (SLOW - more than 12 hours)
# Each entry pairs a comma-separated layer-size string with a matching
# comma-separated dropout string (one dropout value per layer).
# Commented-out entries are combinations excluded from this run.
layer_dropout = [
    # ('64,16', '0.3,0.3'),
    # ('64,16', '0.5,0.5'),
    # ('128,32', '0.0,0.0'),
    ('128,32', '0.5,0.5'),
    # ('256,64', '0.0,0.0'),
    ('256,64', '0.5,0.5'),
    # ('512,128', '0.0,0.0'),
    ('512,128', '0.5,0.5'),
    # ('256,64,16', '0.0,0.0,0.0'),
    ('256,64,16', '0.5,0.5,0.5'),
    # ('512,256,32', '0.0,0.0,0.0'),
    ('512,256,32', '0.5,0.5,0.5'),
    # ('512,256,128', '0.0,0.0,0.0'),
    ('512,256,128', '0.5,0.5,0.5'),
    ('512,128,128', '0.5,0.5,0.5'),
    ('512,256,128,64', '0.5,0.5,0.5,0.5'),
]
# Learning rates tried for every architecture.
# Wider sweep considered: [.00001,.00005,.0001,.0005,.001,.005,.01,.05]
lr_choice = [0.0007, 0.005]

# Train one model per (architecture, learning rate) pair and report the
# validation-set r2 taken at the "best" epoch label.
for layers, dropouts in layer_dropout:
    for learning_rate in lr_choice:
        params.update({
            "layer_sizes": layers,
            "dropouts": dropouts,
            "learning_rate": learning_rate,
        })
        pl = mp.ModelPipeline(parse.wrapper(params))
        pl.train_model()
        pred_results = pl.model_wrapper.get_perf_data(
            subset="valid", epoch_label="best"
        ).get_prediction_results()
        print(f"layers: {layers}, dropouts: {dropouts}, learning rate: {learning_rate}, valid_r2: {pred_results['r2_score']}\n")

INFO:ATOM:Created a dataset hash 'a0eb2e6092b3542373f67528e299e283' from dataset_key '/mnt/projects/ATOM/sarkart4/Data/CYP2D6-ampl-1.1.0_old/cyp2d6_union_trainset_base_smiles.csv'
DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/8cb766e2-04f6-4d8e-a607-94063802f8ef/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
INFO:ATOM:Total score for epoch 0 is -2.42
INFO:ATOM:Epoch 0: training r2_score = -2.030, validation r2_score = -2.419, test r2_score = -2.090
INFO:ATOM:*** Total score for epoch 1 is -0.502, is new maximum
INFO:ATOM:Epoch 1: training r2_score = -0.378, validation r2_score = -0.502, test r2_score = -0.782
INFO:ATOM:*** Total score for epoch 2 is -0.294, is new maximum
INFO:ATOM:Epoch 2: training r2_score = -0.157, validation r2_score = -0.294, test r2_score = -0.337
INFO:ATOM:*** Total score for epoch 3 is -0.0504, is new maximum
INFO:ATOM:Epoch 3: training r2_score = 0.029, validation r2_score = -0.050, test r2_score = -0.075
INFO:ATOM:Epoch 4: training r2_score = -0.013, validation r2_score = -0.118, test r2_score = -0.034
INFO:ATOM:Epoch 5: training r2_score = 0.026, validation r2_score = -0.070, test r2_score = 0.032
INFO:ATOM:Epoch 6: training r2_score = 0.

layers: 128,32, dropouts: 0.5,0.5, learning rate: 0.0007, valid_r2: 0.07260191088157875



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/3add0f23-dc2a-40cb-a99f-edbf3c4cd9eb/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)




INFO:ATOM:Total score for epoch 0 is -0.033
INFO:ATOM:Epoch 0: training r2_score = -0.021, validation r2_score = -0.033, test r2_score = 0.385
INFO:ATOM:*** Total score for epoch 1 is 0.0512, is new maximum
INFO:ATOM:Epoch 1: training r2_score = 0.082, validation r2_score = 0.051, test r2_score = 0.091
INFO:ATOM:*** Total score for epoch 2 is 0.0525, is new maximum
INFO:ATOM:Epoch 2: training r2_score = 0.069, validation r2_score = 0.052, test r2_score = 0.266
INFO:ATOM:Epoch 3: training r2_score = 0.070, validation r2_score = -0.004, test r2_score = 0.197
INFO:ATOM:*** Total score for epoch 4 is 0.0795, is new maximum
INFO:ATOM:Epoch 4: training r2_score = 0.144, validation r2_score = 0.079, test r2_score = 0.185
INFO:ATOM:Epoch 5: training r2_score = 0.115, validation r2_score = 0.044, test r2_score = 0.191
INFO:ATOM:Epoch 6: training r2_score = 0.048, validation r2_score = 0.002, test r2_score = 0.271
INFO:ATOM:Epoch 7: training r2_score = 0.108, validation r2_score = 0.045, test r2

layers: 128,32, dropouts: 0.5,0.5, learning rate: 0.005, valid_r2: 0.09363695684746032



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/48b30b62-50a1-4fcc-a749-cbd0ee3d15e6/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)




INFO:ATOM:Total score for epoch 0 is -0.376
INFO:ATOM:Epoch 0: training r2_score = -0.358, validation r2_score = -0.376, test r2_score = -0.637
INFO:ATOM:*** Total score for epoch 1 is -0.127, is new maximum
INFO:ATOM:Epoch 1: training r2_score = -0.050, validation r2_score = -0.127, test r2_score = 0.125
INFO:ATOM:*** Total score for epoch 2 is -0.0428, is new maximum
INFO:ATOM:Epoch 2: training r2_score = 0.040, validation r2_score = -0.043, test r2_score = 0.387
INFO:ATOM:*** Total score for epoch 3 is 0.0399, is new maximum
INFO:ATOM:Epoch 3: training r2_score = 0.105, validation r2_score = 0.040, test r2_score = 0.419
INFO:ATOM:*** Total score for epoch 4 is 0.0488, is new maximum
INFO:ATOM:Epoch 4: training r2_score = 0.129, validation r2_score = 0.049, test r2_score = 0.401
INFO:ATOM:Epoch 5: training r2_score = 0.131, validation r2_score = 0.038, test r2_score = 0.413
INFO:ATOM:Epoch 6: training r2_score = 0.110, validation r2_score = 0.029, test r2_score = 0.486
INFO:ATOM:*** 

layers: 256,64, dropouts: 0.5,0.5, learning rate: 0.0007, valid_r2: 0.0888652658995932



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/09b75184-8b59-48f9-8650-eb377e603bbf/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
INFO:ATOM:Total score for epoch 0 is -0.0279
INFO:ATOM:Epoch 0: training r2_score = 0.016, validation r2_score = -0.028, test r2_score = 0.266
INFO:ATOM:*** Total score for epoch 1 is 0.00453, is new maximum
INFO:ATOM:Epoch 1: training r2_score = 0.033, validation r2_score = 0.005, test r2_score = 0.224
INFO:ATOM:*** Total score for epoch 2 is 0.0217, is new maximum
INFO:ATOM:Epoch 2: training r2_score = 0.092, validation r2_score = 0.022, test r2_score = -0.208
INFO:ATOM:*** Total score for epoch 3 is 0.0802, is new maximum
INFO:ATOM:Epoch 3: training r2_score = 0.140, validation r2_score = 0.080, test r2_score = 0.212
INFO:ATOM:Epoch 4: training r2_score = 0.149, validation r2_score = 0.077, test r2_score = 0.122
INFO:ATOM:Epoch 5: training r2_score = 0.160, validation r2_score = 0.064, test r2_score = 0.230
INFO:ATOM:Epoch 6: training r2_score = 0.156, valida

layers: 256,64, dropouts: 0.5,0.5, learning rate: 0.005, valid_r2: 0.11124856627274715



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/52979a41-f2a1-41ca-976e-0ad4093a5297/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
INFO:ATOM:Total score for epoch 0 is -0.718
INFO:ATOM:Epoch 0: training r2_score = -0.651, validation r2_score = -0.718, test r2_score = -0.050
INFO:ATOM:*** Total score for epoch 1 is -0.284, is new maximum
INFO:ATOM:Epoch 1: training r2_score = -0.222, validation r2_score = -0.284, test r2_score = 0.297
INFO:ATOM:*** Total score for epoch 2 is -0.0158, is new maximum
INFO:ATOM:Epoch 2: training r2_score = 0.035, validation r2_score = -0.016, test r2_score = 0.284
INFO:ATOM:*** Total score for epoch 3 is 0.0529, is new maximum
INFO:ATOM:Epoch 3: training r2_score = 0.149, validation r2_score = 0.053, test r2_score = 0.278
INFO:ATOM:*** Total score for epoch 4 is 0.0797, is new maximum
INFO:ATOM:Epoch 4: training r2_score = 0.152, validation r2_score = 0.080, test r2_score = 0.292
INFO:ATOM:Epoch 5: training r2_score = 0.154, validation r2_score = 0.058, test r2

layers: 512,128, dropouts: 0.5,0.5, learning rate: 0.0007, valid_r2: 0.09240770909953455



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/84c2e17d-d785-4a9d-b1cf-53601ab98200/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
INFO:ATOM:Total score for epoch 0 is -0.00955
INFO:ATOM:Epoch 0: training r2_score = 0.032, validation r2_score = -0.010, test r2_score = 0.166
INFO:ATOM:*** Total score for epoch 1 is 0.0107, is new maximum
INFO:ATOM:Epoch 1: training r2_score = 0.072, validation r2_score = 0.011, test r2_score = -0.128
INFO:ATOM:*** Total score for epoch 2 is 0.0454, is new maximum
INFO:ATOM:Epoch 2: training r2_score = 0.112, validation r2_score = 0.045, test r2_score = 0.168
INFO:ATOM:Epoch 3: training r2_score = 0.090, validation r2_score = 0.019, test r2_score = 0.346
INFO:ATOM:Epoch 4: training r2_score = 0.118, validation r2_score = 0.034, test r2_score = 0.176
INFO:ATOM:Epoch 5: training r2_score = 0.105, validation r2_score = 0.008, test r2_score = 0.357
INFO:ATOM:*** Total score for epoch 6 is 0.0631, is new maximum
INFO:ATOM:Epoch 6: training r2_score = 0.143, valida

layers: 512,128, dropouts: 0.5,0.5, learning rate: 0.005, valid_r2: 0.09762333939493573



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/0b2d0f60-2172-4c1f-8289-d8b8769682d4/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
INFO:ATOM:Total score for epoch 0 is -0.202
INFO:ATOM:Epoch 0: training r2_score = -0.240, validation r2_score = -0.202, test r2_score = -0.081
INFO:ATOM:*** Total score for epoch 1 is 0.00943, is new maximum
INFO:ATOM:Epoch 1: training r2_score = 0.024, validation r2_score = 0.009, test r2_score = 0.069
INFO:ATOM:*** Total score for epoch 2 is 0.0306, is new maximum
INFO:ATOM:Epoch 2: training r2_score = 0.041, validation r2_score = 0.031, test r2_score = 0.184
INFO:ATOM:Epoch 3: training r2_score = 0.045, validation r2_score = 0.011, test r2_score = 0.149
INFO:ATOM:*** Total score for epoch 4 is 0.0368, is new maximum
INFO:ATOM:Epoch 4: training r2_score = 0.069, validation 

layers: 256,64,16, dropouts: 0.5,0.5,0.5, learning rate: 0.0007, valid_r2: 0.08769820700698239



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/5a9816e9-6152-480d-b7d6-880609e72d6b/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
INFO:ATOM:Total score for epoch 0 is -0.00337
INFO:ATOM:Epoch 0: training r2_score = 0.013, validation r2_score = -0.003, test r2_score = 0.204
INFO:ATOM:*** Total score for epoch 1 is 0.0134, is new maximum
INFO:ATOM:Epoch 1: training r2_score = 0.031, validation r2_score = 0.013, test r2_score = 0.014
INFO:ATOM:*** Total score for epoch 2 is 0.0278, is new maximum
INFO:ATOM:Epoch 2: training r2_score = 0.050, validation r2_score = 0.028, test r2_score = -0.152
INFO:ATOM:*** Total score for epoch 3 is 0.0441, is new maximum
INFO:ATOM:Epoch 3: training r2_score = 0.041, validation r2_score = 0.044, test r2_score = 0.030
INFO:ATOM:Epoch 4: training r2_score = 0.063, validation 

layers: 256,64,16, dropouts: 0.5,0.5,0.5, learning rate: 0.005, valid_r2: 0.10838180713959622



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/61c05b55-b57f-4ee6-8f22-d2b81db6fb4f/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)




INFO:ATOM:Total score for epoch 0 is -0.122
INFO:ATOM:Epoch 0: training r2_score = -0.085, validation r2_score = -0.122, test r2_score = -0.463
INFO:ATOM:Epoch 1: training r2_score = -0.111, validation r2_score = -0.187, test r2_score = -0.389
INFO:ATOM:*** Total score for epoch 2 is -0.0731, is new maximum
INFO:ATOM:Epoch 2: training r2_score = 0.024, validation r2_score = -0.073, test r2_score = -0.061
INFO:ATOM:*** Total score for epoch 3 is -0.0242, is new maximum
INFO:ATOM:Epoch 3: training r2_score = 0.042, validation r2_score = -0.024, test r2_score = -0.244
INFO:ATOM:Epoch 4: training r2_score = -0.006, validation r2_score = -0.128, test r2_score = -0.384
INFO:ATOM:*** Total score for epoch 5 is 0.00322, is new maximum
INFO:ATOM:Epoch 5: training r2_score = 0.118, validation r2_score = 0.003, test r2_score = 0.164
INFO:ATOM:Epoch 6: training r2_score = 0.098, validation r2_score = -0.007, test r2_score = 0.149
INFO:ATOM:Epoch 7: training r2_score = 0.030, validation r2_score = 

layers: 512,256,32, dropouts: 0.5,0.5,0.5, learning rate: 0.0007, valid_r2: 0.05544227117330003



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/1993b8a0-53c1-4669-a206-35b7b777f7f5/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


INFO:ATOM:Total score for epoch 0 is -0.00764
INFO:ATOM:Epoch 0: training r2_score = 0.016, validation r2_score = -0.008, test r2_score = -0.006
INFO:ATOM:*** Total score for epoch 1 is 0.00181, is new maximum
INFO:ATOM:Epoch 1: training r2_score = 0.021, validation r2_score = 0.002, test r2_score = -0.054
INFO:ATOM:*** Total score for epoch 2 is 0.0412, is new maximum
INFO:ATOM:Epoch 2: training r2_score = 0.095, validation r2_score = 0.041, test r2_score = -0.090
INFO:ATOM:Epoch 3: training r2_score = 0.102, validation r2_score = 0.037, test r2_score = 0.114
INFO:ATOM:Epoch 4: training r2_score = -0.150, validation r2_score = -0.265, test r2_score = -0.258
INFO:ATOM:*** Total score for epoch 5 is 0.0751, is new maximum
INFO:ATOM:Epoch 5: training r2_score = 0.121, validation r2_score = 0.075, test r2_score = -0.045
INFO:ATOM:*** Total score for epoch 6 is 0.0866, is new maximum
INFO:ATOM:Epoch 6: training r2_score = 0.142, validation r2_score = 0.087, test r2_score = 0.075
INFO:ATOM:

layers: 512,256,32, dropouts: 0.5,0.5,0.5, learning rate: 0.005, valid_r2: 0.11389018992719846



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/1a316aff-6a7d-49ed-ad70-98d5285bf32b/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
INFO:ATOM:Total score for epoch 0 is -0.191
INFO:ATOM:Epoch 0: training r2_score = -0.173, validation r2_score = -0.191, test r2_score = -0.174
INFO:ATOM:*** Total score for epoch 1 is -0.0463, is new maximum
INFO:ATOM:Epoch 1: training r2_score = 0.001, validation r2_score = -0.046, test r2_score = -0.135
INFO:ATOM:Epoch 2: training r2_score = 0.001, validation r2_score = -0.048, test r2_score = 0.195
INFO:ATOM:*** Total score for epoch 3 is -0.000668, is new maximum
INFO:ATOM:Epoch 3: training r2_score = 0.068, validation r2_score = -0.001, test r2_score = 0.256
INFO:ATOM:Epoch 4: training r2_score = 0.056, validation r2_score = -0.035, test r2_score = 0.052
INFO:ATOM:*** To

layers: 512,256,128, dropouts: 0.5,0.5,0.5, learning rate: 0.0007, valid_r2: 0.038124783993827904



DEBUG:ATOM:Attempting to load featurized dataset
DEBUG:ATOM:Exception when trying to load featurized data:
DynamicFeaturization doesn't support get_featurized_dset_name()
INFO:ATOM:Featurized dataset not previously saved for dataset cyp2d6_union_trainset_base_smiles, creating new
INFO:ATOM:Featurizing sample 0
INFO:ATOM:Featurizing sample 1000
INFO:ATOM:Featurizing sample 2000
INFO:ATOM:Featurizing sample 3000
INFO:ATOM:Featurizing sample 4000
INFO:ATOM:Featurizing sample 5000
INFO:ATOM:Featurizing sample 6000
INFO:ATOM:Featurizing sample 7000
INFO:ATOM:Featurizing sample 8000
INFO:ATOM:Featurizing sample 9000
INFO:ATOM:Featurizing sample 10000


number of features: 75


INFO:ATOM:Wrote transformers to /mnt/projects/ATOM/sarkart4/Data/HPO_models_CYP2D6_old/cyp2d6_union_trainset_base_smiles/NN_graphconv_scaffold_regression/f559ad0c-a3b6-4195-a602-440e6ea855df/transformers.pkl
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
INFO:ATOM:Transforming response data
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
INFO:ATOM:Total score for epoch 0 is -0.0147
INFO:ATOM:Epoch 0: training r2_score = -0.002, validation r2_score = -0.015, test r2_score = -0.081
INFO:ATOM:*** Total score for epoch 1 is 0.0523, is new maximum
INFO:ATOM:Epoch 1: training r2_score = 0.076, validation r2_score = 0.052, test r2_score = 0.160
INFO:ATOM:Epoch 2: training r2_score = 0.001, validation r2_score = -0.043, test r2_score = 0.160
INFO:ATOM:*** Total score for epoch 3 is 0.0668, is new maximum
INFO:ATOM:Epoch 3: training r2_score = 0.131, validation r2_score = 0.067, test r2_score = 0.130
INFO:ATOM:Epoch 4: training r2_score = 0.092, validation r2_score = 0.034, test r2_score = 0.148
INFO:ATOM:Epoch 5: trai

# Grid Search - NN (submit batch jobs on FRCE - Titli's new code)

In [None]:
# Helpers for fanning the NN grid search out over several CPU cores.
import multiprocessing
# joblib exposes the executor class as `Parallel`; the lowercase `parallel`
# is the submodule and cannot be called as Parallel(n_jobs=...).
from joblib import Parallel, delayed

# Displayed as the cell result: the CPU count reported by the system.
multiprocessing.cpu_count()

In [None]:
# Build one parameter dict per (layer_sizes, dropouts, learning_rate)
# combination so that every combination can be trained as an independent job.
#
# `layer_dropout`, `lr_choice` and the base `params` dict are NOT defined in
# this cell; they must already exist in the notebook session
# (presumably from the earlier NN grid-search cells -- TODO confirm).
# The commented block below is kept as a reference grid and worker sketch.

# layer_dropout = [
#                 #  ('64,16','0.1,0.1'),
#                 #  ('64,16','0.2,0.2'),
#                  ('64,16','0.3,0.3'),
#                  ('64,16','0.4,0.4'),
#                  ('128,32','0.1,0.1'),
#                  ('128,32','0.2,0.2'),
#                  ('128,32','0.3,0.3'),
#                  ('128,32','0.4,0.4'),
#                  ('256,64','0.1,0.1'),
#                  ('256,64','0.2,0.2'),
#                  ('256,64','0.3,0.3'),
#                  ('256,64','0.4,0.4'),
#                  ('512,128','0.1,0.1'),
#                  ('512,128','0.2,0.2'),
#                  ('512,128','0.3,0.3'),
#                  ('512,128','0.4,0.4'),
#                  ('256,64,16','0.1,0.1,0.1'),
#                  ('256,64,16','0.2,0.2,0.2'),
#                  ('256,64,16','0.3,0.3,0.3'),
#                  ('256,64,16','0.4,0.4,0.4'),
# ]
# lr_choice = [.00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075]
# def parallelrun(params):
#     tp = parse.wrapper(params)
#     pl = mp.ModelPipeline(tp)
#     pl.train_model()
#     pred_data = pl.model_wrapper.get_perf_data(subset="valid", epoch_label="best")
#     pred_results = pred_data.get_prediction_results()
#     print(f"layers: {params['layer_sizes']}, dropouts: {params['dropouts']}, learning rate: {params['learning_rate']}, valid_r2: {pred_results['r2_score']}\n")
#
# Parallel(n_jobs=multiprocessing.cpu_count(), verbose=50)(delayed(parallelrun)(combo) for combo in param_combos)

param_combos = []  # one params dict per hyperparameter combination
for layers, dropouts in layer_dropout:
    for learning_rate in lr_choice:
        # Copy the base dict so the combinations do not share (and overwrite)
        # one another's settings.
        combo = dict(params)
        combo["layer_sizes"] = layers
        combo["dropouts"] = dropouts
        combo["learning_rate"] = learning_rate
        param_combos.append(combo)

# Grid Search - XGBoost

In [None]:
# Base parameter dict for the XGBoost runs; it is handed to parse.wrapper()
# by the grid-search cell. `data_file`, `outdir` and `split_uuid` must already
# be defined in the notebook session. All option values are passed as strings.
params = {
    "collection_name": "CYP2D6",
    "dataset_key": data_file,  # training CSV
    "datastore": "False",
    "featurizer": "computed_descriptors",
    "descriptor_type": "mordred_filtered",
    "id_col": "compound_id",
    "lc_account": "None",
    "max_epochs": "100",
    "model_type": "xgboost",
    "prediction_type": "regression",
    "previously_split": "True",  # used together with split_uuid below
    "rerun": "False",
    "response_cols": "pIC50",
    "result_dir": outdir,  # output directory for this run
    "save_results": "False",
    "smiles_col": "base_rdkit_smiles",
    "split_uuid": split_uuid,
    "transformers": "True",
    "uncertainty": "True",
    "verbose": "False",
}

In [None]:
# XGBoost grid search: every (gamma, learning rate) pair below is trained
# once, i.e. 3 x 6 = 18 models for this cell.
# (The original note estimated ~40 seconds per model.)
# Values already covered by earlier runs:
#   gamma:          [0.00,0.05, 0.1,0.2,0.3,0.4,0.5,1,2]
#   learning rate:  [0.25,0.2]
gamma_choice = [4, 8, 16]
lr_choice = [0.15, 0.10, 0.05, 0.01, 0.001, 0.0001]

for gamma in gamma_choice:
    for learning_rate in lr_choice:
        # The shared `params` dict is updated in place for each combination.
        params.update({"xgb_gamma": gamma, "xgb_learning_rate": learning_rate})

        tp = parse.wrapper(params)
        pl = mp.ModelPipeline(tp)
        pl.train_model()

        # Validation-set metrics at the "best" epoch label
        pred_data = pl.model_wrapper.get_perf_data(
            subset="valid",
            epoch_label="best",
        )
        pred_results = pred_data.get_prediction_results()
        print(f"xgb gamma: {gamma}, xgb learning rate: {learning_rate}, valid_r2: {pred_results['r2_score']}\n")

# Monitor HP search with *groupby*

In [None]:
# source = outdir+'RF_GridSearch/*'
# ! mv $source $outdir
# !ls $outdir

In [None]:
# Load the regression performance summary for the models found under
# `outdir`, then rank it by validation R² (highest first).
perf_df = cm.get_summary_perf_tables(
    collection_names=None,
    filter_dict={},
    result_dir=outdir,
    prediction_type='regression',
    verbose=False,
)
# Optional filter, currently disabled:
# perf_df = perf_df[perf_df.rf_estimators!=500]
perf_df = perf_df.sort_values("valid_r2_score", ascending=False)

# Cell output: the two top-ranked rows
perf_df.head(2)

In [None]:
# (rows, columns) of the performance summary table
perf_df.shape

In [None]:
# Count models per (model_type, features) pair; the result is transposed so
# the pairs become columns.
_group_counts = perf_df.groupby(by=['model_type', 'features']).count()
_group_counts[['model_uuid']].T

In [None]:
# Random-forest rows of the summary table.
rfmods = perf_df.loc[perf_df["model_type"] == "RF"]
print(rfmods.shape)
# First row's validation R² (the best one if perf_df is still sorted
# by valid_r2_score in descending order).
print(rfmods["valid_r2_score"].head(1))
rfmods.head(2)

In [None]:
# Neural-network rows of the summary table.
nnmods = perf_df.loc[perf_df["model_type"] == "NN"]
print(nnmods.shape)
# First row's validation R² (the best one if perf_df is still sorted
# by valid_r2_score in descending order).
print(nnmods["valid_r2_score"].head(1))
nnmods.head(2)

In [None]:
# XGBoost rows of the summary table.
xgboostmods = perf_df.loc[perf_df["model_type"] == "xgboost"]
print(xgboostmods.shape)
# First row's validation R² (the best one if perf_df is still sorted
# by valid_r2_score in descending order).
print(xgboostmods["valid_r2_score"].head(1))
xgboostmods.head(2)

In [None]:
# Grid-coverage check for the NN search: per the original note, a complete
# search should give 10 models in each group and 20 groups per feature set.
_nn_group_counts = nnmods.groupby(by=['features', 'layer_sizes', 'dropouts']).count()
_nn_group_counts[['model_uuid']]

In [None]:
# Visualise the RF hyperparameter search: one box plot of validation R² per
# random-forest hyperparameter, drawn side by side.
fig, ax = plt.subplots(1, 3, figsize=(21, 7))
rf_hyperparams = ("rf_estimators", "rf_max_depth", "rf_max_features")
for panel, hyperparam in zip(ax, rf_hyperparams):
    sns.boxplot(data=perf_df, x=hyperparam, y="valid_r2_score", ax=panel)
fig.tight_layout()

# Bayesian optimization NN - manually

In [None]:
# Objective function for the hyperopt search
def lossfn(p):
    """Train one model with the sampled hyperparameters and score it.

    Args:
        p: mapping holding one sample of the search space. The keys read
            are "learn_rate", "layer_size", "layer_num" and "dropout".

    Returns:
        dict with "loss" (1 minus the validation r2_score at the "best"
        epoch label) and "status", plus ``tparam.model_tarball_path``,
        ``tparam.model_uuid`` and the hyperparameter values that were
        written into ``params``.

    Note:
        The module-level ``params`` dict is modified in place, so the
        sampled values stay set after the call returns.
    """
    # (params key, search-space key, cast) -- applied in this order
    assignments = (
        ("learning_rate", "learn_rate", float),
        ("layer_sizes", "layer_size", int),
        ("layer_nums", "layer_num", int),
        ("dropouts", "dropout", float),
    )
    for param_key, space_key, cast in assignments:
        params[param_key] = cast(p[space_key])

    tparam = parse.wrapper(params)
    pipeline = mp.ModelPipeline(tparam)
    pipeline.train_model()

    valid_perf = pipeline.model_wrapper.get_perf_data(subset="valid", epoch_label="best")
    valid_r2 = valid_perf.get_prediction_results()['r2_score']

    # hyperopt minimises the loss, so a higher R² gives a lower loss
    outcome = {'loss': 1 - valid_r2, 'status': STATUS_OK}
    outcome['model'] = tparam.model_tarball_path
    outcome['model_uuid'] = tparam.model_uuid
    outcome['learn_rate'] = params["learning_rate"]
    outcome['layer_size'] = params["layer_sizes"]
    outcome['layer_num'] = params["layer_nums"]
    outcome['dropout'] = params['dropouts']
    return outcome

# Search space sampled by hyperopt. The dict keys are what lossfn() reads from
# its argument; the string passed to hp.uniform() is the label of that
# dimension.
# NOTE(review): every dimension is continuous (hp.uniform) and lossfn()
# truncates layer_size / layer_num with int(), so layer_num effectively only
# takes the values 2 or 3 -- confirm that this is intended.
# NOTE(review): the dropout label was previously misspelt "droupout"; a Trials
# object pickled with the old label may not resume cleanly against this space
# -- start from a fresh Trials() after this change.
space = {
    "learn_rate": hp.uniform("learn_rate", 0.002, 0.006),
    "layer_size": hp.uniform("layer_size", 16, 256),
    "layer_num": hp.uniform("layer_num", 2, 4),
    "dropout": hp.uniform("dropout", 0.1, 0.6),
}

In [None]:
# Start from an empty trial history and run a short TPE search:
# at most 5 evaluations of lossfn, all recorded in `trials`.
trials = Trials()
best = fmin(
    fn=lossfn,
    space=space,
    algo=tpe.suggest,
    max_evals=5,
    trials=trials,
)

In [None]:
# Recover validation R² from each trial's stored loss (R² = 1 - loss) and
# plot it against the 1-based trial number.
r2_list = [1 - trial["result"]["loss"] for trial in trials.trials]
trial_numbers = list(range(1, len(r2_list) + 1))
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
sns.lineplot(x=trial_numbers, y=r2_list, ax=ax)
#ax.set_ylim(0.2,0.6)

In [None]:
# Tabulate the Bayesian-search trials, best validation R² first.
# The rows are read straight from `trials`, so this cell does not depend on
# helper lists being built by another cell beforehand and every column is
# guaranteed to have the same length.
_column_to_result_key = {  # DataFrame column -> key in a trial's result dict
    "model_uuid": "model_uuid",
    "learning_rate": "learn_rate",
    "layer_sizes": "layer_size",
    "layer_nums": "layer_num",
    "dropouts": "dropout",
}
_rows = []
for _trial in trials.trials:
    _result = _trial["result"]
    _row = {col: _result[key] for col, key in _column_to_result_key.items()}
    _row["valid_r2_score"] = 1 - _result["loss"]  # stored loss is 1 - R²
    _rows.append(_row)

# Explicit column list keeps the layout stable even when there are no trials
perf_df = pd.DataFrame(_rows, columns=[*_column_to_result_key, "valid_r2_score"])
perf_df.sort_values(by="valid_r2_score", ascending=False)
# perf_df.to_csv(file_dir + 'NN_Bayesian_Search/' + target_name + '_NN_Bayesian_Search_5_trials.csv', index=False)

In [None]:
# Persist the Bayesian-optimization trial history so the search can be
# resumed later. The path is relative, so the file is written to the current
# working directory.
trial_file = "hp_trial.pkl"
with open(trial_file, mode="wb") as pkl_out:
    pickle.dump(trials, pkl_out)



In [None]:
# Restore the saved trial history from `trial_file`.
# Only unpickle files written by this notebook: pickle can execute arbitrary
# code while loading.
with open(trial_file, mode="rb") as pkl_in:
    new_trials = pickle.load(pkl_in)

In [None]:
# Resume the search on the restored history. max_evals=50 is passed together
# with the already-populated `new_trials` object.
best = fmin(
    fn=lossfn,
    space=space,
    algo=tpe.suggest,
    max_evals=50,
    trials=new_trials,
)

In [None]:
# Validation R² (1 - loss) of every trial in the resumed search, plotted
# against the 1-based trial number.
r2_list = [1 - trial["result"]["loss"] for trial in new_trials.trials]
trial_numbers = list(range(1, len(r2_list) + 1))
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
sns.lineplot(x=trial_numbers, y=r2_list, ax=ax)
#ax.set_ylim(0.2,0.6)

In [None]:
# Pull the hyperparameters and model UUID that each trial stored in its result dict.
# NOTE(review): these lists are read from `trials`, whereas the r2_list in the
# cell above is built from `new_trials`; confirm both hold the same number of
# trials before combining them in one DataFrame.
trial_results = [trial["result"] for trial in trials.trials]
lr_list = [result["learn_rate"] for result in trial_results]
ls_list = [result["layer_size"] for result in trial_results]
ln_list = [result["layer_num"] for result in trial_results]
drop_list = [result["dropout"] for result in trial_results]
model_uuid_list = [result["model_uuid"] for result in trial_results]

### RF

In [None]:
! tail -qn 2 /g/g16/apaulson/train_JSON_FILE.DILIst_hyperopt_RF.json.out

In [None]:
! tail -qn 3 /g/g16/apaulson/train_JSON_FILE.DILIst_hyperopt_RF_mordred_seltrain.json.out

In [None]:
! tail -qn 3 /g/g16/apaulson/train_JSON_FILE.DILIst_txptr_hyperopt_RF.json.out

In [None]:
! tail -qn 3 /g/g16/apaulson/train_JSON_FILE.DILIst_scr_hyperopt_RF.json.out

### NN computed descriptors

In [None]:
! tail -qn 1 /g/g16/apaulson/train_JSON_FILE.DILIst_hyperopt_NN_3_noGC.json.out

In [None]:
# ! tail -qn 3 /g/g16/apaulson/train_JSON_FILE.DILIst_hyperopt_NN_mordred_sel.json.out

In [None]:
# ! tail -qn 3 /g/g16/apaulson/train_JSON_FILE.DILIst_hyperopt_NN_mordred_seltrain.json.out

In [None]:
! tail -qn 3 /g/g16/apaulson/train_JSON_FILE.DILIst_txptr_hyperopt_NN.json.out

In [None]:
! tail -qn 3 /g/g16/apaulson/train_JSON_FILE.DILIst_scr_hyperopt_NN.json.out

### NN GC

In [None]:
! tail -qn 1 /g/g16/apaulson/train_JSON_FILE.DILIst_hyperopt_NN_3_GC.json.out

### XG boost

In [None]:
! tail -qn 3 /g/g16/apaulson/train_JSON_FILE.DILIst_hyperopt_xg_mordred_seltrain.json.out

In [None]:
! tail -qn 3 /g/g16/apaulson/train_JSON_FILE.DILIst_txptr_hyperopt_xg.json.out

In [None]:
! tail -qn 3 /g/g16/apaulson/train_JSON_FILE.DILIst_scr_hyperopt_xg.json.out

# Best model

In [None]:
import warnings
warnings.filterwarnings(action='once')
from atomsci.clients import MLMTClient
import atomsci.ddm.pipeline.model_tracker as mt
import atomsci.ddm.pipeline.compare_models as cmp
import pandas as pd

In [None]:
# Print the model count reported by client.count_models for each collection
# name in positions 2..18 of the list returned by get_collection_names().
client=MLMTClient()
for coll in client.get_collection_names()[2:19]:
    # NOTE(review): `collections` is overwritten on every pass and is not read
    # inside this loop; after the loop it holds only the last collection name.
    collections = [coll]
    res=client.count_models(coll)
    print(f'Num models in {coll}: {res}')

In [None]:
# Performance records from the 'DILIst_class' collection, restricted to
# classification models and tagged as the 'No bio data' run.
collections=['DILIst_class']
responsecols = 'DILIst_Classification'
allmodels1=cmp.get_multitask_perf_from_tracker(collections[0], response_cols=responsecols, 
                                              expand_subsets='train,test,val', exhaustive=True)
# .copy() makes the filtered rows an independent frame, so adding 'model_run'
# below does not trigger pandas' SettingWithCopyWarning.
allmodels1=allmodels1[allmodels1.prediction_type=='classification'].copy()
allmodels1['model_run']='No bio data'
allmodels1=allmodels1.reset_index(drop=True)
print(allmodels1.best_valid_roc_auc_score.max())
print(allmodels1.shape)
# number of models per (model type, feature set, validation-set size) combination
allmodels1.groupby(by=['model_type', 'features', 'best_valid_num_compounds']).count()[['model_uuid']].T

In [None]:
# Performance records from the 'DILIst_txptr_class' collection whose feature
# name contains 'scr', tagged as the 'Scram bio data' run.
# NOTE(review): unlike the other two groups, no prediction_type filter is applied here - confirm intended.
collections=['DILIst_txptr_class']
responsecols = 'DILIst_Classification'
allmodels2=cmp.get_multitask_perf_from_tracker(collections[0], response_cols=responsecols, 
                                              expand_subsets='train,test,val', exhaustive=True)
# .copy() makes the filtered rows an independent frame, so adding 'model_run'
# below does not trigger pandas' SettingWithCopyWarning.
allmodels2=allmodels2[allmodels2.features.str.contains('scr')].copy()
allmodels2['model_run']='Scram bio data'
allmodels2=allmodels2.reset_index(drop=True)
print(allmodels2.best_valid_roc_auc_score.max())
print(allmodels2.shape)
# number of models per (model type, feature set, validation-set size) combination
allmodels2.groupby(by=['model_type', 'features', 'best_valid_num_compounds']).count()[['model_uuid']].T

In [None]:
# Classification models from the 'DILIst_txptr_class' collection whose feature
# name contains 'txptr', tagged as the 'With bio data' run.
collections=['DILIst_txptr_class']
responsecols = 'DILIst_Classification'
allmodels3=cmp.get_multitask_perf_from_tracker(collections[0], response_cols=responsecols, 
                                              expand_subsets='train,test,val', exhaustive=True)
allmodels3=allmodels3[allmodels3.prediction_type=='classification']
# .copy() makes the filtered rows an independent frame, so adding 'model_run'
# below does not trigger pandas' SettingWithCopyWarning.
allmodels3=allmodels3[allmodels3.features.str.contains('txptr')].copy()
allmodels3['model_run']='With bio data'
allmodels3=allmodels3.reset_index(drop=True)
print(allmodels3.best_valid_roc_auc_score.max())
print(allmodels3.shape)
# number of models per (model type, feature set, validation-set size) combination
allmodels3.groupby(by=['model_type', 'features', 'best_valid_num_compounds']).count()[['model_uuid']].T

In [None]:
# Stack the three model groups and leave out models with descriptor_type 'mordred_sel'.
# NOTE(review): the row index is not reset, so index labels repeat across the three groups.
allmodels=pd.concat([allmodels1, allmodels2, allmodels3])
# .copy() so that later cells can add columns to allmodels without pandas'
# SettingWithCopyWarning on a filtered slice.
allmodels = allmodels[allmodels.descriptor_type!= 'mordred_sel'].copy()
allmodels.shape

### Best RF model +/- txptr

In [None]:
model_uuid=allmodels[(allmodels.model_type=='RF') & (allmodels.model_run=='Scram bio data')].sort_values('best_valid_roc_auc_score', ascending=False).model_uuid.iloc[0]
collection_name = collection = mt.get_model_collection_by_uuid(model_uuid)
top_model=mt.get_full_metadata_by_uuid(model_uuid, collection_name)
model_uuid

In [None]:
top_model['training_metrics'][1]#['prediction_results']['r2_score']

In [None]:
model_uuid=allmodels[(allmodels.model_type=='RF') & (allmodels.model_run=='With bio data')].sort_values('best_valid_roc_auc_score', ascending=False).model_uuid.iloc[0]
collection_name = collection = mt.get_model_collection_by_uuid(model_uuid)
top_model=mt.get_full_metadata_by_uuid(model_uuid, collection_name)
model_uuid

In [None]:
top_model['training_metrics'][1]#['prediction_results']['r2_score']

# Best NN model

In [None]:
model_uuid=allmodels[(allmodels.model_type=='NN') & (allmodels.model_run=='Scram bio data')].sort_values('best_valid_roc_auc_score', ascending=False).model_uuid.iloc[0]
collection_name = mt.get_model_collection_by_uuid(model_uuid)
top_model=mt.get_full_metadata_by_uuid(model_uuid, collection_name)
model_uuid

In [None]:
top_model['training_metrics'][1]#['prediction_results']['r2_score']

In [None]:
model_uuid=allmodels[(allmodels.model_type=='NN') & (allmodels.model_run=='With bio data')].sort_values('best_valid_roc_auc_score', ascending=False).model_uuid.iloc[0]
collection_name = mt.get_model_collection_by_uuid(model_uuid)
top_model=mt.get_full_metadata_by_uuid(model_uuid, collection_name)
model_uuid

In [None]:
top_model['training_metrics'][1]#['prediction_results']['r2_score']

# Best XG model

In [None]:
model_uuid=allmodels[(allmodels.model_type=='xgboost') & (allmodels.model_run=='Scram bio data')].sort_values('best_valid_roc_auc_score', ascending=False).model_uuid.iloc[0]
collection_name = collection = mt.get_model_collection_by_uuid(model_uuid)
top_model=mt.get_full_metadata_by_uuid(model_uuid, collection_name)
model_uuid

In [None]:
top_model['training_metrics'][1]#['prediction_results']['r2_score']

In [None]:
model_uuid=allmodels[(allmodels.model_type=='xgboost') & (allmodels.model_run=='With bio data')].sort_values('best_valid_roc_auc_score', ascending=False).model_uuid.iloc[0]
collection_name = collection = mt.get_model_collection_by_uuid(model_uuid)
top_model=mt.get_full_metadata_by_uuid(model_uuid, collection_name)
model_uuid

In [None]:
top_model#['training_metrics'][1]#['prediction_results']['r2_score']

# predict w/ best model

In [None]:
import os
import sys
sys.path.append('%s/workspace/bitbucket_repos/data_science/code' % os.environ['HOME'])

from atomsci.ddm.pipeline import predict_from_model as pfm
import plotting_functions_misc_akp as pfma
from sklearn.metrics import roc_curve, auc, roc_auc_score, r2_score, precision_recall_curve, average_precision_score, confusion_matrix, precision_score, recall_score
from sklearn.metrics import precision_score, recall_score, accuracy_score, matthews_corrcoef, cohen_kappa_score, log_loss
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
sns.set_context('poster')
sns.set_style('white')

In [None]:
# adapted from sklearn 0.22.0 package
def balanced_accuracy_score(conf_mat, *, sample_weight=None,
                            adjusted=False):
    """Return the balanced accuracy computed from a confusion matrix.

    The score is the mean of the per-class recalls, where a class's recall is
    its diagonal count divided by the total of its row in ``conf_mat``.
    Classes whose row total is zero are left out of the mean (with a warning).

    Args:
        conf_mat: Square numpy array of counts.
        sample_weight: Accepted for signature compatibility; not used here.
        adjusted: When True, rescale the score so that chance level
            (``1 / n_classes``) maps to 0 and a perfect score stays at 1.

    Returns:
        The (optionally chance-adjusted) mean per-class recall.
    """
    diagonal = np.diag(conf_mat)
    row_totals = conf_mat.sum(axis=1)
    # An empty row gives 0/0 = NaN; mute numpy's warning and handle the NaN below.
    with np.errstate(divide='ignore', invalid='ignore'):
        recalls = diagonal / row_totals
    undefined = np.isnan(recalls)
    if np.any(undefined):
        warnings.warn('y_pred contains classes not in y_true')
        recalls = recalls[~undefined]
    score = np.mean(recalls)
    if not adjusted:
        return score
    # chance level is based on the classes that remain after dropping empty rows
    chance = 1 / len(recalls)
    return (score - chance) / (1 - chance)

In [None]:
# Add a balanced-accuracy column per split, computed from the first element of
# each model's stored confusion-matrix entry. Columns are added in the order
# valid, test, train.
for split_name in ('valid', 'test', 'train'):
    allmodels[f'best_{split_name}_balanced_accuracy'] = [
        balanced_accuracy_score(np.array(entry[0]))
        for entry in allmodels[f'best_{split_name}_confusion_matrix']
    ]

In [None]:
selection_metric = 'best_valid_roc_auc_score'
# selection_metric = 'best_valid_balanced_accuracy'
# selection_metric = 'best_valid_prc_auc_score'
# selection_metric = 'best_valid_accuracy_score'

In [None]:
print('Best class scramble model', allmodels[allmodels.model_run=='Scram bio data'][selection_metric].max())
print('Best classification model', allmodels[allmodels.model_run=='No bio data'][selection_metric].max())
print('Best class+tbiodata model', allmodels[allmodels.model_run=='With bio data'][selection_metric].max())

In [None]:
top_model1=allmodels[allmodels.model_run=='Scram bio data'].sort_values(selection_metric, ascending=False).iloc[0]
top_model2=allmodels[allmodels.model_run=='With bio data'].sort_values(selection_metric, ascending=False).iloc[0:10]
# top_models=[(top_model1.model_uuid, 'best base model'), (top_model2.model_uuid, 'best +bio model')]
top_models=top_model2.model_uuid.tolist()
print(top_models)
print(top_model1[selection_metric])
print(top_model2[selection_metric])

In [None]:
# Create an array with the colors you want to use
colors = [
    "#7682A4",
    "#A7DDD8",
    "#373C50",
    "#694691",
    "#BE2369",
    "#EB1E23",
    "#6EC8BE",
    "#FFC30F",
]
# Set your custom color palette
pal=sns.color_palette(colors)
sns.set_palette(pal)
sns.palplot(pal)

In [None]:
import importlib
importlib.reload(pfma)
import tempfile
from atomsci.ddm.pipeline import parameter_parser as parse
from atomsci.ddm.pipeline import model_pipeline as mp

sns.set_context('poster')
sns.set_style("white")
tcpl='_tcpl';adj='_adj'
id_col='compound_id'
smiles_col='base_rdkit_smiles'
pos_label=1
class_names=['DILIneg', 'DILIpos']

model_label='Best bio data model'


def _print_subset_metrics(pred_df, response_col, subset_name):
    """Print classification metrics for one data split and return the key ones.

    Args:
        pred_df: Prediction rows of a single split. Must contain the columns
            ``response_col`` (true labels), ``{response_col}_pred`` (hard class
            predictions) and ``{response_col}_prob`` (predicted scores).
        response_col: Name of the response column (a single string).
        subset_name: Split name used as the prefix of every printed line.

    Returns:
        Tuple ``(roc_auc, average_precision, conf_mat)``.
    """
    y_true = pred_df[f'{response_col}']
    y_pred = pred_df[f'{response_col}_pred']
    y_prob = pred_df[f'{response_col}_prob']

    print(f'{subset_name} accuracy', accuracy_score(y_true, y_pred))

    conf_mat = confusion_matrix(y_true, y_pred)
    print(f'{subset_name} CM', conf_mat)

    average_precision = average_precision_score(y_true, y_prob)
    print(f'{subset_name} prc_auc', average_precision)

    print(f'{subset_name} precision', precision_score(y_true, y_pred))
    print(f'{subset_name} recall', recall_score(y_true, y_pred))

    # ROC AUC is a ranking metric: it has to be computed from the predicted
    # scores (the same column the ROC curve is drawn from), not from the hard
    # class labels, otherwise the reported area does not match the curve.
    roc_auc = roc_auc_score(y_true, y_prob)
    print(f'{subset_name} roc_auc', roc_auc, '\n')

    return roc_auc, average_precision, conf_mat


with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    for model_uuid in top_models:

        # set variable names & get data
        collection_name = mt.get_model_collection_by_uuid(model_uuid)
        top_model=mt.get_full_metadata_by_uuid(model_uuid, collection_name)
        dataset_key=top_model['training_dataset']['dataset_key']
        dataset = dataset_key.split('/')[-1].replace('.csv','')
        df=pd.read_csv(dataset_key, index_col=0)
        response_cols = top_model['training_dataset']['response_cols']
        if len(response_cols)==1: response_col=response_cols[0]
        else: response_col=response_cols
        featurizer= top_model['model_parameters']['featurizer']
        cols=['compound_id', 'DILIst_Classification', 'CompoundName', 'vDILIConcern', 'SeverityClass']
        split_uuid=top_model['splitting_parameters']['split_uuid']
        if featurizer == 'computed_descriptors':
            featurized = True
            feat_type=top_model['descriptor_specific']['descriptor_type']
            feat = pd.read_csv(f'/g/g16/apaulson/workspace/datasets/scaled_descriptors/{dataset}_with_{feat_type}_descriptors.csv')
        else:
            featurized = False
            feat_type = featurizer
            feat=df
        feat[id_col]=feat[id_col].astype(str)
        # copy so that the dtype change below edits meta only, not a slice of df
        meta=df[cols].copy()
        meta[id_col]=meta[id_col].astype(str)
        splitdf=pd.read_csv(f'/g/g16/apaulson/workspace/datasets/{dataset}_train_valid_test_scaffold_{split_uuid}.csv')
        splitdf['cmpd_id']=splitdf['cmpd_id'].astype(str)

        # list model metrics from model tracker
        print('Training metrics from model tracker\n\n', top_model['training_metrics'][0]['prediction_results'], '\n')
        print('Validation metrics from model tracker\n\n', top_model['training_metrics'][1]['prediction_results'], '\n')
        print('Test metrics from model tracker\n\n', top_model['training_metrics'][2]['prediction_results'], '\n')

        # predict (either predict from model or this code snippet give same results)
        pred_params = {
        'featurizer': 'computed_descriptors',
        'result_dir': tempfile.mkdtemp(),
        'id_col': id_col,
        'smiles_col': smiles_col,
        'response_cols': response_cols
        }
        pred_params = parse.wrapper(pred_params)
        # pipe, pparams = mp.load_from_tracker(model_uuid)
        pipe = mp.create_prediction_pipeline(pred_params, model_uuid, collection_name)
        if pipe.params.model_type == 'xgboost':
            pipe.params.uncertainty = False
        full_df = pipe.predict_full_dataset(feat, contains_responses=True, is_featurized=featurized,
                                            dset_params=pred_params)
#         full_df = pfm.predict_from_tracker_model(model_uuid, collection_name, feat, id_col,
#                                                  smiles_col, response_cols,
#                                                  is_featurized=featurized, dont_standardize=True)
        full_df=full_df.merge(meta, how="inner", on="compound_id")
        full_df=full_df.merge(splitdf, how="inner", left_on="compound_id", right_on="cmpd_id")
        full_df=full_df[~full_df[response_col].isna()]

        # graph: ax[0] ROC curves, ax[1] PR curves, ax[2] valid CM, ax[3] test CM
        fig, ax = plt.subplots(2,2, sharex=False, sharey=False, figsize=(20,20))
        ax=ax.ravel()
        lw = 2
        step_style = {"drawstyle": "steps-post"}
        cm_cmap = sns.cubehelix_palette(rot=0, start=2.40, as_cmap=True)

        print('\nCalculated metrics from predictions\n')

        # (split, line colour, extra kwargs for the ROC line, axis for the confusion matrix)
        # The train split has no confusion-matrix panel and its ROC line is not stepped.
        split_plan = [
            ('train', pal[3], {}, None),
            ('valid', pal[4], step_style, ax[2]),
            ('test', pal[5], step_style, ax[3]),
        ]
        for split_name, color, roc_kwargs, cm_ax in split_plan:
            pred_df = full_df[full_df.subset == split_name]
            split_title = split_name.capitalize()

            # calculate further metrics
            # (named conf_mat, not cm, so the compare_models alias `cm` imported
            # at the top of the notebook is not overwritten)
            roc_auc, average_precision, conf_mat = _print_subset_metrics(pred_df, response_col, split_name)

            y_true = pred_df[f'{response_col}']
            y_prob = pred_df[f'{response_col}_prob']

            # ROC AUC curve
            fpr, tpr, _ = roc_curve(y_true, y_prob)
            ax[0].plot(fpr, tpr, lw=lw, label=f'{split_title} ROC curve (area = {roc_auc:0.2f})',
                       color=color, **roc_kwargs)

            # PR curve
            precision, recall, _ = precision_recall_curve(y_true, y_prob)
            ax[1].plot(recall, precision, color=color,
                       label=f"AP = {average_precision:0.2f}", **step_style)

            # confusion matrix (recalculated from predictions, not from the tracker table)
            if cm_ax is not None:
                print(f'{split_title} balanced accuracy\n', balanced_accuracy_score(conf_mat))
                im = pfma.plot_confusion_matrix(conf_mat, classes=class_names, normalize=False,
                                                cmap=cm_cmap, ax=cm_ax)
                fig.colorbar(mappable=im, ax=cm_ax, shrink=0.7)
                # title follows the split actually shown in the panel
                cm_ax.set_title(f"{split_title} CM")

        # ROC panel: chance diagonal, limits, labels
        ax[0].plot([0, 1], [0, 1], lw=lw, linestyle='--')
        ax[0].set_xlim([-0.05, 1.05])
        ax[0].set_ylim([-0.05, 1.05])
        ax[0].set_xlabel('False Positive Rate')
        ax[0].set_ylabel('True Positive Rate')
        ax[0].legend(loc="lower right")

        # PR panel: limits, labels
        info_pos_label = (f" (Positive label: {pos_label})")
        xlabel = "Recall" + info_pos_label
        ylabel = "Precision" + info_pos_label
        ax[1].set(xlabel=xlabel, ylabel=ylabel)
        ax[1].set_xlim([-0.05, 1.05])
        ax[1].set_ylim([-0.05, 1.05])
        ax[1].legend(loc="lower right")

#         plt.tight_layout()
        fig.suptitle(f'{dataset} {model_label}')#\n{model_uuid}')

In [None]:
# pipe=mp.retrain_model('a7e54dec-a8aa-4d6d-a875-b15d29e464e7')

In [None]:
# import atomsci.ddm.pipeline.perf_plots as pp

In [None]:
# pp.plot_ROC_curve(pipe)

# examine hyperparams

In [None]:
# Rank every model by validation balanced accuracy. When at least one model has
# dropouts recorded, unpack the dropout / layer-size lists into per-layer columns.
scoretype = 'balanced_accuracy'
winnertype= f'best_valid_{scoretype}'

perf_track_df=allmodels.sort_values(by=winnertype, ascending=False)

if perf_track_df.dropouts.notna().any():
    layer_cols = ['layer1','layer2','layer3']
    # "[a, b, c]" -> "a, b, c" -> one float column per layer
    dropout_text = perf_track_df.dropouts.astype(str).str.strip('[]')
    perf_track_df[['drop1', 'drop2', 'drop3']] = dropout_text.str.split(pat=',',n=3, expand=True).astype(float)
    perf_track_df['layer_sizes'] = perf_track_df.layer_sizes.astype(str).str.strip('[]')
    perf_track_df[layer_cols]=perf_track_df.layer_sizes.str.split(pat=',', n=3, expand=True).astype(float)
    # layers that are missing show up as NaN, so count the ones that are present
    perf_track_df['num_layers'] = 3-perf_track_df[layer_cols].isna().sum(axis=1)
    # missing layers become 1 so they do not change the product below
    perf_track_df[layer_cols]=perf_track_df[layer_cols].fillna(value=1).astype(int)
    perf_track_df['num_nodes']=perf_track_df.layer1 * perf_track_df.layer2 * perf_track_df.layer3
    # none of these columns apply to models that are not neural networks
    not_nn = perf_track_df.model_type != "NN"
    for nn_col in ['layer_sizes', 'num_layers', 'num_nodes',
                   'drop1', 'drop2', 'drop3',
                   'layer1', 'layer2', 'layer3']:
        perf_track_df.loc[not_nn, nn_col]=np.nan

print(perf_track_df.shape)
# print(perf_track_df.columns)

# Best validation score per metric; scoretype / winnertype end on the last metric listed.
for score_label, scoretype in [('bal acc', 'balanced_accuracy'),
                               ('roc', 'roc_auc_score'),
                               ('prc', 'prc_auc_score')]:
    winnertype= f'best_valid_{scoretype}'
    print(f'top {score_label} score:', perf_track_df[f'{winnertype}'].max().round(4))

In [None]:
hyperparams = ['features', 'model_type', 'learning_rate','layer1', 'layer2', 'layer3', 'drop1', 'drop2', 'drop3',  'rf_estimators', 'rf_max_depth', 'rf_max_features','xgb_learning_rate', 'xgb_gamma', 'num_nodes', ]
loghps=['learning_rate', 'xgb_learning_rate']
len(hyperparams)

In [None]:
# One panel per hyperparameter: bar counts for text-valued columns,
# distributions for numeric ones (log-scaled counts for the learning rates).
fig, axes = plt.subplots(5,3,figsize=(30,20))
for hparam_name, ax in zip(hyperparams, axes.flat):
    values = perf_track_df[hparam_name]
    if isinstance(values.iloc[0], str):
        # Pass the Series as x=: a bare positional argument is interpreted
        # differently across seaborn releases.
        g=sns.countplot(x=values, ax=ax)
        if hparam_name=='features':
            g.set_xticklabels(g.get_xticklabels(), rotation=20, ha='right')
    elif hparam_name in loghps:
        # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
        # consider sns.histplot once the seaborn version in use is confirmed.
        sns.distplot(values, hist_kws={'log':True}, ax=ax)
    else:
        sns.distplot(values, ax=ax)
plt.tight_layout()

In [None]:
from scipy.stats import ttest_ind

In [None]:
selection_metric = 'best_valid_roc_auc_score'
perf_track_df['txptr_features']=perf_track_df['features'].str.replace('_txptr', '').str.replace('_scr','')
perf_track_df=perf_track_df.sort_values(['model_run','features'])
plot_df=perf_track_df[(perf_track_df.txptr_features!='ecfp')&(perf_track_df.txptr_features!='graphconv')&(perf_track_df.model_run!='No bio data')]#&(perf_track_df.txptr_features!='mordred_seltrain')]
sns.set_style("ticks")
sns.catplot(x="txptr_features", y=selection_metric, 
            hue='model_run', height = 10, aspect=1, 
            kind = 'box',  showfliers = False,
            #col='model_type',
            data=plot_df);

plot_df=plot_df.reset_index(drop=True)
print(plot_df.shape)
scores = plot_df.pivot(columns='features', values=selection_metric)
print(scores.shape)
print(scores.max())
print("MOE",ttest_ind(scores.moe_scr, scores.moe_txptr, nan_policy='omit'))
print("MRS",ttest_ind(scores.mordred_filtered_scr, scores.mordred_filtered_txptr, nan_policy='omit'))
print("MRF",ttest_ind(scores.mordred_seltrain_scr, scores.mordred_seltrain_txptr, nan_policy='omit'))
print("RDK",ttest_ind(scores.rdkit_raw_scr, scores.rdkit_raw_txptr, nan_policy='omit'))

In [None]:
scores

In [None]:
scoretype='balanced_accuracy'
subset='best_valid'
winnertype= f'{subset}_{scoretype}'
plot_df=perf_track_df#[perf_track_df.descriptor_type=='mordred_seltrain']
plot_df=plot_df[[f"best_train_{scoretype}",f"best_valid_{scoretype}",f"best_test_{scoretype}"]]
# turn off sorting if you have a ton of models.. slow
plot_df=plot_df.sort_values(f"best_test_{scoretype}")

fig, ax = plt.subplots(1,2,figsize=(26,8))
sns.kdeplot(perf_track_df[f'best_train_{scoretype}'], label="train",ax=ax[0])
sns.kdeplot(perf_track_df[f'best_valid_{scoretype}'], label="valid",ax=ax[0])
sns.kdeplot(perf_track_df[f'best_test_{scoretype}'], label="test",ax=ax[0])

ax[0].legend(loc="upper left")
ax[1].plot(plot_df.T);
ax[1].set_xticklabels(['train','valid','test'])
ax[1].set_ylim(plot_df.min().min()-.1,1)
fig.suptitle(f"{scoretype} for DILIst classification models");

In [None]:
# bin each hp for downstream viz
pd.set_option('display.float_format', '{:.2g}'.format)
# hyperparams.append('best_valid_balanced_accuracy')
# number of bins per hyperparameter, aligned with hyperparams[2:]
hpbins =      [10,10,10,10,10,10,10,10,10,10,5,5,10,10]

# The loop variable is deliberately not named `hp`: that is the name the
# search-space cell of this notebook calls `hp.uniform` on, and rebinding it to
# a string would break that cell on a re-run.
for hparam, n_bins in zip(hyperparams[2:], hpbins):
    # pd.cut labels look like "(low, high]"; strip the brackets, split on the
    # comma and keep the upper edge as the numeric bin value (NaN stays NaN).
    bin_edges = pd.cut(perf_track_df[hparam], n_bins, precision=2).astype(str).str.strip('(]').str.split(',', expand=True)
    perf_track_df['binned_'+hparam] = bin_edges[1].astype(float)

In [None]:
# perf_track_df.binned_num_nodes=perf_track_df.binned_num_nodes.astype(float)
# perf_track_df.binned_num_nodes

In [None]:
dataset_name='DILIst Classification'
if sum(~perf_track_df.rf_estimators.isna())>0:
    feat1 = 'binned_rf_estimators'; feat2 = 'binned_rf_max_depth'; feat3 = 'binned_rf_max_features'
    hue=feat3
    plot_df=perf_track_df[perf_track_df.model_type=='RF']
    plot_df = plot_df.sort_values([feat3, feat1, feat2])
    plot_df[f'{feat1}/{feat2}'] = ['%s / %s' % (mf,est) for mf,est in zip(plot_df[feat1], plot_df[feat2])]
    with sns.axes_style("whitegrid"):
        fig = plt.figure(figsize=(80,15))
        ax1 = fig.add_subplot(111)
        sns.scatterplot(x=f'{feat1}/{feat2}', y='{0}_{1}'.format(subset, scoretype), hue=hue, palette=sns.cubehelix_palette(len(plot_df[hue].unique())), data=plot_df, ax=ax1)
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.xticks(rotation=30, ha='right')
        plt.title(f'%s RF model performance' % dataset_name);

In [None]:
if sum(~perf_track_df.dropouts.isna())>0:
    feat1 = 'binned_learning_rate'; feat3 = 'binned_drop1'; feat2 = 'binned_layer1'
    hue=feat3
    plot_df=perf_track_df[perf_track_df.model_type=='NN']
    plot_df = plot_df.sort_values([feat3, feat1, feat2])
    plot_df[f'{feat1}/{feat2}'] = ['%s / %s' % (mf,est) for mf,est in zip(plot_df[feat1], plot_df[feat2])]
    with sns.axes_style("whitegrid"):
        fig = plt.figure(figsize=(80,15))
        ax1 = fig.add_subplot(111)
        sns.scatterplot(x=f'{feat1}/{feat2}', y='{0}_{1}'.format(subset, scoretype), hue=hue, palette=sns.cubehelix_palette(len(plot_df[hue].unique())), data=plot_df, ax=ax1)
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.xticks(rotation=30, ha='right')
        plt.title(f'%s NN model performance - layer 1' % dataset_name);

In [None]:
if sum(~perf_track_df.dropouts.isna())>0:
    feat1 = 'binned_learning_rate'; feat3 = 'binned_drop2'; feat2 = 'binned_layer2'
    hue=feat3
    plot_df=perf_track_df[perf_track_df.model_type=='NN']
    plot_df = plot_df.sort_values([feat3, feat1, feat2])
    plot_df[f'{feat1}/{feat2}'] = ['%s / %s' % (mf,est) for mf,est in zip(plot_df[feat1], plot_df[feat2])]
    with sns.axes_style("whitegrid"):
        fig = plt.figure(figsize=(80,15))
        ax1 = fig.add_subplot(111)
        sns.scatterplot(x=f'{feat1}/{feat2}', y='{0}_{1}'.format(subset, scoretype), hue=hue, palette=sns.cubehelix_palette(len(plot_df[hue].unique())), data=plot_df, ax=ax1)
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.xticks(rotation=30, ha='right')
        plt.title(f'%s NN model performance - layer 2' % dataset_name);

In [None]:
if sum(~perf_track_df.dropouts.isna())>0:
    feat1 = 'binned_learning_rate'; feat3 = 'binned_drop3'; feat2 = 'binned_layer3'
    hue=feat3
    plot_df=perf_track_df[perf_track_df.model_type=='NN']
    plot_df = plot_df.sort_values([feat3, feat1, feat2])
    plot_df[f'{feat1}/{feat2}'] = ['%s / %s' % (mf,est) for mf,est in zip(plot_df[feat1], plot_df[feat2])]
    with sns.axes_style("whitegrid"):
        fig = plt.figure(figsize=(80,15))
        ax1 = fig.add_subplot(111)
        sns.scatterplot(x=f'{feat1}/{feat2}', y='{0}_{1}'.format(subset, scoretype), hue=hue, palette=sns.cubehelix_palette(len(plot_df[hue].unique())-1), data=plot_df, ax=ax1)
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.xticks(rotation=30, ha='right')
        plt.title(f'%s NN model performance - layer 3' % dataset_name);

In [None]:
if sum(~perf_track_df.xgb_gamma.isna())>0:
    feat2 = 'binned_xgb_learning_rate'; feat1 = 'xgb_gamma'
    hue=feat2
    plot_df=perf_track_df[perf_track_df.model_type=='xgboost']
    plot_df = plot_df.sort_values([feat1, feat2])
    #plot_df[f'{feat1}/{feat2}'] = ['%s / %s' % (mf,est) for mf,est in zip(plot_df[feat1], plot_df[feat2])]
    with sns.axes_style("whitegrid"):
        fig = plt.figure(figsize=(40,15))
        ax1 = fig.add_subplot(111)
        sns.scatterplot(x=feat1, y='{0}_{1}'.format(subset, scoretype), 
                        hue=hue, palette=sns.cubehelix_palette(len(plot_df[hue].unique())), 
                        data=plot_df, ax=ax1)
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.xticks(rotation=30, ha='center')
        plt.title(f'%s XGboost model performance' % dataset_name);

In [None]:
nfeats=3; scoretype='balanced_accuracy'; winnertype='best_valid_balanced_accuracy'
feat1='binned_drop1'; feat2='binned_learning_rate'; feat3='binned_num_nodes'; feat4='binned_layer1'
feat5='binned_rf_max_depth'; feat6='binned_rf_max_features'; feat7='binned_rf_estimators'; 
feat8='binned_xgb_gamma'; feat9='binned_xgb_learning_rate'; feat12='best_test_balanced_accuracy'
feat10='features'; feat11='model_type'; 

In [None]:
import math


def _round_sig(value, sig=3):
    """Round `value` to `sig` significant figures; zero is returned unchanged."""
    if value == 0:
        # log10(0) is undefined, and zero has no digits to trim anyway.
        return value
    exponent = math.floor(math.log10(abs(value)))
    return round(value, sig - 1 - exponent)


# Tick labels for the binned NN-size panel: the distinct bin values in
# ascending order, shortened to three significant figures.
# dropna() discards missing entries up front, so the cell no longer needs a
# manual edit (the old unconditional pop of the last item) depending on
# whether the frame holds only NN models.
bnnlabs = perf_track_df.binned_num_nodes.dropna().sort_values(ascending=True).unique().tolist()
bnnlabs = [_round_sig(x) for x in bnnlabs]
bnnlabs

In [None]:
# Summary grid (3 rows x 4 columns): box plots of the `winnertype` score
# grouped by each binned hyperparameter, plus one scatter panel comparing the
# `feat12` score with `winnertype`. Panels for a model family stay empty when
# its guard column has no non-null values.
sns.set_context('poster')
plotdf2 = perf_track_df
fig, ax = plt.subplots(3, 4, figsize=(60, 40))


def _score_boxplot(panel, column, xlabel, start, rotation=0):
    """Draw a box plot of `winnertype` grouped by `column` on `panel`.

    `start` selects the cubehelix hue used for the panel, `rotation` is the
    tick-label rotation in degrees, and `xlabel` replaces the raw column name
    on the x axis. Reads `plotdf2` and `winnertype` from the notebook scope.
    """
    n_levels = len(plotdf2[column].unique())
    sns.boxplot(x=column, y=winnertype,
                palette=sns.cubehelix_palette(n_levels, rot=0, start=start),
                data=plotdf2, ax=panel)
    panel.tick_params(rotation=rotation)
    panel.set_xlabel(xlabel)


# Row 0: neural-network hyperparameters.
if perf_track_df.dropouts.notna().any():
    _score_boxplot(ax[0, 0], feat1, 'NN dropouts layer 1', 0.60)
    _score_boxplot(ax[0, 1], feat2, 'NN learning rate', 0.60)
    _score_boxplot(ax[0, 2], feat3, 'NN number of parameters in hidden layers', 0.60, rotation=15)
    # Replace the raw bin values with the rounded labels prepared earlier.
    ax[0, 2].set_xticklabels(bnnlabs)
    _score_boxplot(ax[0, 3], feat4, 'NN params layer 1', 0.60)

# Row 1, columns 0-2: random-forest hyperparameters.
if perf_track_df.rf_estimators.notna().any():
    _score_boxplot(ax[1, 0], feat5, 'RF max depth', 2.00)
    _score_boxplot(ax[1, 1], feat6, 'RF max features per node', 2.00)
    _score_boxplot(ax[1, 2], feat7, 'RF number of trees', 2.00)

# Row 1, column 3: `feat12` score against `winnertype` for every model.
# `color` must be a single color here; passing the whole palette list makes
# matplotlib treat it as per-point colors and fail on a length mismatch.
sns.scatterplot(x=feat12, y=winnertype, color=pal[0], data=plotdf2, ax=ax[1, 3])
ax[1, 3].tick_params(rotation=0)
ax[1, 3].set_xlabel(feat12)

# Row 2, columns 0-1: XGBoost hyperparameters.
if perf_track_df.xgb_gamma.notna().any():
    _score_boxplot(ax[2, 0], feat8, 'XGBoost gamma', 2.40)
    _score_boxplot(ax[2, 1], feat9, 'XGBoost learning rate', 2.40)

# Row 2, columns 2-3: categorical columns, drawn regardless of model family.
_score_boxplot(ax[2, 2], feat10, 'Featurization type', 0.10, rotation=30)
_score_boxplot(ax[2, 3], feat11, 'Model type', 0.10)
plt.tight_layout()