# Fine tuning ChemBERTa

## Installs and imports


In [1]:
from google.colab import drive
drive.mount('/content/gdrive')
!cp '/content/gdrive/My Drive/all_carboxylics.csv' all_carboxylics.csv
!cp '/content/gdrive/My Drive/carboxylic_embeddings.h5' carboxylic_embeddings.h5
!cp '/content/gdrive/My Drive/quantumDots/data/carboxylic_features.hdf5' carboxylic_features.hdf5

!pip install --pre deepchem
import deepchem as dc

!pip install wandb -qqq
import wandb
wandb.login()

import pandas as pd
import h5py

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


[34m[1mwandb[0m: Currently logged in as: [33mapjansen[0m (use `wandb login --relogin` to force relogin)


## Load best model and feature according to sweep

### first install transformers, datasets

In [2]:
!pip install transformers
!pip install datasets



### continue

In [3]:
from transformers import AutoModelForSequenceClassification, TFAutoModelForSequenceClassification, AutoTokenizer

In [4]:
model_checkpoint = 'DeepChem/ChemBERTa-77M-MTR'
#model_checkpoint = "seyonec/PubChem10M_SMILES_BPE_450k"

#model = AutoModelForSequenceClassification.from_pretrained(
#    model_checkpoint, num_labels=1, problem_type='regression')

# tf version:

import tensorflow as tf

def set_seed(seed=42):
    tf.random.set_seed(seed)
    
def model_loader():
  set_seed()
  model = TFAutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=1,
                                                           problem_type='regression', from_pt=True) # num_labels=1 sets it to regression
  return model
  
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

feature_name = 'token_mean'

In [5]:
model_loader()

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['norm_std', 'regression.out_proj.bias', 'norm_mean', 'regression.dense.weight', 'regression.out_proj.weight', 'roberta.embeddings.position_ids', 'regression.dense.bias']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out

<transformers.models.roberta.modeling_tf_roberta.TFRobertaForSequenceClassification at 0x7f8a6dd10e50>

## Datasets

In [6]:
import datasets

In [33]:
from datasets import Dataset, DatasetDict  # transformers datasets
from transformers import DefaultDataCollator
import torch

carboxylics_frame = pd.read_csv('all_carboxylics.csv', index_col='Unnamed: 0')
tasks = list(carboxylics_frame.columns[2:])
carboxylics_smiles = carboxylics_frame['smiles']

def create_datasets(task_idx: int, 
                    seed: int = 42, 
                    train_frac: float = 0.8,
                    val_frac: float = 0.1,
                    test_frac: float = 0.1):
  df = carboxylics_frame[['smiles', tasks[task_idx]]]
  df = df.rename(columns={tasks[task_idx]: 'label'})

  dataset = Dataset.from_pandas(df, preserve_index=False)
  dataset = dataset.map(tokenize, batched=True, batch_size=None)
  dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
  datasets = split_dataset(dataset, train_frac, val_frac, test_frac)
  return datasets

def split_dataset(dataset, train_frac, val_frac, test_frac, seed=42):
  train_testvalid = dataset.train_test_split(test_size=(1 - train_frac), seed=seed)
  test_valid = train_testvalid['test'].train_test_split(test_size=test_frac / (test_frac + val_frac), seed=seed)
  return DatasetDict({
      'train': train_testvalid['train'],
      'test': test_valid['test'],
      'val': test_valid['train']
  })

def tokenize(batch):
  return tokenizer(batch["smiles"], padding=True, truncation=True)

def to_tf(datasets, config): # to use with tf model, not tested
  data_collator = DefaultDataCollator(return_tensors="tf")
  set_seed(seed=42)
  return {
      k: v.to_tf_dataset(
          columns=tokenizer.model_input_names,
          label_cols=['label'], 
          shuffle=True, 
          batch_size=config['batch_size'],
          collate_fn=data_collator)
      for k, v in datasets.items()
  }


In [36]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions
    r2 = r2_score(labels, preds)
    mse = mean_squared_error(labels, preds)
    return {"rmse": np.sqrt(mse), "r2": r2}

In [37]:
import tensorflow as tf

In [38]:
  datasets = create_datasets(3)
  datasets_tf = to_tf(datasets, {'batch_size': 32})

  0%|          | 0/1 [00:00<?, ?ba/s]

In [39]:
from tensorflow.keras.optimizers.schedules import PolynomialDecay
from transformers import AdamWeightDecay, WarmUp

def configure_optimizer(config, batches_per_epoch):
  # The number of training steps is the number of samples in the dataset, divided by the batch size then multiplied
  # by the total number of epochs. Note that the tf_train_dataset here is a batched tf.data.Dataset,
  # not the original Hugging Face Dataset, so its len() is already num_samples // batch_size.
  num_train_steps = batches_per_epoch * config['epochs']
  warmup_steps = int(config['warmup_steps_frac'] * batches_per_epoch)

  lr_scheduler = PolynomialDecay(  # this linearly decays the learning rate, every batch, until 0.
    initial_learning_rate=config['learning_rate'], 
    end_learning_rate=config['learning_rate'] / config['tot_lr_decay_factor'],
    decay_steps=num_train_steps
  )

  full_schedule = WarmUp(
      initial_learning_rate=config['learning_rate'] / config['warmup_lr_frac'],
      decay_schedule_fn=lr_scheduler,
      warmup_steps=warmup_steps
  )

  optimizer = AdamWeightDecay(
      learning_rate=full_schedule,
      weight_decay_rate=config['weight_decay'])

  return optimizer

In [48]:
def train_tf(config):
  print('epochs: ', config['epochs'])
  
  datasets = create_datasets(config['task_idx'])
  datasets = to_tf(datasets, config)

  # optimizer with learning rate decay
  optimizer = configure_optimizer(config, batches_per_epoch=len(datasets))

  early_stopping = keras.callbacks.EarlyStopping(patience=config['patience'], restore_best_weights=True)
  wandb_callback = wandb.keras.WandbCallback(
      monitor='val_root_mean_squared_error',
      save_weights_only=True)

  # reload the model to start with ChemBERTa's weights
  model = model_loader()

  model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

  model.fit(datasets['train'], 
            validation_data=datasets['val'],
            epochs=config['epochs'],
            callbacks=[wandb_callback, early_stopping])
  
def train(config=None):
  with wandb.init(
      project="chemberta",
      config=config):
    config = wandb.config

    train_tf(config)

    return

In [49]:
# this is for the pytorch version, not using atm
# because I had issues reporting the loss and wasn't sure if it was actually training
from transformers import Trainer, TrainingArguments

def configure_trainer(config):
  model_name = f"{model_checkpoint}-finetuned-task-{config['task_idx']}"

  datasets = create_datasets(config['task_idx'])

  batch_size = config['batch_size']
  logging_steps = len(datasets["train"]) // batch_size

  training_args = TrainingArguments(output_dir=model_name,
                                    num_train_epochs=config['epochs'],
                                    learning_rate=config['learning_rate'],
                                    per_device_train_batch_size=batch_size,
                                    per_device_eval_batch_size=batch_size,
                                    weight_decay=config['weight_decay'],
                                    evaluation_strategy="epoch",
                                    disable_tqdm=False,
                                    logging_steps=logging_steps,
                                    push_to_hub=False, 
                                    log_level="error",
                                    report_to='wandb'
                                    #label_names=[tasks[config['task_idx']]]
                                    )
  trainer = Trainer(model=model, args=training_args, 
                    compute_metrics=compute_metrics,
                    train_dataset=datasets["train"],
                    eval_dataset=datasets["val"],
                    tokenizer=tokenizer)
  return trainer

In [92]:
# def train(config=None):
#   with wandb.init(
#       project="chemberta",
#       config=config):
#     config = wandb.config
#     trainer = configure_trainer(config)
#     trainer.train()
#     return

just to test:

In [46]:
TEST_CONFIG = {'task_idx': 0, 'batch_size': 64, 'epochs': 2, 'learning_rate': 2e-5, 'weight_decay': 0.03,
               'patience': 3,
               'warmup_steps_frac': 0.1, 'warmup_lr_frac': 5,
               'tot_lr_decay_factor': 100}

In [47]:
# pytorch trianer test
# trainer = configure_trainer(TEST_CONFIG)
# trainer.train()

# tensorflow trainer test
train(TEST_CONFIG)

epochs:  2


  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['norm_std', 'regression.out_proj.bias', 'norm_mean', 'regression.dense.weight', 'regression.out_proj.weight', 'roberta.embeddings.position_ids', 'regression.dense.bias']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out

Epoch 1/2

[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. It does not work for subclassed models, because such models are defined via the body of a Python method, which isn't safely serializable. Consider saving to the Tensorflow SavedModel format (by setting save_format="tf") or using `save_weights`.


Epoch 2/2



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁█
loss,█▁
root_mean_squared_error,█▁
val_loss,█▁
val_root_mean_squared_error,█▁

0,1
best_epoch,1.0
best_val_root_mean_squared_error,0.02251
epoch,1.0
loss,0.00061
root_mean_squared_error,0.02479
val_loss,0.00051
val_root_mean_squared_error,0.02251


## Perform sweeps of training parameters on single task

In [50]:
import pprint

sweep_config = {'method': 'bayes'}
sweep_config['metric'] = {'name': 'best_val_root_mean_squared_error', 'goal': 'minimize'}

parameters_dict = {
    'checkpoint': {'value': model_checkpoint},
    'feature_type': {'value': feature_name},
    'task_idx': {'value': 0},
    'epochs': {'values': [2, 5, 10]},  # with linear weight decay to 0, this also controls decay speed
    'learning_rate': {'values': [3e-6, 1e-5, 2e-5, 5e-5, 1e-4]},
    #'weight_decay': {'values': [0., 1e-3, 1e-2]},
    'weight_decay': {'values': [0., 1e-3]},
    'batch_size': {'values': [64, 128, 256]}, # batch size 512 can't be handled by colab
    'patience': {'value': 3},
    'warmup_steps_frac': {'values': [0., 0.5, 1.]},
    'warmup_lr_frac': {'values': [2, 5, 10]},
    'tot_lr_decay_factor': {'values': [1, 3, 10, 30]}
}
sweep_config['parameters'] = parameters_dict
pprint.pprint(sweep_config)

sweep_id = wandb.sweep(sweep_config, project='chemberta', entity='apjansen')

{'method': 'bayes',
 'metric': {'goal': 'minimize', 'name': 'best_val_root_mean_squared_error'},
 'parameters': {'batch_size': {'values': [64, 128, 256]},
                'checkpoint': {'value': 'DeepChem/ChemBERTa-77M-MTR'},
                'epochs': {'values': [2, 5, 10]},
                'feature_type': {'value': 'token_mean'},
                'learning_rate': {'values': [3e-06,
                                             1e-05,
                                             2e-05,
                                             5e-05,
                                             0.0001]},
                'patience': {'value': 3},
                'task_idx': {'value': 0},
                'tot_lr_decay_factor': {'values': [1, 3, 10, 30]},
                'warmup_lr_frac': {'values': [2, 5, 10]},
                'warmup_steps_frac': {'values': [0.0, 0.5, 1.0]},
                'weight_decay': {'values': [0.0, 0.001]}}}
Create sweep with ID: nzjemjik
Sweep URL: https://wandb.ai/apjansen/ch

In [None]:
NUM_RUNS = 100
wandb.agent(sweep_id, train, count=NUM_RUNS)

[34m[1mwandb[0m: Agent Starting Run: usatrgnx with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	checkpoint: DeepChem/ChemBERTa-77M-MTR
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	feature_type: token_mean
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	patience: 3
[34m[1mwandb[0m: 	task_idx: 0
[34m[1mwandb[0m: 	tot_lr_decay_factor: 10
[34m[1mwandb[0m: 	warmup_lr_frac: 5
[34m[1mwandb[0m: 	warmup_steps_frac: 0
[34m[1mwandb[0m: 	weight_decay: 0


epochs:  5


  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['norm_std', 'regression.out_proj.bias', 'norm_mean', 'regression.dense.weight', 'regression.out_proj.weight', 'roberta.embeddings.position_ids', 'regression.dense.bias']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5



VBox(children=(Label(value='13.164 MB of 13.164 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
epoch,▁▃▅▆█
loss,█▄▂▁▁
root_mean_squared_error,█▄▃▂▁
val_loss,█▄▃▂▁
val_root_mean_squared_error,█▄▃▂▁

0,1
best_epoch,4.0
best_val_root_mean_squared_error,0.01263
epoch,4.0
loss,0.00018
root_mean_squared_error,0.01352
val_loss,0.00016
val_root_mean_squared_error,0.01263


[34m[1mwandb[0m: Agent Starting Run: f20v5tt7 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	checkpoint: DeepChem/ChemBERTa-77M-MTR
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	feature_type: token_mean
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	patience: 3
[34m[1mwandb[0m: 	task_idx: 0
[34m[1mwandb[0m: 	tot_lr_decay_factor: 10
[34m[1mwandb[0m: 	warmup_lr_frac: 10
[34m[1mwandb[0m: 	warmup_steps_frac: 0.5
[34m[1mwandb[0m: 	weight_decay: 0


epochs:  10


  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['norm_std', 'regression.out_proj.bias', 'norm_mean', 'regression.dense.weight', 'regression.out_proj.weight', 'roberta.embeddings.position_ids', 'regression.dense.bias']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10



VBox(children=(Label(value='13.164 MB of 13.164 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
epoch,▁▂▃▅▆▇█
loss,█▄▃▂▁▁▁
root_mean_squared_error,█▄▃▂▁▁▁
val_loss,▅█▇▁▅▄▄
val_root_mean_squared_error,▅█▇▁▅▅▄

0,1
best_epoch,3.0
best_val_root_mean_squared_error,0.01667
epoch,6.0
loss,0.00041
root_mean_squared_error,0.02026
val_loss,0.00034
val_root_mean_squared_error,0.01832


[34m[1mwandb[0m: Agent Starting Run: 63mnfm92 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	checkpoint: DeepChem/ChemBERTa-77M-MTR
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	feature_type: token_mean
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	patience: 3
[34m[1mwandb[0m: 	task_idx: 0
[34m[1mwandb[0m: 	tot_lr_decay_factor: 10
[34m[1mwandb[0m: 	warmup_lr_frac: 10
[34m[1mwandb[0m: 	warmup_steps_frac: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.001


epochs:  2


  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['norm_std', 'regression.out_proj.bias', 'norm_mean', 'regression.dense.weight', 'regression.out_proj.weight', 'roberta.embeddings.position_ids', 'regression.dense.bias']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out

Epoch 1/2
Epoch 2/2



VBox(children=(Label(value='13.164 MB of 13.164 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
epoch,▁█
loss,█▁
root_mean_squared_error,█▁
val_loss,█▁
val_root_mean_squared_error,█▁

0,1
best_epoch,1.0
best_val_root_mean_squared_error,0.01941
epoch,1.0
loss,0.00049
root_mean_squared_error,0.02217
val_loss,0.00038
val_root_mean_squared_error,0.01941


[34m[1mwandb[0m: Agent Starting Run: 5g3rr8kn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	checkpoint: DeepChem/ChemBERTa-77M-MTR
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	feature_type: token_mean
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	patience: 3
[34m[1mwandb[0m: 	task_idx: 0
[34m[1mwandb[0m: 	tot_lr_decay_factor: 3
[34m[1mwandb[0m: 	warmup_lr_frac: 5
[34m[1mwandb[0m: 	warmup_steps_frac: 0
[34m[1mwandb[0m: 	weight_decay: 0


epochs:  10


  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['norm_std', 'regression.out_proj.bias', 'norm_mean', 'regression.dense.weight', 'regression.out_proj.weight', 'roberta.embeddings.position_ids', 'regression.dense.bias']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10



VBox(children=(Label(value='13.164 MB of 13.164 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
epoch,▁▂▃▅▆▇█
loss,█▃▂▂▂▁▁
root_mean_squared_error,█▄▃▂▂▁▁
val_loss,▃██▁▇▆▇
val_root_mean_squared_error,▃██▁▇▆▇

0,1
best_epoch,3.0
best_val_root_mean_squared_error,0.00918
epoch,6.0
loss,0.00011
root_mean_squared_error,0.01051
val_loss,0.00015
val_root_mean_squared_error,0.0121


[34m[1mwandb[0m: Agent Starting Run: eq74gy3n with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	checkpoint: DeepChem/ChemBERTa-77M-MTR
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	feature_type: token_mean
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	patience: 3
[34m[1mwandb[0m: 	task_idx: 0
[34m[1mwandb[0m: 	tot_lr_decay_factor: 3
[34m[1mwandb[0m: 	warmup_lr_frac: 5
[34m[1mwandb[0m: 	warmup_steps_frac: 0
[34m[1mwandb[0m: 	weight_decay: 0.001


epochs:  10


  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['norm_std', 'regression.out_proj.bias', 'norm_mean', 'regression.dense.weight', 'regression.out_proj.weight', 'roberta.embeddings.position_ids', 'regression.dense.bias']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10



VBox(children=(Label(value='13.164 MB of 13.164 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
epoch,▁▂▃▅▆▇█
loss,█▃▃▂▁▁▁
root_mean_squared_error,█▄▃▂▁▁▁
val_loss,▆█▇▁▅▅▅
val_root_mean_squared_error,▆█▇▁▆▅▅

0,1
best_epoch,3.0
best_val_root_mean_squared_error,0.00963
epoch,6.0
loss,0.00016
root_mean_squared_error,0.01273
val_loss,0.00016
val_root_mean_squared_error,0.01254


[34m[1mwandb[0m: Agent Starting Run: ldxftgy8 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	checkpoint: DeepChem/ChemBERTa-77M-MTR
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	feature_type: token_mean
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	patience: 3
[34m[1mwandb[0m: 	task_idx: 0
[34m[1mwandb[0m: 	tot_lr_decay_factor: 1
[34m[1mwandb[0m: 	warmup_lr_frac: 5
[34m[1mwandb[0m: 	warmup_steps_frac: 0
[34m[1mwandb[0m: 	weight_decay: 0.001


epochs:  10


  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['norm_std', 'regression.out_proj.bias', 'norm_mean', 'regression.dense.weight', 'regression.out_proj.weight', 'roberta.embeddings.position_ids', 'regression.dense.bias']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10



VBox(children=(Label(value='13.164 MB of 13.164 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
epoch,▁▂▃▅▆▇█
loss,█▃▂▁▁▁▁
root_mean_squared_error,█▃▂▂▁▁▁
val_loss,▃██▁▇▇▇
val_root_mean_squared_error,▄██▁▇▇▇

0,1
best_epoch,3.0
best_val_root_mean_squared_error,0.0086
epoch,6.0
loss,0.00011
root_mean_squared_error,0.01037
val_loss,0.00014
val_root_mean_squared_error,0.01187


[34m[1mwandb[0m: Agent Starting Run: l3xpw7em with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	checkpoint: DeepChem/ChemBERTa-77M-MTR
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	feature_type: token_mean
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	patience: 3
[34m[1mwandb[0m: 	task_idx: 0
[34m[1mwandb[0m: 	tot_lr_decay_factor: 3
[34m[1mwandb[0m: 	warmup_lr_frac: 2
[34m[1mwandb[0m: 	warmup_steps_frac: 0
[34m[1mwandb[0m: 	weight_decay: 0


epochs:  10


  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['norm_std', 'regression.out_proj.bias', 'norm_mean', 'regression.dense.weight', 'regression.out_proj.weight', 'roberta.embeddings.position_ids', 'regression.dense.bias']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10



VBox(children=(Label(value='13.164 MB of 13.164 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
epoch,▁▂▃▅▆▇█
loss,█▃▂▂▁▁▁
root_mean_squared_error,█▃▃▂▁▁▁
val_loss,▃█▇▁▇▆▇
val_root_mean_squared_error,▄█▇▁▇▇▇

0,1
best_epoch,3.0
best_val_root_mean_squared_error,0.00891
epoch,6.0
loss,0.00013
root_mean_squared_error,0.01143
val_loss,0.00015
val_root_mean_squared_error,0.01228


[34m[1mwandb[0m: Agent Starting Run: m8shbe6c with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	checkpoint: DeepChem/ChemBERTa-77M-MTR
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	feature_type: token_mean
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	patience: 3
[34m[1mwandb[0m: 	task_idx: 0
[34m[1mwandb[0m: 	tot_lr_decay_factor: 1
[34m[1mwandb[0m: 	warmup_lr_frac: 2
[34m[1mwandb[0m: 	warmup_steps_frac: 0
[34m[1mwandb[0m: 	weight_decay: 0.001


epochs:  10


  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['norm_std', 'regression.out_proj.bias', 'norm_mean', 'regression.dense.weight', 'regression.out_proj.weight', 'roberta.embeddings.position_ids', 'regression.dense.bias']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out

Epoch 1/10
Epoch 2/10
Epoch 3/10


## Fingerprints to compare to

In [None]:
data_loader_cfp = dc.data.CSVLoader(tasks, feature_field='smiles', featurizer=dc.feat.CircularFingerprint())
dataset_cfp = data_loader_cfp.create_dataset('all_carboxylics.csv')

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

In [None]:
def create_datasets_fp(task_idx: int, seed=42):
  X_all = dataset_cfp.X
  y_all = carboxylics_frame[tasks[task_idx]]
  dataset = dc.data.NumpyDataset(X=X_all, y=y_all)
  splitter = dc.splits.RandomSplitter()
  data_train, data_val, data_test = splitter.train_valid_test_split(dataset, seed=seed)
  return {'train': data_train, 'val': data_val, 'test': data_test}
    
def train_eval_fp(config, callbacks):
  print('Training config ', config)

  model = minimal_model(config)

  datasets = create_datasets_fp(config['task_idx'])
  early_stopping = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
  model.fit(datasets['train'].X, datasets['train'].y, validation_data=(datasets['val'].X, datasets['val'].y), epochs=config['epochs'],
          batch_size=32, callbacks=[early_stopping] + callbacks, verbose=False)
  
  preds = model.predict(datasets['test'].X)
  labels = datasets['test'].y
  metric = dc.metrics.r2_score

  print('Test r2: ', metric(labels, preds))
  return model

def train_fp(config=None):
  with wandb.init(
      project="chemberta",
      config=config):
    config = wandb.config
    wandb_callback = wandb.keras.WandbCallback(monitor='val_root_mean_squared_error')
    return train_eval_fp(config, callbacks=[wandb_callback])

In [None]:
sweep_config_fp = {'method': 'grid'}
sweep_config_fp['metric'] = {'name': 'best_val_root_mean_squared_error', 'goal': 'minimize'}

parameters_dict_fp = {
    'learning_rate': {'value': 1e-3},
    'task_idx': {'values': list(range(len(tasks)))},
    'epochs': {'value': 50},
}
sweep_config_fp['parameters'] = parameters_dict_fp

sweep_id_fp = wandb.sweep(sweep_config_fp, project='chemberta', entity='apjansen')

Create sweep with ID: pm1ozulq
Sweep URL: https://wandb.ai/apjansen/chemberta/sweeps/pm1ozulq


In [None]:
wandb.agent(sweep_id_fp, train_fp, count=len(tasks))

[34m[1mwandb[0m: Agent Starting Run: 2yn25hxh with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 0
[34m[1mwandb[0m: Currently logged in as: [33mapjansen[0m (use `wandb login --relogin` to force relogin)


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 0}
Test r2:  0.2737773413959067


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▄▅▅▆▇▇█
loss,█▂▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▁▁▁▁▁▁▂▂
val_loss,█▃▂▁▁▁▁▁▁▂▂
val_root_mean_squared_error,█▅▃▂▂▁▁▁▂▂▃

0,1
best_epoch,5.0
best_val_root_mean_squared_error,0.01448
epoch,10.0
loss,0.00054
root_mean_squared_error,0.02323
val_loss,0.00063
val_root_mean_squared_error,0.02509


[34m[1mwandb[0m: Agent Starting Run: vbugta19 with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 1


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 1}
Test r2:  0.2318738711252033


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▃▄▄▅▅▆▆▇▇█
loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▁▁▁▁▁▁▁▁▂▁▁
val_loss,█▃▂▁▁▁▁▁▁▂▂▂▂▂
val_root_mean_squared_error,█▄▃▂▁▁▁▁▁▂▂▂▂▂

0,1
best_epoch,8.0
best_val_root_mean_squared_error,0.0302
epoch,13.0
loss,0.00088
root_mean_squared_error,0.02966
val_loss,0.00119
val_root_mean_squared_error,0.03449


[34m[1mwandb[0m: Agent Starting Run: jtzve6u9 with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 2


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 2}
Test r2:  0.04219228811327824


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▄▅▅▆▇▇█
loss,█▂▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▁▁▁▁▁▁▁▂
val_loss,█▃▂▁▁▁▁▁▁▂▂
val_root_mean_squared_error,█▄▂▂▁▁▂▁▂▂▂

0,1
best_epoch,5.0
best_val_root_mean_squared_error,0.02958
epoch,10.0
loss,0.00085
root_mean_squared_error,0.02912
val_loss,0.00109
val_root_mean_squared_error,0.03308


[34m[1mwandb[0m: Agent Starting Run: npc0a0hj with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 3


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 3}
Test r2:  0.358096792114717


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
loss,█▂▁▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▁▁▁▁▁▁▂▁▁▁
val_loss,█▃▂▁▁▁▁▂▁▁▂▁
val_root_mean_squared_error,█▄▂▁▁▁▁▂▁▂▂▂

0,1
best_epoch,6.0
best_val_root_mean_squared_error,0.0376
epoch,11.0
loss,0.00118
root_mean_squared_error,0.03433
val_loss,0.00155
val_root_mean_squared_error,0.03941


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ma843odh with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 4


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 4}
Test r2:  0.2850238554949249


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▄▅▅▆▇▇█
loss,█▂▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▁▁▁▁▁▁▂▂
val_loss,█▄▂▁▁▁▁▁▂▂▂
val_root_mean_squared_error,█▄▂▁▁▁▁▂▃▂▂

0,1
best_epoch,5.0
best_val_root_mean_squared_error,0.01842
epoch,10.0
loss,0.00056
root_mean_squared_error,0.02357
val_loss,0.00053
val_root_mean_squared_error,0.02309


[34m[1mwandb[0m: Agent Starting Run: pcu7tczu with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 5


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 5}
Test r2:  0.25491066271524243


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▄▅▅▆▇▇█
loss,█▂▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▁▁▁▁▁▁▁▂
val_loss,█▄▂▁▁▁▁▁▁▁▂
val_root_mean_squared_error,█▄▃▂▁▁▁▁▂▂▂

0,1
best_epoch,5.0
best_val_root_mean_squared_error,0.01967
epoch,10.0
loss,0.00054
root_mean_squared_error,0.02316
val_loss,0.00068
val_root_mean_squared_error,0.02598


[34m[1mwandb[0m: Agent Starting Run: pe6oml11 with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 6


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 6}
Test r2:  0.14859983937945642


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▃▄▅▅▆▆▇▇█
loss,█▂▁▁▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▁▁▁▁▁▁▁▁▁▁
val_loss,█▄▂▂▁▁▁▁▁▁▁▁▁
val_root_mean_squared_error,█▅▃▂▁▁▁▁▁▁▁▂▂

0,1
best_epoch,7.0
best_val_root_mean_squared_error,0.02498
epoch,12.0
loss,0.00071
root_mean_squared_error,0.02658
val_loss,0.00087
val_root_mean_squared_error,0.02942


[34m[1mwandb[0m: Agent Starting Run: vnau1582 with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 7


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 7}
Test r2:  0.3801775248048206


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▂▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▁▁▁▁▁▁▁
val_loss,█▃▂▁▁▂▁▁▁▂
val_root_mean_squared_error,█▄▂▁▁▂▂▁▁▂

0,1
best_epoch,4.0
best_val_root_mean_squared_error,0.02371
epoch,9.0
loss,0.00061
root_mean_squared_error,0.02461
val_loss,0.00075
val_root_mean_squared_error,0.02737


[34m[1mwandb[0m: Agent Starting Run: decz9h3b with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 8


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 8}
Test r2:  -0.08993971745528251


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
loss,█▂▁▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▁▁▁▁▁▁▂▂▂
val_loss,█▄▂▁▁▂▁▁▂▂▂▃
val_root_mean_squared_error,█▅▃▂▁▂▁▁▂▂▂▃

0,1
best_epoch,6.0
best_val_root_mean_squared_error,0.01998
epoch,11.0
loss,0.00056
root_mean_squared_error,0.02357
val_loss,0.00092
val_root_mean_squared_error,0.03031


[34m[1mwandb[0m: Agent Starting Run: dqou2asq with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 9


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 9}
Test r2:  0.283792382017826


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▃▂▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▂▁▁▁▁▁▁
val_loss,█▄▂▂▁▁▂▁▂▂
val_root_mean_squared_error,█▄▂▂▁▁▂▂▂▂

0,1
best_epoch,4.0
best_val_root_mean_squared_error,0.15073
epoch,9.0
loss,0.01715
root_mean_squared_error,0.13097
val_loss,0.02476
val_root_mean_squared_error,0.15735


[34m[1mwandb[0m: Agent Starting Run: dmi5ynl7 with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 10


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 10}
Test r2:  0.31136567139551075


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▄▅▅▆▇▇█
loss,█▃▂▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▂▁▁▁▁▁▁▁
val_loss,█▃▂▁▁▁▂▂▂▁▂
val_root_mean_squared_error,█▄▂▁▁▁▂▂▂▁▂

0,1
best_epoch,5.0
best_val_root_mean_squared_error,0.15264
epoch,10.0
loss,0.01702
root_mean_squared_error,0.13047
val_loss,0.02426
val_root_mean_squared_error,0.15577


[34m[1mwandb[0m: Agent Starting Run: ds6mayun with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 11


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 11}
Test r2:  0.3479861080829053


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▄▅▅▆▇▇█
loss,█▂▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▁▁▁▁▁▁▁▁
val_loss,█▃▂▁▁▁▁▁▁▁▁
val_root_mean_squared_error,█▄▃▁▁▁▁▁▁▁▂

0,1
best_epoch,5.0
best_val_root_mean_squared_error,0.0318
epoch,10.0
loss,0.00092
root_mean_squared_error,0.03027
val_loss,0.00123
val_root_mean_squared_error,0.03509


[34m[1mwandb[0m: Agent Starting Run: esy36sln with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 12


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 12}
Test r2:  0.098510968795539


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▂▃▃▄▅▅▆▆▇▇█
loss,█▂▁▁▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▁▁▁▁▁▁▁▁▁▁
val_loss,█▃▂▂▁▁▂▁▂▂▁▁▂
val_root_mean_squared_error,█▄▂▂▁▁▂▁▃▂▂▁▂

0,1
best_epoch,7.0
best_val_root_mean_squared_error,0.04508
epoch,12.0
loss,0.00157
root_mean_squared_error,0.03956
val_loss,0.0025
val_root_mean_squared_error,0.05003


[34m[1mwandb[0m: Agent Starting Run: 2crs9jb9 with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 13


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 13}
Test r2:  0.29473057704371775


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▂▃▄▅▅▆▇█
loss,█▂▁▁▁▁▁▁▁
root_mean_squared_error,█▂▁▁▁▁▁▁▁
val_loss,█▃▂▁▂▂▂▂▂
val_root_mean_squared_error,█▃▂▁▂▂▂▂▂

0,1
best_epoch,3.0
best_val_root_mean_squared_error,0.05942
epoch,8.0
loss,0.00223
root_mean_squared_error,0.04723
val_loss,0.00372
val_root_mean_squared_error,0.06098


[34m[1mwandb[0m: Agent Starting Run: ms4du2gp with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	task_idx: 14


Training config  {'epochs': 50, 'learning_rate': 0.001, 'task_idx': 14}
Test r2:  0.09877905340779969


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁
root_mean_squared_error,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
val_loss,█▅▃▃▂▂▁▁▁▁▁▁▁▁▁
val_root_mean_squared_error,█▅▄▃▂▂▁▁▁▁▁▁▁▁▁

0,1
best_epoch,9.0
best_val_root_mean_squared_error,1.02263
epoch,14.0
loss,0.52543
root_mean_squared_error,0.72487
val_loss,1.04719
val_root_mean_squared_error,1.02332
