In [None]:
# Versions are pinned to the ones this notebook was last executed with, so a
# re-run resolves the same packages. %pip installs into the running kernel's
# environment.
%pip install -q wandb==0.15.3 tensorflow_addons==0.20.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.15.3-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow_addons
  Downloading tensorflow_addons-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (591 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m591.0/591.0 kB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.24.0-py2.py3-none-any.whl (206 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m206.5/206.5 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00

In [None]:
import pandas as pd
import numpy as np

import tensorflow as tf
import tensorflow.keras as ks
import tensorflow_addons as tfa
from tensorflow.keras import backend as K

from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, mean_poisson_deviance, 
    brier_score_loss, roc_auc_score, roc_curve, RocCurveDisplay
)

import pickle
from datetime import datetime

import wandb
# Authenticate this session with Weights & Biases. relogin = True asks for
# credentials again instead of silently reusing a previously stored login.
wandb.login(relogin = True)



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# Prep Data Once

In [None]:
def filter_nested_array(x, keep):
  """Project a sequence of mapping-like records onto a fixed list of keys.

  Args:
    x: Iterable of records that support ``record[key]`` lookup.
    keep: Keys to extract; their order becomes the column order of the result.

  Returns:
    An object-dtype NumPy array with one row per record and one column per
    entry of ``keep``. An empty ``x`` yields an empty one-dimensional array.
  """
  rows = []
  for record in x:
    rows.append([record[field] for field in keep])

  return np.array(rows, dtype = 'object')

def prepare_row(row):
  # Figure out Claims
  claims_keep_key = ['bi_ind', 'coll_ind', 'comp_ind', 'ers_ind', 'mpc_ind', 'pd_ind', 'ubi_ind',
                      'veh_had_bi_cov_ind', 'veh_had_coll_cov_ind', 'veh_had_comp_cov_ind', 'veh_had_ers_cov_ind', 
                     'veh_had_mpc_cov_ind', 'veh_had_pd_cov_ind', 'veh_had_ubi_cov_ind']
                      
  other_claims = filter_nested_array(row['other_claims'], claims_keep_key)
  other_claim_cnt = len(other_claims)
  if other_claim_cnt > 0:
    other_claims = np.append(other_claims, np.zeros([len(other_claims),1]), 1)

  veh_claims = filter_nested_array(row['vehicle_claims'], claims_keep_key)
  claim_cnt = len(veh_claims)
  if claim_cnt > 0:
    veh_claims = np.append(veh_claims, np.ones([len(veh_claims),1]), 1)

  if claim_cnt + other_claim_cnt == 0:
      all_claims = np.array([[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])
  elif claim_cnt == 0:
    all_claims = other_claims
  elif other_claim_cnt == 0:
    all_claims = veh_claims
  else:
    all_claims = np.append(veh_claims, other_claims, axis = 0)

  all_claims = tf.ragged.constant([all_claims.astype('float16')], ragged_rank = 1, inner_shape = (15,))

  # Figure Out Drivers
  drivers = filter_nested_array(row['driver_info'], ['driver_age', 'driver_gender', 'driver_tenure'])
  drivers[:, 0] = (drivers[:, 0] - 50)/50
  drivers[:, 1] = np.where(drivers[:, 1] == 'm', 1, 0)
  drivers[:, 2] = (drivers[:, 2] - 10)/10
  drivers = tf.ragged.constant([drivers.astype('float16')], ragged_rank = 1, inner_shape = (3,))

  # Figure out Vehicles
  vehicles = filter_nested_array(row['household_vehicles_info'], ['this_vehicle_ind', 'vehicle_age', 'vehicle_type', 'vehicle_years_owned'])
  vehicles[:, 1] = (vehicles[:, 1] - 15)/15
  vehicles[:, 3] = (vehicles[:, 3] - 15)/15

  veh_type = vehicles[:, 2]
  vehicles[:, 2] = np.where(veh_type == 'van', 1, 0) + np.where(veh_type == 'sedan', 2, 0) + np.where(veh_type == 'sports car', 3, 0) + np.where(veh_type == 'suv', 4, 0)
  vehicles = tf.ragged.constant([vehicles.astype('float16')], ragged_rank = 1, inner_shape = (4,))

  other = [(row['credit_score'] - 600)/500,
           (row['garaging_location'] == 'country') * 1 + (row['garaging_location'] == 'downtown') * 2, 
           (row['household_tenure'] - 15)/10,
           (row['multiline_houses']/2),
           row['multiline_rental'],
           row['multiline_personal_article_policy'],
           row['multiline_personal_liability_umbrella'],
           (row['vehicle_count']-3)/3,
           (row['annual_mileage'] - 10000)/10000,
           (row['vehicle_age'] - 15)/15,
           np.where(row['vehicle_type'] == 'van', 1, 0) + np.where(row['vehicle_type'] == 'sedan', 2, 0) + np.where(row['vehicle_type'] == 'sports car', 3, 0) + np.where(row['vehicle_type'] == 'suv', 4, 0),
           (row['vehicle_years_owned'] - 10)/15,
           (row['max_driver_age'] - 45)/45,
           (row['min_driver_age'] - 45)/45,
           (row['mean_driver_age'] - 45)/45,
           (row['min_driver_tenure'] - 30)/30,
           (row['youthful_driver_count'])/4,
           row['driver_count']/5,
           row['coverage_bi'],
           row['coverage_coll'],
           row['coverage_comp'],
           row['coverage_ers'],
           row['coverage_mpc'],
           row['coverage_pd'],
           row['coverage_ubi']      
           ]
  other = tf.constant(value = np.array(other, dtype = 'float16'))

  target = [
      row['vehicle_claim_cnt_pd_0'],
      row['vehicle_claim_cnt_coll_0'],
      row['vehicle_claim_cnt_bi_0'],
      row['vehicle_claim_cnt_mpc_0']
  ]

  target = tf.constant(value = np.array(target, dtype = 'float16'))

  result = {
      'driver_info': drivers,
      'vehicle_info': vehicles,
      'claims_info': all_claims,
      'other_data': other,
      'target': target
  }

  return result

def ragged(y):
  """Concatenate the tensors held in column ``y`` (anything with ``to_list()``) along axis 0."""
  return tf.concat(y.to_list(), axis = 0)

def prep_one_datas(features):
  """Assemble model inputs and targets from a frame of prepared rows.

  Args:
    features: Table indexed by column name that provides the columns
      ``driver_info``, ``vehicle_info``, ``claims_info``, ``other_data`` and
      ``target``.

  Returns:
    ``(x, y)`` where ``x`` is a list ordered as driver, vehicle, claims and
    other-data inputs, and ``y`` is the target tensor.
  """
  ragged_columns = ('driver_info', 'vehicle_info', 'claims_info')

  # The three per-entity columns are concatenated row-wise; the dense columns
  # are converted from a list of per-row tensors.
  inputs = [ragged(features[column]) for column in ragged_columns]
  inputs.append(tf.convert_to_tensor(features['other_data'].to_list()))

  labels = tf.convert_to_tensor(features['target'].to_list())

  return inputs, labels

In [None]:
# Download the versioned dataset inside a dedicated W&B run, then convert each
# split into the tensors used for training and evaluation. The results are
# left in notebook-level variables (train_x/train_y, test_x/test_y,
# val_x/val_y) that later cells read.
with wandb.init(
    project="claims_modeling",
    group = 'Data Prep',
    name = f'Data Prep for NN - {datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}',
    notes="Prep for NN Training, No Model Build!",
    tags=["data"],
) as run:
    datas = run.use_artifact('msds_498_claims_modeling/claims_modeling/sythetic_data:v5')
    # The parquet paths below rely on the artifact being downloaded to 'datasets'.
    directory = datas.download(root = 'datasets')

    # Row-wise conversion, one split at a time, in train/test/validation order.
    train_features, test_features, val_features = (
        pd.read_parquet(split_path).apply(prepare_row, axis = 1, result_type = 'expand')
        for split_path in (
            'datasets/split=train',
            'datasets/split=test',
            'datasets/split=validation',
        )
    )

    (train_x, train_y), (test_x, test_y), (val_x, val_y) = (
        prep_one_datas(split_features)
        for split_features in (train_features, test_features, val_features)
    )

[34m[1mwandb[0m: Currently logged in as: [33mtylerrosacker2022[0m ([33mmsds_498_claims_modeling[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Downloading large artifact sythetic_data:v5, 153.76MB. 3 files... 
[34m[1mwandb[0m:   3 of 3 files downloaded.  
Done. 0:0:7.5


# Model Train Code

In [None]:
def define_loss(weights):
  """Create a loss that is a weighted sum of per-target Poisson losses.

  Args:
    weights: Indexable with at least four entries; ``weights[i]`` scales the
      Poisson loss of target column ``i``.

  Returns:
    A function ``weighted_loss(y_true, y_pred)`` that reads the first four
    columns of both arguments and returns the weighted total.
  """
  def weighted_loss(y_true, y_pred):
    poisson = tf.keras.losses.Poisson()

    total = 0
    for column in range(4):
      total += weights[column] * poisson(y_true[:, column], y_pred[:, column])

    return total

  return weighted_loss

def build_basic_ragged_layers(name, input_node, width, dropout, agged = True):
  dense_1 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(width, 
                                        name = f"{name}_Info_Dense",
                                        activation = tfa.activations.mish,
                                        kernel_initializer='lecun_normal'
                                        ))(input_node)
  normed = tf.keras.layers.TimeDistributed(tf.keras.layers.BatchNormalization())(dense_1)  
  drop = tf.keras.layers.TimeDistributed(tf.keras.layers.AlphaDropout(dropout))(normed)
  
  # dense_2 = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(width, 
  #                                       name = f"{name}_Info_Dense_2",
  #                                       activation = tfa.activations.mish,
  #                                       kernel_initializer='lecun_normal'
  #                                       ))(drop)
  # normed_2 = tf.keras.layers.TimeDistributed(tf.keras.layers.BatchNormalization())(dense_2)
  # drop_2 = tf.keras.layers.TimeDistributed(tf.keras.layers.AlphaDropout(dropout))(normed_2)

  # combined = tf.keras.layers.Add()([drop, drop_2])

  if agged:
    agged_sum = tf.math.reduce_sum(drop, 1)
    agged_max = tf.math.reduce_max(drop, 1)
    return tf.keras.layers.Concatenate()([agged_sum, agged_max])
  else:
    return drop

def build_basic_combined_layers(name, input_node, width, dropout):
  dense = tf.keras.layers.Dense(width, 
                                  name = f"Res_Layer_{name}",
                                  activation = tfa.activations.mish,
                                  kernel_initializer='lecun_normal'
                                  )(input_node)
  #leaky =  tf.keras.layers.LeakyReLU(alpha=leakiness)(dense)
  drop = tf.keras.layers.AlphaDropout(dropout)(dense)
  norm = tf.keras.layers.BatchNormalization()(drop)

  return norm


def build_model(run):
  # driver_info
  driver_input = ks.Input(shape = (None, 3), name = 'driver_info')
  driver_agged = build_basic_ragged_layers('driver', driver_input, 
                                           width = run.config['driver_dense'],
                                           dropout = run.config['dropout'],
                                           agged = False)

  # vehicle_info
  vehicle_input = ks.Input(shape = (None, 4), name = 'vehicle_info')
  vehicle_agged = build_basic_ragged_layers('vehicle', vehicle_input, 
                                           width = run.config['veh_dense'],
                                           dropout = run.config['dropout'],
                                           agged = False)
  
  driver_vehicle_cross = tf.keras.layers.MultiHeadAttention(num_heads=run.config['attention_heads'], 
                                                            key_dim = run.config['attention_dims'], 
                                                            output_shape = run.config['attention_output_dims']
                                                            )(vehicle_agged, driver_agged)
  attent_normed = tf.keras.layers.TimeDistributed(tf.keras.layers.BatchNormalization())(driver_vehicle_cross)  
  attent_drop = tf.keras.layers.TimeDistributed(tf.keras.layers.AlphaDropout(run.config['dropout']))(attent_normed)
  agged_att_sum = tf.math.reduce_sum(attent_drop, 1)
  agged_att_max = tf.math.reduce_max(attent_drop, 1)

  # claims_info
  claims_input = ks.Input(shape = (None, 15), name = 'claims_info')
  claims_agged = build_basic_ragged_layers('claims', claims_input, 
                                           width = run.config['claim_dense'],
                                        dropout = run.config['dropout'])

  # other_data
  other_input = ks.Input(shape = (25,), name = 'other_data')
  dense_other = build_basic_combined_layers(name = 'other', 
                                        input_node = other_input,
                                        width = run.config['dense_other_block_width'],
                                        dropout = run.config['dropout']
                                        )
  target_other = tf.keras.layers.Dense(4, 
                                      bias_initializer = tf.keras.initializers.Constant(value=run.config['initial_bias']),
                                      activation=tf.keras.activations.exponential, 
                                      name = 'target_other')(dense_other)

  #driver_agged, vehicle_agged, 

  combined = tf.keras.layers.Concatenate()([claims_agged, agged_att_sum, agged_att_max])
  combined_norm = tf.keras.layers.BatchNormalization()(combined)


  dense_1 = build_basic_combined_layers(name = '1', 
                                        input_node = combined_norm,
                                        width = run.config['dense_res_block_width'],
                                        dropout = run.config['dropout']
                                        )
  
  # dense_2 = build_basic_combined_layers(name = '2', 
  #                                       input_node = dense_1,
  #                                       width = run.config['dense_res_block_width'],
  #                                       dropout = run.config['dropout']
  #                                       )

  # res_layer = tf.keras.layers.Add(name = "Combined_Res_Result")([dense_1, dense_2])
  final_features = tf.keras.layers.Concatenate()([dense_1, combined_norm])

  target_ragged = tf.keras.layers.Dense(4, 
                                      bias_initializer = tf.keras.initializers.Constant(value=0),
                                      activation=tf.keras.activations.exponential, 
                                      name = 'target')(final_features)

  final_prediction = tf.keras.layers.Multiply(name = "Final_Prediction")([target_ragged, target_other])

  model = ks.Model(inputs = [driver_input, vehicle_input, claims_input, other_input],
                outputs = [final_prediction])

  model.compile(
                optimizer=tf.keras.optimizers.experimental.AdamW(
                    learning_rate = run.config['learning_rate'],
                    weight_decay = run.config['weight_decay'], 
                    global_clipnorm=5.0,
                    clipvalue=1,
                    amsgrad=True
                ), 
                loss = define_loss(run.config['loss_weight']))
  
  simple_model = ks.Model(inputs = [other_input],
                outputs = [target_other])

  simple_model.compile(
                optimizer=tf.keras.optimizers.experimental.AdamW(
                    learning_rate = run.config['learning_rate'],
                    weight_decay = run.config['weight_decay'], 
                    global_clipnorm=5.0,
                    clipvalue=1,
                    amsgrad=True
                ), 
                loss = define_loss(run.config['loss_weight']))
  
  return model, simple_model

In [None]:
def log_stats(dataset_name, prediction, truth):
  """Log evaluation metrics for the first target column to W&B.

  Args:
    dataset_name: Prefix for every metric key (e.g. ``"train"``).
    prediction: 2-D array-like of predictions; only column 0 is used.
    truth: 2-D array-like of observed values; only column 0 is used.

  Side effects:
    Issues one ``wandb.log`` call containing a prediction histogram, MSE,
    MAE, mean Poisson deviance, Brier score, ROC AUC and the ROC figure.
  """
  observed = truth[:, 0]

  # Floor the predictions at 0.001 before they are used in the metrics.
  expected = np.clip(prediction[:, 0], a_min = 0.001, a_max = np.inf)

  # Turn the predicted value into P(count > 0) = 1 - exp(-prediction) and the
  # observed count into a 0/1 "had at least one" label for the binary metrics.
  probability_any = np.clip(1 - np.exp(-expected), a_min = 0, a_max = 1)
  observed_any = np.clip(observed, a_min = 0, a_max = 1)

  fpr, tpr, _ = roc_curve(observed_any, probability_any)
  roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()

  wandb.log({
      f"{dataset_name}_prediction_dist": wandb.Histogram(expected),
      f"{dataset_name}_mse": mean_squared_error(observed, expected),
      f"{dataset_name}_mae": mean_absolute_error(observed, expected),
      f"{dataset_name}_mean_poisson_deviance": mean_poisson_deviance(observed, expected),
      f"{dataset_name}_brier_loss": brier_score_loss(observed_any, probability_any),
      f"{dataset_name}_auc_score": roc_auc_score(observed_any, probability_any),
      f"{dataset_name}_roc": roc_display.figure_
  })
  


In [None]:
def _recompile_after_trainable_change(model, run):
  """Compile ``model`` again so that edited ``trainable`` flags are honoured.

  The Keras transfer-learning guide states that ``compile()`` fixes which
  weights a model trains and that it must be called again after any change to
  ``trainable``. Without this, a model that has already been fitted keeps
  using its previously built training step.

  A fresh optimizer is created with the same arguments ``build_model`` passes,
  so optimizer state does not carry over between training phases.
  """
  model.compile(
      optimizer=tf.keras.optimizers.experimental.AdamW(
          learning_rate = run.config['learning_rate'],
          weight_decay = run.config['weight_decay'],
          global_clipnorm=5.0,
          clipvalue=1,
          amsgrad=True
      ),
      loss = define_loss(run.config['loss_weight']))


def main(config = None):
  """Train and evaluate one configuration inside a W&B run.

  Steps:
    1. Build the full model and the "other data only" simple model.
    2. Upload a rendering of the architecture as an artifact.
    3. Pre-train the simple model on ``other_data`` alone.
    4. Train the full model - either with the pre-trained branch frozen and
       then unfrozen ("burn in"), or fully trainable throughout.
    5. Log metrics for the train/test/val splits and save the model.

  Reads the notebook-level tensors ``train_x``/``train_y``,
  ``test_x``/``test_y`` and ``val_x``/``val_y``.

  Args:
    config: Optional hyperparameter mapping handed to ``wandb.init``; a sweep
      agent calls this function without arguments.
  """
  with wandb.init(
      project="claims_modeling",
      group = 'NN Template V7 - Pretrained Simple',
      name = f'NN Train - {datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}',
      notes="Mutli-target Architecture NN",
      tags=["nn"],
      save_code = True,
      sync_tensorboard=True,
      config=config) as run:
    # Declares the dataset artifact as an input of this run; the tensors
    # themselves come from the notebook-level variables.
    datas = run.use_artifact('msds_498_claims_modeling/claims_modeling/sythetic_data:v5')
    model, simple_model = build_model(run)

    tf.keras.utils.plot_model(
      model,
      to_file='model.png',
      show_shapes=True,
      show_layer_names=True,
      show_layer_activations=True,
      show_trainable=True
    )

    artifact = wandb.Artifact(
        name='model_arch_graph',
        type='image'
        )

    artifact.add_file(local_path='model.png')
    run.log_artifact(artifact)

    # The epoch budget is split in quarters: one for pre-training the simple
    # model and three (2 + 1, or 3 at once) for the full model.
    epochs = run.config['epochs'] // 4 + 1

    # NOTE(review): the "test" split is used as validation data during
    # fitting, while "val" is only scored at the end - confirm the naming.
    simple_model.fit(train_x[3],
              train_y,
              epochs = epochs,
              batch_size = run.config['batch_size'],
              validation_data=(test_x[3], test_y))


    tensorboard_callback = tf.keras.callbacks.TensorBoard(histogram_freq=1)

    # Treat as transfer learning or nah
    if run.config['keep_trainable'] == False:
      # Phase 1: freeze the layers shared with the pre-trained simple model.
      simple_model.trainable = False
      _recompile_after_trainable_change(model, run)
      model.fit(train_x,
                train_y,
                epochs = 2 * epochs,
                batch_size = run.config['batch_size'],
                validation_data=(test_x, test_y),
                callbacks=[tensorboard_callback])

      # Now really burn in!
      # Phase 2: unfreeze everything. The re-compile is what makes the
      # unfreeze take effect for the already-fitted model.
      simple_model.trainable = True
      _recompile_after_trainable_change(model, run)
      model.fit(train_x,
                train_y,
                epochs = epochs,
                batch_size = run.config['batch_size'] * run.config['burnin_multiplier'],
                validation_data=(test_x, test_y),
                callbacks=[tensorboard_callback])

    else:
      model.fit(train_x,
                train_y,
                epochs = 3 * epochs,
                batch_size = run.config['batch_size'],
                validation_data=(test_x, test_y),
                callbacks=[tensorboard_callback])


    train_pred = model.predict(train_x, batch_size = 1024)
    test_pred = model.predict(test_x, batch_size = 1024)
    val_pred = model.predict(val_x, batch_size = 1024)

    log_stats('train', train_pred, train_y)
    log_stats('test', test_pred, test_y)
    log_stats('val', val_pred, val_y)

    model.save('model')
    # NOTE(review): 'model' has no file extension, so this is presumably a
    # SavedModel directory - confirm wandb.save uploads its contents.
    wandb.save('model')

# Grid Search!

In [None]:
# Example of a single manual run, kept as a reference for the configuration
# keys that main() and build_model() read. Uncomment to train once without a
# sweep.
#
# main(config = {
#         "epochs": 50,
#         "learning_rate": 0.01,
#         "weight_decay": 0.00001,
#         "relu_leakiness": 0.015,   # not read by the code visible in this notebook
#         "driver_dense": 15,
#         "veh_dense": 20,
#         "claim_dense": 25,
#         "attention_heads": 1,
#         "attention_dims": 10,
#         "attention_output_dims": 10,
#         "dense_res_block_width": 25,
#         "dense_other_block_width": 100,
#         "dropout": 0.05,
#         "batch_size": 1024,
#         "initial_bias": -2.5,
#         # Other weightings tried: [1, 0, 0, 0] [0.4, 0.2, 0.2, 0.2] [0.55, 0.15, 0.15, 0.15]
#         # [0.85, 0.05, 0.05, 0.05] [0.75, 0.15, 0.05, 0.05] [0.4, 0.3, 0.1, 0.1] [0.25, 0.25, 0.25, 0.25]
#         "loss_weight": [0.7, 0.1, 0.1, 0.1],
#         "keep_trainable": False,
#         "burnin_multiplier": 16
#   })

# Run up to 30 configurations from the existing sweep; each one calls main().
wandb.agent(
    sweep_id="xw6922l1",
    function=main,
    entity = "msds_498_claims_modeling",
    project = "claims_modeling",
    count = 30,
)

[34m[1mwandb[0m: Agent Starting Run: j7t6n4pe with config:
[34m[1mwandb[0m: 	attention_dims: 250
[34m[1mwandb[0m: 	attention_heads: 3
[34m[1mwandb[0m: 	attention_output_dims: 25
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 25
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 25
[34m[1mwandb[0m: 	dropout: 0.15
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	loss_weight: [0.5, 0.25, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 250
[34m[1mwandb[0m: 	weight_decay: 1e-05


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇███████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██▁▁▂▂▂▂▃▃▃▃▄▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,11557.0
test_auc_score,0.61425
test_brier_loss,0.07067
test_mae,0.14882
test_mean_poisson_deviance,0.4026
test_mse,0.0792
train/epoch_loss,0.21858
train/global_step,12.0
train_auc_score,0.62241
train_brier_loss,0.06981


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hd8rre1q with config:
[34m[1mwandb[0m: 	attention_dims: 10
[34m[1mwandb[0m: 	attention_heads: 1
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 1
[34m[1mwandb[0m: 	claim_dense: 5
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 200
[34m[1mwandb[0m: 	driver_dense: 25
[34m[1mwandb[0m: 	dropout: 0.15
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	loss_weight: [0.9, 0.04, 0.03, 0.03]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 200
[34m[1mwandb[0m: 	weight_decay: 1e-06


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▂▁▂▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇███▁▁▂▂▂▃▃▃▃▄▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,16809.0
test_auc_score,0.60351
test_brier_loss,0.07211
test_mae,0.17984
test_mean_poisson_deviance,0.41824
test_mse,0.08106
train/epoch_loss,0.26989
train/global_step,12.0
train_auc_score,0.60937
train_brier_loss,0.07134


[34m[1mwandb[0m: Agent Starting Run: fzie28zg with config:
[34m[1mwandb[0m: 	attention_dims: 50
[34m[1mwandb[0m: 	attention_heads: 1
[34m[1mwandb[0m: 	attention_output_dims: 250
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 25
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 250
[34m[1mwandb[0m: 	driver_dense: 200
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss_weight: [0.5, 0.25, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 150
[34m[1mwandb[0m: 	weight_decay: 1e-07


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇█████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇██▁▁▂▂▂▂▂▃▃▃▄▄▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,1456.0
test_auc_score,0.6113
test_brier_loss,0.07299
test_mae,0.15935
test_mean_poisson_deviance,0.42189
test_mse,0.08409
train/epoch_loss,0.21966
train/global_step,12.0
train_auc_score,0.61613
train_brier_loss,0.07204


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 437ace50 with config:
[34m[1mwandb[0m: 	attention_dims: 50
[34m[1mwandb[0m: 	attention_heads: 3
[34m[1mwandb[0m: 	attention_output_dims: 100
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 25
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 200
[34m[1mwandb[0m: 	driver_dense: 200
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.015
[34m[1mwandb[0m: 	loss_weight: [0.9, 0.04, 0.03, 0.03]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 5
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇███████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇██▁▁▁▂▂▂▂▃▃▃▃▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,2899.0
test_auc_score,0.61373
test_brier_loss,0.07141
test_mae,0.15021
test_mean_poisson_deviance,0.41018
test_mse,0.08057
train/epoch_loss,0.26691
train/global_step,12.0
train_auc_score,0.61946
train_brier_loss,0.07053


[34m[1mwandb[0m: Agent Starting Run: w7ygz1ec with config:
[34m[1mwandb[0m: 	attention_dims: 50
[34m[1mwandb[0m: 	attention_heads: 1
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 150
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 25
[34m[1mwandb[0m: 	dropout: 0.6
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 150
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




Epoch 1/14



Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14




Epoch 1/7



Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




0,1
global_step,▁▁▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▂▂▃▃▃▄▅▅▅▆▆▆▇▇█▁▁▂▂▃▃▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,784.0
test_auc_score,0.61072
test_brier_loss,0.07721
test_mae,0.18028
test_mean_poisson_deviance,0.4482
test_mse,0.09604
train/epoch_loss,0.42811
train/global_step,6.0
train_auc_score,0.61643
train_brier_loss,0.07618


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zmwhp50a with config:
[34m[1mwandb[0m: 	attention_dims: 150
[34m[1mwandb[0m: 	attention_heads: 5
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 5
[34m[1mwandb[0m: 	dense_other_block_width: 175
[34m[1mwandb[0m: 	dense_res_block_width: 25
[34m[1mwandb[0m: 	driver_dense: 200
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.015
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 200
[34m[1mwandb[0m: 	weight_decay: 1e-05


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇███████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇█▁▁▁▂▂▂▂▃▃▃▃▄▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,2899.0
test_auc_score,0.61337
test_brier_loss,0.07098
test_mae,0.14874
test_mean_poisson_deviance,0.40593
test_mse,0.0797
train/epoch_loss,0.26087
train/global_step,12.0
train_auc_score,0.61975
train_brier_loss,0.07011


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zvagdcml with config:
[34m[1mwandb[0m: 	attention_dims: 100
[34m[1mwandb[0m: 	attention_heads: 2
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 200
[34m[1mwandb[0m: 	dense_other_block_width: 175
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 250
[34m[1mwandb[0m: 	dropout: 0.6
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 5
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇███████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇██▁▁▁▂▂▂▂▃▃▃▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,5798.0
test_auc_score,0.60959
test_brier_loss,0.08112
test_mae,0.19932
test_mean_poisson_deviance,0.46586
test_mse,0.10457
train/epoch_loss,0.26295
train/global_step,12.0
train_auc_score,0.61408
train_brier_loss,0.0801


[34m[1mwandb[0m: Agent Starting Run: 0xcjdw7r with config:
[34m[1mwandb[0m: 	attention_dims: 150
[34m[1mwandb[0m: 	attention_heads: 5
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 8
[34m[1mwandb[0m: 	claim_dense: 250
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 200
[34m[1mwandb[0m: 	driver_dense: 150
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.015
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 200
[34m[1mwandb[0m: 	weight_decay: 1e-07


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇█████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇██▁▁▂▂▂▃▃▃▄▄▄▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,11908.0
test_auc_score,0.61366
test_brier_loss,0.07151
test_mae,0.14669
test_mean_poisson_deviance,0.4122
test_mse,0.08081
train/epoch_loss,0.26098
train/global_step,12.0
train_auc_score,0.61934
train_brier_loss,0.07061


[34m[1mwandb[0m: Agent Starting Run: rirzftn5 with config:
[34m[1mwandb[0m: 	attention_dims: 50
[34m[1mwandb[0m: 	attention_heads: 5
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 150
[34m[1mwandb[0m: 	dense_other_block_width: 100
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 100
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.015
[34m[1mwandb[0m: 	loss_weight: [1, 0, 0, 0]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 150
[34m[1mwandb[0m: 	weight_decay: 1e-05


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇██▁▁▂▂▂▂▂▃▃▄▄▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,5798.0
test_auc_score,0.61333
test_brier_loss,0.07077
test_mae,0.1488
test_mean_poisson_deviance,0.40382
test_mse,0.07935
train/epoch_loss,0.27472
train/global_step,12.0
train_auc_score,0.62219
train_brier_loss,0.06987


[34m[1mwandb[0m: Agent Starting Run: twdcbfz0 with config:
[34m[1mwandb[0m: 	attention_dims: 25
[34m[1mwandb[0m: 	attention_heads: 1
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 5
[34m[1mwandb[0m: 	dense_other_block_width: 175
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 200
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss_weight: [1, 0, 0, 0]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 100
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▃▃▄▄▄▄▅▅▅▅▅▆▆▇▇▇▇██▁▁▁▂▂▂▂▃▃▃▄▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,2899.0
test_auc_score,0.61375
test_brier_loss,0.07097
test_mae,0.15141
test_mean_poisson_deviance,0.40519
test_mse,0.07971
train/epoch_loss,0.27506
train/global_step,12.0
train_auc_score,0.6202
train_brier_loss,0.0701


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u1sg08sq with config:
[34m[1mwandb[0m: 	attention_dims: 250
[34m[1mwandb[0m: 	attention_heads: 3
[34m[1mwandb[0m: 	attention_output_dims: 25
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 25
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 5
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	loss_weight: [0.9, 0.04, 0.03, 0.03]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 250
[34m[1mwandb[0m: 	weight_decay: 1e-06


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇███████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██▁▁▂▂▂▂▂▃▃▃▄▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,5798.0
test_auc_score,0.61264
test_brier_loss,0.07101
test_mae,0.1526
test_mean_poisson_deviance,0.4056
test_mse,0.07971
train/epoch_loss,0.26753
train/global_step,12.0
train_auc_score,0.61744
train_brier_loss,0.07021


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ga3826e8 with config:
[34m[1mwandb[0m: 	attention_dims: 150
[34m[1mwandb[0m: 	attention_heads: 3
[34m[1mwandb[0m: 	attention_output_dims: 200
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	burnin_multiplier: 8
[34m[1mwandb[0m: 	claim_dense: 25
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 200
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 25
[34m[1mwandb[0m: 	weight_decay: 1e-06


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██▁▁▁▂▂▂▃▃▃▃▄▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,5967.0
test_auc_score,0.61392
test_brier_loss,0.07207
test_mae,0.1537
test_mean_poisson_deviance,0.4139
test_mse,0.08238
train/epoch_loss,0.26087
train/global_step,12.0
train_auc_score,0.61916
train_brier_loss,0.07114


[34m[1mwandb[0m: Agent Starting Run: rd7b8ibh with config:
[34m[1mwandb[0m: 	attention_dims: 100
[34m[1mwandb[0m: 	attention_heads: 2
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 100
[34m[1mwandb[0m: 	dense_other_block_width: 150
[34m[1mwandb[0m: 	dense_res_block_width: 175
[34m[1mwandb[0m: 	driver_dense: 250
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	loss_weight: [0.5, 0.25, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 100
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇███▁▁▂▂▂▂▃▃▄▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,2899.0
test_auc_score,0.61185
test_brier_loss,0.07299
test_mae,0.16021
test_mean_poisson_deviance,0.42069
test_mse,0.08424
train/epoch_loss,0.21923
train/global_step,12.0
train_auc_score,0.61837
train_brier_loss,0.07202


[34m[1mwandb[0m: Agent Starting Run: qlirq78w with config:
[34m[1mwandb[0m: 	attention_dims: 50
[34m[1mwandb[0m: 	attention_heads: 5
[34m[1mwandb[0m: 	attention_output_dims: 150
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 150
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 250
[34m[1mwandb[0m: 	dropout: 0.15
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 100
[34m[1mwandb[0m: 	weight_decay: 1e-05


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇██▁▁▁▂▂▂▃▃▃▄▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,5798.0
test_auc_score,0.61441
test_brier_loss,0.07064
test_mae,0.14705
test_mean_poisson_deviance,0.40245
test_mse,0.07915
train/epoch_loss,0.2604
train/global_step,12.0
train_auc_score,0.62349
train_brier_loss,0.06976


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j4nd208w with config:
[34m[1mwandb[0m: 	attention_dims: 100
[34m[1mwandb[0m: 	attention_heads: 3
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 8
[34m[1mwandb[0m: 	claim_dense: 150
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 200
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss_weight: [0.9, 0.04, 0.03, 0.03]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 5
[34m[1mwandb[0m: 	weight_decay: 1e-07


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇█████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▂▂▂▂▂▂▃▃▄▄▄▄▅▅▅▅▆▆▇▇▇▇██▁▁▁▂▂▂▃▃▃▄▄▄▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,11908.0
test_auc_score,0.61227
test_brier_loss,0.07314
test_mae,0.15896
test_mean_poisson_deviance,0.42285
test_mse,0.08432
train/epoch_loss,0.2671
train/global_step,12.0
train_auc_score,0.61826
train_brier_loss,0.07221


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g4pifn06 with config:
[34m[1mwandb[0m: 	attention_dims: 50
[34m[1mwandb[0m: 	attention_heads: 1
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	burnin_multiplier: 8
[34m[1mwandb[0m: 	claim_dense: 5
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 150
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 150
[34m[1mwandb[0m: 	weight_decay: 1e-07


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




Epoch 1/14



Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14




Epoch 1/7



Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




0,1
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇█████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▃▃▃▃▃▃▂▂▂▂▂▃▂▂▁▁▁▁▁▁
train/global_step,▁▁▂▂▂▂▃▃▄▄▅▅▆▆▆▇▇▇█▁▁▂▂▂▃▃▃▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,3213.0
test_auc_score,0.61413
test_brier_loss,0.07071
test_mae,0.14606
test_mean_poisson_deviance,0.40339
test_mse,0.07926
train/epoch_loss,0.26094
train/global_step,6.0
train_auc_score,0.61957
train_brier_loss,0.06989


[34m[1mwandb[0m: Agent Starting Run: sfnumxjr with config:
[34m[1mwandb[0m: 	attention_dims: 50
[34m[1mwandb[0m: 	attention_heads: 2
[34m[1mwandb[0m: 	attention_output_dims: 100
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 100
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 200
[34m[1mwandb[0m: 	driver_dense: 100
[34m[1mwandb[0m: 	dropout: 0.05
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.015
[34m[1mwandb[0m: 	loss_weight: [1, 0, 0, 0]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 5
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




Epoch 1/14



Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14




Epoch 1/7



Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




0,1
global_step,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▂▂▃▃▃▄▅▅▆▆▇▇▇█▁▂▂▂▃▃▃▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,6223.0
test_auc_score,0.61416
test_brier_loss,0.07055
test_mae,0.14591
test_mean_poisson_deviance,0.40131
test_mse,0.07899
train/epoch_loss,0.27461
train/global_step,6.0
train_auc_score,0.62155
train_brier_loss,0.06973


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nls9obm2 with config:
[34m[1mwandb[0m: 	attention_dims: 10
[34m[1mwandb[0m: 	attention_heads: 1
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 100
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 250
[34m[1mwandb[0m: 	driver_dense: 150
[34m[1mwandb[0m: 	dropout: 0.15
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.015
[34m[1mwandb[0m: 	loss_weight: [0.9, 0.04, 0.03, 0.03]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 150
[34m[1mwandb[0m: 	weight_decay: 1e-06


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇██▁▁▂▂▂▂▂▃▃▃▄▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,11557.0
test_auc_score,0.61446
test_brier_loss,0.07066
test_mae,0.14572
test_mean_poisson_deviance,0.40278
test_mse,0.07918
train/epoch_loss,0.26649
train/global_step,12.0
train_auc_score,0.62207
train_brier_loss,0.0698


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: uu7mcycz with config:
[34m[1mwandb[0m: 	attention_dims: 200
[34m[1mwandb[0m: 	attention_heads: 2
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 8
[34m[1mwandb[0m: 	claim_dense: 150
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 250
[34m[1mwandb[0m: 	driver_dense: 100
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	loss_weight: [0.5, 0.25, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 150
[34m[1mwandb[0m: 	weight_decay: 1e-07


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇██▁▁▂▂▂▃▃▃▄▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,11908.0
test_auc_score,0.61266
test_brier_loss,0.07189
test_mae,0.1576
test_mean_poisson_deviance,0.41302
test_mse,0.08146
train/epoch_loss,0.21915
train/global_step,12.0
train_auc_score,0.6197
train_brier_loss,0.07098


[34m[1mwandb[0m: Agent Starting Run: udigrr54 with config:
[34m[1mwandb[0m: 	attention_dims: 200
[34m[1mwandb[0m: 	attention_heads: 3
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 8
[34m[1mwandb[0m: 	claim_dense: 25
[34m[1mwandb[0m: 	dense_other_block_width: 175
[34m[1mwandb[0m: 	dense_res_block_width: 25
[34m[1mwandb[0m: 	driver_dense: 150
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	loss_weight: [1, 0, 0, 0]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 100
[34m[1mwandb[0m: 	weight_decay: 1e-07


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇█████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██▁▁▁▂▂▂▂▃▃▃▄▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,11908.0
test_auc_score,0.61238
test_brier_loss,0.07216
test_mae,0.1513
test_mean_poisson_deviance,0.41758
test_mse,0.08202
train/epoch_loss,0.27528
train/global_step,12.0
train_auc_score,0.61771
train_brier_loss,0.07128


[34m[1mwandb[0m: Agent Starting Run: n8bnzc84 with config:
[34m[1mwandb[0m: 	attention_dims: 150
[34m[1mwandb[0m: 	attention_heads: 2
[34m[1mwandb[0m: 	attention_output_dims: 25
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 250
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 250
[34m[1mwandb[0m: 	driver_dense: 250
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 25
[34m[1mwandb[0m: 	weight_decay: 1e-05


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




Epoch 1/14



Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14




Epoch 1/7



Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




0,1
global_step,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇█████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▂▂▂▃▃▄▄▅▅▅▆▆▆▇▇▇█▁▁▂▂▃▃▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,6223.0
test_auc_score,0.61211
test_brier_loss,0.07169
test_mae,0.15429
test_mean_poisson_deviance,0.41192
test_mse,0.08096
train/epoch_loss,0.26134
train/global_step,6.0
train_auc_score,0.61816
train_brier_loss,0.07082


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zjemtrth with config:
[34m[1mwandb[0m: 	attention_dims: 250
[34m[1mwandb[0m: 	attention_heads: 2
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 4
[34m[1mwandb[0m: 	claim_dense: 5
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 175
[34m[1mwandb[0m: 	driver_dense: 25
[34m[1mwandb[0m: 	dropout: 0.175
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 250
[34m[1mwandb[0m: 	weight_decay: 1e-06


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




Epoch 1/14



Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14




Epoch 1/7



Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




0,1
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▄▄▄▄▄▄▄▄▄▄▃▃▃▂▁▁▁▁▁▁
train/global_step,▁▂▂▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇█▁▁▂▂▂▃▃▃▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,6790.0
test_auc_score,0.6119
test_brier_loss,0.07099
test_mae,0.15593
test_mean_poisson_deviance,0.40495
test_mse,0.07973
train/epoch_loss,0.26138
train/global_step,6.0
train_auc_score,0.61753
train_brier_loss,0.07016


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ii20l0rf with config:
[34m[1mwandb[0m: 	attention_dims: 200
[34m[1mwandb[0m: 	attention_heads: 3
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	burnin_multiplier: 8
[34m[1mwandb[0m: 	claim_dense: 5
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 25
[34m[1mwandb[0m: 	driver_dense: 25
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	loss_weight: [0.5, 0.25, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 150
[34m[1mwandb[0m: 	weight_decay: 1e-06


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆▇▇▇▇███▁▁▂▂▂▂▃▃▃▄▄▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,2990.0
test_auc_score,0.61395
test_brier_loss,0.07096
test_mae,0.15233
test_mean_poisson_deviance,0.40515
test_mse,0.07966
train/epoch_loss,0.21926
train/global_step,12.0
train_auc_score,0.61901
train_brier_loss,0.07012


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xja4xhm0 with config:
[34m[1mwandb[0m: 	attention_dims: 150
[34m[1mwandb[0m: 	attention_heads: 5
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 25
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 150
[34m[1mwandb[0m: 	driver_dense: 150
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss_weight: [0.5, 0.25, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 250
[34m[1mwandb[0m: 	weight_decay: 1e-06


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇███████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▇▇▇▇██▁▁▂▂▂▃▃▃▄▄▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,2899.0
test_auc_score,0.61262
test_brier_loss,0.07329
test_mae,0.16258
test_mean_poisson_deviance,0.42299
test_mse,0.08472
train/epoch_loss,0.21921
train/global_step,12.0
train_auc_score,0.61803
train_brier_loss,0.07239


[34m[1mwandb[0m: Agent Starting Run: 2l36l1wo with config:
[34m[1mwandb[0m: 	attention_dims: 100
[34m[1mwandb[0m: 	attention_heads: 1
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 8
[34m[1mwandb[0m: 	claim_dense: 5
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 25
[34m[1mwandb[0m: 	driver_dense: 25
[34m[1mwandb[0m: 	dropout: 0.175
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 100
[34m[1mwandb[0m: 	weight_decay: 1e-06


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇█████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▆▆▇▇▇▇▇██▁▁▂▂▂▂▃▃▃▄▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,11908.0
test_auc_score,0.61396
test_brier_loss,0.07074
test_mae,0.14961
test_mean_poisson_deviance,0.40332
test_mse,0.07931
train/epoch_loss,0.26064
train/global_step,12.0
train_auc_score,0.62137
train_brier_loss,0.06988


[34m[1mwandb[0m: Agent Starting Run: hi16bue3 with config:
[34m[1mwandb[0m: 	attention_dims: 200
[34m[1mwandb[0m: 	attention_heads: 3
[34m[1mwandb[0m: 	attention_output_dims: 25
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 8
[34m[1mwandb[0m: 	claim_dense: 25
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 150
[34m[1mwandb[0m: 	driver_dense: 250
[34m[1mwandb[0m: 	dropout: 0.15
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	loss_weight: [0.5, 0.25, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 250
[34m[1mwandb[0m: 	weight_decay: 1e-05


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




Epoch 1/14



Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14




Epoch 1/7



Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




0,1
global_step,▁▁▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
train/global_step,▁▁▂▂▂▃▃▃▄▅▅▅▆▆▇▇▇█▁▁▂▂▂▃▃▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,6412.0
test_auc_score,0.61304
test_brier_loss,0.07101
test_mae,0.15456
test_mean_poisson_deviance,0.40494
test_mse,0.07981
train/epoch_loss,0.21908
train/global_step,6.0
train_auc_score,0.61762
train_brier_loss,0.0702


[34m[1mwandb[0m: Agent Starting Run: 4mclpu5v with config:
[34m[1mwandb[0m: 	attention_dims: 100
[34m[1mwandb[0m: 	attention_heads: 1
[34m[1mwandb[0m: 	attention_output_dims: 150
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 150
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 175
[34m[1mwandb[0m: 	driver_dense: 5
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	loss_weight: [1, 0, 0, 0]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 100
[34m[1mwandb[0m: 	weight_decay: 1e-07


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇███████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇██▁▁▁▂▂▂▃▃▃▃▃▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,11557.0
test_auc_score,0.61208
test_brier_loss,0.07224
test_mae,0.15457
test_mean_poisson_deviance,0.41722
test_mse,0.08235
train/epoch_loss,0.27541
train/global_step,12.0
train_auc_score,0.61745
train_brier_loss,0.07133


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gjvtfz5j with config:
[34m[1mwandb[0m: 	attention_dims: 150
[34m[1mwandb[0m: 	attention_heads: 3
[34m[1mwandb[0m: 	attention_output_dims: 25
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 1
[34m[1mwandb[0m: 	claim_dense: 150
[34m[1mwandb[0m: 	dense_other_block_width: 250
[34m[1mwandb[0m: 	dense_res_block_width: 25
[34m[1mwandb[0m: 	driver_dense: 5
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	loss_weight: [1, 0, 0, 0]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 250
[34m[1mwandb[0m: 	weight_decay: 1e-06


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




Epoch 1/14



Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14




Epoch 1/7



Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




0,1
global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▂▂▂▃▃▃▄▄▅▅▆▆▇▇▇█▁▁▂▂▂▂▃▃▃▄▄
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,9051.0
test_auc_score,0.60966
test_brier_loss,0.07078
test_mae,0.14283
test_mean_poisson_deviance,0.4065
test_mse,0.07929
train/epoch_loss,0.27659
train/global_step,6.0
train_auc_score,0.61437
train_brier_loss,0.06998


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pur0uyay with config:
[34m[1mwandb[0m: 	attention_dims: 150
[34m[1mwandb[0m: 	attention_heads: 5
[34m[1mwandb[0m: 	attention_output_dims: 5
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 150
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 100
[34m[1mwandb[0m: 	driver_dense: 100
[34m[1mwandb[0m: 	dropout: 0.075
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	loss_weight: [0.75, 0.15, 0.05, 0.05]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 100
[34m[1mwandb[0m: 	weight_decay: 0.0001


Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




Epoch 1/26



Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26




Epoch 1/13



Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13




0,1
global_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇███████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇██▁▁▂▂▂▂▃▃▄▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,11557.0
test_auc_score,0.61298
test_brier_loss,0.07062
test_mae,0.14794
test_mean_poisson_deviance,0.40208
test_mse,0.07909
train/epoch_loss,0.2601
train/global_step,12.0
train_auc_score,0.62538
train_brier_loss,0.06968


[34m[1mwandb[0m: Agent Starting Run: 7oeq0maf with config:
[34m[1mwandb[0m: 	attention_dims: 150
[34m[1mwandb[0m: 	attention_heads: 5
[34m[1mwandb[0m: 	attention_output_dims: 25
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	burnin_multiplier: 16
[34m[1mwandb[0m: 	claim_dense: 250
[34m[1mwandb[0m: 	dense_other_block_width: 200
[34m[1mwandb[0m: 	dense_res_block_width: 150
[34m[1mwandb[0m: 	driver_dense: 200
[34m[1mwandb[0m: 	dropout: 0.05
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	initial_bias: -2.5
[34m[1mwandb[0m: 	keep_trainable: False
[34m[1mwandb[0m: 	learning_rate: 0.025
[34m[1mwandb[0m: 	loss_weight: [0.9, 0.04, 0.03, 0.03]
[34m[1mwandb[0m: 	relu_leakiness: 0.015
[34m[1mwandb[0m: 	veh_dense: 200
[34m[1mwandb[0m: 	weight_decay: 1e-06


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




Epoch 1/14



Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14




Epoch 1/7



Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7




0,1
global_step,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇██████████████
test_auc_score,▁
test_brier_loss,▁
test_mae,▁
test_mean_poisson_deviance,▁
test_mse,▁
train/epoch_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/global_step,▁▂▂▂▃▃▃▄▅▅▅▅▆▆▇▇█▁▁▂▂▃▃▄▅
train_auc_score,▁
train_brier_loss,▁

0,1
global_step,6223.0
test_auc_score,0.61299
test_brier_loss,0.07057
test_mae,0.14779
test_mean_poisson_deviance,0.40152
test_mse,0.07902
train/epoch_loss,0.26652
train/global_step,6.0
train_auc_score,0.62105
train_brier_loss,0.06974


In [None]:
# Final cell: with the sweep above finished, ask Colab to unassign the runtime
# backing this notebook. Keep this as the last cell -- nothing placed after it
# should be expected to run in the same session.
import google.colab.runtime as colab_runtime

colab_runtime.unassign()