In [1]:
# hide
import sys
sys.path.append("..")
%load_ext autoreload
%autoreload 2

# DCAE Hyperparameter sweep

> Run a hyperparameter sweep for the Deep Convolutional AutoEncoder (DCAE) defined in the notebook `01_Timecluster_replication.ipynb`

This sweep has been run using Weights & Biases, following [this tutorial](https://www.wandb.com/articles/running-hyperparameter-sweeps-to-pick-the-best-model-using-w-b).

In [2]:
#hide
import wandb
from tensorflow.keras.optimizers import Adam
from wandb.keras import WandbCallback
from pacmel_mining_use_case.load import *
from pacmel_mining_use_case.utils import *
from pacmel_mining_use_case.timecluster import *
from functools import partial
from fastcore.all import *

### Sweep configuration 

In [3]:
# Search space and strategy for the sweep: random search over the DCAE
# hyperparameters, minimizing `val_loss`, with Hyperband early termination.
sweep_config = {
    "name": "DCAE global hyperparameter optimization",
    "method": "random",
    "metric": {"name": "val_loss", "goal": "minimize"},
    "parameters": {
        # Window length: every multiple of 12 from 12 up to 132
        "w": {"values": [12 * k for k in range(1, 12)]},
        "stride": {"values": [1, 5, 10, 20]},
        "lr": {"distribution": "uniform", "min": 0.00001, "max": 0.01},
        "epochs": {"distribution": "int_uniform", "min": 1, "max": 50},
        "batch_size": {"distribution": "int_uniform", "min": 32, "max": 256},
        "n_filters": {"values": [[64, 32, 12], [32, 16, 12]]},
        "filter_sizes": {"values": [[10, 5, 5], [20, 10, 10], [30, 15, 15]]},
        "output_filter_size": {"distribution": "int_uniform", "min": 5, "max": 30},
        # Single fixed value: not searched over
        "pool_sizes": {"value": [2, 2, 3]},
    },
    "early_terminate": {"type": "hyperband", "s": 2, "eta": 3, "max_iter": 27},
}

### Define the agent function (train)

Once the sweep is configured, you need an agent. An agent repeatedly calls a training function, once for each hyperparameter configuration proposed by the sweep. Let's define that training function:

In [7]:
def train(dataset):
    """Train one DCAE on a dataset artifact and log the run to Weights & Biases.

    Starts a W&B run in the "timecluster-extension" project, loads the given
    dataset artifact, slices it into windows, builds and compiles the DCAE
    from the run configuration, and fits it as an autoencoder (the input is
    also the target) with `WandbCallback` attached.

    Args:
        dataset: Name of the W&B artifact (type 'dataset') to train on.
    """
    # Initial value for every tunable hyperparameter
    config_defaults = {
        'lr': 0.002438113880973422,
        'n_filters': [32, 16, 12],
        'filter_sizes': [10, 5, 5],
        'output_filter_size': 10,
        'pool_sizes': [2, 2, 3],
        'batch_size': 75,
        'epochs': 34,
        'stride': 1,
        'w': 60
    }
    # Initialize wandb with a sample project name
    run = wandb.init(project="timecluster-extension",
                     job_type='train_DCAE',
                     allow_val_change=True,
                     config=config_defaults)
    config = wandb.config

    # Load and preprocess the data
    ds_artifact = run.use_artifact(dataset, type='dataset')
    df = ds_artifact.to_df() # comes from TSArtifact
    input_data = df_slicer(df, w=config.w, s=config.stride)

    # Record the dataset provenance and the validation split in the config.
    # The dict is passed positionally: its parameter name differs between
    # wandb releases (`params` in the legacy client, `d` in the current SDK),
    # so a keyword would tie this call to one of them.
    config.update(
        {
            'ds_artifact_type': ds_artifact.type,
            'ds_artifact_name': ds_artifact.name,
            'ds_artifact_digest': ds_artifact.digest,
            'val_pct': 0.2
        },
        allow_val_change=True,
    )

    # Define the model
    model = createDCAE(w=config.w,
                       d=input_data.shape[2],
                       delta=config.w,
                       n_filters=config.n_filters,
                       filter_sizes=config.filter_sizes,
                       pool_sizes=config.pool_sizes,
                       output_filter_size=config.output_filter_size)

    opt = Adam(learning_rate=config.lr)

    # Compile the model
    model.compile(optimizer=opt,
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])

    # Train the model: the input is also the target; metrics go to W&B
    model.fit(x=input_data,
              y=input_data,
              batch_size=config.batch_size,
              validation_split=config.val_pct,
              epochs=config.epochs,
              verbose=0,
              callbacks=[WandbCallback()])

### Create the sweep and call the training function from a wandb agent 

In [8]:
# Register the sweep described by `sweep_config`; the returned ID is what the agent attaches to
sweep_id = wandb.sweep(sweep_config)

Create sweep with ID: fcslffsu
Sweep URL: https://app.wandb.ai/vrodriguezf/timecluster-extension/sweeps/fcslffsu


In [None]:
# `partial` pre-binds the dataset artifact name, so the agent can call `train` without arguments
wandb.agent(sweep_id, function=partial(train, dataset='JNK:interpolated-normalized-10000'))

wandb: Agent Starting Run: efoi1fo8 with config:
	batch_size: 143
	epochs: 12
	filter_sizes: [10, 5, 5]
	lr: 0.004396853999685441
	n_filters: [64, 32, 12]
	output_filter_size: 8
	pool_sizes: [2, 2, 3]
	stride: 20
	w: 132
wandb: Agent Started Run: efoi1fo8


wandb: Agent Finished Run: efoi1fo8 

wandb: Agent Starting Run: uaxbzy69 with config:
	batch_size: 46
	epochs: 33
	filter_sizes: [10, 5, 5]
	lr: 0.004254171235870954
	n_filters: [64, 32, 12]
	output_filter_size: 11
	pool_sizes: [2, 2, 3]
	stride: 20
	w: 60
wandb: Agent Started Run: uaxbzy69


wandb: Agent Finished Run: uaxbzy69 

wandb: Agent Starting Run: 0ijyjygy with config:
	batch_size: 126
	epochs: 22
	filter_sizes: [20, 10, 10]
	lr: 0.002097564764091532
	n_filters: [32, 16, 12]
	output_filter_size: 19
	pool_sizes: [2, 2, 3]
	stride: 20
	w: 84
wandb: Agent Started Run: 0ijyjygy


wandb: Agent Finished Run: 0ijyjygy 

wandb: Agent Starting Run: slqe54w8 with config:
	batch_size: 92
	epochs: 15
	filter_sizes: [30, 15, 15]
	lr: 0.00538019133929386
	n_filters: [64, 32, 12]
	output_filter_size: 6
	pool_sizes: [2, 2, 3]
	stride: 10
	w: 96
wandb: Agent Started Run: slqe54w8


wandb: Agent Finished Run: slqe54w8 

wandb: Agent Starting Run: uabkwutq with config:
	batch_size: 70
	epochs: 45
	filter_sizes: [20, 10, 10]
	lr: 0.006394857905961577
	n_filters: [32, 16, 12]
	output_filter_size: 21
	pool_sizes: [2, 2, 3]
	stride: 1
	w: 132
wandb: Agent Started Run: uabkwutq


wandb: Agent Finished Run: uabkwutq 

wandb: Agent Starting Run: 6dcp6lrp with config:
	batch_size: 173
	epochs: 18
	filter_sizes: [30, 15, 15]
	lr: 0.00848765361778136
	n_filters: [64, 32, 12]
	output_filter_size: 23
	pool_sizes: [2, 2, 3]
	stride: 10
	w: 48
wandb: Agent Started Run: 6dcp6lrp


wandb: Agent Finished Run: 6dcp6lrp 

wandb: Agent Starting Run: jj6npx6f with config:
	batch_size: 143
	epochs: 40
	filter_sizes: [20, 10, 10]
	lr: 0.001541519877603301
	n_filters: [32, 16, 12]
	output_filter_size: 21
	pool_sizes: [2, 2, 3]
	stride: 20
	w: 24
wandb: Agent Started Run: jj6npx6f


wandb: Agent Finished Run: jj6npx6f 

wandb: Agent Starting Run: 9z49arvy with config:
	batch_size: 82
	epochs: 12
	filter_sizes: [20, 10, 10]
	lr: 0.006462279905294705
	n_filters: [32, 16, 12]
	output_filter_size: 26
	pool_sizes: [2, 2, 3]
	stride: 20
	w: 132
wandb: Agent Started Run: 9z49arvy


wandb: Agent Finished Run: 9z49arvy 

wandb: Agent Starting Run: x8au3t28 with config:
	batch_size: 217
	epochs: 33
	filter_sizes: [30, 15, 15]
	lr: 0.006140716787296173
	n_filters: [64, 32, 12]
	output_filter_size: 11
	pool_sizes: [2, 2, 3]
	stride: 20
	w: 24
wandb: Agent Started Run: x8au3t28


wandb: Agent Finished Run: x8au3t28 

wandb: Agent Starting Run: 6v5ecbie with config:
	batch_size: 242
	epochs: 7
	filter_sizes: [30, 15, 15]
	lr: 0.004681727280435912
	n_filters: [64, 32, 12]
	output_filter_size: 8
	pool_sizes: [2, 2, 3]
	stride: 5
	w: 24
wandb: Agent Started Run: 6v5ecbie


wandb: Agent Finished Run: 6v5ecbie 

wandb: Agent Starting Run: k23bftxl with config:
	batch_size: 36
	epochs: 16
	filter_sizes: [30, 15, 15]
	lr: 0.0013370631857877395
	n_filters: [64, 32, 12]
	output_filter_size: 16
	pool_sizes: [2, 2, 3]
	stride: 20
	w: 84
wandb: Agent Started Run: k23bftxl


wandb: Agent Finished Run: k23bftxl 

wandb: Agent Starting Run: lvg4ho9q with config:
	batch_size: 221
	epochs: 35
	filter_sizes: [10, 5, 5]
	lr: 0.009661067806822172
	n_filters: [32, 16, 12]
	output_filter_size: 15
	pool_sizes: [2, 2, 3]
	stride: 10
	w: 96
wandb: Agent Started Run: lvg4ho9q


wandb: Agent Finished Run: lvg4ho9q 

wandb: Agent Starting Run: njeppmfy with config:
	batch_size: 144
	epochs: 1
	filter_sizes: [30, 15, 15]
	lr: 0.002013026548354724
	n_filters: [64, 32, 12]
	output_filter_size: 26
	pool_sizes: [2, 2, 3]
	stride: 20
	w: 48
wandb: Agent Started Run: njeppmfy


wandb: Agent Finished Run: njeppmfy 

wandb: Agent Starting Run: gcvklab9 with config:
	batch_size: 36
	epochs: 26
	filter_sizes: [20, 10, 10]
	lr: 0.007356117453921903
	n_filters: [32, 16, 12]
	output_filter_size: 10
	pool_sizes: [2, 2, 3]
	stride: 10
	w: 84
wandb: Agent Started Run: gcvklab9


wandb: Agent Finished Run: gcvklab9 

wandb: Agent Starting Run: cipkj4f9 with config:
	batch_size: 91
	epochs: 31
	filter_sizes: [30, 15, 15]
	lr: 0.008366172632411859
	n_filters: [32, 16, 12]
	output_filter_size: 19
	pool_sizes: [2, 2, 3]
	stride: 1
	w: 120
wandb: Agent Started Run: cipkj4f9


wandb: Agent Finished Run: cipkj4f9 

wandb: Agent Starting Run: 75d6w0i1 with config:
	batch_size: 155
	epochs: 44
	filter_sizes: [30, 15, 15]
	lr: 0.008565081875457582
	n_filters: [32, 16, 12]
	output_filter_size: 28
	pool_sizes: [2, 2, 3]
	stride: 5
	w: 36
wandb: Agent Started Run: 75d6w0i1


wandb: Agent Finished Run: 75d6w0i1 

wandb: Agent Starting Run: usboc7km with config:
	batch_size: 84
	epochs: 44
	filter_sizes: [30, 15, 15]
	lr: 0.0010887489009632492
	n_filters: [64, 32, 12]
	output_filter_size: 12
	pool_sizes: [2, 2, 3]
	stride: 20
	w: 96
wandb: Agent Started Run: usboc7km


wandb: Agent Finished Run: usboc7km 

wandb: Agent Starting Run: 13mawxi5 with config:
	batch_size: 162
	epochs: 42
	filter_sizes: [20, 10, 10]
	lr: 0.0032681648515246603
	n_filters: [32, 16, 12]
	output_filter_size: 13
	pool_sizes: [2, 2, 3]
	stride: 20
	w: 120
wandb: Agent Started Run: 13mawxi5


wandb: Agent Finished Run: 13mawxi5 

wandb: Agent Starting Run: nlfx7dz4 with config:
	batch_size: 213
	epochs: 15
	filter_sizes: [20, 10, 10]
	lr: 0.0061089933154715595
	n_filters: [64, 32, 12]
	output_filter_size: 26
	pool_sizes: [2, 2, 3]
	stride: 1
	w: 108
wandb: Agent Started Run: nlfx7dz4


wandb: Agent Finished Run: nlfx7dz4 

