In [None]:
!pip install ax-platform

Collecting ax-platform
  Downloading ax_platform-0.4.3-py3-none-any.whl.metadata (11 kB)
Collecting botorch==0.12.0 (from ax-platform)
  Downloading botorch-0.12.0-py3-none-any.whl.metadata (11 kB)
Collecting pyre-extensions (from ax-platform)
  Downloading pyre_extensions-0.0.31-py3-none-any.whl.metadata (4.0 kB)
Collecting pyro-ppl>=1.8.4 (from botorch==0.12.0->ax-platform)
  Downloading pyro_ppl-1.9.1-py3-none-any.whl.metadata (7.8 kB)
Collecting gpytorch==1.13 (from botorch==0.12.0->ax-platform)
  Downloading gpytorch-1.13-py3-none-any.whl.metadata (8.0 kB)
Collecting linear-operator==0.5.3 (from botorch==0.12.0->ax-platform)
  Downloading linear_operator-0.5.3-py3-none-any.whl.metadata (15 kB)
Collecting jaxtyping==0.2.19 (from gpytorch==1.13->botorch==0.12.0->ax-platform)
  Downloading jaxtyping-0.2.19-py3-none-any.whl.metadata (5.7 kB)
Collecting typing-inspect (from pyre-extensions->ax-platform)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting je

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader, random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
import tempfile
import pickle
from pathlib import Path

from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.service.managed_loop import optimize

In [None]:
from ax.service.ax_client import AxClient, ObjectiveProperties
from ax.service.utils.report_utils import exp_to_df
from ax.utils.notebook.plotting import init_notebook_plotting, render
from ax.utils.tutorials.cnn_utils import evaluate, load_mnist, train



In [None]:
# Colab-only: mount Google Drive at /content/drive; the dataset path used
# further down lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Loading data

In [None]:

# from scipy.fft import dst

class SlidingWindowDataset(Dataset):
    """Map-style dataset that returns one fixed-shape window per index.

    Args:
        data: Indexable collection of samples (for example a NumPy array whose
            first axis enumerates windows). Every item must support
            ``reshape``.
        window_shape: Shape each sample is reshaped to before it is returned.
            Defaults to ``(10, 10)``, the shape this notebook has always used.
    """

    def __init__(self, data, window_shape=(10, 10)):
        self.data = data
        self.window_shape = tuple(window_shape)

    def __len__(self):
        """Return the number of windows in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return sample ``idx`` reshaped to ``self.window_shape``."""
        return self.data[idx].reshape(self.window_shape)

In [None]:
def data_loading(data_path, batch_size, sample_range=(215080, 265080),
                 train_fraction=0.8, seed=None):
    """Load the window array, normalise it and build train/validation loaders.

    The array is indexed as ``data[sample, time_step, feature]``. Following the
    original author's notes, features ``0:5`` are treated as motion features
    and features ``5:`` as RGB features.

    Processing steps (all applied in place on the loaded array):
      1. Standardise the RGB features per feature channel.
      2. Multiply the standardised RGB features by the mean standard deviation
         of the motion features so both groups live on a similar scale.
      3. Keep only the samples in ``sample_range`` and split them randomly.

    Note: the normalisation statistics are computed over the *whole* file,
    before the slice and the train/validation split are taken.

    Args:
        data_path: Path of the ``.npy`` file to read with ``np.load``.
        batch_size: Batch size used for both loaders.
        sample_range: ``(start, stop)`` indices of the samples to keep.
            Defaults to the range the notebook was originally run with.
        train_fraction: Fraction of the kept samples used for training.
        seed: Optional integer. When given, the split and the training
            shuffle order are driven by a seeded ``torch.Generator`` so runs
            are reproducible. ``None`` keeps the previous unseeded behaviour.

    Returns:
        ``(train_loader, val_loader)``.

    Raises:
        ValueError: If ``sample_range`` selects no samples.
    """
    data = np.load(data_path)

    # Per-channel statistics of the RGB features across samples and time steps.
    rgb_features = data[:, :, 5:]
    rgb_mean = np.mean(rgb_features, axis=(0, 1))
    rgb_std = np.std(rgb_features, axis=(0, 1))
    # Constant channels would otherwise cause a division by zero.
    rgb_std[rgb_std == 0] = 1.0

    data[:, :, 5:] = (data[:, :, 5:] - rgb_mean) / rgb_std

    # Rescale the standardised RGB features to the average spread of the
    # motion features (this step is active, not optional).
    motion_std = np.std(data[:, :, :5], axis=(0, 1))
    scaling_factor = motion_std.mean()
    data[:, :, 5:] *= scaling_factor

    start, stop = sample_range
    selected = data[start:stop]
    print(selected.shape)
    if len(selected) == 0:
        raise ValueError(
            f"sample_range={sample_range!r} selects no samples from an array "
            f"with {len(data)} entries"
        )

    dataset = SlidingWindowDataset(selected)
    train_size = int(train_fraction * len(dataset))
    val_size = len(dataset) - train_size

    if seed is None:
        generator = None
        train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
    else:
        generator = torch.Generator().manual_seed(seed)
        train_dataset, val_dataset = random_split(
            dataset, [train_size, val_size], generator=generator
        )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              generator=generator)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, val_loader


In [None]:
# Location of the combined window array on the mounted Google Drive.
data_path = '/content/drive/MyDrive/Dataset_d14_d15/combined_d14_d15.npy'
# Mini-batch size shared by the training and the validation loader.
batch_size = 2048
# Build the loaders once; train_evaluate() and net_train() read
# train_loader / val_loader as notebook-level globals.
train_loader,val_loader = data_loading(data_path,batch_size)

(50000, 10, 10)


### Model definition

In [None]:
class Encoder_Decoder(nn.Module):
    """GRU sequence autoencoder with a fixed decoder hidden state.

    * ``layer1`` (encoder): bidirectional GRU with ``hidden_size // 2`` units
      per direction, so its output is ``hidden_size`` wide.
    * ``layer2`` (decoder): single-layer unidirectional GRU. At every decoding
      step its hidden state is reset to the encoder output of the last time
      step; its input is zeros for the first step and its own previous output
      afterwards.
    * ``layer3`` (head): ``Tanh`` followed by a linear projection back to
      ``in_size`` features.

    Args:
        in_size: Number of features per time step.
        hidden_size: Width of the encoder output / decoder state. Must be a
            positive even number because it is split across two directions.
        no_layers: Number of stacked bidirectional GRU layers in the encoder.

    Raises:
        ValueError: If ``hidden_size`` is not a positive even number.
    """

    def __init__(self, in_size, hidden_size, no_layers):
        super(Encoder_Decoder, self).__init__()
        if hidden_size < 2 or hidden_size % 2 != 0:
            # An odd size would make the encoder output (2 * (hidden_size // 2))
            # narrower than what the decoder expects and fail later in forward().
            raise ValueError(
                f"hidden_size must be a positive even number, got {hidden_size}"
            )

        self.layer1 = nn.GRU(input_size=in_size,
                             hidden_size=hidden_size // 2,
                             num_layers=no_layers,
                             batch_first=True,
                             bidirectional=True)
        self.layer2 = nn.GRU(input_size=hidden_size,
                             hidden_size=hidden_size,
                             num_layers=1,
                             batch_first=True)
        self.layer3 = nn.Sequential(nn.Tanh(),
                                    nn.Linear(hidden_size, in_size))

    def forward(self, x):
        """Reconstruct ``x``.

        Args:
            x: Tensor of shape ``(batch, seq_len, in_size)``.

        Returns:
            Tensor of shape ``(batch, seq_len, in_size)``.
        """
        encoded, _ = self.layer1(x)  # (batch, seq_len, hidden_size)

        # The decoder state is the same at every step, so build it once:
        # encoder output at the last time step, shaped (1, batch, hidden_size).
        fixed_hidden = encoded[:, -1, :].unsqueeze(0).contiguous()

        # The first decoder input is all zeros, shaped (batch, 1, hidden_size).
        step_input = torch.zeros_like(encoded[:, 0, :]).unsqueeze(dim=1)

        outputs = []
        for _ in range(encoded.size(1)):
            step_output, _ = self.layer2(step_input, fixed_hidden)
            # Project the decoder output back to the input feature dimension.
            outputs.append(self.layer3(step_output.squeeze(1)))
            # Feed the raw decoder output into the next step.
            step_input = step_output

        return torch.stack(outputs, dim=1)

In [None]:

# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Small stand-alone configuration used for the parameter count in the next
# cell; the tuning trials build their own models through init_net(), which
# uses an input size of 10.
in_size = 1
hidden_size = 32
no_layers=2 ## number of stacked bidirectional GRU layers in the encoder
enc_dec_model = Encoder_Decoder(in_size,hidden_size,no_layers)
enc_dec_model = enc_dec_model.to(device)

In [None]:

# Total number of trainable (requires_grad) scalars in the stand-alone model.
count = sum(
    tensor.numel()
    for tensor in enc_dec_model.parameters()
    if tensor.requires_grad
)
print(f"Number of parameters in the Encoder Decoder model {count}")

Number of parameters in the Encoder Decoder model 12993


### Bayesian optimization

In [None]:
def net_train(net, train_loader, parameters, dtype, device, val_data_loader=None):
    """Train ``net`` to reconstruct its input and return the trained network.

    Each epoch runs one pass of Adam updates (with gradient-norm clipping) over
    ``train_loader``, then sums the MSE over the validation batches and feeds
    that sum to a ``ReduceLROnPlateau`` scheduler (factor 0.5).

    Args:
        net: Module to train; it is moved to ``device`` and cast to ``dtype``.
        train_loader: Iterable of input batches (tensors).
        parameters: Mapping of hyperparameters. Recognised keys and defaults:
            ``lr`` (0.001), ``weight_decay`` (0.00039147384191798724),
            ``patience`` (5), ``clip_value`` (10.47314254460227) and
            ``num_epochs`` (15).
        dtype: Torch dtype the batches (and the model) are cast to.
        device: Torch device the batches and the model are moved to.
        val_data_loader: Iterable of validation batches. When omitted, the
            notebook-level ``val_loader`` is used, as earlier versions did.

    Returns:
        The trained ``net``, left in training mode.
    """
    if val_data_loader is None:
        # Backward compatibility: earlier callers relied on the global loader.
        val_data_loader = val_loader

    net.to(dtype=dtype, device=device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(
        net.parameters(),
        lr=parameters.get('lr', 0.001),
        weight_decay=parameters.get("weight_decay", 0.00039147384191798724),
    )
    scheduler = ReduceLROnPlateau(
        optimizer,
        'min',
        patience=parameters.get("patience", 5),
        factor=0.5,
    )
    clip_value = parameters.get("clip_value", 10.47314254460227)
    num_epochs = int(parameters.get("num_epochs", 15))

    for _ in range(num_epochs):
        net.train()
        for inputs in train_loader:
            inputs = inputs.to(dtype=dtype, device=device)

            optimizer.zero_grad()
            loss = criterion(net(inputs), inputs)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), clip_value)
            optimizer.step()

        # Validation pass: inference mode and no autograd graph, so it neither
        # wastes memory nor is affected by train-only layers such as dropout.
        net.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs in val_data_loader:
                inputs = inputs.to(dtype=dtype, device=device)
                val_loss += criterion(net(inputs), inputs).item()

        scheduler.step(val_loss)

    net.train()
    return net

In [None]:
def init_net(parameterization):
    """Build a fresh, untrained ``Encoder_Decoder`` for one trial.

    Reads ``hidden_dim`` (default 512) and ``no_layers`` (default 5) from
    ``parameterization``; the input size is fixed to 10. The total parameter
    count is printed before the model is returned.
    """
    hidden_dim = parameterization.get("hidden_dim", 512)
    layer_count = parameterization.get("no_layers", 5)
    net = Encoder_Decoder(10, hidden_dim, layer_count)

    total_params = sum(weights.numel() for weights in net.parameters())
    print("Number of parameters to train: {}".format(total_params))
    return net

In [None]:
def evaluate(net, data_loader, dtype, device, parametrization):
    """Return the mean per-batch reconstruction MSE of ``net`` on ``data_loader``.

    The network is switched to eval mode and run without gradient tracking.
    Each batch is cast to ``dtype`` / moved to ``device``, passed through the
    network and compared against itself with ``nn.MSELoss``; the batch losses
    are summed and divided by ``len(data_loader)``.

    ``parametrization`` is accepted for call-site compatibility but is not
    used. This definition shadows the ``evaluate`` imported earlier from
    ``ax.utils.tutorials.cnn_utils``.
    """
    net.eval()
    mse = nn.MSELoss()
    batch_losses = []
    with torch.no_grad():
        for windows in data_loader:
            windows = windows.to(dtype=dtype, device=device)
            reconstruction = net(windows)
            batch_losses.append(mse(reconstruction, windows).item())
    return sum(batch_losses) / len(data_loader)

In [None]:
def train_evaluate(parameterization):
    """Evaluation function for one Ax trial.

    Builds an untrained model from ``parameterization``, trains it with
    ``net_train`` and scores it on the validation loader with ``evaluate``.
    Uses the notebook-level ``train_loader``, ``val_loader``, ``dtype`` and
    ``device``; the batch size is therefore not tuned.

    Returns:
        ``{"loss": <validation loss>}``, the format handed to
        ``ax_client.complete_trial``.
    """
    fitted_net = net_train(
        net=init_net(parameterization),
        train_loader=train_loader,
        parameters=parameterization,
        dtype=dtype,
        device=device,
    )

    # Validation loss (not accuracy) of the model trained in this run.
    val_mse = evaluate(
        net=fitted_net,
        data_loader=val_loader,
        dtype=dtype,
        device=device,
        parametrization=parameterization,
    )
    return {"loss": val_mse}

In [None]:

# Service-API client that drives the ask/tell optimisation loop below
# (used here instead of the managed-loop `optimize` helper).
ax_client = AxClient()

[INFO 09-30 09:51:42] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 6 decimal points.


In [None]:
#torch.cuda.set_device(0) #this is sometimes necessary for me
# NOTE(review): `Models` is not referenced anywhere else in the visible notebook.
from ax.modelbridge.registry import Models
# dtype / device are read as notebook-level globals by train_evaluate().
dtype = torch.float
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define the search space and the objective (minimise the validation "loss").
ax_client.create_experiment(
    name='hyper_parameters_tuning',
    parameters=[
        # Learning rate, searched on a log scale.
        {"name": "lr", "type": "range", "bounds": [1e-6, 0.4],'value_type':"float", "log_scale": True},
        # Width of the encoder output / decoder state.
        {"name": "hidden_dim", "type": "choice", "values": [256,512,1024],'value_type':"int"},
        # Number of stacked bidirectional GRU layers in the encoder.
        {"name": "no_layers", "type": "range", "bounds": [3, 5],'value_type':"int"},
        # Adam weight decay, searched on a linear scale.
        # NOTE(review): the range spans two orders of magnitude; consider log_scale.
        {"name": "weight_decay", "type": "range", "bounds": [1e-5, 0.001],'value_type':"float"},
        # ReduceLROnPlateau patience (epochs without improvement before the LR is halved).
        {"name": "patience", "type": "range", "bounds": [1, 10],'value_type':"int"},
        # {"name": "num_epochs", "type": "range", "bounds": [10, 100],'value_type':"int"},
        # Maximum gradient norm used for clipping.
        {"name": "clip_value", "type": "range", "bounds": [5.1, 100.0],'value_type':"float"},
        #{"name": "max_epoch", "type": "range", "bounds": [1, 30]},
        #{"name": "stepsize", "type": "range", "bounds": [20, 40]},
    ],
    objectives={"loss": ObjectiveProperties(minimize=True)},
    # Replace any experiment already attached to this client when the cell is re-run.
    overwrite_existing_experiment=True
    # evaluation_function=train_evaluate,

    # objective_name='loss',
    # minimize=True,
    # experiment_name='hyper_parameters_tuning',
    # total_trials=10
)

# print(best_parameters)
# means, covariances = values
# print(means)
# print(covariances)





[INFO 09-30 09:51:46] ax.service.utils.instantiation: Created search space: SearchSpace(parameters=[RangeParameter(name='lr', parameter_type=FLOAT, range=[1e-06, 0.4], log_scale=True), ChoiceParameter(name='hidden_dim', parameter_type=INT, values=[256, 512, 1024], is_ordered=True, sort_values=True), RangeParameter(name='no_layers', parameter_type=INT, range=[3, 5]), RangeParameter(name='weight_decay', parameter_type=FLOAT, range=[1e-05, 0.001]), RangeParameter(name='patience', parameter_type=INT, range=[1, 10]), RangeParameter(name='clip_value', parameter_type=FLOAT, range=[5.1, 100.0])], parameter_constraints=[]).
[INFO 09-30 09:51:46] ax.modelbridge.dispatch_utils: Using Models.BOTORCH_MODULAR since there is at least one ordered parameter and there are no unordered categorical parameters.
[INFO 09-30 09:51:46] ax.modelbridge.dispatch_utils: Calculating the number of remaining initialization trials based on num_initialization_trials=None max_initialization_trials=None num_tunable_

In [None]:
# Run 50 sequential trials: ask Ax for a configuration, train and evaluate it,
# then report the validation loss back. In the recorded run the first 12
# trials were generated by Sobol and the remaining ones by BoTorch.
for i in range(50):
    parameters, trial_index = ax_client.get_next_trial()
    # Local evaluation here can be replaced with deployment to external system.
    ax_client.complete_trial(trial_index=trial_index, raw_data=train_evaluate(parameters))


Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 09:52:07] ax.service.ax_client: Generated new trial 0 with parameters {'lr': 0.013386, 'hidden_dim': 256, 'no_layers': 3, 'weight_decay': 0.000302, 'patience': 6, 'clip_value': 83.631474} using model Sobol.


Number of parameters to train: 1097738


[INFO 09-30 09:52:35] ax.service.ax_client: Completed trial 0 with data: {'loss': (493.722241, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 09:52:35] ax.service.ax_client: Generated new trial 1 with parameters {'lr': 0.000369, 'hidden_dim': 1024, 'no_layers': 5, 'weight_decay': 0.000648, 'patience': 4, 'clip_value': 41.444931} using model Sobol.


Number of parameters to train: 26816522


[INFO 09-30 09:58:12] ax.service.ax_client: Completed trial 1 with data: {'loss': (2505.885742, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 09:58:12] ax.service.ax_client: Generated new trial 2 with parameters {'lr': 1e-06, 'hidden_dim': 256, 'no_layers': 4, 'weight_decay': 0.000196, 'patience': 1, 'clip_value': 22.814624} using model Sobol.


Number of parameters to train: 1394186


[INFO 09-30 09:58:47] ax.service.ax_client: Completed trial 2 with data: {'loss': (5583.086914, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 09:58:47] ax.service.ax_client: Generated new trial 3 with parameters {'lr': 0.029183, 'hidden_dim': 512, 'no_layers': 4, 'weight_decay': 0.000871, 'patience': 9, 'clip_value': 63.70938} using model Sobol.


Number of parameters to train: 5540874


[INFO 09-30 09:59:59] ax.service.ax_client: Completed trial 3 with data: {'loss': (223.146011, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 09:59:59] ax.service.ax_client: Generated new trial 4 with parameters {'lr': 0.089323, 'hidden_dim': 512, 'no_layers': 5, 'weight_decay': 0.000932, 'patience': 10, 'clip_value': 92.988826} using model Sobol.


Number of parameters to train: 6723594


[INFO 09-30 10:01:22] ax.service.ax_client: Completed trial 4 with data: {'loss': (2309.062256, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 10:01:22] ax.service.ax_client: Generated new trial 5 with parameters {'lr': 9e-06, 'hidden_dim': 512, 'no_layers': 3, 'weight_decay': 1.4e-05, 'patience': 2, 'clip_value': 39.490056} using model Sobol.


Number of parameters to train: 4358154


[INFO 09-30 10:02:23] ax.service.ax_client: Completed trial 5 with data: {'loss': (5295.864355, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 10:02:23] ax.service.ax_client: Generated new trial 6 with parameters {'lr': 0.0001, 'hidden_dim': 256, 'no_layers': 5, 'weight_decay': 0.00059, 'patience': 5, 'clip_value': 11.412602} using model Sobol.


Number of parameters to train: 1690634


[INFO 09-30 10:03:06] ax.service.ax_client: Completed trial 6 with data: {'loss': (4969.165234, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 10:03:06] ax.service.ax_client: Generated new trial 7 with parameters {'lr': 0.001619, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.000487, 'patience': 7, 'clip_value': 64.72013} using model Sobol.


Number of parameters to train: 17367050


[INFO 09-30 10:06:39] ax.service.ax_client: Completed trial 7 with data: {'loss': (795.276331, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 10:06:39] ax.service.ax_client: Generated new trial 8 with parameters {'lr': 0.000656, 'hidden_dim': 512, 'no_layers': 4, 'weight_decay': 0.000104, 'patience': 3, 'clip_value': 55.378168} using model Sobol.


Number of parameters to train: 5540874


[INFO 09-30 10:07:56] ax.service.ax_client: Completed trial 8 with data: {'loss': (2648.862061, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 10:07:56] ax.service.ax_client: Generated new trial 9 with parameters {'lr': 4.1e-05, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.000965, 'patience': 7, 'clip_value': 25.283368} using model Sobol.


Number of parameters to train: 17367050


[INFO 09-30 10:11:30] ax.service.ax_client: Completed trial 9 with data: {'loss': (4539.784668, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 10:11:30] ax.service.ax_client: Generated new trial 10 with parameters {'lr': 1.8e-05, 'hidden_dim': 256, 'no_layers': 5, 'weight_decay': 0.000396, 'patience': 8, 'clip_value': 50.951998} using model Sobol.


Number of parameters to train: 1690634


[INFO 09-30 10:12:13] ax.service.ax_client: Completed trial 10 with data: {'loss': (5346.679004, None)}.

Encountered exception in computing model fit quality: RandomModelBridge does not support prediction.

[INFO 09-30 10:12:13] ax.service.ax_client: Generated new trial 11 with parameters {'lr': 0.179779, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.000557, 'patience': 1, 'clip_value': 80.125888} using model Sobol.


Number of parameters to train: 17367050


[INFO 09-30 10:15:19] ax.service.ax_client: Completed trial 11 with data: {'loss': (490.937604, None)}.
[INFO 09-30 10:15:21] ax.service.ax_client: Generated new trial 12 with parameters {'lr': 0.025613, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.000677, 'patience': 2, 'clip_value': 54.075773} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 10:18:39] ax.service.ax_client: Completed trial 12 with data: {'loss': (122.560612, None)}.
[INFO 09-30 10:18:41] ax.service.ax_client: Generated new trial 13 with parameters {'lr': 0.04213, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.000435, 'patience': 10, 'clip_value': 63.021066} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 10:21:53] ax.service.ax_client: Completed trial 13 with data: {'loss': (291.532794, None)}.
[INFO 09-30 10:21:55] ax.service.ax_client: Generated new trial 14 with parameters {'lr': 0.01272, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.000983, 'patience': 1, 'clip_value': 73.48672} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 10:25:17] ax.service.ax_client: Completed trial 14 with data: {'loss': (154.966718, None)}.
[INFO 09-30 10:25:19] ax.service.ax_client: Generated new trial 15 with parameters {'lr': 0.01761, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.000635, 'patience': 1, 'clip_value': 70.299251} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 10:28:38] ax.service.ax_client: Completed trial 15 with data: {'loss': (129.137242, None)}.
[INFO 09-30 10:28:41] ax.service.ax_client: Generated new trial 16 with parameters {'lr': 0.03724, 'hidden_dim': 256, 'no_layers': 3, 'weight_decay': 0.000917, 'patience': 6, 'clip_value': 62.635968} using model BoTorch.


Number of parameters to train: 1097738


[INFO 09-30 10:29:08] ax.service.ax_client: Completed trial 16 with data: {'loss': (224.145938, None)}.
[INFO 09-30 10:29:11] ax.service.ax_client: Generated new trial 17 with parameters {'lr': 0.013626, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.000954, 'patience': 7, 'clip_value': 60.498779} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 10:32:32] ax.service.ax_client: Completed trial 17 with data: {'loss': (168.221121, None)}.
[INFO 09-30 10:32:34] ax.service.ax_client: Generated new trial 18 with parameters {'lr': 0.101852, 'hidden_dim': 1024, 'no_layers': 5, 'weight_decay': 0.001, 'patience': 1, 'clip_value': 23.330479} using model BoTorch.


Number of parameters to train: 26816522


[INFO 09-30 10:37:34] ax.service.ax_client: Completed trial 18 with data: {'loss': (337.852448, None)}.
[INFO 09-30 10:37:36] ax.modelbridge.base: Untransformed parameter 0.40000000000000013 greater than upper bound 0.4, clamping
[INFO 09-30 10:37:36] ax.service.ax_client: Generated new trial 19 with parameters {'lr': 0.4, 'hidden_dim': 256, 'no_layers': 4, 'weight_decay': 0.000303, 'patience': 1, 'clip_value': 37.260116} using model BoTorch.


Number of parameters to train: 1394186


[INFO 09-30 10:38:10] ax.service.ax_client: Completed trial 19 with data: {'loss': (1189.79104, None)}.
[INFO 09-30 10:38:13] ax.service.ax_client: Generated new trial 20 with parameters {'lr': 0.021486, 'hidden_dim': 1024, 'no_layers': 4, 'weight_decay': 0.001, 'patience': 1, 'clip_value': 47.385565} using model BoTorch.


Number of parameters to train: 22091786


[INFO 09-30 10:42:29] ax.service.ax_client: Completed trial 20 with data: {'loss': (132.976883, None)}.
[INFO 09-30 10:42:31] ax.modelbridge.base: Untransformed parameter 0.40000000000000013 greater than upper bound 0.4, clamping
[INFO 09-30 10:42:31] ax.service.ax_client: Generated new trial 21 with parameters {'lr': 0.4, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.001, 'patience': 10, 'clip_value': 38.595204} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 10:45:39] ax.service.ax_client: Completed trial 21 with data: {'loss': (1546.452344, None)}.
[INFO 09-30 10:45:41] ax.service.ax_client: Generated new trial 22 with parameters {'lr': 0.021544, 'hidden_dim': 1024, 'no_layers': 5, 'weight_decay': 0.001, 'patience': 10, 'clip_value': 33.201767} using model BoTorch.


Number of parameters to train: 26816522


[INFO 09-30 10:50:55] ax.service.ax_client: Completed trial 22 with data: {'loss': (260.887109, None)}.
[INFO 09-30 10:50:58] ax.service.ax_client: Generated new trial 23 with parameters {'lr': 0.03668, 'hidden_dim': 256, 'no_layers': 5, 'weight_decay': 0.001, 'patience': 1, 'clip_value': 34.758825} using model BoTorch.


Number of parameters to train: 1690634


[INFO 09-30 10:51:40] ax.service.ax_client: Completed trial 23 with data: {'loss': (237.700714, None)}.
[INFO 09-30 10:51:43] ax.service.ax_client: Generated new trial 24 with parameters {'lr': 0.035432, 'hidden_dim': 1024, 'no_layers': 5, 'weight_decay': 0.000781, 'patience': 1, 'clip_value': 37.930906} using model BoTorch.


Number of parameters to train: 26816522


[INFO 09-30 10:56:48] ax.service.ax_client: Completed trial 24 with data: {'loss': (198.554385, None)}.
[INFO 09-30 10:56:51] ax.service.ax_client: Generated new trial 25 with parameters {'lr': 0.015234, 'hidden_dim': 256, 'no_layers': 3, 'weight_decay': 0.000695, 'patience': 1, 'clip_value': 71.161692} using model BoTorch.


Number of parameters to train: 1097738


[INFO 09-30 10:57:19] ax.service.ax_client: Completed trial 25 with data: {'loss': (395.283881, None)}.
[INFO 09-30 10:57:22] ax.service.ax_client: Generated new trial 26 with parameters {'lr': 0.011794, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.000613, 'patience': 10, 'clip_value': 81.532153} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 11:00:42] ax.service.ax_client: Completed trial 26 with data: {'loss': (144.558246, None)}.
[INFO 09-30 11:00:46] ax.service.ax_client: Generated new trial 27 with parameters {'lr': 0.024501, 'hidden_dim': 1024, 'no_layers': 5, 'weight_decay': 0.001, 'patience': 1, 'clip_value': 36.927431} using model BoTorch.


Number of parameters to train: 26816522


[INFO 09-30 11:05:57] ax.service.ax_client: Completed trial 27 with data: {'loss': (229.558109, None)}.
[INFO 09-30 11:06:00] ax.service.ax_client: Generated new trial 28 with parameters {'lr': 0.02521, 'hidden_dim': 1024, 'no_layers': 5, 'weight_decay': 0.001, 'patience': 1, 'clip_value': 5.1} using model BoTorch.


Number of parameters to train: 26816522


[INFO 09-30 11:11:22] ax.service.ax_client: Completed trial 28 with data: {'loss': (250.521939, None)}.
[INFO 09-30 11:11:25] ax.service.ax_client: Generated new trial 29 with parameters {'lr': 0.045861, 'hidden_dim': 256, 'no_layers': 5, 'weight_decay': 0.001, 'patience': 9, 'clip_value': 5.1} using model BoTorch.


Number of parameters to train: 1690634


[INFO 09-30 11:12:07] ax.service.ax_client: Completed trial 29 with data: {'loss': (403.97052, None)}.
[INFO 09-30 11:12:10] ax.service.ax_client: Generated new trial 30 with parameters {'lr': 0.002506, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.001, 'patience': 10, 'clip_value': 100.0} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 11:15:43] ax.service.ax_client: Completed trial 30 with data: {'loss': (452.569379, None)}.
[INFO 09-30 11:15:46] ax.service.ax_client: Generated new trial 31 with parameters {'lr': 0.042653, 'hidden_dim': 1024, 'no_layers': 5, 'weight_decay': 0.000334, 'patience': 1, 'clip_value': 5.1} using model BoTorch.


Number of parameters to train: 26816522


[INFO 09-30 11:20:58] ax.service.ax_client: Completed trial 31 with data: {'loss': (201.556631, None)}.
[INFO 09-30 11:21:01] ax.service.ax_client: Generated new trial 32 with parameters {'lr': 0.039051, 'hidden_dim': 512, 'no_layers': 4, 'weight_decay': 1e-05, 'patience': 1, 'clip_value': 34.644315} using model BoTorch.


Number of parameters to train: 5540874


[INFO 09-30 11:22:11] ax.service.ax_client: Completed trial 32 with data: {'loss': (194.759232, None)}.
[INFO 09-30 11:22:14] ax.service.ax_client: Generated new trial 33 with parameters {'lr': 0.034304, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.001, 'patience': 1, 'clip_value': 61.959941} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 11:25:30] ax.service.ax_client: Completed trial 33 with data: {'loss': (193.924908, None)}.
[INFO 09-30 11:25:33] ax.service.ax_client: Generated new trial 34 with parameters {'lr': 0.032839, 'hidden_dim': 1024, 'no_layers': 5, 'weight_decay': 1e-05, 'patience': 1, 'clip_value': 27.857323} using model BoTorch.


Number of parameters to train: 26816522


[INFO 09-30 11:30:19] ax.service.ax_client: Completed trial 34 with data: {'loss': (301.309174, None)}.
[INFO 09-30 11:30:23] ax.service.ax_client: Generated new trial 35 with parameters {'lr': 0.036945, 'hidden_dim': 1024, 'no_layers': 4, 'weight_decay': 0.001, 'patience': 1, 'clip_value': 28.073911} using model BoTorch.


Number of parameters to train: 22091786


[INFO 09-30 11:34:35] ax.service.ax_client: Completed trial 35 with data: {'loss': (266.32048, None)}.
[INFO 09-30 11:34:37] ax.service.ax_client: Generated new trial 36 with parameters {'lr': 0.012457, 'hidden_dim': 256, 'no_layers': 3, 'weight_decay': 1e-05, 'patience': 1, 'clip_value': 5.1} using model BoTorch.


Number of parameters to train: 1097738


[INFO 09-30 11:35:05] ax.service.ax_client: Completed trial 36 with data: {'loss': (468.229285, None)}.
[INFO 09-30 11:35:10] ax.service.ax_client: Generated new trial 37 with parameters {'lr': 0.026446, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 1e-05, 'patience': 10, 'clip_value': 5.1} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 11:38:23] ax.service.ax_client: Completed trial 37 with data: {'loss': (167.835748, None)}.
[INFO 09-30 11:38:27] ax.service.ax_client: Generated new trial 38 with parameters {'lr': 0.007016, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.001, 'patience': 10, 'clip_value': 5.1} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 11:42:01] ax.service.ax_client: Completed trial 38 with data: {'loss': (201.574963, None)}.
[INFO 09-30 11:42:09] ax.service.ax_client: Generated new trial 39 with parameters {'lr': 0.00335, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 1e-05, 'patience': 2, 'clip_value': 100.0} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 11:45:40] ax.service.ax_client: Completed trial 39 with data: {'loss': (284.635883, None)}.
[INFO 09-30 11:45:46] ax.service.ax_client: Generated new trial 40 with parameters {'lr': 0.002998, 'hidden_dim': 256, 'no_layers': 5, 'weight_decay': 1e-05, 'patience': 5, 'clip_value': 5.1} using model BoTorch.


Number of parameters to train: 1690634


[INFO 09-30 11:46:29] ax.service.ax_client: Completed trial 40 with data: {'loss': (1951.931055, None)}.
[INFO 09-30 11:46:33] ax.service.ax_client: Generated new trial 41 with parameters {'lr': 0.017206, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 1e-05, 'patience': 1, 'clip_value': 100.0} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 11:49:47] ax.service.ax_client: Completed trial 41 with data: {'loss': (134.060826, None)}.
[INFO 09-30 11:49:50] ax.service.ax_client: Generated new trial 42 with parameters {'lr': 0.042897, 'hidden_dim': 256, 'no_layers': 4, 'weight_decay': 0.001, 'patience': 1, 'clip_value': 50.949384} using model BoTorch.


Number of parameters to train: 1394186


[INFO 09-30 11:50:25] ax.service.ax_client: Completed trial 42 with data: {'loss': (272.484296, None)}.
[INFO 09-30 11:50:28] ax.service.ax_client: Generated new trial 43 with parameters {'lr': 0.009596, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 1e-05, 'patience': 1, 'clip_value': 5.1} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 11:53:48] ax.service.ax_client: Completed trial 43 with data: {'loss': (121.281316, None)}.
[INFO 09-30 11:53:52] ax.modelbridge.base: Untransformed parameter 0.40000000000000013 greater than upper bound 0.4, clamping
[INFO 09-30 11:53:53] ax.service.ax_client: Generated new trial 44 with parameters {'lr': 0.4, 'hidden_dim': 256, 'no_layers': 3, 'weight_decay': 1e-05, 'patience': 1, 'clip_value': 100.0} using model BoTorch.


Number of parameters to train: 1097738


[INFO 09-30 11:54:19] ax.service.ax_client: Completed trial 44 with data: {'loss': (679.383167, None)}.
[INFO 09-30 11:54:23] ax.service.ax_client: Generated new trial 45 with parameters {'lr': 0.011515, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.000197, 'patience': 1, 'clip_value': 5.1} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 11:57:51] ax.service.ax_client: Completed trial 45 with data: {'loss': (102.500356, None)}.
[INFO 09-30 11:57:54] ax.service.ax_client: Generated new trial 46 with parameters {'lr': 0.011732, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 1e-05, 'patience': 1, 'clip_value': 11.608371} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 12:01:12] ax.service.ax_client: Completed trial 46 with data: {'loss': (149.213266, None)}.
[INFO 09-30 12:01:16] ax.service.ax_client: Generated new trial 47 with parameters {'lr': 0.011804, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 0.001, 'patience': 1, 'clip_value': 5.1} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 12:04:46] ax.service.ax_client: Completed trial 47 with data: {'loss': (179.230341, None)}.
[INFO 09-30 12:04:49] ax.service.ax_client: Generated new trial 48 with parameters {'lr': 0.011824, 'hidden_dim': 1024, 'no_layers': 3, 'weight_decay': 1e-05, 'patience': 1, 'clip_value': 84.308101} using model BoTorch.


Number of parameters to train: 17367050


[INFO 09-30 12:08:04] ax.service.ax_client: Completed trial 48 with data: {'loss': (155.004431, None)}.
[INFO 09-30 12:08:10] ax.service.ax_client: Generated new trial 49 with parameters {'lr': 0.011379, 'hidden_dim': 1024, 'no_layers': 4, 'weight_decay': 0.001, 'patience': 10, 'clip_value': 43.320846} using model BoTorch.


Number of parameters to train: 22091786


[INFO 09-30 12:12:32] ax.service.ax_client: Completed trial 49 with data: {'loss': (215.310403, None)}.


In [None]:
# Tabulate every trial: status, generation method, loss and sampled hyperparameters.
ax_client.get_trials_data_frame()



Unnamed: 0,trial_index,arm_name,trial_status,generation_method,loss,lr,hidden_dim,no_layers,weight_decay,patience,clip_value
0,0,0_0,COMPLETED,Sobol,493.722241,0.013386,256,3,0.000302,6,83.631474
1,1,1_0,COMPLETED,Sobol,2505.885742,0.000369,1024,5,0.000648,4,41.444931
2,2,2_0,COMPLETED,Sobol,5583.086914,1e-06,256,4,0.000196,1,22.814624
3,3,3_0,COMPLETED,Sobol,223.146011,0.029183,512,4,0.000871,9,63.70938
4,4,4_0,COMPLETED,Sobol,2309.062256,0.089323,512,5,0.000932,10,92.988826
5,5,5_0,COMPLETED,Sobol,5295.864355,9e-06,512,3,1.4e-05,2,39.490056
6,6,6_0,COMPLETED,Sobol,4969.165234,0.0001,256,5,0.00059,5,11.412602
7,7,7_0,COMPLETED,Sobol,795.276331,0.001619,1024,3,0.000487,7,64.72013
8,8,8_0,COMPLETED,Sobol,2648.862061,0.000656,512,4,0.000104,3,55.378168
9,9,9_0,COMPLETED,Sobol,4539.784668,4.1e-05,1024,3,0.000965,7,25.283368


In [None]:
# Best configuration found by the experiment. `values` is the second element
# returned alongside it (presumably a (means, covariances) pair — TODO confirm).
best_parameters, values = ax_client.get_best_parameters()
best_parameters

{'lr': 0.009595835793707791,
 'hidden_dim': 1024,
 'no_layers': 3,
 'weight_decay': 1e-05,
 'patience': 1,
 'clip_value': 5.1}

In [None]:
# Contour of the modelled loss over (lr, weight_decay); per the Ax log, the
# remaining parameters are fixed to the middle of their range.
render(ax_client.get_contour_plot(param_x="lr", param_y="weight_decay", metric_name="loss"))

[INFO 09-30 12:13:15] ax.service.ax_client: Retrieving contour plot with parameter 'lr' on X-axis and 'weight_decay' on Y-axis, for metric 'loss'. Remaining parameters are affixed to the middle of their range.


In [None]:
# Render the optimisation trace returned by the client.
render(
    ax_client.get_optimization_trace()
)