In [1]:
!pip install transformers
!pip install datasets
# !pip install -U PyYAML
!pip install "ray[default]"
!pip install wandb
!pip install tensorboardX

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/fd/1a/41c644c963249fd7f3836d926afa1e3f1cc234a1c40d80c5f03ad8f6f1b2/transformers-4.8.2-py3-none-any.whl (2.5MB)
[K     |████████████████████████████████| 2.5MB 4.3MB/s 
Collecting huggingface-hub==0.0.12
  Downloading https://files.pythonhosted.org/packages/2f/ee/97e253668fda9b17e968b3f97b2f8e53aa0127e8807d24a547687423fe0b/huggingface_hub-0.0.12-py3-none-any.whl
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |████████████████████████████████| 901kB 50.5MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47f162e725dcac87c521c11/tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3MB)
[K     |█████

In [3]:
import ray
ray.__version__

'1.4.1'

In [4]:
import transformers
transformers.__version__

'4.8.2'

In [None]:
from ray import tune
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# saves our models to artifacts in WandB
import wandb
# %env WANDB_LOG_MODEL=true
# %env WANDB_PROJECT=goodreads_success_predictor

In [None]:
wandb.login(key = '46cb1981ae15765be5bfb5e7c3257d0315a95a1b')

[34m[1mwandb[0m: W&B API key is configured (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
from transformers import DistilBertPreTrainedModel, DistilBertModel
from transformers.modeling_outputs import SequenceClassifierOutput
from torch import nn
import torch
from torch.nn import CrossEntropyLoss, MSELoss

class DistilBERTForMultipleSequenceClassification(DistilBertPreTrainedModel):
    def __init__(self, config, num_labels1 = 2, num_labels2 = 4):
        super().__init__(config)
        self.num_labels1 = num_labels1
        self.num_labels2 = num_labels2
        print(self.num_labels1, self.num_labels2)
        self.alpha = .5
        self.config = config

        self.distilbert = DistilBertModel(config)
        self.pre_classifier = nn.Linear(config.dim, config.dim)
        self.classifier1 = nn.Linear(config.dim, self.num_labels1)
        self.classifier2 = nn.Linear(config.dim, self.num_labels2)
        self.dropout = nn.Dropout(config.dropout)

        self.init_weights()


    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ...,
            config.num_labels - 1]`. If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
            If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        distilbert_output = self.distilbert(
              input_ids=input_ids,
              attention_mask=attention_mask,
              head_mask=head_mask,
              inputs_embeds=inputs_embeds,
              output_attentions=output_attentions,
              output_hidden_states=output_hidden_states,
              return_dict=return_dict,
          )
        hidden_state = distilbert_output[0]  # (bs, seq_len, dim)
        pooled_output = hidden_state[:, 0]  # (bs, dim)
        pooled_output = self.pre_classifier(pooled_output)  # (bs, dim)
        pooled_output = nn.ReLU()(pooled_output)  # (bs, dim)
        pooled_output = self.dropout(pooled_output)  # (bs, dim)
        logits1 = self.classifier1(pooled_output)
        logits2 = self.classifier2(pooled_output)
        logits = torch.cat([logits1, logits2], 1)

        loss = None
        if labels is not None:
            #if self.config.problem_type is None:
            #self.config.problem_type = "single_label_classification"
            
            if self.num_labels1 > 1:
                loss_fct1 = CrossEntropyLoss()
                loss1 = loss_fct1(logits1.view(-1, self.num_labels1), labels[:, 0].view(-1))
            else:
                loss_fct1 = MSELoss()
                loss1 = loss_fct1(logits1.view(-1), labels[:, 0].view(-1))

            if self.num_labels2 > 1:
                loss_fct2 = CrossEntropyLoss()
                loss2 = loss_fct2(logits2.view(-1, self.num_labels2), labels[:, 1].view(-1))
            else:
                loss_fct2 = MSELoss()
                loss2 = loss_fct2(logits2.view(-1), labels[:, 1].view(-1))
            loss = self.alpha*loss1 + (1-self.alpha)*loss2 

        if not return_dict:
            output = (logits,) + outputs[2:] #not sure if this works
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=distilbert_output.hidden_states, #hidden_states,
            attentions=distilbert_output.attentions, #attentions,
        )

In [None]:
import wandb
import os
os.environ['WANDB_CONSOLE'] = 'off'
run = wandb.init()

model_name = 'DistilBERT_multitask_overlap50_dataset_embeddings'

artifact = None
if (model_name == 'DistilBERT_multitask_sentence_tokenized_dataset_embeddings'):
  artifact = run.use_artifact('lucaguarro/goodreads_success_predictor/model-2giwtwvy:v0', type='model')
elif (model_name == 'DistilBERT_multitask_overlap50_dataset_embeddings'):
  artifact = run.use_artifact('lucaguarro/goodreads_success_predictor/model-nlpbosie:v0', type='model')

artifact_dir = artifact.download()

model = DistilBERTForMultipleSequenceClassification.from_pretrained(artifact_dir, num_labels1 = 2, num_labels2 = 8)
model.cuda()

[34m[1mwandb[0m: Currently logged in as: [33mlucaguarro[0m (use `wandb login --relogin` to force relogin)


[34m[1mwandb[0m: Downloading large artifact model-nlpbosie:v0, 255.48MB. 3 files... Done. 0:0:0


2 8


DistilBERTForMultipleSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30523, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)

In [None]:
run.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

In [None]:
import pickle
from datasets import DatasetDict

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers.modeling_outputs import SequenceClassifierOutput

class Net(nn.Module):

    def __init__(self, pre_classifier_init, classifier_init, do_rate = 0.1):
        super(Net, self).__init__()

        self.pre_classifier = nn.Linear(768, 768)
        self.classifier = nn.Linear(768, 2)
        self.dropout = nn.Dropout(do_rate)

        self.pre_classifier.weight.data.copy_(pre_classifier_init.weight.data)
        self.classifier.weight.data.copy_(classifier_init.weight.data)

        # print(pre_classifier_init.bias.data)
        self.pre_classifier.bias.data.copy_(pre_classifier_init.bias.data)
        self.classifier.bias.data.copy_(classifier_init.bias.data)

        # DOUBLE CHECK IF BIASES ARE BEING SET AS WELL

    def forward(self, x, labels = None):
        # Max pooling over a (2, 2) window
        x = self.pre_classifier(x)
        x = nn.ReLU()(x)
        x = self.dropout(x)
        logits = self.classifier(x)

        return logits
        # loss = None
        # if labels is not None:
        #   loss_fct = CrossEntropyLoss()
        #   loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        # return SequenceClassifierOutput(
        #     loss = loss,
        #     logits = logits
        # )

net = Net(model.pre_classifier, model.classifier1)
print(net)

Net(
  (pre_classifier): Linear(in_features=768, out_features=768, bias=True)
  (classifier): Linear(in_features=768, out_features=2, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)


In [None]:
net

Net(
  (pre_classifier): Linear(in_features=768, out_features=768, bias=True)
  (classifier): Linear(in_features=768, out_features=2, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [None]:
def load_data():
  with open(r"/content/drive/MyDrive/Thesis/Datasets/goodreads_maharjan_super/Pooled_Output/DistilBERT_multitask_overlap50_dataset_embeddings/avg_pld_outs_hf_ds.pkl", "rb") as input_file:
    avg_pld_outs_hf_ds = pickle.load(input_file)
  avg_pld_outs_hf_ds.set_format(type='pt', columns=['meaned_pooled_output', 'success_label'])
  trainset = avg_pld_outs_hf_ds['train']
  valset = avg_pld_outs_hf_ds['validation']
  return trainset, valset

def load_test_data():
  with open(r"/content/drive/MyDrive/Thesis/Datasets/goodreads_maharjan_super/Pooled_Output/DistilBERT_multitask_overlap50_dataset_embeddings/avg_pld_outs_hf_ds.pkl", "rb") as input_file:
    avg_pld_outs_hf_ds = pickle.load(input_file)
  avg_pld_outs_hf_ds.set_format(type='pt', columns=['meaned_pooled_output', 'success_label'])
  testset = avg_pld_outs_hf_ds['test']
  return testset

In [None]:
from ray import tune
# from ray.tune.integration.wandb import wandb_mixin
# '''@wandb_mixin
# run = wandb.init()

def train_nn(config, checkpoint_dir, data_dir=None):
  net = Net(model.pre_classifier, model.classifier1, config['do_rate'])
  net.train()
  device = "cpu"
  if torch.cuda.is_available():
      device = "cuda:0"
      if torch.cuda.device_count() > 1:
          net = nn.DataParallel(net)
  print(type(net))
  net.to(device)
  # net.cuda()


  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9)

  trainset, valset = load_data()

  trainloader = torch.utils.data.DataLoader(trainset, batch_size=config["batch_size"], shuffle=True)
  valloader = torch.utils.data.DataLoader(valset, batch_size=config["batch_size"], shuffle=True)

  for epoch in range(config['num_epochs']):
    running_loss = 0.0
    epoch_steps = 0
    for i, data in enumerate(trainloader, 0):

      inputs = data['meaned_pooled_output']
      labels = data['success_label']

      inputs, labels = inputs.to(device), labels.to(device)

      optimizer.zero_grad()

      outputs = net(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      running_loss += loss.item()
      epoch_steps += 1

      if i % 10 == 9:
        print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                        running_loss / epoch_steps))
        running_loss = 0.0

      # Validation loss
      val_loss = 0.0
      val_steps = 0
      total = 0
      correct = 0

      all_predictions = np.array([])
      all_labels = np.array([])

      net.eval()
      with torch.no_grad():
        for i, data in enumerate(valloader, 0):

          inputs_cpu = data['meaned_pooled_output']
          labels_cpu = data['success_label']

          inputs, labels = inputs_cpu.to(device), labels_cpu.to(device)
          # inputs.cuda()
          # labels.cuda()

          outputs = net(inputs)
          _, predicted = torch.max(outputs.data, 1)

          all_predictions = np.append(all_predictions, predicted.to('cpu').numpy())
          all_labels = np.append(all_labels, labels_cpu.numpy())

          total += labels.size(0)
          correct += (predicted == labels).sum().item()

          loss = criterion(outputs, labels)
          val_loss += loss.cpu().numpy()
          val_steps += 1

      with tune.checkpoint_dir(epoch) as checkpoint_dir:
          print("saving in checkpoint dir")
          path = os.path.join(checkpoint_dir, "checkpoint")
          torch.save((net.state_dict(), optimizer.state_dict()), path)

      net.train()

      s_precision, s_recall, s_f1, _ = precision_recall_fscore_support(all_labels, all_predictions, average='weighted')
      # s_acc = accuracy_score(all_labels, all_predictions)
      # wandb.log({"val_loss": val_loss / val_steps, "val_accuracy": correct / total})
      tune.report(loss=(val_loss / val_steps), accuracy=correct / total, f1=s_f1, precision=s_precision, recall=s_recall)
  print("Finished Training")

In [None]:
from ray import tune

In [None]:
def test_results(net, device="cpu"):
    testset = load_test_data()

    testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False)

    all_predictions = np.array([])
    all_labels = np.array([])

    net.eval()
    with torch.no_grad():
        for i, data in enumerate(testloader, 0):
            inputs_cpu = data['meaned_pooled_output']
            labels_cpu = data['success_label']

            inputs, labels = inputs_cpu.to(device), labels_cpu.to(device)
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)

            all_predictions = np.append(all_predictions, predicted.to('cpu').numpy())
            all_labels = np.append(all_labels, labels_cpu.numpy())

    s_precision, s_recall, s_f1, _ = precision_recall_fscore_support(all_labels, all_predictions, average='weighted')
    return {
        'precision': s_precision,
        'recall': s_recall,
        'f1': s_f1
    }

In [None]:
from ray.tune.logger import DEFAULT_LOGGERS
from ray.tune.schedulers import ASHAScheduler
from ray.tune.integration.wandb import WandbLoggerCallback
import torch.optim as optim
from functools import partial

def main(num_samples = 15, max_num_epochs = 10):
  config = {
      "lr": tune.loguniform(1e-4, 1e-1),
      "batch_size": tune.choice([16,32,64,128]),
      "num_epochs": tune.choice([1,2]),#,2,3]),#,2,3,5,10,20]),
      "do_rate": tune.uniform(0.1, 0.5),
      "wandb": {
        "project": "AvgPooledOutputClassifier",
        "api_key": "46cb1981ae15765be5bfb5e7c3257d0315a95a1b"
      }
    }

  scheduler = ASHAScheduler(
    max_t=max_num_epochs,
    grace_period=1,
    reduction_factor=2)

  result = tune.run(
    partial(train_nn, checkpoint_dir='/tmp/ShallowNNModels'),
    config = config,
    resources_per_trial={'gpu': 1},
    metric = 'loss',
    mode = 'min',
    num_samples = num_samples,
    scheduler = scheduler,
    callbacks=[WandbLoggerCallback(
        project="AvgPooledOutputClassifier",
        group='raytune_hpsearch',
        api_key="46cb1981ae15765be5bfb5e7c3257d0315a95a1b",
        log_config=True
    )])
  
  best_trial = result.get_best_trial(metric="f1", mode="max", scope="last")
  print("Best trial config: {}".format(best_trial.config))
  print("Best trial final validation loss: {}".format(
      best_trial.last_result["loss"]))
  print("Best trial final validation accuracy: {}".format(
      best_trial.last_result["accuracy"]))
  
  best_trained_model = Net(model.pre_classifier, model.classifier1, best_trial.config['do_rate'])
  device = "cpu"
  if torch.cuda.is_available():
      device = "cuda:0"
      # if gpus_per_trial > 1:
      #     best_trained_model = nn.DataParallel(best_trained_model)
  best_trained_model.to(device)

  best_checkpoint_dir = best_trial.checkpoint.value
  model_state, optimizer_state = torch.load(os.path.join(
      best_checkpoint_dir, "checkpoint"))
  best_trained_model.load_state_dict(model_state)

  # model_save_name = "yungclassifier.pt"
  path = F"/content/drive/MyDrive/Thesis/Models/ShallowNNModels/yungclassifier1.pt"
  torch.save(best_trained_model.state_dict(), path)
  return test_results(best_trained_model, device)

In [None]:
main(num_samples=15)

2021-07-09 20:51:05,414	INFO services.py:1274 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2021-07-09 20:51:06,610	INFO registry.py:65 -- Detected unknown callable for trainable. Converting to class.


Trial name,status,loc,batch_size,do_rate,lr,num_epochs
DEFAULT_63709_00000,RUNNING,,16,0.213574,0.00105227,2
DEFAULT_63709_00001,PENDING,,128,0.496502,0.0145453,1
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1


Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=499)[0m <class '__main__.Net'>


Trial name,status,loc,batch_size,do_rate,lr,num_epochs
DEFAULT_63709_00000,RUNNING,,16,0.213574,0.00105227,2
DEFAULT_63709_00001,PENDING,,128,0.496502,0.0145453,1
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1




Result for DEFAULT_63709_00000:
  accuracy: 0.7655172413793103
  date: 2021-07-09_20-51-52
  done: false
  experiment_id: b6906896d1694b9c8891356e5ff33153
  f1: 0.7552408434387536
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5688887332615099
  node_ip: 172.28.0.2
  pid: 499
  precision: 0.7613258487035551
  recall: 0.7655172413793103
  should_checkpoint: true
  time_since_restore: 2.014686346054077
  time_this_iter_s: 2.014686346054077
  time_total_s: 2.014686346054077
  timestamp: 1625863912
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00000'
  
[2m[36m(pid=499)[0m saving in checkpoint dir




[2m[36m(pid=499)[0m saving in checkpoint dir




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00000,RUNNING,172.28.0.2:499,16,0.213574,0.00105227,2,2.0,5.0058,0.51851,0.765517,0.755241
DEFAULT_63709_00001,PENDING,,128,0.496502,0.0145453,1,,,,,
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1,,,,,
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,




[2m[36m(pid=499)[0m saving in checkpoint dir




Result for DEFAULT_63709_00000:
  accuracy: 0.7655172413793103
  date: 2021-07-09_20-52-00
  done: false
  experiment_id: b6906896d1694b9c8891356e5ff33153
  f1: 0.7552408434387536
  hostname: 2b09fc6cb438
  iterations_since_restore: 3
  loss: 0.5184199190453479
  node_ip: 172.28.0.2
  pid: 499
  precision: 0.7613258487035551
  recall: 0.7655172413793103
  should_checkpoint: true
  time_since_restore: 9.280294418334961
  time_this_iter_s: 4.274494886398315
  time_total_s: 9.280294418334961
  timestamp: 1625863920
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: '63709_00000'
  




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00000,RUNNING,172.28.0.2:499,16,0.213574,0.00105227,2,3.0,9.28029,0.51842,0.765517,0.755241
DEFAULT_63709_00001,PENDING,,128,0.496502,0.0145453,1,,,,,
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1,,,,,
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,


[2m[36m(pid=499)[0m saving in checkpoint dir




[2m[36m(pid=499)[0m saving in checkpoint dir




Result for DEFAULT_63709_00000:
  accuracy: 0.7689655172413793
  date: 2021-07-09_20-52-08
  done: false
  experiment_id: b6906896d1694b9c8891356e5ff33153
  f1: 0.7593190977478285
  hostname: 2b09fc6cb438
  iterations_since_restore: 5
  loss: 0.5377971702500394
  node_ip: 172.28.0.2
  pid: 499
  precision: 0.7649125633936598
  recall: 0.7689655172413793
  should_checkpoint: true
  time_since_restore: 17.923667669296265
  time_this_iter_s: 4.33392596244812
  time_total_s: 17.923667669296265
  timestamp: 1625863928
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: '63709_00000'
  


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00000,RUNNING,172.28.0.2:499,16,0.213574,0.00105227,2,5.0,17.9237,0.537797,0.768966,0.759319
DEFAULT_63709_00001,PENDING,,128,0.496502,0.0145453,1,,,,,
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1,,,,,
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,




[2m[36m(pid=499)[0m saving in checkpoint dir




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00000,RUNNING,172.28.0.2:499,16,0.213574,0.00105227,2,6.0,22.234,0.544226,0.775862,0.767405
DEFAULT_63709_00001,PENDING,,128,0.496502,0.0145453,1,,,,,
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1,,,,,
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,


[2m[36m(pid=499)[0m saving in checkpoint dir




Result for DEFAULT_63709_00000:
  accuracy: 0.7827586206896552
  date: 2021-07-09_20-52-17
  done: false
  experiment_id: b6906896d1694b9c8891356e5ff33153
  f1: 0.7753997263250524
  hostname: 2b09fc6cb438
  iterations_since_restore: 7
  loss: 0.5262673308974818
  node_ip: 172.28.0.2
  pid: 499
  precision: 0.7791707717569786
  recall: 0.7827586206896552
  should_checkpoint: true
  time_since_restore: 26.634493827819824
  time_this_iter_s: 4.400537490844727
  time_total_s: 26.634493827819824
  timestamp: 1625863937
  timesteps_since_restore: 0
  training_iteration: 7
  trial_id: '63709_00000'
  




[2m[36m(pid=499)[0m saving in checkpoint dir




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00000,RUNNING,172.28.0.2:499,16,0.213574,0.00105227,2,8.0,30.9842,0.523307,0.786207,0.779364
DEFAULT_63709_00001,PENDING,,128,0.496502,0.0145453,1,,,,,
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1,,,,,
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,




[2m[36m(pid=499)[0m saving in checkpoint dir




Result for DEFAULT_63709_00000:
  accuracy: 0.7896551724137931
  date: 2021-07-09_20-52-26
  done: false
  experiment_id: b6906896d1694b9c8891356e5ff33153
  f1: 0.7833081269833779
  hostname: 2b09fc6cb438
  iterations_since_restore: 9
  loss: 0.5182834518583197
  node_ip: 172.28.0.2
  pid: 499
  precision: 0.7862671443358996
  recall: 0.7896551724137931
  should_checkpoint: true
  time_since_restore: 35.251176834106445
  time_this_iter_s: 4.2670204639434814
  time_total_s: 35.251176834106445
  timestamp: 1625863946
  timesteps_since_restore: 0
  training_iteration: 9
  trial_id: '63709_00000'
  




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00000,RUNNING,172.28.0.2:499,16,0.213574,0.00105227,2,9.0,35.2512,0.518283,0.789655,0.783308
DEFAULT_63709_00001,PENDING,,128,0.496502,0.0145453,1,,,,,
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1,,,,,
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,


[2m[36m(pid=499)[0m [1,    10] loss: 0.481
[2m[36m(pid=499)[0m saving in checkpoint dir


Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


Result for DEFAULT_63709_00000:
  accuracy: 0.7896551724137931
  date: 2021-07-09_20-52-30
  done: true
  experiment_id: b6906896d1694b9c8891356e5ff33153
  f1: 0.7833081269833779
  hostname: 2b09fc6cb438
  iterations_since_restore: 10
  loss: 0.5669626656331515
  node_ip: 172.28.0.2
  pid: 499
  precision: 0.7862671443358996
  recall: 0.7896551724137931
  should_checkpoint: true
  time_since_restore: 39.645161390304565
  time_this_iter_s: 4.39398455619812
  time_total_s: 39.645161390304565
  timestamp: 1625863950
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: '63709_00000'
  




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00001,RUNNING,,128,0.496502,0.0145453,1,,,,,
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1,,,,,
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,




Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=497)[0m <class '__main__.Net'>
[2m[36m(pid=497)[0m saving in checkpoint dir
Result for DEFAULT_63709_00001:
  accuracy: 0.7724137931034483
  date: 2021-07-09_20-52-43
  done: false
  experiment_id: a5ea70c28deb426aabe7312cdd3b561a
  f1: 0.7633735632183908
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5358835061391195
  node_ip: 172.28.0.2
  pid: 497
  precision: 0.7684888852445918
  recall: 0.7724137931034483
  should_checkpoint: true
  time_since_restore: 0.34277892112731934
  time_this_iter_s: 0.34277892112731934
  time_total_s: 0.34277892112731934
  timestamp: 1625863963
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00001'
  


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00001,RUNNING,172.28.0.2:497,128,0.496502,0.0145453,1,1.0,0.342779,0.535884,0.772414,0.763374
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1,,,,,
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,




[2m[36m(pid=497)[0m saving in checkpoint dir




Result for DEFAULT_63709_00001:
  accuracy: 0.7724137931034483
  date: 2021-07-09_20-52-46
  done: true
  experiment_id: a5ea70c28deb426aabe7312cdd3b561a
  f1: 0.7633735632183908
  hostname: 2b09fc6cb438
  iterations_since_restore: 2
  loss: 0.5799937347571055
  node_ip: 172.28.0.2
  pid: 497
  precision: 0.7684888852445918
  recall: 0.7724137931034483
  should_checkpoint: true
  time_since_restore: 3.348531484603882
  time_this_iter_s: 3.0057525634765625
  time_total_s: 3.348531484603882
  timestamp: 1625863966
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: '63709_00001'
  




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00002,PENDING,,64,0.388518,0.000108764,1,,,,,
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,




Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=496)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00002:
  accuracy: 0.7586206896551724
  date: 2021-07-09_20-52-59
  done: false
  experiment_id: 1da93112c36c4520be04d202ecb76862
  f1: 0.7470106924485553
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5466893970966339
  node_ip: 172.28.0.2
  pid: 496
  precision: 0.7541147045314538
  recall: 0.7586206896551724
  should_checkpoint: true
  time_since_restore: 0.3249642848968506
  time_this_iter_s: 0.3249642848968506
  time_total_s: 0.3249642848968506
  timestamp: 1625863979
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00002'
  


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00002,RUNNING,172.28.0.2:496,64,0.388518,0.000108764,1,1.0,0.324964,0.546689,0.758621,0.747011
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,


[2m[36m(pid=496)[0m saving in checkpoint dir




[2m[36m(pid=496)[0m saving in checkpoint dir




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00002,RUNNING,172.28.0.2:496,64,0.388518,0.000108764,1,2.0,3.36331,0.534832,0.758621,0.747011
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,


[2m[36m(pid=496)[0m saving in checkpoint dir




Result for DEFAULT_63709_00002:
  accuracy: 0.7586206896551724
  date: 2021-07-09_20-53-06
  done: false
  experiment_id: 1da93112c36c4520be04d202ecb76862
  f1: 0.7470106924485553
  hostname: 2b09fc6cb438
  iterations_since_restore: 3
  loss: 0.5394256770610809
  node_ip: 172.28.0.2
  pid: 496
  precision: 0.7541147045314538
  recall: 0.7586206896551724
  should_checkpoint: true
  time_since_restore: 7.684571266174316
  time_this_iter_s: 4.321263074874878
  time_total_s: 7.684571266174316
  timestamp: 1625863986
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: '63709_00002'
  




[2m[36m(pid=496)[0m saving in checkpoint dir




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00002,RUNNING,172.28.0.2:496,64,0.388518,0.000108764,1,4.0,12.03,0.560773,0.758621,0.747011
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,




[2m[36m(pid=496)[0m saving in checkpoint dir




Result for DEFAULT_63709_00002:
  accuracy: 0.7586206896551724
  date: 2021-07-09_20-53-15
  done: false
  experiment_id: 1da93112c36c4520be04d202ecb76862
  f1: 0.7470106924485553
  hostname: 2b09fc6cb438
  iterations_since_restore: 5
  loss: 0.5329466760158539
  node_ip: 172.28.0.2
  pid: 496
  precision: 0.7541147045314538
  recall: 0.7586206896551724
  should_checkpoint: true
  time_since_restore: 16.43952512741089
  time_this_iter_s: 4.409555912017822
  time_total_s: 16.43952512741089
  timestamp: 1625863995
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: '63709_00002'
  




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00002,RUNNING,172.28.0.2:496,64,0.388518,0.000108764,1,5.0,16.4395,0.532947,0.758621,0.747011
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,


[2m[36m(pid=496)[0m saving in checkpoint dir




[2m[36m(pid=496)[0m saving in checkpoint dir




Result for DEFAULT_63709_00002:
  accuracy: 0.7586206896551724
  date: 2021-07-09_20-53-24
  done: false
  experiment_id: 1da93112c36c4520be04d202ecb76862
  f1: 0.7470106924485553
  hostname: 2b09fc6cb438
  iterations_since_restore: 7
  loss: 0.5336575269699096
  node_ip: 172.28.0.2
  pid: 496
  precision: 0.7541147045314538
  recall: 0.7586206896551724
  should_checkpoint: true
  time_since_restore: 25.08343267440796
  time_this_iter_s: 4.311064958572388
  time_total_s: 25.08343267440796
  timestamp: 1625864004
  timesteps_since_restore: 0
  training_iteration: 7
  trial_id: '63709_00002'
  


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00002,RUNNING,172.28.0.2:496,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,




[2m[36m(pid=496)[0m Finished Training




Result for DEFAULT_63709_00002:
  accuracy: 0.7586206896551724
  date: 2021-07-09_20-53-24
  done: true
  experiment_id: 1da93112c36c4520be04d202ecb76862
  experiment_tag: 2_batch_size=64,do_rate=0.38852,lr=0.00010876,num_epochs=1
  f1: 0.7470106924485553
  hostname: 2b09fc6cb438
  iterations_since_restore: 7
  loss: 0.5336575269699096
  node_ip: 172.28.0.2
  pid: 496
  precision: 0.7541147045314538
  recall: 0.7586206896551724
  should_checkpoint: true
  time_since_restore: 25.08343267440796
  time_this_iter_s: 4.311064958572388
  time_total_s: 25.08343267440796
  timestamp: 1625864004
  timesteps_since_restore: 0
  training_iteration: 7
  trial_id: '63709_00002'
  




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00003,PENDING,,64,0.427725,0.00381855,1,,,,,
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,




Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=498)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00003:
  accuracy: 0.7655172413793103
  date: 2021-07-09_20-53-41
  done: true
  experiment_id: 7b72ea64955849ae87a43dacfc47aca6
  f1: 0.7552408434387536
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5469793856143952
  node_ip: 172.28.0.2
  pid: 498
  precision: 0.7613258487035551
  recall: 0.7655172413793103
  should_checkpoint: true
  time_since_restore: 0.3256664276123047
  time_this_iter_s: 0.3256664276123047
  time_total_s: 0.3256664276123047
  timestamp: 1625864021
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00003'
  


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00003,RUNNING,172.28.0.2:498,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,PENDING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,


[2m[36m(pid=498)[0m saving in checkpoint dir




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00004,RUNNING,,64,0.180402,0.000976601,2,,,,,
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,


Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=989)[0m <class '__main__.Net'>
[2m[36m(pid=989)[0m saving in checkpoint dir
Result for DEFAULT_63709_00004:
  accuracy: 0.7620689655172413
  date: 2021-07-09_20-53-53
  done: true
  experiment_id: 74ca6af2cf3f41dc9cc2fdbb70d4b16b
  f1: 0.7511382341381775
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5484301388263703
  node_ip: 172.28.0.2
  pid: 989
  precision: 0.7577271236753995
  recall: 0.7620689655172413
  should_checkpoint: true
  time_since_restore: 0.32828259468078613
  time_this_iter_s: 0.32828259468078613
  time_total_s: 0.32828259468078613
  timestamp: 1625864033
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00004'
  


Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00004,RUNNING,172.28.0.2:989,64,0.180402,0.000976601,2,1.0,0.328283,0.54843,0.762069,0.751138
DEFAULT_63709_00005,PENDING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00005,RUNNING,,128,0.410034,0.0031913,1,,,,,
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,




Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=1100)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00005:
  accuracy: 0.7655172413793103
  date: 2021-07-09_20-54-05
  done: false
  experiment_id: 2c5acddd4fc24c2fabf8031cc6e46883
  f1: 0.7552408434387536
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5133928755919138
  node_ip: 172.28.0.2
  pid: 1100
  precision: 0.7613258487035551
  recall: 0.7655172413793103
  should_checkpoint: true
  time_since_restore: 0.35421037673950195
  time_this_iter_s: 0.35421037673950195
  time_total_s: 0.35421037673950195
  timestamp: 1625864045
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00005'
  


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00005,RUNNING,172.28.0.2:1100,128,0.410034,0.0031913,1,1.0,0.35421,0.513393,0.765517,0.755241
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,


[2m[36m(pid=1100)[0m saving in checkpoint dir




[2m[36m(pid=1100)[0m saving in checkpoint dir




Result for DEFAULT_63709_00005:
  accuracy: 0.7689655172413793
  date: 2021-07-09_20-54-08
  done: true
  experiment_id: 2c5acddd4fc24c2fabf8031cc6e46883
  f1: 0.7593190977478285
  hostname: 2b09fc6cb438
  iterations_since_restore: 2
  loss: 0.5615142186482748
  node_ip: 172.28.0.2
  pid: 1100
  precision: 0.7649125633936598
  recall: 0.7689655172413793
  should_checkpoint: true
  time_since_restore: 3.4519340991973877
  time_this_iter_s: 3.0977237224578857
  time_total_s: 3.4519340991973877
  timestamp: 1625864048
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: '63709_00005'
  




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00006,PENDING,,32,0.312963,0.0576668,1,,,,,
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308




Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=1215)[0m <class '__main__.Net'>
[2m[36m(pid=1215)[0m saving in checkpoint dir
Result for DEFAULT_63709_00006:
  accuracy: 0.6655172413793103
  date: 2021-07-09_20-54-22
  done: true
  experiment_id: b330acef667a41b8b7a70fdb23a9bebf
  f1: 0.6722986857994308
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.6183291092514992
  node_ip: 172.28.0.2
  pid: 1215
  precision: 0.7202810120913569
  recall: 0.6655172413793103
  should_checkpoint: true
  time_since_restore: 0.3255763053894043
  time_this_iter_s: 0.3255763053894043
  time_total_s: 0.3255763053894043
  timestamp: 1625864062
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00006'
  


Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00006,RUNNING,172.28.0.2:1215,32,0.312963,0.0576668,1,1.0,0.325576,0.618329,0.665517,0.672299
DEFAULT_63709_00007,PENDING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00007,RUNNING,,64,0.318123,0.00183297,1,,,,,
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374


Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=1323)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00007:
  accuracy: 0.7620689655172413
  date: 2021-07-09_20-54-34
  done: true
  experiment_id: 7fd9387e3f5142bb9c48786dc8d7af89
  f1: 0.7511382341381775
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5524328052997589
  node_ip: 172.28.0.2
  pid: 1323
  precision: 0.7577271236753995
  recall: 0.7620689655172413
  should_checkpoint: true
  time_since_restore: 0.347872257232666
  time_this_iter_s: 0.347872257232666
  time_total_s: 0.347872257232666
  timestamp: 1625864074
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00007'
  
[2m[36m(pid=1323)[0m saving in checkpoint dir


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00007,RUNNING,172.28.0.2:1323,64,0.318123,0.00183297,1,1.0,0.347872,0.552433,0.762069,0.751138
DEFAULT_63709_00008,PENDING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00008,RUNNING,,128,0.387019,0.00245907,2,,,,,
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011


Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=1434)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00008:
  accuracy: 0.7586206896551724
  date: 2021-07-09_20-54-47
  done: true
  experiment_id: d8029ac04fa844948213163d4984eb61
  f1: 0.7470106924485553
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.564932515223821
  node_ip: 172.28.0.2
  pid: 1434
  precision: 0.7541147045314538
  recall: 0.7586206896551724
  should_checkpoint: true
  time_since_restore: 0.34368085861206055
  time_this_iter_s: 0.34368085861206055
  time_total_s: 0.34368085861206055
  timestamp: 1625864087
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00008'
  
[2m[36m(pid=1434)[0m saving in checkpoint dir


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00008,RUNNING,172.28.0.2:1434,128,0.387019,0.00245907,2,1.0,0.343681,0.564933,0.758621,0.747011
DEFAULT_63709_00009,PENDING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00009,RUNNING,,16,0.12072,0.0140084,1,,,,,
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241




Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=1542)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00009:
  accuracy: 0.7689655172413793
  date: 2021-07-09_20-54-59
  done: true
  experiment_id: 1755ddebc0364b33b2b0742ab8ac52eb
  f1: 0.7593190977478285
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5707962661981583
  node_ip: 172.28.0.2
  pid: 1542
  precision: 0.7649125633936598
  recall: 0.7689655172413793
  should_checkpoint: true
  time_since_restore: 0.33725714683532715
  time_this_iter_s: 0.33725714683532715
  time_total_s: 0.33725714683532715
  timestamp: 1625864099
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00009'
  
[2m[36m(pid=1542)[0m saving in checkpoint dir


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00009,RUNNING,172.28.0.2:1542,16,0.12072,0.0140084,1,1.0,0.337257,0.570796,0.768966,0.759319
DEFAULT_63709_00010,PENDING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00010,RUNNING,,64,0.288867,0.0251538,2,,,,,
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1.0,0.328283,0.54843,0.762069,0.751138




Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=1657)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00010:
  accuracy: 0.7275862068965517
  date: 2021-07-09_20-55-11
  done: true
  experiment_id: 21a0cc1001e74258a7f6fd0bfbe3d77c
  f1: 0.7041443819515403
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5631720721721649
  node_ip: 172.28.0.2
  pid: 1657
  precision: 0.7235731272294887
  recall: 0.7275862068965517
  should_checkpoint: true
  time_since_restore: 0.3335733413696289
  time_this_iter_s: 0.3335733413696289
  time_total_s: 0.3335733413696289
  timestamp: 1625864111
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00010'
  
[2m[36m(pid=1657)[0m saving in checkpoint dir


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00010,RUNNING,172.28.0.2:1657,64,0.288867,0.0251538,2,1.0,0.333573,0.563172,0.727586,0.704144
DEFAULT_63709_00011,PENDING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1.0,0.328283,0.54843,0.762069,0.751138




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00011,RUNNING,,64,0.476644,0.0787161,1,,,,,
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1.0,0.328283,0.54843,0.762069,0.751138
DEFAULT_63709_00005,TERMINATED,,128,0.410034,0.0031913,1,2.0,3.45193,0.561514,0.768966,0.759319




Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=1659)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00011:
  accuracy: 0.7620689655172413
  date: 2021-07-09_20-55-23
  done: false
  experiment_id: e3f49d1a7f0746ea82a241e590c17d25
  f1: 0.7455206311098268
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5444348931312561
  node_ip: 172.28.0.2
  pid: 1659
  precision: 0.762682617485505
  recall: 0.7620689655172413
  should_checkpoint: true
  time_since_restore: 0.3256552219390869
  time_this_iter_s: 0.3256552219390869
  time_total_s: 0.3256552219390869
  timestamp: 1625864123
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00011'
  
[2m[36m(pid=1659)[0m saving in checkpoint dir


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00011,RUNNING,172.28.0.2:1659,64,0.476644,0.0787161,1,1.0,0.325655,0.544435,0.762069,0.745521
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1.0,0.328283,0.54843,0.762069,0.751138
DEFAULT_63709_00005,TERMINATED,,128,0.410034,0.0031913,1,2.0,3.45193,0.561514,0.768966,0.759319




[2m[36m(pid=1659)[0m saving in checkpoint dir




Result for DEFAULT_63709_00011:
  accuracy: 0.6517241379310345
  date: 2021-07-09_20-55-26
  done: true
  experiment_id: e3f49d1a7f0746ea82a241e590c17d25
  f1: 0.552733839648266
  hostname: 2b09fc6cb438
  iterations_since_restore: 2
  loss: 0.5904608130455017
  node_ip: 172.28.0.2
  pid: 1659
  precision: 0.6258317269936461
  recall: 0.6517241379310345
  should_checkpoint: true
  time_since_restore: 3.3507823944091797
  time_this_iter_s: 3.0251271724700928
  time_total_s: 3.3507823944091797
  timestamp: 1625864126
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: '63709_00011'
  




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00012,PENDING,,16,0.328854,0.00598742,2,,,,,
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1.0,0.328283,0.54843,0.762069,0.751138
DEFAULT_63709_00005,TERMINATED,,128,0.410034,0.0031913,1,2.0,3.45193,0.561514,0.768966,0.759319
DEFAULT_63709_00006,TERMINATED,,32,0.312963,0.0576668,1,1.0,0.325576,0.618329,0.665517,0.672299




Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=1879)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00012:
  accuracy: 0.7586206896551724
  date: 2021-07-09_20-55-40
  done: true
  experiment_id: 7d1626c5dc034eadac82fac5f44e7172
  f1: 0.7459373761395165
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5587738015149769
  node_ip: 172.28.0.2
  pid: 1879
  precision: 0.7547885328028634
  recall: 0.7586206896551724
  should_checkpoint: true
  time_since_restore: 0.35400891304016113
  time_this_iter_s: 0.35400891304016113
  time_total_s: 0.35400891304016113
  timestamp: 1625864140
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00012'
  


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00012,RUNNING,172.28.0.2:1879,16,0.328854,0.00598742,2,1.0,0.354009,0.558774,0.758621,0.745937
DEFAULT_63709_00013,PENDING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1.0,0.328283,0.54843,0.762069,0.751138
DEFAULT_63709_00005,TERMINATED,,128,0.410034,0.0031913,1,2.0,3.45193,0.561514,0.768966,0.759319
DEFAULT_63709_00006,TERMINATED,,32,0.312963,0.0576668,1,1.0,0.325576,0.618329,0.665517,0.672299


[2m[36m(pid=1879)[0m saving in checkpoint dir




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00013,RUNNING,,32,0.331379,0.0169886,1,,,,,
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1.0,0.328283,0.54843,0.762069,0.751138
DEFAULT_63709_00005,TERMINATED,,128,0.410034,0.0031913,1,2.0,3.45193,0.561514,0.768966,0.759319
DEFAULT_63709_00006,TERMINATED,,32,0.312963,0.0576668,1,1.0,0.325576,0.618329,0.665517,0.672299
DEFAULT_63709_00007,TERMINATED,,64,0.318123,0.00183297,1,1.0,0.347872,0.552433,0.762069,0.751138


Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=1992)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00013:
  accuracy: 0.7655172413793103
  date: 2021-07-09_20-55-52
  done: true
  experiment_id: 97ee48288b804a6397c7bef5db223e82
  f1: 0.7542389583785964
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5547076761722565
  node_ip: 172.28.0.2
  pid: 1992
  precision: 0.7619516357596721
  recall: 0.7655172413793103
  should_checkpoint: true
  time_since_restore: 0.3338332176208496
  time_this_iter_s: 0.3338332176208496
  time_total_s: 0.3338332176208496
  timestamp: 1625864152
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00013'
  
[2m[36m(pid=1992)[0m saving in checkpoint dir


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00013,RUNNING,172.28.0.2:1992,32,0.331379,0.0169886,1,1.0,0.333833,0.554708,0.765517,0.754239
DEFAULT_63709_00014,PENDING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1.0,0.328283,0.54843,0.762069,0.751138
DEFAULT_63709_00005,TERMINATED,,128,0.410034,0.0031913,1,2.0,3.45193,0.561514,0.768966,0.759319
DEFAULT_63709_00006,TERMINATED,,32,0.312963,0.0576668,1,1.0,0.325576,0.618329,0.665517,0.672299
DEFAULT_63709_00007,TERMINATED,,64,0.318123,0.00183297,1,1.0,0.347872,0.552433,0.762069,0.751138




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00014,RUNNING,,128,0.141985,0.00274854,2,,,,,
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10.0,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2.0,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7.0,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1.0,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1.0,0.328283,0.54843,0.762069,0.751138
DEFAULT_63709_00005,TERMINATED,,128,0.410034,0.0031913,1,2.0,3.45193,0.561514,0.768966,0.759319
DEFAULT_63709_00006,TERMINATED,,32,0.312963,0.0576668,1,1.0,0.325576,0.618329,0.665517,0.672299
DEFAULT_63709_00007,TERMINATED,,64,0.318123,0.00183297,1,1.0,0.347872,0.552433,0.762069,0.751138
DEFAULT_63709_00008,TERMINATED,,128,0.387019,0.00245907,2,1.0,0.343681,0.564933,0.758621,0.747011


Problem at: /usr/local/lib/python3.7/dist-packages/ray/tune/integration/wandb.py 197 run


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 761, in init
    run = wi.init()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/wandb_init.py", line 422, in init
    backend.ensure_launched()
  File "/usr/local/lib/python3.7/dist-packages/wandb/sdk/backend/backend.py", line 89, in ensure_launched
    self.record_q = self._multiprocessing.Queue()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 102, in Queue
    return Queue(maxsize, ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 42, in __init__
    self._rlock = ctx.Lock()
  File "/usr/lib/python3.7/multiprocessing/context.py", line 67, in Lock
    return Lock(ctx=self.get_context())
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 162, in __init__
    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)
  File "/usr/lib/python3.7/multiprocessing/synchronize.py", line 80, in __init__
    register(self.

[2m[36m(pid=2103)[0m <class '__main__.Net'>
Result for DEFAULT_63709_00014:
  accuracy: 0.7620689655172413
  date: 2021-07-09_20-56-04
  done: true
  experiment_id: c6cfe0a7bfa64e4393c104b024b4d146
  f1: 0.7511382341381775
  hostname: 2b09fc6cb438
  iterations_since_restore: 1
  loss: 0.5764011343320211
  node_ip: 172.28.0.2
  pid: 2103
  precision: 0.7577271236753995
  recall: 0.7620689655172413
  should_checkpoint: true
  time_since_restore: 0.3432924747467041
  time_this_iter_s: 0.3432924747467041
  time_total_s: 0.3432924747467041
  timestamp: 1625864164
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '63709_00014'
  


Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00014,RUNNING,172.28.0.2:2103,128,0.141985,0.00274854,2,1,0.343292,0.576401,0.762069,0.751138
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1,0.328283,0.54843,0.762069,0.751138
DEFAULT_63709_00005,TERMINATED,,128,0.410034,0.0031913,1,2,3.45193,0.561514,0.768966,0.759319
DEFAULT_63709_00006,TERMINATED,,32,0.312963,0.0576668,1,1,0.325576,0.618329,0.665517,0.672299
DEFAULT_63709_00007,TERMINATED,,64,0.318123,0.00183297,1,1,0.347872,0.552433,0.762069,0.751138
DEFAULT_63709_00008,TERMINATED,,128,0.387019,0.00245907,2,1,0.343681,0.564933,0.758621,0.747011


[2m[36m(pid=2103)[0m saving in checkpoint dir




Trial name,status,loc,batch_size,do_rate,lr,num_epochs,iter,total time (s),loss,accuracy,f1
DEFAULT_63709_00000,TERMINATED,,16,0.213574,0.00105227,2,10,39.6452,0.566963,0.789655,0.783308
DEFAULT_63709_00001,TERMINATED,,128,0.496502,0.0145453,1,2,3.34853,0.579994,0.772414,0.763374
DEFAULT_63709_00002,TERMINATED,,64,0.388518,0.000108764,1,7,25.0834,0.533658,0.758621,0.747011
DEFAULT_63709_00003,TERMINATED,,64,0.427725,0.00381855,1,1,0.325666,0.546979,0.765517,0.755241
DEFAULT_63709_00004,TERMINATED,,64,0.180402,0.000976601,2,1,0.328283,0.54843,0.762069,0.751138
DEFAULT_63709_00005,TERMINATED,,128,0.410034,0.0031913,1,2,3.45193,0.561514,0.768966,0.759319
DEFAULT_63709_00006,TERMINATED,,32,0.312963,0.0576668,1,1,0.325576,0.618329,0.665517,0.672299
DEFAULT_63709_00007,TERMINATED,,64,0.318123,0.00183297,1,1,0.347872,0.552433,0.762069,0.751138
DEFAULT_63709_00008,TERMINATED,,128,0.387019,0.00245907,2,1,0.343681,0.564933,0.758621,0.747011
DEFAULT_63709_00009,TERMINATED,,16,0.12072,0.0140084,1,1,0.337257,0.570796,0.768966,0.759319


2021-07-09 20:56:06,480	INFO tune.py:549 -- Total run time: 299.88 seconds (296.59 seconds for the tuning loop).


Best trial config: {'lr': 0.0010522703449960447, 'batch_size': 16, 'num_epochs': 2, 'do_rate': 0.2135738268782248, 'wandb': {'project': 'AvgPooledOutputClassifier', 'api_key': '46cb1981ae15765be5bfb5e7c3257d0315a95a1b'}}
Best trial final validation loss: 0.5669626656331515
Best trial final validation accuracy: 0.7896551724137931


{'f1': 0.7018430613891009,
 'precision': 0.7034202893634344,
 'recall': 0.7137931034482758}

In [None]:
avg_pld_outs_hf_ds.set_format(type='pytorch', columns=['meaned_pooled_output', 'success_label'])
trainloader = torch.utils.data.DataLoader(avg_pld_outs_hf_ds['train'], batch_size=128)
net.train()