In [None]:
%%capture
!pip install evaluate
!pip install openpyxl
!pip install optuna
!pip install ray[tune]
!pip install wandb

!pip install datasets==2.8.0
!pip install transformers==4.26
!pip install librosa
!pip install evaluate>=0.30
!pip install audiomentations
!pip install jiwer
!pip install gradio
!pip install torchaudio<0.12
!pip install tensorboardX
!pip install accelerate -U
!pip install hazm==0.7.0

In [None]:
import re
import hazm
import string
import os
import ast
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from evaluate import load
from tqdm.notebook import tqdm,trange
from sklearn.model_selection import train_test_split
from datasets import load_dataset, Dataset, concatenate_datasets, load_metric, load_from_disk, DatasetDict

import wandb

# The W&B API key is read from the WANDB_API_KEY environment variable so that the
# secret never lives in the notebook (or in its version history). When the variable
# is unset, `key` is None and wandb falls back to its own login flow.
wandb.login(key=os.environ.get("WANDB_API_KEY"), relogin=True, force=True)
# Project under which the Trainer's wandb integration groups the runs.
os.environ['WANDB_PROJECT'] = "hyperparameter_tuning_whisper_small_persian"

# Register tqdm's pandas integration (progress_apply and friends).
tqdm.pandas()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/jupyter/.netrc


In [None]:
# Shared hazm normalizer instance; `normalizer()` calls `_normalizer.normalize(text)`.
_normalizer = hazm.Normalizer()

# Characters that `normalizer()` strips from transcripts. The entries are joined
# verbatim into a single regex character class (`[...]`), so:
#   * duplicates are harmless,
#   * multi-character entries (e.g. '""') contribute each of their characters,
#   * entries are NOT escaped -- a "-" placed between two other entries forms a
#     range inside the character class (e.g. "!", "-", ";" becomes `!-;`).
chars_to_ignore = [
    ",", "?", ".", "!", "-", ";", ":", '""', "%", "'", '"', "�",
    "#", "!", "؟", "?", "«", "»", "،", "(", ")", "؛", "'ٔ", "٬",'ٔ', ",", "?",
    ".", "!", "-", ";", ":",'"',"“", "%", "‘", "”", "�", "–", "…", "_", "”", '“', '„',
    'ā', 'š',
]

# Also strip lowercase ASCII letters and ASCII digits.
chars_to_ignore = chars_to_ignore + list(string.ascii_lowercase + string.digits)

# Substring -> replacement table applied by `multiple_replace()` inside `normalizer()`
# (before `chars_to_ignore` is stripped).
chars_to_mapping = {
    # Arabic letter variants, letter+diacritic pairs and presentation forms,
    # each mapped to a single plain letter.
    'ك': 'ک', 'دِ': 'د', 'بِ': 'ب', 'زِ': 'ز', 'ذِ': 'ذ', 'شِ': 'ش', 'سِ': 'س', 'ى': 'ی',
    'ي': 'ی', 'أ': 'ا', 'ؤ': 'و', "ے": "ی", "ۀ": "ه", "ﭘ": "پ", "ﮐ": "ک", "ﯽ": "ی",
    "ﺎ": "ا", "ﺑ": "ب", "ﺘ": "ت", "ﺧ": "خ", "ﺩ": "د", "ﺱ": "س", "ﻀ": "ض", "ﻌ": "ع",
    "ﻟ": "ل", "ﻡ": "م", "ﻢ": "م", "ﻪ": "ه", "ﻮ": "و", 'ﺍ': "ا", 'ة': "ه",
    'ﯾ': "ی", 'ﯿ': "ی", 'ﺒ': "ب", 'ﺖ': "ت", 'ﺪ': "د", 'ﺮ': "ر", 'ﺴ': "س", 'ﺷ': "ش",
    'ﺸ': "ش", 'ﻋ': "ع", 'ﻤ': "م", 'ﻥ': "ن", 'ﻧ': "ن", 'ﻭ': "و", 'ﺭ': "ر", "ﮔ": "گ",
    "۱۴ام": "۱۴ ام",

    # Lowercase Latin letters -> space-padded Persian strings (presumably the
    # spelled-out letter names). Only lowercase keys are needed because
    # `normalizer()` lowercases the text before applying this table.
    "a": " ای ", "b": " بی ", "c": " سی ", "d": " دی ", "e": " ایی ", "f": " اف ",
    "g": " جی ", "h": " اچ ", "i": " آی ", "j": " جی ", "k": " کی ", "l": " ال ",
    "m": " ام ", "n": " ان ", "o": " او ", "p": " پی ", "q": " کیو ", "r": " آر ",
    "s": " اس ", "t": " تی ", "u": " یو ", "v": " وی ", "w": " دبلیو ", "x": " اکس ",
    "y": " وای ", "z": " زد ",
    # Zero-width non-joiner / joiner, LRM / RLM direction marks and the BOM
    # are all replaced by a plain space.
    "\u200c": " ", "\u200d": " ", "\u200e": " ", "\u200f": " ", "\ufeff": " ",
}


def multiple_replace(text, chars_to_mapping):
    """Replace every occurrence of each key of ``chars_to_mapping`` in ``text``.

    All replacements happen in a single left-to-right regex pass, so the output
    of one replacement is never re-scanned by another.

    Args:
        text: Input value; it is converted with ``str()`` first.
        chars_to_mapping: Dict mapping literal substrings to their replacements.

    Returns:
        The text with all mapped substrings replaced. With an empty mapping the
        text is returned unchanged (as a ``str``).
    """
    text = str(text)
    if not chars_to_mapping:
        # An empty alternation would match the empty string at every position
        # and then fail the dictionary lookup.
        return text

    # Longest keys first: regex alternation takes the first alternative that
    # matches, so a short key must not shadow a longer key that starts with it.
    # `sorted` is stable, so keys of equal length keep their insertion order.
    keys = sorted(chars_to_mapping, key=len, reverse=True)
    pattern = "|".join(map(re.escape, keys))
    return re.sub(pattern, lambda m: chars_to_mapping[m.group()], text)

def remove_special_characters(text, chars_to_ignore_regex):
    """Delete every match of ``chars_to_ignore_regex`` from ``text``.

    The remaining text is lowercased and a single trailing space is appended.
    """
    stripped = re.sub(chars_to_ignore_regex, '', text)
    return f"{stripped.lower()} "

def normalizer(row, chars_to_ignore=chars_to_ignore, chars_to_mapping=chars_to_mapping):
    """Normalize the transcript stored in ``row['sentence']``.

    Steps, in order: lowercase and strip, hazm normalization, substring
    replacement via ``chars_to_mapping``, removal of ``chars_to_ignore``,
    whitespace collapsing, and a best-effort conversion of integer tokens to
    words.

    Args:
        row: Mapping with a ``'sentence'`` string; it is updated in place.
        chars_to_ignore: Iterable of characters to strip from the text.
        chars_to_mapping: Dict of substring replacements applied before stripping.

    Returns:
        ``row`` with the normalized ``'sentence'``, or ``None`` when nothing is
        left after normalization.
        NOTE(review): confirm how the caller (``DatasetDict.map``) treats a
        ``None`` result.
    """
    text = row['sentence']
    # The entries are joined verbatim (not escaped), so a "-" between two other
    # entries acts as a range inside the character class.
    chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
    text = text.lower().strip()

    text = _normalizer.normalize(text)
    text = multiple_replace(text, chars_to_mapping)
    text = remove_special_characters(text, chars_to_ignore_regex)
    text = re.sub(" +", " ", text)

    tokens = []
    for word in text.split():
        # Best effort: spell out integer tokens. The token itself is never
        # rebound to an int, so whenever the conversion is not possible (not a
        # number, or the converter is unavailable or fails) the original
        # *string* is kept and the join below cannot fail on a non-str item.
        # NOTE(review): `words` is not defined in the visible part of this
        # notebook -- presumably a number-to-words helper; confirm its import.
        try:
            spelled = words(int(word))
        except Exception:
            spelled = word
        tokens.append(str(spelled))

    text = " ".join(tokens).strip()

    if not text:
        return None

    row['sentence'] = text
    return row

In [None]:
# Load the Persian ("fa") train and validation splits of Common Voice 11.0 and
# keep only shard 0 of 10 of each split.
common_voice = DatasetDict({
    split_name: load_dataset(
        "mozilla-foundation/common_voice_11_0", "fa", split=split_name
    ).shard(num_shards=10, index=0)
    for split_name in ("train", "validation")
})

# Normalize the transcripts, then drop the metadata columns that are not used later.
common_voice = common_voice.map(normalizer)
common_voice = common_voice.remove_columns([
    "accent",
    "age",
    "client_id",
    "down_votes",
    "gender",
    "locale",
    "path",
    "segment",
    "up_votes",
])
print(common_voice)

In [None]:
from transformers import WhisperFeatureExtractor, WhisperTokenizer, WhisperProcessor, WhisperForConditionalGeneration

# Audio front-end (log-Mel features) of the pretrained checkpoint.
feature_extractor = WhisperFeatureExtractor.from_pretrained(
    "openai/whisper-small",
)
# Tokenizer configured for Persian transcription.
tokenizer = WhisperTokenizer.from_pretrained(
    "openai/whisper-small",
    task="transcribe",
    language="persian",
)
# Processor bundling a feature extractor and a tokenizer; used by the data collator.
processor = WhisperProcessor.from_pretrained(
    "openai/whisper-small",
    task="transcribe",
    language="persian",
)

In [None]:
from datasets import Audio
# Re-declare the "audio" column with a 16 kHz sampling rate (presumably so that
# clips are resampled to 16 kHz when they are decoded -- confirm against the
# `datasets.Audio` documentation).
common_voice = common_voice.cast_column("audio", Audio(sampling_rate=16000))

In [None]:
def prepare_dataset(batch):
    """Turn one example into model inputs.

    Reads ``batch["audio"]`` (its ``"array"`` and ``"sampling_rate"`` entries)
    and ``batch["sentence"]``, and adds two fields:

    * ``"input_features"``: the first item of the feature extractor's
      ``input_features`` for the audio array;
    * ``"labels"``: the tokenizer's ``input_ids`` for the sentence.

    Returns the same mapping, updated in place.
    """
    decoded_audio = batch["audio"]

    # Log-Mel input features computed from the raw waveform.
    extracted = feature_extractor(
        decoded_audio["array"],
        sampling_rate=decoded_audio["sampling_rate"],
    )
    batch["input_features"] = extracted.input_features[0]

    # Target transcript encoded as label ids.
    batch["labels"] = tokenizer(batch["sentence"]).input_ids
    return batch

# Apply `prepare_dataset` to every example using 6 worker processes, and drop all
# columns listed for the "train" split so that only the fields added by
# `prepare_dataset` remain.
common_voice = common_voice.map(prepare_dataset, remove_columns=common_voice.column_names["train"], num_proc=6)

In [None]:
import torch

from dataclasses import dataclass
from typing import Any, Dict, List, Union

@dataclass
class DataCollatorSpeechSeq2SeqWithPadding:
    """Collate speech-to-text examples into a padded batch of tensors.

    Audio features and label ids are padded separately (by the processor's
    feature extractor and tokenizer respectively). Label padding is replaced
    with -100 so that it is ignored by the loss.

    Attributes:
        processor: Object exposing ``feature_extractor.pad`` and
            ``tokenizer.pad``.
        decoder_start_token_id: Id of the token the model prepends to the
            decoder inputs. When ``None`` it is looked up in the tokenizer as
            ``"<|startoftranscript|>"``, falling back to ``bos_token_id`` if the
            lookup yields nothing.
    """
    processor: Any
    decoder_start_token_id: Union[int, None] = None

    def _start_token_id(self):
        """Return the id of the leading token to strip from the labels."""
        if self.decoder_start_token_id is not None:
            return self.decoder_start_token_id
        tokenizer = self.processor.tokenizer
        token_id = tokenizer.convert_tokens_to_ids("<|startoftranscript|>")
        return tokenizer.bos_token_id if token_id is None else token_id

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        """Build a batch from ``features``.

        Args:
            features: Examples carrying ``"input_features"`` and ``"labels"``.

        Returns:
            The padded feature batch with an added ``"labels"`` tensor.
        """
        # Inputs and labels have different lengths and need different padding
        # methods, so they are handled separately. The audio inputs only need to
        # be returned as torch tensors.
        input_features = [{"input_features": feature["input_features"]} for feature in features]
        batch = self.processor.feature_extractor.pad(input_features, return_tensors="pt")

        # Pad the tokenized label sequences to the longest one in the batch.
        label_features = [{"input_ids": feature["labels"]} for feature in features]
        labels_batch = self.processor.tokenizer.pad(label_features, return_tensors="pt")

        # Replace padding with -100 so the loss ignores it.
        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)

        # If every label sequence already starts with the decoder start token,
        # cut it here: the model prepends it again when it derives the decoder
        # inputs from the labels. For Whisper this token differs from the
        # tokenizer's bos token, so comparing against bos_token_id never
        # matches and leaves the start token duplicated.
        if (labels[:, 0] == self._start_token_id()).all().cpu().item():
            labels = labels[:, 1:]

        batch["labels"] = labels

        return batch

# Collator instance handed to the trainer; it pads with the Whisper processor loaded above.
data_collator = DataCollatorSpeechSeq2SeqWithPadding(processor=processor)

In [None]:
import evaluate

metric = evaluate.load("wer")
def compute_metrics(pred):
    """Compute the word error rate (in percent) of generated transcripts.

    Args:
        pred: Prediction object exposing ``predictions`` (generated token ids)
            and ``label_ids`` (reference token ids, with -100 marking ignored
            positions).

    Returns:
        ``{"wer": <float>}`` with the WER scaled by 100.
    """
    pred_ids = pred.predictions
    # Work on a copy so the caller's label array keeps its -100 markers.
    label_ids = pred.label_ids.copy()

    # replace -100 with the pad_token_id so the ids can be decoded
    label_ids[label_ids == -100] = tokenizer.pad_token_id

    # we do not want to group tokens when computing the metrics
    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    label_str = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

    wer = 100 * metric.compute(predictions=pred_str, references=label_str)

    return {"wer": wer}

In [None]:
def model_init():
    """Load and return a fresh ``openai/whisper-small`` model with ``use_cache=False``.

    Passed to the trainer as its ``model_init`` factory.
    """
    model = WhisperForConditionalGeneration.from_pretrained(
        "openai/whisper-small",
        use_cache=False,
    )
    return model

In [None]:
from transformers import Seq2SeqTrainingArguments

# Base training configuration. The learning rate and the per-device batch sizes
# are also part of the hyperparameter search space defined further down.
# NOTE(review): `logging_steps` is not set, and the recorded runs show "No log"
# for the training loss -- consider setting it below `max_steps`.
training_args = Seq2SeqTrainingArguments(
    # -- output / checkpointing --
    output_dir="./whisper-small-fa",
    overwrite_output_dir=True,
    save_strategy="no",
    # -- optimisation --
    learning_rate=1e-5,
    warmup_steps=50,
    max_steps=250,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    # -- memory / speed --
    fp16=True,
    gradient_checkpointing=True,
    # -- evaluation (a single evaluation at the final step) --
    evaluation_strategy="steps",
    eval_steps=250,
    per_device_eval_batch_size=8,
    predict_with_generate=True,
    generation_max_length=225,
    metric_for_best_model="wer",
    greater_is_better=False,
    # -- logging --
    report_to=["wandb"],
)

In [None]:
from transformers import Seq2SeqTrainer

# The trainer receives a model factory (`model_init`) rather than a model instance.
trainer = Seq2SeqTrainer(
    model_init=model_init,
    args=training_args,
    # data
    train_dataset=common_voice["train"],
    eval_dataset=common_voice["validation"],
    data_collator=data_collator,
    # the feature extractor is passed in the `tokenizer` slot
    tokenizer=processor.feature_extractor,
    # evaluation
    compute_metrics=compute_metrics,
)

loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 50257,
  "forced_decoder_ids": [
    [
      1,
      50259
    ],
    [
      2,
      50359
    ],
    [
      3,
      50363
    ]
  ],
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "max_leng

In [None]:
def my_hp_space(trial):
    """Sample one hyperparameter configuration from ``trial``.

    Draws a log-uniform learning rate in [1e-9, 1e-4] and picks the train and
    eval per-device batch sizes from {2, 4, 8}.

    NOTE(review): the eval batch size should not influence the WER objective;
    consider dropping it from the search space.

    Returns:
        Dict keyed by the training-argument names to override.
    """
    batch_size_choices = [2, 4, 8]

    learning_rate = trial.suggest_float("learning_rate", 1e-9, 1e-4, log=True)
    train_batch_size = trial.suggest_categorical("per_device_train_batch_size", batch_size_choices)
    eval_batch_size = trial.suggest_categorical("per_device_eval_batch_size", batch_size_choices)

    return {
        "learning_rate": learning_rate,
        "per_device_train_batch_size": train_batch_size,
        "per_device_eval_batch_size": eval_batch_size,
    }

# Run the search with an explicit backend and objective:
#   * backend: several supported search libraries are installed in this
#     environment (optuna, ray[tune], wandb), so the choice is pinned instead of
#     being left to the library's default selection;
#   * objective: minimise the evaluation WER directly. When no objective is
#     given, the default one is derived from all reported evaluation metrics, so
#     adding a metric to `compute_metrics` would silently change the target.
best_run = trainer.hyperparameter_search(
    hp_space=my_hp_space,
    compute_objective=lambda metrics: metrics["eval_wer"],
    n_trials=100,
    direction="minimize",
    backend="optuna",
)
print(best_run)

[I 2023-09-19 11:31:19,665] A new study created in memory with name: no-name-f05b5f58-7cbd-48be-9871-4083c700c5ca
Trial: {'learning_rate': 4.1669245403834655e-09, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

[34m[1mwandb[0m: Currently logged in as: [33mmohammadh[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,Wer
250,No log,4.105794,76.555024


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 11:39:27,778] Trial 0 finished with value: 76.55502392344498 and parameters: {'learning_rate': 4.1669245403834655e-09, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 4}. Best is trial 0 with value: 76.55502392344498.
Trial: {'learning_rate': 9.688127253851351e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,4.10579
eval/runtime,47.0954
eval/samples_per_second,2.187
eval/steps_per_second,0.552
eval/wer,76.55502
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,4.04966
train/train_runtime,484.8575


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113850788873, max=1.0)))

Step,Training Loss,Validation Loss,Wer
250,No log,0.514943,48.644338


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 11:47:33,060] Trial 1 finished with value: 48.644338118022326 and parameters: {'learning_rate': 9.688127253851351e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 4}. Best is trial 1 with value: 48.644338118022326.
Trial: {'learning_rate': 2.383279101856335e-07, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.51494
eval/runtime,38.4496
eval/samples_per_second,2.679
eval/steps_per_second,0.676
eval/wer,48.64434
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.73688
train/train_runtime,482.357


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113655844464018, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,2.058267,77.671451


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 12:04:23,408] Trial 2 finished with value: 77.67145135566189 and parameters: {'learning_rate': 2.383279101856335e-07, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8}. Best is trial 1 with value: 48.644338118022326.
Trial: {'learning_rate': 1.995805431634094e-07, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,2.05827
eval/runtime,165.4024
eval/samples_per_second,0.623
eval/steps_per_second,0.079
eval/wer,77.67145
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,2.6524
train/train_runtime,1007.1652


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113344244464921, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,2.367596,75.598086


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 12:19:31,842] Trial 3 finished with value: 75.5980861244019 and parameters: {'learning_rate': 1.995805431634094e-07, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}. Best is trial 1 with value: 48.644338118022326.
Trial: {'learning_rate': 3.4368244694542374e-09, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50

VBox(children=(Label(value='0.002 MB of 0.036 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.049466…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,2.3676
eval/runtime,72.4227
eval/samples_per_second,1.422
eval/steps_per_second,0.718
eval/wer,75.59809
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,2.80232
train/train_runtime,905.5177


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113601555553031, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,4.274571,76.555024


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 12:34:11,678] Trial 4 finished with value: 76.55502392344498 and parameters: {'learning_rate': 3.4368244694542374e-09, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}. Best is trial 1 with value: 48.644338118022326.
Trial: {'learning_rate': 2.0798908248250458e-08, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,4.27457
eval/runtime,45.9699
eval/samples_per_second,2.241
eval/steps_per_second,1.131
eval/wer,76.55502
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,4.02625
train/train_runtime,876.9891


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112799322209968, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,3.976254,75.917065


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 12:39:19,633] Trial 5 finished with value: 75.9170653907496 and parameters: {'learning_rate': 2.0798908248250458e-08, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 4}. Best is trial 1 with value: 48.644338118022326.
Trial: {'learning_rate': 2.3193363256622462e-09, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,3.97625
eval/runtime,47.0972
eval/samples_per_second,2.187
eval/steps_per_second,0.552
eval/wer,75.91707
train/epoch,0.37
train/global_step,250.0
train/total_flos,2.8858540032e+17
train/train_loss,4.12101
train/train_runtime,305.0119


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112390599979942, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,4.112364,76.395534


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-19 12:53:55,261] Trial 6 pruned. 
Trial: {'learning_rate': 9.789082392396862e-07, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_la

VBox(children=(Label(value='0.035 MB of 0.035 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,4.11236
eval/runtime,46.7479
eval/samples_per_second,2.203
eval/steps_per_second,0.556
eval/wer,76.39553
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111262886673406, max=1.0)…

Step,Training Loss,Validation Loss,Wer
250,No log,1.307825,111.004785


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8
[I 2023-09-19 13:11:16,857] Trial 7 pruned. 
Trial: {'learning_rate': 8.444275555012582e-06, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_la

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,1.30783
eval/runtime,215.3849
eval/samples_per_second,0.478
eval/steps_per_second,0.06
eval/wer,111.00478
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113762100042853, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.470439,48.00638


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 13:25:50,023] Trial 8 finished with value: 48.006379585326954 and parameters: {'learning_rate': 8.444275555012582e-06, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}. Best is trial 8 with value: 48.006379585326954.
Trial: {'learning_rate': 6.403757601731923e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.47044
eval/runtime,44.4699
eval/samples_per_second,2.316
eval/steps_per_second,1.169
eval/wer,48.00638
train/epoch,1.49
train/global_step,250.0
train/total_flos,3.46995085344768e+18
train/train_loss,0.92513
train/train_runtime,870.3833


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113551144484922, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.496899,49.282297


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 13:33:53,711] Trial 9 finished with value: 49.282296650717704 and parameters: {'learning_rate': 6.403757601731923e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 4}. Best is trial 8 with value: 48.006379585326954.
Trial: {'learning_rate': 6.501806437731933e-06, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.4969
eval/runtime,38.0179
eval/samples_per_second,2.709
eval/steps_per_second,0.684
eval/wer,49.2823
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.7014
train/train_runtime,480.857


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113413144468925, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.926725,59.968102


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 13:39:08,830] Trial 10 finished with value: 59.96810207336522 and parameters: {'learning_rate': 6.501806437731933e-06, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2}. Best is trial 8 with value: 48.006379585326954.
Trial: {'learning_rate': 8.369579425725039e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.92672
eval/runtime,55.4043
eval/samples_per_second,1.859
eval/steps_per_second,0.939
eval/wer,59.9681
train/epoch,0.37
train/global_step,250.0
train/total_flos,2.8858540032e+17
train/train_loss,1.28672
train/train_runtime,312.278


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113312199934928, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.506295,45.933014


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 13:47:16,172] Trial 11 finished with value: 45.933014354066984 and parameters: {'learning_rate': 8.369579425725039e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}. Best is trial 11 with value: 45.933014354066984.
Trial: {'learning_rate': 1.4376553055890788e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id"

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.50629
eval/runtime,40.1847
eval/samples_per_second,2.563
eval/steps_per_second,1.294
eval/wer,45.93301
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.71963
train/train_runtime,484.6114


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112750466660751, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.478225,48.803828


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 13:55:20,404] Trial 12 finished with value: 48.803827751196174 and parameters: {'learning_rate': 1.4376553055890788e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}. Best is trial 11 with value: 45.933014354066984.
Trial: {'learning_rate': 1.079348891152709e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id"

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.47822
eval/runtime,39.6256
eval/samples_per_second,2.599
eval/steps_per_second,1.312
eval/wer,48.80383
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.85487
train/train_runtime,481.409


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113869011104624, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.501642,51.834131


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 14:03:24,400] Trial 13 finished with value: 51.8341307814992 and parameters: {'learning_rate': 1.079348891152709e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}. Best is trial 11 with value: 45.933014354066984.
Trial: {'learning_rate': 2.3582410357908704e-06, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.50164
eval/runtime,39.6432
eval/samples_per_second,2.598
eval/steps_per_second,1.312
eval/wer,51.83413
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.95829
train/train_runtime,481.1941


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112724333280413, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,1.236982,114.035088


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2
[I 2023-09-19 14:10:19,260] Trial 14 pruned. 
Trial: {'learning_rate': 3.652544004566609e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,1.23698
eval/runtime,155.2711
eval/samples_per_second,0.663
eval/steps_per_second,0.335
eval/wer,114.03509
train/epoch,0.37
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111260322221723, max=1.0)…

Step,Training Loss,Validation Loss,Wer
250,No log,0.404841,44.816587


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 14:24:46,422] Trial 15 finished with value: 44.81658692185008 and parameters: {'learning_rate': 3.652544004566609e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 3.770070885800355e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.40484
eval/runtime,39.8581
eval/samples_per_second,2.584
eval/steps_per_second,1.305
eval/wer,44.81659
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.44523568480256e+18
train/train_loss,0.56318
train/train_runtime,864.3287


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112809866608587, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.476966,46.730463


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 14:32:50,590] Trial 16 finished with value: 46.730462519936204 and parameters: {'learning_rate': 3.770070885800355e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 9.817395467598084e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.47697
eval/runtime,39.1029
eval/samples_per_second,2.634
eval/steps_per_second,0.332
eval/wer,46.73046
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.70235
train/train_runtime,481.3146


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113763033386527, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.505528,49.601276


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 14:40:58,779] Trial 17 finished with value: 49.601275917065394 and parameters: {'learning_rate': 9.817395467598084e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 2.6767681174379726e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id":

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.50553
eval/runtime,39.6321
eval/samples_per_second,2.599
eval/steps_per_second,1.312
eval/wer,49.60128
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.73292
train/train_runtime,485.3506


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112676633314954, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.406766,51.834131


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2
[I 2023-09-19 14:55:26,771] Trial 18 pruned. 
Trial: {'learning_rate': 2.98508091976232e-06, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_la

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.40677
eval/runtime,44.2202
eval/samples_per_second,2.329
eval/steps_per_second,1.176
eval/wer,51.83413
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112639622297138, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,1.175028,108.61244


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2
[I 2023-09-19 15:01:53,574] Trial 19 pruned. 
Trial: {'learning_rate': 2.1072836130896732e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_

VBox(children=(Label(value='0.002 MB of 0.035 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.050512…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,1.17503
eval/runtime,124.5956
eval/samples_per_second,0.827
eval/steps_per_second,0.417
eval/wer,108.61244
train/epoch,0.37
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113433466622762, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.409981,51.355662


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8
[I 2023-09-19 15:16:37,345] Trial 20 pruned. 
Trial: {'learning_rate': 3.0451651641511873e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.40998
eval/runtime,53.4939
eval/samples_per_second,1.925
eval/steps_per_second,0.243
eval/wer,51.35566
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112543088933712, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.469806,46.570973


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 15:24:45,322] Trial 21 finished with value: 46.57097288676236 and parameters: {'learning_rate': 3.0451651641511873e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 2.7605274602805756e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id":

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.46981
eval/runtime,39.6171
eval/samples_per_second,2.6
eval/steps_per_second,0.328
eval/wer,46.57097
train/epoch,0.74
train/global_step,250.0
train/total_flos,3.17905676992512e+18
train/train_loss,0.7198
train/train_runtime,485.1046


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113653244521831, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.470714,46.889952


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 15:32:51,614] Trial 22 finished with value: 46.889952153110045 and parameters: {'learning_rate': 2.7605274602805756e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 4.4221160928461253e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id"

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.47071
eval/runtime,39.1756
eval/samples_per_second,2.629
eval/steps_per_second,0.332
eval/wer,46.88995
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.73148
train/train_runtime,483.4803


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112577966640756, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.483872,47.527911


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 15:40:57,966] Trial 23 finished with value: 47.52791068580542 and parameters: {'learning_rate': 4.4221160928461253e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 5.167365174356365e-06, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.48387
eval/runtime,39.4742
eval/samples_per_second,2.609
eval/steps_per_second,0.329
eval/wer,47.52791
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.69616
train/train_runtime,483.4858


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112751977796305, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.913611,63.157895


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8
[I 2023-09-19 15:49:57,350] Trial 24 pruned. 
Trial: {'learning_rate': 9.705301864080131e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.91361
eval/runtime,95.8504
eval/samples_per_second,1.075
eval/steps_per_second,0.136
eval/wer,63.15789
train/epoch,0.74
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112662766794932, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.509393,47.368421


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 15:57:59,682] Trial 25 finished with value: 47.368421052631575 and parameters: {'learning_rate': 9.705301864080131e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 1.6578113816176173e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id":

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.50939
eval/runtime,39.5698
eval/samples_per_second,2.603
eval/steps_per_second,1.314
eval/wer,47.36842
train/epoch,0.74
train/global_step,250.0
train/total_flos,1.15434160128e+18
train/train_loss,0.73991
train/train_runtime,479.5181


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113266522200623, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.478763,48.325359


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 16:06:04,280] Trial 26 finished with value: 48.32535885167464 and parameters: {'learning_rate': 1.6578113816176173e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 3.428626313374168e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.47876
eval/runtime,39.2742
eval/samples_per_second,2.623
eval/steps_per_second,0.331
eval/wer,48.32536
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.817
train/train_runtime,481.5682


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113496777802033, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.46273,47.84689


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 16:14:10,725] Trial 27 finished with value: 47.84688995215311 and parameters: {'learning_rate': 3.428626313374168e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 2}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 1.3723683077523056e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.46273
eval/runtime,40.0064
eval/samples_per_second,2.575
eval/steps_per_second,1.3
eval/wer,47.84689
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.71019
train/train_runtime,483.5545


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112654255541404, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.43176,53.748006


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2
[I 2023-09-19 16:28:56,919] Trial 28 pruned. 
Trial: {'learning_rate': 4.7022294893146995e-05, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.43176
eval/runtime,44.9633
eval/samples_per_second,2.291
eval/steps_per_second,1.156
eval/wer,53.74801
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113484866736042, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.533927,51.196172


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8
[I 2023-09-19 16:33:57,389] Trial 29 pruned. 
Trial: {'learning_rate': 1.312127998506964e-06, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.53393
eval/runtime,39.4721
eval/samples_per_second,2.609
eval/steps_per_second,0.329
eval/wer,51.19617
train/epoch,0.37
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113429022306163, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,1.305322,102.870813


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-19 16:44:44,783] Trial 30 pruned. 
Trial: {'learning_rate': 4.414112030585282e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,1.30532
eval/runtime,194.1619
eval/samples_per_second,0.53
eval/steps_per_second,0.134
eval/wer,102.87081
train/epoch,0.74
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113776866598831, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.49785,48.484848


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 16:52:54,948] Trial 31 finished with value: 48.484848484848484 and parameters: {'learning_rate': 4.414112030585282e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 9.805008535253484e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.49785
eval/runtime,39.3371
eval/samples_per_second,2.618
eval/steps_per_second,0.33
eval/wer,48.48485
train/epoch,0.74
train/global_step,250.0
train/total_flos,2.59957728608256e+18
train/train_loss,0.6999
train/train_runtime,487.3592


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111301349996615, max=1.0)…

Step,Training Loss,Validation Loss,Wer
250,No log,0.520562,49.122807


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8
[I 2023-09-19 17:01:04,789] Trial 32 pruned. 
Trial: {'learning_rate': 2.6683621924709037e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.52056
eval/runtime,39.8231
eval/samples_per_second,2.586
eval/steps_per_second,0.326
eval/wer,49.12281
train/epoch,0.74
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113625066678246, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.468642,47.208931


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 17:09:19,011] Trial 33 finished with value: 47.208931419457734 and parameters: {'learning_rate': 2.6683621924709037e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 5.74845963342417e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.46864
eval/runtime,39.8553
eval/samples_per_second,2.584
eval/steps_per_second,0.326
eval/wer,47.20893
train/epoch,0.74
train/global_step,250.0
train/total_flos,1.15434160128e+18
train/train_loss,0.73459
train/train_runtime,491.4166


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112672277765039, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.489742,46.411483


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 17:17:34,198] Trial 34 finished with value: 46.411483253588514 and parameters: {'learning_rate': 5.74845963342417e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 5.9270370038349626e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.48974
eval/runtime,39.9393
eval/samples_per_second,2.579
eval/steps_per_second,0.325
eval/wer,46.41148
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.70015
train/train_runtime,492.2007


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113787199913834, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.482425,46.251994


***** Running Evaluation *****
  Num examples = 103
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 17:25:48,314] Trial 35 finished with value: 46.25199362041467 and parameters: {'learning_rate': 5.9270370038349626e-05, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8}. Best is trial 15 with value: 44.81658692185008.
Trial: {'learning_rate': 6.150443574387932e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.48242
eval/runtime,40.1664
eval/samples_per_second,2.564
eval/steps_per_second,0.324
eval/wer,46.25199
train/epoch,0.74
train/global_step,250.0
train/total_flos,5.7717080064e+17
train/train_loss,0.69819
train/train_runtime,491.3334


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111277481120649, max=1.0)…

Step,Training Loss,Validation Loss,Wer
250,No log,0.39985,39.712919


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 17:40:26,072] Trial 36 finished with value: 39.71291866028708 and parameters: {'learning_rate': 6.150443574387932e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 1.0654085371486253e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.030 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.059345…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.39985
eval/runtime,38.595
eval/samples_per_second,2.669
eval/steps_per_second,0.674
eval/wer,39.71292
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.53859
train/train_runtime,874.7416


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113715088868048, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.44789,45.295056


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 17:55:03,797] Trial 37 finished with value: 45.29505582137161 and parameters: {'learning_rate': 1.0654085371486253e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 9.791946322403728e-06, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.44789
eval/runtime,38.4825
eval/samples_per_second,2.677
eval/steps_per_second,0.676
eval/wer,45.29506
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.81723
train/train_runtime,874.9661


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111257874435978, max=1.0)…

Step,Training Loss,Validation Loss,Wer
250,No log,0.496428,46.251994


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 18:09:52,819] Trial 38 finished with value: 46.25199362041467 and parameters: {'learning_rate': 9.791946322403728e-06, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 5.6435457754185605e-06, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.49643
eval/runtime,45.8916
eval/samples_per_second,2.244
eval/steps_per_second,0.567
eval/wer,46.25199
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.85633
train/train_runtime,885.9745


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111332895557603, max=1.0)…

Step,Training Loss,Validation Loss,Wer
250,No log,0.7978,51.515152


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-19 18:24:40,973] Trial 39 pruned. 
Trial: {'learning_rate': 2.1471854116283815e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.7978
eval/runtime,38.8032
eval/samples_per_second,2.654
eval/steps_per_second,0.67
eval/wer,51.51515
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111373878892563, max=1.0)…

Step,Training Loss,Validation Loss,Wer
250,No log,0.414465,51.515152


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-19 18:39:37,403] Trial 40 pruned. 
Trial: {'learning_rate': 6.801926347113953e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.41446
eval/runtime,45.8573
eval/samples_per_second,2.246
eval/steps_per_second,0.567
eval/wer,51.51515
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112844666543727, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.411479,50.717703


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-19 18:54:33,277] Trial 41 pruned. 
Trial: {'learning_rate': 6.266295321357901e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.41148
eval/runtime,45.9222
eval/samples_per_second,2.243
eval/steps_per_second,0.566
eval/wer,50.7177
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113529855437163, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.401638,41.786284


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 19:09:20,709] Trial 42 finished with value: 41.78628389154705 and parameters: {'learning_rate': 6.266295321357901e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 1.6979784502143475e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.036 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.049430…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.40164
eval/runtime,38.5122
eval/samples_per_second,2.674
eval/steps_per_second,0.675
eval/wer,41.78628
train/epoch,1.49
train/global_step,250.0
train/total_flos,4.62660113793024e+18
train/train_loss,0.53709
train/train_runtime,884.6079


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112672066762268, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.427182,53.110048


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-19 19:24:09,923] Trial 43 pruned. 
Trial: {'learning_rate': 6.0354641999597654e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.42718
eval/runtime,45.8218
eval/samples_per_second,2.248
eval/steps_per_second,0.567
eval/wer,53.11005
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113842811028007, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.400564,41.148325


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 19:38:54,485] Trial 44 finished with value: 41.14832535885167 and parameters: {'learning_rate': 6.0354641999597654e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 9.274824805648656e-06, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

VBox(children=(Label(value='0.002 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.097017…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.40056
eval/runtime,38.5276
eval/samples_per_second,2.673
eval/steps_per_second,0.675
eval/wer,41.14833
train/epoch,1.49
train/global_step,250.0
train/total_flos,2.31330056896512e+18
train/train_loss,0.54068
train/train_runtime,881.7738


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113977244369582, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.460339,46.889952


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 19:53:40,033] Trial 45 finished with value: 46.889952153110045 and parameters: {'learning_rate': 9.274824805648656e-06, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 5.090896319025339e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.46034
eval/runtime,38.8556
eval/samples_per_second,2.651
eval/steps_per_second,0.669
eval/wer,46.88995
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.87988
train/train_runtime,882.7514


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112595644469063, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.41946,41.945774


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 20:08:23,470] Trial 46 finished with value: 41.94577352472089 and parameters: {'learning_rate': 5.090896319025339e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 5.6049259790324566e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.41946
eval/runtime,38.7146
eval/samples_per_second,2.66
eval/steps_per_second,0.672
eval/wer,41.94577
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.53985
train/train_runtime,880.6027


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113648099934734, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.415749,53.110048


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-19 20:23:22,986] Trial 47 pruned. 
Trial: {'learning_rate': 3.3811900386153934e-07, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.41575
eval/runtime,46.2241
eval/samples_per_second,2.228
eval/steps_per_second,0.562
eval/wer,53.11005
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112721311090153, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,1.776094,112.440191


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-19 20:40:03,425] Trial 48 pruned. 
Trial: {'learning_rate': 4.125822862138353e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.002 MB of 0.030 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.060273…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,1.77609
eval/runtime,160.5458
eval/samples_per_second,0.642
eval/steps_per_second,0.162
eval/wer,112.44019
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113670411254538, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.408666,51.036683


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-19 20:54:46,952] Trial 49 pruned. 
Trial: {'learning_rate': 1.9075437307174494e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.40867
eval/runtime,45.5538
eval/samples_per_second,2.261
eval/steps_per_second,0.571
eval/wer,51.03668
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111370254447037, max=1.0)…

Step,Training Loss,Validation Loss,Wer
250,No log,0.424926,43.54067


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 21:09:18,722] Trial 50 finished with value: 43.54066985645933 and parameters: {'learning_rate': 1.9075437307174494e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 1.9026368922579735e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id":

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.42493
eval/runtime,38.1899
eval/samples_per_second,2.697
eval/steps_per_second,0.681
eval/wer,43.54067
train/epoch,1.49
train/global_step,250.0
train/total_flos,4.62660113793024e+18
train/train_loss,0.65847
train/train_runtime,868.9895


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113472511189887, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.424996,43.062201


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 21:23:54,033] Trial 51 finished with value: 43.0622009569378 and parameters: {'learning_rate': 1.9026368922579735e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 1.868865636383963e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.425
eval/runtime,38.5441
eval/samples_per_second,2.672
eval/steps_per_second,0.675
eval/wer,43.0622
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.65899
train/train_runtime,872.5181


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112542966697624, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.426784,53.110048


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-19 21:38:31,646] Trial 52 pruned. 
Trial: {'learning_rate': 6.454584755233857e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.42678
eval/runtime,45.2255
eval/samples_per_second,2.277
eval/steps_per_second,0.575
eval/wer,53.11005
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111353129995728, max=1.0)…

Step,Training Loss,Validation Loss,Wer
250,No log,0.411442,42.902711


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 21:53:01,582] Trial 53 finished with value: 42.90271132376395 and parameters: {'learning_rate': 6.454584755233857e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 6.934471066038605e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.41144
eval/runtime,37.9939
eval/samples_per_second,2.711
eval/steps_per_second,0.684
eval/wer,42.90271
train/epoch,1.49
train/global_step,250.0
train/total_flos,2.31330056896512e+18
train/train_loss,0.53764
train/train_runtime,867.1311


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113378477845496, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.406563,42.902711


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 22:07:45,936] Trial 54 finished with value: 42.90271132376395 and parameters: {'learning_rate': 6.934471066038605e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 7.35029578542228e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.40656
eval/runtime,38.8168
eval/samples_per_second,2.653
eval/steps_per_second,0.67
eval/wer,42.90271
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.53621
train/train_runtime,881.39


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113501855612008, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.407944,41.148325


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 22:22:28,690] Trial 55 finished with value: 41.14832535885167 and parameters: {'learning_rate': 7.35029578542228e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 7.332091448683441e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50

VBox(children=(Label(value='0.002 MB of 0.030 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.059341…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.40794
eval/runtime,38.5574
eval/samples_per_second,2.671
eval/steps_per_second,0.674
eval/wer,41.14833
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.53675
train/train_runtime,879.7654


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111276114453277, max=1.0)…

Step,Training Loss,Validation Loss,Wer
250,No log,0.402636,40.031898


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-19 22:37:08,402] Trial 56 finished with value: 40.03189792663477 and parameters: {'learning_rate': 7.332091448683441e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 9.546583272270006e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

VBox(children=(Label(value='0.002 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.094550…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.40264
eval/runtime,38.6922
eval/samples_per_second,2.662
eval/steps_per_second,0.672
eval/wer,40.0319
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.53584
train/train_runtime,876.8843


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112527666652265, max=1.0…

Step,Training Loss,Validation Loss


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-20 04:46:24,305] Trial 83 finished with value: 41.94577352472089 and parameters: {'learning_rate': 4.99610136837039e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 7.96257436274761e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e3

VBox(children=(Label(value='0.002 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.096980…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.41782
eval/runtime,38.2333
eval/samples_per_second,2.694
eval/steps_per_second,0.68
eval/wer,41.94577
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.54175
train/train_runtime,867.6949


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112656099915815, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.423746,41.786284


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-20 05:00:58,816] Trial 84 finished with value: 41.78628389154705 and parameters: {'learning_rate': 7.96257436274761e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 5.5111105079737786e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.42375
eval/runtime,38.1964
eval/samples_per_second,2.697
eval/steps_per_second,0.681
eval/wer,41.78628
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.53882
train/train_runtime,871.6326


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113581611102239, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.413019,43.859649


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-20 05:15:27,630] Trial 85 finished with value: 43.859649122807014 and parameters: {'learning_rate': 5.5111105079737786e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 2.2073850750759435e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id"

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.41302
eval/runtime,38.0795
eval/samples_per_second,2.705
eval/steps_per_second,0.683
eval/wer,43.85965
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.53788
train/train_runtime,865.9794


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.0111135297556757, max=1.0))…

Step,Training Loss,Validation Loss,Wer
250,No log,0.418698,51.196172


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-20 05:30:04,136] Trial 86 pruned. 
Trial: {'learning_rate': 1.5804396803323326e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.4187
eval/runtime,45.3311
eval/samples_per_second,2.272
eval/steps_per_second,0.574
eval/wer,51.19617
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114361299971481, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.431553,53.748006


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-20 05:44:44,886] Trial 87 pruned. 
Trial: {'learning_rate': 3.568014316961694e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 2}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.43155
eval/runtime,45.5006
eval/samples_per_second,2.264
eval/steps_per_second,0.571
eval/wer,53.74801
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113316266629328, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.405809,51.674641


***** Running Evaluation *****
  Num examples = 103
  Batch size = 2
[I 2023-09-20 05:59:27,789] Trial 88 pruned. 
Trial: {'learning_rate': 8.228384509215973e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.40581
eval/runtime,45.1977
eval/samples_per_second,2.279
eval/steps_per_second,1.151
eval/wer,51.67464
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113421477784869, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.417497,41.148325


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-20 06:14:12,513] Trial 89 finished with value: 41.14832535885167 and parameters: {'learning_rate': 8.228384509215973e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 7.800009412894888e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.4175
eval/runtime,39.0234
eval/samples_per_second,2.639
eval/steps_per_second,0.666
eval/wer,41.14833
train/epoch,1.49
train/global_step,250.0
train/total_flos,4.62660113793024e+18
train/train_loss,0.53531
train/train_runtime,881.7036


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113254811142623, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.427138,52.631579


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-20 06:29:11,853] Trial 90 pruned. 
Trial: {'learning_rate': 5.599048443476005e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.035 MB of 0.035 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.42714
eval/runtime,46.6186
eval/samples_per_second,2.209
eval/steps_per_second,0.558
eval/wer,52.63158
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113875044490367, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.408336,43.38118


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-20 06:43:58,414] Trial 91 finished with value: 43.38118022328549 and parameters: {'learning_rate': 5.599048443476005e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 9.815172119812488e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.40834
eval/runtime,38.9196
eval/samples_per_second,2.646
eval/steps_per_second,0.668
eval/wer,43.38118
train/epoch,1.49
train/global_step,250.0
train/total_flos,2.31330056896512e+18
train/train_loss,0.53999
train/train_runtime,883.6978


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113629166761205, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.471863,44.657097


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-20 06:58:44,946] Trial 92 pruned. 
Trial: {'learning_rate': 4.5475975604118014e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.47186
eval/runtime,38.9581
eval/samples_per_second,2.644
eval/steps_per_second,0.667
eval/wer,44.6571
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113661744457205, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.402014,51.036683


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-20 07:13:43,112] Trial 93 pruned. 
Trial: {'learning_rate': 7.655754305676462e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.40201
eval/runtime,46.2286
eval/samples_per_second,2.228
eval/steps_per_second,0.562
eval/wer,51.03668
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113779555631077, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.412439,43.54067


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-20 07:28:29,336] Trial 94 finished with value: 43.54066985645933 and parameters: {'learning_rate': 7.655754305676462e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 3.112099508511964e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

VBox(children=(Label(value='0.002 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.097022…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.41244
eval/runtime,38.6811
eval/samples_per_second,2.663
eval/steps_per_second,0.672
eval/wer,43.54067
train/epoch,1.49
train/global_step,250.0
train/total_flos,3.46995085344768e+18
train/train_loss,0.53615
train/train_runtime,883.3946


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114388499926362, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.40548,42.902711


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-20 07:43:24,308] Trial 95 finished with value: 42.90271132376395 and parameters: {'learning_rate': 3.112099508511964e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 6.101233758703358e-05, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.40548
eval/runtime,39.3235
eval/samples_per_second,2.619
eval/steps_per_second,0.661
eval/wer,42.90271
train/epoch,1.49
train/global_step,250.0
train/total_flos,1.15665028448256e+18
train/train_loss,0.5835
train/train_runtime,892.136


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113875211028952, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.563064,54.385965


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-20 07:48:31,869] Trial 96 pruned. 
Trial: {'learning_rate': 2.280349231895939e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.56306
eval/runtime,39.1197
eval/samples_per_second,2.633
eval/steps_per_second,0.665
eval/wer,54.38596
train/epoch,0.37
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114172844423188, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.412865,51.515152


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4
[I 2023-09-20 08:03:33,362] Trial 97 pruned. 
Trial: {'learning_rate': 3.997730302981051e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 50257,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_l

VBox(children=(Label(value='0.002 MB of 0.012 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.152737…

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁
train/global_step,▁

0,1
eval/loss,0.41287
eval/runtime,46.2628
eval/samples_per_second,2.226
eval/steps_per_second,0.562
eval/wer,51.51515
train/epoch,1.49
train/global_step,250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113854800027589, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.413402,41.786284


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-20 08:18:16,877] Trial 98 finished with value: 41.78628389154705 and parameters: {'learning_rate': 3.997730302981051e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.
Trial: {'learning_rate': 6.027685225621409e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}
loading configuration file config.json from cache at /home/jupyter/.cache/huggingface/hub/models--openai--whisper-small/snapshots/e34e8ae444c29815eca53e11383ea13b2e362eb0/config.json
Model config WhisperConfig {
  "_name_or_path": "openai/whisper-small",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "WhisperForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": [
    220,
    50257
  ],
  "bos_token_id": 5

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
eval/wer,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/loss,0.4134
eval/runtime,38.8375
eval/samples_per_second,2.652
eval/steps_per_second,0.669
eval/wer,41.78628
train/epoch,1.49
train/global_step,250.0
train/total_flos,2.60188596928512e+18
train/train_loss,0.55599
train/train_runtime,880.7003


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113109389003107, max=1.0…

Step,Training Loss,Validation Loss,Wer
250,No log,0.394662,40.988836


***** Running Evaluation *****
  Num examples = 103
  Batch size = 4


Training completed. Do not forget to share your model on huggingface.co/models =)


[I 2023-09-20 08:33:09,372] Trial 99 finished with value: 40.98883572567783 and parameters: {'learning_rate': 6.027685225621409e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4}. Best is trial 36 with value: 39.71291866028708.


BestRun(run_id='36', objective=39.71291866028708, hyperparameters={'learning_rate': 6.150443574387932e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4})


In [None]:
# Print the best run selected by the hyperparameter search: its run id,
# objective value, and the hyperparameters that produced it.
# NOTE(review): `best_run` is assigned in an earlier cell not visible here —
# presumably the return value of the search call; confirm before re-running.
# NOTE(review): the objective appears to be the eval WER (lower is better),
# judging by the trial logs above — confirm against the search configuration.
print(best_run)

BestRun(run_id='36', objective=39.71291866028708, hyperparameters={'learning_rate': 6.150443574387932e-05, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4})
