In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

!pip install speechbrain -q

import itertools
import json
import os
import random
import shutil
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import speechbrain as sb
import torch
from sklearn.model_selection import train_test_split


!git clone https://github.com/Ishant1/SpeechAnalytics.git
    
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Folder that receives the sampled train/dev/test JSON manifests.
# makedirs(exist_ok=True) avoids the check-then-create race of isdir()+mkdir()
# and also creates missing parent directories.
os.makedirs('/kaggle/working/sample_data', exist_ok=True)

[0mCloning into 'SpeechAnalytics'...
remote: Enumerating objects: 64, done.[K
remote: Counting objects: 100% (64/64), done.[K
remote: Compressing objects: 100% (42/42), done.[K
remote: Total 64 (delta 20), reused 54 (delta 10), pack-reused 0[K
Receiving objects: 100% (64/64), 8.89 KiB | 2.22 MiB/s, done.
Resolving deltas: 100% (20/20), done.


In [2]:
from hyperpyyaml import load_hyperpyyaml

# Path of the HyperPyYAML file that the training cell opens and passes to
# `load_hyperpyyaml` (together with per-run overrides) for every grid point.
hyperparam_file_address = "/kaggle/input/hyperparam-wav2vec2/train_with_wav2vec2_with_dropout.yml"

In [3]:
# Per-dataset description of how to relocate the audio paths stored in the
# manifest JSON files:
#   'file'    -> manifest to read
#   'replace' -> ordered (old, new) substring pairs applied to every 'wav' path
replacement_dict = {
    'asvp': {
        'file': "/kaggle/input/speech-address/asvp_dict.json",
        'replace': [
            ('/content/gdrive/MyDrive/dataset/ERC/ASVP',
             '/kaggle/input/asvpesdspeech-nonspeech-emotional-utterances/ASVP-ESD-Update'),
        ],
    },
    'meld_dev': {
        'file': "/kaggle/input/speech-address/meld_dev_dict.json",
        'replace': [
            ('/content/gdrive/MyDrive/dataset/ERC/MELD-RAW-MP3',
             '/kaggle/input/meld-dataset/MELD-RAW'),
            ('mp3', 'mp4'),
        ],
    },
    'meld_train': {
        'file': "/kaggle/input/speech-address/meld_train_dict.json",
        'replace': [
            ('/content/gdrive/MyDrive/dataset/ERC/MELD-RAW-MP3',
             '/kaggle/input/meld-dataset/MELD-RAW'),
            ('mp3', 'mp4'),
        ],
    },
    'cremad': {
        'file': "/kaggle/input/speech-address/cremad_dict.json",
        'replace': [
            ('/content/gdrive/MyDrive/dataset/ERC/CREMAD',
             '/kaggle/input/cremad'),
        ],
    },
    'iemocap': {
        'file': "/kaggle/input/speech-address/iemocap_dict.json",
        'replace': [
            ('/content/gdrive/MyDrive/dataset/ERC/IEMOCAP',
             '/kaggle/input/iemocapfullrelease/IEMOCAP_full_release'),
        ],
    },
}

In [4]:
# Rewrite every manifest listed in `replacement_dict` so that its 'wav' paths
# point at the Kaggle input folders, and store the result in a writable folder.
new_dir = '/kaggle/working/speech-address'
# Loop-invariant, so create the output folder once up front.
os.makedirs(new_dir, exist_ok=True)

for i, d in replacement_dict.items():
    with open(d['file'], 'r') as f:
        dict_add = json.load(f)

    # Entries are mutated in place; the (old, new) pairs are applied in order.
    for e in dict_add.values():
        for to_replace, after_replace in d['replace']:
            e['wav'] = e['wav'].replace(to_replace, after_replace)

    # Derive the destination from the file name rather than by string-replacing
    # the input folder: if a manifest ever lives outside
    # /kaggle/input/speech-address the old approach would try to overwrite the
    # input file itself.
    out_path = os.path.join(new_dir, os.path.basename(d['file']))
    with open(out_path, 'w') as f:
        json.dump(dict_add, f)

In [5]:
#!/usr/bin/env python3
"""Recipe for training an emotion recognition system from speech data only using IEMOCAP.
The system classifies 4 emotions ( anger, happiness, sadness, neutrality) with wav2vec2.
To run this recipe, do the following:
> python train_with_wav2vec2.py hparams/train_with_wav2vec2.yaml --data_folder /path/to/IEMOCAP_full_release
For more wav2vec2/HuBERT results, please see https://arxiv.org/pdf/2111.02735.pdf
Authors
 * Yingzhi WANG 2021
"""

import os
import sys
import speechbrain as sb
from hyperpyyaml import load_hyperpyyaml
import torch.nn as nn


class EmoIdBrain(sb.Brain):
    """Brain that fine-tunes a wav2vec2 encoder with an emotion classifier head.

    Two optimizers are used: one for the wav2vec2 encoder parameters and one
    for the parameters of ``hparams.model``. Both are stepped together in
    ``fit_batch`` and both learning rates are annealed on the validation
    error rate in ``on_stage_end``.
    """

    def compute_forward(self, batch, stage):
        """Run encoder, pooling, dropout (training only) and the classifier.

        Arguments
        ---------
        batch : object
            Batch exposing ``to(device)`` and a ``sig`` attribute that unpacks
            into ``(wavs, lens)``.
        stage : sb.Stage
            Current stage; dropout is applied only for ``sb.Stage.TRAIN``.

        Returns
        -------
        The output of ``hparams.log_softmax`` applied to the classifier output.
        """
        batch = batch.to(self.device)
        wavs, lens = batch.sig

        outputs = self.modules.wav2vec2(wavs, lens)

        # Pool the encoder output and flatten everything but the batch dim.
        outputs = self.hparams.avg_pool(outputs, lens)
        outputs = outputs.view(outputs.shape[0], -1)

        # Dropout is reached through `hparams`, not `modules`. If the recipe
        # does not also list it under `modules` (the yaml is not visible here),
        # it is never switched to eval mode and would keep dropping units
        # during validation/test. Gating on the stage is a no-op when the
        # module is already in eval mode and fixes evaluation otherwise.
        if stage == sb.Stage.TRAIN:
            outputs = self.hparams.dropout(outputs)

        outputs = self.modules.output_mlp(outputs)
        outputs = self.hparams.log_softmax(outputs)
        return outputs

    def compute_objectives(self, predictions, batch, stage):
        """Compute the loss using the encoded emotion id as the label.

        Outside of training the predictions are also appended to
        ``self.error_metrics`` (created in ``on_stage_start``).
        """
        emoid, _ = batch.emo_encoded

        # Drop dim 1 so the targets meet the input form of the nll loss.
        emoid = emoid.squeeze(1)
        loss = self.hparams.compute_cost(predictions, emoid)
        if stage != sb.Stage.TRAIN:
            self.error_metrics.append(batch.id, predictions, emoid)

        return loss

    def fit_batch(self, batch):
        """Train the parameters on a single batch and return the detached loss."""

        predictions = self.compute_forward(batch, sb.Stage.TRAIN)
        loss = self.compute_objectives(predictions, batch, sb.Stage.TRAIN)
        loss.backward()
        # Step both optimizers only when the gradient check passes.
        if self.check_gradients(loss):
            self.wav2vec2_optimizer.step()
            self.optimizer.step()

        # Gradients are always cleared, even when the step was skipped.
        self.wav2vec2_optimizer.zero_grad()
        self.optimizer.zero_grad()

        return loss.detach()

    def on_stage_start(self, stage, epoch=None):
        """Gets called at the beginning of each epoch.

        Arguments
        ---------
        stage : sb.Stage
            One of sb.Stage.TRAIN, sb.Stage.VALID, or sb.Stage.TEST.
        epoch : int
            The currently-starting epoch. This is passed
            `None` during the test stage.
        """

        # Set up statistics trackers for this stage
        self.loss_metric = sb.utils.metric_stats.MetricStats(
            metric=sb.nnet.losses.nll_loss
        )

        # Set up evaluation-only statistics trackers
        if stage != sb.Stage.TRAIN:
            self.error_metrics = self.hparams.error_stats()

    def on_stage_end(self, stage, stage_loss, epoch=None):
        """Gets called at the end of an epoch.

        Arguments
        ---------
        stage : sb.Stage
            One of sb.Stage.TRAIN, sb.Stage.VALID, sb.Stage.TEST
        stage_loss : float
            The average loss for all of the data processed in this stage.
        epoch : int
            The currently-starting epoch. This is passed
            `None` during the test stage.
        """

        # Store the train loss until the validation stage.
        if stage == sb.Stage.TRAIN:
            self.train_loss = stage_loss

        # Summarize the statistics from the stage for record-keeping.
        else:
            stats = {
                "loss": stage_loss,
                "error_rate": self.error_metrics.summarize("average"),
            }

        # At the end of validation...
        if stage == sb.Stage.VALID:

            # Anneal both learning rates on the validation error rate.
            old_lr, new_lr = self.hparams.lr_annealing(stats["error_rate"])
            sb.nnet.schedulers.update_learning_rate(self.optimizer, new_lr)

            (
                old_lr_wav2vec2,
                new_lr_wav2vec2,
            ) = self.hparams.lr_annealing_wav2vec2(stats["error_rate"])
            sb.nnet.schedulers.update_learning_rate(
                self.wav2vec2_optimizer, new_lr_wav2vec2
            )

            # The train_logger writes a summary to stdout and to the logfile.
            self.hparams.train_logger.log_stats(
                {"Epoch": epoch, "lr": old_lr, "wave2vec_lr": old_lr_wav2vec2},
                train_stats={"loss": self.train_loss},
                valid_stats=stats,
            )

            # Save the current checkpoint and delete previous checkpoints,
            # ranking them by lowest validation error rate.
            self.checkpointer.save_and_keep_only(
                meta=stats, min_keys=["error_rate"]
            )

        # We also write statistics about test data to stdout and to logfile.
        if stage == sb.Stage.TEST:
            self.hparams.train_logger.log_stats(
                {"Epoch loaded": self.hparams.epoch_counter.current},
                test_stats=stats,
            )

    def init_optimizers(self):
        "Initializes the wav2vec2 optimizer and model optimizer"
        self.wav2vec2_optimizer = self.hparams.wav2vec2_opt_class(
            self.modules.wav2vec2.parameters()
        )
        self.optimizer = self.hparams.opt_class(self.hparams.model.parameters())

        # Register both optimizers so their state is saved with checkpoints.
        if self.checkpointer is not None:
            self.checkpointer.add_recoverable(
                "wav2vec2_opt", self.wav2vec2_optimizer
            )
            self.checkpointer.add_recoverable("optimizer", self.optimizer)

    def zero_grad(self, set_to_none=False):
        """Clear the gradients held by both optimizers."""
        self.wav2vec2_optimizer.zero_grad(set_to_none)
        self.optimizer.zero_grad(set_to_none)

In [6]:
def get_data_sample(dataset_dicts, save = True, data_root='sample_data', seed=None):
  """Pool a random sample of several datasets and split it into train/test/valid.

  Arguments
  ---------
  dataset_dicts : dict
      ``{'datasets': {name: {'json': path, 'ratio': float}},
         'splits': {'test': float, 'valid': float, ...}}``.
      ``ratio`` is the fraction of each manifest to keep; ``splits`` holds the
      test and validation fractions of the pooled sample.
  save : bool
      When True, write ``train.json``, ``test.json``, ``dev.json`` and
      ``keys.json`` into ``data_root`` and return None. When False, return
      the three dictionaries instead.
  data_root : str
      Output folder for the JSON files (created if missing).
  seed : int or None
      Optional seed making both the sub-sampling and the splits reproducible.
      With None the module-level ``random`` state and an unseeded
      ``train_test_split`` are used, as before.

  Returns
  -------
  None when ``save`` is True, otherwise ``(train_data, test_data, val_data)``.
  Keys of the returned/saved dictionaries are ``"<dataset name>_<original id>"``.
  """
  rng = random if seed is None else random.Random(seed)

  full_data = {}

  for name, spec in dataset_dicts['datasets'].items():

    with open(spec['json']) as f:
      data_dict = json.load(f)

    # random.sample() needs a real sequence: passing a dict view raises
    # TypeError on Python >= 3.11.
    indexes = list(data_dict)
    indexes_filtered = rng.sample(indexes, int(spec['ratio'] * len(indexes)))

    for utt_id in indexes_filtered:
      full_data[name + '_' + utt_id] = data_dict[utt_id]

  full_keys = list(full_data)
  full_labels = [entry['emo'] for entry in full_data.values()]

  test_ratio = dataset_dicts['splits']['test']
  train_val_keys, test_keys, train_val_label, _ = train_test_split(
      full_keys, full_labels,
      stratify=full_labels,
      test_size=test_ratio,
      random_state=seed)

  # The validation fraction is expressed relative to the full sample, so
  # rescale it to the part that is left after removing the test set.
  val_ratio = dataset_dicts['splits']['valid'] / (1 - test_ratio)

  train_keys, val_keys, _, _ = train_test_split(
      train_val_keys, train_val_label,
      stratify=train_val_label,
      test_size=val_ratio,
      random_state=seed)

  # Sets give O(1) membership tests; iterating full_data keeps its ordering.
  train_keys, test_keys, val_keys = set(train_keys), set(test_keys), set(val_keys)
  train_data = {k: v for k, v in full_data.items() if k in train_keys}
  test_data = {k: v for k, v in full_data.items() if k in test_keys}
  val_data = {k: v for k, v in full_data.items() if k in val_keys}

  if not save:
    return train_data, test_data, val_data

  # Honour data_root instead of the previously hard-coded 'sample_data'.
  os.makedirs(data_root, exist_ok=True)
  keys = {'train': list(train_data.keys()),
          'test': list(test_data.keys()),
          'val': list(val_data.keys()),
          }
  outputs = {'train.json': train_data,
             'test.json': test_data,
             'dev.json': val_data,
             'keys.json': keys,
             }
  for file_name, payload in outputs.items():
    with open(os.path.join(data_root, file_name), 'w') as f:
      json.dump(payload, f)


def get_data_from_ids(ids, dataset_dicts, filename=None):
  """Look up manifest entries for pooled ids of the form ``"<dataset>_<uid>"``.

  Arguments
  ---------
  ids : iterable of str
      Pooled ids, i.e. a dataset name from ``dataset_dicts['datasets']``
      followed by ``'_'`` and the id used inside that dataset's manifest.
  dataset_dicts : dict
      ``{'datasets': {name: {'json': path, ...}}, ...}``.
  filename : str or None
      When given, the result is written there as JSON.

  Returns
  -------
  ``filename`` when it is given, otherwise the ``{pooled id: entry}`` dict.

  Raises
  ------
  KeyError
      If an id does not resolve to an entry of any configured dataset.
  """

  all_ids = {}
  for name, spec in dataset_dicts['datasets'].items():
    with open(spec['json'],'r') as f:
      all_ids[name] = json.load(f)

  # Dataset names may themselves contain '_' (e.g. 'meld_dev'), so the name
  # cannot be recovered by splitting on the first underscore. Try the known
  # names as prefixes instead, longest first so the most specific one wins.
  names_longest_first = sorted(all_ids, key=len, reverse=True)

  final_data = {}
  for pooled_id in ids:
    for name in names_longest_first:
      prefix = name + '_'
      if not pooled_id.startswith(prefix):
        continue
      # Strip only the leading prefix; the uid may contain the same text again.
      uid = pooled_id[len(prefix):]
      if uid in all_ids[name]:
        final_data[pooled_id] = all_ids[name][uid]
        break
    else:
      raise KeyError(pooled_id)

  if filename:
    with open(filename,'w') as f:
        json.dump(final_data,f)
    return filename
  else:
    return final_data


# Audio pipeline: maps the "wav" entry of an item to its loaded signal "sig".
@sb.utils.data_pipeline.takes("wav")
@sb.utils.data_pipeline.provides("sig")
def audio_pipeline(wav):
    """Read the audio referenced by `wav` and return the loaded signal."""
    return sb.dataio.dataio.read_audio(wav)

# Label encoder shared by the label pipeline below and by the cell that later
# loads or creates its label-to-index mapping.
label_encoder = sb.dataio.encoder.CategoricalEncoder()

# Label pipeline: maps the "emo" entry of an item to itself and to "emo_encoded".
@sb.utils.data_pipeline.takes("emo")
@sb.utils.data_pipeline.provides("emo", "emo_encoded")
def label_pipeline(emo):
    """Yield the raw label first, then `label_encoder.encode_label_torch(emo)`."""
    yield emo
    yield label_encoder.encode_label_torch(emo)



In [7]:
# Fractions used for the splits; get_data_sample reads the 'test' and 'valid'
# entries, the remainder becomes the training set.
train_test_valid_splits = {
    'train': 0.5,
    'test': 0.3,
    'valid': 0.2,
}

# Which manifests to pool and what fraction ('ratio') of each one to keep.
dataset_dicts = {
    'datasets': {
        'cremad': {
            'json': "/kaggle/working/speech-address/cremad_dict.json",
            'ratio': 1,
        },
        'iemocap': {
            'json': "/kaggle/working/speech-address/iemocap_dict.json",
            'ratio': 0.5,
        },
        'asvp': {
            'json': "/kaggle/working/speech-address/asvp_dict.json",
            'ratio': 1,
        },
    },
    'splits': train_test_valid_splits,
}

data_root = 'sample_data'
get_data_sample(dataset_dicts, save=True, data_root=data_root)

# Echo through the shell so the message is emitted by a subprocess.
string = f"Created the sample datasets with a split of {train_test_valid_splits}"
os.system(f'echo \"{string}\"')

Created the sample datasets with a split of {'train': 0.5, 'test': 0.3, 'valid': 0.2}


0

In [8]:
# Define datasets. We also connect the dataset with the data processing
# functions defined above.
datasets = {}
data_info = {
    "train": f"{data_root}/train.json",
    "valid": f"{data_root}/dev.json",
    "test": f"{data_root}/test.json",
}
for dataset, json_path in data_info.items():
    datasets[dataset] = sb.dataio.dataset.DynamicItemDataset.from_json(
        json_path=json_path,
        replacements={"data_root": data_root},
        dynamic_items=[audio_pipeline, label_pipeline],
        output_keys=["id", "sig", "emo_encoded"],
    )

# Load or compute the label encoder from the training labels.
# Take a look into lab_enc_file to see the label-to-index mapping.
# The file lives next to the split manifests, so build its path from
# data_root instead of a second hard-coded 'sample_data'.
lab_enc_file = os.path.join(data_root, "label_encoder.txt")
label_encoder.load_or_create(
    path=lab_enc_file,
    from_didatasets=[datasets["train"]],
    output_key="emo",
)

In [9]:
# Hyperparameter grid: one model is trained for every
# (number of epochs, dropout probability, learning rate) combination.
epochs = [3, 5]
dropouts = [0.1, 0.2, 0.5]
learning_rates = [0.0001, 0.001]
# For a quick smoke test, shrink the grid to a single combination, e.g.
# epochs = [1]; dropouts = [0.1]; learning_rates = [0.0001]

combinations = list(itertools.product(epochs,dropouts,learning_rates))

string = f"Strating Training at time: {datetime.now()}"
os.system(f'echo \"{string}\"')

for epoch, dropout, lr in combinations:

    string =f"Stratigng Training for epoch: {epoch}, dropout: {dropout}, lr: {lr}"
    os.system(f'echo \"{string}\"')

    output_folder = f"results/{epoch}-{dropout}-{lr}-model"
    # Environment variables are still exported for any shell snippet that
    # relies on them, although the clean-up below no longer needs them.
    os.environ['OUTPUT_DIR'] = output_folder

    overrides = {'data_folder':'/kaggle/input/iemocapfullrelease/IEMOCAP_full_release',
     'number_of_epochs':epoch,
     'dropout_prob':dropout,
     'lr':lr,
     'output_folder':output_folder,
     'wav2vec2_folder':'wav2vec2_checkpoint'
    }

    # Re-load the recipe for every grid point so each run starts from fresh
    # modules, optimizers, epoch counter and checkpointer.
    with open(hyperparam_file_address) as fin:
        hparams = load_hyperpyyaml(fin, overrides)

    hparams["wav2vec2"] = hparams["wav2vec2"].to(device=device)

    run_opts = {'device':device}
    emo_id_brain = EmoIdBrain(
            modules=hparams["modules"],
            opt_class=hparams["opt_class"],
            hparams=hparams,
            run_opts=run_opts,
            checkpointer=hparams["checkpointer"],
    )

    emo_id_brain.fit(
        epoch_counter=emo_id_brain.hparams.epoch_counter,
        train_set=datasets["train"],
        valid_set=datasets["valid"],
        train_loader_kwargs=hparams["dataloader_options"],
        valid_loader_kwargs=hparams["dataloader_options"],
    )

    save_dir = os.path.join(output_folder, "save")
    save_contents = os.listdir(save_dir)

    # More than one checkpoint folder can be present after training (see the
    # logged directory listings). os.listdir() order is arbitrary, so taking
    # element [0] could export the wrong one. Ask the checkpointer for the
    # checkpoint with the lowest recorded validation error rate instead.
    best_ckpt = hparams["checkpointer"].find_checkpoint(min_key="error_rate")
    if best_ckpt is None:
        raise RuntimeError(f"No checkpoint with an 'error_rate' found in {save_dir}")
    best_ckpt_path = str(best_ckpt.path)
    export_dir = os.path.join(save_dir, "ckpt")

    os.environ['CKPT_DIR'] = os.path.basename(best_ckpt_path)
    os.environ['NEW_CKPT_DIR'] = "ckpt"

    # Move the checkpoint files into a fixed-name folder, leaving out the
    # wav2vec2 optimizer state.
    os.makedirs(export_dir, exist_ok=True)
    for entry in os.listdir(best_ckpt_path):
        if entry == "wav2vec2_opt.ckpt":
            continue
        shutil.move(os.path.join(best_ckpt_path, entry),
                    os.path.join(export_dir, entry))

    # Remove the emptied folder and any other, non-best checkpoint folders.
    shutil.rmtree(best_ckpt_path, ignore_errors=True)
    for name in save_contents:
        if 'CKPT' in name:
            shutil.rmtree(os.path.join(save_dir, name), ignore_errors=True)

    # Bundle the training log and the data split description with the model.
    artifacts = [
        os.path.join(output_folder, "train_log.txt"),
        os.path.join(data_root, "keys.json"),
        os.path.join(data_root, "test.json"),
        os.path.join(data_root, "label_encoder.txt"),
    ]
    for artifact in artifacts:
        # Best effort, like the previous `cp`: a missing file must not abort
        # the remaining grid points.
        if os.path.isfile(artifact):
            shutil.copy(artifact, export_dir)
        else:
            print(f"Skipping missing artifact: {artifact}")

    string = f"Finished Training for epoch: {epoch}, dropout: {dropout}, lr: {lr} at {save_contents}"
    os.system(f'echo \"{string}\"')


# The downloaded wav2vec2 weights are only needed while training.
shutil.rmtree('wav2vec2_checkpoint', ignore_errors=True)
    
    

Strating Training at time: 2023-04-03 10:46:57.940675
Stratigng Training for epoch: 3, dropout: 0.1, lr: 0.0001


Downloading (…)rocessor_config.json:   0%|          | 0.00/159 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.84k [00:00<?, ?B/s]

  "Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 "


Downloading pytorch_model.bin:   0%|          | 0.00/380M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:35<00:00,  3.22it/s, train_loss=0.947]
100%|██████████| 432/432 [00:39<00:00, 10.82it/s]
100%|██████████| 1079/1079 [05:31<00:00,  3.26it/s, train_loss=0.707]
100%|██████████| 432/432 [00:39<00:00, 10.80it/s]
100%|██████████| 1079

Finished Training for epoch: 3, dropout: 0.1, lr: 0.0001 at ['CKPT+2023-04-03+11-05-50+00']
Stratigng Training for epoch: 3, dropout: 0.1, lr: 0.001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:30<00:00,  3.26it/s, train_loss=0.974]
100%|██████████| 432/432 [00:40<00:00, 10.76it/s]
100%|██████████| 1079/1079 [05:32<00:00,  3.24it/s, train_loss=0.735]
100%|██████████| 432/432 [00:40<00:00, 10.73it/s]
100%|██████████| 1079

Finished Training for epoch: 3, dropout: 0.1, lr: 0.001 at ['CKPT+2023-04-03+11-24-43+00']
Stratigng Training for epoch: 3, dropout: 0.2, lr: 0.0001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:31<00:00,  3.25it/s, train_loss=0.97]
100%|██████████| 432/432 [00:40<00:00, 10.78it/s]
100%|██████████| 1079/1079 [05:31<00:00,  3.26it/s, train_loss=0.707]
100%|██████████| 432/432 [00:40<00:00, 10.71it/s]
100%|██████████| 1079/

Finished Training for epoch: 3, dropout: 0.2, lr: 0.0001 at ['CKPT+2023-04-03+11-43-35+00']
Stratigng Training for epoch: 3, dropout: 0.2, lr: 0.001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:30<00:00,  3.26it/s, train_loss=0.98]
100%|██████████| 432/432 [00:40<00:00, 10.79it/s]
100%|██████████| 1079/1079 [05:30<00:00,  3.26it/s, train_loss=0.735]
100%|██████████| 432/432 [00:40<00:00, 10.68it/s]
100%|██████████| 1079/

Finished Training for epoch: 3, dropout: 0.2, lr: 0.001 at ['CKPT+2023-04-03+12-02-28+00']
Stratigng Training for epoch: 3, dropout: 0.5, lr: 0.0001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:31<00:00,  3.26it/s, train_loss=1.01]
100%|██████████| 432/432 [00:40<00:00, 10.74it/s]
100%|██████████| 1079/1079 [05:31<00:00,  3.26it/s, train_loss=0.746]
100%|██████████| 432/432 [00:39<00:00, 10.85it/s]
100%|██████████| 1079/

Finished Training for epoch: 3, dropout: 0.5, lr: 0.0001 at ['CKPT+2023-04-03+12-21-18+00']
Stratigng Training for epoch: 3, dropout: 0.5, lr: 0.001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:32<00:00,  3.25it/s, train_loss=1.03]
100%|██████████| 432/432 [00:40<00:00, 10.73it/s]
100%|██████████| 1079/1079 [05:33<00:00,  3.24it/s, train_loss=0.788]
100%|██████████| 432/432 [00:40<00:00, 10.71it/s]
100%|██████████| 1079/

Finished Training for epoch: 3, dropout: 0.5, lr: 0.001 at ['CKPT+2023-04-03+12-40-16+00']
Stratigng Training for epoch: 5, dropout: 0.1, lr: 0.0001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:34<00:00,  3.23it/s, train_loss=0.962]
100%|██████████| 432/432 [00:40<00:00, 10.54it/s]
100%|██████████| 1079/1079 [05:33<00:00,  3.23it/s, train_loss=0.698]
100%|██████████| 432/432 [00:40<00:00, 10.66it/s]
100%|██████████| 1079

Finished Training for epoch: 5, dropout: 0.1, lr: 0.0001 at ['CKPT+2023-04-03+13-11-55+00']
Stratigng Training for epoch: 5, dropout: 0.1, lr: 0.001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:34<00:00,  3.23it/s, train_loss=0.983]
100%|██████████| 432/432 [00:40<00:00, 10.67it/s]
100%|██████████| 1079/1079 [05:35<00:00,  3.22it/s, train_loss=0.742]
100%|██████████| 432/432 [00:40<00:00, 10.60it/s]
100%|██████████| 1079

Finished Training for epoch: 5, dropout: 0.1, lr: 0.001 at ['CKPT+2023-04-03+13-43-42+00']
Stratigng Training for epoch: 5, dropout: 0.2, lr: 0.0001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:35<00:00,  3.22it/s, train_loss=0.965]
100%|██████████| 432/432 [00:40<00:00, 10.58it/s]
100%|██████████| 1079/1079 [05:35<00:00,  3.22it/s, train_loss=0.722]
100%|██████████| 432/432 [00:40<00:00, 10.63it/s]
100%|██████████| 1079

Finished Training for epoch: 5, dropout: 0.2, lr: 0.0001 at ['CKPT+2023-04-03+14-15-26+00']
Stratigng Training for epoch: 5, dropout: 0.2, lr: 0.001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:32<00:00,  3.24it/s, train_loss=0.976]
100%|██████████| 432/432 [00:40<00:00, 10.72it/s]
100%|██████████| 1079/1079 [05:33<00:00,  3.24it/s, train_loss=0.745]
100%|██████████| 432/432 [00:40<00:00, 10.75it/s]
100%|██████████| 1079

Finished Training for epoch: 5, dropout: 0.2, lr: 0.001 at ['CKPT+2023-04-03+14-40-40+00', 'CKPT+2023-04-03+14-47-00+00']
Stratigng Training for epoch: 5, dropout: 0.5, lr: 0.0001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:34<00:00,  3.23it/s, train_loss=1.02]
100%|██████████| 432/432 [00:40<00:00, 10.68it/s]
100%|██████████| 1079/1079 [05:34<00:00,  3.22it/s, train_loss=0.765]
100%|██████████| 432/432 [00:40<00:00, 10.70it/s]
100%|██████████| 1079/

Finished Training for epoch: 5, dropout: 0.5, lr: 0.0001 at ['CKPT+2023-04-03+15-18-44+00']
Stratigng Training for epoch: 5, dropout: 0.5, lr: 0.001


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_q.bias', 'project_hid.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1079/1079 [05:33<00:00,  3.24it/s, train_loss=1.04]
100%|██████████| 432/432 [00:39<00:00, 10.81it/s]
100%|██████████| 1079/1079 [05:32<00:00,  3.24it/s, train_loss=0.787]
100%|██████████| 432/432 [00:40<00:00, 10.69it/s]
100%|██████████| 1079/

Finished Training for epoch: 5, dropout: 0.5, lr: 0.001 at ['CKPT+2023-04-03+15-50-13+00', 'CKPT+2023-04-03+15-43-55+00']
