In [None]:
# Colab only: mount Google Drive at /content/drive; the CSV loaded later is read from
# a path under ./drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install a pinned transformers release (4.28.0) into the Colab runtime.
!pip install transformers==4.28.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
#!pip install accelerate -U

In [None]:
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip uninstall numba
!pip install -U numba

Found existing installation: numba 0.57.0
Uninstalling numba-0.57.0:
  Would remove:
    /usr/local/bin/numba
    /usr/local/lib/python3.10/dist-packages/numba-0.57.0.dist-info/*
    /usr/local/lib/python3.10/dist-packages/numba/*
Proceed (Y/n)? Y
  Successfully uninstalled numba-0.57.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting numba
  Using cached numba-0.57.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.6 MB)
Installing collected packages: numba
Successfully installed numba-0.57.0


In [None]:
import json
import pandas as pd
import librosa
from transformers import Wav2Vec2FeatureExtractor
import datasets
from datasets import load_dataset

from datasets import DatasetDict, load_metric
from transformers import (
    HubertForSequenceClassification,
    PretrainedConfig,
    Trainer,
    TrainingArguments,
    Wav2Vec2FeatureExtractor,
)

import numpy as np

## Data loading and preprocessing

In [None]:
# Read the utterance table from the shared drive as a single split, then encode the
# "Label" column as a class-label feature.
ds = load_dataset(
    "csv",
    data_files='./drive/Shareddrives/SWDA/swb1_d1/data.csv',
    split='train',
).class_encode_column("Label")

In [None]:
def _load_segment(row):
    """Decode one row's audio window (row['start'] to row['end']) as 16 kHz mono samples."""
    begin = row['start']
    samples = librosa.load(
        row["file"],
        sr=16000,
        mono=True,
        offset=begin,
        duration=row['end'] - begin,
    )[0]  # librosa.load returns (samples, sample_rate); only the samples are kept
    return {"array": samples}


# Add an "array" column holding the decoded waveform of every row (4 worker processes).
ds = ds.map(_load_segment, num_proc=4)

In [None]:
# Hold out 10% of the rows as the test split, stratified on the label column.
# A fixed seed makes the shuffle reproducible, so reported metrics are comparable
# across kernel restarts.
ds = ds.train_test_split(
    test_size=0.1,
    stratify_by_column="Label",
    shuffle=True,
    seed=42,
)

In [None]:
# INTRODUCE TRAIN / VAL / TEST SPLITS

# ds["train"] holds the 90% left after the test split. Split it again so 10% of it
# becomes the validation set (overall roughly 81% train / 9% val / 10% test).
# A fixed seed keeps the split reproducible across kernel restarts.
train_valid = ds["train"].train_test_split(shuffle=True, test_size=0.1, seed=42)

# Gather the three splits into a single DatasetDict.
ds = DatasetDict({
    'train': train_valid['train'],
    'test': ds['test'],
    'val': train_valid['test']})

In [None]:
# Checkpoint id shared by the feature extractor here and the config/model loaded later.
model_id = "facebook/hubert-base-ls960"
# Load the feature-extractor configuration published with that checkpoint.
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_id)

In [None]:
import tensorflow as tf

def prepare_dataset(batch, feature_extractor, sampling_rate=16000):
    """Turn a batch of raw waveforms into model inputs.

    Args:
        batch: Batched mapping from ``Dataset.map``; ``batch["array"]`` holds the
            waveforms and ``batch["Label"]`` the class ids.
        feature_extractor: Callable extractor whose result exposes ``input_values``.
        sampling_rate: Sampling rate of the waveforms in Hz (the notebook decodes
            audio at 16000).

    Returns:
        The same ``batch`` with ``input_values`` and ``labels`` columns added.
    """
    # No padding and no tensor conversion here on purpose. Padding at this stage pads
    # every clip to the longest clip of its map-batch and stores those zeros in the
    # dataset, although the data collator already pads each training batch
    # dynamically. When the extractor returns no attention mask, its normalisation
    # would also take the padded zeros into account.
    features = feature_extractor(batch["array"], sampling_rate=sampling_rate)
    batch["input_values"] = features.input_values
    batch["labels"] = batch["Label"]
    return batch

In [None]:
# Apply prepare_dataset to the dataset in batches of 16 rows, using 8 worker processes;
# the feature extractor is forwarded to the function through fn_kwargs.
ds=ds.map(
    prepare_dataset,
    fn_kwargs={"feature_extractor": feature_extractor},
    batch_size=16,
    batched=True,
    num_proc=8,
)

In [None]:
from dataclasses import dataclass
from typing import Dict, List, Optional, Union

import torch
from transformers import Wav2Vec2Processor

INPUT_FIELD = "input_values"
LABEL_FIELD = "labels"


@dataclass
class DataCollatorCTCWithPadding:
    """Collate dataset rows into one padded batch with a ``labels`` tensor.

    Each row's ``INPUT_FIELD`` entry is padded through ``processor.pad`` and each
    row's ``LABEL_FIELD`` entry is stacked into a single tensor.
    """

    # Object providing ``pad``; this notebook passes a Wav2Vec2FeatureExtractor.
    processor: Wav2Vec2Processor
    # Forwarded to ``processor.pad``.
    padding: Union[bool, str] = True
    max_length: Optional[int] = None
    # Declared but not read by ``__call__``.
    max_length_labels: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    # Declared but not read by ``__call__``.
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(
        self, examples: List[Dict[str, Union[List[int], torch.Tensor]]]
    ) -> Dict[str, torch.Tensor]:
        """Pad the inputs of ``examples`` together and attach their labels.

        Args:
            examples: Dataset rows, each carrying ``INPUT_FIELD`` and ``LABEL_FIELD``.

        Returns:
            The padded batch from ``processor.pad`` with an added ``"labels"`` tensor.
        """
        # Separate every row into the part that gets padded and its target.
        inputs_only = []
        targets = []
        for row in examples:
            inputs_only.append({INPUT_FIELD: row[INPUT_FIELD]})
            targets.append(row[LABEL_FIELD])

        padded = self.processor.pad(
            inputs_only,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )
        padded["labels"] = torch.tensor(targets)

        return padded


In [None]:
# DEFINE DATA COLLATOR - TO PAD TRAINING BATCHES DYNAMICALLY
# The feature extractor is passed as `processor`; the collator only calls its `pad` method.
data_collator = DataCollatorCTCWithPadding(
    processor=feature_extractor, padding=True
)

In [None]:
# Display the collator's repr to check its configuration.
data_collator

DataCollatorCTCWithPadding(processor=Wav2Vec2FeatureExtractor {
  "do_normalize": true,
  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
  "feature_size": 1,
  "padding_side": "right",
  "padding_value": 0,
  "return_attention_mask": false,
  "sampling_rate": 16000
}
, padding=True, max_length=None, max_length_labels=None, pad_to_multiple_of=None, pad_to_multiple_of_labels=None)

In [None]:
from transformers import AutoConfig
# Size the classification head from the encoded "Label" feature instead of a
# hard-coded 41, so the number of outputs always matches the label ids that
# class_encode_column produced for this dataset.
num_labels = ds["train"].features["Label"].num_classes
config = AutoConfig.from_pretrained(
    model_id,
    num_labels=num_labels)

In [None]:
# Load the pretrained checkpoint into a sequence-classification model. The recorded
# output below reports the projector and classifier weights as newly initialised.
hubert_model = HubertForSequenceClassification.from_pretrained(
    model_id,
    config=config,  # because we need to update num_labels as per our dataset
    ignore_mismatched_sizes=True 
)

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-base-ls960 and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.weight', 'projector.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# FREEZE LAYERS

# Start from a fully frozen model.
hubert_model.requires_grad_(False)

# Then switch gradients back on for the trailing parameter tensors only.
# NOTE(review): despite its name, layers_freeze_num controls how many trailing layers
# end up trainable; 16 is presumably the number of parameter tensors per encoder
# layer - confirm against hubert_model.named_parameters().
layers_freeze_num = 2
# 4 refers to projector and classifier's weights and biases.
n_layers = 4 + 16 * layers_freeze_num
for tail_param in list(hubert_model.parameters())[-n_layers:]:
    tail_param.requires_grad = True


In [None]:
# Fine-Tuning with Trainer
# Effective batch size per device is 2 * 4 = 8 (per-device batch * gradient accumulation).
# NOTE(review): warmup_steps=500 looks larger than the whole run (the recorded training
# output shows 18 optimizer steps for ~1 epoch), so the learning rate may never finish
# warming up - confirm this is intended.
training_args = TrainingArguments(
    output_dir="output/",
    gradient_accumulation_steps = 4,  # accumulate the gradients before running optimization step
    num_train_epochs=3,  # total number of training epochs
    per_device_train_batch_size=2,  # batch size per device during training
    per_device_eval_batch_size=2,  # batch size for evaluation
    warmup_steps=500,  # number of warmup steps for learning rate scheduler
    weight_decay=0.01,  # strength of weight decay
    logging_steps=10,
    evaluation_strategy="epoch",  # report metric at end of each epoch
    learning_rate=1e-3,  # default = 5e-5
)


In [None]:
def compute_metrics(eval_pred):
    """Compute classification accuracy for the Trainer.

    Args:
        eval_pred: ``(logits, labels)`` pair; the class scores sit on the last
            axis of ``logits``.

    Returns:
        ``{"accuracy": float}`` - the fraction of rows whose highest-scoring class
        equals the label.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # Computed directly with numpy: reloading the "accuracy" metric script through the
    # deprecated load_metric on every evaluation is unnecessary for a plain mean.
    return {"accuracy": float(np.mean(predictions == np.asarray(labels)))}

In [None]:
# START TRAINING
# Build the Trainer: trains on the "train" split, evaluates on "val", pads batches with
# the custom collator and reports the metrics returned by compute_metrics.
trainer = Trainer(
    model=hubert_model, 
    args=training_args,  
    data_collator=data_collator,
    train_dataset=ds["train"],  
    eval_dataset=ds["val"],  
    compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning; the returned training summary is displayed as the cell output.
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy
0,1.9311,1.933411,0.333333


  compute_accuracy_metric = load_metric("accuracy")


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

TrainOutput(global_step=18, training_loss=1.91868347591824, metrics={'train_runtime': 68.285, 'train_samples_per_second': 1.084, 'train_steps_per_second': 0.264, 'total_flos': 6331947731398500.0, 'train_loss': 1.91868347591824, 'epoch': 0.97})

In [None]:
# Run prediction on the held-out "test" split and show the resulting accuracy.
test_results = trainer.predict(ds["test"])
test_results.metrics["test_accuracy"]

0.4

In [None]:
# Show the full metrics dict from the test-split prediction run.
test_results.metrics

{'test_loss': 1.8940742015838623,
 'test_accuracy': 0.4,
 'test_runtime': 1.9906,
 'test_samples_per_second': 5.024,
 'test_steps_per_second': 5.024}