In [1]:
from dataloaders.mg_for_wavlm_backbone import dastaset_wavlm_backbone
from models.wavlm_finetuning_backbone import finetune_WavLM_backbone, featExtract_finetuned_WavLM
from omegaconf import OmegaConf
from pathlib import Path
from torch.utils.data import DataLoader
from utils.AverageMeter import AverageMeter
from utils.train_eval_utils import set_seed, save_config, train, evaluate
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import wandb
import yaml
import utils.mg_utils as utils
import os
from peft import LoraConfig, get_peft_model

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def create_optimizer(model, cfg=None):
    """
        Build the optimizer for ``model``.

        If ``cfg["wavlm_finetuning"]`` is truthy, returns AdamW (weight_decay=0.01)
        over every parameter whose qualified name does not contain the substring
        "model", using learning rate ``cfg["wavlm_finetune"]["lr2"]``.
        Otherwise returns Adam over all parameters with lr=1e-5.

        Args:
            model (torch.nn.Module): network whose parameters are optimized.
            cfg (dict, optional): configuration mapping. Defaults to the
                notebook-level ``config`` when omitted, so existing
                ``create_optimizer(model)`` calls keep working.
        Returns:
            optimizer (torch.optim.Optimizer): AdamW or Adam as described above.
    """
    if cfg is None:
        # Fall back to the notebook-level configuration.
        cfg = config

    if not cfg["wavlm_finetuning"]:
        return optim.Adam(model.parameters(), lr=1e-5)

    print("="*40)
    # NOTE(review): this is a substring test on the full parameter name, so any
    # parameter with "model" anywhere in its path is left out of the optimizer.
    # For a PEFT-wrapped module (names prefixed "base_model.model.") that would
    # exclude every parameter -- confirm this is only called before wrapping.
    selected_params = [p for n, p in model.named_parameters() if "model" not in n]
    param_groups = [{'params': selected_params, 'lr': cfg["wavlm_finetune"]["lr2"]}]
    return optim.AdamW(param_groups, weight_decay=0.01)

def get_config():
    """
    Read "config_finetune_audio.yaml", merge command-line overrides on top of it,
    and return the merged result as a plain Python container.
    """
    file_settings = OmegaConf.load("config_finetune_audio.yaml")
    cli_settings = OmegaConf.from_cli()
    # Later arguments to merge take precedence, so CLI values win over the file.
    return OmegaConf.to_container(OmegaConf.merge(file_settings, cli_settings))

def print_trainable_parameters(model):
    """
    Print every trainable parameter of ``model`` (name, shape, element count),
    followed by a summary line with the trainable count, the total count and
    the trainable percentage.

    A model without any parameters reports 0.00% instead of raising
    ZeroDivisionError.
    """
    trainable_params = 0
    all_param = 0
    for name, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
            print(name, param.shape, count)
    # Guard the ratio: an empty model has all_param == 0.
    percent = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percent:.2f}"
    )

# Load the run configuration, then override it so the fine-tuning branch of
# create_optimizer is taken and top_k is fixed at 25 for this session.
config = get_config()
config.update(wavlm_finetuning=True)
config["wavlm_finetune"].update(top_k=25)

# Build the backbone, call .train() on it, create its optimizer and list the
# parameters that currently require gradients.
model = finetune_WavLM_backbone(
    config["wavlm_finetune"],
    config["hugging_face_cache_path"],
).train()
optimizer = create_optimizer(model)
print_trainable_parameters(model)

model.masked_spec_embed torch.Size([768]) 768
model.feature_extractor.conv_layers.0.conv.weight torch.Size([512, 1, 10]) 5120
model.feature_extractor.conv_layers.0.layer_norm.weight torch.Size([512]) 512
model.feature_extractor.conv_layers.0.layer_norm.bias torch.Size([512]) 512
model.feature_extractor.conv_layers.1.conv.weight torch.Size([512, 512, 3]) 786432
model.feature_extractor.conv_layers.2.conv.weight torch.Size([512, 512, 3]) 786432
model.feature_extractor.conv_layers.3.conv.weight torch.Size([512, 512, 3]) 786432
model.feature_extractor.conv_layers.4.conv.weight torch.Size([512, 512, 3]) 786432
model.feature_extractor.conv_layers.5.conv.weight torch.Size([512, 512, 2]) 524288
model.feature_extractor.conv_layers.6.conv.weight torch.Size([512, 512, 2]) 524288
model.feature_projection.layer_norm.weight torch.Size([512]) 512
model.feature_projection.layer_norm.bias torch.Size([512]) 512
model.feature_projection.projection.weight torch.Size([768, 512]) 393216
model.feature_project

In [7]:
# Wrap the backbone with PEFT/LoRA using the settings under
# config["wavlm_finetune"]["lora"], then list what is trainable afterwards.
wavlm_config = config["wavlm_finetune"]
n = wavlm_config["num_wavlm_layers"]
lora_config = LoraConfig(
    r=wavlm_config["lora"]["r"],
    lora_alpha=wavlm_config["lora"]["alpha"],
    # Each "@"-separated pattern is expanded n times, with "*" replaced by the
    # indices 11, 10, ... counting down.
    # NOTE(review): the hard-coded 11 presumably assumes 12 encoder layers -- confirm.
    target_modules=[
        pattern.replace("*", str(layer_idx))
        for pattern in wavlm_config["lora"]["target_modules"].split("@")
        for layer_idx in range(11, 11 - n, -1)
    ],
    bias="none",
    lora_dropout=wavlm_config["lora"]["dropout"],
    modules_to_save=wavlm_config["lora"]["modules_to_save"],
)
lora_model = get_peft_model(model, lora_config)
print_trainable_parameters(lora_model)

base_model.model.model.encoder.layers.9.attention.k_proj.lora_A.default.weight torch.Size([32, 768]) 24576
base_model.model.model.encoder.layers.9.attention.k_proj.lora_B.default.weight torch.Size([768, 32]) 24576
base_model.model.model.encoder.layers.10.attention.k_proj.lora_A.default.weight torch.Size([32, 768]) 24576
base_model.model.model.encoder.layers.10.attention.k_proj.lora_B.default.weight torch.Size([768, 32]) 24576
base_model.model.model.encoder.layers.11.attention.k_proj.lora_A.default.weight torch.Size([32, 768]) 24576
base_model.model.model.encoder.layers.11.attention.k_proj.lora_B.default.weight torch.Size([768, 32]) 24576
base_model.model.head_conv1d.modules_to_save.default.weight torch.Size([512, 512, 17]) 4456448
base_model.model.head_conv1d.modules_to_save.default.bias torch.Size([512]) 512
base_model.model.head_conv1.modules_to_save.default.weight torch.Size([512, 768, 1]) 393216
base_model.model.head_conv1.modules_to_save.default.bias torch.Size([512]) 512
base_mod

In [None]:
gnode063                       Tue Oct 31 14:47:16 2023  525.125.06
[0] NVIDIA GeForce RTX 2080 Ti | 82°C,  94 % |  9355 / 11264 MB | kolubex(7532M) kolubex(606M) kolubex(606M) kolubex(606M)
[1] NVIDIA GeForce RTX 2080 Ti | 76°C, 100 % |  7535 / 11264 MB | kolubex(7532M)
[2] NVIDIA GeForce RTX 2080 Ti | 65°C, 100 % |  7535 / 11264 MB | kolubex(7532M)
[3] NVIDIA GeForce RTX 2080 Ti | 70°C, 100 % |  7535 / 11264 MB | kolubex(7532M)

In [None]:
gnode063                       Tue Oct 31 14:49:45 2023  525.125.06
[0] NVIDIA GeForce RTX 2080 Ti | 73°C,  94 % |  7899 / 11264 MB | kolubex(6076M) kolubex(606M) kolubex(606M) kolubex(606M)
[1] NVIDIA GeForce RTX 2080 Ti | 70°C, 100 % |  6079 / 11264 MB | kolubex(6076M)
[2] NVIDIA GeForce RTX 2080 Ti | 63°C, 100 % |  5975 / 11264 MB | kolubex(5972M)
[3] NVIDIA GeForce RTX 2080 Ti | 66°C, 100 % |  5975 / 11264 MB | kolubex(5972M)

In [8]:
# Build the feature-extraction wrapper from a saved fine-tuned checkpoint.
# NOTE(review): this rebinds `model`, replacing the backbone created in the
# earlier training-setup cell, and the checkpoint path is an absolute,
# machine-specific location -- consider moving it into the config.
model = featExtract_finetuned_WavLM("/ssd_scratch/cvit/kolubex/checkpoints/total/WavLM_finetuned_backbone_t25_scene.pt")

In [9]:
# Print the module tree of the loaded feature extractor to inspect its layers.
print(model)

featExtract_finetuned_WavLM(
  (model): PeftModel(
    (base_model): LoraModel(
      (model): finetune_WavLM_backbone(
        (model): WavLMModel(
          (feature_extractor): WavLMFeatureEncoder(
            (conv_layers): ModuleList(
              (0): WavLMGroupNormConvLayer(
                (conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,), bias=False)
                (activation): GELUActivation()
                (layer_norm): GroupNorm(512, 512, eps=1e-05, affine=True)
              )
              (1-4): 4 x WavLMNoLayerNormConvLayer(
                (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,), bias=False)
                (activation): GELUActivation()
              )
              (5-6): 2 x WavLMNoLayerNormConvLayer(
                (conv): Conv1d(512, 512, kernel_size=(2,), stride=(2,), bias=False)
                (activation): GELUActivation()
              )
            )
          )
          (feature_projection): WavLMFeatureProjection(
            (lay

: 