In [1]:
from peft import (LoraConfig,IA3Config, PeftType, PrefixTuningConfig,
                  PromptEncoderConfig, PromptTuningConfig, TaskType,
                  get_peft_config, get_peft_model, get_peft_model_state_dict,
                  prepare_model_for_int8_training,
                  prepare_model_for_kbit_training, set_peft_model_state_dict)
from scipy.special import softmax
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import (AutoModelForSequenceClassification,
                          AutoModelForTokenClassification, AutoTokenizer,
                          DataCollatorForTokenClassification,
                          LlamaForSequenceClassification, LlamaTokenizer,
                          Trainer, TrainingArguments,
                          get_linear_schedule_with_warmup, set_seed)
from model_utils import count_trainable_parameters, get_model_size, get_full_model_size
import pandas as pd
from tqdm import tqdm
import yaml
# import sys and append path
import sys
sys.path.append("../")
from peft_trainer import create_peft_config
from loguru import logger as loguru_logger
from argparse import Namespace

In [2]:

arg_dict = {
  "log_save_dir": "/mnt/sdc/niallt/saved_models/peft_training/logs",
  "task_type": "SEQ_CLS",
  "peft_method": "LORA",
  "lora_rank": 8, 
  "lora_alpha": 16,
  "lora_dropout": 0.1,
  "learning_rate": 3e-4,
  "num_virtual_tokens": 10 
}

# convert above args to a namespace object

args = Namespace(**arg_dict)

In [3]:
# model = AutoModelForSequenceClassification.from_pretrained("meta-llama/Llama-2-7b-hf")

In [4]:
# model

In [5]:
# create args


In [3]:


model_type_mappings = {"roberta": "roberta-base",
               "mobile": "nlpie/bio-mobilebert",
               "distil": "nlpie/distil-biobert",
               "tiny": "nlpie/tiny-biobert",
               "llama-7b": "meta-llama/Llama-2-7b-hf",
               "bert": "dmis-lab/biobert-v1.1",
               }

peft_types = ["PROMPT_TUNING","LORA", "PREFIX_TUNING", "P_TUNING"]

def get_number_of_trainable_params(model_type_mappings:dict,
                                   peft_types:list,
                                   task_type:str = "SEQ_CLS",
                                   num_labels:int = 2):

    # set up empty dicts to full for dfs
    model_peft_dict = {}
    
    for model_type in model_type_mappings.keys():
        
        model_dict = {}
        model_name_or_path = model_type_mappings[model_type]
        model_args = dict(pretrained_model_name_or_path=model_name_or_path, 
                          num_labels=num_labels, 
                          output_hidden_states=False, 
                          trust_remote_code=True)

            
        if task_type == "SEQ_CLS":
            model = AutoModelForSequenceClassification.from_pretrained(**model_args)
        elif task_type == "TOKEN_CLS":
            model = AutoModelForTokenClassification.from_pretrained(**model_args)
        
        # falcon model seems to use model config to define pad token and the remote code panicks if you don't set it
        if "falcon" in model_name_or_path:
            model.config.use_cache = False            

        # count total trainable params before peft
        total_trainable_params = count_trainable_parameters(model)
        
        # get model size and full model size too
        model_size_MB, model_size_GB = get_model_size(model)
        full_model_size_MB, full_model_size_GB = get_full_model_size(model)
        
        for peft_method in tqdm(peft_types, desc=f"model type: {model_type}"):
            
            
            # set up some PEFT params
            peft_config, lr = create_peft_config(args, peft_method, model_name_or_path,task_type)
            model = get_peft_model(model, peft_config)
            print(f"peft config is: {peft_config}")
            # print(model)
            model.print_trainable_parameters()
            
            # lets also confirm this directly and save to args
            n_trainable_params = count_trainable_parameters(model)
            # proportion of total trainable params
            n_trainable_params_perc = (n_trainable_params / total_trainable_params) * 100
            
            # get size of peft adapter only
            peft_model_size_MB, peft_model_size_GB = get_model_size(model)
            peft_full_model_size_MB, peft_full_model_size_GB = get_full_model_size(model)
            
            # store the model name, peft method and number of trainable params
            model_dict[peft_method] = {"n_trainable_params": n_trainable_params,
                                 "total_trainable_params": total_trainable_params,
                                 "n_trainable_params_perc": n_trainable_params_perc,
                                 "model_size_MB": model_size_MB,
                                 "model_size_GB": model_size_GB,
                                 "full_model_size_MB": full_model_size_MB,
                                 "full_model_size_GB": full_model_size_GB,
                                 "peft_model_size_MB": peft_model_size_MB,
                                 "peft_model_size_GB": peft_model_size_GB,
                                 "peft_full_model_size_MB": peft_full_model_size_MB,
                                 "peft_full_model_size_GB": peft_full_model_size_GB,}
            
        model_peft_dict[model_type] = model_dict

    return model_peft_dict

In [4]:
all_df = get_number_of_trainable_params(model_type_mappings, peft_types)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model size (MB): 475.49121856689453
Total size (MB): 951.0656957626343


model type: roberta:   0%|          | 0/4 [00:00<?, ?it/s]2023-11-10 11:27:50.842 | INFO     | peft_trainer:create_peft_config:705 - Using PROMPT_TUNING


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='roberta-base', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=12, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 599,810 || all params: 125,246,980 || trainable%: 0.4789017667332178
Model size (MB): 2.2880935668945312


model type: roberta:  25%|██▌       | 1/4 [00:00<00:01,  1.98it/s]2023-11-10 11:27:51.349 | INFO     | peft_trainer:create_peft_config:648 - Using LORA


Total size (MB): 480.15754413604736
peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules={'value', 'query'}, lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={})
trainable params: 887,042 || all params: 125,541,892 || trainable%: 0.7065705206991783
Model size (MB): 3.3837966918945312


model type: roberta:  50%|█████     | 2/4 [00:01<00:01,  1.99it/s]2023-11-10 11:27:51.849 | INFO     | peft_trainer:create_peft_config:699 - Using PREFIX_TUNING


Total size (MB): 482.42277240753174
peft config is: PrefixTuningConfig(peft_type=<PeftType.PREFIX_TUNING: 'PREFIX_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=12, encoder_hidden_size=768, prefix_projection=False)
trainable params: 776,450 || all params: 125,718,532 || trainable%: 0.6176098206428309
Model size (MB): 2.9619216918945312


model type: roberta:  75%|███████▌  | 3/4 [00:01<00:00,  2.03it/s]2023-11-10 11:27:52.330 | INFO     | peft_trainer:create_peft_config:719 - Using P_TUNING


Total size (MB): 482.682110786438
peft config is: PromptEncoderConfig(peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=12, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=128, encoder_num_layers=2, encoder_dropout=0.0)
trainable params: 813,826 || all params: 125,755,908 || trainable%: 0.6471473292531115
Model size (MB): 3.1044998168945312


model type: roberta: 100%|██████████| 4/4 [00:01<00:00,  2.03it/s]

Total size (MB): 482.9772653579712



Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at nlpie/bio-mobilebert and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model size (MB): 93.77637481689453
Total size (MB): 187.98572540283203


model type: mobile:   0%|          | 0/4 [00:00<?, ?it/s]2023-11-10 11:27:54.439 | INFO     | peft_trainer:create_peft_config:705 - Using PROMPT_TUNING


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='nlpie/bio-mobilebert', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=128, num_transformer_submodules=1, num_attention_heads=4, num_layers=24, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 2,306 || all params: 24,585,220 || trainable%: 0.00937961913702623
Model size (MB): 0.00879669189453125


model type: mobile:  25%|██▌       | 1/4 [00:00<00:00,  7.12it/s]2023-11-10 11:27:54.581 | INFO     | peft_trainer:create_peft_config:648 - Using LORA
2023-11-10 11:27:54.582 | INFO     | peft_trainer:create_peft_config:673 - Using mobile config


Total size (MB): 94.25250720977783
peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules={'key', 'value', 'query'}, lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={})
trainable params: 222,210 || all params: 24,806,404 || trainable%: 0.895776751841984
Model size (MB): 0.8476638793945312


model type: mobile:  50%|█████     | 2/4 [00:00<00:00,  5.25it/s]2023-11-10 11:27:54.806 | INFO     | peft_trainer:create_peft_config:699 - Using PREFIX_TUNING


Total size (MB): 96.08641529083252
peft config is: PrefixTuningConfig(peft_type=<PeftType.PREFIX_TUNING: 'PREFIX_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=512, num_transformer_submodules=1, num_attention_heads=4, num_layers=24, encoder_hidden_size=512, prefix_projection=False)
trainable params: 246,786 || all params: 25,050,884 || trainable%: 0.9851388877135034
Model size (MB): 0.9414138793945312


model type: mobile:  75%|███████▌  | 3/4 [00:00<00:00,  5.53it/s]2023-11-10 11:27:54.975 | INFO     | peft_trainer:create_peft_config:719 - Using P_TUNING


Total size (MB): 97.14476490020752
peft config is: PromptEncoderConfig(peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=512, num_transformer_submodules=1, num_attention_heads=4, num_layers=24, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=128, encoder_num_layers=2, encoder_dropout=0.0)
trainable params: 154,370 || all params: 24,958,468 || trainable%: 0.6185075141631289


model type: mobile: 100%|██████████| 4/4 [00:00<00:00,  5.69it/s]

Model size (MB): 0.5888748168945312
Total size (MB): 96.47434902191162



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpie/distil-biobert and are newly initialized: ['classifier.weight', 'bert.pooler.dense.bias', 'classifier.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model size (MB): 250.94824981689453
Total size (MB): 501.94249629974365


model type: distil:   0%|          | 0/4 [00:00<?, ?it/s]2023-11-10 11:27:56.109 | INFO     | peft_trainer:create_peft_config:705 - Using PROMPT_TUNING


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='nlpie/distil-biobert', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=6, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 9,218 || all params: 65,793,796 || trainable%: 0.014010439525331536
Model size (MB): 0.03516387939453125


model type: distil:  25%|██▌       | 1/4 [00:00<00:01,  2.88it/s]2023-11-10 11:27:56.458 | INFO     | peft_trainer:create_peft_config:648 - Using LORA


Total size (MB): 251.0684061050415
peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules={'value', 'query'}, lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={})
trainable params: 148,994 || all params: 65,941,252 || trainable%: 0.22594960738689038
Model size (MB): 0.5683670043945312


model type: distil:  50%|█████     | 2/4 [00:00<00:00,  3.03it/s]2023-11-10 11:27:56.775 | INFO     | peft_trainer:create_peft_config:699 - Using PREFIX_TUNING


Total size (MB): 252.18628025054932
peft config is: PrefixTuningConfig(peft_type=<PeftType.PREFIX_TUNING: 'PREFIX_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=6, encoder_hidden_size=768, prefix_projection=False)
trainable params: 93,698 || all params: 66,025,732 || trainable%: 0.14191133844604706
Model size (MB): 0.35742950439453125


model type: distil:  75%|███████▌  | 3/4 [00:00<00:00,  3.15it/s]2023-11-10 11:27:57.078 | INFO     | peft_trainer:create_peft_config:719 - Using P_TUNING


Total size (MB): 252.30157566070557
peft config is: PromptEncoderConfig(peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=6, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=128, encoder_num_layers=2, encoder_dropout=0.0)
trainable params: 223,234 || all params: 66,155,268 || trainable%: 0.3374394915912063
Model size (MB): 0.8515701293945312


model type: distil: 100%|██████████| 4/4 [00:01<00:00,  3.14it/s]

Total size (MB): 253.29643726348877



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpie/tiny-biobert and are newly initialized: ['classifier.weight', 'bert.pooler.dense.bias', 'classifier.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model size (MB): 52.928016662597656
Total size (MB): 105.88920783996582


model type: tiny:   0%|          | 0/4 [00:00<?, ?it/s]2023-11-10 11:27:57.733 | INFO     | peft_trainer:create_peft_config:705 - Using PROMPT_TUNING
2023-11-10 11:27:57.780 | INFO     | peft_trainer:create_peft_config:648 - Using LORA
model type: tiny:  50%|█████     | 2/4 [00:00<00:00, 19.30it/s]2023-11-10 11:27:57.838 | INFO     | peft_trainer:create_peft_config:699 - Using PREFIX_TUNING
2023-11-10 11:27:57.889 | INFO     | peft_trainer:create_peft_config:719 - Using P_TUNING


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='nlpie/tiny-biobert', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=312, num_transformer_submodules=1, num_attention_heads=12, num_layers=4, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 3,746 || all params: 13,878,508 || trainable%: 0.026991374000721116
Model size (MB): 0.01428985595703125
Total size (MB): 52.99280643463135
peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules={'value', 'query'}, lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={})
train

model type: tiny: 100%|██████████| 4/4 [00:00<00:00, 18.99it/s]


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model size (MB): 25205.046875
Total size (MB): 50538.214926719666


model type: llama-7b:   0%|          | 0/4 [00:00<?, ?it/s]2023-11-10 11:29:11.920 | INFO     | peft_trainer:create_peft_config:705 - Using PROMPT_TUNING


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='meta-llama/Llama-2-7b-hf', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=4096, num_transformer_submodules=1, num_attention_heads=32, num_layers=32, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 49,152 || all params: 6,607,400,960 || trainable%: 0.000743893102561162
Model size (MB): 0.1875
Total size (MB): 25333.551981925964


model type: llama-7b:  25%|██▌       | 1/4 [00:26<01:19, 26.38s/it]2023-11-10 11:29:38.299 | INFO     | peft_trainer:create_peft_config:648 - Using LORA


peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules={'v_proj', 'q_proj'}, lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={})
trainable params: 4,202,496 || all params: 6,611,595,264 || trainable%: 0.06356251149979648
Model size (MB): 16.03125
Total size (MB): 25365.50095653534


model type: llama-7b:  50%|█████     | 2/4 [00:52<00:52, 26.10s/it]2023-11-10 11:30:04.212 | INFO     | peft_trainer:create_peft_config:699 - Using PREFIX_TUNING


peft config is: PrefixTuningConfig(peft_type=<PeftType.PREFIX_TUNING: 'PREFIX_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=4096, num_transformer_submodules=1, num_attention_heads=32, num_layers=32, encoder_hidden_size=4096, prefix_projection=False)
trainable params: 2,629,632 || all params: 6,614,175,744 || trainable%: 0.03975751630708408
Model size (MB): 10.03125
Total size (MB): 25369.359538078308


model type: llama-7b:  75%|███████▌  | 3/4 [01:18<00:25, 25.94s/it]2023-11-10 11:30:29.967 | INFO     | peft_trainer:create_peft_config:719 - Using P_TUNING


peft config is: PromptEncoderConfig(peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=4096, num_transformer_submodules=1, num_attention_heads=32, num_layers=32, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=128, encoder_num_layers=2, encoder_dropout=0.0)
trainable params: 1,118,464 || all params: 6,612,664,576 || trainable%: 0.016913968448654608
Model size (MB): 4.2666015625
Total size (MB): 25357.847754478455


model type: llama-7b: 100%|██████████| 4/4 [01:44<00:00, 26.12s/it]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model size (MB): 413.17676544189453
Total size (MB): 826.4355382919312


model type: bert:   0%|          | 0/4 [00:00<?, ?it/s]2023-11-10 11:30:58.900 | INFO     | peft_trainer:create_peft_config:705 - Using PROMPT_TUNING


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='dmis-lab/biobert-v1.1', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=12, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 9,218 || all params: 108,321,028 || trainable%: 0.0085098896956554
Model size (MB): 0.03516387939453125


model type: bert:  25%|██▌       | 1/4 [00:00<00:01,  1.81it/s]2023-11-10 11:30:59.452 | INFO     | peft_trainer:create_peft_config:648 - Using LORA


Total size (MB): 413.33506870269775
peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules={'value', 'query'}, lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={})
trainable params: 296,450 || all params: 108,615,940 || trainable%: 0.27293415680976474
Model size (MB): 1.1308670043945312


model type: bert:  50%|█████     | 2/4 [00:01<00:01,  1.87it/s]2023-11-10 11:30:59.978 | INFO     | peft_trainer:create_peft_config:699 - Using PREFIX_TUNING


Total size (MB): 415.59956455230713
peft config is: PrefixTuningConfig(peft_type=<PeftType.PREFIX_TUNING: 'PREFIX_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=12, encoder_hidden_size=768, prefix_projection=False)
trainable params: 185,858 || all params: 108,792,580 || trainable%: 0.1708370184804883
Model size (MB): 0.7089920043945312


model type: bert:  75%|███████▌  | 3/4 [00:01<00:00,  1.92it/s]2023-11-10 11:31:00.482 | INFO     | peft_trainer:create_peft_config:719 - Using P_TUNING


Total size (MB): 415.85884189605713
peft config is: PromptEncoderConfig(peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=12, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=128, encoder_num_layers=2, encoder_dropout=0.0)
trainable params: 223,234 || all params: 108,829,956 || trainable%: 0.20512183244841153
Model size (MB): 0.8515701293945312


model type: bert: 100%|██████████| 4/4 [00:02<00:00,  1.92it/s]

Total size (MB): 416.15399646759033





In [12]:
all_df

{'vanilla': {'PROMPT_TUNING': {'n_trainable_params': 1191940,
   'total_trainable_params': 124647170,
   'n_trainable_params_perc': 0.956251152753809,
   'model_size_MB': 475.49121856689453,
   'model_size_GB': 0.46434689313173294,
   'full_model_size_MB': 951.0699663162231,
   'full_model_size_GB': 0.9287792639806867,
   'peft_model_size_MB': 4.5468902587890625,
   'peft_model_size_GB': 0.004440322518348694,
   'peft_full_model_size_MB': 482.42061138153076,
   'peft_full_model_size_GB': 0.47111387830227613},
  'LORA': {'n_trainable_params': 294912,
   'total_trainable_params': 124647170,
   'n_trainable_params_perc': 0.23659742936803138,
   'model_size_MB': 475.49121856689453,
   'model_size_GB': 0.46434689313173294,
   'full_model_size_MB': 951.0699663162231,
   'full_model_size_GB': 0.9287792639806867,
   'peft_model_size_MB': 1.125,
   'peft_model_size_GB': 0.0010986328125,
   'peft_full_model_size_MB': 480.1683073043823,
   'peft_full_model_size_GB': 0.46891436260193586},
  'PREFI

In [5]:
# write to file
with open("../model_type_trainable_model_size.yaml", "w") as f:
    yaml.dump(all_df, f)
    


In [4]:
# reload that yaml file
with open("../model_type_trainable_model_size.yaml", "r") as f:
    all_df = yaml.load(f, Loader=yaml.FullLoader)

In [5]:
# run through they dict keys and nicely print the attributes and values
for model_type in all_df.keys():
    print(f"###### model type: {model_type} ######### \n")
    for peft_method in all_df[model_type].keys():
        print(f"peft method: {peft_method}")
        for attribute in all_df[model_type][peft_method].keys():
            print(f"{attribute}: {all_df[model_type][peft_method][attribute]}")
        print("\n")

###### model type: bert ######### 

peft method: LORA
full_model_size_GB: 0.807065955363214
full_model_size_MB: 826.4355382919312
model_size_GB: 0.40349293500185013
model_size_MB: 413.17676544189453
n_trainable_params: 296450
n_trainable_params_perc: 0.2737005318256615
peft_full_model_size_GB: 0.40585894975811243
peft_full_model_size_MB: 415.59956455230713
peft_model_size_GB: 0.0011043623089790344
peft_model_size_MB: 1.1308670043945312
total_trainable_params: 108311810


peft method: PREFIX_TUNING
full_model_size_GB: 0.807065955363214
full_model_size_MB: 826.4355382919312
model_size_GB: 0.40349293500185013
model_size_MB: 413.17676544189453
n_trainable_params: 185858
n_trainable_params_perc: 0.17159532279997905
peft_full_model_size_GB: 0.4061121502891183
peft_full_model_size_MB: 415.85884189605713
peft_model_size_GB: 0.0006923750042915344
peft_model_size_MB: 0.7089920043945312
total_trainable_params: 108311810


peft method: PROMPT_TUNING
full_model_size_GB: 0.807065955363214
full_model