In [1]:
from peft import (LoraConfig,IA3Config, PeftType, PrefixTuningConfig,
                  PromptEncoderConfig, PromptTuningConfig, TaskType,
                  get_peft_config, get_peft_model, get_peft_model_state_dict,
                  prepare_model_for_int8_training,
                  prepare_model_for_kbit_training, set_peft_model_state_dict)
from scipy.special import softmax
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import (AutoModelForSequenceClassification,
                          AutoModelForTokenClassification, AutoTokenizer,
                          DataCollatorForTokenClassification,
                          LlamaForSequenceClassification, LlamaTokenizer,
                          Trainer, TrainingArguments,
                          get_linear_schedule_with_warmup, set_seed)
from model_utils import count_trainable_parameters, get_model_size, get_full_model_size
import pandas as pd
from tqdm import tqdm
import yaml
# import sys and append path
import sys
sys.path.append("../")
from peft_trainer import create_peft_config
from loguru import logger as loguru_logger
from argparse import Namespace

In [2]:

arg_dict = {
  "log_save_dir": "/mnt/sdc/niallt/saved_models/peft_training/logs",
  "task_type": "SEQ_CLS",
  "peft_method": "LORA",
  "lora_rank": 8, 
  "lora_alpha": 16,
  "lora_dropout": 0.1,
  "learning_rate": 3e-4,
  "num_virtual_tokens": 10 
}

# convert above args to a namespace object

args = Namespace(**arg_dict)

In [3]:
# model = AutoModelForSequenceClassification.from_pretrained("meta-llama/Llama-2-7b-hf")

In [4]:
# model

In [5]:
# create args


In [10]:


model_type_mappings = {"vanilla": "roberta-base",
               "mobile": "nlpie/bio-mobilebert",
               "distil": "nlpie/distil-biobert",
               "tiny": "nlpie/tiny-biobert",
               "llama-7b": "meta-llama/Llama-2-7b-hf",
               }

peft_types = ["PROMPT_TUNING","LORA", "PREFIX_TUNING", "P_TUNING"]

def get_number_of_trainable_params(model_type_mappings:dict,
                                   peft_types:list,
                                   task_type:str = "SEQ_CLS",
                                   num_labels:int = 2):

    # set up empty dicts to full for dfs
    model_peft_dict = {}
    
    for model_type in model_type_mappings.keys():
        
        model_dict = {}
        model_name_or_path = model_type_mappings[model_type]
        model_args = dict(pretrained_model_name_or_path=model_name_or_path, 
                          num_labels=num_labels, 
                          output_hidden_states=False, 
                          trust_remote_code=True)

            
        if task_type == "SEQ_CLS":
            model = AutoModelForSequenceClassification.from_pretrained(**model_args)
        elif task_type == "TOKEN_CLS":
            model = AutoModelForTokenClassification.from_pretrained(**model_args)
        
        # falcon model seems to use model config to define pad token and the remote code panicks if you don't set it
        if "falcon" in model_name_or_path:
            model.config.use_cache = False            

        # count total trainable params before peft
        total_trainable_params = count_trainable_parameters(model)
        
        # get model size and full model size too
        model_size_MB, model_size_GB = get_model_size(model)
        full_model_size_MB, full_model_size_GB = get_full_model_size(model)
        
        for peft_method in tqdm(peft_types, desc=f"model type: {model_type}"):
            
            
            # set up some PEFT params
            peft_config, lr = create_peft_config(args, peft_method, model_name_or_path,task_type)
            model = get_peft_model(model, peft_config)
            print(f"peft config is: {peft_config}")
            # print(model)
            model.print_trainable_parameters()
            
            # lets also confirm this directly and save to args
            n_trainable_params = count_trainable_parameters(model)
            # proportion of total trainable params
            n_trainable_params_perc = (n_trainable_params / total_trainable_params) * 100
            
            # get size of peft adapter only
            peft_model_size_MB, peft_model_size_GB = get_model_size(model)
            peft_full_model_size_MB, peft_full_model_size_GB = get_full_model_size(model)
            
            # store the model name, peft method and number of trainable params
            model_dict[peft_method] = {"n_trainable_params": n_trainable_params,
                                 "total_trainable_params": total_trainable_params,
                                 "n_trainable_params_perc": n_trainable_params_perc,
                                 "model_size_MB": model_size_MB,
                                 "model_size_GB": model_size_GB,
                                 "full_model_size_MB": full_model_size_MB,
                                 "full_model_size_GB": full_model_size_GB,
                                 "peft_model_size_MB": peft_model_size_MB,
                                 "peft_model_size_GB": peft_model_size_GB,
                                 "peft_full_model_size_MB": peft_full_model_size_MB,
                                 "peft_full_model_size_GB": peft_full_model_size_GB,}
            
        model_peft_dict[model_type] = model_dict

    return model_peft_dict

In [11]:
all_df = get_number_of_trainable_params(model_type_mappings, peft_types)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should pr

Model size (MB): 475.49121856689453
Total size (MB): 951.0699663162231


model type: vanilla:   0%|          | 0/4 [00:00<?, ?it/s]2023-10-06 11:29:05.025 | INFO     | peft_trainer:create_peft_config:672 - Using PROMPT_TUNING


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='roberta-base', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=12, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 1,191,940 || all params: 125,246,980 || trainable%: 0.9516716490888643
Model size (MB): 4.5468902587890625


model type: vanilla:  25%|██▌       | 1/4 [00:00<00:01,  1.67it/s]2023-10-06 11:29:05.626 | INFO     | peft_trainer:create_peft_config:615 - Using LORA


Total size (MB): 482.42061138153076
peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules=['query', 'value'], lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None)
trainable params: 294,912 || all params: 125,541,892 || trainable%: 0.23491122787921662
Model size (MB): 1.125


model type: vanilla:  50%|█████     | 2/4 [00:01<00:01,  1.42it/s]2023-10-06 11:29:06.406 | INFO     | peft_trainer:create_peft_config:666 - Using PREFIX_TUNING


Total size (MB): 480.1683073043823
peft config is: PrefixTuningConfig(peft_type=<PeftType.PREFIX_TUNING: 'PREFIX_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=12, encoder_hidden_size=768, prefix_projection=False)
trainable params: 184,320 || all params: 125,718,532 || trainable%: 0.1466132296231394
Model size (MB): 0.703125


model type: vanilla:  75%|███████▌  | 3/4 [00:02<00:00,  1.49it/s]2023-10-06 11:29:07.041 | INFO     | peft_trainer:create_peft_config:686 - Using P_TUNING


Total size (MB): 480.4276456832886
peft config is: PromptEncoderConfig(peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=12, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=128, encoder_num_layers=2, encoder_dropout=0.0)
trainable params: 221,696 || all params: 125,755,908 || trainable%: 0.17629072345451952
Model size (MB): 0.845703125


model type: vanilla: 100%|██████████| 4/4 [00:02<00:00,  1.55it/s]

Total size (MB): 480.7228002548218



Some weights of the model checkpoint at nlpie/bio-mobilebert were not used when initializing MobileBertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing MobileBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MobileBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MobileBertForSequenceClassification were not initiali

Model size (MB): 93.77637481689453
Total size (MB): 187.98993587493896


model type: mobile:   0%|          | 0/4 [00:00<?, ?it/s]2023-10-06 11:29:09.659 | INFO     | peft_trainer:create_peft_config:672 - Using PROMPT_TUNING


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='nlpie/bio-mobilebert', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=128, num_transformer_submodules=1, num_attention_heads=4, num_layers=24, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 3,332 || all params: 24,585,220 || trainable%: 0.013552858180646747
Model size (MB): 0.0127105712890625


model type: mobile:  25%|██▌       | 1/4 [00:00<00:00,  5.43it/s]2023-10-06 11:29:09.845 | INFO     | peft_trainer:create_peft_config:615 - Using LORA
2023-10-06 11:29:09.845 | INFO     | peft_trainer:create_peft_config:640 - Using mobile config


Total size (MB): 94.2606315612793
peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules=['query', 'key', 'value'], lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None)
trainable params: 221,184 || all params: 24,806,404 || trainable%: 0.8916407230971486
Model size (MB): 0.84375
Total size (MB): 96.08671188354492


model type: mobile:  50%|█████     | 2/4 [00:00<00:00,  3.61it/s]2023-10-06 11:29:10.187 | INFO     | peft_trainer:create_peft_config:666 - Using PREFIX_TUNING


peft config is: PrefixTuningConfig(peft_type=<PeftType.PREFIX_TUNING: 'PREFIX_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=512, num_transformer_submodules=1, num_attention_heads=4, num_layers=24, encoder_hidden_size=512, prefix_projection=False)
trainable params: 245,760 || all params: 25,050,884 || trainable%: 0.9810432238638764
Model size (MB): 0.9375


model type: mobile:  75%|███████▌  | 3/4 [00:00<00:00,  3.88it/s]2023-10-06 11:29:10.422 | INFO     | peft_trainer:create_peft_config:686 - Using P_TUNING


Total size (MB): 97.14512252807617
peft config is: PromptEncoderConfig(peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=512, num_transformer_submodules=1, num_attention_heads=4, num_layers=24, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=128, encoder_num_layers=2, encoder_dropout=0.0)
trainable params: 153,344 || all params: 24,958,468 || trainable%: 0.6143966849247318
Model size (MB): 0.5849609375


model type: mobile: 100%|██████████| 4/4 [00:00<00:00,  4.01it/s]

Total size (MB): 96.47470664978027



Some weights of the model checkpoint at nlpie/distil-biobert were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpie/distil-biobert an

Model size (MB): 250.94824981689453
Total size (MB): 501.9467668533325


model type: distil:   0%|          | 0/4 [00:00<?, ?it/s]2023-10-06 11:29:11.716 | INFO     | peft_trainer:create_peft_config:672 - Using PROMPT_TUNING


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='nlpie/distil-biobert', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=6, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 10,756 || all params: 65,793,796 || trainable%: 0.016348045946459753
Model size (MB): 0.0410308837890625


model type: distil:  25%|██▌       | 1/4 [00:00<00:01,  2.96it/s]2023-10-06 11:29:12.056 | INFO     | peft_trainer:create_peft_config:615 - Using LORA


Total size (MB): 251.07848262786865
peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules=['query', 'value'], lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None)
trainable params: 147,456 || all params: 65,941,252 || trainable%: 0.22361722825644864
Model size (MB): 0.5625


model type: distil:  50%|█████     | 2/4 [00:00<00:00,  2.37it/s]2023-10-06 11:29:12.535 | INFO     | peft_trainer:create_peft_config:666 - Using PREFIX_TUNING


Total size (MB): 252.18468379974365
peft config is: PrefixTuningConfig(peft_type=<PeftType.PREFIX_TUNING: 'PREFIX_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=6, encoder_hidden_size=768, prefix_projection=False)
trainable params: 92,160 || all params: 66,025,732 || trainable%: 0.13958194359738413
Model size (MB): 0.3515625


model type: distil:  75%|███████▌  | 3/4 [00:01<00:00,  2.71it/s]2023-10-06 11:29:12.842 | INFO     | peft_trainer:create_peft_config:686 - Using P_TUNING


Total size (MB): 252.2999792098999
peft config is: PromptEncoderConfig(peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=768, num_transformer_submodules=1, num_attention_heads=12, num_layers=6, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=128, encoder_num_layers=2, encoder_dropout=0.0)
trainable params: 221,696 || all params: 66,155,268 || trainable%: 0.335114657837982
Model size (MB): 0.845703125


model type: distil: 100%|██████████| 4/4 [00:01<00:00,  2.75it/s]

Total size (MB): 253.2948408126831



Some weights of the model checkpoint at nlpie/tiny-biobert were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpie/tiny-biobert and ar

Model size (MB): 52.928016662597656
Total size (MB): 105.89353847503662


model type: tiny:   0%|          | 0/4 [00:00<?, ?it/s]2023-10-06 11:29:13.502 | INFO     | peft_trainer:create_peft_config:672 - Using PROMPT_TUNING
2023-10-06 11:29:13.575 | INFO     | peft_trainer:create_peft_config:615 - Using LORA


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='nlpie/tiny-biobert', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=312, num_transformer_submodules=1, num_attention_heads=12, num_layers=4, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 4,372 || all params: 13,878,508 || trainable%: 0.03150194530997136
Model size (MB): 0.0166778564453125
Total size (MB): 52.99952507019043
peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules=['query', 'value'], lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None)
trainable params: 39,936 || all params: 13

model type: tiny:  50%|█████     | 2/4 [00:00<00:00, 12.49it/s]2023-10-06 11:29:13.663 | INFO     | peft_trainer:create_peft_config:666 - Using PREFIX_TUNING


Total size (MB): 53.30250358581543
peft config is: PrefixTuningConfig(peft_type=<PeftType.PREFIX_TUNING: 'PREFIX_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=312, num_transformer_submodules=1, num_attention_heads=12, num_layers=4, encoder_hidden_size=312, prefix_projection=False)
trainable params: 24,960 || all params: 13,940,284 || trainable%: 0.17904943686943536
Model size (MB): 0.09521484375
Total size (MB): 53.33155632019043


2023-10-06 11:29:13.719 | INFO     | peft_trainer:create_peft_config:686 - Using P_TUNING
model type: tiny: 100%|██████████| 4/4 [00:00<00:00, 14.51it/s]

peft config is: PromptEncoderConfig(peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=312, num_transformer_submodules=1, num_attention_heads=12, num_layers=4, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=128, encoder_num_layers=2, encoder_dropout=0.0)
trainable params: 99,944 || all params: 14,015,268 || trainable%: 0.7131080190546482
Model size (MB): 0.381256103515625
Total size (MB): 53.90902328491211





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of the model checkpoint at meta-llama/Llama-2-7b-hf were not used when initializing LlamaForSequenceClassification: ['lm_head.weight']
- This IS expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model size (MB): 25205.046875
Total size (MB): 50538.23268032074


model type: llama-7b:   0%|          | 0/4 [00:00<?, ?it/s]2023-10-06 11:30:44.168 | INFO     | peft_trainer:create_peft_config:672 - Using PROMPT_TUNING


peft config is: PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='meta-llama/Llama-2-7b-hf', revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=4096, num_transformer_submodules=1, num_attention_heads=32, num_layers=32, prompt_tuning_init=<PromptTuningInit.RANDOM: 'RANDOM'>, prompt_tuning_init_text=None, tokenizer_name_or_path=None)
trainable params: 57,344 || all params: 6,607,400,960 || trainable%: 0.0008678752863213557
Model size (MB): 0.21875
Total size (MB): 25333.601351737976


model type: llama-7b:  25%|██▌       | 1/4 [00:29<01:29, 29.96s/it]2023-10-06 11:31:14.125 | INFO     | peft_trainer:create_peft_config:615 - Using LORA


peft config is: LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, r=8, target_modules=['q_proj', 'v_proj'], lora_alpha=16, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None)
trainable params: 4,194,304 || all params: 6,611,595,264 || trainable%: 0.0634386079686078
Model size (MB): 16.0
Total size (MB): 25365.488444328308


model type: llama-7b:  50%|█████     | 2/4 [01:18<01:22, 41.07s/it]2023-10-06 11:32:02.976 | INFO     | peft_trainer:create_peft_config:666 - Using PREFIX_TUNING


peft config is: PrefixTuningConfig(peft_type=<PeftType.PREFIX_TUNING: 'PREFIX_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=4096, num_transformer_submodules=1, num_attention_heads=32, num_layers=32, encoder_hidden_size=4096, prefix_projection=False)
trainable params: 2,621,440 || all params: 6,614,175,744 || trainable%: 0.039633661116096286
Model size (MB): 10.0
Total size (MB): 25369.347331047058


model type: llama-7b:  75%|███████▌  | 3/4 [01:49<00:36, 36.36s/it]2023-10-06 11:32:33.734 | INFO     | peft_trainer:create_peft_config:686 - Using P_TUNING


peft config is: PromptEncoderConfig(peft_type=<PeftType.P_TUNING: 'P_TUNING'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='SEQ_CLS', inference_mode=False, num_virtual_tokens=10, token_dim=4096, num_transformer_submodules=1, num_attention_heads=32, num_layers=32, encoder_reparameterization_type=<PromptEncoderReparameterizationType.MLP: 'MLP'>, encoder_hidden_size=128, encoder_num_layers=2, encoder_dropout=0.0)
trainable params: 1,110,272 || all params: 6,612,664,576 || trainable%: 0.016790084953493944
Model size (MB): 4.2353515625
Total size (MB): 25357.835852622986


model type: llama-7b: 100%|██████████| 4/4 [02:21<00:00, 35.47s/it]


In [12]:
all_df

{'vanilla': {'PROMPT_TUNING': {'n_trainable_params': 1191940,
   'total_trainable_params': 124647170,
   'n_trainable_params_perc': 0.956251152753809,
   'model_size_MB': 475.49121856689453,
   'model_size_GB': 0.46434689313173294,
   'full_model_size_MB': 951.0699663162231,
   'full_model_size_GB': 0.9287792639806867,
   'peft_model_size_MB': 4.5468902587890625,
   'peft_model_size_GB': 0.004440322518348694,
   'peft_full_model_size_MB': 482.42061138153076,
   'peft_full_model_size_GB': 0.47111387830227613},
  'LORA': {'n_trainable_params': 294912,
   'total_trainable_params': 124647170,
   'n_trainable_params_perc': 0.23659742936803138,
   'model_size_MB': 475.49121856689453,
   'model_size_GB': 0.46434689313173294,
   'full_model_size_MB': 951.0699663162231,
   'full_model_size_GB': 0.9287792639806867,
   'peft_model_size_MB': 1.125,
   'peft_model_size_GB': 0.0010986328125,
   'peft_full_model_size_MB': 480.1683073043823,
   'peft_full_model_size_GB': 0.46891436260193586},
  'PREFI

In [13]:
# write to file
with open("../model_type_trainable_model_size.yaml", "w") as f:
    yaml.dump(all_df, f)