In [1]:

import argparse
import os

#set visible cuda devices
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader
from peft import get_peft_config, get_peft_model, LoraConfig, IA3Config,TaskType
from peft import (
    get_peft_config,
    get_peft_model,
    get_peft_model_state_dict,
    set_peft_model_state_dict,
    PeftType,
    PeftConfig,
    PeftModel,
    PrefixTuningConfig,
    PromptEncoderConfig,
    PromptTuningConfig,
    prepare_model_for_int8_training,
    # AutoPeftModel,
    prepare_model_for_kbit_training # only for latest dev version of peft
)


import evaluate
from datasets import load_dataset, load_from_disk
from transformers import (AutoModelForSequenceClassification,
                          AutoModelForTokenClassification, 
                          AutoModelForCausalLM,
                          AutoModelForMaskedLM,
                          AutoModel,
                        AutoTokenizer,
                        get_linear_schedule_with_warmup,
                        set_seed,
                        LlamaForSequenceClassification,
                        LlamaForCausalLM,
                        LlamaTokenizer, LongformerForMaskedLM, LongformerForSequenceClassification)
import yaml
from tqdm import tqdm
from loguru import logger as loguru_logger
import numpy as np

import sys
sys.path.append("../")

from data_utils.model_utils import count_trainable_parameters, freeze_model, unfreeze_model

In [4]:
# Measure GPU memory for several models in one call.
# Takes a list of model names and returns a dictionary mapping each model name to the GPU memory needed (in GiB).

def get_gpu_memory_needed(model_names):
    """Measure how much GPU memory each model's weights occupy on ``cuda:0``.

    Each model is loaded with ``AutoModel.from_pretrained`` in float16, moved
    to the GPU, measured, and then released before the next model is loaded.
    The recorded value is the increase in ``torch.cuda.memory_allocated``
    caused by that model alone, so entries are independent of each other and
    of anything that was already resident on the device. Nothing is run
    through the model, so memory allocated later (e.g. during a forward pass)
    is not part of the measurement.

    Args:
        model_names: Iterable of model names/paths accepted by
            ``AutoModel.from_pretrained``.

    Returns:
        dict mapping each model name to the GPU memory (in GiB) allocated by
        loading that model onto ``cuda:0``.

    Raises:
        Any exception raised while loading a model or moving it to the GPU
        (for example a CUDA out-of-memory error) propagates to the caller
        after a best-effort cleanup of the partially loaded model.
    """
    import gc  # local import: only needed for the cleanup step below

    device = torch.device('cuda:0')
    gpu_memory_needed = {}
    for model_name in tqdm(model_names):
        # Baseline taken per model so that memory already in use on the device
        # is not attributed to the model being measured.
        baseline_bytes = torch.cuda.memory_allocated(device.index)
        model = None
        try:
            model = AutoModel.from_pretrained(model_name,
                                              torch_dtype=torch.float16)
            model.to(device)
            allocated_bytes = torch.cuda.memory_allocated(device.index) - baseline_bytes
            gpu_memory_needed[model_name] = allocated_bytes / 1024**3
        finally:
            # Drop the model before loading the next one; otherwise every
            # model stays resident, later readings become running totals and
            # the device fills up far sooner than necessary.
            del model
            gc.collect()
            torch.cuda.empty_cache()
    return gpu_memory_needed

In [5]:
# Model names handed to get_gpu_memory_needed, which loads each one with
# AutoModel.from_pretrained; they are processed in the order listed here.
model_names = [
    "nlpie/bio-mobilebert",
    "nlpie/tiny-biobert",
    "roberta-base",
    "nlpie/distil-biobert",
    "dmis-lab/biobert-v1.1",
    "meta-llama/Llama-2-7b-hf",
]

# Dictionary of model name -> GPU memory in GiB, reused by the JSON export cell.
gpu_memory_needed = get_gpu_memory_needed(model_names)

 17%|█▋        | 1/6 [00:03<00:15,  3.10s/it]Some weights of BertModel were not initialized from the model checkpoint at nlpie/tiny-biobert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 33%|███▎      | 2/6 [00:04<00:07,  1.87s/it]Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 50%|█████     | 3/6 [00:08<00:09,  3.12s/it]Some weights of BertModel were not initialized from the model checkpoint at nlpie/distil-biobert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inferen

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

 83%|████████▎ | 5/6 [02:34<00:30, 30.82s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 32.00 MiB (GPU 0; 10.75 GiB total capacity; 10.19 GiB already allocated; 24.50 MiB free; 10.20 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Write the collected measurements to a JSON file in the parent of the
# current working directory.
import json

with open('../gpu_memory_needed.json', mode='w') as results_file:
    json.dump(gpu_memory_needed, fp=results_file)