In [1]:

from transformers import AutoModelForSeq2SeqLM
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType
from peft import AutoPeftModelForCausalLM, AutoPeftModelForSequenceClassification
from transformers import AutoTokenizer
import torch
import os
from datetime import datetime
import re
import numpy as np
# import dataloader from torch
from torch.utils.data import DataLoader
import evaluate
from tqdm import tqdm

# set cuda visible to 0
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
from huggingface_hub import notebook_login, create_repo
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:


# find the model with the latest date in this folder

def extract_datetime(x):
    ''' 
    Extract the datetime from a string

    Args:
    x (str): string to extract datetime from
    
    Returns:
    datetime: datetime object
    
    '''
    
    dt = x.split('/')[-1]
    # print(dt)
    dt = datetime.strptime(dt, '%d-%m-%Y--%H-%M')
    return dt

def get_latest_model(model_dir):
    
    ''' 
    Get the latest model in a directory
    
    Args:
    model_dir (str): directory to search for the latest model
    
    Returns:
    str: path to the latest model
    
    '''
    
    # first list the full paths of all subdirectories
    subdirs = [os.path.join(model_dir, o) for o in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir,o))]
    
    # check we have any subdirs
    if len(subdirs) == 0:
        raise ValueError('No subdirectories found in model_dir')
    
    
    # now loop over the subdirs and extract the datetime from the path and sort by datetime
    subdirs = sorted(subdirs, key=extract_datetime)
    
    # latest model is the last one
    latest_subdir = subdirs[-1]
    
    print(f'Latest subdir: {latest_subdir}')
    
    # now we want to find the checkpoint folder in this directory that has the highest tail number split on '-'
    # lets get the paths inside the latest_subdir
    
    # if we have no checkpoint folder - then we want to just pull the latest model from the latest_subdir
    if any("checkpoint" in s for s in latest_subdir):
    
        subsubdirs = [os.path.join(latest_subdir, o) for o in os.listdir(latest_subdir) if os.path.isdir(os.path.join(latest_subdir,o))]
        
        print(f"subsubdirs: {subsubdirs}")
        

    
    
    
        # now loop over the subdirs and extract the tail number from the path and sort by tail number
        subsubdirs = sorted(subsubdirs, key=lambda x: int(x.split('-')[-1]))
        
        # latest model is the last one
        latest_model = subsubdirs[-1]
        return latest_model
    
    else:
        # check if the latest subdir is empty
        if len(os.listdir(latest_subdir)) == 0:
            return None
        else:
            return latest_subdir


In [4]:
# set directory for a task and model 

task = "mimic-mp"
peft_type = "LORA" # | Full
model_name = "Llama-2-7b-hf" # Llama-2-7b | bio-mobilebert

model_dir = f"/mnt/sdh/effecient_ml/ckpts/{task}/full/{model_name}/{peft_type}/"



In [6]:
full_model_dir = get_latest_model(model_dir)

Latest subdir: /mnt/sdh/effecient_ml/ckpts/mimic-mp/full/Llama-2-7b-hf/LORA/29-09-2023--12-22


In [7]:
full_model_dir

'/mnt/sdh/effecient_ml/ckpts/mimic-mp/full/Llama-2-7b-hf/LORA/29-09-2023--12-22'

In [1]:
!ls {full_model_dir}

ls: cannot access '{full_model_dir}': No such file or directory


# WARNING
The current llama adapter weights we have do not seem to work properly and lead to nan outputs

In [42]:
# from transformers import AutoModelForSequenceClassification
# # try just llama for sanity check
# full_model_dir = "meta-llama/Llama-2-7b-hf"
# reloaded_model = AutoModelForSequenceClassification.from_pretrained("meta-llama/Llama-2-7b-hf")

In [4]:
# set recent llama dir
full_model_dir = "/mnt/sdh/effecient_ml/fewshot_budget/ckpts/mimic-mp/fewshot_4096/Llama-2-7b-hf/LORA/04-03-2024--18-57/checkpoint-2380/"

## Reload from hf hub

In [5]:
full_model_dir = "NTaylor/bio-mobilebert-mimic-mp-lora"

# load using AutoPeftModelForSequenceClassification
reloaded_model = AutoPeftModelForSequenceClassification.from_pretrained(full_model_dir)

  return self.fget.__get__(instance, owner)()
Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at nlpie/bio-mobilebert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# if Llama load in bfloat 16
if "Llama" in model_name:
    reloaded_model = AutoPeftModelForSequenceClassification.from_pretrained(full_model_dir,     # torch.bfloat16 throws errors later                       
                                                                 torch_dtype=torch.bfloat16)
else:
    reloaded_model = AutoPeftModelForSequenceClassification.from_pretrained(full_model_dir)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
reloaded_model.config._name_or_path

'meta-llama/Llama-2-7b-hf'

In [10]:
full_model_dir

'/mnt/sdh/effecient_ml/fewshot_budget/ckpts/mimic-mp/fewshot_4096/Llama-2-7b-hf/LORA/01-03-2024--13-29/checkpoint-2380/'

In [6]:
# check dtype of model weights
# for name, param in reloaded_model.named_parameters():
#     print(name, param.dtype)

In [6]:
# some may not have a tokenizer saved - I think the library changed at some point and now saves everything? 
try:
    tokenizer = AutoTokenizer.from_pretrained(full_model_dir)

except:
    print(f"Didn't have a tokenizer saved for {full_model_dir}")
    tokenizer = AutoTokenizer.from_pretrained(reloaded_model.config._name_or_path)
    # and now save to the same repo as the full model
    tokenizer.save_pretrained(full_model_dir)
    

    
if getattr(tokenizer, "pad_token_id") is None:
    print(f"Adding pad token manually! Setting pad token to eos token: {tokenizer.eos_token_id}")
    tokenizer.pad_token_id = tokenizer.eos_token_id   
    
# set config token id
if "Llama" in model_name:
    reloaded_model.config.pad_token_id = tokenizer.eos_token_id

Didn't have a tokenizer saved for NTaylor/bio-mobilebert-mimic-mp-lora


In [7]:
tokenizer

MobileBertTokenizerFast(name_or_path='nlpie/bio-mobilebert', vocab_size=30522, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [None]:
# reloaded_model

In [23]:
model_name

'Llama-2-7b-hf'

In [24]:
# lets test pushing to hub
reloaded_model.push_to_hub(f"NTaylor/{model_name}-mimic-mp-lora", use_auth_token=True)



adapter_model.safetensors:   0%|          | 0.00/33.6M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/NTaylor/Llama-2-7b-hf-mimic-mp-lora/commit/bd5f6176abf109e50263d801e2822977b9552896', commit_message='Upload model', commit_description='', oid='bd5f6176abf109e50263d801e2822977b9552896', pr_url=None, pr_revision=None, pr_num=None)

**warning** 

The AutoPeftModel... seems to actually work based on the eval performance. 
The warning above is related to the base model being loaded and I guess the library doesn't notice that the adapter weights include the classifier head.

In [12]:
reloaded_model

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): LlamaForSequenceClassification(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): Linear(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
        

In [14]:
# reloaded_model.classifier.modules_to_save.default.weight

reloaded_model.base_model.model.model.layers[31].self_attn.k_proj.weight

Parameter containing:
tensor([[-0.0126, -0.0219,  0.0136,  ...,  0.0066, -0.0023, -0.0223],
        [ 0.0250, -0.0125,  0.0127,  ...,  0.0085, -0.0003, -0.0071],
        [ 0.0091,  0.0018, -0.0088,  ..., -0.0028,  0.0084, -0.0276],
        ...,
        [ 0.0237, -0.0330, -0.0233,  ..., -0.0121, -0.0133, -0.0488],
        [ 0.0159,  0.0172,  0.0170,  ..., -0.0229, -0.0216,  0.0198],
        [-0.0771, -0.0320,  0.0109,  ..., -0.0105, -0.0004,  0.0040]],
       dtype=torch.float16)

Not sure the autopeft model is working well with all models 

## Function to loop over many models

In [13]:
def push_models_to_hub(model_name, task, peft_type, use_auth_token=True):
    
    # set the model dir dynamically base on the task and model name
    model_dir = f"/mnt/sdh/effecient_ml/ckpts/{task}/full/{model_name}/{peft_type}/"
    
    # get the latest model
    full_model_dir = get_latest_model(model_dir)
    
    # check if full_model_dir is None
    if full_model_dir is None:
        print(f"No models found in {model_dir}")
    else:
        print(f"Pushing {full_model_dir} to hub")
        # reload using PEFT
        reloaded_model = AutoPeftModelForSequenceClassification.from_pretrained(full_model_dir)
        
        # some may not have a tokenizer saved - I think the library changed at some point and now saves everything? 
        try:
            tokenizer = AutoTokenizer.from_pretrained(full_model_dir)
        except:
            print(f"Didn't have a tokenizer saved for {full_model_dir}")
            tokenizer = AutoTokenizer.from_pretrained(reloaded_model.config._name_or_path)
            # and now save to the same repo as the full model
            tokenizer.save_pretrained(full_model_dir)
        # push to hub
        reloaded_model.push_to_hub(f"NTaylor/{model_name}-{task}-{peft_type}", use_auth_token=use_auth_token)

In [None]:
# models to push
models_to_push = [
                    # "clinical-mobilebert",                    
                    # "bio-mobilebert",
                    # "clinical-distilbert",
                    # "distil-biobert",
                    # "tiny-biobert",
                    # "tiny-clinicalbert",
                    "Llama-2-7b-hf"
                        ]

task = "mimic-mp"
# pass through funciton
for model_name in tqdm(models_to_push):
    print(f"############## Pushing {model_name} ##############")
    push_models_to_hub(model_name, task, "LORA", use_auth_token=True)
    # push_models_to_hub(model_name, task, "Full", use_auth_token=True)

  0%|          | 0/1 [00:00<?, ?it/s]

############## Pushing Llama-2-7b-hf ##############
Latest subdir: /mnt/sdh/effecient_ml/ckpts/mimic-mp/full/Llama-2-7b-hf/LORA/29-09-2023--12-22
Pushing /mnt/sdh/effecient_ml/ckpts/mimic-mp/full/Llama-2-7b-hf/LORA/29-09-2023--12-22 to hub


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


adapter_model.safetensors:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

100%|██████████| 1/1 [00:09<00:00,  9.45s/it]


# Try more manual reload

In [8]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import PeftConfig, PeftModel

In [9]:
# load config
config = PeftConfig.from_pretrained(full_model_dir)
# load base model 
model = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path, num_labels = 2)

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at nlpie/bio-mobilebert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

In [None]:
config

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='nlpie/bio-mobilebert', revision=None, task_type='SEQ_CLS', inference_mode=True, r=8, target_modules={'value', 'query', 'key'}, lora_alpha=8, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={})

In [None]:
reloaded_model = PeftModel.from_pretrained(model, full_model_dir)

In [None]:
reloaded_model

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): MobileBertForSequenceClassification(
      (mobilebert): MobileBertModel(
        (embeddings): MobileBertEmbeddings(
          (word_embeddings): Embedding(30522, 128, padding_idx=0)
          (position_embeddings): Embedding(512, 512)
          (token_type_embeddings): Embedding(2, 512)
          (embedding_transformation): Linear(in_features=384, out_features=512, bias=True)
          (LayerNorm): NoNorm()
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (encoder): MobileBertEncoder(
          (layer): ModuleList(
            (0-23): 24 x MobileBertLayer(
              (attention): MobileBertAttention(
                (self): MobileBertSelfAttention(
                  (query): Linear(
                    in_features=128, out_features=128, bias=True
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
         

In [None]:
# now merge and unload
# reloaded_model.merge_and_unload()

# Test evaluation performance

In [11]:
import yaml
with open('../datasets.yaml', 'r') as f:
    datasets = yaml.load(f, yaml.FullLoader)

try:
    dataset_info = datasets[task]

except KeyError:
    print(f"Task name {task} not in datasets.yaml. Available tasks are: {list(datasets.keys())}")
    exit(0)


In [13]:
dataset_info

{'training_data_dir': '/mnt/sdd/efficient_ml_data/datasets/mimic3-clinical-outcomes/mp',
 'eval_data_dir': '/mnt/sdd/efficient_ml_data/datasets/mimic3-clinical-outcomes/mp',
 'data_dir': '',
 'training_file': 'train.csv',
 'validation_file': 'valid.csv',
 'test_file': 'test.csv',
 'task_type': 'SEQ_CLS',
 'label_name': 'hospital_expire_flag',
 'text_column': 'text',
 'remove_columns': ['text']}

In [12]:
from datasets import load_dataset
datasets = load_dataset("csv", 
                        data_files = {"train":f"{dataset_info['training_data_dir']}/{dataset_info['training_file']}",
                                    "validation":f"{dataset_info['eval_data_dir']}/{dataset_info['validation_file']}",
                                    "test":f"{dataset_info['eval_data_dir']}/{dataset_info['validation_file']}",
                                    },
                    cache_dir = None)

In [13]:
# create dictionary of various datasets and their sentence keys
task_to_keys ={
                "cola": ("sentence", None),
                "mnli": ("premise", "hypothesis"),
                "mnli-mm": ("premise", "hypothesis"),
                "mrpc": ("sentence1", "sentence2"),
                "qnli": ("question", "sentence"),
                "qqp": ("question1", "question2"),
                "rte": ("sentence1", "sentence2"),
                "sst2": ("sentence", None),
                "stsb": ("sentence1", "sentence2"),
                "wnli": ("sentence1", "sentence2"),
                "mimic-note-category": ("TEXT", None),
                "icd9-triage":("text", None),
                "icd9-triage-no-category-in-text":("text", None),
                "ICD9-Triage":("text", None),
                "mednli":("sentence1", "sentence2"),
                "mimic-mp":(dataset_info["text_column"], None),
                }

In [14]:
# get number of labels
num_labels = len(np.unique(datasets["train"][dataset_info["label_name"]]))


sentence1_key, sentence2_key = task_to_keys[task]

In [15]:
model_name_or_path = full_model_dir
batch_size = 2 # 2 for llama
if any(k in full_model_dir for k in ("gpt", "opt", "bloom")):
    padding_side = "left"
else:
    padding_side = "right"


if getattr(tokenizer, "pad_token_id") is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id



# own
def tokenize_function(examples):
    # max_length is important when using prompt tuning  or prefix tuning or p tuning as virtual tokens are added - which can overshoot the max length in pefts current form
    # for now set to 480 and see how it goes
    if sentence2_key is None:
        return tokenizer(examples[sentence1_key], truncation=True, max_length = 480)
    return tokenizer(examples[sentence1_key], examples[sentence2_key], truncation=True, max_length=480)

# own
tokenized_datasets = datasets.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset_info["remove_columns"]
)


def collate_fn(examples):
    return tokenizer.pad(examples, padding="longest", return_tensors="pt")

if "labels" not in tokenized_datasets["train"].features:
        tokenized_datasets = tokenized_datasets.rename_column(dataset_info["label_name"], "labels")

# Instantiate dataloaders.
train_dataloader = DataLoader(tokenized_datasets["train"], shuffle=True, collate_fn=collate_fn, batch_size=batch_size)
eval_dataloader = DataLoader(
    tokenized_datasets["validation"], shuffle=False, collate_fn=collate_fn, batch_size=batch_size
)

Map:   0%|          | 0/4908 [00:00<?, ? examples/s]

In [16]:
reloaded_model.device

device(type='cpu')

In [17]:
# check eval dataloader
for batch in eval_dataloader:
    # print(batch)
    # pass to model 
    batch = {k: v for k, v in batch.items()}
    outputs = reloaded_model(input_ids = batch["input_ids"], 
                                 attention_mask = batch["attention_mask"],
                                #  token_type_ids = batch["token_type_ids"]
                                 )
    break

You're using a MobileBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [18]:
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[ 1.0571, -0.2056],
        [ 2.2753, -1.6154]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [19]:
from tqdm import tqdm

In [20]:
from sklearn.metrics import roc_curve, auc, f1_score, precision_score, recall_score, classification_report, accuracy_score

In [21]:
def compute_metrics(predictions, pred_scores, labels):
    
    # use from evaluate for now
    precision_score = evaluate.load("precision")
    recall_score = evaluate.load("recall")
    accuracy_score = evaluate.load("accuracy")
    f1_score = evaluate.load("f1")    
          
    print(f"Labels are: {labels}\n")
    print(f"Preds are: {predictions}")
    precision = precision_score.compute(predictions=predictions, references=labels, average = "macro")["precision"]
    recall = recall_score.compute(predictions=predictions, references=labels, average = "macro")["recall"]
    accuracy = accuracy_score.compute(predictions=predictions, references=labels)["accuracy"]
    f1_macro = f1_score.compute(predictions=predictions, references=labels, average = "macro")["f1"]
    f1_weighted = f1_score.compute(predictions=predictions, references=labels, average = "weighted")["f1"]
    # roc_auc has slightly different format - needs the probs/scores rather than predicted labels
    # change roc based on number of labels
    if len(np.unique(labels)) == 2:   

        roc_auc_score = evaluate.load("roc_auc", "binary")
        roc_auc = roc_auc_score.compute(references=labels,
                                        # just take the probabilties of the positive class
                                        prediction_scores = pred_scores[:,1]                                         
                                        )['roc_auc']
    else:
        roc_auc_score = evaluate.load("roc_auc", "multiclass")

        roc_auc = roc_auc_score.compute(references=labels,
                                        prediction_scores = pred_scores,
                                        multi_class = 'ovr', 
                                        average = "macro")['roc_auc']        
   
    return {"precision": precision, 
            "recall": recall,
            "accuracy": accuracy,
            "f1_macro":f1_macro,
            "f1_weighted":f1_weighted,
            "roc_auc_macro":roc_auc}

In [22]:
# send model to cuda
reloaded_model.cuda()
reloaded_model.eval()
all_preds = []
all_preds_raw = []
all_labels = []
for batch in tqdm(eval_dataloader):               
    with torch.no_grad():
        # send batch to cuda
        batch = {k: v.cuda() for k, v in batch.items()}
        outputs = reloaded_model(input_ids = batch["input_ids"], 
                                 attention_mask = batch["attention_mask"],
                                #  token_type_ids = batch["token_type_ids"]
                                 )

    # apply softmax to the logits of the output - using the softmax function
    preds_raw = outputs.logits.softmax(dim=-1).cpu().float()           

    
    # get argmax of preds raw
    preds = torch.argmax(preds_raw, axis = -1)             
    
    all_preds_raw.extend(list(preds_raw))
    all_preds.extend(list(preds))
    all_labels.extend(list(batch["labels"].cpu().numpy()))



all_preds_raw = np.stack(all_preds_raw)
all_preds = np.stack(all_preds)
all_labels = np.stack(all_labels)

print(f"all_preds_raw shape is: {all_preds_raw.shape}")
print(f"all_preds shape is: {all_preds.shape} \n\n {all_preds}")
print(f"all_labels shape is: {all_labels.shape} \n\n {all_labels}")
# print(f"all_embeddings shape is: {all_embeddings.shape} \n\n {all_embeddings}")
# metrics = all_metrics(yhat=all_preds, y=all_labels, yhat_raw=all_preds_raw)


100%|██████████| 2454/2454 [01:41<00:00, 24.15it/s]

all_preds_raw shape is: (4908, 2)
all_preds shape is: (4908,) 

 [0 0 0 ... 0 0 0]
all_labels shape is: (4908,) 

 [0 0 1 ... 0 0 0]





In [17]:
# test torch argmax feature
a = torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.4, 0.3, 0.2, 0.1]])
torch.argmax(a, axis = -1)

tensor([3, 0])

In [23]:
# make tensor of bfloat 16
a = torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.4, 0.3, 0.2, 0.1]], dtype = torch.bfloat16)

# convert to float16
b = a.float()

In [24]:
np.argmax(b, axis = -1)

tensor([3, 0])

In [None]:
# test numpy argmax feature
b = np.array([[0.1, 0.2, 0.3, 0.4], [0.4, 0.3, 0.2, 0.1]])
np.argmax(b, axis = -1)

In [23]:
# use compute metrics with args: predictions, pred_scores, labels
metrics = compute_metrics(all_preds, all_preds_raw, all_labels)

Labels are: [0 0 1 ... 0 0 0]

Preds are: [0 0 0 ... 0 0 0]


In [32]:
# get value counts of all preds
np.unique(all_preds, return_counts = True)

(array([0, 1]), array([4827,   81]))

In [24]:
metrics

{'precision': 0.7036768485908739,
 'recall': 0.5350194809192531,
 'accuracy': 0.8946617766911166,
 'f1_macro': 0.5404014104010101,
 'f1_weighted': 0.8587342343219413,
 'roc_auc_macro': 0.7897098530355934}

## Test reloading from HF

In [26]:
# lora_id = "NTaylor/Llama-2-7b-hf-mimic-mp-lora" 
lora_id = "NTaylor/bio-mobilebert-mimic-mp-lora"

# load using AutoPeftModelForSequenceClassification
reloaded_model = AutoPeftModelForSequenceClassification.from_pretrained(lora_id)

Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at nlpie/bio-mobilebert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
reloaded_model

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): MobileBertForSequenceClassification(
      (mobilebert): MobileBertModel(
        (embeddings): MobileBertEmbeddings(
          (word_embeddings): Embedding(30522, 128, padding_idx=0)
          (position_embeddings): Embedding(512, 512)
          (token_type_embeddings): Embedding(2, 512)
          (embedding_transformation): Linear(in_features=384, out_features=512, bias=True)
          (LayerNorm): NoNorm()
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (encoder): MobileBertEncoder(
          (layer): ModuleList(
            (0-23): 24 x MobileBertLayer(
              (attention): MobileBertAttention(
                (self): MobileBertSelfAttention(
                  (query): Linear(
                    in_features=128, out_features=128, bias=True
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
         

In [27]:
tokenizer = AutoTokenizer.from_pretrained("nlpie/bio-mobilebert")

In [35]:
# long version worked for llama
# text = "82 year old patient initially presented with severe chest pain. They have a history of heart attacks, and there has been a struggle to bring the heart into a normal rythym ."
text = "They are likely to pass away"
inputs = tokenizer(text, return_tensors="pt")
outputs = reloaded_model(**inputs)
# extract prediction from outputs based on argmax of logits
pred = torch.argmax(outputs.logits, axis = -1)
# binary classification: 1 is positive for mortality 
print(f"Prediction is: {pred}")

Prediction is: tensor([0])


In [34]:
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[ 0.4350, -1.5346]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)