In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT", clean_up_tokenization_spaces=True)

# Set the device to GPU if available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the model architecture
model = AutoModelForSequenceClassification.from_pretrained("emilyalsentzer/Bio_ClinicalBERT", num_labels=2)

# Load the fine-tuned model's state dictionary (the saved weights) safely
fine_tuned_model_path = os.path.expanduser("~/BioClinicalBERT/fine_tuned_clinical_bert1k_2002.pt")  # Update path as needed
state_dict = torch.load(fine_tuned_model_path, map_location=device, weights_only = True)

# Load the state dictionary into the model
model.load_state_dict(state_dict)

# Move the model to the correct device
model.to(device)

# Set model to evaluation mode
model.eval()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at emilyalsentzer/Bio_ClinicalBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [5]:
def predict_label_using_classifier(text_list):
    # Tokenize the input texts (batch of texts)
    inputs = tokenizer(text_list, padding=True, truncation=True, return_tensors="pt", max_length=512)
    
    # Move tensors to the correct device
    inputs = {key: value.to(device) for key, value in inputs.items()}
    
    # Get the BERT embeddings (without applying the classification head)
    with torch.no_grad():
        outputs = model.bert(**inputs)
        last_hidden_state = outputs.last_hidden_state
    
    # Apply the classification head manually
    logits = model.classifier(last_hidden_state[:, 0, :])  # Use CLS token representation for classification

    # Get the predicted classes for the batch
    predicted_classes = torch.argmax(logits, dim=1).cpu().numpy()  # Convert to numpy array for easier handling
    
    return predicted_classes

# Example inputs with abbreviations, typos, and different classes
text_input = [
    "Patient has a history of diabetes and hypertension.", 
    "MVA",  # Motor Vehicle Accident
    "Pt involved in MVC",  # Motor Vehicle Collision
    "Patient involved in a motor accident", 
    "Pt has Hx of DM & HTN",  # Abbreviations for patient (Pt), history (Hx), diabetes mellitus (DM), and hypertension (HTN)
    "Mtr vehcl collision",  # Typo in motor vehicle collision
    "Pattient hs history of htpertension",
    "Pt hs history of vehical accident",
    "Pt involved in motor cycle "
]

# Predict labels for all text inputs
predicted_labels = predict_label_using_classifier(text_input)
print(f"Predicted labels: {predicted_labels}")

Predicted labels: [0 1 0 1 0 0 0 0 1]
