# **Full Loop (1 hour)**

In [None]:
# Embed every text in lda_2k["exp_text"] with the BERT model, one mini-batch at
# a time. BERT models are heavy (the full run took over an hour); processing
# all rows at once would crash Colab, so we work in chunks of `batch_size`.
batch_size = 100
all_embeddings = []  # one list of floats per input text, in row order
texts = lda_2k["exp_text"].tolist()

# The tokenizer builds its tensors on the CPU. They must be moved to wherever
# the model's weights live, otherwise the forward pass raises a device-mismatch
# error as soon as the model sits on a GPU. On a CPU-only model this is a no-op.
# NOTE(review): assumes `model` is already in eval mode (dropout disabled) —
# confirm where the model is loaded.
model_device = next(model.parameters()).device

for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i + batch_size]
    tokenized = tokenizer(
        batch_texts,
        padding=True,      # pad to the longest text in this batch
        truncation=True,   # cut texts longer than max_length tokens
        max_length=512,
        return_tensors="pt",
    ).to(model_device)

    # Inference only: no_grad() disables gradient tracking, which saves memory
    # and compute compared with a training-style forward pass.
    with torch.no_grad():
        outputs = model(**tokenized)

        # Mean pooling: average the token vectors of each text, using the
        # attention mask so padding positions contribute nothing to the sum
        # and are not counted in the denominator.
        attention_mask = tokenized["attention_mask"].unsqueeze(-1)
        masked = outputs.last_hidden_state * attention_mask
        # Guard against a zero token count so a fully-masked row cannot
        # produce NaNs; for any row with at least one token this is unchanged.
        token_counts = attention_mask.sum(dim=1).clamp(min=1)
        embeddings = masked.sum(dim=1) / token_counts

    # Bring results back to the CPU and store them as plain Python lists.
    all_embeddings.extend(embeddings.cpu().numpy().tolist())

lda_2k["bert_embedding"] = all_embeddings
# This column will contain:
# One fixed-size vector per clinical note (768-dimensional for a BERT-base
# sized model — depends on the model loaded elsewhere in the notebook).
# Stored as a list of floats.

