In [None]:
from pathlib import Path
import torch

# Define paths
input_dir = Path("../Data/processed/llm/entity_only/test/")
input_file_name = "0001.txt"
actual_file = input_dir / input_file_name
print(actual_file)

model_name = "DeepSeek"
predicted_dir = Path(f"../Results/test/{model_name}/")
predicted_filename = "0001_sentence_level.txt"
predicted_file = predicted_dir / predicted_filename
print(predicted_file)

output_filename = "0001_sentence_level_stat.txt"
output_file = predicted_dir/output_filename
print(output_file)


..\Data\processed\llm\entity_only\test\0001.txt
..\Results\test\DeepSeek\0001_sentence_level.txt
..\Results\test\DeepSeek\0001_sentence_level_stat_2.txt


In [2]:
# Function to read and parse data from a file
def read_data(file_path):
    data = {}
    with open(file_path, "r") as file:
        for line in file:
            if 'entity=' in line and 'label=' in line:
                parts = line.strip().split('" ')
                entity = parts[0].split('="')[1]  # Extract entity
                label = parts[1].split('="')[1]  # Extract label
                data[entity] = label
    return data
# Read actual and predicted data
actual_data = read_data(actual_file)
predicted_data = read_data(predicted_file)

# Clean all labels in advance
actual_data = {k: v.strip().strip('"') for k, v in actual_data.items()}
predicted_data = {k: v.strip().strip('"') for k, v in predicted_data.items()}

In [3]:
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity

# Load ClinicalBERT
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
model = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
model.eval()

  from .autonotebook import tqdm as notebook_tqdm


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(28996, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [4]:
# --- Step 1: Read data ---
def read_data(file_path):
    data = {}
    with open(file_path, "r") as file:
        for line in file:
            if 'entity=' in line and 'label=' in line:
                parts = line.strip().split('" ')
                entity = parts[0].split('="')[1]
                label = parts[1].split('="')[1]
                data[entity] = label
    return data

In [5]:
# --- Step 2: Get BERT embedding ---
def get_entity_embedding(text):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    cls_embedding = outputs.last_hidden_state[:, 0, :]  # [CLS] token
    return cls_embedding.squeeze().numpy()

In [6]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

actual_label_list = []
predicted_label_list = []


In [None]:
def evaluate_extraction_and_classification(actual_data, predicted_data, threshold):
    from sklearn.metrics.pairwise import cosine_similarity

    matched_data = []
    label_mismatches = []
    unmatched_predictions = []
    unknown_label_count = 0

    # Precompute actual embeddings
    actual_embeddings = {ent: get_entity_embedding(ent) for ent in actual_data}
    matched_actual_entities = set()

    for pred_ent, pred_label in predicted_data.items():
        pred_label_clean = pred_label.strip().lower()
        if pred_label_clean == "unknown":
            unknown_label_count += 1
            continue

        pred_emb = get_entity_embedding(pred_ent)
        best_sim = 0
        best_match = None

        for actual_ent in actual_data:
            if actual_ent in matched_actual_entities:
                continue
            sim = cosine_similarity([pred_emb], [actual_embeddings[actual_ent]])[0][0]
            if sim > threshold and sim > best_sim:
                best_sim = sim
                best_match = actual_ent

        if best_match:
            matched_actual_entities.add(best_match)
            actual_label_clean = actual_data[best_match].strip().lower()
            matched_data.append({
                "predicted_entity": pred_ent,
                "predicted_label": pred_label_clean,
                "actual_entity": best_match,
                "actual_label": actual_label_clean
            })

            if pred_label_clean != actual_label_clean:
                label_mismatches.append({
                    "predicted_entity": pred_ent,
                    "predicted_label": pred_label_clean,
                    "matched_actual_entity": best_match,
                    "actual_label": actual_label_clean
                })
        else:
            unmatched_predictions.append(pred_ent)

    # Calculate based only on the 70 predictions
    total_predictions = len(predicted_data) - unknown_label_count
    matched = len(matched_data)
    misclassified = len(label_mismatches)
    unmatched = len(unmatched_predictions)
    tp = matched - misclassified
    fp = misclassified + unmatched

    # Label metrics (only on predictions)
    precision = tp / (tp + fp) if (tp + fp) else 0
    recall = tp / matched if matched else 0
    accuracy = tp / total_predictions if total_predictions else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0

    return {
        "Total Entities in Actual": len(actual_data),
        "Total Entities in Prediction": len(predicted_data),
        "Total Unknown Entities in Prediction": unknown_label_count,
        "Total Matched Entities": matched,
        "Total Unmatched Predictions": unmatched,
        "Extraction Accuracy": round(matched / len(actual_data), 4) if len(actual_data) else 0,

        # Label classification metrics based only on predictions
        "Label Accuracy (on predictions)": round(accuracy, 4),
        "Label Precision": round(precision, 4),
        "Label Recall (on matched)": round(recall, 4),
        "Label F1 Score": round(f1, 4),

        "Label Mismatches": label_mismatches
    }


In [8]:
# Run evaluation
metrics = evaluate_extraction_and_classification(actual_data, predicted_data, threshold=0.92)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [9]:
import json

with open(output_file, "w") as f:
    for k, v in metrics.items():
        if k == "Label Mismatches":
            f.write("Label Mismatches:\n")
            json_output = json.dumps(v, indent=4)
            print("Label Mismatches:")
            print(json_output)
            f.write(json_output + "\n")
        else:
            line = f"{k}: {v:.4f}" if isinstance(v, float) else f"{k}: {v}"
            print(line)
            f.write(line + "\n")
print(f"Results saved to {output_file}")

Total Entities in Actual: 190
Total Entities in Prediction: 106
Total Unknown Entities in Prediction: 0
Total Matched Entities: 99
Total Unmatched Predictions: 7
Extraction Accuracy: 0.5211
Label Accuracy (on predictions): 0.8962
Label Precision: 0.8962
Label Recall (Sensitivity): 0.9596
Label F1 Score: 0.9268
Label Specificity: 0.0000
Label Mismatches:
[
    {
        "predicted_entity": "HD",
        "predicted_label": "treatment",
        "matched_actual_entity": "dvt",
        "actual_label": "problem"
    },
    {
        "predicted_entity": "ETOH/IVDA",
        "predicted_label": "problem",
        "matched_actual_entity": "spironolactone/hctz",
        "actual_label": "treatment"
    },
    {
        "predicted_entity": "tob",
        "predicted_label": "problem",
        "matched_actual_entity": "actos",
        "actual_label": "treatment"
    },
    {
        "predicted_entity": "Sertraline",
        "predicted_label": "treatment",
        "matched_actual_entity": "nitrite",
 