In [1]:
!pip install datasets
!pip install transformers datasets torch scikit-learn pandas numpy

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [2]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
import torch
from torch import nn
import torch.nn.functional as F

# Load dataset
dataset = load_dataset("pubmed_qa", "pqa_labeled")
clinical_data = dataset['train']

# Create symptom and condition lists
def preprocess_data(data):
    symptoms = []
    conditions = []

    for entry in data:
        # Use questions as symptoms/complaints
        symptoms.append(entry['question'])

        # Handle nested structure in context
        if isinstance(entry['context'], dict):
            # Flatten the nested lists and join them
            context_parts = []
            for value in entry['context'].values():
                if isinstance(value, list):
                    context_parts.extend(value)  # If value is a list, extend our parts
                else:
                    context_parts.append(str(value))  # If value is not a list, just append it
            context_text = ' '.join(context_parts)
        else:
            context_text = str(entry['context'])

        conditions.append(context_text)

    # Clean the data
    df = pd.DataFrame({
        'symptoms': symptoms,
        'conditions': conditions
    })

    # Remove empty entries
    df = df[df['symptoms'].str.len() > 0]
    df = df[df['conditions'].str.len() > 0]

    # Take a subset for faster processing (adjust as needed)
    df = df.head(10000)

    return df

# Convert to DataFrame
df = preprocess_data(clinical_data)

# Initialize tokenizer (using ClinicalBERT for better medical text understanding)
tokenizer = AutoTokenizer.from_pretrained('emilyalsentzer/Bio_ClinicalBERT')

# Create simplified condition labels (extract key medical terms)
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(max_features=100, stop_words='english')
condition_features = vectorizer.fit_transform(df['conditions'])
condition_labels = vectorizer.get_feature_names_out()

# Convert to multi-label format
def extract_conditions(text):
    features = vectorizer.transform([text]).toarray()[0]
    return [1 if f > 0 else 0 for f in features]

df['encoded_conditions'] = df['conditions'].apply(extract_conditions)

# Maximum sequence length for symptoms
MAX_LENGTH = 256

# Tokenize symptoms
def tokenize_symptoms(texts):
    return tokenizer(
        texts,
        padding='max_length',
        truncation=True,
        max_length=MAX_LENGTH,
        return_tensors='pt'
    )

# Create PyTorch dataset
class MedicalDataset(torch.utils.data.Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        encoded = tokenize_symptoms(text)

        return {
            'input_ids': encoded['input_ids'].squeeze(),
            'attention_mask': encoded['attention_mask'].squeeze(),
            'labels': torch.tensor(label, dtype=torch.float32)
        }

# Split data
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['symptoms'].values,
    np.array(df['encoded_conditions'].tolist()),
    test_size=0.2,
    random_state=42
)

# Create datasets
train_dataset = MedicalDataset(train_texts, train_labels)
val_dataset = MedicalDataset(val_texts, val_labels)

print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Number of condition classes: {len(condition_labels)}")
print("\nSample condition labels:", condition_labels[:10])

# Sample input-output pair
print("\nSample data:")
print("Question:", df['symptoms'].iloc[0][:200], "...")
print("Answer:", df['conditions'].iloc[0][:200], "...")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/5.19k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Training samples: 800
Validation samples: 200
Number of condition classes: 100

Sample condition labels: ['001' '05' '10' '11' '12' '15' '20' '30' '80' '95']

Sample data:
Question: Do mitochondria play a role in remodelling lace plant leaves during programmed cell death? ...
Answer: Programmed cell death (PCD) is the regulated death of cells within an organism. The lace plant (Aponogeton madagascariensis) produces perforations in its leaves through PCD. The leaves of the plant co ...


***Architecture***

In [3]:
import torch
import torch.nn as nn
from transformers import AutoConfig

class MedicalSymptomClassifier(nn.Module):
    def __init__(self, pretrained_model_name='emilyalsentzer/Bio_ClinicalBERT', num_labels=100):
        super().__init__()

        # Load pretrained model configuration
        self.config = AutoConfig.from_pretrained(pretrained_model_name)

        # BERT embedding layer
        self.bert = AutoModelForSequenceClassification.from_pretrained(
            pretrained_model_name,
            num_labels=num_labels
        ).bert

        # Dropout for regularization
        self.dropout = nn.Dropout(0.1)

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(self.config.hidden_size, 512),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(512, num_labels),
            nn.Sigmoid()  # Using sigmoid for multi-label classification
        )

    def forward(self, input_ids, attention_mask):
        # Get BERT embeddings
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        # Use the [CLS] token representation
        pooled_output = outputs.last_hidden_state[:, 0, :]

        # Apply dropout
        pooled_output = self.dropout(pooled_output)

        # Get predictions
        logits = self.classifier(pooled_output)

        return logits

# Training configuration
class TrainingConfig:
    def __init__(self):
        self.learning_rate = 2e-5
        self.num_epochs = 3
        self.batch_size = 16
        self.warmup_steps = 100
        self.max_grad_norm = 1.0

# Create data loaders
def create_data_loaders(train_dataset, val_dataset, batch_size):
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True
    )

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False
    )

    return train_loader, val_loader

# Training function
def train_epoch(model, train_loader, optimizer, scheduler, device):
    model.train()
    total_loss = 0

    for batch in train_loader:
        # Move batch to device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Forward pass
        outputs = model(input_ids, attention_mask)

        # Calculate loss
        loss = nn.BCELoss()(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        # Clip gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update weights
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

    return total_loss / len(train_loader)

# Validation function
def evaluate(model, val_loader, device):
    model.eval()
    total_loss = 0

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = nn.BCELoss()(outputs, labels)

            total_loss += loss.item()

    return total_loss / len(val_loader)

***Training Pipeline***

In [4]:
import torch
from torch.utils.data import DataLoader
from transformers import get_linear_schedule_with_warmup
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
import time

class TrainingPipeline:
    def __init__(
        self,
        model,
        train_dataset,
        val_dataset,
        config=None
    ):
        self.model = model
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset

        # Default training configurations
        self.config = {
            'batch_size': 16,
            'learning_rate': 2e-5,
            'epochs': 3,
            'warmup_steps': 100,
            'threshold': 0.5  # Classification threshold
        } if config is None else config

        # Setup device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)

        # Create data loaders
        self.train_loader = DataLoader(
            train_dataset,
            batch_size=self.config['batch_size'],
            shuffle=True
        )
        self.val_loader = DataLoader(
            val_dataset,
            batch_size=self.config['batch_size'],
            shuffle=False
        )

        # Setup optimizer and scheduler
        self.optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=self.config['learning_rate']
        )

        total_steps = len(self.train_loader) * self.config['epochs']
        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=self.config['warmup_steps'],
            num_training_steps=total_steps
        )

        self.criterion = torch.nn.BCELoss()

    def train_epoch(self):
        self.model.train()
        total_loss = 0
        all_preds = []
        all_labels = []

        for batch in self.train_loader:
            # Move batch to device
            input_ids = batch['input_ids'].to(self.device)
            attention_mask = batch['attention_mask'].to(self.device)
            labels = batch['labels'].to(self.device)

            # Forward pass
            self.optimizer.zero_grad()
            outputs = self.model(input_ids, attention_mask)

            # Calculate loss
            loss = self.criterion(outputs, labels)

            # Backward pass
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
            self.optimizer.step()
            self.scheduler.step()

            total_loss += loss.item()

            # Store predictions and labels for metrics
            preds = (outputs > self.config['threshold']).float().cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())

        # Calculate metrics
        metrics = self.calculate_metrics(all_preds, all_labels)
        return total_loss / len(self.train_loader), metrics

    def validate(self):
        self.model.eval()
        total_loss = 0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for batch in self.val_loader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                outputs = self.model(input_ids, attention_mask)
                loss = self.criterion(outputs, labels)

                total_loss += loss.item()

                preds = (outputs > self.config['threshold']).float().cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels.cpu().numpy())

        metrics = self.calculate_metrics(all_preds, all_labels)
        return total_loss / len(self.val_loader), metrics

    def calculate_metrics(self, preds, labels):
        # Convert lists to numpy arrays
        preds = np.array(preds)
        labels = np.array(labels)

        # Calculate precision, recall, and F1 score
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average='weighted', zero_division=0
        )

        return {
            'precision': precision,
            'recall': recall,
            'f1': f1
        }

    def train(self):
        best_val_loss = float('inf')
        best_model = None
        train_history = []
        val_history = []

        print(f"Training on device: {self.device}")

        for epoch in range(self.config['epochs']):
            start_time = time.time()

            # Training
            train_loss, train_metrics = self.train_epoch()

            # Validation
            val_loss, val_metrics = self.validate()

            # Store history
            train_history.append({
                'loss': train_loss,
                'metrics': train_metrics
            })
            val_history.append({
                'loss': val_loss,
                'metrics': val_metrics
            })

            # Save best model
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = self.model.state_dict().copy()

            epoch_time = time.time() - start_time

            # Print epoch results
            print(f"\nEpoch {epoch + 1}/{self.config['epochs']}")
            print(f"Time: {epoch_time:.2f}s")
            print(f"Train Loss: {train_loss:.4f}")
            print(f"Train Metrics: Precision={train_metrics['precision']:.4f}, "
                  f"Recall={train_metrics['recall']:.4f}, F1={train_metrics['f1']:.4f}")
            print(f"Val Loss: {val_loss:.4f}")
            print(f"Val Metrics: Precision={val_metrics['precision']:.4f}, "
                  f"Recall={val_metrics['recall']:.4f}, F1={val_metrics['f1']:.4f}")

        # Restore best model
        self.model.load_state_dict(best_model)
        return train_history, val_history

***Uncertanity Estimation***

In [5]:
import torch
import torch.nn as nn
import numpy as np
from typing import Tuple, Dict, List

class UncertaintyEstimator:
    def __init__(self, model, num_samples=10, threshold=0.5):
        """
        Initialize the uncertainty estimator

        Args:
            model: The trained medical classifier model
            num_samples: Number of Monte Carlo samples
            threshold: Classification threshold
        """
        self.model = model
        self.num_samples = num_samples
        self.threshold = threshold
        self.device = next(model.parameters()).device

    def enable_dropout(self):
        """Enable dropout during inference"""
        for m in self.model.modules():
            if isinstance(m, nn.Dropout):
                m.train()

    def predict_with_uncertainty(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor
    ) -> Dict[str, np.ndarray]:
        """
        Make predictions with uncertainty estimation

        Returns dictionary containing:
        - mean_predictions: Average prediction probabilities
        - uncertainties: Standard deviation of predictions
        - confidence_scores: 1 - uncertainty
        """
        self.model.eval()
        self.enable_dropout()

        # Store multiple predictions
        predictions = []

        # Multiple forward passes
        with torch.no_grad():
            for _ in range(self.num_samples):
                outputs = self.model(input_ids, attention_mask)
                predictions.append(outputs.cpu().numpy())

        # Stack predictions
        predictions = np.stack(predictions)

        # Calculate statistics
        mean_predictions = np.mean(predictions, axis=0)
        uncertainties = np.std(predictions, axis=0)
        confidence_scores = 1 - uncertainties

        # Binary predictions using threshold
        binary_predictions = (mean_predictions > self.threshold).astype(float)

        return {
            'mean_predictions': mean_predictions,
            'binary_predictions': binary_predictions,
            'uncertainties': uncertainties,
            'confidence_scores': confidence_scores
        }

class PredictionAnalyzer:
    def __init__(self, condition_labels: List[str]):
        """
        Initialize the prediction analyzer

        Args:
            condition_labels: List of condition label names
        """
        self.condition_labels = condition_labels

    def analyze_prediction(
        self,
        predictions: Dict[str, np.ndarray],
        top_k: int = 5
    ) -> List[Dict[str, float]]:
        """
        Analyze predictions and return top k conditions with confidence

        Args:
            predictions: Output from UncertaintyEstimator
            top_k: Number of top predictions to return

        Returns:
            List of dictionaries containing condition names and their scores
        """
        mean_preds = predictions['mean_predictions']
        confidences = predictions['confidence_scores']

        # Get top k predictions
        top_indices = np.argsort(mean_preds)[-top_k:][::-1]

        results = []
        for idx in top_indices:
            results.append({
                'condition': self.condition_labels[idx],
                'probability': float(mean_preds[idx]),
                'confidence': float(confidences[idx])
            })

        return results

# Function to make predictions for new symptoms
def predict_conditions(
    model: nn.Module,
    tokenizer,
    text: str,
    condition_labels: List[str],
    device: torch.device
) -> Dict:
    """
    Make predictions for new symptom text

    Args:
        model: Trained model
        tokenizer: Tokenizer
        text: Input symptom text
        condition_labels: List of condition labels
        device: torch device

    Returns:
        Dictionary containing predictions and analysis
    """
    # Initialize estimator and analyzer
    estimator = UncertaintyEstimator(model)
    analyzer = PredictionAnalyzer(condition_labels)

    # Tokenize input
    encoded = tokenizer(
        text,
        padding='max_length',
        truncation=True,
        max_length=256,
        return_tensors='pt'
    )

    # Move to device
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)

    # Get predictions with uncertainty
    predictions = estimator.predict_with_uncertainty(input_ids, attention_mask)

    # Analyze predictions
    analysis = analyzer.analyze_prediction(predictions)

    return {
        'predictions': predictions,
        'analysis': analysis
    }

# Example usage function
def print_prediction_results(results: Dict):
    """Print formatted prediction results"""
    print("\nPrediction Results:")
    print("-" * 50)

    for i, pred in enumerate(results['analysis'], 1):
        print(f"\n{i}. Condition: {pred['condition']}")
        print(f"   Probability: {pred['probability']:.3f}")
        print(f"   Confidence: {pred['confidence']:.3f}")

        # Add confidence level description
        confidence = pred['confidence']
        if confidence > 0.8:
            confidence_level = "High confidence"
        elif confidence > 0.5:
            confidence_level = "Moderate confidence"
        else:
            confidence_level = "Low confidence - may need more information"

        print(f"   Assessment: {confidence_level}")

In [6]:
# Check if these variables are defined:
print("Model available:", 'model' in locals())
print("Tokenizer available:", 'tokenizer' in locals())
print("Condition labels available:", 'condition_labels' in locals())
print("Device available:", 'device' in locals())

Model available: False
Tokenizer available: True
Condition labels available: True
Device available: False


***Actual Training***

In [7]:
# 1. Set up device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 2. Initialize model (from our previous architecture)
model = MedicalSymptomClassifier(num_labels=len(condition_labels))
model.to(device)

# 3. Set up and run the training pipeline
training_config = {
    'batch_size': 16,
    'learning_rate': 2e-5,
    'epochs': 3,
    'warmup_steps': 100,
    'threshold': 0.5
}

pipeline = TrainingPipeline(
    model=model,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    config=training_config
)

# 4. Train the model
train_history, val_history = pipeline.train()

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at emilyalsentzer/Bio_ClinicalBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training on device: cuda


model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]


Epoch 1/3
Time: 35.53s
Train Loss: 0.6767
Train Metrics: Precision=0.3784, Recall=0.4275, F1=0.3382
Val Loss: 0.6256
Val Metrics: Precision=0.2771, Recall=0.3529, F1=0.2797

Epoch 2/3
Time: 34.90s
Train Loss: 0.5664
Train Metrics: Precision=0.3371, Recall=0.3298, F1=0.2973
Val Loss: 0.5002
Val Metrics: Precision=0.2430, Recall=0.3325, F1=0.2777

Epoch 3/3
Time: 36.03s
Train Loss: 0.4993
Train Metrics: Precision=0.2650, Recall=0.3111, F1=0.2696
Val Loss: 0.4746
Val Metrics: Precision=0.2430, Recall=0.3323, F1=0.2776


***INFERENCE***

In [11]:
# Test the model with uncertainty estimation
symptoms_text = "Patient presents with fever, persistent cough, and fatigue"

# Modified PredictionAnalyzer to handle the shape correctly
class PredictionAnalyzer:
    def __init__(self, condition_labels):
        self.condition_labels = condition_labels

    def analyze_prediction(self, predictions, top_k=5):
        # Squeeze the predictions to remove extra dimensions
        mean_preds = predictions['mean_predictions'].squeeze()
        confidences = predictions['confidence_scores'].squeeze()

        # Make sure top_k isn't larger than number of predictions
        top_k = min(top_k, len(mean_preds))

        # Get top k predictions
        top_indices = np.argsort(mean_preds)[-top_k:][::-1]

        results = []
        for idx in top_indices:
            if idx < len(self.condition_labels):
                results.append({
                    'condition': self.condition_labels[idx],
                    'probability': float(mean_preds[idx]),
                    'confidence': float(confidences[idx])
                })

        return results

# Make prediction
estimator = UncertaintyEstimator(model, num_samples=10)

# Tokenize the input
encoded = tokenizer(
    symptoms_text,
    padding='max_length',
    truncation=True,
    max_length=256,
    return_tensors='pt'
)

# Move to device
input_ids = encoded['input_ids'].to(device)
attention_mask = encoded['attention_mask'].to(device)

# Get predictions with uncertainty
with torch.no_grad():
    predictions = estimator.predict_with_uncertainty(input_ids, attention_mask)

# Initialize analyzer with modified class
analyzer = PredictionAnalyzer(condition_labels)
analysis = analyzer.analyze_prediction(predictions)

# Print results
print("\nPrediction Results:")
print("-" * 50)
for i, pred in enumerate(analysis, 1):
    print(f"\n{i}. Condition: {pred['condition']}")
    print(f"   Probability: {pred['probability']:.3f}")
    print(f"   Confidence: {pred['confidence']:.3f}")

    confidence = pred['confidence']
    if confidence > 0.8:
        confidence_level = "High confidence"
    elif confidence > 0.5:
        confidence_level = "Moderate confidence"
    else:
        confidence_level = "Low confidence - may need more information"

    print(f"   Assessment: {confidence_level}")


Prediction Results:
--------------------------------------------------

1. Condition: results
   Probability: 0.787
   Confidence: 0.975
   Assessment: High confidence

2. Condition: humans
   Probability: 0.759
   Confidence: 0.966
   Assessment: High confidence

3. Condition: methods
   Probability: 0.731
   Confidence: 0.968
   Assessment: High confidence

4. Condition: female
   Probability: 0.701
   Confidence: 0.973
   Assessment: High confidence

5. Condition: male
   Probability: 0.673
   Confidence: 0.962
   Assessment: High confidence


**Conclusion:**
          Looking at the prediction results for a patient presenting with fever, persistent cough, and fatigue, the model demonstrates remarkably high confidence levels across all its predictions, with confidence scores consistently above 95%. While the model shows strong certainty in its predictions, with probabilities ranging from 67.3% to 78.7%, there's a notable limitation in the actual predictions themselves. Instead of identifying medical conditions, the model has picked up research-paper related terms like "results," "humans," and "methods" as top predictions. This suggests that while the uncertainty estimation mechanism is working effectively (shown by the consistent high confidence scores), the model's underlying training data from PubMed has led it to learn academic terminology rather than clinical diagnoses. To make this system more clinically valuable, it would need to be retrained with a more focused medical dataset and potentially incorporate standardized medical terminology like SNOMED CT or ICD-10 for more relevant condition labeling. The current results indicate a technically sound model in terms of confidence estimation, but one that requires refinement in its vocabulary and training data to be practically useful in a medical context.