In [10]:
import spacy
from spacy.util import minibatch, compounding
import random
from spacy.training.example import Example

# Sentences that should be recognised as clinical
CLINICAL_TEXTS = [
    "The patient presents with severe abdominal pain",
    "Blood pressure reading shows 120/80",
    "Prescribed amoxicillin 500mg twice daily",
    "Patient history indicates diabetes type 2",
    "Referred to cardiology for follow-up",
    "Lab results show elevated white blood cell count",
    "Scheduled for MRI next week",
    "The wound shows signs of infection",
]

# Sentences that should be recognised as non-clinical
NON_CLINICAL_TEXTS = [
    "Let's discuss the project timeline",
    "The weather is beautiful today",
    "Can you send me the meeting notes",
    "I'll be taking vacation next month",
    "The restaurant was excellent",
    "Traffic is heavy this morning",
    "Financial News delivers exclusive breaking news & analysis around investment banking",
]

# Training data: (text, annotations) tuples, clinical sentences first.
# Every entry gets its own freshly built 'cats' dict, so no annotation
# object is shared between two examples.
training_data = [
    (sentence, {'cats': {'clinical': 1.0, 'non_clinical': 0.0}})
    for sentence in CLINICAL_TEXTS
] + [
    (sentence, {'cats': {'clinical': 0.0, 'non_clinical': 1.0}})
    for sentence in NON_CLINICAL_TEXTS
]

def train_clinical_classifier(data=None, n_iter=10, batch_size=4, drop=0.2, seed=None):
    """Train a blank English spaCy pipeline with a two-label text categorizer.

    Calling the function with no arguments behaves as before: it trains on the
    module-level ``training_data`` for 10 epochs with batches of 4 and a
    dropout of 0.2, without touching any random seed.

    Args:
        data: Iterable of ``(text, annotations)`` pairs, where ``annotations``
            carries a ``'cats'`` dict for the labels ``'clinical'`` and
            ``'non_clinical'``. ``None`` selects the module-level
            ``training_data``.
        n_iter: Number of passes over the examples.
        batch_size: Batch size forwarded to ``minibatch``.
        drop: Dropout rate forwarded to ``nlp.update``.
        seed: Optional integer. When given, ``spacy.util.fix_random_seed`` is
            called before the model is initialised so that weight
            initialisation and shuffling start from a fixed random state.
            ``None`` leaves the random state alone.

    Returns:
        The trained ``nlp`` pipeline.

    Raises:
        ValueError: If ``data`` contains no examples.
    """
    # Materialise once so generators work and emptiness can be checked up front
    pairs = list(training_data if data is None else data)
    if not pairs:
        raise ValueError("training data must contain at least one (text, annotations) pair")

    # Seed before any weights are created, otherwise initialisation stays random
    if seed is not None:
        spacy.util.fix_random_seed(seed)

    # Initialize spaCy
    nlp = spacy.blank('en')
    textcat = nlp.add_pipe('textcat', last=True)
    textcat.add_label('clinical')
    textcat.add_label('non_clinical')

    # Prepare training examples
    examples = [
        Example.from_dict(nlp.make_doc(text), annotations)
        for text, annotations in pairs
    ]

    # Initialize the model
    nlp.initialize(lambda: examples)

    # Training loop
    for epoch in range(n_iter):
        # Present the examples in a new order on every pass
        random.shuffle(examples)
        losses = {}

        # Batch training; `losses` accumulates over the batches of this epoch
        for batch in minibatch(examples, size=batch_size):
            nlp.update(batch, drop=drop, losses=losses)

        print(f"Epoch {epoch}, Losses: {losses}")

    return nlp

def analyze_text(nlp, text):
    """Run ``text`` through ``nlp`` and summarise the category scores.

    The text and both scores (three decimals) are printed. The returned dict
    holds:
        'is_clinical': True only when the clinical score is strictly greater
            than the non-clinical score.
        'confidence': the larger of the two scores.
        'scores': the ``cats`` mapping of the processed document itself.
    """
    cats = nlp(text).cats
    clinical, non_clinical = cats['clinical'], cats['non_clinical']

    # One print call, three lines of output
    print(
        f"\nAnalyzing: '{text}'",
        f"Clinical score: {clinical:.3f}",
        f"Non-clinical score: {non_clinical:.3f}",
        sep="\n",
    )

    summary = {}
    summary['is_clinical'] = clinical > non_clinical
    summary['confidence'] = max(clinical, non_clinical)
    summary['scores'] = cats
    return summary

# Train the model once at cell level; the prediction cell below reads `trained_model`.
# The call prints one "Epoch N, Losses: {...}" line per epoch.
trained_model = train_clinical_classifier()



Epoch 0, Losses: {'textcat': 1.0242377817630768}
Epoch 1, Losses: {'textcat': 0.981556624174118}
Epoch 2, Losses: {'textcat': 0.8788592368364334}
Epoch 3, Losses: {'textcat': 0.7168940454721451}
Epoch 4, Losses: {'textcat': 0.4791512116789818}
Epoch 5, Losses: {'textcat': 0.2642263248562813}
Epoch 6, Losses: {'textcat': 0.09586556442081928}
Epoch 7, Losses: {'textcat': 0.05160716688260436}
Epoch 8, Losses: {'textcat': 0.008448615903034806}
Epoch 9, Losses: {'textcat': 0.0015570279429084621}


In [11]:
# Test examples: sentences that do not appear in training_data
test_texts = [
    "Patient complains of chronic back pain",
    "The diagnosis suggests acute bronchitis",
    # NOTE(review): in the recorded run this sentence scores 0.843 clinical,
    # which looks like a misclassification -- confirm and consider more training data
    "Please review these financial reports",
    "Going to the movies tonight",
    "ECG results show normal sinus rhythm"
]

# Run predictions: analyze_text prints the text and both scores, then the
# returned dict supplies the final label and its confidence
for text in test_texts:
    result = analyze_text(trained_model, text)
    print(f"Is clinical: {result['is_clinical']} (Confidence: {result['confidence']:.3f})")
    # Visual separator between samples
    print("-" * 50)


Analyzing: 'Patient complains of chronic back pain'
Clinical score: 0.991
Non-clinical score: 0.009
Is clinical: True (Confidence: 0.991)
--------------------------------------------------

Analyzing: 'The diagnosis suggests acute bronchitis'
Clinical score: 0.870
Non-clinical score: 0.130
Is clinical: True (Confidence: 0.870)
--------------------------------------------------

Analyzing: 'Please review these financial reports'
Clinical score: 0.843
Non-clinical score: 0.157
Is clinical: True (Confidence: 0.843)
--------------------------------------------------

Analyzing: 'Going to the movies tonight'
Clinical score: 0.236
Non-clinical score: 0.764
Is clinical: False (Confidence: 0.764)
--------------------------------------------------

Analyzing: 'ECG results show normal sinus rhythm'
Clinical score: 0.872
Non-clinical score: 0.128
Is clinical: True (Confidence: 0.872)
--------------------------------------------------
