Before running this notebook, start the Ollama server with `ollama serve &`. This starts the server in the background so that the notebook can interact with it.

In [None]:
from typing import Dict, List, Optional, Union

import dspy
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

from utils import json_to_dataframe, json_to_string_list

In [None]:
# Location of the JSON data file that the rest of the notebook reads from.
filepath = '../../data/vector_veterinary_imaging_2.json'

# Load the same file through both project helpers from `utils`.
# NOTE(review): going by the helper names these are presumably a pandas DataFrame
# and a list of strings — confirm against utils.
df = json_to_dataframe(filepath) 
rad_strings = json_to_string_list(filepath)

In [None]:
df

In [None]:
# Copy the two columns used for retrieval into plain lists; both come from the
# same frame, so position i in one list corresponds to position i in the other.
findings = list(df['findings'])
conclusions = list(df['conclusions_and_recommendations'])

## Retrieval

In [None]:
class SentenceTransformerRetriever(dspy.Retrieve):
    """Retrieve the findings most similar to a query using sentence embeddings.

    All findings are embedded once when the retriever is constructed. Each
    query is embedded on demand and ranked against the stored embeddings by
    cosine similarity.

    Args:
        model: A ``SentenceTransformer`` instance, or a model name/path from
            which one is created.
        findings: Texts that are embedded and searched.
        conclusions: Texts returned with each hit; ``conclusions[i]`` is
            reported together with ``findings[i]``.
        k: Number of results ``forward`` returns when it is called without ``k``.

    Raises:
        ValueError: If ``findings`` and ``conclusions`` have different lengths.
    """

    def __init__(self, model: Union[str, SentenceTransformer], findings: List[str], conclusions: List[str], k: int):
        # Run the base-class initialiser so dspy.Retrieve's own state is set up.
        super().__init__(k=k)
        if len(findings) != len(conclusions):
            # The two lists are indexed in parallel in forward(); a mismatch would
            # either raise later or silently pair a finding with the wrong conclusion.
            raise ValueError(
                f"findings and conclusions must have the same length "
                f"(got {len(findings)} and {len(conclusions)})"
            )
        self.model = model if isinstance(model, SentenceTransformer) else SentenceTransformer(model, trust_remote_code=True)
        self.findings = findings
        self.conclusions = conclusions
        self.k = k
        self.embeddings = None
        self.init_embeddings()

    def init_embeddings(self):
        """Embed every finding and cache the result on ``self.embeddings``."""
        self.embeddings = self.model.encode(self.findings)

    def forward(self, query: str, k: Optional[int] = None) -> List[Dict[str, Union[str, float]]]:
        """Return the ``k`` stored findings most similar to ``query``.

        Args:
            query: Text to search for.
            k: Number of results to return; defaults to the ``k`` given at
                construction time.

        Returns:
            A list of dicts with keys ``'finding'``, ``'conclusion'`` and
            ``'score'`` (cosine similarity), ordered from most to least similar.
            The list is empty when ``k`` is zero or negative.
        """
        if k is None:
            k = self.k
        if k <= 0:
            # Slicing with [-0:] would select *every* row, so handle this explicitly.
            return []

        query_embedding = self.model.encode([query])
        similarities = cosine_similarity(query_embedding, self.embeddings)[0]
        # argsort is ascending: reverse it and keep the first k (best first).
        top_k_indices = np.argsort(similarities)[::-1][:k]

        return [
            {
                'finding': self.findings[idx],
                'conclusion': self.conclusions[idx],
                'score': float(similarities[idx]),
            }
            for idx in top_k_indices
        ]

In [None]:
vectorizer = "sentence-transformers/all-MiniLM-L6-v2"
# vectorizer = "dunzhang/stella_en_400M_v5"

In [None]:
retriever_model = SentenceTransformerRetriever(model=vectorizer, findings=findings, conclusions=conclusions, k=10)

In [None]:
findings[1]

In [None]:
retriever_model.forward(query=findings[1], k=2)

## Language model

In [None]:
language_model = "qwen2.5"
# language_model = "gemma2"

In [None]:
# Language-model client pointed at the local Ollama server (which must already
# be running) using the model name chosen in `language_model`.
ollama_model = dspy.OllamaLocal(
    base_url='http://127.0.0.1:11434',
    timeout_s=500,
    model=language_model,
    model_type='text',
    max_tokens=1024,
    # NOTE(review): num_ctx equals max_tokens. If num_ctx is the total context
    # window (prompt + completion), the retrieved examples in the prompt may be
    # truncated — confirm and consider raising it.
    num_ctx=1024,
    temperature=0.7,
    top_p=0.8
)

In [None]:
# Configure DSPy to use Ollama
# NOTE(review): `rm=retriever_model` also registers the k=10 retriever globally,
# while RadiologyModule calls the retriever it was constructed with — confirm the
# global `rm` is actually needed.
dspy.settings.configure(lm=ollama_model, rm=retriever_model)

In [None]:
# DSPy signature for the generation step. The class docstring and the field
# descriptions are prompt text, so any wording change here changes what the
# language model is asked to do.
class GenerateConclusions(dspy.Signature):
    """Given a radiology finding and similar examples, generate an appropriate conclusions and recommendations section.
    The response should maintain a professional medical tone and follow the style of the examples."""

    finding = dspy.InputField(desc="Findings section of the radiology report.")
    similar_examples = dspy.InputField(desc="Similar examples of findings and corresponding conclusions and recommendations sections.")
    # "Given", not "Give": the old wording read as an instruction to output the findings section.
    conclusions = dspy.OutputField(desc="The conclusions and recommendations section. Given the findings section above.")

In [None]:
class RadiologyModule(dspy.Module):
    """Retrieve similar reports, then generate a conclusions section for a finding.

    Args:
        retriever: Callable as ``retriever(query, k=...)`` and returning a list of
            dicts with ``'finding'`` and ``'conclusion'`` keys.
        k: Number of examples to retrieve per call. When omitted, the
            retriever's own ``k`` attribute is used, falling back to 3 if the
            retriever has none.
    """

    def __init__(self, retriever, k: Optional[int] = None):
        super().__init__()
        self.generate_conclusion = dspy.Predict(GenerateConclusions)
        self.retriever = retriever
        self.k = k

    def forward(self, finding: str) -> dict:
        """Generate a conclusion for ``finding``.

        Returns:
            A dict with the input ``'finding'``, the ``'generated_conclusion'``
            text and the ``'similar_examples'`` list returned by the retriever.
        """
        # Honour the retriever's configured k instead of a hard-coded 3, so the
        # `k` passed when the pipeline is built actually takes effect.
        k = self.k if self.k is not None else getattr(self.retriever, 'k', 3)

        # Retrieve similar examples
        retrieved = self.retriever(finding, k=k)

        # Format examples for prompt
        examples_text = "".join(
            f"Example {i}:\n"
            f"Finding: {ex['finding']}\n"
            f"Conclusion: {ex['conclusion']}\n\n"
            for i, ex in enumerate(retrieved, 1)
        )

        # Generate new conclusion
        prediction = self.generate_conclusion(
            finding=finding,
            similar_examples=examples_text
        )

        return {
            'finding': finding,
            'generated_conclusion': prediction.conclusions,
            'similar_examples': retrieved
        }

## Full pipeline

In [None]:
def setup_rad_pipeline(filepath: str, vectorizer: str = "sentence-transformers/all-MiniLM-L6-v2", k: int = 3):
    """
    Build a ready-to-use RadiologyModule from a JSON data file.

    Args:
        filepath: Path to the JSON data file.
        vectorizer: Name of the sentence-transformer model used for retrieval.
        k: Value handed to the retriever as its ``k``.

    Returns:
        A RadiologyModule wrapping a retriever built over the file's findings.
    """
    reports = json_to_dataframe(filepath)

    # The retriever searches the findings column and returns the matching
    # conclusions column alongside each hit.
    retriever = SentenceTransformerRetriever(
        model=vectorizer,
        findings=list(reports['findings']),
        conclusions=list(reports['conclusions_and_recommendations']),
        k=k,
    )
    return RadiologyModule(retriever)

## Example

In [None]:
filepath = '../../data/vector_veterinary_imaging_2.json'
rad_pipeline = setup_rad_pipeline(filepath)

In [None]:
# Example finding
test_finding = """
The thoracic cavity demonstrates normal cardiac silhouette size and shape. 
The pulmonary vasculature appears within normal limits. 
There is a mild interstitial pattern noted in the caudodorsal lung fields.
No evidence of pleural effusion is noted.
"""

In [None]:
# Run the full pipeline (retrieval + generation) on the hand-written finding.
result = rad_pipeline(test_finding)
    
# Show the generated text first, then the retrieved examples that were put in the prompt.
print("Generated Conclusion:")
print(result['generated_conclusion'])
print("\nSimilar Examples Used:")
# Examples are numbered from 1; the score is printed to three decimals.
for i, example in enumerate(result['similar_examples'], 1):
    print(f"\nExample {i} (Similarity Score: {example['score']:.3f}):")
    print(f"Finding: {example['finding']}")
    print(f"Conclusion: {example['conclusion']}")

## TODO 

When we evaluate a handful of examples, each example must be excluded from its own retrieval set; otherwise the retriever can simply return the example itself. All other examples may still be retrieved.

In [None]:
import random
from typing import List, Dict, Union, Optional
import pandas as pd

class SentenceTransformerRetrieverWithExclusion(dspy.Retrieve):
    """Sentence-embedding retriever that can hide selected rows from its results.

    All findings are embedded once at construction time. Rows whose positions
    are registered through ``set_excluded_indices`` are never returned by
    ``forward``, which makes leave-one-out evaluation possible.

    Args:
        model: A ``SentenceTransformer`` instance, or a model name/path from
            which one is created.
        findings: Texts that are embedded and searched.
        conclusions: Texts returned with each hit; ``conclusions[i]`` is
            reported together with ``findings[i]``.
        k: Number of results ``forward`` returns when it is called without ``k``.

    Raises:
        ValueError: If ``findings`` and ``conclusions`` have different lengths.
    """

    def __init__(self, model: Union[str, SentenceTransformer], findings: List[str], conclusions: List[str], k: int):
        # Run the base-class initialiser so dspy.Retrieve's own state is set up.
        super().__init__(k=k)
        if len(findings) != len(conclusions):
            # The two lists are indexed in parallel in forward().
            raise ValueError(
                f"findings and conclusions must have the same length "
                f"(got {len(findings)} and {len(conclusions)})"
            )
        self.model = model if isinstance(model, SentenceTransformer) else SentenceTransformer(model, trust_remote_code=True)
        self.findings = findings
        self.conclusions = conclusions
        self.k = k
        self.embeddings = None
        self.excluded_indices = set()
        self.init_embeddings()

    def init_embeddings(self):
        """Embed every finding and cache the result on ``self.embeddings``."""
        self.embeddings = self.model.encode(self.findings)

    def set_excluded_indices(self, indices: Optional[List[int]] = None):
        """Set indices to exclude from retrieval"""
        self.excluded_indices = set(indices or [])

    def clear_excluded_indices(self):
        """Clear all excluded indices"""
        self.excluded_indices = set()

    def forward(self, query: str, k: Optional[int] = None) -> List[Dict[str, Union[str, float]]]:
        """Return up to ``k`` non-excluded findings most similar to ``query``.

        Args:
            query: Text to search for.
            k: Number of results to return; defaults to the ``k`` given at
                construction time.

        Returns:
            A list of dicts with keys ``'finding'``, ``'conclusion'`` and
            ``'score'`` (cosine similarity), ordered from most to least similar.
            Fewer than ``k`` items are returned when fewer non-excluded rows
            exist; the list is empty when ``k`` is zero or negative.
        """
        if k is None:
            k = self.k
        if k <= 0:
            # Slicing with [-0:] would select *every* row, so handle this explicitly.
            return []

        query_embedding = self.model.encode([query])
        similarities = cosine_similarity(query_embedding, self.embeddings)[0]

        # Create mask for excluded indices
        mask = np.ones_like(similarities, dtype=bool)
        if self.excluded_indices:
            mask[list(self.excluded_indices)] = False

        # Rank only the rows that are still allowed. Merely pushing excluded rows
        # to -inf is not enough: once k reaches the number of allowed rows, the
        # excluded rows would fill the remaining slots and leak into the results.
        allowed = np.flatnonzero(mask)
        ranking = np.argsort(similarities[allowed])[::-1][:k]
        top_k_indices = allowed[ranking]

        return [
            {
                'finding': self.findings[idx],
                'conclusion': self.conclusions[idx],
                'score': float(similarities[idx]),
            }
            for idx in top_k_indices
        ]

def setup_rad_pipeline_with_exclusion(filepath: str, vectorizer: str = "sentence-transformers/all-MiniLM-L6-v2", k: int = 3):
    """
    Build a RadiologyModule whose retriever supports excluding rows.

    Args:
        filepath: Path to the JSON data file.
        vectorizer: Name of the sentence-transformer model used for retrieval.
        k: Value handed to the retriever as its ``k``.

    Returns:
        A ``(module, dataframe)`` pair: the RadiologyModule and the frame the
        retriever's findings and conclusions were taken from.
    """
    reports = json_to_dataframe(filepath)

    exclusion_retriever = SentenceTransformerRetrieverWithExclusion(
        model=vectorizer,
        findings=list(reports['findings']),
        conclusions=list(reports['conclusions_and_recommendations']),
        k=k,
    )

    module = RadiologyModule(exclusion_retriever)
    return module, reports

def run_evaluation_with_exclusion(filepath: str, num_examples: int = 5, seed: Optional[int] = None,
                                  vectorizer: str = "sentence-transformers/all-MiniLM-L6-v2"):
    """
    Run inference on a specified number of examples, excluding each example from its own retrieval set

    Args:
        filepath: Path to the JSON data file
        num_examples: Number of examples to evaluate (capped at the dataset size)
        seed: Random seed for reproducibility. It only controls which examples
            are sampled here.
        vectorizer: Name of the sentence-transformer model used for retrieval

    Returns:
        DataFrame containing the evaluation results, one row per evaluated
        example with columns 'index', 'finding', 'actual_conclusion',
        'generated_conclusion' and 'similar_examples'
    """
    # Use a private generator when a seed is given so that the caller's global
    # `random` state is not reseeded as a side effect. It yields the same sample
    # as seeding the global generator with the same value.
    rng = random.Random(seed) if seed is not None else random

    # Setup pipeline with exclusion capability
    rad_pipeline, df = setup_rad_pipeline_with_exclusion(filepath, vectorizer=vectorizer)

    # Randomly select examples
    total_examples = len(df)
    selected_indices = rng.sample(range(total_examples), min(num_examples, total_examples))

    results = []

    for idx in selected_indices:
        # Get the example. `idx` is positional, matching the order of the lists
        # the retriever was built from.
        finding = df['findings'].iloc[idx]
        actual_conclusion = df['conclusions_and_recommendations'].iloc[idx]

        # Set the current example to be excluded from retrieval
        rad_pipeline.retriever.set_excluded_indices([idx])
        try:
            # Run inference
            result = rad_pipeline(finding)
        finally:
            # Always clear the exclusion, even if inference fails, so the
            # retriever is not left hiding a row.
            rad_pipeline.retriever.clear_excluded_indices()

        # Store results
        results.append({
            'index': idx,
            'finding': finding,
            'actual_conclusion': actual_conclusion,
            'generated_conclusion': result['generated_conclusion'],
            'similar_examples': result['similar_examples']
        })

    return pd.DataFrame(results)

# Example usage:
# NOTE(review): inside a notebook kernel `__name__` is normally "__main__", so
# this guard likely always runs — confirm that is intended.
if __name__ == "__main__":
    filepath = '../../data/vector_veterinary_imaging_2.json'
    
    # Run evaluation on 5 random examples
    results_df = run_evaluation_with_exclusion(filepath, num_examples=5, seed=42)
    
    # Print results
    # `idx` is the row label of results_df; it is printed 1-based.
    for idx, row in results_df.iterrows():
        print(f"\nExample {idx + 1}:")
        print("Finding:")
        print(row['finding'])
        print("\nActual Conclusion:")
        print(row['actual_conclusion'])
        print("\nGenerated Conclusion:")
        print(row['generated_conclusion'])
        print("\nSimilar Examples Used:")
        # List each retrieved reference with its similarity score (three decimals).
        for i, example in enumerate(row['similar_examples'], 1):
            print(f"\nReference {i} (Similarity Score: {example['score']:.3f}):")
            print(f"Finding: {example['finding']}")
            print(f"Conclusion: {example['conclusion']}")
        # Visual separator between evaluated examples.
        print("\n" + "="*80)