# LLM Screening Reasoning Visualisation

This notebook displays a sample Cochrane review with the LLM's reasoning for each paper's inclusion/exclusion decision.

**Features:**
- Toggle between included and excluded papers
- Select evaluation run (Run 1 or Run 2)
- Select model × prompt type combination
- View LLM reasoning for each decision

In [6]:
# Install required packages
# `%pip` (rather than `!pip`) installs into the environment of the running kernel;
# `-q` keeps the installer output short.
# NOTE(review): versions are not pinned, so a fresh install may pull newer releases
# than the ones this notebook was last run with.
%pip install -q ipywidgets pandas

Note: you may need to restart the kernel to use updated packages.


In [7]:
from html import escape as html_escape
from pathlib import Path

import ipywidgets as widgets
import numpy as np
import pandas as pd
from IPython.display import display, HTML, clear_output

# Load data
# Prefer ./Data (notebook started from the project root); otherwise fall back to
# ../Data (notebook started from a sub-folder of the project).
DATA_DIR = Path.cwd() / "Data"
if not DATA_DIR.exists():
    DATA_DIR = Path.cwd().parent / "Data"
RESULTS_DIR = DATA_DIR / "results"

validation = pd.read_csv(DATA_DIR / "ground_truth_validation_set.csv")
# PMIDs are read as strings so they compare directly with SELECTED_REVIEW_PMID.
cochrane = pd.read_csv(DATA_DIR / "cochrane_pubmed_abstracts.csv", dtype={"pmid": str})

# Same review as notebook 06: "Fetal movement counting for assessment of fetal wellbeing"
SELECTED_REVIEW_PMID = '26467769'

# Get review data
review_data = validation[validation['review_pmid'].astype(str) == SELECTED_REVIEW_PMID].copy()
if review_data.empty:
    # Fail with a readable message instead of an IndexError from `.iloc[0]` below.
    raise ValueError(
        f"Review PMID {SELECTED_REVIEW_PMID} not found in ground_truth_validation_set.csv"
    )

review_abstract_rows = cochrane.loc[cochrane['pmid'] == SELECTED_REVIEW_PMID, 'abstract']
if review_abstract_rows.empty:
    raise ValueError(
        f"Review PMID {SELECTED_REVIEW_PMID} not found in cochrane_pubmed_abstracts.csv"
    )
review_full_abstract = review_abstract_rows.iloc[0]

# Title and criteria are repeated on every row of the review, so the first row is enough.
review_title = review_data['review_title'].iloc[0]
review_criteria = review_data['review_criteria'].iloc[0]

print(f"Review: {review_title}")
print(f"Papers: {len(review_data)} total ({review_data['label'].sum()} included, {int((1-review_data['label']).sum())} excluded)")

Review: Fetal movement counting for assessment of fetal wellbeing.
Papers: 10 total (5 included, 5 excluded)


In [8]:
# Evaluation result files, keyed first by run and then by "model + prompt type".
# File names are relative to RESULTS_DIR.
RUN_FILES = {
    # First evaluation run
    "Run 1": {
        "Mistral Zero-Shot": "eval_mistral_zero_shot_20260115_231802.csv",
        "Mistral CoT": "eval_mistral_cot_20260116_003208.csv",
        "Llama 3.2 Zero-Shot": "eval_llama3.2_zero_shot_20260115_193605.csv",
        "Llama 3.2 CoT": "eval_llama3.2_cot_20260115_215209.csv",
    },
    # Second evaluation run
    "Run 2": {
        "Mistral Zero-Shot": "eval_mistral_zero_shot_20260116_050656.csv",
        "Mistral CoT": "eval_mistral_cot_20260116_073058.csv",
        "Llama 3.2 Zero-Shot": "eval_llama3.2_zero_shot_20260116_025453.csv",
        "Llama 3.2 CoT": "eval_llama3.2_cot_20260116_041136.csv",
    },
}

def load_eval_with_reasoning(run_name, model_name):
    """Load one evaluation file and keep the rows for the selected review's papers.

    Parameters
    ----------
    run_name : str
        Key of ``RUN_FILES`` (e.g. ``'Run 1'``).
    model_name : str
        Key of ``RUN_FILES[run_name]`` (e.g. ``'Mistral CoT'``).

    Returns
    -------
    pandas.DataFrame
        Rows of the evaluation CSV whose ``paper_pmid`` belongs to ``review_data``,
        with two added columns: ``pred_binary`` (1 = include, 0 = exclude) and
        ``reasoning`` (the raw model response, or a placeholder when missing).
        ``true_label`` is added as a copy of ``label`` when only ``label`` exists.

    Raises
    ------
    KeyError
        If the run/model combination is unknown, or the CSV has no
        ``prediction`` or ``paper_pmid`` column.
    FileNotFoundError
        If the evaluation CSV does not exist under ``RESULTS_DIR``.
    """
    filepath = RESULTS_DIR / RUN_FILES[run_name][model_name]
    df = pd.read_csv(filepath)

    # Handle column name differences
    if 'label' in df.columns and 'true_label' not in df.columns:
        df['true_label'] = df['label']

    # Get response column (different names in different files); None if neither exists
    response_col = next(
        (col for col in ('raw_response', 'response') if col in df.columns),
        None,
    )

    def convert_prediction(x):
        """Map a raw prediction (number or free text) to 1 (include) / 0 (exclude)."""
        if pd.isna(x):
            # A missing prediction is treated as "exclude".
            return 0
        # NumPy scalars are accepted too: np.int64 is not a subclass of int.
        if isinstance(x, (int, float, np.integer, np.floating)):
            return int(x)
        return 1 if 'include' in str(x).lower() else 0

    df['pred_binary'] = df['prediction'].apply(convert_prediction)
    if response_col is None:
        # No response column at all: use the placeholder instead of raising KeyError.
        df['reasoning'] = 'No reasoning available'
    else:
        df['reasoning'] = df[response_col].fillna('No reasoning available')

    # Filter to our review's papers
    paper_pmids = review_data['paper_pmid'].astype(int).tolist()
    keep = df['paper_pmid'].isin(paper_pmids)

    # The same paper can be screened against several reviews. When the file says
    # which review each row belongs to, keep only the rows for the selected review
    # so reasoning written for another review is not shown here.
    # NOTE(review): assumes the column is named 'review_pmid' as in the validation
    # set; files without it keep the previous paper-only filter.
    if 'review_pmid' in df.columns:
        same_review = (
            pd.to_numeric(df['review_pmid'], errors='coerce') == int(SELECTED_REVIEW_PMID)
        )
        # Only narrow when it leaves something, so an unexpected ID format in the
        # file cannot silently drop every row.
        if (keep & same_review).any():
            keep = keep & same_review

    filtered = df[keep].copy()

    return filtered

print("Functions defined. Ready to create visualization.")

Functions defined. Ready to create visualization.


In [9]:
# Create interactive visualization

# Widgets
# Run choices come from RUN_FILES so a newly added run appears here automatically.
run_dropdown = widgets.Dropdown(
    options=list(RUN_FILES),
    value='Run 2',
    description='Run:',
    style={'description_width': '50px'},
    layout=widgets.Layout(width='150px')
)

# Model choices are taken from 'Run 2' (the default run).
model_dropdown = widgets.Dropdown(
    options=list(RUN_FILES['Run 2'].keys()),
    value='Mistral CoT',  # Default to CoT to show reasoning
    description='Model:',
    style={'description_width': '50px'},
    layout=widgets.Layout(width='220px')
)

# Switches the view between papers labelled 1 (included) and 0 (excluded).
label_toggle = widgets.ToggleButtons(
    options=['Included', 'Excluded'],
    value='Included',
    description='',
    button_style='',
    tooltips=['Papers cited in this review', 'Papers from related reviews']
)

# All rendered HTML is written into this output area.
output = widgets.Output()

def _render_paper_card(number, paper):
    """Render one merged paper row as an HTML card; all data-derived text is escaped."""
    title = paper['paper_title'] if pd.notna(paper['paper_title']) else 'No title available.'
    abstract = paper['paper_abstract'] if pd.notna(paper['paper_abstract']) else 'No abstract available.'
    reasoning = str(paper['reasoning']) if pd.notna(paper['reasoning']) else 'No reasoning available.'

    # Truncate very long reasoning. This is done on the raw text, before escaping,
    # so an HTML entity can never be cut in half.
    if len(reasoning) > 1500:
        reasoning = reasoning[:1500] + '...'

    true_text = 'Included' if paper['label'] == 1 else 'Excluded'

    if pd.isna(paper['pred_binary']):
        # The paper has no row in the evaluation file (left merge produced NaN).
        # Say so explicitly instead of presenting it as an "EXCLUDE" decision.
        status_color = '#6c757d'
        badge_text = '? LLM: NO PREDICTION'
    else:
        is_correct = paper['pred_binary'] == paper['label']
        pred_text = 'INCLUDE' if paper['pred_binary'] == 1 else 'EXCLUDE'
        status_color = '#28a745' if is_correct else '#dc3545'
        # Numeric character references (check mark / ballot X) are encoding-proof.
        status_icon = '&#10003;' if is_correct else '&#10007;'
        status_text = 'Correct' if is_correct else 'Incorrect'
        badge_text = f'{status_icon} LLM: {pred_text} ({status_text})'

    return f'''
        <div class="paper-card">
            <div class="paper-header">
                <span class="paper-number">{number}</span>
                <span class="prediction-badge" style="background: {status_color};">
                    {badge_text}
                </span>
            </div>
            <div class="paper-title">{html_escape(str(title))}</div>
            <div class="paper-pmid">PMID: {int(paper['paper_pmid'])} | Ground Truth: {true_text}</div>
            
            <div class="section-label">Abstract</div>
            <div class="paper-abstract">{html_escape(str(abstract))}</div>
            
            <div class="section-label">LLM Reasoning</div>
            <div class="reasoning-box">{html_escape(reasoning)}</div>
        </div>
        '''


def generate_html_content(run_name, model_name, show_included):
    """Generate the HTML view of one run/model for the included or excluded papers.

    Parameters
    ----------
    run_name : str
        Key of ``RUN_FILES``.
    model_name : str
        Key of ``RUN_FILES[run_name]``.
    show_included : bool
        True to show papers with ``label == 1``, False for ``label == 0``.

    Returns
    -------
    str
        A self-contained HTML fragment (``<style>`` block plus markup) with a
        review header, the accuracy on the shown subset and one card per paper.
        Papers without a prediction count as incorrect in the accuracy figure.
    """
    # Load evaluation data with reasoning
    eval_data = load_eval_with_reasoning(run_name, model_name)

    # Merge with review data to get paper details. Keep one evaluation row per
    # paper so a duplicated PMID in the results file cannot duplicate a card.
    eval_columns = (
        eval_data[['paper_pmid', 'pred_binary', 'reasoning']]
        .drop_duplicates(subset='paper_pmid')
    )
    merged = review_data.merge(eval_columns, on='paper_pmid', how='left')

    # Filter by label
    target_label = 1 if show_included else 0
    papers = merged[merged['label'] == target_label]

    # Calculate accuracy for this subset (NaN predictions never equal the label)
    correct = int((papers['pred_binary'] == papers['label']).sum())
    total = len(papers)
    accuracy = correct / total * 100 if total > 0 else 0

    # Generate paper cards HTML
    cards_html = "".join(
        _render_paper_card(number, paper)
        for number, (_, paper) in enumerate(papers.iterrows(), start=1)
    )

    label_text = 'Included' if show_included else 'Excluded'
    label_color = '#28a745' if show_included else '#dc3545'

    # Everything that originates from data files is escaped before interpolation.
    safe_title = html_escape(str(review_title))
    safe_run = html_escape(str(run_name))
    safe_model = html_escape(str(model_name))
    criteria_text = html_escape(str(review_criteria)) if pd.notna(review_criteria) else 'Not available'

    page_html = f'''
    <style>
        .viz-container {{
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            max-width: 1000px;
            margin: 0 auto;
        }}
        
        .review-header {{
            background: linear-gradient(135deg, #e8e8e8 0%, #f5f5f5 100%);
            padding: 20px;
            border-radius: 12px;
            margin-bottom: 20px;
        }}
        
        .review-title {{
            font-size: 1.3em;
            font-weight: bold;
            color: #333;
            margin-bottom: 10px;
        }}
        
        .review-meta {{
            display: flex;
            gap: 10px;
            flex-wrap: wrap;
            margin-bottom: 15px;
        }}
        
        .meta-badge {{
            background: rgba(0,0,0,0.1);
            padding: 4px 10px;
            border-radius: 15px;
            font-size: 0.8em;
            color: #555;
        }}
        
        .accuracy-badge {{
            background: {label_color};
            color: white;
            padding: 4px 12px;
            border-radius: 15px;
            font-size: 0.85em;
            font-weight: bold;
        }}
        
        .review-abstract {{
            background: rgba(0,0,0,0.05);
            padding: 12px;
            border-radius: 8px;
            font-size: 0.85em;
            line-height: 1.5;
            color: #444;
            max-height: 150px;
            overflow-y: auto;
        }}
        
        .papers-header {{
            text-align: center;
            margin-bottom: 15px;
        }}
        
        .papers-header h3 {{
            color: {label_color};
            margin: 0;
        }}
        
        .paper-card {{
            background: white;
            border: 1px solid #e0e0e0;
            border-radius: 10px;
            padding: 15px 20px;
            margin-bottom: 15px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.05);
        }}
        
        .paper-header {{
            display: flex;
            justify-content: space-between;
            align-items: center;
            margin-bottom: 10px;
        }}
        
        .paper-number {{
            background: {label_color};
            color: white;
            width: 28px;
            height: 28px;
            border-radius: 50%;
            display: inline-flex;
            align-items: center;
            justify-content: center;
            font-weight: bold;
            font-size: 0.9em;
        }}
        
        .prediction-badge {{
            color: white;
            padding: 4px 10px;
            border-radius: 15px;
            font-size: 0.8em;
            font-weight: 500;
        }}
        
        .paper-title {{
            font-weight: 600;
            font-size: 1em;
            color: #333;
            margin-bottom: 5px;
            line-height: 1.4;
        }}
        
        .paper-pmid {{
            font-size: 0.75em;
            color: #888;
            margin-bottom: 12px;
        }}
        
        .section-label {{
            font-size: 0.75em;
            font-weight: bold;
            color: #666;
            text-transform: uppercase;
            letter-spacing: 0.5px;
            margin-bottom: 5px;
            margin-top: 10px;
        }}
        
        .paper-abstract {{
            font-size: 0.85em;
            color: #555;
            line-height: 1.6;
            background: #f9f9f9;
            padding: 10px;
            border-radius: 6px;
            max-height: 120px;
            overflow-y: auto;
        }}
        
        .reasoning-box {{
            font-size: 0.85em;
            color: #444;
            line-height: 1.6;
            background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%);
            padding: 12px;
            border-radius: 6px;
            border-left: 4px solid #2196F3;
            font-style: italic;
        }}
    </style>
    
    <div class="viz-container">
        <div class="review-header">
            <div class="review-title">&#128203; {safe_title}</div>
            <div class="review-meta">
                <span class="meta-badge">PMID: {SELECTED_REVIEW_PMID}</span>
                <span class="meta-badge">{safe_run}</span>
                <span class="meta-badge">{safe_model}</span>
                <span class="accuracy-badge">{label_text} Accuracy: {accuracy:.0f}% ({correct}/{total})</span>
            </div>
            <div class="review-abstract">
                <strong>Selection Criteria:</strong> {criteria_text}
            </div>
        </div>
        
        <div class="papers-header">
            <h3>{label_text} Papers ({total})</h3>
        </div>
        
        {cards_html}
    </div>
    '''

    return page_html

def update_visualization(*args):
    """Re-render the output area from the current widget selections.

    Accepts (and ignores) any positional arguments so it can be used both as a
    widget observer callback and as a plain call for the initial render.
    """
    with output:
        # wait=True delays clearing until the new content is ready to be shown.
        clear_output(wait=True)
        want_included = (label_toggle.value == 'Included')
        display(HTML(generate_html_content(
            run_dropdown.value,
            model_dropdown.value,
            want_included,
        )))

# Link widgets: any change of run, model or label re-renders the view
run_dropdown.observe(update_visualization, names='value')
model_dropdown.observe(update_visualization, names='value')
label_toggle.observe(update_visualization, names='value')

# Layout
controls = widgets.HBox(
    [run_dropdown, model_dropdown, label_toggle],
    layout=widgets.Layout(justify_content='center', margin='10px 0', gap='15px')
)

# The magnifier icon is written as a numeric character reference (U+1F50D) so it
# renders correctly regardless of the encoding the notebook file is saved in.
title = widgets.HTML(
    value="<h2 style='text-align: center; color: #2c3e50; margin-bottom: 5px;'>" +
          "&#128269; LLM Screening Reasoning Viewer</h2>" +
          "<p style='text-align: center; color: #6c757d; font-size: 13px;'>" +
          "Explore how LLMs reason about paper inclusion/exclusion decisions</p>"
)

dashboard = widgets.VBox([title, controls, output])
display(dashboard)

# Initial render
update_visualization()

VBox(children=(HTML(value="<h2 style='text-align: center; color: #2c3e50; margin-bottom: 5px;'>🔍 LLM Screening…

In [10]:
# Export to interactive HTML
import json

def save_interactive_reasoning_html(output_path=None):
    """Save a standalone interactive HTML viewer with every combination pre-rendered.

    Each run/model in ``RUN_FILES`` is rendered for both the included and the
    excluded papers with ``generate_html_content``; the results are embedded in
    the page as a JavaScript object and swapped in by the page's controls.

    Parameters
    ----------
    output_path : str or pathlib.Path, optional
        Destination file. Defaults to ``llm_reasoning_viewer.html`` in the parent
        of the current working directory.

    Returns
    -------
    pathlib.Path
        The path the HTML file was written to.
    """
    # Pre-render all HTML content, driven by RUN_FILES so runs/models are listed once
    content = {
        run_name: {
            model_name: {
                'included': generate_html_content(run_name, model_name, True),
                'excluded': generate_html_content(run_name, model_name, False),
            }
            for model_name in models
        }
        for run_name, models in RUN_FILES.items()
    }

    # Serialise for JavaScript. json.dumps handles quotes, backslashes, newlines and
    # (with its default ensure_ascii=True) every non-ASCII character. "</" is
    # additionally written as "<\/" so that text such as "</script>" inside the
    # payload cannot terminate the surrounding <script> element.
    js_content = "const content = " + json.dumps(content).replace("</", "<\\/") + ";"

    def render_options(values, selected_value):
        """Build <option> elements, marking ``selected_value`` as the initial choice."""
        option_lines = []
        for value in values:
            selected_attr = ' selected' if value == selected_value else ''
            safe_value = html_escape(str(value))
            option_lines.append(f'<option value="{safe_value}"{selected_attr}>{safe_value}</option>')
        return '\n                '.join(option_lines)

    # Model names across all runs, first-seen order, without duplicates
    all_models = list(dict.fromkeys(
        model_name for models in RUN_FILES.values() for model_name in models
    ))
    # Initial selections match the notebook widgets' defaults
    run_options = render_options(RUN_FILES, 'Run 2')
    model_options = render_options(all_models, 'Mistral CoT')

    html = f'''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LLM Screening Reasoning Viewer</title>
    <style>
        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
        
        body {{
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background: transparent;
            padding: 15px;
        }}
        
        .header {{
            text-align: center;
            margin-bottom: 15px;
        }}
        
        .header h1 {{
            color: #2c3e50;
            font-size: 1.5em;
            margin-bottom: 5px;
        }}
        
        .header .subtitle {{
            color: #6c757d;
            font-size: 13px;
        }}
        
        .controls {{
            display: flex;
            justify-content: center;
            gap: 15px;
            margin-bottom: 15px;
            flex-wrap: wrap;
        }}
        
        .control-group {{
            display: flex;
            align-items: center;
            gap: 8px;
        }}
        
        .control-group label {{
            font-weight: 500;
            color: #2c3e50;
            font-size: 14px;
        }}
        
        .control-group select {{
            padding: 8px 12px;
            border: 2px solid #e0e0e0;
            border-radius: 8px;
            font-size: 14px;
            background: white;
            cursor: pointer;
        }}
        
        .toggle-group {{
            display: flex;
            gap: 0;
        }}
        
        .toggle-btn {{
            padding: 8px 16px;
            border: 2px solid #e0e0e0;
            background: white;
            cursor: pointer;
            font-size: 14px;
            font-weight: 500;
            transition: all 0.2s;
        }}
        
        .toggle-btn:first-child {{
            border-radius: 8px 0 0 8px;
        }}
        
        .toggle-btn:last-child {{
            border-radius: 0 8px 8px 0;
            border-left: none;
        }}
        
        .toggle-btn.active.included {{
            background: #28a745;
            color: white;
            border-color: #28a745;
        }}
        
        .toggle-btn.active.excluded {{
            background: #dc3545;
            color: white;
            border-color: #dc3545;
        }}
        
        #content-container {{
            max-width: 1000px;
            margin: 0 auto;
        }}
        
        .footer {{
            text-align: center;
            margin-top: 20px;
            color: #888;
            font-size: 11px;
        }}
    </style>
</head>
<body>
    <div class="header">
        <h1>&#128269; LLM Screening Reasoning Viewer</h1>
        <p class="subtitle">Explore how LLMs reason about paper inclusion/exclusion decisions</p>
    </div>
    
    <div class="controls">
        <div class="control-group">
            <label>Run:</label>
            <select id="run-select" onchange="updateContent()">
                {run_options}
            </select>
        </div>
        <div class="control-group">
            <label>Model:</label>
            <select id="model-select" onchange="updateContent()">
                {model_options}
            </select>
        </div>
        <div class="toggle-group">
            <button class="toggle-btn active included" id="btn-included" onclick="setLabel('included')">Included</button>
            <button class="toggle-btn excluded" id="btn-excluded" onclick="setLabel('excluded')">Excluded</button>
        </div>
    </div>
    
    <div id="content-container"></div>
    
    <div class="footer">
        LSE-UKHSA Systematic Review Screening Project
    </div>
    
    <script>
        {js_content}
        
        let currentLabel = 'included';
        
        function setLabel(label) {{
            currentLabel = label;
            document.getElementById('btn-included').className = 'toggle-btn' + (label === 'included' ? ' active included' : '');
            document.getElementById('btn-excluded').className = 'toggle-btn' + (label === 'excluded' ? ' active excluded' : '');
            updateContent();
        }}
        
        function updateContent() {{
            const run = document.getElementById('run-select').value;
            const model = document.getElementById('model-select').value;
            const container = document.getElementById('content-container');
            
            if (content[run] && content[run][model]) {{
                container.innerHTML = content[run][model][currentLabel];
            }} else {{
                // Do not leave the previous selection's cards on screen.
                container.innerHTML = '<p style="text-align: center; color: #888;">No results available for this selection.</p>';
            }}
        }}
        
        document.addEventListener('DOMContentLoaded', updateContent);
    </script>
</body>
</html>
'''

    if output_path is None:
        output_path = Path.cwd().parent / "llm_reasoning_viewer.html"
    output_path = Path(output_path)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html)

    # \u2705 is the "white heavy check mark" emoji, written as an escape so the
    # message does not depend on the encoding of the notebook file.
    print(f"\u2705 Interactive reasoning viewer saved to: {output_path}")
    print(f"   File size: {output_path.stat().st_size / 1024:.1f} KB")
    return output_path

# Save the HTML
save_interactive_reasoning_html()

✅ Interactive reasoning viewer saved to: c:\Users\juanx\Documents\LSE-UKHSA Project\llm_reasoning_viewer.html
   File size: 303.3 KB


WindowsPath('c:/Users/juanx/Documents/LSE-UKHSA Project/llm_reasoning_viewer.html')