# Interactive Validation Dataset Visualisation

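This notebook builds a standalone, interactive HTML page for exploring one example review from the validation dataset used to evaluate LLM screening performance. The page shows the review's abstract and selection criteria, with a toggle between its included and excluded papers; it is saved to disk and also displayed inline.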

In [18]:
# Install required packages
# %pip (rather than !pip) installs into the environment of the running kernel;
# -q keeps the installer output short.
# NOTE(review): no cell visible in this notebook imports ipywidgets, and neither
# package is version-pinned — confirm both are intentional.
%pip install -q ipywidgets pandas

Note: you may need to restart the kernel to use updated packages.


In [31]:
import pandas as pd
from pathlib import Path

# Load data
# Use ./Data when it exists under the current working directory, otherwise
# fall back to ../Data.
DATA_DIR = (
    Path.cwd() / "Data"
    if (Path.cwd() / "Data").exists()
    else Path.cwd().parent / "Data"
)
validation = pd.read_csv(DATA_DIR / "ground_truth_validation_set.csv")

# Also load Cochrane abstracts to get the full review abstract.
# pmid is read as str so it can be compared directly with SELECTED_REVIEW_PMID.
cochrane = pd.read_csv(DATA_DIR / "cochrane_pubmed_abstracts.csv", dtype={"pmid": str})

# Select a review with 100% accuracy on included and 60% on excluded:
# "Fetal movement counting for assessment of fetal wellbeing"
SELECTED_REVIEW_PMID = '26467769'

# Get all records for this review (review_pmid is cast to str for the comparison)
review_data = validation[validation['review_pmid'].astype(str) == SELECTED_REVIEW_PMID].copy()
if review_data.empty:
    # Fail here with a clear message instead of an IndexError on .iloc[0] below
    raise ValueError(
        f"No rows with review_pmid == {SELECTED_REVIEW_PMID!r} in ground_truth_validation_set.csv"
    )

# Get the full review abstract from Cochrane data
review_abstract_matches = cochrane.loc[cochrane['pmid'] == SELECTED_REVIEW_PMID, 'abstract']
if review_abstract_matches.empty:
    raise ValueError(
        f"No row with pmid == {SELECTED_REVIEW_PMID!r} in cochrane_pubmed_abstracts.csv"
    )
review_full_abstract = review_abstract_matches.iloc[0]

print(f"Review: {review_data['review_title'].iloc[0]}")
print(f"Papers: {len(review_data)} total ({review_data['label'].sum()} included, {int((1-review_data['label']).sum())} excluded)")

Review: Fetal movement counting for assessment of fetal wellbeing.
Papers: 10 total (5 included, 5 excluded)


In [32]:
# Build a self-contained HTML page for the selected review and export it

# Review-level fields, taken from the first record of the selected review
review_title, review_criteria, review_objectives = (
    review_data[field].iat[0]
    for field in ('review_title', 'review_criteria', 'review_objectives')
)

# Partition the papers by label (1 -> included, 0 -> excluded), keeping only
# the columns the paper cards need
included_papers = review_data.loc[
    review_data['label'] == 1,
    ['paper_pmid', 'paper_title', 'paper_abstract'],
].to_dict(orient='records')
excluded_papers = review_data.loc[
    review_data['label'] == 0,
    ['paper_pmid', 'paper_title', 'paper_abstract'],
].to_dict(orient='records')

def generate_paper_cards(papers, label_type):
    """Generate the HTML markup for a list of paper cards.

    Args:
        papers: Iterable of dicts with 'paper_pmid', 'paper_title' and
            'paper_abstract' keys.
        label_type: 'Included' selects the 'included' CSS class; any other
            value selects 'excluded'. The lower-cased value is written to each
            card's data-label attribute.

    Returns:
        One string containing a <div class="paper-card ..."> per paper,
        numbered from 1 in input order; an empty string when papers is empty.
    """
    # Function-scope import so the cell does not depend on another cell's imports
    from html import escape

    css_class = 'included' if label_type == 'Included' else 'excluded'
    data_label = escape(label_type.lower())

    cards = []
    for number, paper in enumerate(papers, start=1):
        # Titles and abstracts are free text from the dataset: characters such
        # as "<" or "&" (e.g. "p < 0.05") must be escaped or they corrupt the markup.
        title = escape(str(paper['paper_title']))
        if pd.notna(paper['paper_abstract']):
            abstract = escape(str(paper['paper_abstract']))
        else:
            abstract = 'No abstract available.'
        # int() drops a trailing ".0" from float-typed PMIDs; it cannot convert NaN
        pmid = int(paper['paper_pmid']) if pd.notna(paper['paper_pmid']) else 'N/A'

        cards.append(f'''
        <div class="paper-card {css_class}" data-label="{data_label}">
            <div class="paper-number">{number}</div>
            <div class="paper-title">{title}</div>
            <div class="paper-abstract">{abstract}</div>
            <div class="paper-pmid">PMID: {pmid}</div>
        </div>
        ''')
    return "".join(cards)

# Generate the complete HTML
from html import escape  # stdlib; used to escape dataset text placed in the page

# Review-level text comes from the dataset, so escape it before embedding it in
# markup. Missing values fall back to a placeholder instead of rendering "nan".
safe_review_title = escape(str(review_title))
safe_review_abstract = (
    escape(str(review_full_abstract)) if pd.notna(review_full_abstract) else 'Not available'
)
safe_review_criteria = (
    escape(str(review_criteria)) if pd.notna(review_criteria) else 'Not available'
)

# Literal CSS/JS braces are doubled because this is an f-string.
# Icons are written as numeric character references (&#128203; clipboard,
# &#10003; check mark, &#10007; ballot X) so they render correctly regardless
# of how this source file or the exported page is encoded.
html_content = f'''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Validation Dataset Example - {safe_review_title}</title>
    <style>
        * {{
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }}
        
        body {{
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background: transparent;
            min-height: 100vh;
            padding: 20px;
        }}
        
        .container {{
            max-width: 1200px;
            margin: 0 auto;
            background: transparent;
        }}
        
        /* Review Header Section */
        .review-header {{
            background: linear-gradient(135deg, #e8e8e8 0%, #f5f5f5 100%);
            color: #333;
            padding: 25px 30px;
            border-radius: 15px;
            margin-bottom: 25px;
            box-shadow: 0 4px 20px rgba(0,0,0,0.1);
        }}
        
        .review-title {{
            font-size: 1.6em;
            font-weight: bold;
            margin-bottom: 20px;
            line-height: 1.3;
            display: flex;
            align-items: flex-start;
            gap: 12px;
        }}
        
        .review-title-icon {{
            font-size: 1.2em;
        }}
        
        .review-meta {{
            display: flex;
            gap: 15px;
            margin-bottom: 20px;
            flex-wrap: wrap;
        }}
        
        .review-badge {{
            background: rgba(0,0,0,0.1);
            padding: 5px 12px;
            border-radius: 20px;
            font-size: 0.8em;
            font-weight: 500;
        }}
        
        .review-section {{
            background: rgba(0,0,0,0.05);
            padding: 15px 18px;
            border-radius: 10px;
            margin-top: 15px;
            font-size: 0.9em;
            line-height: 1.6;
        }}
        
        .review-section-title {{
            font-weight: bold;
            color: #555;
            margin-bottom: 10px;
            font-size: 0.85em;
            text-transform: uppercase;
            letter-spacing: 0.5px;
        }}
        
        /* Toggle Buttons */
        .toggle-container {{
            display: flex;
            justify-content: center;
            margin-bottom: 25px;
            gap: 0;
        }}
        
        .toggle-btn {{
            padding: 12px 30px;
            font-size: 1em;
            font-weight: 600;
            border: none;
            cursor: pointer;
            transition: all 0.3s ease;
            background: rgba(200, 200, 200, 0.3);
            color: #555;
        }}
        
        .toggle-btn:first-child {{
            border-radius: 25px 0 0 25px;
        }}
        
        .toggle-btn:last-child {{
            border-radius: 0 25px 25px 0;
        }}
        
        .toggle-btn.active.included {{
            background: #28a745;
            color: white;
            box-shadow: 0 4px 15px rgba(40, 167, 69, 0.4);
        }}
        
        .toggle-btn.active.excluded {{
            background: #dc3545;
            color: white;
            box-shadow: 0 4px 15px rgba(220, 53, 69, 0.4);
        }}
        
        .toggle-btn:hover:not(.active) {{
            background: rgba(150, 150, 150, 0.3);
        }}
        
        /* Papers Container */
        .papers-section {{
            background: transparent;
        }}
        
        .papers-grid {{
            display: grid;
            gap: 20px;
        }}
        
        .paper-card {{
            background: rgba(255,255,255,0.98);
            border-radius: 12px;
            padding: 20px 25px;
            box-shadow: 0 3px 12px rgba(0,0,0,0.08);
            border-left: 6px solid;
            transition: transform 0.2s, box-shadow 0.2s, opacity 0.3s;
            position: relative;
        }}
        
        .paper-card:hover {{
            transform: translateX(8px);
            box-shadow: 0 6px 20px rgba(0,0,0,0.12);
        }}
        
        .paper-card.included {{
            border-left-color: #28a745;
        }}
        
        .paper-card.excluded {{
            border-left-color: #dc3545;
        }}
        
        .paper-card.hidden {{
            display: none;
        }}
        
        .paper-number {{
            position: absolute;
            top: -10px;
            left: -10px;
            width: 28px;
            height: 28px;
            border-radius: 50%;
            display: flex;
            align-items: center;
            justify-content: center;
            font-weight: bold;
            font-size: 0.85em;
            color: white;
        }}
        
        .paper-card.included .paper-number {{
            background: #28a745;
        }}
        
        .paper-card.excluded .paper-number {{
            background: #dc3545;
        }}
        
        .paper-title {{
            font-weight: 600;
            font-size: 1.05em;
            color: #333;
            margin-bottom: 12px;
            line-height: 1.4;
            padding-right: 10px;
        }}
        
        .paper-abstract {{
            font-size: 0.88em;
            color: #555;
            line-height: 1.7;
            text-align: justify;
        }}
        
        .paper-pmid {{
            font-size: 0.75em;
            color: #888;
            margin-top: 12px;
            font-weight: 500;
        }}
        
        /* Section Label */
        .section-label {{
            text-align: center;
            margin-bottom: 20px;
        }}
        
        .section-label span {{
            display: inline-block;
            padding: 8px 20px;
            border-radius: 25px;
            font-size: 0.9em;
            font-weight: bold;
        }}
        
        .section-label.included span {{
            background: #d4edda;
            color: #155724;
        }}
        
        .section-label.excluded span {{
            background: #f8d7da;
            color: #721c24;
        }}
        
        /* Footer */
        .footer {{
            text-align: center;
            margin-top: 30px;
            padding: 15px;
            color: #888;
            font-size: 0.8em;
        }}
    </style>
</head>
<body>
    <div class="container">
        <!-- Review Header -->
        <div class="review-header">
            <div class="review-title">
                <span class="review-title-icon">&#128203;</span>
                <span>{safe_review_title}</span>
            </div>
            <div class="review-meta">
                <span class="review-badge">PMID: {SELECTED_REVIEW_PMID}</span>
                <span class="review-badge">Cochrane Systematic Review</span>
            </div>
            <div class="review-section">
                <div class="review-section-title">Full Abstract</div>
                {safe_review_abstract}
            </div>
            <div class="review-section">
                <div class="review-section-title">Selection Criteria</div>
                {safe_review_criteria}
            </div>
        </div>
        
        <!-- Toggle Buttons -->
        <div class="toggle-container">
            <button class="toggle-btn active included" onclick="showPapers('included')">
                &#10003; Included ({len(included_papers)})
            </button>
            <button class="toggle-btn excluded" onclick="showPapers('excluded')">
                &#10007; Excluded ({len(excluded_papers)})
            </button>
        </div>
        
        <!-- Papers Section -->
        <div class="papers-section">
            <div class="section-label included" id="section-label-included">
                <span>Papers Cited in This Review (Passed Screening)</span>
            </div>
            <div class="section-label excluded" id="section-label-excluded" style="display:none;">
                <span>Papers from Related Reviews (Not Cited Here)</span>
            </div>
            
            <div class="papers-grid">
                {generate_paper_cards(included_papers, 'Included')}
                {generate_paper_cards(excluded_papers, 'Excluded')}
            </div>
        </div>
        
        <div class="footer">
            LSE-UKHSA Systematic Review Screening Project | Validation Dataset Example
        </div>
    </div>
    
    <script>
        // Initialize - hide excluded papers.
        // If this markup is injected into a page that has already finished
        // loading (for example a notebook output), DOMContentLoaded has already
        // fired and a listener would never run, so call showPapers directly.
        if (document.readyState === 'loading') {{
            document.addEventListener('DOMContentLoaded', function() {{
                showPapers('included');
            }});
        }} else {{
            showPapers('included');
        }}
        
        function showPapers(label) {{
            // Update toggle buttons: each button carries its label as a CSS
            // class, so match on that rather than on the visible button text
            const buttons = document.querySelectorAll('.toggle-btn');
            buttons.forEach(btn => {{
                btn.classList.toggle('active', btn.classList.contains(label));
            }});
            
            // Update section labels
            document.getElementById('section-label-included').style.display = label === 'included' ? 'block' : 'none';
            document.getElementById('section-label-excluded').style.display = label === 'excluded' ? 'block' : 'none';
            
            // Show/hide papers
            const papers = document.querySelectorAll('.paper-card');
            papers.forEach(paper => {{
                if (paper.dataset.label === label) {{
                    paper.classList.remove('hidden');
                }} else {{
                    paper.classList.add('hidden');
                }}
            }});
        }}
    </script>
</body>
</html>
'''

# Save to project root
# DATA_DIR resolves to the "Data" folder found in the working directory or in
# its parent, so DATA_DIR.parent is the folder that contains "Data" (presumably
# the project root) in both cases. Path.cwd().parent would instead point one
# level above that folder when the notebook is run from the folder itself.
output_path = DATA_DIR.parent / "validation_dataset_example.html"

# Explicit UTF-8 so the file matches the <meta charset="UTF-8"> declared in the page
output_path.write_text(html_content, encoding='utf-8')

# \u2705 is the "white heavy check mark" emoji, written as an escape so the
# message survives any re-encoding of this source file
print(f"\u2705 HTML visualization saved to: {output_path}")
print(f"   File size: {output_path.stat().st_size / 1024:.1f} KB")

✅ HTML visualization saved to: c:\Users\juanx\Documents\LSE-UKHSA Project\validation_dataset_example.html
   File size: 37.2 KB


In [33]:
# Display the visualization inline in the notebook
from html import escape

from IPython.display import HTML, display

# html_content is a complete HTML document: its stylesheet contains page-wide
# rules (`*`, `body`) and its script defines a global function. Injecting it
# directly into the output would apply those to the notebook page as well, so
# render it inside an <iframe srcdoc="..."> to keep it isolated.
# escape() (quote=True by default) makes the document safe to place inside the
# double-quoted srcdoc attribute.
# The wrapping <div> avoids IPython's "Consider using IPython.display.IFrame"
# warning, which is raised for HTML that starts with "<iframe ".
INLINE_FRAME_HEIGHT_PX = 800  # fixed viewport height; the page scrolls inside it
display(HTML(
    '<div>'
    f'<iframe srcdoc="{escape(html_content)}" '
    f'style="width: 100%; height: {INLINE_FRAME_HEIGHT_PX}px; border: none;"></iframe>'
    '</div>'
))