In [10]:
import json
import math
import os
from pathlib import Path

In [9]:
def generate_html_with_token_probs_from_log(
    log_filename,
    output_filename="token_probabilities.html"
):
    """
    Generate an interactive HTML file showing tokens with a side panel for
    probabilities, using a JSON log file of the form:

    {
      "model": "...",
      "prompt": "...",
      "story": "...",
      "top_k": 10,
      "token_positions": [
        {
          "position": 0,
          "top_logprobs": [
            {"token": "Once", "logprob": ..., "probability": ...},
            ...
          ]
        },
        ...
      ]
    }

    Only "token_positions" is read from the log. For each position the
    displayed token is pos["token"] when that key is present, otherwise the
    first entry of "top_logprobs"; a position that has neither is rendered
    as an empty token with no alternatives.

    Args:
        log_filename: Path of the JSON log to read (UTF-8).
        output_filename: Path of the HTML file to write (UTF-8). Its parent
            directory must already exist.

    Returns:
        None. The page is written to ``output_filename`` and a confirmation
        line is printed.

    Raises:
        OSError: If the log cannot be read or the output cannot be written.
        json.JSONDecodeError: If the log is not valid JSON.
        KeyError: If "token_positions", "position", "top_logprobs", or an
            alternative's "token"/"logprob" key is missing.
    """

    # --- Load your JSON log ---
    with open(log_filename, "r", encoding="utf-8") as f:
        data = json.load(f)

    token_positions = data["token_positions"]

    # --- Build tokens_data in the same shape the HTML/JS expects ---
    tokens_data = []
    for pos in token_positions:
        alternatives = []
        for lp in pos["top_logprobs"]:
            token = lp["token"]
            token_logprob = lp["logprob"]
            # Use probability from the file if present, otherwise compute it.
            # The fallback is only evaluated when it is actually needed.
            if "probability" in lp:
                prob = lp["probability"]
            else:
                prob = math.exp(token_logprob)
            alternatives.append({
                "token": token,
                "logprob": token_logprob,
                "probability": prob
            })

        # Actual token chosen by the model:
        # If your JSON has pos["token"], we use it.
        # Otherwise, we assume the first alternative is the chosen token.
        # dict.get() would evaluate alternatives[0] eagerly and raise
        # IndexError for an empty top_logprobs list even when pos["token"]
        # exists, so the fallback is resolved explicitly.
        if "token" in pos:
            actual_token = pos["token"]
        elif alternatives:
            actual_token = alternatives[0]["token"]
        else:
            actual_token = ""

        tokens_data.append({
            "index": pos["position"],
            "token": actual_token,
            "alternatives": alternatives
        })

    # The JSON is pasted into an inline <script> element. Token text comes
    # from model output, so "<", ">" and "&" are written as \uXXXX escapes:
    # a token such as "</script>" can then neither close the script element
    # nor inject markup. These characters only occur inside JSON strings, so
    # the escaped text decodes to exactly the same data. U+2028/U+2029 are
    # escaped as well because older JavaScript engines treat them as line
    # terminators inside string literals.
    tokens_json = (
        json.dumps(tokens_data, ensure_ascii=False)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
        .replace("\u2028", "\\u2028")
        .replace("\u2029", "\\u2029")
    )

    # --- Generate HTML with click-to-focus behaviour ---
    html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Token Probability Visualization</title>
    <style>
        * {{
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }}
        
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            background: #f5f7fa;
            height: 100vh;
            display: flex;
            overflow: hidden;
        }}
        
        .left-panel {{
            flex: 0 0 60%;
            overflow-y: auto;
            padding: 40px;
            background: white;
            border-right: 1px solid #e0e0e0;
        }}
        
        .right-panel {{
            flex: 0 0 40%;
            background: #f5f7fa;
            padding: 40px;
            overflow-y: auto;
            border-left: 1px solid #e0e0e0;
        }}
        
        h1 {{
            color: #2c3e50;
            margin-bottom: 30px;
            font-size: 28px;
        }}
        
        .text-content {{
            font-size: 18px;
            line-height: 1.8;
            color: #333;
            word-wrap: break-word;
            overflow-wrap: break-word;
            white-space: pre-wrap;
        }}
        
        .token {{
            position: relative;
            cursor: pointer;
            padding: 0;
            margin: 0;
            border-radius: 3px;
            transition: all 0.15s ease;
            background-color: transparent;
            border-bottom: 2px solid transparent;
            display: inline;
        }}
        
        .token:hover {{
            background-color: #fffacd;
            border-bottom-color: #ffd700;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        }}

        /* Focused (clicked) token */
        .token-focused {{
            background-color: #3498db;
            color: white;
            border-bottom-color: #1f78d1;
        }}
        
        .panel-title {{
            color: #2c3e50;
            font-size: 18px;
            font-weight: 600;
            margin-bottom: 20px;
            border-bottom: 2px solid #3498db;
            padding-bottom: 10px;
        }}
        
        .current-token {{
            background: #3498db;
            color: white;
            padding: 12px 16px;
            border-radius: 6px;
            font-size: 16px;
            font-weight: 600;
            margin-bottom: 20px;
            text-align: center;
            word-break: break-all;
        }}
        
        .placeholder {{
            color: #999;
            font-style: italic;
            text-align: center;
            padding: 40px 20px;
        }}
        
        .token-option {{
            margin-bottom: 16px;
            padding: 12px;
            background: white;
            border-radius: 6px;
            border-left: 4px solid #3498db;
            transition: all 0.2s ease;
        }}
        
        .token-option.selected {{
            border-left-color: #e74c3c;
            background: #fff5f5;
        }}
        
        .rank {{
            display: inline-block;
            background: #3498db;
            color: white;
            width: 28px;
            height: 28px;
            border-radius: 50%;
            text-align: center;
            line-height: 28px;
            font-size: 12px;
            font-weight: 600;
            margin-right: 8px;
        }}
        
        .token-option.selected .rank {{
            background: #e74c3c;
        }}
        
        .token-name {{
            font-weight: 600;
            color: #2c3e50;
            margin-bottom: 6px;
            display: flex;
            align-items: center;
            font-size: 14px;
        }}
        
        .probability-bar {{
            background: #ecf0f1;
            height: 8px;
            border-radius: 4px;
            overflow: hidden;
            margin: 6px 0;
        }}
        
        .probability-fill {{
            background: linear-gradient(90deg, #2ecc71, #27ae60);
            height: 100%;
            border-radius: 4px;
            transition: width 0.3s ease;
        }}
        
        .token-option.selected .probability-fill {{
            background: linear-gradient(90deg, #e74c3c, #c0392b);
        }}
        
        .probability-text {{
            font-size: 12px;
            color: #666;
            margin-top: 4px;
            display: flex;
            justify-content: space-between;
        }}
        
        @media (max-width: 1024px) {{
            .left-panel {{
                flex: 0 0 50%;
            }}
            .right-panel {{
                flex: 0 0 50%;
            }}
        }}
        
        @media (max-width: 768px) {{
            body {{
                flex-direction: column;
            }}
            .left-panel {{
                flex: 0 0 50%;
                border-right: none;
                border-bottom: 1px solid #e0e0e0;
            }}
            .right-panel {{
                flex: 0 0 50%;
                border-left: none;
            }}
        }}
    </style>
</head>
<body>
    <div class="left-panel">
        <h1>Token Probabilities</h1>
        <div class="text-content" id="textContent"></div>
    </div>
    
    <div class="right-panel">
        <div class="panel-title">Alternatives</div>
        <div id="probabilitiesPanel" class="placeholder">
            Click on a token to see alternatives
        </div>
    </div>
    
    <script>
        const tokensData = {tokens_json};
        let currentTokenIndex = null;
        
        function createTokens() {{
            const textContainer = document.getElementById('textContent');
            
            tokensData.forEach((tokenData, index) => {{
                const tokenSpan = document.createElement('span');
                tokenSpan.className = 'token';
                tokenSpan.textContent = tokenData.token;
                
                // Click to focus on this token
                tokenSpan.addEventListener('click', () => {{
                    selectToken(index);
                }});
                
                textContainer.appendChild(tokenSpan);
            }});

            // Optionally auto-focus first token
            if (tokensData.length > 0) {{
                selectToken(0);
            }}
        }}

        function selectToken(index) {{
            currentTokenIndex = index;
            updateProbabilitiesPanel(index);
            updateTokenHighlight();
        }}

        function updateTokenHighlight() {{
            const tokenSpans = document.querySelectorAll('.token');
            tokenSpans.forEach((span, idx) => {{
                if (idx === currentTokenIndex) {{
                    span.classList.add('token-focused');
                }} else {{
                    span.classList.remove('token-focused');
                }}
            }});
        }}
        
        function updateProbabilitiesPanel(tokenIndex) {{
            const panel = document.getElementById('probabilitiesPanel');
            const alternatives = tokensData[tokenIndex].alternatives;
            const currentToken = tokensData[tokenIndex].token;
            
            let html = '<div class="current-token">Focused on: ' + escapeHtml(currentToken) + '</div>';
            
            alternatives.forEach((alt, idx) => {{
                const percentage = (alt.probability * 100).toFixed(2);
                const barWidth = (alt.probability * 100);
                const isSelected = alt.token === currentToken ? 'selected' : '';
                
                html += ''
                    + '<div class="token-option ' + isSelected + '">'
                    +   '<div class="token-name">'
                    +     '<span class="rank">' + (idx + 1) + '</span>'
                    +     '<span>' + escapeHtml(alt.token) + '</span>'
                    +   '</div>'
                    +   '<div class="probability-bar">'
                    +     '<div class="probability-fill" style="width: ' + barWidth + '%;"></div>'
                    +   '</div>'
                    +   '<div class="probability-text">'
                    +     '<span>Log: ' + alt.logprob.toFixed(4) + '</span>'
                    +     '<span>' + percentage + '%</span>'
                    +   '</div>'
                    + '</div>';
            }});
            
            panel.innerHTML = html;
        }}
        
        function escapeHtml(text) {{
            if (text === null || text === undefined) {{
                return '';
            }}
            text = String(text);
            const map = {{
                '&': '&amp;',
                '<': '&lt;',
                '>': '&gt;',
                '"': '&quot;',
                "'": '&#039;'
            }};
            return text.replace(/[&<>"']/g, m => map[m]);
        }}
        
        createTokens();
    </script>
</body>
</html>"""

    with open(output_filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
    
    print(f"HTML file generated: {output_filename}")


In [7]:
# Render a single log as a standalone page. The call only writes
# token_probabilities.html (the default output name); open that file in a
# browser afterwards to explore the tokens.
generate_html_with_token_probs_from_log(
    log_filename="logs/trial_1/2025-11-21_14-43-55_give_me_a_story_about_bells_and_clocks_in_english.json"
)

HTML file generated: token_probabilities.html


In [11]:
# Batch-convert every JSON log under logs/trial_1 ... logs/trial_5 into an
# HTML page under visualize_logs/<trial>/, keeping each file's base name.
# Both roots are resolved against the current working directory; point
# base_dir elsewhere if the notebook does not sit next to the "logs" folder.
base_dir = Path.cwd()
logs_dir = base_dir / "logs"
output_root = base_dir / "visualize_logs"

# The output root is created up front, even if every trial ends up skipped.
output_root.mkdir(parents=True, exist_ok=True)

trials = ["trial_%d" % number for number in range(1, 6)]  # trial_1 ... trial_5

for trial_name in trials:
    trial_input_dir = logs_dir / trial_name
    trial_output_dir = output_root / trial_name

    if trial_input_dir.exists():
        # Only create the per-trial output folder when there is an input folder.
        trial_output_dir.mkdir(parents=True, exist_ok=True)

        for json_file in trial_input_dir.glob("*.json"):
            output_html = trial_output_dir / (json_file.stem + ".html")
            print(f"Processing {json_file} -> {output_html}")

            # The generator is called with plain string paths.
            generate_html_with_token_probs_from_log(
                log_filename=str(json_file),
                output_filename=str(output_html),
            )
    else:
        print(f"Input folder not found, skipping: {trial_input_dir}")

Processing C:\Users\University\Documents\github\notebooks\5_transformers_and_gpt\story_generation\logs\trial_1\2025-11-21_14-43-55_give_me_a_story_about_bells_and_clocks_in_english.json -> C:\Users\University\Documents\github\notebooks\5_transformers_and_gpt\story_generation\visualize_logs\trial_1\2025-11-21_14-43-55_give_me_a_story_about_bells_and_clocks_in_english.html
HTML file generated: C:\Users\University\Documents\github\notebooks\5_transformers_and_gpt\story_generation\visualize_logs\trial_1\2025-11-21_14-43-55_give_me_a_story_about_bells_and_clocks_in_english.html
Processing C:\Users\University\Documents\github\notebooks\5_transformers_and_gpt\story_generation\logs\trial_1\2025-11-21_14-44-02_give_me_a_story_about_mugs_and_cups_in_english.json -> C:\Users\University\Documents\github\notebooks\5_transformers_and_gpt\story_generation\visualize_logs\trial_1\2025-11-21_14-44-02_give_me_a_story_about_mugs_and_cups_in_english.html
HTML file generated: C:\Users\University\Documents\g