# break down parent-child relationships to allow graphing

In [1]:
import json
import pandas as pd

with open('../data/orgs/uk/govuk_orgs_enriched.json', 'r') as file:
    json_data = json.load(file)

# Read JSON data into a CSV DataFrame
df = pd.read_json('../data/orgs/uk/govuk_orgs_enriched.json')
df.to_csv('data.csv', index=False)
df

# Some orgs have multiple parents. 
df['number_of_parents'] = df['parent_organisations'].apply(
    lambda y: len(y)
)

df['first_parent_id'] = df['parent_organisations'].apply(
      lambda x: x[0]['id'] if x and len(x) > 0 else None
)

df


Unnamed: 0,id,title,format,updated_at,web_url,details,analytics_identifier,parent_organisations,child_organisations,superseded_organisations,superseding_organisations,oscar_match,oscar_match_score,oscar_budget_£k,non_govuk_domain,oscar_match_name,number_of_parents,first_parent_id
0,https://www.gov.uk/api/organisations/academy-f...,Academy for Social Justice,Other,2024-08-29 13:57:07+00:00,https://www.gov.uk/government/organisations/ac...,"{'slug': 'academy-for-social-justice', 'abbrev...",OT1276,[{'id': 'https://www.gov.uk/api/organisations/...,[],[{'id': 'https://www.gov.uk/api/organisations/...,[],False,,,,,1,https://www.gov.uk/api/organisations/ministry-...
1,https://www.gov.uk/api/organisations/accelerat...,Accelerated Capability Environment,Sub organisation,2023-05-24 14:14:28+00:00,https://www.gov.uk/government/organisations/ac...,"{'slug': 'accelerated-capability-environment',...",OT1369,[{'id': 'https://www.gov.uk/api/organisations/...,[],[],[],False,,,,,1,https://www.gov.uk/api/organisations/home-office
2,https://www.gov.uk/api/organisations/active-tr...,Active Travel England,Executive agency,2024-10-03 12:15:30+00:00,https://www.gov.uk/government/organisations/ac...,"{'slug': 'active-travel-england', 'abbreviatio...",EA1350,[{'id': 'https://www.gov.uk/api/organisations/...,[],[],[],False,,,https://www.activetravelengland.gov.uk,,1,https://www.gov.uk/api/organisations/departmen...
3,https://www.gov.uk/api/organisations/administr...,Administration of Radioactive Substances Advis...,Other,2021-09-15 11:03:46+00:00,https://www.gov.uk/government/organisations/ad...,{'slug': 'administration-of-radioactive-substa...,PB523,[{'id': 'https://www.gov.uk/api/organisations/...,[],[],[],False,,,,,1,https://www.gov.uk/api/organisations/departmen...
4,https://www.gov.uk/api/organisations/administr...,Administrative Court,Court,2022-12-16 18:54:50+00:00,https://www.gov.uk/government/organisations/ad...,"{'slug': 'administrative-court', 'abbreviation...",CO1188,[{'id': 'https://www.gov.uk/api/organisations/...,[],[],[],False,,,,,1,https://www.gov.uk/api/organisations/hm-courts...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
658,https://www.gov.uk/api/organisations/women-and...,Women and Equalities Unit,Sub organisation,2024-10-11 08:57:55+00:00,https://www.gov.uk/government/organisations/wo...,"{'slug': 'women-and-equalities-unit', 'abbrevi...",OT1428,[{'id': 'https://www.gov.uk/api/organisations/...,[],[{'id': 'https://www.gov.uk/api/organisations/...,[],False,,,,,2,https://www.gov.uk/api/organisations/cabinet-o...
659,https://www.gov.uk/api/organisations/yorkshire...,Yorkshire Dales National Park Authority,Other,2021-04-15 09:01:14+00:00,https://www.gov.uk/government/organisations/yo...,{'slug': 'yorkshire-dales-national-park-author...,OT547,[{'id': 'https://www.gov.uk/api/organisations/...,[],[],[],False,,,http://www.yorkshiredales.org.uk/,,1,https://www.gov.uk/api/organisations/departmen...
660,https://www.gov.uk/api/organisations/youth-cus...,Youth Custody Service,Sub organisation,2024-08-29 10:31:14+00:00,https://www.gov.uk/government/organisations/yo...,"{'slug': 'youth-custody-service', 'abbreviatio...",OT1322,[{'id': 'https://www.gov.uk/api/organisations/...,[],[],[],False,,,,,1,https://www.gov.uk/api/organisations/hm-prison...
661,https://www.gov.uk/api/organisations/youth-jus...,Youth Justice Agency of Northern Ireland,Executive agency,2024-11-20 14:55:58+00:00,https://www.gov.uk/government/organisations/yo...,{'slug': 'youth-justice-agency-of-northern-ire...,EA697,[{'id': 'https://www.gov.uk/api/organisations/...,[],[],[{'id': 'https://www.gov.uk/api/organisations/...,False,,,https://www.justice-ni.gov.uk/topics/youth-jus...,,1,https://www.gov.uk/api/organisations/departmen...


In [None]:
"""
Basic Treemap
"""

import pandas as pd
import plotly.graph_objects as go
import json
import textwrap
from collections import defaultdict, deque
import math

def parse_orgs(org_list):
    """Extract organization IDs from the organization list"""
    # Handle None/NaN
    if org_list is None:
        return []
    # If it's already a list, extract IDs
    if isinstance(org_list, list):
        if len(org_list) == 0:
            return []
        # Extract just the 'id' field from each organization dict
        return [org.get('id') for org in org_list if isinstance(org, dict) and 'id' in org]
    # Fallback for unexpected types
    return []

def wrap_text_textwrap(text, width=20):
    """Wrap text using Python's textwrap module"""
    return '<br>'.join(textwrap.wrap(text, width=width))

# Extract relationships
df['parent_list'] = df['parent_organisations'].apply(parse_orgs)
df['child_list'] = df['child_organisations'].apply(parse_orgs) 


# Create ID lookup
df['org_id'] = df['id']
id_to_title = dict(zip(df['org_id'], df['title']))
id_to_format = dict(zip(df['org_id'], df['format']))
id_to_web_url = dict(zip(df['org_id'], df['web_url']))
id_to_budget = dict(zip(df['org_id'], df['oscar_budget_£k']))


# Build parent-child mapping
parent_to_children = defaultdict(list)
child_to_parent = {}

for idx, row in df.iterrows():
    org_id = row['org_id']
    
    # Add children relationships - child is already an ID string, not a dict
    for child_id in row['child_list']:
        parent_to_children[org_id].append(child_id)
        child_to_parent[child_id] = org_id

# Find root nodes (organizations with no parents)
roots = [org_id for org_id in df['org_id'] if org_id not in child_to_parent]

print(f"Total organizations: {len(df)}")
print(f"Root organizations (no parents): {len(roots)}")
print(f"Organizations with children: {sum(1 for children in parent_to_children.values() if children)}")

# Print all unique formats to verify them
print(f"\nUnique organization formats:")
for fmt in sorted(df['format'].unique()):
    count = len(df[df['format'] == fmt])
    print(f"  {fmt}: {count}")

FORMAT_WEIGHTS = {
    'Ministerial department': 400,        # ~£10bn equivalent on log scale
    'Non-ministerial department': 350,    # ~£3bn
    'Devolved administration': 400,       # ~£10bn (Scottish/Welsh Govt)
    'Executive agency': 250,              # ~£100m
    'Executive non-departmental public body': 200,  # ~£10m
    'Public corporation': 250,            # ~£100m
    'Executive office': 200,              # ~£10m
    'Tribunal non-departmental public body': 150,   # ~£1m
    'Advisory non-departmental public body': 100,   # <£1m (mostly advisory)
    'Independent monitoring body': 150,   # ~£1m
    'Civil service': 150,                 # ~£1m
    'Court': 150,                         # ~£1m
    'Sub-organisation': 100,              # Smallest
    'Other': 100,                         
}


# Build tree structure for plotly treemap
# We need: labels (names), parents (parent names), and values
labels = []
parents = []
ids = []
formats = []
values = []
oscar_ii_budget = []
web_urls = []

# Add all organizations
for org_id in df['org_id']:
    # Wrap long titles for better display
    title = id_to_title.get(org_id, org_id.split('/')[-1])
    wrapped_title = wrap_text_textwrap(title, width=25)
    labels.append(wrapped_title)
    web_urls.append(id_to_web_url.get(org_id, ""))
    ids.append(org_id)
    org_format = id_to_format.get(org_id, 'Other')
    formats.append(org_format)
    oscar_ii_budget.append(id_to_budget.get(org_id, None))

    # Set parent
    if org_id in child_to_parent:
        parent_id = child_to_parent[org_id]
        parent_title = id_to_title.get(parent_id, parent_id.split('/')[-1])
        wrapped_parent = wrap_text_textwrap(parent_title, width=25)
        parents.append(parent_id)
    else:
        # Root organization - set parent to empty string
        parents.append("")
    
    # Count total descendants (BFS)
    descendants = 0
    queue = deque([org_id])
    visited = {org_id}
    while queue:
        current = queue.popleft()
        for child in parent_to_children.get(current, []):
            if child not in visited:
                visited.add(child)
                queue.append(child)
                descendants += 1
    
    # Calculate value based on format weight + bonus for number of descendants
    budget = id_to_budget.get(org_id)
    if budget is not None and not pd.isna(budget) and budget > 0:
        #value = math.log10(budget + 1) * 100
        value = math.sqrt(budget) * 10

    else:
        format_weight = FORMAT_WEIGHTS.get(org_format, 50)
        descendant_bonus = descendants * 5 
        value = format_weight + descendant_bonus
        
    values.append(value)

# Create DataFrame for plotting
plot_df = pd.DataFrame({
    'labels': labels,
    'parents': parents,
    'ids': ids,
    'formats': formats,
    'oscar_ii_budget': oscar_ii_budget,
    'values': values,
    'web_urls': web_urls
})

import plotly.express as px

# Professional 
plot_colors = ['#1D70B8', '#003078', '#5694CA', '#2B8CC4', '#004C8C',
                '#505a5e', '#626a6e', '#b1b4b6', '#dee0e2', '#6f777b']

# Create Treemap
fig_treemap = px.treemap(
    plot_df,
    ids='ids',
    names='labels',
    parents='parents',
    values='values',
    color='formats',
    color_discrete_sequence=plot_colors,
    custom_data=['formats','web_urls', 'oscar_ii_budget'],  
    hover_data={'formats': True, 'web_urls': True, 'oscar_ii_budget': True},
    title='UK Government Organizational Hierarchy',
    height=1200,
    width=1600,
)

fig_treemap.update_traces(
    textposition='top left',  # Position labels at top-left to create header effect
    textfont=dict(
        size=11,
        family='Arial, sans-serif',
        color='white'
    ),
    marker=dict(
        line=dict(color='grey', width=1),
        pad=dict(t=20, l=3, r=3, b=3)  # Add top padding for header space
    ),
    hovertemplate='<b>%{label}</b><br>Type: %{customdata[0]}<br>URL: %{customdata[1]}<br>Budget: %{customdata[2]}<extra></extra>'
)

fig_treemap.update_layout(
    font=dict(size=12, family='Arial, sans-serif'),
    margin=dict(t=100, l=10, r=10, b=10),
    paper_bgcolor='#f8f9fa',
    plot_bgcolor='#f8f9fa',
    title=dict(
        text='UK Government Organizational Hierarchy<br><sub style="font-size:13px; color:#505a5e;">Size reflects OSCAR II budget data (24-25)</sub>',
        font=dict(size=26, color='#0b0c0c', family='Arial'),
        x=0.5,
        xanchor='center'
    ),
)

html_str = fig_treemap.to_html()

treemap_path = '../org_hierarchy_proper_treemap.html'

with open(treemap_path, 'w', encoding='utf-8') as f:
    f.write(html_str)


print(f"\nTreemap saved: {treemap_path}")


# todo - list of all email domains for government

In [3]:
"""
UK Government Organisational Hierarchy - D3 Zoomable Treemap
With department headers and zoom controls
"""

import pandas as pd
import json
import math
from collections import defaultdict, deque

def parse_orgs(org_list):
    """Extract organization IDs from the organization list"""
    if org_list is None:
        return []
    if isinstance(org_list, list):
        if len(org_list) == 0:
            return []
        return [org.get('id') for org in org_list if isinstance(org, dict) and 'id' in org]
    return []

def format_budget(budget):
    """Format budget for display"""
    if budget is None or (isinstance(budget, float) and math.isnan(budget)):
        return None
    if budget >= 1000000:
        return f"£{budget/1000000:.1f}bn"
    elif budget >= 1000:
        return f"£{budget/1000:.1f}m"  
    else:
        return f"£{budget:.0f}k"

def build_hierarchy(df):
    """Convert flat dataframe to nested hierarchy for D3"""
    
    # Extract relationships
    df = df.copy()
    df['parent_list'] = df['parent_organisations'].apply(parse_orgs)
    df['child_list'] = df['child_organisations'].apply(parse_orgs)
    df['org_id'] = df['id']
    
    # Create lookups
    id_to_data = {}
    for _, row in df.iterrows():
        org_id = row['org_id']
        budget = row.get('oscar_budget_£k')
        budget_val = None if pd.isna(budget) else budget
        
        # Calculate value for sizing
        if budget_val and budget_val > 0:
            value = math.sqrt(budget_val) * 10
        else:
            value = 100  # Default for orgs without budget
        
        id_to_data[org_id] = {
            'id': org_id,
            'name': row['title'],
            'format': row.get('format', 'Other'),
            'url': row.get('web_url', ''),
            'budget': budget_val,
            'budget_display': format_budget(budget_val),
            'value': value,
            'children': []
        }
    
    # Build parent-child relationships
    # For each org (row) in the df, iterate through that org's child_list
    # If the child is in the id_to_data subset df, write it into the child_to_parent dict
    # Then Then nest children under parents:
    # Append ALL the child data to the parent's children list
    # Req. for treemap vis 
    child_to_parent = {}
    for _, row in df.iterrows():
        org_id = row['org_id']
        for child_id in row['child_list']:
            if child_id in id_to_data and org_id in id_to_data:
                child_to_parent[child_id] = org_id
                id_to_data[org_id]['children'].append(id_to_data[child_id])


    # Find root nodes (no parent or parent not in dataset)
    roots = []
    for org_id, data in id_to_data.items():
        if org_id not in child_to_parent:
            roots.append(data)
    
    # Calculate stats
    total_orgs = len(df)
    orgs_with_budget = df['oscar_budget_£k'].notna().sum()
    total_budget = df['oscar_budget_£k'].sum()
    
    print(f"Total organizations: {len(df)}")
    print(f"Root organizations (no parents): {len(roots)}")
    print(f"Organizations with children: {sum(1 for children in child_to_parent.values() if children)}")

    # Print all unique formats to verify them
    print(f"\nUnique organization formats:")
    for fmt in sorted(df['format'].unique()):
        count = len(df[df['format'] == fmt])
        print(f"  {fmt}: {count}")


    return {
        'name': 'UK Government',
        'children': roots
    }, {
        'total_orgs': total_orgs,
        'orgs_with_budget': int(orgs_with_budget),
        #'total_budget': total_budget
    }


def generate_html(hierarchy, stats):
    """Generate the D3 visualisation HTML"""
    
    hierarchy_json = json.dumps(hierarchy)
    
    html = f'''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>UK Government Structure</title>
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link href="https://fonts.googleapis.com/css2?family=Newsreader:opsz,wght@6..72,300;6..72,400;6..72,500&family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
    
    <style>
        * {{
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }}
        
        body {{
            font-family: 'Inter', -apple-system, sans-serif;
            background: #f8f8f8;
            color: #1a1a1a;
            min-height: 100vh;
            font-weight: 300;
        }}
        
        .container {{
            max-width: 1800px;
            margin: 0 auto;
            padding: 2.5rem 2rem;
        }}
        
        header {{
            margin-bottom: 2rem;
        }}
        
        h1 {{
            font-family: 'Newsreader', Georgia, serif;
            font-size: 2.5rem;
            font-weight: 300;
            letter-spacing: -0.03em;
            color: #0b0c0c;
            margin-bottom: 0.5rem;
        }}
        
        .subtitle {{
            font-size: 0.95rem;
            color: #505a5f;
            max-width: 600px;
            line-height: 1.6;
        }}
        
        .stats {{
            display: flex;
            gap: 3rem;
            margin: 1.5rem 0;
            padding: 1.25rem 0;
            border-top: 1px solid #ddd;
            border-bottom: 1px solid #ddd;
        }}
        
        .stat-value {{
            font-family: 'Newsreader', serif;
            font-size: 1.5rem;
            font-weight: 400;
            color: #0b0c0c;
        }}
        
        .stat-label {{
            font-size: 0.7rem;
            color: #6f777b;
            text-transform: uppercase;
            letter-spacing: 0.08em;
            margin-top: 0.2rem;
        }}
        
        .controls {{
            display: flex;
            gap: 1rem;
            margin-bottom: 1rem;
            align-items: center;
            flex-wrap: wrap;
        }}
        
        .breadcrumb {{
            font-size: 0.85rem;
            color: #505a5f;
            flex: 1;
            min-width: 200px;
        }}
        
        .breadcrumb span {{
            cursor: pointer;
            transition: color 0.15s;
        }}
        
        .breadcrumb span:hover {{
            color: #1d70b8;
        }}
        
        .breadcrumb .sep {{
            margin: 0 0.4rem;
            color: #b1b4b6;
            cursor: default;
        }}
        
        .breadcrumb .sep:hover {{
            color: #b1b4b6;
        }}
        
        .control-group {{
            display: flex;
            gap: 0.5rem;
            align-items: center;
        }}
        
        .control-label {{
            font-size: 0.75rem;
            color: #6f777b;
            text-transform: uppercase;
            letter-spacing: 0.05em;
        }}
        
        .btn-group {{
            display: flex;
        }}
        
        .btn {{
            padding: 0.4rem 0.75rem;
            background: #fff;
            border: 1px solid #ccc;
            color: #505a5f;
            font-family: 'Inter', sans-serif;
            font-size: 0.75rem;
            font-weight: 400;
            cursor: pointer;
            transition: all 0.15s;
        }}
        
        .btn:first-child {{
            border-radius: 3px 0 0 3px;
        }}
        
        .btn:last-child {{
            border-radius: 0 3px 3px 0;
        }}
        
        .btn:not(:last-child) {{
            border-right: none;
        }}
        
        .btn:hover {{
            background: #f3f3f3;
            color: #0b0c0c;
        }}
        
        .btn.active {{
            background: #0b0c0c;
            border-color: #0b0c0c;
            color: #fff;
        }}
        
        .btn-icon {{
            padding: 0.4rem 0.6rem;
            font-size: 0.85rem;
        }}
        
        .treemap-wrapper {{
            position: relative;
        }}
        
        .treemap-container {{
            background: #fff;
            border: 1px solid #ddd;
            position: relative;
            overflow: hidden;
        }}
        
        #treemap {{
            width: 100%;
            height: 72vh;
            min-height: 550px;
        }}
        
        /* Department header style */
        .node {{
            position: absolute;
            overflow: hidden;
        }}
        
        .node-dept {{
            border: 2px solid rgba(255,255,255,0.3);
        }}
        
        .node-dept .node-bg {{
            opacity: 0.95;
        }}
        
        .node-child {{
            border: 1px solid rgba(255,255,255,0.15);
        }}
        
        .node-bg {{
            position: absolute;
            inset: 0;
            transition: filter 0.15s;
        }}
        
        .node:hover .node-bg {{
            filter: brightness(1.1);
        }}
        
        .node-header {{
            position: absolute;
            top: 0;
            left: 0;
            right: 0;
            padding: 6px 10px;
            background: rgba(0,0,0,0.4);
            z-index: 2;
            max-height: 42px;
            overflow: hidden;
            transition: max-height 0.2s ease, background 0.2s ease;
        }}
        
        .node-dept:hover .node-header {{
            max-height: 100px;
            background: rgba(0,0,0,0.7);
        }}

        .node-header-name {{
            font-size: 13px;
            font-weight: 600;
            color: #fff;
            line-height: 1.2;
            text-shadow: 0 1px 2px rgba(0,0,0,0.3);
        }}
        
        .node-header-budget {{
            font-size: 11px;
            font-weight: 400;
            color: rgba(255,255,255,0.8);
            margin-top: 2px;
        }}
        
        .node-label {{
            position: absolute;
            top: 5px;
            left: 6px;
            right: 6px;
            font-weight: 400;
            color: #fff;
            line-height: 1.2;
            pointer-events: none;
            overflow: hidden;
            text-overflow: ellipsis;
            display: -webkit-box;
            -webkit-box-orient: vertical;
            text-shadow: 0 1px 2px rgba(0,0,0,0.4);
        }}
        
        .node-label.size-lg {{
            font-size: 12px;
            -webkit-line-clamp: 3;
        }}
        
        .node-label.size-md {{
            font-size: 10px;
            -webkit-line-clamp: 2;
        }}
        
        .node-label.size-sm {{
            font-size: 8px;
            -webkit-line-clamp: 2;
        }}
        .node-label.size-xs {{
            font-size: 6px;
            -webkit-line-clamp: 1;
            top: 2px;
            left: 2px;
            right: 2px;
        }}
        
        .node.has-children {{
            cursor: zoom-in;
        }}
        
        .node.leaf {{
            cursor: pointer;
        }}
        
        /* Zoom controls */
        .zoom-controls {{
            position: absolute;
            bottom: 1rem;
            right: 1rem;
            display: flex;
            flex-direction: column;
            gap: 0.25rem;
            z-index: 10;
        }}
        
        .zoom-btn {{
            width: 32px;
            height: 32px;
            background: #fff;
            border: 1px solid #ccc;
            border-radius: 3px;
            font-size: 1.1rem;
            cursor: pointer;
            display: flex;
            align-items: center;
            justify-content: center;
            color: #505a5f;
            transition: all 0.15s;
        }}
        
        .zoom-btn:hover {{
            background: #f3f3f3;
            color: #0b0c0c;
        }}
        
        /* Small entities panel */
        .small-entities {{
            position: absolute;
            top: 1rem;
            right: 1rem;
            width: 220px;
            max-height: 300px;
            background: #fff;
            border: 1px solid #ddd;
            border-radius: 4px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            z-index: 10;
            display: none;
            overflow: hidden;
        }}
        
        .small-entities.visible {{
            display: block;
        }}
        
        .small-entities-header {{
            padding: 0.6rem 0.75rem;
            background: #f5f5f5;
            border-bottom: 1px solid #ddd;
            font-size: 0.7rem;
            font-weight: 500;
            text-transform: uppercase;
            letter-spacing: 0.05em;
            color: #505a5f;
            display: flex;
            justify-content: space-between;
            align-items: center;
        }}
        
        .small-entities-close {{
            cursor: pointer;
            font-size: 1rem;
            line-height: 1;
            color: #6f777b;
        }}
        
        .small-entities-close:hover {{
            color: #0b0c0c;
        }}
        
        .small-entities-list {{
            max-height: 250px;
            overflow-y: auto;
        }}
        
        .small-entity {{
            padding: 0.5rem 0.75rem;
            border-bottom: 1px solid #eee;
            font-size: 0.8rem;
            cursor: pointer;
            transition: background 0.1s;
        }}
        
        .small-entity:hover {{
            background: #f8f8f8;
        }}
        
        .small-entity:last-child {{
            border-bottom: none;
        }}
        
        .small-entity-name {{
            color: #0b0c0c;
            margin-bottom: 2px;
        }}
        
        .small-entity-type {{
            font-size: 0.7rem;
            color: #6f777b;
        }}
        
        .tooltip {{
            position: fixed;
            padding: 0.75rem 0.875rem;
            background: #fff;
            border: 1px solid #ccc;
            box-shadow: 0 4px 16px rgba(0,0,0,0.12);
            pointer-events: none;
            z-index: 1000;
            max-width: 260px;
            opacity: 0;
            transition: opacity 0.1s;
            font-size: 0.85rem;
        }}
        
        .tooltip.visible {{
            opacity: 1;
        }}
        
        .tooltip-name {{
            font-weight: 500;
            color: #0b0c0c;
            margin-bottom: 0.4rem;
            line-height: 1.25;
        }}
        
        .tooltip-row {{
            display: flex;
            justify-content: space-between;
            font-size: 0.75rem;
            margin: 0.2rem 0;
            gap: 1rem;
        }}
        
        .tooltip-label {{
            color: #6f777b;
        }}
        
        .tooltip-value {{
            color: #0b0c0c;
            text-align: right;
        }}
        
        .tooltip-hint {{
            margin-top: 0.5rem;
            padding-top: 0.4rem;
            border-top: 1px solid #eee;
            font-size: 0.7rem;
            color: #1d70b8;
        }}
        
        footer {{
            margin-top: 1.5rem;
            padding-top: 1rem;
            border-top: 1px solid #ddd;
            font-size: 0.75rem;
            color: #6f777b;
            display: flex;
            justify-content: space-between;
            flex-wrap: wrap;
            gap: 0.5rem;
        }}
        
        footer a {{
            color: #1d70b8;
            text-decoration: none;
        }}
        
        footer a:hover {{
            text-decoration: underline;
        }}
        
        @media (max-width: 768px) {{
            .container {{
                padding: 1.5rem 1rem;
            }}
            
            h1 {{
                font-size: 1.75rem;
            }}
            
            .stats {{
                gap: 1.5rem;
            }}
            
            .controls {{
                flex-direction: column;
                align-items: flex-start;
            }}
            
            #treemap {{
                height: 55vh;
            }}
            
            .small-entities {{
                width: 180px;
            }}
        }}
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>UK Government</h1>
            <p class="subtitle">
                Organisational structure and budget allocation. Size represents funding from OSCAR II (2024–25).
            </p>
        </header>
        
        <div class="stats">
            <div class="stat">
                <div class="stat-value">{stats['total_orgs']:,}</div>
                <div class="stat-label">Organisations</div>
            </div>
            <div class="stat">
                <div class="stat-value">{stats['orgs_with_budget']:,}</div>
                <div class="stat-label">With Budget</div>
            </div>
            <!-- Add other <div> classes as required    -->
        </div>
        
        <div class="controls">
            <div class="breadcrumb" id="breadcrumb">
                <span data-depth="0">All Departments</span>
            </div>
            
            <div class="control-group">
                <span class="control-label">View</span>
                <div class="btn-group">
                    <button class="btn active" data-filter="all">All</button>
                    <button class="btn" data-filter="dept">Depts</button>
                    <button class="btn" data-filter="agency">Agencies</button>
                    <button class="btn" data-filter="ndpb">NDPBs</button>
                </div>
            </div>
            
            <div class="control-group">
                <span class="control-label">Size</span>
                <div class="btn-group">
                    <button class="btn active" data-sizing="budget">Budget</button>
                    <button class="btn" data-sizing="equal">Equal</button>
                </div>
            </div>
            
            <div class="control-group">
                <button class="btn btn-icon" id="showSmallBtn" title="Show small entities">⋯</button>
            </div>
        </div>
        
        <div class="treemap-wrapper">
            <div class="treemap-container">
                <div id="treemap"></div>
            </div>
            
            <div class="zoom-controls">
                <button class="zoom-btn" id="zoomOut" title="Zoom out">−</button>
                <button class="zoom-btn" id="zoomReset" title="Reset view">⌂</button>
            </div>
            
            <div class="small-entities" id="smallEntities">
                <div class="small-entities-header">
                    <span>Small Entities</span>
                    <span class="small-entities-close" id="closeSmall">×</span>
                </div>
                <div class="small-entities-list" id="smallList"></div>
            </div>
        </div>
        
        <footer>
            <span>Data: <a href="https://www.gov.uk/government/organisations">GOV.UK</a> · <a href="https://www.gov.uk/government/collections/oscar-publishing-data-from-the-online-system-for-central-accounting-and-reporting">OSCAR II</a></span>
            <span>Click department to zoom · Click body to visit page</span>
        </footer>
    </div>
    
    <div class="tooltip" id="tooltip"></div>
    
    <script>
        const data = {hierarchy_json};
        
        // Colours - departments get stronger colours
        const deptColors = {{
            'Ministerial department': '#1a4971',
            'Non-ministerial department': '#1a5235',
            'Devolved administration': '#1a4971',
            'Devolved government': '#1a4971',
        }};
        
        const childColors = {{
            'Executive agency': '#6b5620',
            'Executive non-departmental public body': '#4a3772',
            'Advisory non-departmental public body': '#5c4317',
            'Public corporation': '#722828',
            'Executive office': '#1a5235',
            'Tribunal': '#404952',
            'Tribunal non-departmental public body': '#404952',
            'Court': '#404952',
            'Sub organisation': '#52616b',
            'Sub-organisation': '#52616b',
            'Special health authority': '#4a3772',
            'Other': '#404952'
        }};
        
        const filterMap = {{
            'all': null,
            'dept': ['Ministerial department', 'Non-ministerial department', 'Devolved administration', 'Devolved government'],
            'agency': ['Executive agency', 'Executive office'],
            'ndpb': ['Executive non-departmental public body', 'Advisory non-departmental public body', 'Tribunal non-departmental public body']
        }};
        
        let currentFilter = 'all';
        let currentSizing = 'budget';
        let currentRoot = data;
        let breadcrumbPath = [{{ name: 'All Departments', data: data }}];
        let smallEntitiesVisible = false;
        
        const container = document.getElementById('treemap');
        const tooltip = document.getElementById('tooltip');
        const breadcrumb = document.getElementById('breadcrumb');
        const smallPanel = document.getElementById('smallEntities');
        const smallList = document.getElementById('smallList');
        
        function isDepartment(format) {{
            return ['Ministerial department', 'Non-ministerial department', 'Devolved administration', 'Devolved government'].includes(format);
        }}
        
        function getColor(format, isTopLevel) {{
            if (isTopLevel || isDepartment(format)) {{
                return deptColors[format] || childColors[format] || '#404952';
            }}
            return childColors[format] || '#52616b';
        }}
        
        function filterData(node, allowedFormats) {{
            if (!allowedFormats) return node;
            
            const filtered = {{ ...node }};
            if (node.children) {{
                filtered.children = node.children
                    .map(child => filterData(child, allowedFormats))
                    .filter(child => {{
                        if (child.children && child.children.length > 0) return true;
                        return allowedFormats.includes(child.format);
                    }});
            }}
            return filtered;
        }}
        
        function getValue(node) {{
            if (currentSizing === 'equal') {{
                return 100;
            }}
            return node.value || 100;
        }}
        
        function sumValues(node) {{
            if (!node.children || node.children.length === 0) {{
                return getValue(node);
            }}
            let sum = 0;
            for (const child of node.children) {{
                sum += sumValues(child);
            }}
            node._computedValue = sum;
            return sum;
        }}
        
        function collectSmallEntities(nodes, threshold) {{
            const small = [];
            nodes.forEach(d => {{
                const w = d.x1 - d.x0;
                const h = d.y1 - d.y0;
                if (w < threshold || h < threshold) {{
                    small.push(d.data);
                }}
            }});
            return small.sort((a, b) => (a.name || '').localeCompare(b.name || ''));
        }}
        
        function updateSmallEntities(entities) {{
            smallList.innerHTML = '';
            if (entities.length === 0) {{
                smallList.innerHTML = '<div class="small-entity"><em>No small entities at this level</em></div>';
                return;
            }}
            
            entities.slice(0, 50).forEach(entity => {{
                const div = document.createElement('div');
                div.className = 'small-entity';
                div.innerHTML = `
                    <div class="small-entity-name">${{entity.name}}</div>
                    <div class="small-entity-type">${{entity.format}}${{entity.budget_display ? ' · ' + entity.budget_display : ''}}</div>
                `;
                div.addEventListener('click', () => {{
                    if (entity.url) {{
                        window.open(entity.url, '_blank');
                    }}
                }});
                smallList.appendChild(div);
            }});
            
            if (entities.length > 50) {{
                const more = document.createElement('div');
                more.className = 'small-entity';
                more.innerHTML = `<em>+${{entities.length - 50}} more</em>`;
                smallList.appendChild(more);
            }}
        }}
        
        function render() {{
            container.innerHTML = '';
            
            const allowedFormats = filterMap[currentFilter];
            const filteredRoot = filterData(currentRoot, allowedFormats);
            sumValues(filteredRoot);
            
            const width = container.clientWidth;
            const height = container.clientHeight;
            
            const hierarchy = d3.hierarchy(filteredRoot)
                .sum(d => {{
                    if (d.children && d.children.length) return 0;
                    return currentSizing === 'equal' ? 100 : (d.value || 100);
                }})
                .sort((a, b) => b.value - a.value);
            
            d3.treemap()
                .size([width, height])
                .paddingTop(d => d.depth === 1 ? 42 : 2)
                .paddingRight(2)
                .paddingBottom(2)
                .paddingLeft(2)
                .paddingInner(1)
                .round(true)(hierarchy);
            
            const nodes = hierarchy.descendants().slice(1);
            
            // Collect small entities
            const smallEntities = collectSmallEntities(nodes, 40);
            updateSmallEntities(smallEntities);
            
            nodes.forEach(d => {{
                const isTopLevel = d.depth === 1;
                const hasChildren = d.children && d.children.length > 0;
                
                const node = document.createElement('div');
                node.className = 'node' + (isTopLevel ? ' node-dept' : ' node-child') + (hasChildren ? ' has-children' : ' leaf');
                node.style.left = d.x0 + 'px';
                node.style.top = d.y0 + 'px';
                node.style.width = (d.x1 - d.x0) + 'px';
                node.style.height = (d.y1 - d.y0) + 'px';
                
                const bg = document.createElement('div');
                bg.className = 'node-bg';
                bg.style.background = getColor(d.data.format, isTopLevel);
                node.appendChild(bg);
                
                const w = d.x1 - d.x0;
                const h = d.y1 - d.y0;
                const area = w * h;
                
                // Department header for top-level nodes
                if (isTopLevel && w > 80 && h > 40) {{
                    const header = document.createElement('div');
                    header.className = 'node-header';
                    
                    const headerName = document.createElement('div');
                    headerName.className = 'node-header-name';
                    headerName.textContent = d.data.name;
                    header.appendChild(headerName);
                    
                    if (d.data.budget_display && w > 120) {{
                        const headerBudget = document.createElement('div');
                        headerBudget.className = 'node-header-budget';
                        headerBudget.textContent = d.data.budget_display;
                        header.appendChild(headerBudget);
                    }}
                    
                    node.appendChild(header);
                }}
                // Fallback label for small top-level nodes
                else if (isTopLevel) {{
                    const label = document.createElement('div');
                    label.className = 'node-label size-sm';
                    label.textContent = d.data.name;
                    node.appendChild(label);
                }}      

                // Regular label for child nodes
                else if (!isTopLevel && w > 30 && h > 22) {{
                    const label = document.createElement('div');
                    label.className = 'node-label';
                    
                    if (area > 15000) {{
                        label.classList.add('size-lg');
                    }} else if (area > 5000) {{
                        label.classList.add('size-md');
                    }} else {{
                        label.classList.add('size-sm');
                    }}
                    
                    label.textContent = d.data.name;
                    node.appendChild(label);
                }}
                
                // Hover
                node.addEventListener('mouseenter', (e) => {{
                    const hint = hasChildren ? 'Click to zoom in' : (d.data.url ? 'Click to visit' : '');
                    tooltip.innerHTML = `
                        <div class="tooltip-name">${{d.data.name}}</div>
                        <div class="tooltip-row">
                            <span class="tooltip-label">Type</span>
                            <span class="tooltip-value">${{d.data.format}}</span>
                        </div>
                        ${{d.data.budget_display ? `
                        <div class="tooltip-row">
                            <span class="tooltip-label">Budget</span>
                            <span class="tooltip-value">${{d.data.budget_display}}</span>
                        </div>
                        ` : ''}}
                        ${{hint ? `<div class="tooltip-hint">${{hint}}</div>` : ''}}
                    `;
                    tooltip.classList.add('visible');
                }});
                
                node.addEventListener('mousemove', (e) => {{
                    const x = Math.min(e.clientX + 12, window.innerWidth - 280);
                    const y = Math.min(e.clientY + 12, window.innerHeight - 150);
                    tooltip.style.left = x + 'px';
                    tooltip.style.top = y + 'px';
                }});
                
                node.addEventListener('mouseleave', () => {{
                    tooltip.classList.remove('visible');
                }});
                
                // Click
                node.addEventListener('click', () => {{
                    tooltip.classList.remove('visible');
                    if (hasChildren) {{
                        currentRoot = d.data;
                        breadcrumbPath.push({{ name: d.data.name, data: d.data }});
                        updateBreadcrumb();
                        render();
                    }} else if (d.data.url) {{
                        window.open(d.data.url, '_blank');
                    }}
                }});
                
                container.appendChild(node);
            }});
        }}
        
        function updateBreadcrumb() {{
            breadcrumb.innerHTML = breadcrumbPath.map((item, i) => {{
                if (i === breadcrumbPath.length - 1) {{
                    return `<span>${{item.name}}</span>`;
                }}
                return `<span data-depth="${{i}}">${{item.name}}</span><span class="sep">›</span>`;
            }}).join('');
            
            breadcrumb.querySelectorAll('span[data-depth]').forEach(span => {{
                span.addEventListener('click', () => {{
                    const depth = parseInt(span.dataset.depth);
                    breadcrumbPath = breadcrumbPath.slice(0, depth + 1);
                    currentRoot = breadcrumbPath[depth].data;
                    updateBreadcrumb();
                    render();
                }});
            }});
        }}
        
        // Filter buttons
        document.querySelectorAll('.btn[data-filter]').forEach(btn => {{
            btn.addEventListener('click', function() {{
                document.querySelectorAll('.btn[data-filter]').forEach(b => b.classList.remove('active'));
                this.classList.add('active');
                currentFilter = this.dataset.filter;
                render();
            }});
        }});
        
        // Sizing buttons
        document.querySelectorAll('.btn[data-sizing]').forEach(btn => {{
            btn.addEventListener('click', function() {{
                document.querySelectorAll('.btn[data-sizing]').forEach(b => b.classList.remove('active'));
                this.classList.add('active');
                currentSizing = this.dataset.sizing;
                render();
            }});
        }});
        
        // Zoom controls
        document.getElementById('zoomOut').addEventListener('click', () => {{
            if (breadcrumbPath.length > 1) {{
                breadcrumbPath.pop();
                currentRoot = breadcrumbPath[breadcrumbPath.length - 1].data;
                updateBreadcrumb();
                render();
            }}
        }});
        
        document.getElementById('zoomReset').addEventListener('click', () => {{
            breadcrumbPath = [{{ name: 'All Departments', data: data }}];
            currentRoot = data;
            updateBreadcrumb();
            render();
        }});
        
        // Small entities panel
        document.getElementById('showSmallBtn').addEventListener('click', () => {{
            smallEntitiesVisible = !smallEntitiesVisible;
            smallPanel.classList.toggle('visible', smallEntitiesVisible);
        }});
        
        document.getElementById('closeSmall').addEventListener('click', () => {{
            smallEntitiesVisible = false;
            smallPanel.classList.remove('visible');
        }});
        
        // Resize
        let resizeTimeout;
        window.addEventListener('resize', () => {{
            clearTimeout(resizeTimeout);
            resizeTimeout = setTimeout(render, 150);
        }});
        
        // Initial render
        render();
    </script>
</body>
</html>'''
    
    return html


def main(df):
    """Generate the D3 treemap visualisation"""
    print("Building hierarchy...")
    hierarchy, stats = build_hierarchy(df)
    
    print(f"  {stats['total_orgs']:,} organisations")
    print(f"  {stats['orgs_with_budget']:,} with budget data")
    
    html = generate_html(hierarchy, stats)
    
    output_path = '../uk_gov_treemap_d3.html'
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html)
    
    print(f"\nSaved to {output_path}")
    df
    return output_path


if __name__ == "__main__":
    main(df)

Building hierarchy...
Total organizations: 663
Root organizations (no parents): 69
Organizations with children: 594

Unique organization formats:
  Ad-hoc advisory group: 3
  Advisory non-departmental public body: 64
  Civil service: 1
  Court: 25
  Devolved government: 3
  Executive agency: 46
  Executive non-departmental public body: 133
  Executive office: 1
  Independent monitoring body: 4
  Ministerial department: 23
  Non-ministerial department: 20
  Other: 160
  Public corporation: 19
  Special health authority: 4
  Sub organisation: 128
  Tribunal: 29
  663 organisations
  113 with budget data

Saved to ../uk_gov_treemap_d3.html


In [7]:
import pandas as pd

# Load the OSCAR data
oscar = pd.read_csv('../data/orgs/uk/oscar_data_2024-25.csv')  # or however you load it

# Filter to British Museum
bm = oscar[oscar['ORGANISATION_LONG_NAME'].str.contains('British Museum', case=False, na=False)]

# Show all rows with key columns
print(bm[['ORGANISATION_LONG_NAME', 'CONTROL_BUDGET_L0_LONG_NAME', 'CONTROL_BUDGET_L1_LONG_NAME', 'SEGMENT_L4_LONG_NAME', 'AMOUNT']].to_string())

# Sum by budget type
print("\n--- Sum by CONTROL_BUDGET_L0 ---")
print(bm.groupby('CONTROL_BUDGET_L0_LONG_NAME')['AMOUNT'].sum())

# Total (what you're currently getting)
print(f"\n--- Totals ---")
print(f"Sum all:        {bm['AMOUNT'].sum():,.0f} (£k) = £{bm['AMOUNT'].sum()/1000:,.1f}m")
print(f"Sum positive:   {bm[bm['AMOUNT'] > 0]['AMOUNT'].sum():,.0f} (£k)")
print(f"Sum negative:   {bm[bm['AMOUNT'] < 0]['AMOUNT'].sum():,.0f} (£k)")

# DEL only (what you want)
del_only = bm[bm['CONTROL_BUDGET_L0_LONG_NAME'].isin(['DEL ADMIN', 'DEL PROG'])]
print(f"\nDEL ADMIN + DEL PROG only:")
print(f"Sum all:        {del_only['AMOUNT'].sum():,.0f} (£k) = £{del_only['AMOUNT'].sum()/1000:,.1f}m")
print(f"Sum positive:   {del_only[del_only['AMOUNT'] > 0]['AMOUNT'].sum():,.0f} (£k)")

      ORGANISATION_LONG_NAME CONTROL_BUDGET_L0_LONG_NAME CONTROL_BUDGET_L1_LONG_NAME                                                             SEGMENT_L4_LONG_NAME  AMOUNT
64            British Museum                    DEL PROG                         DEL                                    X048A044-MUSEUMS AND GALLERIES DEL PROG VOTED  -34029
65            British Museum                    DEL PROG                         DEL                                    X048A044-MUSEUMS AND GALLERIES DEL PROG VOTED  -28958
66            British Museum                    DEL PROG                         DEL                                    X048A044-MUSEUMS AND GALLERIES DEL PROG VOTED     158
67            British Museum                    DEL PROG                         DEL                                    X048A044-MUSEUMS AND GALLERIES DEL PROG VOTED    1281
68            British Museum                    DEL PROG                         DEL                                    X048A044-M

In [8]:
# What other columns differ between the +917,396 and -917,396 rows?
bm = oscar[oscar['ORGANISATION_LONG_NAME'].str.contains('British Museum', case=False, na=False)]
big_rows = bm[bm['AMOUNT'].abs() > 900000]
print(big_rows[['QUARTER_SHORT_NAME', 'MONTH_SHORT_NAME', 'FO_VALIDATION_GROUP_CODE', 'AMOUNT']].to_string())

      QUARTER_SHORT_NAME MONTH_SHORT_NAME FO_VALIDATION_GROUP_CODE  AMOUNT
69                   P00              P00                      FO2 -917396
71                   P00              P00                      FO2  915490
3140                 P00              P00                      FO2  917396
21225                P00              P00                      FO2 -918342


In [9]:
# Get the full picture of these 4 rows
big_rows = bm[bm['AMOUNT'].abs() > 900000]
print(big_rows.T)  # Transpose to see all columns vertically

                                69      71      3140     21225
YEAR_NO                        202425  202425  202425   202425
YEAR_SHORT_NAME                202425  202425  202425   202425
QUARTER_SHORT_NAME                P00     P00     P00      P00
MONTH_SHORT_NAME                  P00     P00     P00      P00
BUDGETING_ORGANISATIONS_CODE      NaN     NaN     NaN      NaN
...                               ...     ...     ...      ...
FCT_LOAD_TYPE_LONG_NAME           NaN     NaN     NaN      NaN
ROW_DESCRIPTION                   NaN     NaN     NaN      NaN
DATA_ID                           NaN     NaN     NaN      NaN
AMOUNT                        -917396  915490  917396  -918342
DATA_EFFECTIVE_DATETIME           NaN     NaN     NaN      NaN

[91 rows x 4 columns]


In [10]:
cols_to_check = [
    'YEAR_NO', 
    'QUARTER_SHORT_NAME', 
    'MONTH_SHORT_NAME',
    'DATA_EFFECTIVE_DATETIME',
    'ACCOUNTING_ARRANGEMENTS_CODE',
    'SEGMENT_L4_CODE',
    'COFOG_L1_CODE',
    'AMOUNT'
]
print(big_rows[cols_to_check].to_string())

       YEAR_NO QUARTER_SHORT_NAME MONTH_SHORT_NAME  DATA_EFFECTIVE_DATETIME  ACCOUNTING_ARRANGEMENTS_CODE SEGMENT_L4_CODE COFOG_L1_CODE  AMOUNT
69      202425                P00              P00                      NaN                           NaN        X048A044     COFOG0802 -917396
71      202425                P00              P00                      NaN                           NaN        X048A044     COFOG0802  915490
3140    202425                P00              P00                      NaN                           NaN        X048A044     COFOG0802  917396
21225   202425                P00              P00                      NaN                           NaN        X048A044     COFOG0802 -918342
