In [1]:
// Import Apollo Client core components via esm.sh for Deno compatibility
import { ApolloClient, InMemoryCache, HttpLink } from 'https://esm.sh/@apollo/client/core?pin=v3.9.9';
import { gql } from 'https://esm.sh/@apollo/client/core?pin=v3.9.9';
import * as Plot from "https://esm.sh/@observablehq/plot";
import { document } from "jsr:@manzt/jupyter-helper";

// Define the GraphQL endpoint
const graphqlUri = 'https://incidentdatabase.ai/api/graphql';

// Create the Apollo Client instance
const client = new ApolloClient({
  link: new HttpLink({ uri: graphqlUri }),
  cache: new InMemoryCache(),
  // It's often helpful to disable caching in notebook environments for fresh data
  defaultOptions: {
    watchQuery: { fetchPolicy: 'no-cache' },
    query: { fetchPolicy: 'no-cache' },
  },
});

In [2]:
// Import gql tag (if not already imported)
// import { gql } from '[https://esm.sh/@apollo/client/core?pin=v3.9.9';](https://esm.sh/@apollo/client/core?pin=v3.9.9';)

const GET_ALL_CSET_CLASSIFICATIONS = gql`
  query GetAllCSETClassifications {
    classifications(
      filter: { namespace: { EQ: "CSETv1" } }

    ) {
      attributes {
        short_name
        value_json
      }
      incidents {
        incident_id
      }
    }
  }
`;

// --- Fetching (same structure as before, just uses the new query) ---
console.log("Fetching all CSET classification data...");

const { data: {classifications : allClassificationData}, error } = await client.query({
    query: GET_ALL_CSET_CLASSIFICATIONS,
});    


Fetching all CSET classification data...


In [3]:
// === Processing Cell (Updated for Splitting Sectors & Filtering Unknowns) ===

// Assume 'allClassificationData' exists from the GraphQL fetch cell.
// Assume 'fetchAllClassError' is null if the fetch was successful.

let heatmapDataRefined = []; // Use a new variable name for the refined data

console.log(`Processing ${allClassificationData.length} CSET classification items for refinement...`);

// Helper function to safely parse value_json
const parseValueJson = (value_json) => {
    if (!value_json) return null; // Return null for missing/empty initially
    try {
        const parsed = JSON.parse(value_json);
        return String(parsed).trim(); // Ensure result is string and trimmed
    } catch (e) {
        return String(value_json).trim(); // Fallback to plain string, trimmed
    }
};

// Map incident IDs to their attributes (sectors will be an array)
const incidentAttributes = new Map(); // Map<incident_id, { harmLevel?: string, sectors: Set<string> }>
const allIncidentIds = new Set();

for (const classification of allClassificationData) {
    if (!classification?.attributes || !classification?.incidents) continue;

    let currentHarmLevel = null;
    let currentSectors = new Set<string>(); // Use a Set to handle potential duplicates easily

    classification.attributes.forEach(attr => {
        if (!attr) return;
        const parsedValue = parseValueJson(attr.value_json);

        // Skip if parsed value is null or empty string after parsing/trimming
        if (!parsedValue) return;

        if (attr.short_name === "AI Harm Level") {
            currentHarmLevel = parsedValue;
        } else if (attr.short_name === "Sector of Deployment") {
            // Split by comma, trim each part, and add non-empty results to the Set
            parsedValue.split(',')
                .map(s => s.trim())
                .filter(s => s !== "") // Filter out empty strings after split/trim
                .forEach(sector => currentSectors.add(sector));
        }
    });

    // If this classification item defined a valid harm level or at least one valid sector, apply to its incidents
    if (currentHarmLevel !== null || currentSectors.size > 0) {
        classification.incidents.forEach(incident => {
            if (incident?.incident_id != null) {
                const incidentId = incident.incident_id;
                allIncidentIds.add(incidentId);

                // Get existing data or initialize with empty sector Set
                const existingData = incidentAttributes.get(incidentId) ?? { sectors: new Set() };

                // Update harm level (last one wins)
                if (currentHarmLevel !== null) {
                    existingData.harmLevel = currentHarmLevel;
                }
                // Add any new sectors found
                currentSectors.forEach(sector => existingData.sectors.add(sector));

                incidentAttributes.set(incidentId, existingData);
            }
        });
    }
}

console.log(`Found ${allIncidentIds.size} unique incidents potentially linked to CSET classifications.`);

// Group by "harmLevel|sector", *filtering out unknowns* and iterating through split sectors
const groupedCounts = new Map(); // Map<"harmLevel|sector", count>

allIncidentIds.forEach(incidentId => {
    const attrs = incidentAttributes.get(incidentId);

    // *** Filter 1: Skip incident if harmLevel is missing/null ***
    if (!attrs?.harmLevel) {
        return; // Skip incidents without a valid harmLevel assigned
    }
    const harmLevel = attrs.harmLevel; // We know it's valid here

    // Get sectors, default to empty Set if none assigned
    const sectors = attrs.sectors ?? new Set();

    // If no valid sectors assigned, treat as ["Unknown"] only for the purpose of potentially skipping
    const sectorsToIterate = sectors.size > 0 ? Array.from(sectors) : ["Unknown"];

    // Iterate through each sector associated with the incident
    sectorsToIterate.forEach(sector => {
        // *** Filter 2: Skip "Unknown" sectors (placeholder for incidents with no valid sector) ***
        if (sector === "Unknown") {
            return; // Skip the "Unknown" sector category
        }

        // Create key and increment count for valid harmLevel/sector pair
        const key = `${harmLevel}|${sector}`;
        groupedCounts.set(key, (groupedCounts.get(key) ?? 0) + 1);
    });
});

// Convert the grouped map into the final array format
heatmapDataRefined = Array.from(groupedCounts.entries()).map(([key, count]) => {
    const [harm_level, sector] = key.split('|');
    return { harm_level, sector, count };
});

console.log(`Processed into ${heatmapDataRefined.length} refined harm/sector combinations (Unknowns filtered, sectors split).`);
// Log sample for verification
console.log("Sample Refined Data:", heatmapDataRefined.slice(0, 10));



// 'heatmapDataRefined' is now ready for the plotting cell.

Processing 214 CSET classification items for refinement...
Found 201 unique incidents potentially linked to CSET classifications.
Processed into 62 refined harm/sector combinations (Unknowns filtered, sectors split).
Sample Refined Data: [
  {
    harm_level: "none",
    sector: "administrative and support service activities",
    count: 6
  },
  {
    harm_level: "none",
    sector: "human health and social work activities",
    count: 9
  },
  {
    harm_level: "AI tangible harm event",
    sector: "human health and social work activities",
    count: 4
  },
  {
    harm_level: "none",
    sector: "information and communication",
    count: 66
  },
  { harm_level: "none", sector: "professional", count: 7 },
  {
    harm_level: "none",
    sector: "scientific and technical activities",
    count: 7
  },
  { harm_level: "none", sector: "Arts", count: 30 },
  {
    harm_level: "none",
    sector: "entertainment and recreation",
    count: 30
  },
  {
    harm_level: "AI tangible harm ev

In [4]:
// === Plotting Cell: Refined Harm Level vs Sector Heatmap ===

// Ensure Plot and document are imported in a previous cell:
// import * as Plot from "[https://esm.sh/@observablehq/plot";](https://esm.sh/@observablehq/plot";)
// import { document } from "jsr:@manzt/jupyter-helper";

// Assumes 'heatmapDataRefined' is defined and populated from the previous cell.
console.log(`Generating refined heatmap from ${heatmapDataRefined.length} data points...`);

// --- Define the Plot ---
const refinedHeatmapPlot = Plot.plot({ // Renamed variable for clarity
  // === Configuration ===
  title: "Incident Count by AI Harm Level and Sector (Refined)", // Updated title
  // Rotate x-axis labels for better readability if sector names are long
  x: { label: "Sector of Deployment", labelAnchor: "center", tickRotate: -60, labelOffset: 85 },
  y: { label: "AI Harm Level" },
  // Configure the color scale (using settings from your last code example)
  color: {
    scheme: "viridis",
    type: "log",
    // legend: true, // Legend remains removed
    label: "Number of Incidents (log scale)",
    nice: true,
  },

  // Improve layout spacing
  marginTop: 50,
  marginRight: 50,
  marginBottom: 100,
  marginLeft: 150,

  // Add style for white background if desired (uncomment)
  // style: {
  //   backgroundColor: "white",
  // },

  // === Marks ===
  marks: [
    // 1. The Heatmap Cells - using refined data
    Plot.cell(heatmapDataRefined, { // Use heatmapDataRefined
      x: "sector",
      y: "harm_level",
      fill: "count",
      title: (d) => `Sector: ${d.sector}\nHarm Level: ${d.harm_level}\nCount: ${d.count}`,
    }),

    // 2. Text Labels on Cells (Still commented out)
    /*
    Plot.text(heatmapDataRefined, { // Use heatmapDataRefined if uncommented
      x: "sector",
      y: "harm_level",
      text: (d) => (d.count > 0 ? d.count : ""),
      fill: "black",
      stroke: "white",
      strokeWidth: 2,
      dy: 0,
    }),
    */

    // 3. Tooltips for Interactivity - using refined data
    Plot.tip(heatmapDataRefined, Plot.pointer({ // Use heatmapDataRefined
      x: "sector",
      y: "harm_level",
      title: (d) => `${d.count} incidents\nSector: ${d.sector}\nHarm Level: ${d.harm_level}`
    }))
  ],

  // === Deno/Jupyter Integration ===
  document
});

// The plot object itself is the implicit return value for rendering
refinedHeatmapPlot; // Use the new variable name

Generating refined heatmap from 62 data points...
