## Overview

- High-level: Deno has a Jupyter kernel — and why that’s exciting
- **Why**: Use Deno to explore and analyze datasets
    - Combining backend (data wrangling) and frontend (interactive visualization) in a single environment

## The Dataset  

The **National Gallery of Art (NGA) [Open Data Program](https://www.nga.gov/open-access-images/open-data.html)** provides an up-to-date archive of over **130,000 artworks** and their creators, available [on GitHub](https://github.com/NationalGalleryOfArt/opendata/tree/main/data).  

The dataset is structured as a **relational database**, exported as individual CSV files. These files contain linked information on artworks, artists, and images. We will use this dataset to construct an **in-memory representation**, enabling **exploratory data analysis** within the notebook.  

We will focus on three key tables, linked through the `objects_constituents.csv` join table that maps artworks to their creators:  

- **`objects.csv`** – Core metadata on artworks, including titles, dates, materials, and classifications.  
- **`constituents.csv`** – Information on artists, such as names, nationalities, and lifespans.  
- **`published_images.csv`** – Links to artwork images via the NGA’s **IIIF API**.  

By leveraging the **relational structure**, we will **join these tables** to create a single dataset that integrates artworks, artist details, and image links. This dataset will serve as the foundation for **analysis and visualization** within the notebook.  

## Wrangling the data

- Parse with `@std/csv` (show that the dataset is really big and that relationships are tough to build up by hand)
- Motivate the use of Polars (relational tables)

In [None]:
// With JSR / Web stuff

import * as csv from "jsr:@std/csv@1.0.5";
import * as streams from "jsr:@std/streams@1.0.9";

// Root of the raw CSV exports in the NGA open-data repository; the per-table
// file names fetched in this notebook are resolved against it.
let baseUrl = new URL(
    "https://github.com/NationalGalleryOfArt/opendata/raw/refs/heads/main/data/"
);

let response = await fetch(new URL("objects.csv", baseUrl));

// Fail loudly on an HTTP error or a missing body instead of handing an error
// page (or `null`) to the CSV parser.
if (!response.ok || response.body === null) {
    throw new Error(
        `Failed to fetch objects.csv: ${response.status} ${response.statusText}`
    );
}

// Stream bytes -> text -> parsed CSV rows, then keep only the columns of interest.
let objects = await Array.fromAsync(
    response.body
        .pipeThrough(new TextDecoderStream())
        .pipeThrough(new csv.CsvParseStream({ skipFirstRow: true }))
        // Uncomment to keep only the first 100 rows (the full dataset takes a long time to parse)
        // .pipeThrough(new streams.LimitedTransformStream({ size: 100 })),
    ,
    // NOTE(review): unary `+` turns an empty field into 0 — confirm that blank
    // ids/years cannot occur, or map them to null instead.
    (row) => ({
        objectid: +row.objectid,
        title: row.title,
        beginyear: +row.beginyear,
        endyear: +row.endyear,
        timespan: row.visualbrowsertimespan,
        medium: row.medium,
        attribution: row.attribution,
        classification: row.visualbrowserclassification,
    })
);

objects.slice(0, 3)

In [None]:
import * as pl from "npm:nodejs-polars@0.18.0";

// Build a Polars data frame from the `objects` row records.
let obs = pl.readRecords(objects)

In [None]:
// Why Polars + how to with Polars
import * as pl from "npm:nodejs-polars@0.18.0";

// Fetch objects.csv again and let Polars parse the whole text in one call.
// This rebinds `objects` from an array of records to a data frame.
let response = await fetch(new URL("objects.csv", baseUrl));
let objects = pl.readCSV(await response.text(), { quoteChar: "\"" })
    // Keep the columns of interest, giving the two `visualbrowser*` columns shorter names.
    .select(
        "objectid",
        "title",
        "beginyear",
        "endyear",
        pl.col("visualbrowsertimespan").alias("timespan"),
        "medium",
        "attribution",
        // `.alias` (not `.as`) to match every other column rename in this notebook.
        pl.col("visualbrowserclassification").alias("classification"),
    );

objects.head();

In [None]:
// Constituents table: keep the id, plus the display name and nationality
// under shorter column names.
let response = await fetch(new URL("constituents.csv", baseUrl));
let constituents = pl
    .readCSV(await response.text(), { quoteChar: '"' })
    .select(
        "constituentid",
        pl.col("forwarddisplayname").alias("name"),
        pl.col("visualbrowsernationality").alias("nationality"),
    );

constituents.head()

In [None]:
// Link table between objects and constituents, reduced to one row per object.
let response = await fetch(new URL("objects_constituents.csv", baseUrl));
let objectToArtist = pl.readCSV(await response.text(), { quoteChar: "\"" })
    // Keep only the links whose role type is "artist".
    .filter(pl.col("roletype").eq(pl.lit("artist")))
    .groupBy("objectid")
    // NOTE(review): confirm that `first` accepts a column argument in this
    // nodejs-polars version and that `role` is still present afterwards.
    .first("constituentid") // first artist listed for object
    .select(
        "objectid",
        "constituentid", 
        "role",
    )

objectToArtist.head()

In [None]:
// Published images: rename the depicted-object id to `objectid` and build an
// IIIF image URL from each row's `uuid`.
let response = await fetch(new URL("published_images.csv", baseUrl));
let publishedImages = pl
    .readCSV(await response.text(), { quoteChar: '"' })
    .select(
        pl.col("depictstmsobjectid").alias("objectid"),
        // pl.col("iiifurl").alias("url"),
        // pl.col("iiifthumburl").alias("thumbnail_url"),
        pl
            .format(
                "https://api.nga.gov/iiif/{}/full/full/0/default.jpg",
                pl.col("uuid"),
            )
            .alias("image_url"),
    );

publishedImages.head()

In [None]:
// Full data frame: images -> object metadata -> object/artist link -> artist details.
let df = publishedImages
    .join(objects, { on: "objectid" })
    .join(objectToArtist, { on: "objectid" })
    .join(constituents, { on: "constituentid" })
    // `constituentid` was only needed as a join key; leave it out of the result.
    .select(pl.exclude("constituentid"));

console.log(df.shape);
df.head()

## Interactive tables

In [None]:
import { widget } from "jsr:@anywidget/deno";
import * as base64 from "jsr:@std/encoding@1.0.7/base64";

/**
 * Show a Polars data frame as a paginated AG Grid table in a notebook widget.
 *
 * The frame is written to IPC bytes and base64-encoded into the widget state;
 * the front-end `render` function decodes it with flechette and hands the
 * columns and rows to AG Grid.
 */
function agGrid(df: pl.DataFrame) {
    return widget({
        state: {
            // TODO: Jupyter Widgets support binary data, but I'm not sure if it's implemented in Deno yet
            ipc: base64.encodeBase64(df.writeIPC()),
            _css: "https://esm.sh/ag-grid-community@33.0.4/styles/ag-grid.css",
        },
        imports: `
import * as agGrid from "https://esm.sh/ag-grid-community@33.0.4";
import * as flech from "https://esm.sh/@uwdata/flechette@1.1.2";
import * as base64 from "https://esm.sh/jsr/@std/encoding@1.0.7/base64";
    `,
        // @ts-expect-error - function body is serialized to the front end with imports from above
        render: ({ model, el }) => {
            // Inside this function `agGrid` and `base64` are the front-end imports above.
            agGrid.ModuleRegistry.registerModules([agGrid.AllCommunityModule]);
            el.style.height = "400px";
            let table = flech.tableFromIPC(base64.decodeBase64(model.get("ipc")));
            let columnDefs = table.names.map((field) => ({ field }));
            agGrid.createGrid(el, {
                columnDefs,
                rowData: table.toArray(),
                pagination: true,
            });
        },
    });
}

/**
 * Show a Polars data frame with the quak data table in a notebook widget.
 *
 * The frame is written to Parquet bytes and base64-encoded into the widget
 * state. The front-end `render` function registers those bytes as a file in
 * DuckDB-WASM, loads them into a table named "df", and mounts a quak data
 * table over it.
 */
function quak(df: pl.DataFrame) {
    return widget({
        // TODO: Jupyter Widgets support binary data, but I'm not sure if it's implemented in Deno yet
        state: { parquet: base64.encodeBase64(df.writeParquet()) },
        imports: `
import * as mosaic from "https://esm.sh/@uwdata/mosaic-core@~0.11?bundle";
import * as base64 from "https://esm.sh/jsr/@std/encoding@1.0.7/base64";
import * as quak from "https://esm.sh/jsr/@manzt/quak@0.0.1";
    `,
        // @ts-expect-error - function body is serialized to the front end with imports from above
        render: async ({ model, el }) => {
            // Inside this function `quak` and `base64` are the front-end imports above.
            let connector = mosaic.wasmConnector();
            let db = await connector.getDuckDB();
            let coordinator = new mosaic.Coordinator();
            coordinator.databaseConnector(connector);

            let bytes = base64.decodeBase64(model.get("parquet"));
            await db.registerFileBuffer("df.parquet", bytes);
            await coordinator.exec([`CREATE OR REPLACE TABLE "df" AS SELECT * FROM "df.parquet"`]);

            let dt = await quak.datatable("df", { coordinator, height: 400 });

            // Mount the table exactly once, inside a wrapper a little taller
            // than the table itself (435px vs. 400px).
            let div = document.createElement("div");
            div.style.height = "435px";
            div.appendChild(dt.node());

            el.appendChild(div);
        },
    });
}


In [None]:
// ag-grid seems to break down with >10,000 rows, so only the first 100 rows of `df` are shown
agGrid(df.head(100))

In [None]:
// quak can handle it all (keeps the data as compressed parquet in the front-end);
// this cell passes it the first 1,000 rows of `objects`
quak(objects.head(1000))

## Plotting

- Explain the dependencies: Observable Plot (`@observablehq/plot`) and `linkedom`
- Plot some different views / EDA

In [None]:
import * as Plot from "npm:@observablehq/plot";
import * as linkedom from "npm:linkedom";

// Plot needs a `document` instance for every plot, and we have to supply one
// ourselves in Deno. Each call parses an empty HTML page with linkedom and
// returns its document (so `new Document()` yields that document object).
function Document() {
    const { document } = linkedom.parseHTML("<html></html>");
    return document;
}

// Plain row records for Plot; the nationality plot below reuses them.
let records = df.toRecords();

// Horizontal bar chart: row count per `classification`, bars ordered by descending count.
Plot.plot({
  document: new Document(),
  marginLeft: 125,
  marks: [
    Plot.barX(
      records,
      Plot.groupY({ x: "count" }, { y: "classification", sort: { y: "-x" } })
    ),
  ],
})

In [None]:
// Horizontal bar chart: row count per `nationality`, bars ordered by descending count.
Plot.plot({
  document: new Document(),
  marginLeft: 125,
  marks: [
    Plot.barX(
      records,
      Plot.groupY({ x: "count" }, { y: "nationality", sort: { y: "-x" } })
    ),
  ],
})