Skip to content

Commit cae8ebd

Browse files
authored
fix(standalone): Avoid DuckDB range requests with text files (#49)
* fix(standalone): Avoid DuckDB range requests with text files * cleanup
1 parent 4865aa3 commit cae8ebd

File tree

1 file changed

+62
-34
lines changed

1 file changed

+62
-34
lines changed

lib/demo.ts

Lines changed: 62 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,17 @@ import * as msql from "@uwdata/mosaic-sql";
55
import { assert } from "./utils/assert.ts";
66
import { DataTable, datatable } from "./clients/DataTable.ts";
77

8+
/**
 * The subset of the DuckDB client that this module calls.
 *
 * Declared locally so the loader helpers depend only on the two
 * file-registration methods they use.
 */
interface DuckDBClient {
  /** Register `text` under the file name `name`. */
  registerFileText(name: string, text: string): Promise<void>;
  /** Register the raw bytes in `buffer` under the file name `name`. */
  registerFileBuffer(name: string, buffer: Uint8Array): Promise<void>;
}
12+
813
let dropzone = document.querySelector("input")!;
914
let options = document.querySelector("#options")!;
1015
let table = document.querySelector("#table")!;
1116
let exportButton = document.querySelector("#export")! as HTMLButtonElement;
1217

13-
function getFile(): Promise<File> {
18+
function getFileSelect(): Promise<File> {
1419
return new Promise((resolve) => {
1520
// on input file change
1621
dropzone.addEventListener("input", (e) => {
@@ -52,6 +57,53 @@ function handleLoading(source: string | null) {
5257
table.appendChild(loading);
5358
}
5459

60+
/**
 * Build the statement that loads a remote data file into the `tableName` table.
 *
 * DuckDB tries to make range requests for remote CSV/TSV/JSON files, so text
 * formats are fetched here in full and registered with DuckDB under the last
 * segment of the URL path. Parquet files are still loaded directly by URL.
 *
 * @param source - URL of the data file. The format is chosen from the
 *   extension of `source.pathname`: `.csv`, `.tsv`, `.json` or `.parquet`.
 * @param options - `db` is the client used to register the downloaded text.
 * @returns The result of the matching `msql.load*` helper.
 *
 * Asserts that the HTTP response is OK and that the extension is supported.
 */
async function getUrl(source: URL, { db }: { db: DuckDBClient }) {
  let path = source.pathname;
  let isCsv = path.endsWith(".csv");
  let isTsv = path.endsWith(".tsv");
  let isJson = path.endsWith(".json");

  if (isCsv || isTsv || isJson) {
    let file = path.split("/").pop() ?? "";
    let response = await fetch(source);
    // Without this check the body of an error page (e.g. a 404) would be
    // registered and parsed as if it were the data file.
    assert(
      response.ok,
      `Failed to fetch ${source.href} (HTTP ${response.status}).`,
    );
    await db.registerFileText(file, await response.text());
    if (isJson) {
      return msql.loadJSON(tableName, file, { replace: true });
    }
    // CSV and TSV share a loader; TSV only differs by its delimiter.
    return msql.loadCSV(
      tableName,
      file,
      isTsv ? { replace: true, delim: "\t" } : { replace: true },
    );
  }

  assert(path.endsWith(".parquet"), "Unsupported file format.");
  return msql.loadParquet(tableName, source, { replace: true });
}
86+
87+
/**
 * Register a local file with DuckDB and build the statement that loads it
 * into the `tableName` table.
 *
 * CSV, TSV and JSON files are registered as text; Parquet files are
 * registered as raw bytes. The format is chosen from the extension of
 * `file.name`.
 *
 * @param file - The file to load.
 * @param options - `db` is the client the file contents are registered with.
 * @returns The result of the matching `msql.load*` helper.
 *
 * Asserts that the extension is one of `.csv`, `.tsv`, `.json`, `.parquet`.
 */
async function getFile(file: File, { db }: { db: DuckDBClient }) {
  let name = file.name;
  let isTsv = name.endsWith(".tsv");
  let isJson = name.endsWith(".json");

  if (name.endsWith(".csv") || isTsv || isJson) {
    await db.registerFileText(name, await file.text());
    if (isJson) {
      return msql.loadJSON(tableName, name, { replace: true });
    }
    // CSV and TSV share a loader; TSV only differs by its delimiter.
    return msql.loadCSV(
      tableName,
      name,
      isTsv ? { replace: true, delim: "\t" } : { replace: true },
    );
  }

  // Same message as the URL loader so both entry points fail alike.
  assert(name.endsWith(".parquet"), "Unsupported file format.");
  let bytes = new Uint8Array(await file.arrayBuffer());
  await db.registerFileBuffer(name, bytes);
  return msql.loadParquet(tableName, name, { replace: true });
}
106+
55107
let dt: DataTable;
56108
let tableName = "df";
57109
let coordinator = new mc.Coordinator();
@@ -61,41 +113,12 @@ async function main() {
61113
let source = new URLSearchParams(location.search).get("source");
62114
handleLoading(source);
63115
let connector = mc.wasmConnector();
64-
let db = await connector.getDuckDB();
116+
let db: DuckDBClient = await connector.getDuckDB();
65117
coordinator.databaseConnector(connector);
66118

67-
let exec: string;
68-
if (source) {
69-
exec = source.endsWith(".csv")
70-
? msql.loadCSV(tableName, source, { replace: true })
71-
: source.endsWith(".tsv")
72-
? msql.loadCSV(tableName, source, { replace: true, delim: "\t" })
73-
: source.endsWith(".json")
74-
? msql.loadJSON(tableName, source, { replace: true })
75-
: msql.loadParquet(tableName, source, { replace: true });
76-
} else {
77-
let file = await getFile();
78-
if (file.name.endsWith(".csv")) {
79-
await db.registerFileText(file.name, await file.text());
80-
exec = msql.loadCSV(tableName, file.name, { replace: true });
81-
} else if (file.name.endsWith(".json")) {
82-
await db.registerFileText(file.name, await file.text());
83-
exec = msql.loadJSON(tableName, file.name, { replace: true });
84-
} else if (file.name.endsWith(".tsv")) {
85-
await db.registerFileText(file.name, await file.text());
86-
exec = msql.loadCSV(tableName, file.name, {
87-
replace: true,
88-
delim: "\t",
89-
});
90-
} else {
91-
assert(file.name.endsWith(".parquet"));
92-
await db.registerFileBuffer(
93-
file.name,
94-
new Uint8Array(await file.arrayBuffer()),
95-
);
96-
exec = msql.loadParquet(tableName, file.name, { replace: true });
97-
}
98-
}
119+
let exec = source
120+
? await getUrl(new URL(source), { db })
121+
: await getFile(await getFileSelect(), { db });
99122

100123
// Workaround for a bug in mosaic-sql: rename `json_format` to `format` in the generated statement.
101124
exec = exec.replace("json_format", "format");
@@ -109,6 +132,11 @@ async function main() {
109132
function copyToClipboard() {
110133
let from = exec.match(/ FROM .*$/)?.[0];
111134
assert(from, "Could not find FROM clause in exec string.");
135+
if (source?.startsWith("http://") || source?.startsWith("https://")) {
136+
// we need to replace the source with the actual URL
137+
let file = new URL(source).pathname.split("/").pop()!;
138+
from = from.replace(file, source);
139+
}
112140
let sql = dt.sql?.replace(' FROM "df"', from);
113141
navigator.clipboard.writeText(sql!);
114142
const icons = exportButton.querySelectorAll("svg")!;

0 commit comments

Comments
 (0)