@@ -5,12 +5,17 @@ import * as msql from "@uwdata/mosaic-sql";
55import { assert } from "./utils/assert.ts" ;
66import { DataTable , datatable } from "./clients/DataTable.ts" ;
77
/**
 * Minimal view of the DuckDB handle returned by `connector.getDuckDB()`.
 *
 * Only the two file-registration methods this module calls are declared.
 */
interface DuckDBClient {
  /** Registers `text` under the file name `name`; resolves once registration completes. */
  registerFileText(name: string, text: string): Promise<void>;
  /** Registers the bytes in `buffer` under the file name `name`; resolves once registration completes. */
  registerFileBuffer(name: string, buffer: Uint8Array): Promise<void>;
}
12+
// DOM handles used throughout this module. Each lookup is asserted non-null,
// so the page is expected to contain all four elements.

// First <input> element on the page; getFileSelect listens for its "input" event.
let dropzone = document.querySelector("input")!;
// Element with id "options".
let options = document.querySelector("#options")!;
// Element with id "table"; handleLoading appends its loading element here.
let table = document.querySelector("#table")!;
// Button with id "export"; copyToClipboard queries its <svg> icons.
let exportButton = document.querySelector<HTMLButtonElement>("#export")!;
1217
13- function getFile ( ) : Promise < File > {
18+ function getFileSelect ( ) : Promise < File > {
1419 return new Promise ( ( resolve ) => {
1520 // on input file change
1621 dropzone . addEventListener ( "input" , ( e ) => {
@@ -52,6 +57,53 @@ function handleLoading(source: string | null) {
5257 table . appendChild ( loading ) ;
5358}
5459
/**
 * Builds the mosaic-sql load statement for a remote data source.
 *
 * DuckDB tries to make range requests for CSV/JSON files, so text formats
 * (`.csv`, `.tsv`, `.json`) are fetched here and registered with DuckDB under
 * their file name (the last path segment). Parquet sources are passed to
 * `loadParquet` by URL instead.
 *
 * @param source - URL of the remote file; the format is chosen from the
 *   extension of `source.pathname`.
 * @param options - Holds `db`, the DuckDB client used to register fetched text.
 * @returns The load statement targeting the table named by `tableName`.
 * @throws If the HTTP response is not OK, or if the extension is not one of
 *   `.csv`, `.tsv`, `.json`, `.parquet`.
 */
async function getUrl(
  source: URL,
  { db }: { db: DuckDBClient },
): Promise<string> {
  let path = source.pathname;
  let isCsv = path.endsWith(".csv");
  let isTsv = path.endsWith(".tsv");
  let isJson = path.endsWith(".json");

  if (isCsv || isTsv || isJson) {
    let file = path.split("/").pop() ?? "";
    let response = await fetch(source);
    // Without this check an HTTP error page would be registered and parsed as data.
    assert(
      response.ok,
      `Failed to fetch ${source.href}: ${response.status} ${response.statusText}`,
    );
    await db.registerFileText(file, await response.text());
    if (isCsv) {
      return msql.loadCSV(tableName, file, { replace: true });
    }
    if (isTsv) {
      return msql.loadCSV(tableName, file, { replace: true, delim: "\t" });
    }
    return msql.loadJSON(tableName, file, { replace: true });
  }

  assert(path.endsWith(".parquet"), "Unsupported file format.");
  // Pass the URL as a string, matching the file-name strings used by the other loaders.
  return msql.loadParquet(tableName, source.href, { replace: true });
}
86+
/**
 * Registers a user-selected file with DuckDB and builds its mosaic-sql load
 * statement.
 *
 * `.csv`, `.tsv` and `.json` files are registered as text; `.parquet` files are
 * registered as a byte buffer. The file is registered under `file.name`.
 *
 * @param file - The file chosen by the user; the format is chosen from the
 *   extension of `file.name`.
 * @param options - Holds `db`, the DuckDB client the file is registered with.
 * @returns The load statement targeting the table named by `tableName`.
 * @throws If the extension is not one of `.csv`, `.tsv`, `.json`, `.parquet`.
 */
async function getFile(
  file: File,
  { db }: { db: DuckDBClient },
): Promise<string> {
  let name = file.name;
  let isCsv = name.endsWith(".csv");
  let isTsv = name.endsWith(".tsv");
  let isJson = name.endsWith(".json");

  if (isCsv || isTsv || isJson) {
    await db.registerFileText(name, await file.text());
    if (isCsv) {
      return msql.loadCSV(tableName, name, { replace: true });
    }
    if (isTsv) {
      return msql.loadCSV(tableName, name, { replace: true, delim: "\t" });
    }
    return msql.loadJSON(tableName, name, { replace: true });
  }

  // Same message as the URL loader, so an unsupported upload fails with a readable error.
  assert(name.endsWith(".parquet"), "Unsupported file format.");
  let bytes = new Uint8Array(await file.arrayBuffer());
  await db.registerFileBuffer(name, bytes);
  return msql.loadParquet(tableName, name, { replace: true });
}
106+
// Data table instance; declared here without a value and read by copyToClipboard (`dt.sql`).
let dt: DataTable;
// Name of the DuckDB table that the loaders create or replace.
// NOTE(review): copyToClipboard matches the literal ' FROM "df"', so changing
// this value would need that string updated as well.
let tableName = "df";
// Mosaic coordinator; main() attaches the wasm connector to it.
let coordinator = new mc.Coordinator();
@@ -61,41 +113,12 @@ async function main() {
61113 let source = new URLSearchParams ( location . search ) . get ( "source" ) ;
62114 handleLoading ( source ) ;
63115 let connector = mc . wasmConnector ( ) ;
64- let db = await connector . getDuckDB ( ) ;
116+ let db : DuckDBClient = await connector . getDuckDB ( ) ;
65117 coordinator . databaseConnector ( connector ) ;
66118
67- let exec : string ;
68- if ( source ) {
69- exec = source . endsWith ( ".csv" )
70- ? msql . loadCSV ( tableName , source , { replace : true } )
71- : source . endsWith ( ".tsv" )
72- ? msql . loadCSV ( tableName , source , { replace : true , delim : "\t" } )
73- : source . endsWith ( ".json" )
74- ? msql . loadJSON ( tableName , source , { replace : true } )
75- : msql . loadParquet ( tableName , source , { replace : true } ) ;
76- } else {
77- let file = await getFile ( ) ;
78- if ( file . name . endsWith ( ".csv" ) ) {
79- await db . registerFileText ( file . name , await file . text ( ) ) ;
80- exec = msql . loadCSV ( tableName , file . name , { replace : true } ) ;
81- } else if ( file . name . endsWith ( ".json" ) ) {
82- await db . registerFileText ( file . name , await file . text ( ) ) ;
83- exec = msql . loadJSON ( tableName , file . name , { replace : true } ) ;
84- } else if ( file . name . endsWith ( ".tsv" ) ) {
85- await db . registerFileText ( file . name , await file . text ( ) ) ;
86- exec = msql . loadCSV ( tableName , file . name , {
87- replace : true ,
88- delim : "\t" ,
89- } ) ;
90- } else {
91- assert ( file . name . endsWith ( ".parquet" ) ) ;
92- await db . registerFileBuffer (
93- file . name ,
94- new Uint8Array ( await file . arrayBuffer ( ) ) ,
95- ) ;
96- exec = msql . loadParquet ( tableName , file . name , { replace : true } ) ;
97- }
98- }
119+ let exec = source
120+ ? await getUrl ( new URL ( source ) , { db } )
121+ : await getFile ( await getFileSelect ( ) , { db } ) ;
99122
100123 // Bug in mosaic-sql
101124 exec = exec . replace ( "json_format" , "format" ) ;
@@ -109,6 +132,11 @@ async function main() {
109132 function copyToClipboard ( ) {
110133 let from = exec . match ( / F R O M .* $ / ) ?. [ 0 ] ;
111134 assert ( from , "Could not find FROM clause in exec string." ) ;
135+ if ( source ?. startsWith ( "http://" ) || source ?. startsWith ( "https://" ) ) {
136+ // we need to replace the source with the actual URL
137+ let file = new URL ( source ) . pathname . split ( "/" ) . pop ( ) ! ;
138+ from = from . replace ( file , source ) ;
139+ }
112140 let sql = dt . sql ?. replace ( ' FROM "df"' , from ) ;
113141 navigator . clipboard . writeText ( sql ! ) ;
114142 const icons = exportButton . querySelectorAll ( "svg" ) ! ;
0 commit comments