Skip to content

Commit

Permalink
Merge pull request #198 from tableflowhq/feature/schemaless-data-types
Browse files Browse the repository at this point in the history
Schemaless data types support
  • Loading branch information
ciminelli committed Dec 29, 2023
2 parents 9f0f80f + 0a8f344 commit 42ebfb7
Show file tree
Hide file tree
Showing 35 changed files with 392 additions and 149 deletions.
11 changes: 10 additions & 1 deletion admin-server/docs/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 10 additions & 1 deletion admin-server/docs/swagger.json
Expand Up @@ -321,7 +321,7 @@
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
"$ref": "#/definitions/types.UploadColumnMapping"
}
}
}
Expand Down Expand Up @@ -780,6 +780,15 @@
}
}
},
"types.UploadColumnMapping": {
"type": "object",
"properties": {
"template_column_id": {
"type": "string",
"example": "a1ed136d-33ce-4b7e-a7a4-8a5ccfe54cd5"
}
}
},
"types.UploadHeaderRowSelection": {
"type": "object",
"properties": {
Expand Down
8 changes: 7 additions & 1 deletion admin-server/docs/swagger.yaml
Expand Up @@ -260,6 +260,12 @@ definitions:
example: a1ed136d-33ce-4b7e-a7a4-8a5ccfe54cd5
type: string
type: object
types.UploadColumnMapping:
properties:
template_column_id:
example: a1ed136d-33ce-4b7e-a7a4-8a5ccfe54cd5
type: string
type: object
types.UploadHeaderRowSelection:
properties:
index:
Expand Down Expand Up @@ -518,7 +524,7 @@ paths:
required: true
schema:
additionalProperties:
type: string
$ref: '#/definitions/types.UploadColumnMapping'
type: object
responses:
"200":
Expand Down
4 changes: 4 additions & 0 deletions admin-server/go/pkg/types/importer.go
Expand Up @@ -79,6 +79,10 @@ type UploadHeaderRowSelection struct {
Index *int `json:"index" example:"0"`
}

// UploadColumnMapping describes where a single upload column should be mapped.
// For standard imports this is a template column ID; for schemaless imports the
// request may carry a user-provided key here, which is later replaced with a
// generated template column ID (see importerSetColumnMapping).
type UploadColumnMapping struct {
	// TemplateColumnID is the destination template column for the upload column.
	TemplateColumnID string `json:"template_column_id" example:"a1ed136d-33ce-4b7e-a7a4-8a5ccfe54cd5"`
}

type UploadRow struct {
Index int `json:"index" example:"0"`
Values map[int]string `json:"values"`
Expand Down
17 changes: 11 additions & 6 deletions admin-server/go/pkg/web/file_import_routes.go
Expand Up @@ -344,23 +344,28 @@ func importerSetHeaderRow(c *gin.Context) {
// @Success 200 {object} types.Res
// @Failure 400 {object} types.Res
// @Router /file-import/v1/upload/{id}/set-column-mapping [post]
// @Param id path string true "Upload ID"
// @Param body body map[string]string true "Request body"
// @Param id path string true "Upload ID"
// @Param body body map[string]types.UploadColumnMapping true "Request body"
func importerSetColumnMapping(c *gin.Context) {
id := c.Param("id")
if len(id) == 0 {
c.AbortWithStatusJSON(http.StatusBadRequest, types.Res{Err: "No upload ID provided"})
return
}

// Non-schemaless: Upload column ID -> Template column ID
// Schemaless: Upload column ID -> User-provided key (i.e. first_name) (only from the request, this will be updated to IDs after the template is generated)
columnMapping := make(map[string]string)
if err := c.ShouldBindJSON(&columnMapping); err != nil {
columnMappingRequest := make(map[string]types.UploadColumnMapping)
if err := c.ShouldBindJSON(&columnMappingRequest); err != nil {
tf.Log.Warnw("Could not bind JSON", "error", err)
c.AbortWithStatusJSON(http.StatusBadRequest, types.Res{Err: err.Error()})
return
}

// Non-schemaless: Upload column ID -> Template column ID
	// Schemaless: Upload column ID -> User-provided key (e.g. first_name) (only from the request, this will be updated to IDs after the template is generated)
columnMapping := make(map[string]string)
for k, mappingSelection := range columnMappingRequest {
columnMapping[k] = mappingSelection.TemplateColumnID
}
if len(columnMapping) == 0 {
c.AbortWithStatusJSON(http.StatusBadRequest, types.Res{Err: "Please select at least one destination column"})
return
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/download-import.mdx
Expand Up @@ -2,3 +2,5 @@
title: "Download Import CSV"
openapi: "GET /import/{id}/download"
---

Retrieve an import as a CSV file.
27 changes: 25 additions & 2 deletions docs/api-reference/get-import-rows.mdx
Expand Up @@ -3,8 +3,31 @@ title: "Get Import Rows"
openapi: "GET /import/{id}/rows"
---

Retrieve the rows of an import as JSON. This endpoint supports pagination by using a limit/offset. If the limit and offset are not provided, it will return the first 1000 rows of the import.
Retrieve the rows of an import as JSON. This endpoint supports pagination by using a limit/offset. If the limit and offset are not provided, it will return the first 1,000 rows of the import.

To use the limit/offset, start by setting the offset to 0 and the limit to 100 to get the first 100 rows of data. To get the next 100 rows, set the offset to 100 while keeping the limit the same. Continue increasing the offset by 100 until no more rows are returned.

Note: the max limit is 1000.
<Info>The maximum `limit` is 1000.</Info>

### Example Response

```json
[
{
"index": 0,
"values": {
"age": 23,
"email": "maria@example.com",
"first_name": "Maria"
}
},
{
"index": 1,
"values": {
"age": 32,
"email": "robert@example.com",
"first_name": "Robert"
}
}
]
```
61 changes: 60 additions & 1 deletion docs/api-reference/get-import.mdx
@@ -1,4 +1,63 @@
---
title: "Get Import Metadata"
title: "Get Import"
openapi: "GET /import/{id}"
---

Retrieve the row data, column definitions, and other information about the import.

<Info>The number of rows included is limited to 10,000. If there are more than 10,000 rows, an `error` will be set and
the data should be retrieved using the [/rows](/api-reference/get-import-rows) endpoint.</Info>

### Example Response

```json
{
"id": "da5554e3-6c87-41b2-9366-5449a2f15b53",
"importer_id": "a0fadb1d-9888-4fcb-b185-25b984bcb227",
"num_rows": 2,
"num_columns": 3,
"num_processed_values": 5,
"metadata": {
"user_id": 1234,
"user_email": "user@example.com",
"environment": "staging"
},
"created_at": 1698172312,
"error": null,
"columns": [
{
"data_type": "number",
"key": "age",
"name": "Age"
},
{
"data_type": "string",
"key": "email",
"name": "Email"
},
{
"data_type": "string",
"key": "first_name",
"name": "First Name"
}
],
"rows": [
{
"index": 0,
"values": {
"age": 23,
"email": "maria@example.com",
"first_name": "Maria"
}
},
{
"index": 1,
"values": {
"age": 32,
"email": "robert@example.com",
"first_name": "Robert"
}
}
]
}
```
Binary file added docs/assets/webhooks-transformations-filter.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/assets/webhooks-transformations.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/mint.json
Expand Up @@ -92,8 +92,8 @@
{
"group": "API",
"pages": [
"api-reference/get-import-rows",
"api-reference/get-import",
"api-reference/get-import-rows",
"api-reference/download-import",
"api-reference/create-importer",
"api-reference/delete-importer"
Expand Down
10 changes: 6 additions & 4 deletions docs/retrieve-data.mdx
Expand Up @@ -16,14 +16,16 @@ onComplete: (data) => console.log(data)

<Info>Feature available in <a href="https://tableflow.com/pricing" target="_blank">TableFlow Cloud</a>.</Info>

TableFlow provides an API to paginate the row data of an import, view metadata about an import, or download an import directly as a CSV.
TableFlow can send webhooks to your application when an import has been completed. The webhook will contain the row data, column definitions, and other information about the import.

- [Webhook Setup Guide](/webhooks)

TableFlow also provides an API to retrieve import data, paginate the row data of an import (used for large imports), or download an import directly as a CSV.

- [Get Import](/api-reference/get-import)
- [Get Import Rows](/api-reference/get-import-rows)
- [Get Import Metadata](/api-reference/get-import)
- [Download Import CSV](/api-reference/download-import)

You can also use [Webhooks](/webhooks) to get notified when an import has been completed. This allows your backend application to load the imported data as soon as an import has completed.


### Option 3: Admin Dashboard

Expand Down
74 changes: 34 additions & 40 deletions docs/sdk/javascript.mdx
Expand Up @@ -302,68 +302,62 @@ uploadButton.addEventListener("click", () => {
</ResponseField>
<ResponseField name="onComplete" type="function">
Callback function that fires when a user completes an import. It returns `data`, an object that contains the row data
and information about the import such as the number of rows. The number of rows returned is limited to 10,000. If
there are more than 10,000 rows, an `error` will be set and the data should be retrieved using the
[API](/api-reference/get-import-rows).
Callback function that fires when a user completes an import. It returns `data`, an object that contains the row data,
column definitions, and other information about the import.
<Info>The number of rows included is limited to 10,000. If there are more than 10,000 rows, an `error` will be set and
the data should be retrieved using the [API](/api-reference/get-import-rows).</Info>
```jsx
onComplete={(data) => console.log(data)}
```
Example `data`:
```json
{
"id": "170f9ae1-c109-4e26-83a1-b31f2baa81b2",
"upload_id": "4f7ec0b5-16ef-4d0e-8b6a-0c182815a131",
"id": "da5554e3-6c87-41b2-9366-5449a2f15b53",
"importer_id": "a0fadb1d-9888-4fcb-b185-25b984bcb227",
"num_rows": 4,
"num_columns": 4,
"num_processed_values": 16,
"num_rows": 2,
"num_columns": 3,
"num_processed_values": 5,
"metadata": {
"user_id": 1234,
"user_email": "test@example.com",
"environment": "dev"
"user_id": 1234,
"user_email": "user@example.com",
"environment": "staging"
},
"is_stored": true,
"has_errors": false,
"num_error_rows": 0,
"num_valid_rows": 4,
"created_at": 1698172312,
"error": null,
"columns": [
{
"data_type": "number",
"key": "age",
"name": "Age"
},
{
"data_type": "string",
"key": "email",
"name": "Email"
},
{
"data_type": "string",
"key": "first_name",
"name": "First Name"
}
],
"rows": [
{
"index": 0,
"values": {
"age": "23",
"age": 23,
"email": "maria@example.com",
"first_name": "Maria",
"last_name": "Martinez"
"first_name": "Maria"
}
},
{
"index": 1,
"values": {
"age": "32",
"age": 32,
"email": "robert@example.com",
"first_name": "Robert",
"last_name": "Jones"
}
},
{
"index": 2,
"values": {
"age": "30",
"email": "mary@example.com",
"first_name": "Mary",
"last_name": "Zhang"
}
},
{
"index": 3,
"values": {
"age": "24",
"email": "jamie@example.com",
"first_name": "Jamie",
"last_name": "Miller"
"first_name": "Robert"
}
}
]
Expand Down

0 comments on commit 42ebfb7

Please sign in to comment.