Skip to content

Commit

Permalink
Merge pull request #198 from tableflowhq/feature/schemaless-data-types
Browse files Browse the repository at this point in the history
Schemaless data types support
  • Loading branch information
ciminelli committed Dec 29, 2023
2 parents 9f0f80f + 0a8f344 commit 42ebfb7
Show file tree
Hide file tree
Showing 35 changed files with 392 additions and 149 deletions.
11 changes: 10 additions & 1 deletion admin-server/docs/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 10 additions & 1 deletion admin-server/docs/swagger.json
Expand Up @@ -321,7 +321,7 @@
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
"$ref": "#/definitions/types.UploadColumnMapping"
}
}
}
Expand Down Expand Up @@ -780,6 +780,15 @@
}
}
},
"types.UploadColumnMapping": {
"type": "object",
"properties": {
"template_column_id": {
"type": "string",
"example": "a1ed136d-33ce-4b7e-a7a4-8a5ccfe54cd5"
}
}
},
"types.UploadHeaderRowSelection": {
"type": "object",
"properties": {
Expand Down
8 changes: 7 additions & 1 deletion admin-server/docs/swagger.yaml
Expand Up @@ -260,6 +260,12 @@ definitions:
example: a1ed136d-33ce-4b7e-a7a4-8a5ccfe54cd5
type: string
type: object
types.UploadColumnMapping:
properties:
template_column_id:
example: a1ed136d-33ce-4b7e-a7a4-8a5ccfe54cd5
type: string
type: object
types.UploadHeaderRowSelection:
properties:
index:
Expand Down Expand Up @@ -518,7 +524,7 @@ paths:
required: true
schema:
additionalProperties:
type: string
$ref: '#/definitions/types.UploadColumnMapping'
type: object
responses:
"200":
Expand Down
4 changes: 4 additions & 0 deletions admin-server/go/pkg/types/importer.go
Expand Up @@ -79,6 +79,10 @@ type UploadHeaderRowSelection struct {
Index *int `json:"index" example:"0"`
}

// UploadColumnMapping describes where a single upload column should be mapped.
// For standard imports this is a template column ID; for schemaless imports the
// request may carry a user-provided key here, which is later replaced with a
// generated template column ID (see importerSetColumnMapping).
type UploadColumnMapping struct {
	// TemplateColumnID is the destination template column for the upload column.
	TemplateColumnID string `json:"template_column_id" example:"a1ed136d-33ce-4b7e-a7a4-8a5ccfe54cd5"`
}

type UploadRow struct {
Index int `json:"index" example:"0"`
Values map[int]string `json:"values"`
Expand Down
17 changes: 11 additions & 6 deletions admin-server/go/pkg/web/file_import_routes.go
Expand Up @@ -344,23 +344,28 @@ func importerSetHeaderRow(c *gin.Context) {
// @Success 200 {object} types.Res
// @Failure 400 {object} types.Res
// @Router /file-import/v1/upload/{id}/set-column-mapping [post]
// @Param id path string true "Upload ID"
// @Param body body map[string]string true "Request body"
// @Param id path string true "Upload ID"
// @Param body body map[string]types.UploadColumnMapping true "Request body"
func importerSetColumnMapping(c *gin.Context) {
id := c.Param("id")
if len(id) == 0 {
c.AbortWithStatusJSON(http.StatusBadRequest, types.Res{Err: "No upload ID provided"})
return
}

// Non-schemaless: Upload column ID -> Template column ID
// Schemaless: Upload column ID -> User-provided key (i.e. first_name) (only from the request, this will be updated to IDs after the template is generated)
columnMapping := make(map[string]string)
if err := c.ShouldBindJSON(&columnMapping); err != nil {
columnMappingRequest := make(map[string]types.UploadColumnMapping)
if err := c.ShouldBindJSON(&columnMappingRequest); err != nil {
tf.Log.Warnw("Could not bind JSON", "error", err)
c.AbortWithStatusJSON(http.StatusBadRequest, types.Res{Err: err.Error()})
return
}

// Non-schemaless: Upload column ID -> Template column ID
	// Schemaless: Upload column ID -> User-provided key (e.g. first_name) (only from the request, this will be updated to IDs after the template is generated)
columnMapping := make(map[string]string)
for k, mappingSelection := range columnMappingRequest {
columnMapping[k] = mappingSelection.TemplateColumnID
}
if len(columnMapping) == 0 {
c.AbortWithStatusJSON(http.StatusBadRequest, types.Res{Err: "Please select at least one destination column"})
return
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/download-import.mdx
Expand Up @@ -2,3 +2,5 @@
title: "Download Import CSV"
openapi: "GET /import/{id}/download"
---

Retrieve an import as a CSV file.
27 changes: 25 additions & 2 deletions docs/api-reference/get-import-rows.mdx
Expand Up @@ -3,8 +3,31 @@ title: "Get Import Rows"
openapi: "GET /import/{id}/rows"
---

Retrieve the rows of an import as JSON. This endpoint supports pagination by using a limit/offset. If the limit and offset are not provided, it will return the first 1000 rows of the import.
Retrieve the rows of an import as JSON. This endpoint supports pagination by using a limit/offset. If the limit and offset are not provided, it will return the first 1,000 rows of the import.

To use the limit/offset, start by setting the offset to 0 and the limit to 100 to get the first 100 rows of data. To get the next 100 rows, set the offset to 100 while keeping the limit the same. Continue increasing the offset by 100 until no more rows are returned.

Note: the max limit is 1000.
<Info>The maximum `limit` is 1000.</Info>

### Example Response

```json
[
{
"index": 0,
"values": {
"age": 23,
"email": "maria@example.com",
"first_name": "Maria"
}
},
{
"index": 1,
"values": {
"age": 32,
"email": "robert@example.com",
"first_name": "Robert"
}
}
]
```
61 changes: 60 additions & 1 deletion docs/api-reference/get-import.mdx
@@ -1,4 +1,63 @@
---
title: "Get Import Metadata"
title: "Get Import"
openapi: "GET /import/{id}"
---

Retrieve the row data, column definitions, and other information about the import.

<Info>The number of rows included is limited to 10,000. If there are more than 10,000 rows, an `error` will be set and
the data should be retrieved using the [/rows](/api-reference/get-import-rows) endpoint.</Info>

### Example Response

```json
{
"id": "da5554e3-6c87-41b2-9366-5449a2f15b53",
"importer_id": "a0fadb1d-9888-4fcb-b185-25b984bcb227",
"num_rows": 2,
"num_columns": 3,
"num_processed_values": 5,
"metadata": {
"user_id": 1234,
"user_email": "user@example.com",
"environment": "staging"
},
"created_at": 1698172312,
"error": null,
"columns": [
{
"data_type": "number",
"key": "age",
"name": "Age"
},
{
"data_type": "string",
"key": "email",
"name": "Email"
},
{
"data_type": "string",
"key": "first_name",
"name": "First Name"
}
],
"rows": [
{
"index": 0,
"values": {
"age": 23,
"email": "maria@example.com",
"first_name": "Maria"
}
},
{
"index": 1,
"values": {
"age": 32,
"email": "robert@example.com",
"first_name": "Robert"
}
}
]
}
```
Binary file added docs/assets/webhooks-transformations-filter.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/assets/webhooks-transformations.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/mint.json
Expand Up @@ -92,8 +92,8 @@
{
"group": "API",
"pages": [
"api-reference/get-import-rows",
"api-reference/get-import",
"api-reference/get-import-rows",
"api-reference/download-import",
"api-reference/create-importer",
"api-reference/delete-importer"
Expand Down
10 changes: 6 additions & 4 deletions docs/retrieve-data.mdx
Expand Up @@ -16,14 +16,16 @@ onComplete: (data) => console.log(data)

<Info>Feature available in <a href="https://tableflow.com/pricing" target="_blank">TableFlow Cloud</a>.</Info>

TableFlow provides an API to paginate the row data of an import, view metadata about an import, or download an import directly as a CSV.
TableFlow can send webhooks to your application when an import has been completed. The webhook will contain the row data, column definitions, and other information about the import.

- [Webhook Setup Guide](/webhooks)

TableFlow also provides an API to retrieve import data, paginate the row data of an import (used for large imports), or download an import directly as a CSV.

- [Get Import](/api-reference/get-import)
- [Get Import Rows](/api-reference/get-import-rows)
- [Get Import Metadata](/api-reference/get-import)
- [Download Import CSV](/api-reference/download-import)

You can also use [Webhooks](/webhooks) to get notified when an import has been completed. This allows your backend application to load the imported data as soon as an import has completed.


### Option 3: Admin Dashboard

Expand Down
74 changes: 34 additions & 40 deletions docs/sdk/javascript.mdx
Expand Up @@ -302,68 +302,62 @@ uploadButton.addEventListener("click", () => {
</ResponseField>
<ResponseField name="onComplete" type="function">
Callback function that fires when a user completes an import. It returns `data`, an object that contains the row data
and information about the import such as the number of rows. The number of rows returned is limited to 10,000. If
there are more than 10,000 rows, an `error` will be set and the data should be retrieved using the
[API](/api-reference/get-import-rows).
Callback function that fires when a user completes an import. It returns `data`, an object that contains the row data,
column definitions, and other information about the import.
<Info>The number of rows included is limited to 10,000. If there are more than 10,000 rows, an `error` will be set and
the data should be retrieved using the [API](/api-reference/get-import-rows).</Info>
```jsx
onComplete={(data) => console.log(data)}
```
Example `data`:
```json
{
"id": "170f9ae1-c109-4e26-83a1-b31f2baa81b2",
"upload_id": "4f7ec0b5-16ef-4d0e-8b6a-0c182815a131",
"id": "da5554e3-6c87-41b2-9366-5449a2f15b53",
"importer_id": "a0fadb1d-9888-4fcb-b185-25b984bcb227",
"num_rows": 4,
"num_columns": 4,
"num_processed_values": 16,
"num_rows": 2,
"num_columns": 3,
"num_processed_values": 5,
"metadata": {
"user_id": 1234,
"user_email": "test@example.com",
"environment": "dev"
"user_id": 1234,
"user_email": "user@example.com",
"environment": "staging"
},
"is_stored": true,
"has_errors": false,
"num_error_rows": 0,
"num_valid_rows": 4,
"created_at": 1698172312,
"error": null,
"columns": [
{
"data_type": "number",
"key": "age",
"name": "Age"
},
{
"data_type": "string",
"key": "email",
"name": "Email"
},
{
"data_type": "string",
"key": "first_name",
"name": "First Name"
}
],
"rows": [
{
"index": 0,
"values": {
"age": "23",
"age": 23,
"email": "maria@example.com",
"first_name": "Maria",
"last_name": "Martinez"
"first_name": "Maria"
}
},
{
"index": 1,
"values": {
"age": "32",
"age": 32,
"email": "robert@example.com",
"first_name": "Robert",
"last_name": "Jones"
}
},
{
"index": 2,
"values": {
"age": "30",
"email": "mary@example.com",
"first_name": "Mary",
"last_name": "Zhang"
}
},
{
"index": 3,
"values": {
"age": "24",
"email": "jamie@example.com",
"first_name": "Jamie",
"last_name": "Miller"
"first_name": "Robert"
}
}
]
Expand Down

0 comments on commit 42ebfb7

Please sign in to comment.