Add example to parse any block text (#416)

* add block text example * clean up comments * add embed option * update comment: * pr feeedback: name change, updated switch case, join richtext array without comma * use iteratePaginatedAPI helper to capture all blocks on page
makenotion · Jul 18, 2023 · 133b321 · 133b321
1 parent 9114e7b
commit 133b321
Show file tree

Hide file tree

Showing 6 changed files with 247 additions and 0 deletions.
diff --git a/examples/parse-text-from-any-block-type/.env.example b/examples/parse-text-from-any-block-type/.env.example
@@ -0,0 +1,2 @@
+NOTION_API_KEY=<your-notion-api-key>
+NOTION_PAGE_ID=<notion-page-id>
diff --git a/examples/parse-text-from-any-block-type/.prettierrc b/examples/parse-text-from-any-block-type/.prettierrc
@@ -0,0 +1,8 @@
+{
+  "arrowParens": "avoid",
+  "tabWidth": 2,
+  "semi": false,
+  "trailingComma": "es5",
+  "endOfLine": "lf",
+  "singleQuote": false
+}
diff --git a/examples/parse-text-from-any-block-type/README.md b/examples/parse-text-from-any-block-type/README.md
@@ -0,0 +1,63 @@
+# Sample integration: Parse text from any block type
+
+Parse plain text from any type of block (i.e. page content), including headers, lists, media, etc.
+
+## About the integration
+
+This integration will retrieve Notion [page content](https://developers.notion.com/docs/working-with-page-content) and parse any plain text from the block. (Note: page content is represented by [blocks](https://developers.notion.com/docs/working-with-page-content#modeling-content-as-blocks).) The plain text is printed to the command line in this example, but can be used in your Notion projects as needed.
+
+## About page content
+
+When [retrieving block children](https://developers.notion.com/reference/get-block-children) (i.e. page content) with the public API, the structure of the blocks returned will vary depending on the block type. For example, a [paragraph block](https://developers.notion.com/reference/block#paragraph) and an [image block](https://developers.notion.com/reference/block#image) are modeled differently.
+
+This example demonstrates how to parse any available text for any type of block. In many cases, [rich text](https://developers.notion.com/reference/rich-text) will be available and the `plain_text` value will be used.
+
+Note: Not all blocks contain text to display (e.g. dividers). Additionally, not all block types are currently supported by the public API.
+
+## Running Locally
+
+### 1. Set up your local project
+
+```zsh
+# Clone this repository locally
+git clone https://github.com/makenotion/notion-sdk-js.git
+
+# Switch into this project
+cd notion-sdk-js/examples/parse-text-from-any-block-type
+
+# Install the dependencies
+npm install
+```
+
+### 2. Set your environment variables in a `.env` file
+
+A `.env.example` file has been included and can be renamed `.env`. Update the environment variables below:
+
+```zsh
+NOTION_API_KEY=<your-notion-api-key>
+NOTION_PAGE_ID=<notion-page-id>
+```
+
+`NOTION_API_KEY`: Create a new integration in the [integrations dashboard](https://www.notion.com/my-integrations) and retrieve the API key from the integration's `Secrets` page.
+
+`NOTION_PAGE_ID`: Use the ID of any Notion page with content. A page with a variety of block types is recommended.
+
+The page ID is the 32-character string at the end of any page URL.
+![A Notion page URL with the ID highlighted](./assets/page_id.png)
+
+### 3. Give the integration access to your page
+
+Your Notion integration will need permission to retrieve the block children from the Notion page being used. To provide access, do the following:
+
+1. Go to the page in your workspace.
+2. Click the `•••` (more menu) on the top-right corner of the page.
+3. Scroll to the bottom of the menu and click `Add connections`.
+4. Search for and select your integration in the `Search for connections...` menu.
+
+Once selected, your integration will have permission to read content from the page.
+
+### 4. Run code
+
+```zsh
+node index.js
+```
diff --git a/examples/parse-text-from-any-block-type/assets/page_id.png b/examples/parse-text-from-any-block-type/assets/page_id.png
diff --git a/examples/parse-text-from-any-block-type/index.js b/examples/parse-text-from-any-block-type/index.js
@@ -0,0 +1,151 @@
+import { Client, iteratePaginatedAPI } from "@notionhq/client"
+import { config } from "dotenv"
+
+config() // Run dotenv's config() before the environment variables below are read.
+
+const pageId = process.env.NOTION_PAGE_ID // ID passed to retrieveBlockChildren() in main()
+const apiKey = process.env.NOTION_API_KEY // Value used as the client's `auth` option
+
+const notion = new Client({ auth: apiKey }) // Client instance used for the blocks.children.list calls
+
+/* 
+---------------------------------------------------------------------------
+*/
+
+// Take the rich text array from a block that supports rich text and return its plain text: the plain_text of every item, joined with no separator.
+// Note: All rich text objects include a plain_text field. An empty array yields an empty string.
+const getPlainTextFromRichText = richText => {
+  return richText.map(t => t.plain_text).join("")
+  // Note: A page mention will return "Undefined" as the page name if the page has not been shared with the integration. See: https://developers.notion.com/reference/block#mention
+}
+
+// Use the source URL and optional caption from media blocks (file, video, etc.)
+const getMediaSourceText = block => {
+  let source, caption
+
+  if (block[block.type].external) {
+    source = block[block.type].external.url
+  } else if (block[block.type].file) {
+    source = block[block.type].file.url
+  } else if (block[block.type].url) {
+    source = block[block.type].url
+  } else {
+    source = "[Missing case for media block types]: " + block.type
+  }
+  // If there's a caption, return it with the source
+  if (block[block.type].caption.length) {
+    caption = getPlainTextFromRichText(block[block.type].caption)
+    return caption + ": " + source
+  }
+  // If no caption, just return the source URL
+  return source
+}
+
+// Get the plain text from any block type supported by the public API.
+const getTextFromBlock = block => {
+  let text
+
+  // Get rich text from blocks that support it
+  if (block[block.type].rich_text) {
+    // This will be an empty string if it's an empty line.
+    text = getPlainTextFromRichText(block[block.type].rich_text)
+  }
+  // Get text for block types that don't have rich text
+  else {
+    switch (block.type) {
+      case "unsupported":
+        // The public API does not support all block types yet
+        text = "[Unsupported block type]"
+        break
+      case "bookmark":
+        text = block.bookmark.url
+        break
+      case "child_database":
+        text = block.child_database.title
+        // Use "Query a database" endpoint to get db rows: https://developers.notion.com/reference/post-database-query
+        // Use "Retrieve a database" endpoint to get additional properties: https://developers.notion.com/reference/retrieve-a-database
+        break
+      case "child_page":
+        text = block.child_page.title
+        break
+      case "embed":
+      case "video":
+      case "file":
+      case "image":
+      case "pdf":
+        text = getMediaSourceText(block)
+        break
+      case "equation":
+        text = block.equation.expression
+        break
+      case "link_preview":
+        text = block.link_preview.url
+        break
+      case "synced_block":
+        // Provides ID for block it's synced with.
+        text = block.synced_block.synced_from
+          ? "This block is synced with a block with the following ID: " +
+            block.synced_block.synced_from[block.synced_block.synced_from.type]
+          : "Source sync block that another blocked is synced with."
+        break
+      case "table":
+        // Only contains table properties.
+        // Fetch children blocks for more details.
+        text = "Table width: " + block.table.table_width
+        break
+      case "table_of_contents":
+        // Does not include text from ToC; just the color
+        text = "ToC color: " + block.table_of_contents.color
+        break
+      case "breadcrumb":
+      case "column_list":
+      case "divider":
+        text = "No text available"
+        break
+      default:
+        text = "[Needs case added]"
+        break
+    }
+  }
+  // Blocks with the has_children property will require fetching the child blocks. (Not included in this example.)
+  // e.g. nested bulleted lists
+  if (block.has_children) {
+    // For now, we'll just flag there are children blocks.
+    text = text + " (Has children)"
+  }
+  // Includes block type for readability. Update formatting as needed.
+  return block.type + ": " + text
+}
+
+async function retrieveBlockChildren(id) {
+  console.log("Retrieving blocks (async)...")
+  const blocks = []
+
+  // Use iteratePaginatedAPI helper function to get all blocks first-level blocks on the page
+  for await (const block of iteratePaginatedAPI(notion.blocks.children.list, {
+    block_id: id, // A page ID can be passed as a block ID: https://developers.notion.com/docs/working-with-page-content#modeling-content-as-blocks
+  })) {
+    blocks.push(block)
+  }
+
+  return blocks
+}
+
+const printBlockText = blocks => {
+  console.log("Displaying blocks:")
+
+  for (let i = 0; i < blocks.length; i++) {
+    const text = getTextFromBlock(blocks[i])
+    // Print plain text for each block.
+    console.log(text)
+  }
+}
+
+async function main() {
+  // Make API call to retrieve all block children from the page provided in .env
+  const blocks = await retrieveBlockChildren(pageId)
+  // Get and print plain text for each block.
+  printBlockText(blocks)
+}
+
+main()
diff --git a/examples/parse-text-from-any-block-type/package.json b/examples/parse-text-from-any-block-type/package.json
@@ -0,0 +1,23 @@
+{
+  "name": "parse-text-from-any-block-type",
+  "version": "1.0.0",
+  "description": "Retrieve blocks from a Notion page and parse text from any block type.",
+  "main": "index.js",
+  "type": "module",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1",
+    "start": "node index.js"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/makenotion/notion-sdk-js.git"
+  },
+  "author": "Jess Mitchell",
+  "license": "MIT",
+  "bugs": {
+    "url": "https://github.com/makenotion/notion-sdk-js/issues"
+  },
+  "dependencies": {
+    "@notionhq/client": "file:../.."
+  }
+}