From d418f9356b9769a6b9b2668394da0187a352213b Mon Sep 17 00:00:00 2001 From: Andrew Polk Date: Fri, 15 Sep 2023 10:16:53 -0700 Subject: [PATCH] fix: #76 - Prevent image filename thrashing after Notion url change --- src/MakeImagePersistencePlan.ts | 25 ++++++++++----- src/makeImagePersistencePlan.spec.ts | 48 ++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 15 deletions(-) diff --git a/src/MakeImagePersistencePlan.ts b/src/MakeImagePersistencePlan.ts index e2d6eeb..00cae39 100644 --- a/src/MakeImagePersistencePlan.ts +++ b/src/MakeImagePersistencePlan.ts @@ -12,14 +12,12 @@ export function makeImagePersistencePlan( // Since most images come from pasting screenshots, there isn't normally a filename. That's fine, we just make a hash of the url // Images that are stored by notion come to us with a complex url that changes over time, so we pick out the UUID that doesn't change. Example: // https://s3.us-west-2.amazonaws.com/secure.notion-static.com/d1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject + // But around Sept 2023, they changed the url to be something like: + // https://prod-files-secure.s3.us-west-2.amazonaws.com/d9a2b712-cf69-4bd6-9d65-87a4ceeacca2/d1bcdc8c-b065-4e40-9a11-392aabeb220e/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20230915%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20230915T161258Z&X-Amz-Expires=3600&X-Amz-Signature=28fca48e65fba86d539c3c4b7676fce1fa0857aa194f7b33dd4a468ecca6ab24&X-Amz-SignedHeaders=host&x-id=GetObject + // The thing we want is the last UUID before the ? - let thingToHash = imageSet.primaryUrl; - const m = /.*secure\.notion-static\.com\/(.*)\//gm.exec( - imageSet.primaryUrl - ); - if (m && m.length > 1) { - thingToHash = m[1]; - } + const urlBeforeQuery = imageSet.primaryUrl.split("?")[0]; + const thingToHash = findLastUuid(urlBeforeQuery) ?? urlBeforeQuery; const hash = hashOfString(thingToHash); imageSet.outputFileName = `${hash}.${imageSet.fileType.ext}`; @@ -50,7 +48,18 @@ export function makeImagePersistencePlan( } } -function hashOfString(s: string) { +function findLastUuid(url: string): string | null { + // Regex for a UUID surrounded by slashes + const uuidPattern = + /(?<=\/)[0-9a-f]{8}-[0-9a-f]{4}-[0-5][0-9a-f]{3}-[089ab][0-9a-f]{3}-[0-9a-f]{12}(?=\/)/gi; + + // Find all UUIDs + const uuids = url.match(uuidPattern); + // Return the last UUID if any exist, else return null + return uuids ? uuids[uuids.length - 1].trim() : null; +} + +export function hashOfString(s: string): number { let hash = 0; for (let i = 0; i < s.length; ++i) hash = Math.imul(31, hash) + s.charCodeAt(i); diff --git a/src/makeImagePersistencePlan.spec.ts b/src/makeImagePersistencePlan.spec.ts index 87ec61c..b585865 100644 --- a/src/makeImagePersistencePlan.spec.ts +++ b/src/makeImagePersistencePlan.spec.ts @@ -1,4 +1,7 @@ -import { makeImagePersistencePlan } from "./MakeImagePersistencePlan"; +import { + hashOfString, + makeImagePersistencePlan, +} from "./MakeImagePersistencePlan"; import { ImageSet } from "./images"; test("primary file with explicit file output path and prefix", () => { @@ -9,11 +12,16 @@ test("primary file with explicit file output path and prefix", () => { fileType: { ext: "png", mime: "image/png" }, }; makeImagePersistencePlan(imageSet, "./static/notion_imgs", "/notion_imgs"); - expect(imageSet.outputFileName).toBe("463556435.png"); + const expectedHash = hashOfString( + "https://s3.us-west-2.amazonaws.com/primaryImage" + ); + expect(imageSet.outputFileName).toBe(`${expectedHash}.png`); expect(imageSet.primaryFileOutputPath).toBe( - "static/notion_imgs/463556435.png" + `static/notion_imgs/${expectedHash}.png` + ); + expect(imageSet.filePathToUseInMarkdown).toBe( + `/notion_imgs/${expectedHash}.png` ); - expect(imageSet.filePathToUseInMarkdown).toBe("/notion_imgs/463556435.png"); }); test("primary file with defaults for image output path and prefix", () => { const imageSet: ImageSet = { @@ -23,13 +31,39 @@ test("primary file with defaults for image output path and prefix", () => { fileType: { ext: "png", mime: "image/png" }, }; makeImagePersistencePlan(imageSet, "", ""); - expect(imageSet.outputFileName).toBe("463556435.png"); + const expectedHash = hashOfString( + "https://s3.us-west-2.amazonaws.com/primaryImage" + ); + expect(imageSet.outputFileName).toBe(`${expectedHash}.png`); // the default behavior is to put the image next to the markdown file expect(imageSet.primaryFileOutputPath).toBe( - "/pathToParentSomewhere/463556435.png" + `/pathToParentSomewhere/${expectedHash}.png` ); - expect(imageSet.filePathToUseInMarkdown).toBe("./463556435.png"); + expect(imageSet.filePathToUseInMarkdown).toBe(`./${expectedHash}.png`); +}); + +test("properly extract UUID from old-style notion image url", () => { + const imageSet: ImageSet = { + primaryUrl: + "https://s3.us-west-2.amazonaws.com/secure.notion-static.com/e1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject", + localizedUrls: [], + fileType: { ext: "png", mime: "image/png" }, + }; + makeImagePersistencePlan(imageSet, "./static/notion_imgs", "/notion_imgs"); + const expectedHash = hashOfString("e1058f46-4d2f-4292-8388-4ad393383439"); + expect(imageSet.outputFileName).toBe(`${expectedHash}.png`); +}); +test("properly extract UUID from new-style (Sept 2023) notion image url", () => { + const imageSet: ImageSet = { + primaryUrl: + "https://prod-files-secure.s3.us-west-2.amazonaws.com/d9a2b712-cf69-4bd6-9d65-87a4ceeacca2/d1bcdc8c-b065-4e40-9a11-392aabeb220e/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20230915%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20230915T161258Z&X-Amz-Expires=3600&X-Amz-Signature=28fca48e65fba86d539c3c4b7676fce1fa0857aa194f7b33dd4a468ecca6ab24&X-Amz-SignedHeaders=host&x-id=GetObject", + localizedUrls: [], + fileType: { ext: "png", mime: "image/png" }, + }; + makeImagePersistencePlan(imageSet, "./static/notion_imgs", "/notion_imgs"); + const expectedHash = hashOfString("d1bcdc8c-b065-4e40-9a11-392aabeb220e"); + expect(imageSet.outputFileName).toBe(`${expectedHash}.png`); }); // In order to make image fallback work with other languages, we have to have