Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
d9d3b56
fix(connectors): harden 10 KB connectors after audit
waleedlatif1 May 2, 2026
6e58ee3
fix(connectors/slack): include oldestTs in hash to catch window-shift…
waleedlatif1 May 2, 2026
1c597dd
fix(connectors): address remaining audit findings
waleedlatif1 May 2, 2026
f8f033e
fix(connectors): address PR review comments
waleedlatif1 May 2, 2026
e05291b
fix(zendesk): add fallback message for empty validateConfig error
waleedlatif1 May 2, 2026
cb4e2c8
docs(obsidian): clarify hash strategy and re-hydration behavior
waleedlatif1 May 2, 2026
80337c4
fix(connectors): address parallel audit findings
waleedlatif1 May 2, 2026
4e852c5
fix(servicenow): allow string workflowState values (published/draft/r…
waleedlatif1 May 2, 2026
b943c41
fix(obsidian): validateConfig must check authenticated field, not jus…
waleedlatif1 May 2, 2026
2eb96ec
fix(jira): encode collected count in cursor so maxIssues cap works wi…
waleedlatif1 May 2, 2026
9014c08
fix(connectors): address remaining audit findings
waleedlatif1 May 2, 2026
c877c31
fix(github): apply binary check to blob fallback path
waleedlatif1 May 2, 2026
265071c
fix(confluence): reuse pageToStub in getDocument to keep hashes in sync
waleedlatif1 May 2, 2026
589db98
fix(connectors): address audit findings across 7 connectors
waleedlatif1 May 2, 2026
d3f1e66
fix(servicenow): allow non-ASCII KB category names
waleedlatif1 May 2, 2026
f4790e0
fix(servicenow): restore wiki field fallback for kb_knowledge
waleedlatif1 May 2, 2026
591874c
fix(connectors): jira pagination signal and google-docs paragraph join
waleedlatif1 May 2, 2026
71f6256
fix(jira): rely solely on nextPageToken for end-of-results
waleedlatif1 May 2, 2026
42fa643
fix(slack): exclude DM channel IDs from direct lookup
waleedlatif1 May 3, 2026
75f0b1a
fix(slack): align validateConfig channel ID regex with resolveChannel
waleedlatif1 May 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 57 additions & 57 deletions apps/sim/connectors/confluence/confluence.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,35 +80,57 @@ async function fetchLabelsForPages(
}

/**
* Converts a v1 CQL search result item to a lightweight metadata stub.
* Produces a canonical metadata stub with a deterministic contentHash that
* does not depend on which API surface (v1 CQL or v2) returned the page.
*/
function cqlResultToStub(item: Record<string, unknown>, domain: string): ExternalDocument {
const version = item.version as Record<string, unknown> | undefined
const links = item._links as Record<string, string> | undefined
const metadata = item.metadata as Record<string, unknown> | undefined
const labelsWrapper = metadata?.labels as Record<string, unknown> | undefined
const labelResults = (labelsWrapper?.results || []) as Record<string, unknown>[]
const labels = labelResults.map((l) => l.name as string)
const versionNumber = version?.number
function pageToStub(
page: Record<string, unknown>,
options: {
spaceId?: unknown
labels?: string[]
sourceUrl?: string
} = {}
): ExternalDocument {
const version = page.version as Record<string, unknown> | undefined
const versionNumber = version?.number as number | undefined
const lastModified = (version?.createdAt ?? version?.when ?? '') as string
const versionKey = versionNumber ?? lastModified

return {
externalId: String(item.id),
title: (item.title as string) || 'Untitled',
externalId: String(page.id),
title: (page.title as string) || 'Untitled',
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: links?.webui ? `https://${domain}/wiki${links.webui}` : undefined,
contentHash: `confluence:${item.id}:${versionNumber ?? ''}`,
sourceUrl: options.sourceUrl,
contentHash: `confluence:${page.id}:${versionKey}`,
metadata: {
spaceId: (item.space as Record<string, unknown>)?.key,
status: item.status,
spaceId: options.spaceId,
status: page.status,
version: versionNumber,
labels,
lastModified: version?.when,
labels: options.labels ?? [],
lastModified,
},
}
}

/**
 * Converts a v1 CQL search result item to a lightweight metadata stub.
 */
function cqlResultToStub(item: Record<string, unknown>, domain: string): ExternalDocument {
  const itemLinks = item._links as Record<string, string> | undefined
  const itemMeta = item.metadata as Record<string, unknown> | undefined
  const wrappedLabels = itemMeta?.labels as Record<string, unknown> | undefined
  // v1 nests labels under metadata.labels.results; each entry carries a `name`.
  const labelEntries = (wrappedLabels?.results || []) as Record<string, unknown>[]

  return pageToStub(item, {
    spaceId: (item.space as Record<string, unknown>)?.key,
    labels: labelEntries.map((entry) => entry.name as string),
    sourceUrl: itemLinks?.webui ? `https://${domain}/wiki${itemLinks.webui}` : undefined,
  })
}

export const confluenceConnector: ConnectorConfig = {
id: 'confluence',
name: 'Confluence',
Expand Down Expand Up @@ -285,24 +307,16 @@ export const confluenceConnector: ConnectorConfig = {
const labels = labelMap.get(String(page.id)) ?? []

const links = page._links as Record<string, unknown> | undefined
const version = page.version as Record<string, unknown> | undefined
const versionNumber = version?.number
const stub = pageToStub(page, {
spaceId: page.spaceId,
labels,
sourceUrl: links?.webui ? `https://${domain}/wiki${links.webui}` : undefined,
})

return {
externalId: String(page.id),
title: (page.title as string) || 'Untitled',
...stub,
content: plainText,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: links?.webui ? `https://${domain}/wiki${links.webui}` : undefined,
contentHash: `confluence:${page.id}:${versionNumber ?? ''}`,
metadata: {
spaceId: page.spaceId,
status: page.status,
version: versionNumber,
labels,
lastModified: version?.createdAt,
},
}
},

Expand All @@ -323,7 +337,7 @@ export const confluenceConnector: ConnectorConfig = {
}

try {
const cloudId = await getConfluenceCloudId(domain, accessToken)
const cloudId = await getConfluenceCloudId(domain, accessToken, VALIDATE_RETRY_OPTIONS)
const spaceUrl = `https://api.atlassian.com/ex/confluence/${cloudId}/wiki/api/v2/spaces?keys=${encodeURIComponent(spaceKey)}&limit=1`
const response = await fetchWithRetry(
spaceUrl,
Expand All @@ -345,8 +359,7 @@ export const confluenceConnector: ConnectorConfig = {
}
return { valid: true }
} catch (error) {
const message = error instanceof Error ? error.message : 'Failed to validate configuration'
return { valid: false, error: message }
return { valid: false, error: toError(error).message || 'Failed to validate configuration' }
}
},

Expand Down Expand Up @@ -420,28 +433,11 @@ async function listDocumentsV2(
const results = data.results || []

const documents: ExternalDocument[] = results.map((page: Record<string, unknown>) => {
const pageId = String(page.id)
const version = page.version as Record<string, unknown> | undefined
const versionNumber = version?.number

return {
externalId: pageId,
title: (page.title as string) || 'Untitled',
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: (page._links as Record<string, string>)?.webui
? `https://${domain}/wiki${(page._links as Record<string, string>).webui}`
: undefined,
contentHash: `confluence:${pageId}:${versionNumber ?? ''}`,
metadata: {
spaceId: page.spaceId,
status: page.status,
version: versionNumber,
labels: [],
lastModified: version?.createdAt,
},
}
const links = page._links as Record<string, string> | undefined
return pageToStub(page, {
spaceId: page.spaceId,
sourceUrl: links?.webui ? `https://${domain}/wiki${links.webui}` : undefined,
})
})

let nextCursor: string | undefined
Expand Down Expand Up @@ -493,7 +489,11 @@ async function listAllContentTypes(
pagesDone = parsed.pagesDone === true
blogsDone = parsed.blogsDone === true
} catch {
pageCursor = cursor
/**
* Older bare-string cursors are no longer emitted; fall through and
* restart instead of silently re-listing blogposts from page 0.
*/
logger.warn('Ignoring unparseable Confluence cursor; restarting listing')
}
}

Expand Down
9 changes: 5 additions & 4 deletions apps/sim/connectors/evernote/evernote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,8 @@ export const evernoteConnector: ConnectorConfig = {
const retryOptions = { maxRetries: 3, initialDelayMs: 500 }
const note = await apiGetNote(accessToken, externalId, retryOptions)
const plainText = htmlToPlainText(note.content)
if (!plainText.trim()) return null
const title = note.title || 'Untitled'
const content = plainText.trim() ? plainText : title

const shardId = extractShardId(accessToken)
const userId = extractUserId(accessToken)
Expand Down Expand Up @@ -494,8 +495,8 @@ export const evernoteConnector: ConnectorConfig = {

return {
externalId,
title: note.title || 'Untitled',
content: plainText,
title,
content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: `https://${host}/shard/${shardId}/nl/${userId}/${externalId}/`,
Expand Down Expand Up @@ -539,7 +540,7 @@ export const evernoteConnector: ConnectorConfig = {

return { valid: true }
} catch (error) {
const message = error instanceof Error ? error.message : 'Failed to connect to Evernote'
const message = toError(error).message || 'Failed to connect to Evernote'
return { valid: false, error: message }
}
},
Expand Down
105 changes: 98 additions & 7 deletions apps/sim/connectors/github/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@ const logger = createLogger('GitHubConnector')
const GITHUB_API_URL = 'https://api.github.com'
const BATCH_SIZE = 30
const GIT_SHA_PREFIX = 'git-sha:'
const MAX_FILE_SIZE = 10 * 1024 * 1024 // 10 MB
const BINARY_SNIFF_BYTES = 8000

/**
 * Heuristic binary detection: Git treats files containing a NUL byte in the
 * first 8000 bytes as binary. Matches `git diff` / `git grep` semantics.
 */
function isBinaryBuffer(buf: Buffer): boolean {
  // subarray clamps to the buffer length, so short buffers are handled too.
  return buf.subarray(0, BINARY_SNIFF_BYTES).includes(0)
}

/**
* Parses the repository string into owner and repo.
Expand Down Expand Up @@ -90,6 +104,48 @@ async function fetchTree(
return (data.tree || []).filter((item: TreeItem) => item.type === 'blob')
}

/**
 * Fetches blob content via the Git Blobs API. Used as a fallback when the
 * `/contents/` endpoint cannot return the file body (files larger than 1 MB
 * return `content: ""` and `encoding: "none"`). Supports blobs up to 100 MB.
 */
async function fetchBlobContent(
  accessToken: string,
  owner: string,
  repo: string,
  sha: string
): Promise<string | null> {
  const blobUrl = `${GITHUB_API_URL}/repos/${owner}/${repo}/git/blobs/${encodeURIComponent(sha)}`
  const response = await fetchWithRetry(blobUrl, {
    method: 'GET',
    headers: {
      Accept: 'application/vnd.github+json',
      Authorization: `Bearer ${accessToken}`,
      'X-GitHub-Api-Version': '2022-11-28',
    },
  })

  if (!response.ok) {
    throw new Error(`Failed to fetch git blob ${sha}: ${response.status}`)
  }

  const payload = await response.json()
  const encoding = payload.encoding as string | undefined

  /**
   * Per https://docs.github.com/en/rest/git/blobs the Blobs API only ever
   * returns base64. Refuse to silently persist empty content for an
   * unexpected encoding so a sync surfaces the error instead.
   */
  if (encoding !== 'base64') {
    throw new Error(`Unexpected git blob encoding for ${sha}: ${encoding ?? 'undefined'}`)
  }

  const decoded = Buffer.from((payload.content as string) || '', 'base64')
  // null signals the caller to skip this file as binary content.
  return isBinaryBuffer(decoded) ? null : decoded.toString('utf8')
}
Comment thread
waleedlatif1 marked this conversation as resolved.

/**
* Creates a lightweight stub ExternalDocument from a tree item.
* Uses the Git blob SHA as contentHash for change detection, avoiding
Expand All @@ -108,7 +164,7 @@ function treeItemToStub(
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: `https://github.com/${owner}/${repo}/blob/${encodeURIComponent(branch)}/${item.path.split('/').map(encodeURIComponent).join('/')}`,
sourceUrl: `https://github.com/${owner}/${repo}/blob/${branch.split('/').map(encodeURIComponent).join('/')}/${item.path.split('/').map(encodeURIComponent).join('/')}`,
contentHash: `${GIT_SHA_PREFIX}${item.sha}`,
metadata: {
path: item.path,
Expand Down Expand Up @@ -189,10 +245,11 @@ export const githubConnector: ConnectorConfig = {
} else {
const tree = await fetchTree(accessToken, owner, repo, branch)

// Filter by path prefix and extensions
// Filter by path prefix, extensions, and size
const filtered = tree.filter((item) => {
if (pathPrefix && !item.path.startsWith(pathPrefix)) return false
if (!matchesExtension(item.path, extSet)) return false
if (typeof item.size === 'number' && item.size > MAX_FILE_SIZE) return false
return true
})

Expand Down Expand Up @@ -252,23 +309,57 @@ export const githubConnector: ConnectorConfig = {

if (!response.ok) {
if (response.status === 404) return null
if (response.status === 403) {
logger.info('Skipping GitHub file rejected by Contents API', {
path,
status: response.status,
})
return null
}
throw new Error(`Failed to fetch file ${path}: ${response.status}`)
}

const lastModifiedHeader = response.headers.get('last-modified') || undefined
const data = await response.json()
const content =
data.encoding === 'base64'
? Buffer.from(data.content as string, 'base64').toString('utf-8')
: (data.content as string) || ''

const size = typeof data.size === 'number' ? data.size : 0
if (size > MAX_FILE_SIZE) {
logger.info('Skipping GitHub file exceeding size limit', {
path,
size,
limit: MAX_FILE_SIZE,
})
return null
}

const rawContent = (data.content as string) || ''
const encoding = data.encoding as string | undefined
let content: string
if (encoding === 'base64' && rawContent.length > 0) {
const buf = Buffer.from(rawContent, 'base64')
if (isBinaryBuffer(buf)) {
logger.info('Skipping binary GitHub file', { path, size })
return null
}
content = buf.toString('utf8')
} else if (encoding === 'none' && data.sha && size > 0) {
const blobContent = await fetchBlobContent(accessToken, owner, repo, data.sha as string)
if (blobContent === null) {
logger.info('Skipping binary GitHub file', { path, size })
return null
}
content = blobContent
} else {
content = ''
}
Comment thread
waleedlatif1 marked this conversation as resolved.

return {
externalId,
title: path.split('/').pop() || path,
content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: `https://github.com/${owner}/${repo}/blob/${encodeURIComponent(branch)}/${path.split('/').map(encodeURIComponent).join('/')}`,
sourceUrl: `https://github.com/${owner}/${repo}/blob/${branch.split('/').map(encodeURIComponent).join('/')}/${path.split('/').map(encodeURIComponent).join('/')}`,
contentHash: `${GIT_SHA_PREFIX}${data.sha as string}`,
metadata: {
path,
Expand Down
15 changes: 11 additions & 4 deletions apps/sim/connectors/google-docs/google-docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,22 @@ function extractTextFromDocsBody(doc: DocsDocument): string {
if (!paragraph?.elements) continue

const prefix = headingPrefix(paragraph.paragraphStyle?.namedStyleType)
const text = paragraph.elements.map((el) => el.textRun?.content ?? '').join('')
/**
* Each paragraph's final `textRun.content` already ends with `\n`. Strip
* it before joining with `\n` so a heading followed by a body paragraph
* is separated by a single newline, not two.
*/
const text = paragraph.elements
.map((el) => el.textRun?.content ?? '')
.join('')
.replace(/\n+$/, '')

if (text.trim()) {
parts.push(`${prefix}${text}`)
}
}

return parts.join('').trim()
return parts.join('\n').trim()
Comment thread
waleedlatif1 marked this conversation as resolved.
}

/**
Expand Down Expand Up @@ -349,8 +357,7 @@ export const googleDocsConnector: ConnectorConfig = {

return { valid: true }
} catch (error) {
const message = error instanceof Error ? error.message : 'Failed to validate configuration'
return { valid: false, error: message }
return { valid: false, error: toError(error).message || 'Failed to validate configuration' }
}
},

Expand Down
Loading
Loading