Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
d9d3b56
fix(connectors): harden 10 KB connectors after audit
waleedlatif1 May 2, 2026
6e58ee3
fix(connectors/slack): include oldestTs in hash to catch window-shift…
waleedlatif1 May 2, 2026
1c597dd
fix(connectors): address remaining audit findings
waleedlatif1 May 2, 2026
f8f033e
fix(connectors): address PR review comments
waleedlatif1 May 2, 2026
e05291b
fix(zendesk): add fallback message for empty validateConfig error
waleedlatif1 May 2, 2026
cb4e2c8
docs(obsidian): clarify hash strategy and re-hydration behavior
waleedlatif1 May 2, 2026
80337c4
fix(connectors): address parallel audit findings
waleedlatif1 May 2, 2026
4e852c5
fix(servicenow): allow string workflowState values (published/draft/r…
waleedlatif1 May 2, 2026
b943c41
fix(obsidian): validateConfig must check authenticated field, not jus…
waleedlatif1 May 2, 2026
2eb96ec
fix(jira): encode collected count in cursor so maxIssues cap works wi…
waleedlatif1 May 2, 2026
9014c08
fix(connectors): address remaining audit findings
waleedlatif1 May 2, 2026
c877c31
fix(github): apply binary check to blob fallback path
waleedlatif1 May 2, 2026
265071c
fix(confluence): reuse pageToStub in getDocument to keep hashes in sync
waleedlatif1 May 2, 2026
589db98
fix(connectors): address audit findings across 7 connectors
waleedlatif1 May 2, 2026
d3f1e66
fix(servicenow): allow non-ASCII KB category names
waleedlatif1 May 2, 2026
f4790e0
fix(servicenow): restore wiki field fallback for kb_knowledge
waleedlatif1 May 2, 2026
591874c
fix(connectors): jira pagination signal and google-docs paragraph join
waleedlatif1 May 2, 2026
71f6256
fix(jira): rely solely on nextPageToken for end-of-results
waleedlatif1 May 2, 2026
42fa643
fix(slack): exclude DM channel IDs from direct lookup
waleedlatif1 May 3, 2026
75f0b1a
fix(slack): align validateConfig channel ID regex with resolveChannel
waleedlatif1 May 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 57 additions & 57 deletions apps/sim/connectors/confluence/confluence.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,35 +80,57 @@ async function fetchLabelsForPages(
}

/**
* Converts a v1 CQL search result item to a lightweight metadata stub.
* Produces a canonical metadata stub with a deterministic contentHash that
* does not depend on which API surface (v1 CQL or v2) returned the page.
*/
function cqlResultToStub(item: Record<string, unknown>, domain: string): ExternalDocument {
const version = item.version as Record<string, unknown> | undefined
const links = item._links as Record<string, string> | undefined
const metadata = item.metadata as Record<string, unknown> | undefined
const labelsWrapper = metadata?.labels as Record<string, unknown> | undefined
const labelResults = (labelsWrapper?.results || []) as Record<string, unknown>[]
const labels = labelResults.map((l) => l.name as string)
const versionNumber = version?.number
function pageToStub(
page: Record<string, unknown>,
options: {
spaceId?: unknown
labels?: string[]
sourceUrl?: string
} = {}
): ExternalDocument {
const version = page.version as Record<string, unknown> | undefined
const versionNumber = version?.number as number | undefined
const lastModified = (version?.createdAt ?? version?.when ?? '') as string
const versionKey = versionNumber ?? lastModified

return {
externalId: String(item.id),
title: (item.title as string) || 'Untitled',
externalId: String(page.id),
title: (page.title as string) || 'Untitled',
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: links?.webui ? `https://${domain}/wiki${links.webui}` : undefined,
contentHash: `confluence:${item.id}:${versionNumber ?? ''}`,
sourceUrl: options.sourceUrl,
contentHash: `confluence:${page.id}:${versionKey}`,
metadata: {
spaceId: (item.space as Record<string, unknown>)?.key,
status: item.status,
spaceId: options.spaceId,
status: page.status,
version: versionNumber,
labels,
lastModified: version?.when,
labels: options.labels ?? [],
lastModified,
},
}
}

/**
 * Converts a v1 CQL search result item to a lightweight metadata stub.
 */
function cqlResultToStub(item: Record<string, unknown>, domain: string): ExternalDocument {
  const itemLinks = item._links as Record<string, string> | undefined
  const itemMeta = item.metadata as Record<string, unknown> | undefined
  const wrappedLabels = itemMeta?.labels as Record<string, unknown> | undefined
  // v1 nests labels under metadata.labels.results; each entry carries a `name`.
  const labelEntries = (wrappedLabels?.results || []) as Record<string, unknown>[]

  return pageToStub(item, {
    spaceId: (item.space as Record<string, unknown>)?.key,
    labels: labelEntries.map((entry) => entry.name as string),
    sourceUrl: itemLinks?.webui ? `https://${domain}/wiki${itemLinks.webui}` : undefined,
  })
}

export const confluenceConnector: ConnectorConfig = {
id: 'confluence',
name: 'Confluence',
Expand Down Expand Up @@ -285,24 +307,16 @@ export const confluenceConnector: ConnectorConfig = {
const labels = labelMap.get(String(page.id)) ?? []

const links = page._links as Record<string, unknown> | undefined
const version = page.version as Record<string, unknown> | undefined
const versionNumber = version?.number
const stub = pageToStub(page, {
spaceId: page.spaceId,
labels,
sourceUrl: links?.webui ? `https://${domain}/wiki${links.webui}` : undefined,
})

return {
externalId: String(page.id),
title: (page.title as string) || 'Untitled',
...stub,
content: plainText,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: links?.webui ? `https://${domain}/wiki${links.webui}` : undefined,
contentHash: `confluence:${page.id}:${versionNumber ?? ''}`,
metadata: {
spaceId: page.spaceId,
status: page.status,
version: versionNumber,
labels,
lastModified: version?.createdAt,
},
}
},

Expand All @@ -323,7 +337,7 @@ export const confluenceConnector: ConnectorConfig = {
}

try {
const cloudId = await getConfluenceCloudId(domain, accessToken)
const cloudId = await getConfluenceCloudId(domain, accessToken, VALIDATE_RETRY_OPTIONS)
const spaceUrl = `https://api.atlassian.com/ex/confluence/${cloudId}/wiki/api/v2/spaces?keys=${encodeURIComponent(spaceKey)}&limit=1`
const response = await fetchWithRetry(
spaceUrl,
Expand All @@ -345,8 +359,7 @@ export const confluenceConnector: ConnectorConfig = {
}
return { valid: true }
} catch (error) {
const message = error instanceof Error ? error.message : 'Failed to validate configuration'
return { valid: false, error: message }
return { valid: false, error: toError(error).message || 'Failed to validate configuration' }
}
},

Expand Down Expand Up @@ -420,28 +433,11 @@ async function listDocumentsV2(
const results = data.results || []

const documents: ExternalDocument[] = results.map((page: Record<string, unknown>) => {
const pageId = String(page.id)
const version = page.version as Record<string, unknown> | undefined
const versionNumber = version?.number

return {
externalId: pageId,
title: (page.title as string) || 'Untitled',
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: (page._links as Record<string, string>)?.webui
? `https://${domain}/wiki${(page._links as Record<string, string>).webui}`
: undefined,
contentHash: `confluence:${pageId}:${versionNumber ?? ''}`,
metadata: {
spaceId: page.spaceId,
status: page.status,
version: versionNumber,
labels: [],
lastModified: version?.createdAt,
},
}
const links = page._links as Record<string, string> | undefined
return pageToStub(page, {
spaceId: page.spaceId,
sourceUrl: links?.webui ? `https://${domain}/wiki${links.webui}` : undefined,
})
})

let nextCursor: string | undefined
Expand Down Expand Up @@ -493,7 +489,11 @@ async function listAllContentTypes(
pagesDone = parsed.pagesDone === true
blogsDone = parsed.blogsDone === true
} catch {
pageCursor = cursor
/**
* Older bare-string cursors are no longer emitted; fall through and
* restart instead of silently re-listing blogposts from page 0.
*/
logger.warn('Ignoring unparseable Confluence cursor; restarting listing')
}
}

Expand Down
9 changes: 5 additions & 4 deletions apps/sim/connectors/evernote/evernote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,8 @@ export const evernoteConnector: ConnectorConfig = {
const retryOptions = { maxRetries: 3, initialDelayMs: 500 }
const note = await apiGetNote(accessToken, externalId, retryOptions)
const plainText = htmlToPlainText(note.content)
if (!plainText.trim()) return null
const title = note.title || 'Untitled'
const content = plainText.trim() ? plainText : title

const shardId = extractShardId(accessToken)
const userId = extractUserId(accessToken)
Expand Down Expand Up @@ -494,8 +495,8 @@ export const evernoteConnector: ConnectorConfig = {

return {
externalId,
title: note.title || 'Untitled',
content: plainText,
title,
content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: `https://${host}/shard/${shardId}/nl/${userId}/${externalId}/`,
Expand Down Expand Up @@ -539,7 +540,7 @@ export const evernoteConnector: ConnectorConfig = {

return { valid: true }
} catch (error) {
const message = error instanceof Error ? error.message : 'Failed to connect to Evernote'
const message = toError(error).message || 'Failed to connect to Evernote'
return { valid: false, error: message }
}
},
Expand Down
105 changes: 98 additions & 7 deletions apps/sim/connectors/github/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@ const logger = createLogger('GitHubConnector')
const GITHUB_API_URL = 'https://api.github.com'
const BATCH_SIZE = 30
const GIT_SHA_PREFIX = 'git-sha:'
const MAX_FILE_SIZE = 10 * 1024 * 1024 // 10 MB
const BINARY_SNIFF_BYTES = 8000

/**
 * Heuristic binary detection: Git treats files containing a NUL byte in the
 * first 8000 bytes as binary. Matches `git diff` / `git grep` semantics.
 */
function isBinaryBuffer(buf: Buffer): boolean {
  // subarray clamps to the buffer length, so short buffers are handled too.
  return buf.subarray(0, BINARY_SNIFF_BYTES).includes(0)
}

/**
* Parses the repository string into owner and repo.
Expand Down Expand Up @@ -90,6 +104,48 @@ async function fetchTree(
return (data.tree || []).filter((item: TreeItem) => item.type === 'blob')
}

/**
 * Fetches blob content via the Git Blobs API. Used as a fallback when the
 * `/contents/` endpoint cannot return the file body (files larger than 1 MB
 * return `content: ""` and `encoding: "none"`). Supports blobs up to 100 MB.
 */
async function fetchBlobContent(
  accessToken: string,
  owner: string,
  repo: string,
  sha: string
): Promise<string | null> {
  const blobUrl = `${GITHUB_API_URL}/repos/${owner}/${repo}/git/blobs/${encodeURIComponent(sha)}`
  const response = await fetchWithRetry(blobUrl, {
    method: 'GET',
    headers: {
      Accept: 'application/vnd.github+json',
      Authorization: `Bearer ${accessToken}`,
      'X-GitHub-Api-Version': '2022-11-28',
    },
  })

  if (!response.ok) {
    throw new Error(`Failed to fetch git blob ${sha}: ${response.status}`)
  }

  const payload = await response.json()
  const encoding = payload.encoding as string | undefined

  /**
   * Per https://docs.github.com/en/rest/git/blobs the Blobs API only ever
   * returns base64. Refuse to silently persist empty content for an
   * unexpected encoding so a sync surfaces the error instead.
   */
  if (encoding !== 'base64') {
    throw new Error(`Unexpected git blob encoding for ${sha}: ${encoding ?? 'undefined'}`)
  }

  const decoded = Buffer.from((payload.content as string) || '', 'base64')
  // null signals the caller to skip this file as binary content.
  return isBinaryBuffer(decoded) ? null : decoded.toString('utf8')
}
Comment thread
waleedlatif1 marked this conversation as resolved.

/**
* Creates a lightweight stub ExternalDocument from a tree item.
* Uses the Git blob SHA as contentHash for change detection, avoiding
Expand All @@ -108,7 +164,7 @@ function treeItemToStub(
content: '',
contentDeferred: true,
mimeType: 'text/plain',
sourceUrl: `https://github.com/${owner}/${repo}/blob/${encodeURIComponent(branch)}/${item.path.split('/').map(encodeURIComponent).join('/')}`,
sourceUrl: `https://github.com/${owner}/${repo}/blob/${branch.split('/').map(encodeURIComponent).join('/')}/${item.path.split('/').map(encodeURIComponent).join('/')}`,
contentHash: `${GIT_SHA_PREFIX}${item.sha}`,
metadata: {
path: item.path,
Expand Down Expand Up @@ -189,10 +245,11 @@ export const githubConnector: ConnectorConfig = {
} else {
const tree = await fetchTree(accessToken, owner, repo, branch)

// Filter by path prefix and extensions
// Filter by path prefix, extensions, and size
const filtered = tree.filter((item) => {
if (pathPrefix && !item.path.startsWith(pathPrefix)) return false
if (!matchesExtension(item.path, extSet)) return false
if (typeof item.size === 'number' && item.size > MAX_FILE_SIZE) return false
return true
})

Expand Down Expand Up @@ -252,23 +309,57 @@ export const githubConnector: ConnectorConfig = {

if (!response.ok) {
if (response.status === 404) return null
if (response.status === 403) {
logger.info('Skipping GitHub file rejected by Contents API', {
path,
status: response.status,
})
return null
}
throw new Error(`Failed to fetch file ${path}: ${response.status}`)
}

const lastModifiedHeader = response.headers.get('last-modified') || undefined
const data = await response.json()
const content =
data.encoding === 'base64'
? Buffer.from(data.content as string, 'base64').toString('utf-8')
: (data.content as string) || ''

const size = typeof data.size === 'number' ? data.size : 0
if (size > MAX_FILE_SIZE) {
logger.info('Skipping GitHub file exceeding size limit', {
path,
size,
limit: MAX_FILE_SIZE,
})
return null
}

const rawContent = (data.content as string) || ''
const encoding = data.encoding as string | undefined
let content: string
if (encoding === 'base64' && rawContent.length > 0) {
const buf = Buffer.from(rawContent, 'base64')
if (isBinaryBuffer(buf)) {
logger.info('Skipping binary GitHub file', { path, size })
return null
}
content = buf.toString('utf8')
} else if (encoding === 'none' && data.sha && size > 0) {
const blobContent = await fetchBlobContent(accessToken, owner, repo, data.sha as string)
if (blobContent === null) {
logger.info('Skipping binary GitHub file', { path, size })
return null
}
content = blobContent
} else {
content = ''
}
Comment thread
waleedlatif1 marked this conversation as resolved.

return {
externalId,
title: path.split('/').pop() || path,
content,
contentDeferred: false,
mimeType: 'text/plain',
sourceUrl: `https://github.com/${owner}/${repo}/blob/${encodeURIComponent(branch)}/${path.split('/').map(encodeURIComponent).join('/')}`,
sourceUrl: `https://github.com/${owner}/${repo}/blob/${branch.split('/').map(encodeURIComponent).join('/')}/${path.split('/').map(encodeURIComponent).join('/')}`,
contentHash: `${GIT_SHA_PREFIX}${data.sha as string}`,
metadata: {
path,
Expand Down
15 changes: 11 additions & 4 deletions apps/sim/connectors/google-docs/google-docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,22 @@ function extractTextFromDocsBody(doc: DocsDocument): string {
if (!paragraph?.elements) continue

const prefix = headingPrefix(paragraph.paragraphStyle?.namedStyleType)
const text = paragraph.elements.map((el) => el.textRun?.content ?? '').join('')
/**
* Each paragraph's final `textRun.content` already ends with `\n`. Strip
* it before joining with `\n` so a heading followed by a body paragraph
* is separated by a single newline, not two.
*/
const text = paragraph.elements
.map((el) => el.textRun?.content ?? '')
.join('')
.replace(/\n+$/, '')

if (text.trim()) {
parts.push(`${prefix}${text}`)
}
}

return parts.join('').trim()
return parts.join('\n').trim()
Comment thread
waleedlatif1 marked this conversation as resolved.
}

/**
Expand Down Expand Up @@ -349,8 +357,7 @@ export const googleDocsConnector: ConnectorConfig = {

return { valid: true }
} catch (error) {
const message = error instanceof Error ? error.message : 'Failed to validate configuration'
return { valid: false, error: message }
return { valid: false, error: toError(error).message || 'Failed to validate configuration' }
}
},

Expand Down
Loading
Loading