diff --git a/CHANGELOG.md b/CHANGELOG.md index 905cc6de..d4cc114c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- [Experimental][Sourcebot EE] Added GitLab permission syncing. [#585](https://github.com/sourcebot-dev/sourcebot/pull/585) + ### Fixed - [ask sb] Fixed issue where reasoning tokens would appear in `text` content for openai compatible models. [#582](https://github.com/sourcebot-dev/sourcebot/pull/582) - Fixed issue with GitHub app token tracking and refreshing. [#583](https://github.com/sourcebot-dev/sourcebot/pull/583) diff --git a/docs/docs/configuration/auth/providers.mdx b/docs/docs/configuration/auth/providers.mdx index 7fda085a..c3d54363 100644 --- a/docs/docs/configuration/auth/providers.mdx +++ b/docs/docs/configuration/auth/providers.mdx @@ -52,6 +52,14 @@ Optional environment variables: [Auth.js GitLab Provider Docs](https://authjs.dev/getting-started/providers/gitlab) +Authentication using GitLab is supported via an [OAuth 2.0 app](https://docs.gitlab.com/integration/oauth_provider/#create-an-instance-wide-application) installed on the GitLab instance. Follow the instructions in the [GitLab docs](https://docs.gitlab.com/integration/oauth_provider/) to create an app. The callback URL should be configured to `/api/auth/callback/gitlab`, and the following scopes need to be set: + +| Scope | Required | Notes | +|------------|----------|----------------------------------------------------------------------------------------------------| +| read_user | Yes | Allows Sourcebot to read basic user information required for authentication. | +| read_api | Conditional | Required **only** when [permission syncing](/docs/features/permission-syncing) is enabled. Enables Sourcebot to list all repositories and projects for the authenticated user. 
| + + **Required environment variables:** - `AUTH_EE_GITLAB_CLIENT_ID` - `AUTH_EE_GITLAB_CLIENT_SECRET` diff --git a/docs/docs/features/permission-syncing.mdx b/docs/docs/features/permission-syncing.mdx index 527b81f0..ee6a96e7 100644 --- a/docs/docs/features/permission-syncing.mdx +++ b/docs/docs/features/permission-syncing.mdx @@ -35,7 +35,7 @@ We are actively working on supporting more code hosts. If you'd like to see a sp | Platform | Permission syncing | |:----------|------------------------------| | [GitHub (GHEC & GHEC Server)](/docs/features/permission-syncing#github) | ✅ | -| GitLab | 🛑 | +| [GitLab (Self-managed & Cloud)](/docs/features/permission-syncing#gitlab) | ✅ | | Bitbucket Cloud | 🛑 | | Bitbucket Data Center | 🛑 | | Gitea | 🛑 | @@ -59,6 +59,18 @@ Permission syncing works with **GitHub.com**, **GitHub Enterprise Cloud**, and * - A GitHub OAuth provider must be configured to (1) correlate a Sourcebot user with a GitHub user, and (2) to list repositories that the user has access to for [User driven syncing](/docs/features/permission-syncing#how-it-works). - OAuth tokens must assume the `repo` scope in order to use the [List repositories for the authenticated user API](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repositories-for-the-authenticated-user) during [User driven syncing](/docs/features/permission-syncing#how-it-works). Sourcebot **will only** use this token for **reads**. +## GitLab + +Prerequisite: [Add GitLab as an OAuth provider](/docs/configuration/auth/providers#gitlab). + +Permission syncing works with **GitLab Self-managed** and **GitLab Cloud**. Users with **Guest** role or above with membership to a group or project will have their access synced to Sourcebot. Both direct and indirect membership to a group or project will be synced with Sourcebot. For more details, see the [GitLab docs](https://docs.gitlab.com/user/project/members/#membership-types). 
+ + +**Notes:** +- A GitLab OAuth provider must be configured to (1) correlate a Sourcebot user with a GitLab user, and (2) to list repositories that the user has access to for [User driven syncing](/docs/features/permission-syncing#how-it-works). +- OAuth tokens require the `read_api` scope in order to use the [List projects for the authenticated user API](https://docs.gitlab.com/ee/api/projects.html#list-all-projects) during [User driven syncing](/docs/features/permission-syncing#how-it-works). + + # How it works Permission syncing works by periodically syncing ACLs from the code host(s) to Sourcebot to build an internal mapping between Users and Repositories. This mapping is hydrated in two directions: diff --git a/packages/backend/src/constants.ts b/packages/backend/src/constants.ts index d6db3bec..9ba858de 100644 --- a/packages/backend/src/constants.ts +++ b/packages/backend/src/constants.ts @@ -5,6 +5,7 @@ export const SINGLE_TENANT_ORG_ID = 1; export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES = [ 'github', + 'gitlab', ]; export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos'); diff --git a/packages/backend/src/ee/repoPermissionSyncer.ts b/packages/backend/src/ee/repoPermissionSyncer.ts index 1e7ec815..4353c34e 100644 --- a/packages/backend/src/ee/repoPermissionSyncer.ts +++ b/packages/backend/src/ee/repoPermissionSyncer.ts @@ -7,6 +7,7 @@ import { Redis } from 'ioredis'; import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js"; import { env } from "../env.js"; import { createOctokitFromToken, getRepoCollaborators, GITHUB_CLOUD_HOSTNAME } from "../github.js"; +import { createGitLabFromPersonalAccessToken, getProjectMembers } from "../gitlab.js"; import { Settings } from "../types.js"; import { getAuthCredentialsForRepo } from "../utils.js"; @@ -16,7 +17,9 @@ type RepoPermissionSyncJob = { const QUEUE_NAME = 'repoPermissionSyncQueue'; -const logger = createLogger('repo-permission-syncer'); +const LOG_TAG = 
'repo-permission-syncer'; +const logger = createLogger(LOG_TAG); +const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`); export class RepoPermissionSyncer { private queue: Queue; @@ -109,28 +112,31 @@ export class RepoPermissionSyncer { } private async schedulePermissionSync(repos: Repo[]) { - await this.db.$transaction(async (tx) => { - const jobs = await tx.repoPermissionSyncJob.createManyAndReturn({ - data: repos.map(repo => ({ - repoId: repo.id, - })), - }); - - await this.queue.addBulk(jobs.map((job) => ({ - name: 'repoPermissionSyncJob', - data: { - jobId: job.id, - }, - opts: { - removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE, - removeOnFail: env.REDIS_REMOVE_ON_FAIL, - } - }))) + // @note: we don't perform this in a transaction because + // we want to avoid the situation where a job is created and run + // prior to the transaction being committed. + const jobs = await this.db.repoPermissionSyncJob.createManyAndReturn({ + data: repos.map(repo => ({ + repoId: repo.id, + })), }); + + await this.queue.addBulk(jobs.map((job) => ({ + name: 'repoPermissionSyncJob', + data: { + jobId: job.id, + }, + opts: { + removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE, + removeOnFail: env.REDIS_REMOVE_ON_FAIL, + } + }))) } private async runJob(job: Job) { const id = job.data.jobId; + const logger = createJobLogger(id); + const { repo } = await this.db.repoPermissionSyncJob.update({ where: { id, @@ -194,6 +200,33 @@ export class RepoPermissionSyncer { }, }); + return accounts.map(account => account.userId); + } else if (repo.external_codeHostType === 'gitlab') { + const api = await createGitLabFromPersonalAccessToken({ + token: credentials.token, + url: credentials.hostUrl, + }); + + const projectId = repo.external_id; + if (!projectId) { + throw new Error(`Repo ${id} does not have an external_id`); + } + + const members = await getProjectMembers(projectId, api); + const gitlabUserIds = members.map(member => member.id.toString()); + + const accounts 
= await this.db.account.findMany({ + where: { + provider: 'gitlab', + providerAccountId: { + in: gitlabUserIds, + } + }, + select: { + userId: true, + }, + }); + return accounts.map(account => account.userId); } @@ -221,6 +254,8 @@ export class RepoPermissionSyncer { } private async onJobCompleted(job: Job) { + const logger = createJobLogger(job.data.jobId); + const { repo } = await this.db.repoPermissionSyncJob.update({ where: { id: job.data.jobId, @@ -243,6 +278,8 @@ export class RepoPermissionSyncer { } private async onJobFailed(job: Job | undefined, err: Error) { + const logger = createJobLogger(job?.data.jobId ?? 'unknown'); + Sentry.captureException(err, { tags: { jobId: job?.data.jobId, diff --git a/packages/backend/src/ee/userPermissionSyncer.ts b/packages/backend/src/ee/userPermissionSyncer.ts index 6ef77bcf..f3069eaf 100644 --- a/packages/backend/src/ee/userPermissionSyncer.ts +++ b/packages/backend/src/ee/userPermissionSyncer.ts @@ -6,10 +6,13 @@ import { Redis } from "ioredis"; import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js"; import { env } from "../env.js"; import { createOctokitFromToken, getReposForAuthenticatedUser } from "../github.js"; +import { createGitLabFromOAuthToken, getProjectsForAuthenticatedUser } from "../gitlab.js"; import { hasEntitlement } from "@sourcebot/shared"; import { Settings } from "../types.js"; -const logger = createLogger('user-permission-syncer'); +const LOG_TAG = 'user-permission-syncer'; +const logger = createLogger(LOG_TAG); +const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`); const QUEUE_NAME = 'userPermissionSyncQueue'; @@ -110,28 +113,31 @@ export class UserPermissionSyncer { } private async schedulePermissionSync(users: User[]) { - await this.db.$transaction(async (tx) => { - const jobs = await tx.userPermissionSyncJob.createManyAndReturn({ - data: users.map(user => ({ - userId: user.id, - })), - }); - - await this.queue.addBulk(jobs.map((job) => ({ - name: 
'userPermissionSyncJob', - data: { - jobId: job.id, - }, - opts: { - removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE, - removeOnFail: env.REDIS_REMOVE_ON_FAIL, - } - }))) + // @note: we don't perform this in a transaction because + // we want to avoid the situation where a job is created and run + // prior to the transaction being committed. + const jobs = await this.db.userPermissionSyncJob.createManyAndReturn({ + data: users.map(user => ({ + userId: user.id, + })), }); + + await this.queue.addBulk(jobs.map((job) => ({ + name: 'userPermissionSyncJob', + data: { + jobId: job.id, + }, + opts: { + removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE, + removeOnFail: env.REDIS_REMOVE_ON_FAIL, + } + }))) } private async runJob(job: Job) { const id = job.data.jobId; + const logger = createJobLogger(id); + const { user } = await this.db.userPermissionSyncJob.update({ where: { id, @@ -183,6 +189,37 @@ export class UserPermissionSyncer { } }); + repos.forEach(repo => aggregatedRepoIds.add(repo.id)); + } else if (account.provider === 'gitlab') { + if (!account.access_token) { + throw new Error(`User '${user.email}' does not have a GitLab OAuth access token associated with their GitLab account.`); + } + + const api = await createGitLabFromOAuthToken({ + oauthToken: account.access_token, + url: env.AUTH_EE_GITLAB_BASE_URL, + }); + + // @note: we only care about the private and internal repos since we don't need to build a mapping + // for public repos. 
+ // @see: packages/web/src/prisma.ts + const privateGitLabProjects = await getProjectsForAuthenticatedUser('private', api); + const internalGitLabProjects = await getProjectsForAuthenticatedUser('internal', api); + + const gitLabProjectIds = [ + ...privateGitLabProjects, + ...internalGitLabProjects, + ].map(project => project.id.toString()); + + const repos = await this.db.repo.findMany({ + where: { + external_codeHostType: 'gitlab', + external_id: { + in: gitLabProjectIds, + } + } + }); + repos.forEach(repo => aggregatedRepoIds.add(repo.id)); } } @@ -212,6 +249,8 @@ export class UserPermissionSyncer { } private async onJobCompleted(job: Job) { + const logger = createJobLogger(job.data.jobId); + const { user } = await this.db.userPermissionSyncJob.update({ where: { id: job.data.jobId, @@ -234,6 +273,8 @@ export class UserPermissionSyncer { } private async onJobFailed(job: Job | undefined, err: Error) { + const logger = createJobLogger(job?.data.jobId ?? 'unknown'); + Sentry.captureException(err, { tags: { jobId: job?.data.jobId, @@ -260,7 +301,7 @@ export class UserPermissionSyncer { logger.error(errorMessage(user.email ?? 
user.id)); } else { - logger.error(errorMessage('unknown user (id not found)')); + logger.error(errorMessage('unknown job (id not found)')); } } } \ No newline at end of file diff --git a/packages/backend/src/env.ts b/packages/backend/src/env.ts index 841caf98..c3ea3679 100644 --- a/packages/backend/src/env.ts +++ b/packages/backend/src/env.ts @@ -56,6 +56,7 @@ export const env = createEnv({ EXPERIMENT_EE_PERMISSION_SYNC_ENABLED: booleanSchema.default('false'), AUTH_EE_GITHUB_BASE_URL: z.string().optional(), + AUTH_EE_GITLAB_BASE_URL: z.string().default("https://gitlab.com"), }, runtimeEnv: process.env, emptyStringAsUndefined: true, diff --git a/packages/backend/src/gitlab.ts b/packages/backend/src/gitlab.ts index e4954b34..55bae70c 100644 --- a/packages/backend/src/gitlab.ts +++ b/packages/backend/src/gitlab.ts @@ -12,6 +12,28 @@ import { getTokenFromConfig } from "@sourcebot/crypto"; const logger = createLogger('gitlab'); export const GITLAB_CLOUD_HOSTNAME = "gitlab.com"; +export const createGitLabFromPersonalAccessToken = async ({ token, url }: { token?: string, url?: string }) => { + const isGitLabCloud = url ? new URL(url).hostname === GITLAB_CLOUD_HOSTNAME : false; + return new Gitlab({ + token, + ...(isGitLabCloud ? {} : { + host: url, + }), + queryTimeout: env.GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS * 1000, + }); +} + +export const createGitLabFromOAuthToken = async ({ oauthToken, url }: { oauthToken?: string, url?: string }) => { + const isGitLabCloud = url ? new URL(url).hostname === GITLAB_CLOUD_HOSTNAME : false; + return new Gitlab({ + oauthToken, + ...(isGitLabCloud ? {} : { + host: url, + }), + queryTimeout: env.GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS * 1000, + }); +} + export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, orgId: number, db: PrismaClient) => { const hostname = config.url ? 
new URL(config.url).hostname : @@ -22,15 +44,10 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o hostname === GITLAB_CLOUD_HOSTNAME ? env.FALLBACK_GITLAB_CLOUD_TOKEN : undefined; - - const api = new Gitlab({ - ...(token ? { - token, - } : {}), - ...(config.url ? { - host: config.url, - } : {}), - queryTimeout: env.GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS * 1000, + + const api = await createGitLabFromPersonalAccessToken({ + token, + url: config.url, }); let allRepos: ProjectSchema[] = []; @@ -261,4 +278,38 @@ export const shouldExcludeProject = ({ } return false; +} + +export const getProjectMembers = async (projectId: string, api: InstanceType<typeof Gitlab>) => { + try { + const fetchFn = () => api.ProjectMembers.all(projectId, { + perPage: 100, + includeInherited: true, + }); + + const members = await fetchWithRetry(fetchFn, `project ${projectId}`, logger); + return members as Array<{ id: number }>; + } catch (error) { + Sentry.captureException(error); + logger.error(`Failed to fetch members for project ${projectId}.`, error); + throw error; + } +} + +export const getProjectsForAuthenticatedUser = async (visibility: 'private' | 'internal' | 'public' | 'all' = 'all', api: InstanceType<typeof Gitlab>) => { + try { + const fetchFn = () => api.Projects.all({ + membership: true, + ...(visibility !== 'all' ? 
{ + visibility, + } : {}), + perPage: 100, + }); + const response = await fetchWithRetry(fetchFn, `authenticated user`, logger); + return response; + } catch (error) { + Sentry.captureException(error); + logger.error(`Failed to fetch projects for authenticated user.`, error); + throw error; + } } \ No newline at end of file diff --git a/packages/backend/src/repoCompileUtils.ts b/packages/backend/src/repoCompileUtils.ts index d78455e0..8e8b1f26 100644 --- a/packages/backend/src/repoCompileUtils.ts +++ b/packages/backend/src/repoCompileUtils.ts @@ -121,7 +121,6 @@ export const compileGitlabConfig = async ( const projectUrl = `${hostUrl}/${project.path_with_namespace}`; const cloneUrl = new URL(project.http_url_to_repo); const isFork = project.forked_from_project !== undefined; - // @todo: we will need to double check whether 'internal' should also be considered public or not. const isPublic = project.visibility === 'public'; const repoDisplayName = project.path_with_namespace; const repoName = path.join(repoNameRoot, repoDisplayName); diff --git a/packages/web/src/ee/features/sso/sso.ts b/packages/web/src/ee/features/sso/sso.ts index 287453d1..7accc065 100644 --- a/packages/web/src/ee/features/sso/sso.ts +++ b/packages/web/src/ee/features/sso/sso.ts @@ -51,7 +51,16 @@ export const getSSOProviders = (): Provider[] => { authorization: { url: `${env.AUTH_EE_GITLAB_BASE_URL}/oauth/authorize`, params: { - scope: "read_user", + scope: [ + "read_user", + // Permission syncing requires the `read_api` scope in order to fetch projects + // for the authenticated user and project members. + // @see: https://docs.gitlab.com/ee/api/projects.html#list-all-projects + ...(env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && hasEntitlement('permission-syncing') ? + ['read_api'] : + [] + ), + ].join(' '), }, }, token: {