diff --git a/.github/actions/custom-build-and-push/action.yml b/.github/actions/custom-build-and-push/action.yml new file mode 100644 index 00000000000..48344237059 --- /dev/null +++ b/.github/actions/custom-build-and-push/action.yml @@ -0,0 +1,76 @@ +name: 'Build and Push Docker Image with Retry' +description: 'Attempts to build and push a Docker image, with a retry on failure' +inputs: + context: + description: 'Build context' + required: true + file: + description: 'Dockerfile location' + required: true + platforms: + description: 'Target platforms' + required: true + pull: + description: 'Always attempt to pull a newer version of the image' + required: false + default: 'true' + push: + description: 'Push the image to registry' + required: false + default: 'true' + load: + description: 'Load the image into Docker daemon' + required: false + default: 'true' + tags: + description: 'Image tags' + required: true + cache-from: + description: 'Cache sources' + required: false + cache-to: + description: 'Cache destinations' + required: false + retry-wait-time: + description: 'Time to wait before retry in seconds' + required: false + default: '5' + +runs: + using: "composite" + steps: + - name: Build and push Docker image (First Attempt) + id: buildx1 + uses: docker/build-push-action@v5 + continue-on-error: true + with: + context: ${{ inputs.context }} + file: ${{ inputs.file }} + platforms: ${{ inputs.platforms }} + pull: ${{ inputs.pull }} + push: ${{ inputs.push }} + load: ${{ inputs.load }} + tags: ${{ inputs.tags }} + cache-from: ${{ inputs.cache-from }} + cache-to: ${{ inputs.cache-to }} + + - name: Wait to retry + if: steps.buildx1.outcome != 'success' + run: | + echo "First attempt failed. Waiting ${{ inputs.retry-wait-time }} seconds before retry..." 
+ sleep ${{ inputs.retry-wait-time }} + shell: bash + + - name: Build and push Docker image (Retry Attempt) + if: steps.buildx1.outcome != 'success' + uses: docker/build-push-action@v5 + with: + context: ${{ inputs.context }} + file: ${{ inputs.file }} + platforms: ${{ inputs.platforms }} + pull: ${{ inputs.pull }} + push: ${{ inputs.push }} + load: ${{ inputs.load }} + tags: ${{ inputs.tags }} + cache-from: ${{ inputs.cache-from }} + cache-to: ${{ inputs.cache-to }} diff --git a/.github/workflows/docker-build-push-backend-container-on-tag.yml b/.github/workflows/docker-build-push-backend-container-on-tag.yml index 35adfb3f69d..dbde69a5ad4 100644 --- a/.github/workflows/docker-build-push-backend-container-on-tag.yml +++ b/.github/workflows/docker-build-push-backend-container-on-tag.yml @@ -7,9 +7,13 @@ on: env: REGISTRY_IMAGE: ghcr.io/stackhpc/danswer/danswer-backend + LATEST_TAG: ${{ contains(github.ref_name, 'latest') }} jobs: build-and-push: + # TODO: investigate a matrix build like the web container + # See https://runs-on.com/runners/linux/ + # NOTE(sd109): Can't use Danswer custom runners here runs-on: ubuntu-latest steps: @@ -35,6 +39,11 @@ jobs: type=raw,value=${{ github.ref_name }} type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }} + - name: Install build-essential + run: | + sudo apt-get update + sudo apt-get install -y build-essential + - name: Backend Image Docker Build and Push uses: docker/build-push-action@v5 with: @@ -42,14 +51,24 @@ jobs: file: ./backend/Dockerfile platforms: linux/amd64,linux/arm64 push: true - tags: ${{ steps.meta.outputs.tags }} + tags: | + ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} + ${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }} build-args: | DANSWER_VERSION=${{ github.ref_name }} cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache cache-to: type=registry,ref=${{ env.REGISTRY_IMAGE}}:buildcache,mode=max + # trivy has their own rate limiting issues causing this action to flake + # we worked around it by hardcoding to different db repos in env + # can re-enable when they figure it out + # https://github.com/aquasecurity/trivy/discussions/7538 + # https://github.com/aquasecurity/trivy-action/issues/389 - name: Run Trivy vulnerability scanner uses: aquasecurity/trivy-action@master + env: + TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2' + TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1' with: # To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend image-ref: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} diff --git a/.github/workflows/docker-build-push-model-server-container-on-tag.yml b/.github/workflows/docker-build-push-model-server-container-on-tag.yml index e4352b4042c..6e7b03eaa20 100644 --- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml +++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml @@ -7,9 +7,11 @@ on: env: REGISTRY_IMAGE: ghcr.io/stackhpc/danswer/danswer-model-server + LATEST_TAG: ${{ contains(github.ref_name, 'latest') }} jobs: build-and-push: + # NOTE(sd109): Can't use Danswer custom runners here runs-on: ubuntu-latest steps: @@ -35,14 +37,22 @@ jobs: push: true tags: | ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} - ${{ env.REGISTRY_IMAGE }}:latest + ${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }} build-args: | DANSWER_VERSION=${{ github.ref_name }} cache-from: type=gha cache-to: type=gha,mode=max + # trivy has their own 
rate limiting issues causing this action to flake + # we worked around it by hardcoding to different db repos in env + # can re-enable when they figure it out + # https://github.com/aquasecurity/trivy/discussions/7538 + # https://github.com/aquasecurity/trivy-action/issues/389 - name: Run Trivy vulnerability scanner uses: aquasecurity/trivy-action@master + env: + TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2' + TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1' with: image-ref: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} severity: 'CRITICAL,HIGH' diff --git a/.github/workflows/docker-build-push-web-container-on-tag.yml b/.github/workflows/docker-build-push-web-container-on-tag.yml index ab8d9393bce..61dd1665ef7 100644 --- a/.github/workflows/docker-build-push-web-container-on-tag.yml +++ b/.github/workflows/docker-build-push-web-container-on-tag.yml @@ -7,50 +7,126 @@ on: env: REGISTRY_IMAGE: ghcr.io/stackhpc/danswer/danswer-web-server + LATEST_TAG: ${{ contains(github.ref_name, 'latest') }} jobs: - build-and-push: + build: + # NOTE(sd109): Can't use Danswer custom runners here runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + # NOTE(sd109): Arm builds currently failing with error seen here: + # https://github.com/stackhpc/danswer/actions/runs/11368042561/job/31622167035#step:7:366 + # - linux/arm64 steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Login to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY_IMAGE }} - tags: | - type=raw,value=${{ github.ref_name }} - type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }} - - - name: Web Image Docker Build and Push - uses: docker/build-push-action@v5 - with: - context: ./web - file: ./web/Dockerfile - platforms: linux/amd64 #,linux/arm64 - push: true - tags: ${{ steps.meta.outputs.tags }} - build-args: | - DANSWER_VERSION=${{ github.ref_name }} - cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache - cache-to: type=registry,ref=${{ env.REGISTRY_IMAGE}}:buildcache,mode=max - - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@master - with: - # To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-web - image-ref: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} - severity: 'CRITICAL,HIGH' + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Checkout + uses: actions/checkout@v4 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + tags: | + type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} + type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push by digest + id: build + uses: docker/build-push-action@v5 + with: + context: ./web + file: ./web/Dockerfile + platforms: ${{ matrix.platform }} + push: true + build-args: | + DANSWER_VERSION=${{ github.ref_name }} + # needed 
due to weird interactions with the builds for different platforms + no-cache: true + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + merge: + runs-on: ubuntu-latest + needs: + - build + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: digests-* + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }} + + # trivy has their own rate limiting issues causing this action to flake + # we worked around it by hardcoding to different db repos in env + # can re-enable when they figure it out + # https://github.com/aquasecurity/trivy/discussions/7538 + # https://github.com/aquasecurity/trivy-action/issues/389 + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + env: + TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2' + TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1' + with: + image-ref: ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} + severity: 'CRITICAL,HIGH' diff --git a/.github/workflows/docker-tag-latest.yml b/.github/workflows/docker-tag-latest.yml index c0853ff3835..e2c7c30f31e 100644 --- a/.github/workflows/docker-tag-latest.yml +++ b/.github/workflows/docker-tag-latest.yml @@ -1,3 +1,6 @@ +# This workflow is set up to be manually triggered via the GitHub Action tab. +# Given a version, it will tag those backend and webserver images as "latest". + name: Tag Latest Version on: @@ -9,7 +12,9 @@ on: jobs: tag: - runs-on: ubuntu-latest + # See https://runs-on.com/runners/linux/ + # use a lower powered instance since this just does i/o to docker hub + runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"] steps: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 diff --git a/.github/workflows/helm-build-push.yml b/.github/workflows/helm-build-push.yml index 8f5436c7ae6..f76e8ae0c0c 100644 --- a/.github/workflows/helm-build-push.yml +++ b/.github/workflows/helm-build-push.yml @@ -20,11 +20,11 @@ jobs: # on main or a stable tag on a dev branch. 
- name: Fail on semver pre-release chart version - run: yq .version deployment/helm/Chart.yaml | grep -v '[a-zA-Z-]' + run: yq .version deployment/helm/charts/danswer/Chart.yaml | grep -v '[a-zA-Z-]' if: ${{ github.ref_name == 'main' }} - name: Fail on stable semver chart version - run: yq .version deployment/helm/Chart.yaml | grep '[a-zA-Z-]' + run: yq .version deployment/helm/charts/danswer/Chart.yaml | grep '[a-zA-Z-]' if: ${{ github.ref_name != 'main' }} # To reduce resource usage images are built only on tag. @@ -37,19 +37,19 @@ jobs: curl -H "Authorization: Bearer $(echo ${{ secrets.GITHUB_TOKEN }} | base64)" https://ghcr.io/v2/stackhpc/danswer/danswer-backend/tags/list | jq .tags - | grep $( yq .appVersion deployment/helm/Chart.yaml )-$( yq .tagSuffix deployment/helm/values.yaml ) + | grep $( yq .appVersion deployment/helm/charts/danswer/Chart.yaml )-$( yq .tagSuffix deployment/helm/charts/danswer/values.yaml ) && curl -H "Authorization: Bearer $(echo ${{ secrets.GITHUB_TOKEN }} | base64)" https://ghcr.io/v2/stackhpc/danswer/danswer-web-server/tags/list | jq .tags - | grep $( yq .appVersion deployment/helm/Chart.yaml )-$( yq .tagSuffix deployment/helm/values.yaml ) + | grep $( yq .appVersion deployment/helm/charts/danswer/Chart.yaml )-$( yq .tagSuffix deployment/helm/charts/danswer/values.yaml ) # Check if current chart version exists in releases already - name: Check for Helm chart version bump id: version_check run: | set -xe - chart_version=$(yq .version deployment/helm/Chart.yaml) + chart_version=$(yq .version deployment/helm/charts/danswer/Chart.yaml) if [[ $(curl https://api.github.com/repos/stackhpc/danswer/releases | jq '.[].tag_name' | grep danswer-helm-$chart_version) ]]; then echo chart_version_changed=false >> $GITHUB_OUTPUT else @@ -84,12 +84,12 @@ jobs: run: | helm repo add bitnami https://charts.bitnami.com/bitnami helm repo add vespa https://unoplat.github.io/vespa-helm-charts - helm dependency build deployment/helm + helm dependency build deployment/helm/charts/danswer - name: Run chart-releaser uses: helm/chart-releaser-action@v1.6.0 with: - charts_dir: deployment + charts_dir: deployment/helm/charts pages_branch: helm-publish mark_as_latest: ${{ github.ref_name == 'main' }} env: diff --git a/.github/workflows/pr-helm-chart-testing.yml.disabled.txt b/.github/workflows/pr-helm-chart-testing.yml.disabled.txt new file mode 100644 index 00000000000..eeb1715b1c2 --- /dev/null +++ b/.github/workflows/pr-helm-chart-testing.yml.disabled.txt @@ -0,0 +1,68 @@ +# This workflow is intentionally disabled while we're still working on it +# It's close to ready, but a race condition needs to be fixed with +# API server and Vespa startup, and it needs to have a way to build/test against +# local containers + +name: Helm - Lint and Test Charts + +on: + merge_group: + pull_request: + branches: [ main ] + +jobs: + lint-test: + # See https://runs-on.com/runners/linux/ + runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}"] + + # fetch-depth 0 is required for helm/chart-testing-action + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Helm + uses: azure/setup-helm@v4.2.0 + with: + version: v3.14.4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' + cache-dependency-path: | + backend/requirements/default.txt + backend/requirements/dev.txt + backend/requirements/model_server.txt + - run: | + python -m pip install --upgrade pip + pip install --retries 5 
--timeout 30 -r backend/requirements/default.txt + pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt + pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt + + - name: Set up chart-testing + uses: helm/chart-testing-action@v2.6.1 + + - name: Run chart-testing (list-changed) + id: list-changed + run: | + changed=$(ct list-changed --target-branch ${{ github.event.repository.default_branch }}) + if [[ -n "$changed" ]]; then + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + + - name: Run chart-testing (lint) +# if: steps.list-changed.outputs.changed == 'true' + run: ct lint --all --config ct.yaml --target-branch ${{ github.event.repository.default_branch }} + + - name: Create kind cluster +# if: steps.list-changed.outputs.changed == 'true' + uses: helm/kind-action@v1.10.0 + + - name: Run chart-testing (install) +# if: steps.list-changed.outputs.changed == 'true' + run: ct install --all --config ct.yaml +# run: ct install --target-branch ${{ github.event.repository.default_branch }} + \ No newline at end of file diff --git a/.github/workflows/pr-python-checks.yml b/.github/workflows/pr-python-checks.yml index 9cc624fa073..0a9e9f96a63 100644 --- a/.github/workflows/pr-python-checks.yml +++ b/.github/workflows/pr-python-checks.yml @@ -3,11 +3,14 @@ name: Python Checks on: merge_group: pull_request: - branches: [ main ] + branches: + - main + - 'release/**' jobs: mypy-check: - runs-on: ubuntu-latest + # See https://runs-on.com/runners/linux/ + runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"] steps: - name: Checkout code @@ -24,9 +27,9 @@ jobs: backend/requirements/model_server.txt - run: | python -m pip install --upgrade pip - pip install -r backend/requirements/default.txt - pip install -r backend/requirements/dev.txt - pip install -r backend/requirements/model_server.txt + pip install --retries 5 --timeout 30 -r backend/requirements/default.txt + pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt + pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt - name: Run MyPy run: | diff --git a/.github/workflows/pr-python-connector-tests.yml b/.github/workflows/pr-python-connector-tests.yml new file mode 100644 index 00000000000..642618000d2 --- /dev/null +++ b/.github/workflows/pr-python-connector-tests.yml @@ -0,0 +1,61 @@ +name: Connector Tests + +on: + pull_request: + branches: [main] + schedule: + # This cron expression runs the job daily at 16:00 UTC (9am PT) + - cron: "0 16 * * *" + +env: + # Confluence + CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }} + CONFLUENCE_TEST_SPACE: ${{ secrets.CONFLUENCE_TEST_SPACE }} + CONFLUENCE_IS_CLOUD: ${{ secrets.CONFLUENCE_IS_CLOUD }} + CONFLUENCE_TEST_PAGE_ID: ${{ secrets.CONFLUENCE_TEST_PAGE_ID }} + CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }} + CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }} + # Jira + JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} + JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} + +jobs: + connectors-check: + # See https://runs-on.com/runners/linux/ + runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"] + + env: + PYTHONPATH: ./backend + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: "pip" + cache-dependency-path: | + backend/requirements/default.txt + backend/requirements/dev.txt + + - name: Install Dependencies + run: | + python -m pip install --upgrade 
pip + pip install --retries 5 --timeout 30 -r backend/requirements/default.txt + pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt + + - name: Run Tests + shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}" + run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors + + - name: Alert on Failure + if: failure() && github.event_name == 'schedule' + env: + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + run: | + curl -X POST \ + -H 'Content-type: application/json' \ + --data '{"text":"Scheduled Connector Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \ + $SLACK_WEBHOOK diff --git a/.github/workflows/pr-python-tests.yml b/.github/workflows/pr-python-tests.yml index 7686de019a5..ce57a7a5814 100644 --- a/.github/workflows/pr-python-tests.yml +++ b/.github/workflows/pr-python-tests.yml @@ -3,15 +3,19 @@ name: Python Unit Tests on: merge_group: pull_request: - branches: [ main ] + branches: + - main + - 'release/**' jobs: backend-check: - runs-on: ubuntu-latest + # See https://runs-on.com/runners/linux/ + runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"] env: PYTHONPATH: ./backend - + REDIS_CLOUD_PYTEST_PASSWORD: ${{ secrets.REDIS_CLOUD_PYTEST_PASSWORD }} + steps: - name: Checkout code uses: actions/checkout@v4 @@ -28,8 +32,8 @@ jobs: - name: Install Dependencies run: | python -m pip install --upgrade pip - pip install -r backend/requirements/default.txt - pip install -r backend/requirements/dev.txt + pip install --retries 5 --timeout 30 -r backend/requirements/default.txt + pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt - name: Run Tests shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}" diff --git a/.github/workflows/pr-quality-checks.yml b/.github/workflows/pr-quality-checks.yml index 8a42541ea5d..128317a79ce 100644 --- a/.github/workflows/pr-quality-checks.yml +++ b/.github/workflows/pr-quality-checks.yml @@ -1,6 +1,6 @@ name: Quality Checks PR concurrency: - group: Quality-Checks-PR-${{ github.head_ref }} + group: Quality-Checks-PR-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }} cancel-in-progress: true on: @@ -9,7 +9,8 @@ on: jobs: quality-checks: - runs-on: ubuntu-latest + # See https://runs-on.com/runners/linux/ + runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"] steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/run-it.yml b/.github/workflows/run-it.yml index 7c0c1814c3b..cbb9954e918 100644 --- a/.github/workflows/run-it.yml +++ b/.github/workflows/run-it.yml @@ -1,20 +1,22 @@ name: Run Integration Tests concurrency: - group: Run-Integration-Tests-${{ github.head_ref }} + group: Run-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }} cancel-in-progress: true on: merge_group: pull_request: - branches: [ main ] + branches: + - main + - 'release/**' env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} jobs: integration-tests: - runs-on: - group: 'arm64-image-builders' + # See https://runs-on.com/runners/linux/ + runs-on: [runs-on,runner=8cpu-linux-x64,ram=32,"run-id=${{ github.run_id }}"] steps: - name: Checkout code uses: actions/checkout@v4 @@ -28,30 +30,20 @@ jobs: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_TOKEN }} - - name: Build Web Docker image - uses: docker/build-push-action@v5 - with: - context: ./web - file: 
./web/Dockerfile - platforms: linux/arm64 - pull: true - push: true - load: true - tags: danswer/danswer-web-server:it - cache-from: type=registry,ref=danswer/danswer-web-server:it - cache-to: | - type=registry,ref=danswer/danswer-web-server:it,mode=max - type=inline + # NOTE: we don't need to build the Web Docker image since it's not used + # during the IT for now. We have a separate action to verify it builds + # succesfully + - name: Pull Web Docker image + run: | + docker pull danswer/danswer-web-server:latest + docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:it - name: Build Backend Docker image - uses: docker/build-push-action@v5 + uses: ./.github/actions/custom-build-and-push with: context: ./backend file: ./backend/Dockerfile - platforms: linux/arm64 - pull: true - push: true - load: true + platforms: linux/amd64 tags: danswer/danswer-backend:it cache-from: type=registry,ref=danswer/danswer-backend:it cache-to: | @@ -59,14 +51,11 @@ jobs: type=inline - name: Build Model Server Docker image - uses: docker/build-push-action@v5 + uses: ./.github/actions/custom-build-and-push with: context: ./backend file: ./backend/Dockerfile.model_server - platforms: linux/arm64 - pull: true - push: true - load: true + platforms: linux/amd64 tags: danswer/danswer-model-server:it cache-from: type=registry,ref=danswer/danswer-model-server:it cache-to: | @@ -74,14 +63,11 @@ jobs: type=inline - name: Build integration test Docker image - uses: docker/build-push-action@v5 + uses: ./.github/actions/custom-build-and-push with: context: ./backend file: ./backend/tests/integration/Dockerfile - platforms: linux/arm64 - pull: true - push: true - load: true + platforms: linux/amd64 tags: danswer/integration-test-runner:it cache-from: type=registry,ref=danswer/integration-test-runner:it cache-to: | @@ -92,14 +78,19 @@ jobs: run: | cd deployment/docker_compose ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ + AUTH_TYPE=basic \ + REQUIRE_EMAIL_VERIFICATION=false \ + DISABLE_TELEMETRY=true \ IMAGE_TAG=it \ - docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build + docker compose -f docker-compose.dev.yml -p danswer-stack up -d id: start_docker - name: Wait for service to be ready run: | echo "Starting wait-for-service script..." + docker logs -f danswer-stack-api_server-1 & + start_time=$(date +%s) timeout=300 # 5 minutes in seconds @@ -132,13 +123,16 @@ jobs: run: | echo "Running integration tests..." 
docker run --rm --network danswer-stack_default \ + --name test-runner \ -e POSTGRES_HOST=relational_db \ -e POSTGRES_USER=postgres \ -e POSTGRES_PASSWORD=password \ -e POSTGRES_DB=postgres \ -e VESPA_HOST=index \ + -e REDIS_HOST=cache \ -e API_SERVER_HOST=api_server \ -e OPENAI_API_KEY=${OPENAI_API_KEY} \ + -e TEST_WEB_HOSTNAME=test-runner \ danswer/integration-test-runner:it continue-on-error: true id: run_tests diff --git a/.github/workflows/tag-nightly.yml b/.github/workflows/tag-nightly.yml new file mode 100644 index 00000000000..bf2699d9fd4 --- /dev/null +++ b/.github/workflows/tag-nightly.yml @@ -0,0 +1,54 @@ +name: Nightly Tag Push + +on: + schedule: + - cron: '0 0 * * *' # Runs every day at midnight UTC + +permissions: + contents: write # Allows pushing tags to the repository + +jobs: + create-and-push-tag: + runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"] + + steps: + # actions using GITHUB_TOKEN cannot trigger another workflow, but we do want this to trigger docker pushes + # see https://github.com/orgs/community/discussions/27028#discussioncomment-3254367 for the workaround we + # implement here which needs an actual user's deploy key + - name: Checkout code + uses: actions/checkout@v4 + with: + ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}" + + - name: Set up Git user + run: | + git config user.name "Richard Kuo [bot]" + git config user.email "rkuo[bot]@danswer.ai" + + - name: Check for existing nightly tag + id: check_tag + run: | + if git tag --points-at HEAD --list "nightly-latest*" | grep -q .; then + echo "A tag starting with 'nightly-latest' already exists on HEAD." + echo "tag_exists=true" >> $GITHUB_OUTPUT + else + echo "No tag starting with 'nightly-latest' exists on HEAD." + echo "tag_exists=false" >> $GITHUB_OUTPUT + fi + + # don't tag again if HEAD already has a nightly-latest tag on it + - name: Create Nightly Tag + if: steps.check_tag.outputs.tag_exists == 'false' + env: + DATE: ${{ github.run_id }} + run: | + TAG_NAME="nightly-latest-$(date +'%Y%m%d')" + echo "Creating tag: $TAG_NAME" + git tag $TAG_NAME + + - name: Push Tag + if: steps.check_tag.outputs.tag_exists == 'false' + run: | + TAG_NAME="nightly-latest-$(date +'%Y%m%d')" + git push origin $TAG_NAME + \ No newline at end of file diff --git a/.gitignore b/.gitignore index d9d7727b2f0..ba50495d7ff 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,6 @@ .mypy_cache .idea /deployment/data/nginx/app.conf -.vscode/launch.json +.vscode/ *.sw? 
/backend/tests/regression/answer_quality/search_test_config.yaml diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000000..0164457a03e --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +backend/tests/integration/tests/pruning/website diff --git a/.vscode/env_template.txt b/.vscode/env_template.txt index b3fae8cee73..89faca0abf0 100644 --- a/.vscode/env_template.txt +++ b/.vscode/env_template.txt @@ -1,5 +1,5 @@ -# Copy this file to .env at the base of the repo and fill in the values -# This will help with development iteration speed and reduce repeat tasks for dev +# Copy this file to .env in the .vscode folder +# Fill in the values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI # Also check out danswer/backend/scripts/restart_containers.sh for a script to restart the containers which Danswer relies on outside of VSCode/Cursor processes # For local dev, often user Authentication is not needed @@ -15,7 +15,7 @@ LOG_LEVEL=debug # This passes top N results to LLM an additional time for reranking prior to answer generation # This step is quite heavy on token usage so we disable it for dev generally -DISABLE_LLM_DOC_RELEVANCE=True +DISABLE_LLM_DOC_RELEVANCE=False # Useful if you want to toggle auth on/off (google_oauth/OIDC specifically) @@ -27,9 +27,9 @@ REQUIRE_EMAIL_VERIFICATION=False # Set these so if you wipe the DB, you don't end up having to go through the UI every time GEN_AI_API_KEY= -# If answer quality isn't important for dev, use 3.5 turbo due to it being cheaper -GEN_AI_MODEL_VERSION=gpt-3.5-turbo -FAST_GEN_AI_MODEL_VERSION=gpt-3.5-turbo +# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper +GEN_AI_MODEL_VERSION=gpt-4o +FAST_GEN_AI_MODEL_VERSION=gpt-4o # For Danswer Slack Bot, overrides the UI values so no need to set this up via UI every time # Only needed if using DanswerBot @@ -38,7 +38,7 @@ FAST_GEN_AI_MODEL_VERSION=gpt-3.5-turbo # Python stuff -PYTHONPATH=./backend +PYTHONPATH=../backend PYTHONUNBUFFERED=1 @@ -49,4 +49,3 @@ BING_API_KEY= # Enable the full set of Danswer Enterprise Edition features # NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development) ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False - diff --git a/.vscode/launch.template.jsonc b/.vscode/launch.template.jsonc index 9aaadb32acf..c733800981c 100644 --- a/.vscode/launch.template.jsonc +++ b/.vscode/launch.template.jsonc @@ -1,15 +1,23 @@ -/* - - Copy this file into '.vscode/launch.json' or merge its - contents into your existing configurations. - -*/ +/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */ { // Use IntelliSense to learn about possible attributes. // Hover to view descriptions of existing attributes. 
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", + "compounds": [ + { + "name": "Run All Danswer Services", + "configurations": [ + "Web Server", + "Model Server", + "API Server", + "Indexing", + "Background Jobs", + "Slack Bot" + ] + } + ], "configurations": [ { "name": "Web Server", @@ -17,7 +25,7 @@ "request": "launch", "cwd": "${workspaceRoot}/web", "runtimeExecutable": "npm", - "envFile": "${workspaceFolder}/.env", + "envFile": "${workspaceFolder}/.vscode/.env", "runtimeArgs": [ "run", "dev" ], @@ -25,11 +33,12 @@ }, { "name": "Model Server", - "type": "python", + "consoleName": "Model Server", + "type": "debugpy", "request": "launch", "module": "uvicorn", "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.env", + "envFile": "${workspaceFolder}/.vscode/.env", "env": { "LOG_LEVEL": "DEBUG", "PYTHONUNBUFFERED": "1" @@ -39,16 +48,16 @@ "--reload", "--port", "9000" - ], - "consoleTitle": "Model Server" + ] }, { "name": "API Server", - "type": "python", + "consoleName": "API Server", + "type": "debugpy", "request": "launch", "module": "uvicorn", "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.env", + "envFile": "${workspaceFolder}/.vscode/.env", "env": { "LOG_DANSWER_MODEL_INTERACTIONS": "True", "LOG_LEVEL": "DEBUG", @@ -59,32 +68,32 @@ "--reload", "--port", "8080" - ], - "consoleTitle": "API Server" + ] }, { "name": "Indexing", - "type": "python", + "consoleName": "Indexing", + "type": "debugpy", "request": "launch", "program": "danswer/background/update.py", "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.env", + "envFile": "${workspaceFolder}/.vscode/.env", "env": { "ENABLE_MULTIPASS_INDEXING": "false", "LOG_LEVEL": "DEBUG", "PYTHONUNBUFFERED": "1", "PYTHONPATH": "." 
- }, - "consoleTitle": "Indexing" + } }, // Celery and all async jobs, usually would include indexing as well but this is handled separately above for dev { "name": "Background Jobs", - "type": "python", + "consoleName": "Background Jobs", + "type": "debugpy", "request": "launch", "program": "scripts/dev_run_background_jobs.py", "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.env", + "envFile": "${workspaceFolder}/.vscode/.env", "env": { "LOG_DANSWER_MODEL_INTERACTIONS": "True", "LOG_LEVEL": "DEBUG", @@ -93,18 +102,18 @@ }, "args": [ "--no-indexing" - ], - "consoleTitle": "Background Jobs" + ] }, // For the listner to access the Slack API, // DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project { "name": "Slack Bot", - "type": "python", + "consoleName": "Slack Bot", + "type": "debugpy", "request": "launch", "program": "danswer/danswerbot/slack/listener.py", "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.env", + "envFile": "${workspaceFolder}/.vscode/.env", "env": { "LOG_LEVEL": "DEBUG", "PYTHONUNBUFFERED": "1", @@ -113,11 +122,12 @@ }, { "name": "Pytest", - "type": "python", + "consoleName": "Pytest", + "type": "debugpy", "request": "launch", "module": "pytest", "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.env", + "envFile": "${workspaceFolder}/.vscode/.env", "env": { "LOG_LEVEL": "DEBUG", "PYTHONUNBUFFERED": "1", @@ -128,18 +138,16 @@ // Specify a sepcific module/test to run or provide nothing to run all tests //"tests/unit/danswer/llm/answering/test_prune_and_merge.py" ] - } - ], - "compounds": [ + }, { - "name": "Run Danswer", - "configurations": [ - "Web Server", - "Model Server", - "API Server", - "Indexing", - "Background Jobs", - ] + "name": "Clear and Restart External Volumes and Containers", + "type": "node", + "request": "launch", + "runtimeExecutable": "bash", + "runtimeArgs": ["${workspaceFolder}/backend/scripts/restart_containers.sh"], + "cwd": "${workspaceFolder}", + "console": "integratedTerminal", + "stopOnEntry": true } ] } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 116e78b6f19..3e4415188a1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -48,23 +48,26 @@ We would love to see you there! ## Get Started 🚀 -Danswer being a fully functional app, relies on some external pieces of software, specifically: +Danswer being a fully functional app, relies on some external software, specifically: - [Postgres](https://www.postgresql.org/) (Relational DB) - [Vespa](https://vespa.ai/) (Vector DB/Search Engine) +- [Redis](https://redis.io/) (Cache) +- [Nginx](https://nginx.org/) (Not needed for development flows generally) -This guide provides instructions to set up the Danswer specific services outside of Docker because it's easier for -development purposes but also feel free to just use the containers and update with local changes by providing the -`--build` flag. + +> **Note:** +> This guide provides instructions to build and run Danswer locally from source with Docker containers providing the above external software. We believe this combination is easier for +> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Danswer stack within Docker below. ### Local Set Up -It is recommended to use Python version 3.11 +Be sure to use Python version 3.11. For instructions on installing Python 3.11 on macOS, refer to the [CONTRIBUTING_MACOS.md](./CONTRIBUTING_MACOS.md) readme. 
If using a lower version, modifications will have to be made to the code. -If using a higher version, the version of Tensorflow we use may not be available for your platform. +If using a higher version, sometimes some libraries will not be available (i.e. we had problems with Tensorflow in the past with higher versions of python). -#### Installing Requirements +#### Backend: Python requirements Currently, we use pip and recommend creating a virtual environment. For convenience here's a command for it: @@ -73,8 +76,9 @@ python -m venv .venv source .venv/bin/activate ``` ---> Note that this virtual environment MUST NOT be set up WITHIN the danswer -directory +> **Note:** +> This virtual environment MUST NOT be set up WITHIN the danswer directory if you plan on using mypy within certain IDEs. +> For simplicity, we recommend setting up the virtual environment outside of the danswer directory. _For Windows, activate the virtual environment using Command Prompt:_ ```bash @@ -89,34 +93,38 @@ Install the required python dependencies: ```bash pip install -r danswer/backend/requirements/default.txt pip install -r danswer/backend/requirements/dev.txt +pip install -r danswer/backend/requirements/ee.txt pip install -r danswer/backend/requirements/model_server.txt ``` -Install [Node.js and npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) for the frontend. -Once the above is done, navigate to `danswer/web` run: +Install Playwright for Python (headless browser required by the Web Connector) + +In the activated Python virtualenv, install Playwright for Python by running: ```bash -npm i +playwright install ``` -Install Playwright (required by the Web Connector) +You may have to deactivate and reactivate your virtualenv for `playwright` to appear on your path. -> Note: If you have just done the pip install, open a new terminal and source the python virtual-env again. -This will update the path to include playwright +#### Frontend: Node dependencies -Then install Playwright by running: +Install [Node.js and npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) for the frontend. +Once the above is done, navigate to `danswer/web` run: ```bash -playwright install +npm i ``` +#### Docker containers for external software +You will need Docker installed to run these containers. -#### Dependent Docker Containers -First navigate to `danswer/deployment/docker_compose`, then start up Vespa and Postgres with: +First navigate to `danswer/deployment/docker_compose`, then start up Postgres/Vespa/Redis with: ```bash -docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational_db +docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational_db cache ``` -(index refers to Vespa and relational_db refers to Postgres) +(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis) + -#### Running Danswer +#### Running Danswer locally To start the frontend, navigate to `danswer/web` and run: ```bash npm run dev @@ -127,11 +135,10 @@ Navigate to `danswer/backend` and run: ```bash uvicorn model_server.main:app --reload --port 9000 ``` + _For Windows (for compatibility with both PowerShell and Command Prompt):_ ```bash -powershell -Command " - uvicorn model_server.main:app --reload --port 9000 -" +powershell -Command "uvicorn model_server.main:app --reload --port 9000" ``` The first time running Danswer, you will need to run the DB migrations for Postgres. 
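The hunk above ends on the note that the Postgres migrations must be run the first time Danswer is brought up, and the `backend/alembic/env.py` change later in this diff makes its logging setup conditional on a `configure_logger` attribute. As a hedged illustration only (not a command added by this PR), here is a minimal sketch of driving those migrations through Alembic's Python API; the conventional CLI equivalent is `alembic upgrade head` run from `danswer/backend`.

```python
# Minimal sketch; assumes it is run from danswer/backend, where alembic.ini lives.
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
# The env.py change in this diff reads config.attributes.get("configure_logger", True),
# so a programmatic caller (e.g. a test fixture) can opt out of fileConfig()
# re-initialising logging before migrations run.
cfg.attributes["configure_logger"] = False
command.upgrade(cfg, "head")  # same effect as `alembic upgrade head`
```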
@@ -154,6 +161,7 @@ To run the backend API server, navigate back to `danswer/backend` and run: ```bash AUTH_TYPE=disabled uvicorn danswer.main:app --reload --port 8080 ``` + _For Windows (for compatibility with both PowerShell and Command Prompt):_ ```bash powershell -Command " @@ -162,20 +170,58 @@ powershell -Command " " ``` -Note: if you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services. +> **Note:** +> If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services. + +#### Wrapping up + +You should now have 4 servers running: + +- Web server +- Backend API +- Model server +- Background jobs + +Now, visit `http://localhost:3000` in your browser. You should see the Danswer onboarding wizard where you can connect your external LLM provider to Danswer. + +You've successfully set up a local Danswer instance! 🏁 + +#### Running the Danswer application in a container + +You can run the full Danswer application stack from pre-built images including all external software dependencies. + +Navigate to `danswer/deployment/docker_compose` and run: + +```bash +docker compose -f docker-compose.dev.yml -p danswer-stack up -d +``` + +After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Danswer. + +If you want to make changes to Danswer and run those changes in Docker, you can also build a local version of the Danswer container images that incorporates your changes like so: + +```bash +docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build +``` ### Formatting and Linting #### Backend For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports). First, install pre-commit (if you don't have it already) following the instructions [here](https://pre-commit.com/#installation). + +With the virtual environment active, install the pre-commit library with: +```bash +pip install pre-commit +``` + Then, from the `danswer/backend` directory, run: ```bash pre-commit install ``` Additionally, we use `mypy` for static type checking. -Danswer is fully type-annotated, and we would like to keep it that way! +Danswer is fully type-annotated, and we want to keep it that way! To run the mypy checks manually, run `python -m mypy .` from the `danswer/backend` directory. @@ -186,6 +232,7 @@ Please double check that prettier passes before creating a pull request. ### Release Process -Danswer follows the semver versioning standard. +Danswer loosely follows the SemVer versioning standard. +Major changes are released with a "minor" version bump. Currently we use patch release versions to indicate small feature changes. A set of Docker containers will be pushed automatically to DockerHub with every tag. You can see the containers [here](https://hub.docker.com/search?q=danswer%2F). diff --git a/CONTRIBUTING_MACOS.md b/CONTRIBUTING_MACOS.md new file mode 100644 index 00000000000..519eccffd51 --- /dev/null +++ b/CONTRIBUTING_MACOS.md @@ -0,0 +1,31 @@ +## Some additional notes for Mac Users +The base instructions to set up the development environment are located in [CONTRIBUTING.md](https://github.com/danswer-ai/danswer/blob/main/CONTRIBUTING.md). + +### Setting up Python +Ensure [Homebrew](https://brew.sh/) is already set up. + +Then install python 3.11. 
+```bash +brew install python@3.11 +``` + +Add python 3.11 to your path: add the following line to ~/.zshrc +``` +export PATH="$(brew --prefix)/opt/python@3.11/libexec/bin:$PATH" +``` + +> **Note:** +> You will need to open a new terminal for the path change above to take effect. + + +### Setting up Docker +On macOS, you will need to install [Docker Desktop](https://www.docker.com/products/docker-desktop/) and +ensure it is running before continuing with the docker commands. + + +### Formatting and Linting +MacOS will likely require you to remove some quarantine attributes on some of the hooks for them to execute properly. +After installing pre-commit, run the following command: +```bash +sudo xattr -r -d com.apple.quarantine ~/.cache/pre-commit +``` \ No newline at end of file diff --git a/backend/Dockerfile b/backend/Dockerfile index d8c388801d7..89bacdc7020 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -9,7 +9,8 @@ LABEL com.danswer.description="This image is the web/frontend container of Dansw # Default DANSWER_VERSION, typically overriden during builds by GitHub Actions. ARG DANSWER_VERSION=0.3-dev -ENV DANSWER_VERSION=${DANSWER_VERSION} +ENV DANSWER_VERSION=${DANSWER_VERSION} \ + DANSWER_RUNNING_IN_DOCKER="true" RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}" # Install system dependencies @@ -40,8 +41,10 @@ RUN apt-get update && \ COPY ./requirements/default.txt /tmp/requirements.txt COPY ./requirements/ee.txt /tmp/ee-requirements.txt RUN pip install --no-cache-dir --upgrade \ - -r /tmp/requirements.txt \ - -r /tmp/ee-requirements.txt && \ + --retries 5 \ + --timeout 30 \ + -r /tmp/requirements.txt \ + -r /tmp/ee-requirements.txt && \ pip uninstall -y py && \ playwright install chromium && \ playwright install-deps chromium && \ @@ -74,9 +77,9 @@ Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')" # Pre-downloading NLTK for setups with limited egress RUN python -c "import nltk; \ - nltk.download('stopwords', quiet=True); \ - nltk.download('wordnet', quiet=True); \ - nltk.download('punkt', quiet=True);" +nltk.download('stopwords', quiet=True); \ +nltk.download('punkt', quiet=True);" +# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed # Set up application files WORKDIR /app diff --git a/backend/Dockerfile.model_server b/backend/Dockerfile.model_server index f2fb1ca44d0..05a284a2baa 100644 --- a/backend/Dockerfile.model_server +++ b/backend/Dockerfile.model_server @@ -8,11 +8,17 @@ visit https://github.com/danswer-ai/danswer." # Default DANSWER_VERSION, typically overriden during builds by GitHub Actions. 
ARG DANSWER_VERSION=0.3-dev -ENV DANSWER_VERSION=${DANSWER_VERSION} +ENV DANSWER_VERSION=${DANSWER_VERSION} \ + DANSWER_RUNNING_IN_DOCKER="true" + + RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}" COPY ./requirements/model_server.txt /tmp/requirements.txt -RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt +RUN pip install --no-cache-dir --upgrade \ + --retries 5 \ + --timeout 30 \ + -r /tmp/requirements.txt RUN apt-get remove -y --allow-remove-essential perl-base && \ apt-get autoremove -y @@ -49,6 +55,6 @@ COPY ./shared_configs /app/shared_configs # Model Server main code COPY ./model_server /app/model_server -ENV PYTHONPATH /app +ENV PYTHONPATH=/app CMD ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"] diff --git a/backend/alembic/env.py b/backend/alembic/env.py index 8c028202bfc..154d6ff3d66 100644 --- a/backend/alembic/env.py +++ b/backend/alembic/env.py @@ -16,7 +16,9 @@ # Interpret the config file for Python logging. # This line sets up loggers basically. -if config.config_file_name is not None: +if config.config_file_name is not None and config.attributes.get( + "configure_logger", True +): fileConfig(config.config_file_name) # add your model's MetaData object here diff --git a/backend/alembic/versions/0ebb1d516877_add_ccpair_deletion_failure_message.py b/backend/alembic/versions/0ebb1d516877_add_ccpair_deletion_failure_message.py new file mode 100644 index 00000000000..526c9449fce --- /dev/null +++ b/backend/alembic/versions/0ebb1d516877_add_ccpair_deletion_failure_message.py @@ -0,0 +1,27 @@ +"""add ccpair deletion failure message + +Revision ID: 0ebb1d516877 +Revises: 52a219fb5233 +Create Date: 2024-09-10 15:03:48.233926 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "0ebb1d516877" +down_revision = "52a219fb5233" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "connector_credential_pair", + sa.Column("deletion_failure_message", sa.String(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("connector_credential_pair", "deletion_failure_message") diff --git a/backend/alembic/versions/1b8206b29c5d_add_user_delete_cascades.py b/backend/alembic/versions/1b8206b29c5d_add_user_delete_cascades.py new file mode 100644 index 00000000000..250621f74e2 --- /dev/null +++ b/backend/alembic/versions/1b8206b29c5d_add_user_delete_cascades.py @@ -0,0 +1,102 @@ +"""add_user_delete_cascades + +Revision ID: 1b8206b29c5d +Revises: 35e6853a51d5 +Create Date: 2024-09-18 11:48:59.418726 + +""" +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision = "1b8206b29c5d" +down_revision = "35e6853a51d5" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.drop_constraint("credential_user_id_fkey", "credential", type_="foreignkey") + op.create_foreign_key( + "credential_user_id_fkey", + "credential", + "user", + ["user_id"], + ["id"], + ondelete="CASCADE", + ) + + op.drop_constraint("chat_session_user_id_fkey", "chat_session", type_="foreignkey") + op.create_foreign_key( + "chat_session_user_id_fkey", + "chat_session", + "user", + ["user_id"], + ["id"], + ondelete="CASCADE", + ) + + op.drop_constraint("chat_folder_user_id_fkey", "chat_folder", type_="foreignkey") + op.create_foreign_key( + "chat_folder_user_id_fkey", + "chat_folder", + "user", + ["user_id"], + ["id"], + ondelete="CASCADE", + ) + + op.drop_constraint("prompt_user_id_fkey", "prompt", type_="foreignkey") + op.create_foreign_key( + "prompt_user_id_fkey", "prompt", "user", ["user_id"], ["id"], ondelete="CASCADE" + ) + + op.drop_constraint("notification_user_id_fkey", "notification", type_="foreignkey") + op.create_foreign_key( + "notification_user_id_fkey", + "notification", + "user", + ["user_id"], + ["id"], + ondelete="CASCADE", + ) + + op.drop_constraint("inputprompt_user_id_fkey", "inputprompt", type_="foreignkey") + op.create_foreign_key( + "inputprompt_user_id_fkey", + "inputprompt", + "user", + ["user_id"], + ["id"], + ondelete="CASCADE", + ) + + +def downgrade() -> None: + op.drop_constraint("credential_user_id_fkey", "credential", type_="foreignkey") + op.create_foreign_key( + "credential_user_id_fkey", "credential", "user", ["user_id"], ["id"] + ) + + op.drop_constraint("chat_session_user_id_fkey", "chat_session", type_="foreignkey") + op.create_foreign_key( + "chat_session_user_id_fkey", "chat_session", "user", ["user_id"], ["id"] + ) + + op.drop_constraint("chat_folder_user_id_fkey", "chat_folder", type_="foreignkey") + op.create_foreign_key( + "chat_folder_user_id_fkey", "chat_folder", "user", ["user_id"], ["id"] + ) + + op.drop_constraint("prompt_user_id_fkey", "prompt", type_="foreignkey") + op.create_foreign_key("prompt_user_id_fkey", "prompt", "user", ["user_id"], ["id"]) + + op.drop_constraint("notification_user_id_fkey", "notification", type_="foreignkey") + op.create_foreign_key( + "notification_user_id_fkey", "notification", "user", ["user_id"], ["id"] + ) + + op.drop_constraint("inputprompt_user_id_fkey", "inputprompt", type_="foreignkey") + op.create_foreign_key( + "inputprompt_user_id_fkey", "inputprompt", "user", ["user_id"], ["id"] + ) diff --git a/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py b/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py index 42f4c22ed78..f5b21c81d8e 100644 --- a/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py +++ b/backend/alembic/versions/1f60f60c3401_embedding_model_search_settings.py @@ -30,7 +30,7 @@ def upgrade() -> None: op.add_column( "search_settings", sa.Column( - "multipass_indexing", sa.Boolean(), nullable=False, server_default="true" + "multipass_indexing", sa.Boolean(), nullable=False, server_default="false" ), ) op.add_column( diff --git a/backend/alembic/versions/35e6853a51d5_server_default_chosen_assistants.py b/backend/alembic/versions/35e6853a51d5_server_default_chosen_assistants.py new file mode 100644 index 00000000000..0db3ca849bf --- /dev/null +++ b/backend/alembic/versions/35e6853a51d5_server_default_chosen_assistants.py @@ -0,0 +1,64 @@ +"""server default chosen assistants + +Revision ID: 35e6853a51d5 
+Revises: c99d76fcd298 +Create Date: 2024-09-13 13:20:32.885317 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "35e6853a51d5" +down_revision = "c99d76fcd298" +branch_labels = None +depends_on = None + +DEFAULT_ASSISTANTS = [-2, -1, 0] + + +def upgrade() -> None: + # Step 1: Update any NULL values to the default value + # This upgrades existing users without ordered assistant + # to have default assistants set to visible assistants which are + # accessible by them. + op.execute( + """ + UPDATE "user" u + SET chosen_assistants = ( + SELECT jsonb_agg( + p.id ORDER BY + COALESCE(p.display_priority, 2147483647) ASC, + p.id ASC + ) + FROM persona p + LEFT JOIN persona__user pu ON p.id = pu.persona_id AND pu.user_id = u.id + WHERE p.is_visible = true + AND (p.is_public = true OR pu.user_id IS NOT NULL) + ) + WHERE chosen_assistants IS NULL + OR chosen_assistants = 'null' + OR jsonb_typeof(chosen_assistants) = 'null' + OR (jsonb_typeof(chosen_assistants) = 'string' AND chosen_assistants = '"null"') + """ + ) + + # Step 2: Alter the column to make it non-nullable + op.alter_column( + "user", + "chosen_assistants", + type_=postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default=sa.text(f"'{DEFAULT_ASSISTANTS}'::jsonb"), + ) + + +def downgrade() -> None: + op.alter_column( + "user", + "chosen_assistants", + type_=postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + server_default=None, + ) diff --git a/backend/alembic/versions/46b7a812670f_fix_user__external_user_group_id_fk.py b/backend/alembic/versions/46b7a812670f_fix_user__external_user_group_id_fk.py new file mode 100644 index 00000000000..437d7a97e76 --- /dev/null +++ b/backend/alembic/versions/46b7a812670f_fix_user__external_user_group_id_fk.py @@ -0,0 +1,46 @@ +"""fix_user__external_user_group_id_fk + +Revision ID: 46b7a812670f +Revises: f32615f71aeb +Create Date: 2024-09-23 12:58:03.894038 + +""" +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "46b7a812670f" +down_revision = "f32615f71aeb" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Drop the existing primary key + op.drop_constraint( + "user__external_user_group_id_pkey", + "user__external_user_group_id", + type_="primary", + ) + + # Add the new composite primary key + op.create_primary_key( + "user__external_user_group_id_pkey", + "user__external_user_group_id", + ["user_id", "external_user_group_id", "cc_pair_id"], + ) + + +def downgrade() -> None: + # Drop the composite primary key + op.drop_constraint( + "user__external_user_group_id_pkey", + "user__external_user_group_id", + type_="primary", + ) + # Delete all entries from the table + op.execute("DELETE FROM user__external_user_group_id") + + # Recreate the original primary key on user_id + op.create_primary_key( + "user__external_user_group_id_pkey", "user__external_user_group_id", ["user_id"] + ) diff --git a/backend/alembic/versions/52a219fb5233_add_last_synced_and_last_modified_to_document_table.py b/backend/alembic/versions/52a219fb5233_add_last_synced_and_last_modified_to_document_table.py new file mode 100644 index 00000000000..068342095b6 --- /dev/null +++ b/backend/alembic/versions/52a219fb5233_add_last_synced_and_last_modified_to_document_table.py @@ -0,0 +1,66 @@ +"""Add last synced and last modified to document table + +Revision ID: 52a219fb5233 +Revises: f7e58d357687 +Create Date: 2024-08-28 17:40:46.077470 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.sql import func + +# revision identifiers, used by Alembic. +revision = "52a219fb5233" +down_revision = "f7e58d357687" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # last modified represents the last time anything needing syncing to vespa changed + # including row metadata and the document itself. This obviously does not include + # the last_synced column. + op.add_column( + "document", + sa.Column( + "last_modified", + sa.DateTime(timezone=True), + nullable=False, + server_default=func.now(), + ), + ) + + # last synced represents the last time this document was synced to Vespa + op.add_column( + "document", + sa.Column("last_synced", sa.DateTime(timezone=True), nullable=True), + ) + + # Set last_synced to the same value as last_modified for existing rows + op.execute( + """ + UPDATE document + SET last_synced = last_modified + """ + ) + + op.create_index( + op.f("ix_document_last_modified"), + "document", + ["last_modified"], + unique=False, + ) + + op.create_index( + op.f("ix_document_last_synced"), + "document", + ["last_synced"], + unique=False, + ) + + +def downgrade() -> None: + op.drop_index(op.f("ix_document_last_synced"), table_name="document") + op.drop_index(op.f("ix_document_last_modified"), table_name="document") + op.drop_column("document", "last_synced") + op.drop_column("document", "last_modified") diff --git a/backend/alembic/versions/55546a7967ee_assistant_rework.py b/backend/alembic/versions/55546a7967ee_assistant_rework.py new file mode 100644 index 00000000000..a027321a7c6 --- /dev/null +++ b/backend/alembic/versions/55546a7967ee_assistant_rework.py @@ -0,0 +1,79 @@ +"""assistant_rework + +Revision ID: 55546a7967ee +Revises: 61ff3651add4 +Create Date: 2024-09-18 17:00:23.755399 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + + +# revision identifiers, used by Alembic. 
+revision = "55546a7967ee" +down_revision = "61ff3651add4" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Reworking persona and user tables for new assistant features + # keep track of user's chosen assistants separate from their `ordering` + op.add_column("persona", sa.Column("builtin_persona", sa.Boolean(), nullable=True)) + op.execute("UPDATE persona SET builtin_persona = default_persona") + op.alter_column("persona", "builtin_persona", nullable=False) + op.drop_index("_default_persona_name_idx", table_name="persona") + op.create_index( + "_builtin_persona_name_idx", + "persona", + ["name"], + unique=True, + postgresql_where=sa.text("builtin_persona = true"), + ) + + op.add_column( + "user", sa.Column("visible_assistants", postgresql.JSONB(), nullable=True) + ) + op.add_column( + "user", sa.Column("hidden_assistants", postgresql.JSONB(), nullable=True) + ) + op.execute( + "UPDATE \"user\" SET visible_assistants = '[]'::jsonb, hidden_assistants = '[]'::jsonb" + ) + op.alter_column( + "user", + "visible_assistants", + nullable=False, + server_default=sa.text("'[]'::jsonb"), + ) + op.alter_column( + "user", + "hidden_assistants", + nullable=False, + server_default=sa.text("'[]'::jsonb"), + ) + op.drop_column("persona", "default_persona") + op.add_column( + "persona", sa.Column("is_default_persona", sa.Boolean(), nullable=True) + ) + + +def downgrade() -> None: + # Reverting changes made in upgrade + op.drop_column("user", "hidden_assistants") + op.drop_column("user", "visible_assistants") + op.drop_index("_builtin_persona_name_idx", table_name="persona") + + op.drop_column("persona", "is_default_persona") + op.add_column("persona", sa.Column("default_persona", sa.Boolean(), nullable=True)) + op.execute("UPDATE persona SET default_persona = builtin_persona") + op.alter_column("persona", "default_persona", nullable=False) + op.drop_column("persona", "builtin_persona") + op.create_index( + "_default_persona_name_idx", + "persona", + ["name"], + unique=True, + postgresql_where=sa.text("default_persona = true"), + ) diff --git a/backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py b/backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py new file mode 100644 index 00000000000..0e49b603cec --- /dev/null +++ b/backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py @@ -0,0 +1,35 @@ +"""match_any_keywords flag for standard answers + +Revision ID: 5c7fdadae813 +Revises: efb35676026c +Create Date: 2024-09-13 18:52:59.256478 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "5c7fdadae813" +down_revision = "efb35676026c" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "standard_answer", + sa.Column( + "match_any_keywords", + sa.Boolean(), + nullable=False, + server_default=sa.false(), + ), + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("standard_answer", "match_any_keywords") + # ### end Alembic commands ### diff --git a/backend/alembic/versions/61ff3651add4_add_permission_syncing.py b/backend/alembic/versions/61ff3651add4_add_permission_syncing.py new file mode 100644 index 00000000000..697e1060e0b --- /dev/null +++ b/backend/alembic/versions/61ff3651add4_add_permission_syncing.py @@ -0,0 +1,162 @@ +"""Add Permission Syncing + +Revision ID: 61ff3651add4 +Revises: 1b8206b29c5d +Create Date: 2024-09-05 13:57:11.770413 + +""" +import fastapi_users_db_sqlalchemy + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "61ff3651add4" +down_revision = "1b8206b29c5d" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Admin user who set up connectors will lose access to the docs temporarily + # only way currently to give back access is to rerun from beginning + op.add_column( + "connector_credential_pair", + sa.Column( + "access_type", + sa.String(), + nullable=True, + ), + ) + op.execute( + "UPDATE connector_credential_pair SET access_type = 'PUBLIC' WHERE is_public = true" + ) + op.execute( + "UPDATE connector_credential_pair SET access_type = 'PRIVATE' WHERE is_public = false" + ) + op.alter_column("connector_credential_pair", "access_type", nullable=False) + + op.add_column( + "connector_credential_pair", + sa.Column( + "auto_sync_options", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + ), + ) + op.add_column( + "connector_credential_pair", + sa.Column("last_time_perm_sync", sa.DateTime(timezone=True), nullable=True), + ) + op.drop_column("connector_credential_pair", "is_public") + + op.add_column( + "document", + sa.Column("external_user_emails", postgresql.ARRAY(sa.String()), nullable=True), + ) + op.add_column( + "document", + sa.Column( + "external_user_group_ids", postgresql.ARRAY(sa.String()), nullable=True + ), + ) + op.add_column( + "document", + sa.Column("is_public", sa.Boolean(), nullable=True), + ) + + op.create_table( + "user__external_user_group_id", + sa.Column( + "user_id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False + ), + sa.Column("external_user_group_id", sa.String(), nullable=False), + sa.Column("cc_pair_id", sa.Integer(), nullable=False), + sa.PrimaryKeyConstraint("user_id"), + ) + + op.drop_column("external_permission", "user_id") + op.drop_column("email_to_external_user_cache", "user_id") + op.drop_table("permission_sync_run") + op.drop_table("external_permission") + op.drop_table("email_to_external_user_cache") + + +def downgrade() -> None: + op.add_column( + "connector_credential_pair", + sa.Column("is_public", sa.BOOLEAN(), nullable=True), + ) + op.execute( + "UPDATE connector_credential_pair SET is_public = (access_type = 'PUBLIC')" + ) + op.alter_column("connector_credential_pair", "is_public", nullable=False) + + op.drop_column("connector_credential_pair", "auto_sync_options") + op.drop_column("connector_credential_pair", "access_type") + op.drop_column("connector_credential_pair", "last_time_perm_sync") + op.drop_column("document", "external_user_emails") + op.drop_column("document", "external_user_group_ids") + op.drop_column("document", "is_public") + + op.drop_table("user__external_user_group_id") + + # Drop the enum type at the end of the downgrade + op.create_table( + "permission_sync_run", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "source_type", + sa.String(), + nullable=False, + ), + 
sa.Column("update_type", sa.String(), nullable=False), + sa.Column("cc_pair_id", sa.Integer(), nullable=True), + sa.Column( + "status", + sa.String(), + nullable=False, + ), + sa.Column("error_msg", sa.Text(), nullable=True), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["cc_pair_id"], + ["connector_credential_pair.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "external_permission", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("user_id", sa.UUID(), nullable=True), + sa.Column("user_email", sa.String(), nullable=False), + sa.Column( + "source_type", + sa.String(), + nullable=False, + ), + sa.Column("external_permission_group", sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "email_to_external_user_cache", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("external_user_id", sa.String(), nullable=False), + sa.Column("user_id", sa.UUID(), nullable=True), + sa.Column("user_email", sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) diff --git a/backend/alembic/versions/797089dfb4d2_persona_start_date.py b/backend/alembic/versions/797089dfb4d2_persona_start_date.py new file mode 100644 index 00000000000..52ade3dea4e --- /dev/null +++ b/backend/alembic/versions/797089dfb4d2_persona_start_date.py @@ -0,0 +1,27 @@ +"""persona_start_date + +Revision ID: 797089dfb4d2 +Revises: 55546a7967ee +Create Date: 2024-09-11 14:51:49.785835 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "797089dfb4d2" +down_revision = "55546a7967ee" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "persona", + sa.Column("search_start_date", sa.DateTime(timezone=True), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("persona", "search_start_date") diff --git a/backend/alembic/versions/a3795dce87be_migration_confluence_to_be_explicit.py b/backend/alembic/versions/a3795dce87be_migration_confluence_to_be_explicit.py new file mode 100644 index 00000000000..20e33d0e227 --- /dev/null +++ b/backend/alembic/versions/a3795dce87be_migration_confluence_to_be_explicit.py @@ -0,0 +1,158 @@ +"""migration confluence to be explicit + +Revision ID: a3795dce87be +Revises: 1f60f60c3401 +Create Date: 2024-09-01 13:52:12.006740 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy.sql import table, column + +revision = "a3795dce87be" +down_revision = "1f60f60c3401" +branch_labels: None = None +depends_on: None = None + + +def extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str, str, bool]: + from urllib.parse import urlparse + + def _extract_confluence_keys_from_cloud_url(wiki_url: str) -> tuple[str, str, str]: + parsed_url = urlparse(wiki_url) + wiki_base = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.split('/spaces')[0]}" + path_parts = parsed_url.path.split("/") + space = path_parts[3] + page_id = path_parts[5] if len(path_parts) > 5 else "" + return wiki_base, space, page_id + + def _extract_confluence_keys_from_datacenter_url( + wiki_url: str, + ) -> tuple[str, str, str]: + DISPLAY = "/display/" + PAGE = "/pages/" + parsed_url = urlparse(wiki_url) + wiki_base = 
f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.split(DISPLAY)[0]}" + space = DISPLAY.join(parsed_url.path.split(DISPLAY)[1:]).split("/")[0] + page_id = "" + if (content := parsed_url.path.split(PAGE)) and len(content) > 1: + page_id = content[1] + return wiki_base, space, page_id + + is_confluence_cloud = ( + ".atlassian.net/wiki/spaces/" in wiki_url + or ".jira.com/wiki/spaces/" in wiki_url + ) + + if is_confluence_cloud: + wiki_base, space, page_id = _extract_confluence_keys_from_cloud_url(wiki_url) + else: + wiki_base, space, page_id = _extract_confluence_keys_from_datacenter_url( + wiki_url + ) + + return wiki_base, space, page_id, is_confluence_cloud + + +def reconstruct_confluence_url( + wiki_base: str, space: str, page_id: str, is_cloud: bool +) -> str: + if is_cloud: + url = f"{wiki_base}/spaces/{space}" + if page_id: + url += f"/pages/{page_id}" + else: + url = f"{wiki_base}/display/{space}" + if page_id: + url += f"/pages/{page_id}" + return url + + +def upgrade() -> None: + connector = table( + "connector", + column("id", sa.Integer), + column("source", sa.String()), + column("input_type", sa.String()), + column("connector_specific_config", postgresql.JSONB), + ) + + # Fetch all Confluence connectors + connection = op.get_bind() + confluence_connectors = connection.execute( + sa.select(connector).where( + sa.and_( + connector.c.source == "CONFLUENCE", connector.c.input_type == "POLL" + ) + ) + ).fetchall() + + for row in confluence_connectors: + config = row.connector_specific_config + wiki_page_url = config["wiki_page_url"] + wiki_base, space, page_id, is_cloud = extract_confluence_keys_from_url( + wiki_page_url + ) + + new_config = { + "wiki_base": wiki_base, + "space": space, + "page_id": page_id, + "is_cloud": is_cloud, + } + + for key, value in config.items(): + if key not in ["wiki_page_url"]: + new_config[key] = value + + op.execute( + connector.update() + .where(connector.c.id == row.id) + .values(connector_specific_config=new_config) + ) + + +def downgrade() -> None: + connector = table( + "connector", + column("id", sa.Integer), + column("source", sa.String()), + column("input_type", sa.String()), + column("connector_specific_config", postgresql.JSONB), + ) + + confluence_connectors = ( + op.get_bind() + .execute( + sa.select(connector).where( + connector.c.source == "CONFLUENCE", connector.c.input_type == "POLL" + ) + ) + .fetchall() + ) + + for row in confluence_connectors: + config = row.connector_specific_config + if all(key in config for key in ["wiki_base", "space", "is_cloud"]): + wiki_page_url = reconstruct_confluence_url( + config["wiki_base"], + config["space"], + config.get("page_id", ""), + config["is_cloud"], + ) + + new_config = {"wiki_page_url": wiki_page_url} + new_config.update( + { + k: v + for k, v in config.items() + if k not in ["wiki_base", "space", "page_id", "is_cloud"] + } + ) + + op.execute( + connector.update() + .where(connector.c.id == row.id) + .values(connector_specific_config=new_config) + ) diff --git a/backend/alembic/versions/ba98eba0f66a_add_support_for_litellm_proxy_in_.py b/backend/alembic/versions/ba98eba0f66a_add_support_for_litellm_proxy_in_.py new file mode 100644 index 00000000000..2d45a15f2c6 --- /dev/null +++ b/backend/alembic/versions/ba98eba0f66a_add_support_for_litellm_proxy_in_.py @@ -0,0 +1,26 @@ +"""add support for litellm proxy in reranking + +Revision ID: ba98eba0f66a +Revises: bceb1e139447 +Create Date: 2024-09-06 10:36:04.507332 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision 
identifiers, used by Alembic. +revision = "ba98eba0f66a" +down_revision = "bceb1e139447" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "search_settings", sa.Column("rerank_api_url", sa.String(), nullable=True) + ) + + +def downgrade() -> None: + op.drop_column("search_settings", "rerank_api_url") diff --git a/backend/alembic/versions/bceb1e139447_add_base_url_to_cloudembeddingprovider.py b/backend/alembic/versions/bceb1e139447_add_base_url_to_cloudembeddingprovider.py new file mode 100644 index 00000000000..968500e6aaf --- /dev/null +++ b/backend/alembic/versions/bceb1e139447_add_base_url_to_cloudembeddingprovider.py @@ -0,0 +1,26 @@ +"""Add base_url to CloudEmbeddingProvider + +Revision ID: bceb1e139447 +Revises: a3795dce87be +Create Date: 2024-08-28 17:00:52.554580 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "bceb1e139447" +down_revision = "a3795dce87be" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "embedding_provider", sa.Column("api_url", sa.String(), nullable=True) + ) + + +def downgrade() -> None: + op.drop_column("embedding_provider", "api_url") diff --git a/backend/alembic/versions/bd2921608c3a_non_nullable_default_persona.py b/backend/alembic/versions/bd2921608c3a_non_nullable_default_persona.py new file mode 100644 index 00000000000..834d3f6731c --- /dev/null +++ b/backend/alembic/versions/bd2921608c3a_non_nullable_default_persona.py @@ -0,0 +1,43 @@ +"""non nullable default persona + +Revision ID: bd2921608c3a +Revises: 797089dfb4d2 +Create Date: 2024-09-20 10:28:37.992042 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "bd2921608c3a" +down_revision = "797089dfb4d2" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Set existing NULL values to False + op.execute( + "UPDATE persona SET is_default_persona = FALSE WHERE is_default_persona IS NULL" + ) + + # Alter the column to be not nullable with a default value of False + op.alter_column( + "persona", + "is_default_persona", + existing_type=sa.Boolean(), + nullable=False, + server_default=sa.text("false"), + ) + + +def downgrade() -> None: + # Revert the changes + op.alter_column( + "persona", + "is_default_persona", + existing_type=sa.Boolean(), + nullable=True, + server_default=None, + ) diff --git a/backend/alembic/versions/c99d76fcd298_add_nullable_to_persona_id_in_chat_.py b/backend/alembic/versions/c99d76fcd298_add_nullable_to_persona_id_in_chat_.py new file mode 100644 index 00000000000..58fcf482c85 --- /dev/null +++ b/backend/alembic/versions/c99d76fcd298_add_nullable_to_persona_id_in_chat_.py @@ -0,0 +1,31 @@ +"""add nullable to persona id in Chat Session + +Revision ID: c99d76fcd298 +Revises: 5c7fdadae813 +Create Date: 2024-07-09 19:27:01.579697 + +""" + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
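# NOTE: illustrative sketch (not part of this diff) of the key extraction that
# migration a3795dce87be performs on stored Confluence cloud URLs; the URL below is a
# made-up example and the helper is a simplified copy of the migration's cloud-URL branch.
from urllib.parse import urlparse


def extract_cloud_keys(wiki_url: str) -> tuple[str, str, str]:
    parsed = urlparse(wiki_url)
    # Everything before "/spaces" is the wiki base; the space key and optional page id
    # follow as fixed path segments.
    wiki_base = f"{parsed.scheme}://{parsed.netloc}{parsed.path.split('/spaces')[0]}"
    parts = parsed.path.split("/")
    return wiki_base, parts[3], parts[5] if len(parts) > 5 else ""


wiki_base, space, page_id = extract_cloud_keys(
    "https://example.atlassian.net/wiki/spaces/ENG/pages/12345/Some+Page"
)
assert (wiki_base, space, page_id) == ("https://example.atlassian.net/wiki", "ENG", "12345")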
+revision = "c99d76fcd298" +down_revision = "5c7fdadae813" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.alter_column( + "chat_session", "persona_id", existing_type=sa.INTEGER(), nullable=True + ) + + +def downgrade() -> None: + op.alter_column( + "chat_session", + "persona_id", + existing_type=sa.INTEGER(), + nullable=False, + ) diff --git a/backend/alembic/versions/efb35676026c_standard_answer_match_regex_flag.py b/backend/alembic/versions/efb35676026c_standard_answer_match_regex_flag.py new file mode 100644 index 00000000000..e67d31b81ff --- /dev/null +++ b/backend/alembic/versions/efb35676026c_standard_answer_match_regex_flag.py @@ -0,0 +1,32 @@ +"""standard answer match_regex flag + +Revision ID: efb35676026c +Revises: 0ebb1d516877 +Create Date: 2024-09-11 13:55:46.101149 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "efb35676026c" +down_revision = "0ebb1d516877" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "standard_answer", + sa.Column( + "match_regex", sa.Boolean(), nullable=False, server_default=sa.false() + ), + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("standard_answer", "match_regex") + # ### end Alembic commands ### diff --git a/backend/alembic/versions/f32615f71aeb_add_custom_headers_to_tools.py b/backend/alembic/versions/f32615f71aeb_add_custom_headers_to_tools.py new file mode 100644 index 00000000000..904059e6ee3 --- /dev/null +++ b/backend/alembic/versions/f32615f71aeb_add_custom_headers_to_tools.py @@ -0,0 +1,26 @@ +"""add custom headers to tools + +Revision ID: f32615f71aeb +Revises: bd2921608c3a +Create Date: 2024-09-12 20:26:38.932377 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "f32615f71aeb" +down_revision = "bd2921608c3a" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "tool", sa.Column("custom_headers", postgresql.JSONB(), nullable=True) + ) + + +def downgrade() -> None: + op.drop_column("tool", "custom_headers") diff --git a/backend/alembic/versions/f7e58d357687_add_has_web_column_to_user.py b/backend/alembic/versions/f7e58d357687_add_has_web_column_to_user.py new file mode 100644 index 00000000000..c2a131d6002 --- /dev/null +++ b/backend/alembic/versions/f7e58d357687_add_has_web_column_to_user.py @@ -0,0 +1,26 @@ +"""add has_web_login column to user + +Revision ID: f7e58d357687 +Revises: ba98eba0f66a +Create Date: 2024-09-07 20:20:54.522620 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "f7e58d357687" +down_revision = "ba98eba0f66a" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "user", + sa.Column("has_web_login", sa.Boolean(), nullable=False, server_default="true"), + ) + + +def downgrade() -> None: + op.drop_column("user", "has_web_login") diff --git a/backend/danswer/access/access.py b/backend/danswer/access/access.py index 5501980ab48..7c879099594 100644 --- a/backend/danswer/access/access.py +++ b/backend/danswer/access/access.py @@ -1,26 +1,81 @@ from sqlalchemy.orm import Session from danswer.access.models import DocumentAccess -from danswer.access.utils import prefix_user +from danswer.access.utils import prefix_user_email from danswer.configs.constants import PUBLIC_DOC_PAT -from danswer.db.document import get_acccess_info_for_documents +from danswer.db.document import get_access_info_for_document +from danswer.db.document import get_access_info_for_documents from danswer.db.models import User from danswer.utils.variable_functionality import fetch_versioned_implementation +def _get_access_for_document( + document_id: str, + db_session: Session, +) -> DocumentAccess: + info = get_access_info_for_document( + db_session=db_session, + document_id=document_id, + ) + + return DocumentAccess.build( + user_emails=info[1] if info and info[1] else [], + user_groups=[], + external_user_emails=[], + external_user_group_ids=[], + is_public=info[2] if info else False, + ) + + +def get_access_for_document( + document_id: str, + db_session: Session, +) -> DocumentAccess: + versioned_get_access_for_document_fn = fetch_versioned_implementation( + "danswer.access.access", "_get_access_for_document" + ) + return versioned_get_access_for_document_fn(document_id, db_session) # type: ignore + + +def get_null_document_access() -> DocumentAccess: + return DocumentAccess( + user_emails=set(), + user_groups=set(), + is_public=False, + external_user_emails=set(), + external_user_group_ids=set(), + ) + + def _get_access_for_documents( document_ids: list[str], db_session: Session, ) -> dict[str, DocumentAccess]: - document_access_info = get_acccess_info_for_documents( + document_access_info = get_access_info_for_documents( db_session=db_session, document_ids=document_ids, ) - return { - document_id: DocumentAccess.build(user_ids, [], is_public) - for document_id, user_ids, is_public in document_access_info + doc_access = { + document_id: DocumentAccess( + user_emails=set([email for email in user_emails if email]), + # MIT version will wipe all groups and external groups on update + user_groups=set(), + is_public=is_public, + external_user_emails=set(), + external_user_group_ids=set(), + ) + for document_id, user_emails, is_public in document_access_info } + # Sometimes the document has not be indexed by the indexing job yet, in those cases + # the document does not exist and so we use least permissive. Specifically the EE version + # checks the MIT version permissions and creates a superset. This ensures that this flow + # does not fail even if the Document has not yet been indexed. + for doc_id in document_ids: + if doc_id not in doc_access: + doc_access[doc_id] = get_null_document_access() + return doc_access + def get_access_for_documents( document_ids: list[str], @@ -42,7 +97,7 @@ def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]: matches one entry in the returned set. 
""" if user: - return {prefix_user(str(user.id)), PUBLIC_DOC_PAT} + return {prefix_user_email(user.email), PUBLIC_DOC_PAT} return {PUBLIC_DOC_PAT} diff --git a/backend/danswer/access/models.py b/backend/danswer/access/models.py index a87e2d94f25..af5a021ca97 100644 --- a/backend/danswer/access/models.py +++ b/backend/danswer/access/models.py @@ -1,30 +1,72 @@ from dataclasses import dataclass -from uuid import UUID -from danswer.access.utils import prefix_user +from danswer.access.utils import prefix_external_group +from danswer.access.utils import prefix_user_email from danswer.access.utils import prefix_user_group from danswer.configs.constants import PUBLIC_DOC_PAT @dataclass(frozen=True) -class DocumentAccess: - user_ids: set[str] # stringified UUIDs - user_groups: set[str] # names of user groups associated with this document +class ExternalAccess: + # Emails of external users with access to the doc externally + external_user_emails: set[str] + # Names or external IDs of groups with access to the doc + external_user_group_ids: set[str] + # Whether the document is public in the external system or Danswer is_public: bool - def to_acl(self) -> list[str]: - return ( - [prefix_user(user_id) for user_id in self.user_ids] + +@dataclass(frozen=True) +class DocumentAccess(ExternalAccess): + # User emails for Danswer users, None indicates admin + user_emails: set[str | None] + # Names of user groups associated with this document + user_groups: set[str] + + def to_acl(self) -> set[str]: + return set( + [ + prefix_user_email(user_email) + for user_email in self.user_emails + if user_email + ] + [prefix_user_group(group_name) for group_name in self.user_groups] + + [ + prefix_user_email(user_email) + for user_email in self.external_user_emails + ] + + [ + # The group names are already prefixed by the source type + # This adds an additional prefix of "external_group:" + prefix_external_group(group_name) + for group_name in self.external_user_group_ids + ] + ([PUBLIC_DOC_PAT] if self.is_public else []) ) @classmethod def build( - cls, user_ids: list[UUID | None], user_groups: list[str], is_public: bool + cls, + user_emails: list[str | None], + user_groups: list[str], + external_user_emails: list[str], + external_user_group_ids: list[str], + is_public: bool, ) -> "DocumentAccess": return cls( - user_ids={str(user_id) for user_id in user_ids if user_id}, + external_user_emails={ + prefix_user_email(external_email) + for external_email in external_user_emails + }, + external_user_group_ids={ + prefix_external_group(external_group_id) + for external_group_id in external_user_group_ids + }, + user_emails={ + prefix_user_email(user_email) + for user_email in user_emails + if user_email + }, user_groups=set(user_groups), is_public=is_public, ) diff --git a/backend/danswer/access/utils.py b/backend/danswer/access/utils.py index 060560eaedc..82abf9785f8 100644 --- a/backend/danswer/access/utils.py +++ b/backend/danswer/access/utils.py @@ -1,10 +1,24 @@ -def prefix_user(user_id: str) -> str: - """Prefixes a user ID to eliminate collision with group names. - This assumes that groups are prefixed with a different prefix.""" - return f"user_id:{user_id}" +from danswer.configs.constants import DocumentSource + + +def prefix_user_email(user_email: str) -> str: + """Prefixes a user email to eliminate collision with group names. 
+ This applies to both a Danswer user and an External user, this is to make the query time + more efficient""" + return f"user_email:{user_email}" def prefix_user_group(user_group_name: str) -> str: - """Prefixes a user group name to eliminate collision with user IDs. + """Prefixes a user group name to eliminate collision with user emails. This assumes that user ids are prefixed with a different prefix.""" return f"group:{user_group_name}" + + +def prefix_external_group(ext_group_name: str) -> str: + """Prefixes an external group name to eliminate collision with user emails / Danswer groups.""" + return f"external_group:{ext_group_name}" + + +def prefix_group_w_source(ext_group_name: str, source: DocumentSource) -> str: + """External groups may collide across sources, every source needs its own prefix.""" + return f"{source.value.upper()}_{ext_group_name}" diff --git a/backend/danswer/auth/schemas.py b/backend/danswer/auth/schemas.py index 9e0553991cc..db8a97ceb04 100644 --- a/backend/danswer/auth/schemas.py +++ b/backend/danswer/auth/schemas.py @@ -33,7 +33,9 @@ class UserRead(schemas.BaseUser[uuid.UUID]): class UserCreate(schemas.BaseUserCreate): role: UserRole = UserRole.BASIC + has_web_login: bool | None = True class UserUpdate(schemas.BaseUserUpdate): role: UserRole + has_web_login: bool | None = True diff --git a/backend/danswer/auth/users.py b/backend/danswer/auth/users.py index eec1db412e0..a9ceb2254f0 100644 --- a/backend/danswer/auth/users.py +++ b/backend/danswer/auth/users.py @@ -16,7 +16,9 @@ from fastapi import Request from fastapi import Response from fastapi import status +from fastapi.security import OAuth2PasswordRequestForm from fastapi_users import BaseUserManager +from fastapi_users import exceptions from fastapi_users import FastAPIUsers from fastapi_users import models from fastapi_users import schemas @@ -33,6 +35,7 @@ from danswer.auth.invited_users import get_invited_users from danswer.auth.schemas import UserCreate from danswer.auth.schemas import UserRole +from danswer.auth.schemas import UserUpdate from danswer.configs.app_configs import AUTH_TYPE from danswer.configs.app_configs import DISABLE_AUTH from danswer.configs.app_configs import EMAIL_FROM @@ -67,23 +70,6 @@ logger = setup_logger() -def validate_curator_request(groups: list | None, is_public: bool) -> None: - if is_public: - detail = "Curators cannot create public objects" - logger.error(detail) - raise HTTPException( - status_code=401, - detail=detail, - ) - if not groups: - detail = "Curators must specify 1+ groups" - logger.error(detail) - raise HTTPException( - status_code=401, - detail=detail, - ) - - def is_user_admin(user: User | None) -> bool: if AUTH_TYPE == AuthType.DISABLED: return True @@ -201,16 +187,36 @@ async def create( user_create: schemas.UC | UserCreate, safe: bool = False, request: Optional[Request] = None, - ) -> models.UP: + ) -> User: verify_email_is_invited(user_create.email) verify_email_domain(user_create.email) # if hasattr(user_create, "role"): - # user_count = await get_user_count() - # if user_count == 0 or user_create.email in get_default_admin_user_emails(): - # user_create.role = UserRole.ADMIN - # else: - # user_create.role = UserRole.BASIC - return await super().create(user_create, safe=safe, request=request) # type: ignore + # user_count = await get_user_count() + # if user_count == 0 or user_create.email in get_default_admin_user_emails(): + # user_create.role = UserRole.ADMIN + # else: + # user_create.role = UserRole.BASIC + user = None + try: + user = await 
super().create(user_create, safe=safe, request=request) # type: ignore + except exceptions.UserAlreadyExists: + user = await self.get_by_email(user_create.email) + # Handle case where user has used product outside of web and is now creating an account through web + if ( + not user.has_web_login + and hasattr(user_create, "has_web_login") + and user_create.has_web_login + ): + user_update = UserUpdate( + password=user_create.password, + has_web_login=True, + role=user_create.role, + is_verified=user_create.is_verified, + ) + user = await self.update(user_update, user) + else: + raise exceptions.UserAlreadyExists() + return user async def oauth_callback( self: "BaseUserManager[models.UOAP, models.ID]", @@ -251,6 +257,18 @@ async def oauth_callback( if user.oidc_expiry and not TRACK_EXTERNAL_IDP_EXPIRY: await self.user_db.update(user, update_dict={"oidc_expiry": None}) + # Handle case where user has used product outside of web and is now creating an account through web + if not user.has_web_login: + await self.user_db.update( + user, + update_dict={ + "is_verified": is_verified_by_default, + "has_web_login": True, + }, + ) + user.is_verified = is_verified_by_default + user.has_web_login = True + return user async def on_after_register( @@ -279,6 +297,32 @@ async def on_after_request_verify( send_user_verification_email(user.email, token) + async def authenticate( + self, credentials: OAuth2PasswordRequestForm + ) -> Optional[User]: + try: + user = await self.get_by_email(credentials.username) + except exceptions.UserNotExists: + self.password_helper.hash(credentials.password) + return None + + if not user.has_web_login: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="NO_WEB_LOGIN_AND_HAS_NO_PASSWORD", + ) + + verified, updated_password_hash = self.password_helper.verify_and_update( + credentials.password, user.hashed_password + ) + if not verified: + return None + + if updated_password_hash is not None: + await self.user_db.update(user, {"hashed_password": updated_password_hash}) + + return user + async def get_user_manager( user_db: SQLAlchemyUserDatabase = Depends(get_user_db), @@ -381,6 +425,7 @@ async def optional_user( async def double_check_user( user: User | None, optional: bool = DISABLE_AUTH, + include_expired: bool = False, ) -> User | None: if optional: return None @@ -397,7 +442,11 @@ async def double_check_user( detail="Access denied. User is not verified.", ) - if user.oidc_expiry and user.oidc_expiry < datetime.now(timezone.utc): + if ( + user.oidc_expiry + and user.oidc_expiry < datetime.now(timezone.utc) + and not include_expired + ): raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail="Access denied. 
User's OIDC token has expired.", @@ -406,6 +455,12 @@ async def double_check_user( return user +async def current_user_with_expired_token( + user: User | None = Depends(optional_user), +) -> User | None: + return await double_check_user(user, include_expired=True) + + async def current_user( user: User | None = Depends(optional_user), ) -> User | None: diff --git a/backend/danswer/background/celery/celery_app.py b/backend/danswer/background/celery/celery_app.py index ffd805c2986..0440f275c36 100644 --- a/backend/danswer/background/celery/celery_app.py +++ b/backend/danswer/background/celery/celery_app.py @@ -1,464 +1,447 @@ -import json +import logging +import time from datetime import timedelta from typing import Any -from typing import cast - -from celery import Celery # type: ignore -from celery.contrib.abortable import AbortableTask # type: ignore -from celery.exceptions import TaskRevokedError -from sqlalchemy import text -from sqlalchemy.orm import Session - -from danswer.background.celery.celery_utils import extract_ids_from_runnable_connector -from danswer.background.celery.celery_utils import should_kick_off_deletion_of_cc_pair -from danswer.background.celery.celery_utils import should_prune_cc_pair -from danswer.background.celery.celery_utils import should_sync_doc_set -from danswer.background.connector_deletion import delete_connector_credential_pair -from danswer.background.connector_deletion import delete_connector_credential_pair_batch -from danswer.background.task_utils import build_celery_task_wrapper -from danswer.background.task_utils import name_cc_cleanup_task -from danswer.background.task_utils import name_cc_prune_task -from danswer.background.task_utils import name_document_set_sync_task -from danswer.configs.app_configs import JOB_TIMEOUT -from danswer.configs.constants import POSTGRES_CELERY_APP_NAME -from danswer.configs.constants import PostgresAdvisoryLocks -from danswer.connectors.factory import instantiate_connector -from danswer.connectors.models import InputType -from danswer.db.connector_credential_pair import get_connector_credential_pair -from danswer.db.connector_credential_pair import get_connector_credential_pairs -from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed -from danswer.db.document import get_documents_for_connector_credential_pair -from danswer.db.document import prepare_to_modify_documents -from danswer.db.document_set import delete_document_set -from danswer.db.document_set import fetch_document_sets -from danswer.db.document_set import fetch_document_sets_for_documents -from danswer.db.document_set import fetch_documents_for_document_set_paginated -from danswer.db.document_set import get_document_set_by_id -from danswer.db.document_set import mark_document_set_as_synced -from danswer.db.engine import build_connection_string -from danswer.db.engine import get_sqlalchemy_engine -from danswer.db.engine import SYNC_DB_API -from danswer.db.models import DocumentSet -from danswer.document_index.document_index_utils import get_both_index_names -from danswer.document_index.factory import get_default_document_index -from danswer.document_index.interfaces import UpdateRequest + +import redis +from celery import bootsteps # type: ignore +from celery import Celery +from celery import current_task +from celery import signals +from celery import Task +from celery.exceptions import WorkerShutdown +from celery.signals import beat_init +from celery.signals import worker_init +from celery.signals import worker_ready +from 
celery.signals import worker_shutdown +from celery.states import READY_STATES +from celery.utils.log import get_task_logger + +from danswer.background.celery.celery_redis import RedisConnectorCredentialPair +from danswer.background.celery.celery_redis import RedisConnectorDeletion +from danswer.background.celery.celery_redis import RedisDocumentSet +from danswer.background.celery.celery_redis import RedisUserGroup +from danswer.background.celery.celery_utils import celery_is_worker_primary +from danswer.configs.constants import CELERY_PRIMARY_WORKER_LOCK_TIMEOUT +from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import DanswerRedisLocks +from danswer.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME +from danswer.configs.constants import POSTGRES_CELERY_WORKER_HEAVY_APP_NAME +from danswer.configs.constants import POSTGRES_CELERY_WORKER_LIGHT_APP_NAME +from danswer.configs.constants import POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME +from danswer.db.engine import SqlEngine +from danswer.redis.redis_pool import RedisPool +from danswer.utils.logger import ColoredFormatter +from danswer.utils.logger import PlainFormatter from danswer.utils.logger import setup_logger logger = setup_logger() -connection_string = build_connection_string( - db_api=SYNC_DB_API, app_name=POSTGRES_CELERY_APP_NAME -) -celery_broker_url = f"sqla+{connection_string}" -celery_backend_url = f"db+{connection_string}" -celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url) +# use this within celery tasks to get celery task specific logging +task_logger = get_task_logger(__name__) + +redis_pool = RedisPool() + +celery_app = Celery(__name__) +celery_app.config_from_object( + "danswer.background.celery.celeryconfig" +) # Load configuration from 'celeryconfig.py' + + +@signals.task_postrun.connect +def celery_task_postrun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict | None = None, + retval: Any | None = None, + state: str | None = None, + **kwds: Any, +) -> None: + """We handle this signal in order to remove completed tasks + from their respective tasksets. This allows us to track the progress of document set + and user group syncs. + + This function runs after any task completes (both success and failure) + Note that this signal does not fire on a task that failed to complete and is going + to be retried. 
+ """ + if not task: + return + + task_logger.debug(f"Task {task.name} (ID: {task_id}) completed with state: {state}") + # logger.debug(f"Result: {retval}") + + if state not in READY_STATES: + return + + if not task_id: + return + + if task_id.startswith(RedisConnectorCredentialPair.PREFIX): + r = redis_pool.get_client() + r.srem(RedisConnectorCredentialPair.get_taskset_key(), task_id) + return + + if task_id.startswith(RedisDocumentSet.PREFIX): + r = redis_pool.get_client() + document_set_id = RedisDocumentSet.get_id_from_task_id(task_id) + if document_set_id is not None: + rds = RedisDocumentSet(document_set_id) + r.srem(rds.taskset_key, task_id) + return + + if task_id.startswith(RedisUserGroup.PREFIX): + r = redis_pool.get_client() + usergroup_id = RedisUserGroup.get_id_from_task_id(task_id) + if usergroup_id is not None: + rug = RedisUserGroup(usergroup_id) + r.srem(rug.taskset_key, task_id) + return + + if task_id.startswith(RedisConnectorDeletion.PREFIX): + r = redis_pool.get_client() + cc_pair_id = RedisConnectorDeletion.get_id_from_task_id(task_id) + if cc_pair_id is not None: + rcd = RedisConnectorDeletion(cc_pair_id) + r.srem(rcd.taskset_key, task_id) + return + + +@beat_init.connect +def on_beat_init(sender: Any, **kwargs: Any) -> None: + SqlEngine.set_app_name(POSTGRES_CELERY_BEAT_APP_NAME) + SqlEngine.init_engine(pool_size=2, max_overflow=0) + + +@worker_init.connect +def on_worker_init(sender: Any, **kwargs: Any) -> None: + # decide some initial startup settings based on the celery worker's hostname + # (set at the command line) + hostname = sender.hostname + if hostname.startswith("light"): + SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_LIGHT_APP_NAME) + SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=8) + elif hostname.startswith("heavy"): + SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_HEAVY_APP_NAME) + SqlEngine.init_engine(pool_size=8, max_overflow=0) + else: + SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME) + SqlEngine.init_engine(pool_size=8, max_overflow=0) + + r = redis_pool.get_client() + + WAIT_INTERVAL = 5 + WAIT_LIMIT = 60 + + time_start = time.monotonic() + logger.info("Redis: Readiness check starting.") + while True: + try: + if r.ping(): + break + except Exception: + pass + + time_elapsed = time.monotonic() - time_start + logger.info( + f"Redis: Ping failed. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}" + ) + if time_elapsed > WAIT_LIMIT: + msg = ( + f"Redis: Readiness check did not succeed within the timeout " + f"({WAIT_LIMIT} seconds). Exiting..." + ) + logger.error(msg) + raise WorkerShutdown(msg) + time.sleep(WAIT_INTERVAL) -_SYNC_BATCH_SIZE = 100 + logger.info("Redis: Readiness check succeeded. Continuing...") + if not celery_is_worker_primary(sender): + logger.info("Running as a secondary celery worker.") + logger.info("Waiting for primary worker to be ready...") + time_start = time.monotonic() + while True: + if r.exists(DanswerRedisLocks.PRIMARY_WORKER): + break -##### -# Tasks that need to be run in job queue, registered via APIs -# -# If imports from this module are needed, use local imports to avoid circular importing -##### -@build_celery_task_wrapper(name_cc_cleanup_task) -@celery_app.task(soft_time_limit=JOB_TIMEOUT) -def cleanup_connector_credential_pair_task( - connector_id: int, - credential_id: int, -) -> int: - """Connector deletion task. This is run as an async task because it is a somewhat slow job. 
- Needs to potentially update a large number of Postgres and Vespa docs, including deleting them - or updating the ACL""" - engine = get_sqlalchemy_engine() - with Session(engine) as db_session: - # validate that the connector / credential pair is deletable - cc_pair = get_connector_credential_pair( - db_session=db_session, - connector_id=connector_id, - credential_id=credential_id, - ) - if not cc_pair: - raise ValueError( - f"Cannot run deletion attempt - connector_credential_pair with Connector ID: " - f"{connector_id} and Credential ID: {credential_id} does not exist." + time.monotonic() + time_elapsed = time.monotonic() - time_start + logger.info( + f"Primary worker is not ready yet. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}" ) + if time_elapsed > WAIT_LIMIT: + msg = ( + f"Primary worker was not ready within the timeout. " + f"({WAIT_LIMIT} seconds). Exiting..." + ) + logger.error(msg) + raise WorkerShutdown(msg) - deletion_attempt_disallowed_reason = check_deletion_attempt_is_allowed( - connector_credential_pair=cc_pair, db_session=db_session - ) - if deletion_attempt_disallowed_reason: - raise ValueError(deletion_attempt_disallowed_reason) + time.sleep(WAIT_INTERVAL) - try: - # The bulk of the work is in here, updates Postgres and Vespa - curr_ind_name, sec_ind_name = get_both_index_names(db_session) - document_index = get_default_document_index( - primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name - ) - return delete_connector_credential_pair( - db_session=db_session, - document_index=document_index, - cc_pair=cc_pair, - ) - except Exception as e: - logger.exception(f"Failed to run connector_deletion due to {e}") - raise e - - -@build_celery_task_wrapper(name_cc_prune_task) -@celery_app.task(soft_time_limit=JOB_TIMEOUT) -def prune_documents_task(connector_id: int, credential_id: int) -> None: - """connector pruning task. For a cc pair, this task pulls all document IDs from the source - and compares those IDs to locally stored documents and deletes all locally stored IDs missing - from the most recently pulled document ID list""" - with Session(get_sqlalchemy_engine()) as db_session: - try: - cc_pair = get_connector_credential_pair( - db_session=db_session, - connector_id=connector_id, - credential_id=credential_id, - ) + logger.info("Wait for primary worker completed successfully. Continuing...") + return - if not cc_pair: - logger.warning(f"ccpair not found for {connector_id} {credential_id}") - return + logger.info("Running as the primary celery worker.") - runnable_connector = instantiate_connector( - cc_pair.connector.source, - InputType.PRUNE, - cc_pair.connector.connector_specific_config, - cc_pair.credential, - db_session, - ) + # This is singleton work that should be done on startup exactly once + # by the primary worker + r = redis_pool.get_client() - all_connector_doc_ids: set[str] = extract_ids_from_runnable_connector( - runnable_connector - ) + # For the moment, we're assuming that we are the only primary worker + # that should be running. + # TODO: maybe check for or clean up another zombie primary worker if we detect it + r.delete(DanswerRedisLocks.PRIMARY_WORKER) - all_indexed_document_ids = { - doc.id - for doc in get_documents_for_connector_credential_pair( - db_session=db_session, - connector_id=connector_id, - credential_id=credential_id, - ) - } + # this process wide lock is taken to help other workers start up in order. 
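# NOTE: illustrative sketch (not part of this diff) of the bounded-poll pattern the
# worker_init handler above uses for both the Redis readiness check and the wait for
# the primary worker; names and defaults are placeholders.
import time
from typing import Callable


def wait_until(
    check: Callable[[], bool], wait_interval: float = 5.0, wait_limit: float = 60.0
) -> None:
    start = time.monotonic()
    while not check():
        elapsed = time.monotonic() - start
        if elapsed > wait_limit:
            raise TimeoutError(f"readiness check did not succeed within {wait_limit} seconds")
        time.sleep(wait_interval)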
+ # it is planned to use this lock to enforce singleton behavior on the primary + # worker, since the primary worker does redis cleanup on startup, but this isn't + # implemented yet. + lock = r.lock( + DanswerRedisLocks.PRIMARY_WORKER, + timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT, + ) - doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids) + logger.info("Primary worker lock: Acquire starting.") + acquired = lock.acquire(blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2) + if acquired: + logger.info("Primary worker lock: Acquire succeeded.") + else: + logger.error("Primary worker lock: Acquire failed!") + raise WorkerShutdown("Primary worker lock could not be acquired!") - curr_ind_name, sec_ind_name = get_both_index_names(db_session) - document_index = get_default_document_index( - primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name - ) + sender.primary_worker_lock = lock - if len(doc_ids_to_remove) == 0: - logger.info( - f"No docs to prune from {cc_pair.connector.source} connector" - ) - return + r.delete(DanswerRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK) + r.delete(DanswerRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK) - logger.info( - f"pruning {len(doc_ids_to_remove)} doc(s) from {cc_pair.connector.source} connector" - ) - delete_connector_credential_pair_batch( - document_ids=doc_ids_to_remove, - connector_id=connector_id, - credential_id=credential_id, - document_index=document_index, - ) - except Exception as e: - logger.exception( - f"Failed to run pruning for connector id {connector_id} due to {e}" - ) - raise e - - -@build_celery_task_wrapper(name_document_set_sync_task) -@celery_app.task(soft_time_limit=JOB_TIMEOUT) -def sync_document_set_task(document_set_id: int) -> None: - """For document sets marked as not up to date, sync the state from postgres - into the datastore. 
Also handles deletions.""" - - def _sync_document_batch(document_ids: list[str], db_session: Session) -> None: - logger.debug(f"Syncing document sets for: {document_ids}") - - # Acquires a lock on the documents so that no other process can modify them - with prepare_to_modify_documents( - db_session=db_session, document_ids=document_ids - ): - # get current state of document sets for these documents - document_set_map = { - document_id: document_sets - for document_id, document_sets in fetch_document_sets_for_documents( - document_ids=document_ids, db_session=db_session - ) - } + r.delete(RedisConnectorCredentialPair.get_taskset_key()) + r.delete(RedisConnectorCredentialPair.get_fence_key()) - # update Vespa - curr_ind_name, sec_ind_name = get_both_index_names(db_session) - document_index = get_default_document_index( - primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name - ) - update_requests = [ - UpdateRequest( - document_ids=[document_id], - document_sets=set(document_set_map.get(document_id, [])), - ) - for document_id in document_ids - ] - document_index.update(update_requests=update_requests) + for key in r.scan_iter(RedisDocumentSet.TASKSET_PREFIX + "*"): + r.delete(key) - with Session(get_sqlalchemy_engine()) as db_session: - try: - cursor = None - while True: - document_batch, cursor = fetch_documents_for_document_set_paginated( - document_set_id=document_set_id, - db_session=db_session, - current_only=False, - last_document_id=cursor, - limit=_SYNC_BATCH_SIZE, - ) - _sync_document_batch( - document_ids=[document.id for document in document_batch], - db_session=db_session, - ) - if cursor is None: - break - - # if there are no connectors, then delete the document set. Otherwise, just - # mark it as successfully synced. - document_set = cast( - DocumentSet, - get_document_set_by_id( - db_session=db_session, document_set_id=document_set_id - ), - ) # casting since we "know" a document set with this ID exists - if not document_set.connector_credential_pairs: - delete_document_set( - document_set_row=document_set, db_session=db_session - ) - logger.info( - f"Successfully deleted document set with ID: '{document_set_id}'!" 
- ) - else: - mark_document_set_as_synced( - document_set_id=document_set_id, db_session=db_session - ) - logger.info(f"Document set sync for '{document_set_id}' complete!") + for key in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"): + r.delete(key) - except Exception: - logger.exception("Failed to sync document set %s", document_set_id) - raise + for key in r.scan_iter(RedisUserGroup.TASKSET_PREFIX + "*"): + r.delete(key) + for key in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"): + r.delete(key) -##### -# Periodic Tasks -##### -@celery_app.task( - name="check_for_document_sets_sync_task", - soft_time_limit=JOB_TIMEOUT, -) -def check_for_document_sets_sync_task() -> None: - """Runs periodically to check if any sync tasks should be run and adds them - to the queue""" - with Session(get_sqlalchemy_engine()) as db_session: - # check if any document sets are not synced - document_set_info = fetch_document_sets( - user_id=None, db_session=db_session, include_outdated=True - ) - for document_set, _ in document_set_info: - if should_sync_doc_set(document_set, db_session): - logger.info(f"Syncing the {document_set.name} document set") - sync_document_set_task.apply_async( - kwargs=dict(document_set_id=document_set.id), - ) + for key in r.scan_iter(RedisConnectorDeletion.TASKSET_PREFIX + "*"): + r.delete(key) + for key in r.scan_iter(RedisConnectorDeletion.FENCE_PREFIX + "*"): + r.delete(key) -@celery_app.task( - name="check_for_cc_pair_deletion_task", - soft_time_limit=JOB_TIMEOUT, -) -def check_for_cc_pair_deletion_task() -> None: - """Runs periodically to check if any deletion tasks should be run""" - with Session(get_sqlalchemy_engine()) as db_session: - # check if any document sets are not synced - cc_pairs = get_connector_credential_pairs(db_session) - for cc_pair in cc_pairs: - if should_kick_off_deletion_of_cc_pair(cc_pair, db_session): - logger.notice(f"Deleting the {cc_pair.name} connector credential pair") - cleanup_connector_credential_pair_task.apply_async( - kwargs=dict( - connector_id=cc_pair.connector.id, - credential_id=cc_pair.credential.id, - ), - ) +@worker_ready.connect +def on_worker_ready(sender: Any, **kwargs: Any) -> None: + task_logger.info("worker_ready signal received.") -@celery_app.task( - name="kombu_message_cleanup_task", - soft_time_limit=JOB_TIMEOUT, - bind=True, - base=AbortableTask, -) -def kombu_message_cleanup_task(self: Any) -> int: - """Runs periodically to clean up the kombu_message table""" - - # we will select messages older than this amount to clean up - KOMBU_MESSAGE_CLEANUP_AGE = 7 # days - KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT = 1000 - - ctx = {} - ctx["last_processed_id"] = 0 - ctx["deleted"] = 0 - ctx["cleanup_age"] = KOMBU_MESSAGE_CLEANUP_AGE - ctx["page_limit"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT - with Session(get_sqlalchemy_engine()) as db_session: - # Exit the task if we can't take the advisory lock - result = db_session.execute( - text("SELECT pg_try_advisory_lock(:id)"), - {"id": PostgresAdvisoryLocks.KOMBU_MESSAGE_CLEANUP_LOCK_ID.value}, - ).scalar() - if not result: - return 0 - while True: - if self.is_aborted(): - raise TaskRevokedError("kombu_message_cleanup_task was aborted.") +@worker_shutdown.connect +def on_worker_shutdown(sender: Any, **kwargs: Any) -> None: + if not celery_is_worker_primary(sender): + return - b = kombu_message_cleanup_task_helper(ctx, db_session) - if not b: - break + if not sender.primary_worker_lock: + return - db_session.commit() + logger.info("Releasing primary worker lock.") + lock = sender.primary_worker_lock 
+ if lock.owned(): + lock.release() + sender.primary_worker_lock = None - if ctx["deleted"] > 0: - logger.info(f"Deleted {ctx['deleted']} orphaned messages from kombu_message.") - return ctx["deleted"] +class CeleryTaskPlainFormatter(PlainFormatter): + def format(self, record: logging.LogRecord) -> str: + task = current_task + if task and task.request: + record.__dict__.update(task_id=task.request.id, task_name=task.name) + record.msg = f"[{task.name}({task.request.id})] {record.msg}" + return super().format(record) -def kombu_message_cleanup_task_helper(ctx: dict, db_session: Session) -> bool: - """ - Helper function to clean up old messages from the `kombu_message` table that are no longer relevant. - - This function retrieves messages from the `kombu_message` table that are no longer visible and - older than a specified interval. It checks if the corresponding task_id exists in the - `celery_taskmeta` table. If the task_id does not exist, the message is deleted. - - Args: - ctx (dict): A context dictionary containing configuration parameters such as: - - 'cleanup_age' (int): The age in days after which messages are considered old. - - 'page_limit' (int): The maximum number of messages to process in one batch. - - 'last_processed_id' (int): The ID of the last processed message to handle pagination. - - 'deleted' (int): A counter to track the number of deleted messages. - db_session (Session): The SQLAlchemy database session for executing queries. - - Returns: - bool: Returns True if there are more rows to process, False if not. - """ - query = text( - """ - SELECT id, timestamp, payload - FROM kombu_message WHERE visible = 'false' - AND timestamp < CURRENT_TIMESTAMP - INTERVAL :interval_days - AND id > :last_processed_id - ORDER BY id - LIMIT :page_limit -""" +class CeleryTaskColoredFormatter(ColoredFormatter): + def format(self, record: logging.LogRecord) -> str: + task = current_task + if task and task.request: + record.__dict__.update(task_id=task.request.id, task_name=task.name) + record.msg = f"[{task.name}({task.request.id})] {record.msg}" + + return super().format(record) + + +@signals.setup_logging.connect +def on_setup_logging( + loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any +) -> None: + # TODO: could unhardcode format and colorize and accept these as options from + # celery's config + + # reformats celery's worker logger + root_logger = logging.getLogger() + + root_handler = logging.StreamHandler() # Set up a handler for the root logger + root_formatter = ColoredFormatter( + "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", ) - kombu_messages = db_session.execute( - query, - { - "interval_days": f"{ctx['cleanup_age']} days", - "page_limit": ctx["page_limit"], - "last_processed_id": ctx["last_processed_id"], - }, - ).fetchall() - - if len(kombu_messages) == 0: - return False - - for msg in kombu_messages: - payload = json.loads(msg[2]) - task_id = payload["headers"]["id"] - - # Check if task_id exists in celery_taskmeta - task_exists = db_session.execute( - text("SELECT 1 FROM celery_taskmeta WHERE task_id = :task_id"), - {"task_id": task_id}, - ).fetchone() - - # If task_id does not exist, delete the message - if not task_exists: - result = db_session.execute( - text("DELETE FROM kombu_message WHERE id = :message_id"), - {"message_id": msg[0]}, - ) - if result.rowcount > 0: # type: ignore - ctx["deleted"] += 1 - else: - task_name = payload["headers"]["task"] - logger.warning( - f"Message found for task older than 
{ctx['cleanup_age']} days. " - f"id={task_id} name={task_name}" - ) + root_handler.setFormatter(root_formatter) + root_logger.addHandler(root_handler) # Apply the handler to the root logger + + if logfile: + root_file_handler = logging.FileHandler(logfile) + root_file_formatter = PlainFormatter( + "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + ) + root_file_handler.setFormatter(root_file_formatter) + root_logger.addHandler(root_file_handler) - ctx["last_processed_id"] = msg[0] + root_logger.setLevel(loglevel) - return True + # reformats celery's task logger + task_formatter = CeleryTaskColoredFormatter( + "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + ) + task_handler = logging.StreamHandler() # Set up a handler for the task logger + task_handler.setFormatter(task_formatter) + task_logger.addHandler(task_handler) # Apply the handler to the task logger + + if logfile: + task_file_handler = logging.FileHandler(logfile) + task_file_formatter = CeleryTaskPlainFormatter( + "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + ) + task_file_handler.setFormatter(task_file_formatter) + task_logger.addHandler(task_file_handler) + task_logger.setLevel(loglevel) + task_logger.propagate = False -@celery_app.task( - name="check_for_prune_task", - soft_time_limit=JOB_TIMEOUT, -) -def check_for_prune_task() -> None: - """Runs periodically to check if any prune tasks should be run and adds them - to the queue""" - - with Session(get_sqlalchemy_engine()) as db_session: - all_cc_pairs = get_connector_credential_pairs(db_session) - - for cc_pair in all_cc_pairs: - if should_prune_cc_pair( - connector=cc_pair.connector, - credential=cc_pair.credential, - db_session=db_session, - ): - logger.info(f"Pruning the {cc_pair.connector.name} connector") - - prune_documents_task.apply_async( - kwargs=dict( - connector_id=cc_pair.connector.id, - credential_id=cc_pair.credential.id, - ) + +class HubPeriodicTask(bootsteps.StartStopStep): + """Regularly reacquires the primary worker lock outside of the task queue. + Use the task_logger in this class to avoid double logging.""" + + # it's unclear to me whether using the hub's timer or the bootstep timer is better + requires = {"celery.worker.components:Hub"} + + def __init__(self, worker: Any, **kwargs: Any) -> None: + self.interval = CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 8 # Interval in seconds + self.task_tref = None + + def start(self, worker: Any) -> None: + if not celery_is_worker_primary(worker): + return + + # Access the worker's event loop (hub) + hub = worker.consumer.controller.hub + + # Schedule the periodic task + self.task_tref = hub.call_repeatedly( + self.interval, self.run_periodic_task, worker + ) + task_logger.info("Scheduled periodic task with hub.") + + def run_periodic_task(self, worker: Any) -> None: + try: + if not worker.primary_worker_lock: + return + + if not hasattr(worker, "primary_worker_lock"): + return + + r = redis_pool.get_client() + + lock: redis.lock.Lock = worker.primary_worker_lock + + task_logger.info("Reacquiring primary worker lock.") + + if lock.owned(): + task_logger.debug("Reacquiring primary worker lock.") + lock.reacquire() + else: + task_logger.warning( + "Full acquisition of primary worker lock. " + "Reasons could be computer sleep or a clock change." 
+ ) + lock = r.lock( + DanswerRedisLocks.PRIMARY_WORKER, + timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT, ) + task_logger.info("Primary worker lock: Acquire starting.") + acquired = lock.acquire( + blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2 + ) + if acquired: + task_logger.info("Primary worker lock: Acquire succeeded.") + else: + task_logger.error("Primary worker lock: Acquire failed!") + raise TimeoutError("Primary worker lock could not be acquired!") + + worker.primary_worker_lock = lock + except Exception: + task_logger.exception("HubPeriodicTask.run_periodic_task exceptioned.") + + def stop(self, worker: Any) -> None: + # Cancel the scheduled task when the worker stops + if self.task_tref: + self.task_tref.cancel() + task_logger.info("Canceled periodic task with hub.") + + +celery_app.steps["worker"].add(HubPeriodicTask) + +celery_app.autodiscover_tasks( + [ + "danswer.background.celery.tasks.connector_deletion", + "danswer.background.celery.tasks.periodic", + "danswer.background.celery.tasks.pruning", + "danswer.background.celery.tasks.vespa", + ] +) ##### # Celery Beat (Periodic Tasks) Settings ##### celery_app.conf.beat_schedule = { - "check-for-document-set-sync": { - "task": "check_for_document_sets_sync_task", + "check-for-vespa-sync": { + "task": "check_for_vespa_sync_task", "schedule": timedelta(seconds=5), - }, - "check-for-cc-pair-deletion": { - "task": "check_for_cc_pair_deletion_task", - # don't need to check too often, since we kick off a deletion initially - # during the API call that actually marks the CC pair for deletion - "schedule": timedelta(minutes=1), + "options": {"priority": DanswerCeleryPriority.HIGH}, }, } +celery_app.conf.beat_schedule.update( + { + "check-for-connector-deletion-task": { + "task": "check_for_connector_deletion_task", + # don't need to check too often, since we kick off a deletion initially + # during the API call that actually marks the CC pair for deletion + "schedule": timedelta(minutes=1), + "options": {"priority": DanswerCeleryPriority.HIGH}, + }, + } +) celery_app.conf.beat_schedule.update( { "check-for-prune": { "task": "check_for_prune_task", "schedule": timedelta(seconds=5), + "options": {"priority": DanswerCeleryPriority.HIGH}, }, } ) @@ -467,6 +450,16 @@ def check_for_prune_task() -> None: "kombu-message-cleanup": { "task": "kombu_message_cleanup_task", "schedule": timedelta(seconds=3600), + "options": {"priority": DanswerCeleryPriority.LOWEST}, + }, + } +) +celery_app.conf.beat_schedule.update( + { + "monitor-vespa-sync": { + "task": "monitor_vespa_sync", + "schedule": timedelta(seconds=5), + "options": {"priority": DanswerCeleryPriority.HIGH}, }, } ) diff --git a/backend/danswer/background/celery/celery_redis.py b/backend/danswer/background/celery/celery_redis.py new file mode 100644 index 00000000000..1d837bd51e0 --- /dev/null +++ b/backend/danswer/background/celery/celery_redis.py @@ -0,0 +1,361 @@ +# These are helper objects for tracking the keys we need to write in redis +import time +from abc import ABC +from abc import abstractmethod +from typing import cast +from uuid import uuid4 + +import redis +from celery import Celery +from redis import Redis +from sqlalchemy.orm import Session + +from danswer.background.celery.celeryconfig import CELERY_SEPARATOR +from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import DanswerCeleryQueues +from danswer.db.connector_credential_pair import 
get_connector_credential_pair_from_id +from danswer.db.document import construct_document_select_for_connector_credential_pair +from danswer.db.document import ( + construct_document_select_for_connector_credential_pair_by_needs_sync, +) +from danswer.db.document_set import construct_document_select_by_docset +from danswer.utils.variable_functionality import fetch_versioned_implementation + + +class RedisObjectHelper(ABC): + PREFIX = "base" + FENCE_PREFIX = PREFIX + "_fence" + TASKSET_PREFIX = PREFIX + "_taskset" + + def __init__(self, id: int): + self._id: int = id + + @property + def task_id_prefix(self) -> str: + return f"{self.PREFIX}_{self._id}" + + @property + def fence_key(self) -> str: + # example: documentset_fence_1 + return f"{self.FENCE_PREFIX}_{self._id}" + + @property + def taskset_key(self) -> str: + # example: documentset_taskset_1 + return f"{self.TASKSET_PREFIX}_{self._id}" + + @staticmethod + def get_id_from_fence_key(key: str) -> int | None: + """ + Extracts the object ID from a fence key in the format `PREFIX_fence_X`. + + Args: + key (str): The fence key string. + + Returns: + Optional[int]: The extracted ID if the key is in the correct format, otherwise None. + """ + parts = key.split("_") + if len(parts) != 3: + return None + + try: + object_id = int(parts[2]) + except ValueError: + return None + + return object_id + + @staticmethod + def get_id_from_task_id(task_id: str) -> int | None: + """ + Extracts the object ID from a task ID string. + + This method assumes the task ID is formatted as `prefix_objectid_suffix`, where: + - `prefix` is an arbitrary string (e.g., the name of the task or entity), + - `objectid` is the ID you want to extract, + - `suffix` is another arbitrary string (e.g., a UUID). + + Example: + If the input `task_id` is `documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc`, + this method will return the string `"1"`. + + Args: + task_id (str): The task ID string from which to extract the object ID. + + Returns: + str | None: The extracted object ID if the task ID is in the correct format, otherwise None. + """ + # example: task_id=documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc + parts = task_id.split("_") + if len(parts) != 3: + return None + + try: + object_id = int(parts[1]) + except ValueError: + return None + + return object_id + + @abstractmethod + def generate_tasks( + self, + celery_app: Celery, + db_session: Session, + redis_client: Redis, + lock: redis.lock.Lock, + ) -> int | None: + pass + + +class RedisDocumentSet(RedisObjectHelper): + PREFIX = "documentset" + FENCE_PREFIX = PREFIX + "_fence" + TASKSET_PREFIX = PREFIX + "_taskset" + + def generate_tasks( + self, + celery_app: Celery, + db_session: Session, + redis_client: Redis, + lock: redis.lock.Lock, + ) -> int | None: + last_lock_time = time.monotonic() + + async_results = [] + stmt = construct_document_select_by_docset(self._id, current_only=False) + for doc in db_session.scalars(stmt).yield_per(1): + current_time = time.monotonic() + if current_time - last_lock_time >= ( + CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 + ): + lock.reacquire() + last_lock_time = current_time + + # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" + # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" + # we prefix the task id so it's easier to keep track of who created the task + # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" + custom_task_id = f"{self.task_id_prefix}_{uuid4()}" + + # add to the set BEFORE creating the task. 
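+            # (if the task were sent first, it could finish and have its id removed
+            # from the taskset before the id was ever added here, leaving a member
+            # that never drains and a monitor that never finalizes the sync)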
+ redis_client.sadd(self.taskset_key, custom_task_id) + + result = celery_app.send_task( + "vespa_metadata_sync_task", + kwargs=dict(document_id=doc.id), + queue=DanswerCeleryQueues.VESPA_METADATA_SYNC, + task_id=custom_task_id, + priority=DanswerCeleryPriority.LOW, + ) + + async_results.append(result) + + return len(async_results) + + +class RedisUserGroup(RedisObjectHelper): + PREFIX = "usergroup" + FENCE_PREFIX = PREFIX + "_fence" + TASKSET_PREFIX = PREFIX + "_taskset" + + def generate_tasks( + self, + celery_app: Celery, + db_session: Session, + redis_client: Redis, + lock: redis.lock.Lock, + ) -> int | None: + last_lock_time = time.monotonic() + + async_results = [] + + try: + construct_document_select_by_usergroup = fetch_versioned_implementation( + "danswer.db.user_group", + "construct_document_select_by_usergroup", + ) + except ModuleNotFoundError: + return 0 + + stmt = construct_document_select_by_usergroup(self._id) + for doc in db_session.scalars(stmt).yield_per(1): + current_time = time.monotonic() + if current_time - last_lock_time >= ( + CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 + ): + lock.reacquire() + last_lock_time = current_time + + # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" + # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" + # we prefix the task id so it's easier to keep track of who created the task + # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" + custom_task_id = f"{self.task_id_prefix}_{uuid4()}" + + # add to the set BEFORE creating the task. + redis_client.sadd(self.taskset_key, custom_task_id) + + result = celery_app.send_task( + "vespa_metadata_sync_task", + kwargs=dict(document_id=doc.id), + queue=DanswerCeleryQueues.VESPA_METADATA_SYNC, + task_id=custom_task_id, + priority=DanswerCeleryPriority.LOW, + ) + + async_results.append(result) + + return len(async_results) + + +class RedisConnectorCredentialPair(RedisObjectHelper): + """This class differs from the default in that the taskset used spans + all connectors and is not per connector.""" + + PREFIX = "connectorsync" + FENCE_PREFIX = PREFIX + "_fence" + TASKSET_PREFIX = PREFIX + "_taskset" + + @classmethod + def get_fence_key(cls) -> str: + return RedisConnectorCredentialPair.FENCE_PREFIX + + @classmethod + def get_taskset_key(cls) -> str: + return RedisConnectorCredentialPair.TASKSET_PREFIX + + @property + def taskset_key(self) -> str: + """Notice that this is intentionally reusing the same taskset for all + connector syncs""" + # example: connector_taskset + return f"{self.TASKSET_PREFIX}" + + def generate_tasks( + self, + celery_app: Celery, + db_session: Session, + redis_client: Redis, + lock: redis.lock.Lock, + ) -> int | None: + last_lock_time = time.monotonic() + + async_results = [] + cc_pair = get_connector_credential_pair_from_id(self._id, db_session) + if not cc_pair: + return None + + stmt = construct_document_select_for_connector_credential_pair_by_needs_sync( + cc_pair.connector_id, cc_pair.credential_id + ) + for doc in db_session.scalars(stmt).yield_per(1): + current_time = time.monotonic() + if current_time - last_lock_time >= ( + CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 + ): + lock.reacquire() + last_lock_time = current_time + + # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" + # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" + # we prefix the task id so it's easier to keep track of who created the task + # aka 
"documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" + custom_task_id = f"{self.task_id_prefix}_{uuid4()}" + + # add to the tracking taskset in redis BEFORE creating the celery task. + # note that for the moment we are using a single taskset key, not differentiated by cc_pair id + redis_client.sadd( + RedisConnectorCredentialPair.get_taskset_key(), custom_task_id + ) + + # Priority on sync's triggered by new indexing should be medium + result = celery_app.send_task( + "vespa_metadata_sync_task", + kwargs=dict(document_id=doc.id), + queue=DanswerCeleryQueues.VESPA_METADATA_SYNC, + task_id=custom_task_id, + priority=DanswerCeleryPriority.MEDIUM, + ) + + async_results.append(result) + + return len(async_results) + + +class RedisConnectorDeletion(RedisObjectHelper): + PREFIX = "connectordeletion" + FENCE_PREFIX = PREFIX + "_fence" + TASKSET_PREFIX = PREFIX + "_taskset" + + def generate_tasks( + self, + celery_app: Celery, + db_session: Session, + redis_client: Redis, + lock: redis.lock.Lock, + ) -> int | None: + last_lock_time = time.monotonic() + + async_results = [] + cc_pair = get_connector_credential_pair_from_id(self._id, db_session) + if not cc_pair: + return None + + stmt = construct_document_select_for_connector_credential_pair( + cc_pair.connector_id, cc_pair.credential_id + ) + for doc in db_session.scalars(stmt).yield_per(1): + current_time = time.monotonic() + if current_time - last_lock_time >= ( + CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 + ): + lock.reacquire() + last_lock_time = current_time + + # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" + # the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" + # we prefix the task id so it's easier to keep track of who created the task + # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" + custom_task_id = f"{self.task_id_prefix}_{uuid4()}" + + # add to the tracking taskset in redis BEFORE creating the celery task. + # note that for the moment we are using a single taskset key, not differentiated by cc_pair id + redis_client.sadd(self.taskset_key, custom_task_id) + + # Priority on sync's triggered by new indexing should be medium + result = celery_app.send_task( + "document_by_cc_pair_cleanup_task", + kwargs=dict( + document_id=doc.id, + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + ), + queue=DanswerCeleryQueues.CONNECTOR_DELETION, + task_id=custom_task_id, + priority=DanswerCeleryPriority.MEDIUM, + ) + + async_results.append(result) + + return len(async_results) + + +def celery_get_queue_length(queue: str, r: Redis) -> int: + """This is a redis specific way to get the length of a celery queue. + It is priority aware and knows how to count across the multiple redis lists + used to implement task prioritization. 
+ This operation is not atomic.""" + total_length = 0 + for i in range(len(DanswerCeleryPriority)): + queue_name = queue + if i > 0: + queue_name += CELERY_SEPARATOR + queue_name += str(i) + + length = r.llen(queue_name) + total_length += cast(int, length) + + return total_length diff --git a/backend/danswer/background/celery/celery_utils.py b/backend/danswer/background/celery/celery_utils.py index e4d4d13bb1d..9ee282e1af3 100644 --- a/backend/danswer/background/celery/celery_utils.py +++ b/backend/danswer/background/celery/celery_utils.py @@ -1,11 +1,11 @@ from datetime import datetime from datetime import timezone +from typing import Any from sqlalchemy.orm import Session -from danswer.background.task_utils import name_cc_cleanup_task +from danswer.background.celery.celery_redis import RedisConnectorDeletion from danswer.background.task_utils import name_cc_prune_task -from danswer.background.task_utils import name_document_set_sync_task from danswer.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING from danswer.configs.app_configs import MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( @@ -16,30 +16,44 @@ from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector from danswer.connectors.models import Document -from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed +from danswer.db.connector_credential_pair import get_connector_credential_pair from danswer.db.engine import get_db_current_time -from danswer.db.enums import ConnectorCredentialPairStatus +from danswer.db.enums import TaskStatus from danswer.db.models import Connector -from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Credential -from danswer.db.models import DocumentSet from danswer.db.models import TaskQueueState from danswer.db.tasks import check_task_is_live_and_not_timed_out from danswer.db.tasks import get_latest_task from danswer.db.tasks import get_latest_task_by_type +from danswer.redis.redis_pool import RedisPool from danswer.server.documents.models import DeletionAttemptSnapshot from danswer.utils.logger import setup_logger logger = setup_logger() +redis_pool = RedisPool() def _get_deletion_status( connector_id: int, credential_id: int, db_session: Session ) -> TaskQueueState | None: - cleanup_task_name = name_cc_cleanup_task( - connector_id=connector_id, credential_id=credential_id + """We no longer store TaskQueueState in the DB for a deletion attempt. + This function populates TaskQueueState by just checking redis. 
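+    Only the presence of the deletion fence is checked, so the returned state carries
+    no task_id and always reports TaskStatus.STARTED.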
+ """ + cc_pair = get_connector_credential_pair( + connector_id=connector_id, credential_id=credential_id, db_session=db_session + ) + if not cc_pair: + return None + + rcd = RedisConnectorDeletion(cc_pair.id) + + r = redis_pool.get_client() + if not r.exists(rcd.fence_key): + return None + + return TaskQueueState( + task_id="", task_name=rcd.fence_key, status=TaskStatus.STARTED ) - return get_latest_task(task_name=cleanup_task_name, db_session=db_session) def get_deletion_attempt_snapshot( @@ -56,44 +70,28 @@ def get_deletion_attempt_snapshot( ) -def should_kick_off_deletion_of_cc_pair( - cc_pair: ConnectorCredentialPair, db_session: Session +def skip_cc_pair_pruning_by_task( + pruning_task: TaskQueueState | None, db_session: Session ) -> bool: - if cc_pair.status != ConnectorCredentialPairStatus.DELETING: - return False - - if check_deletion_attempt_is_allowed(cc_pair, db_session): - return False - - deletion_task = _get_deletion_status( - connector_id=cc_pair.connector_id, - credential_id=cc_pair.credential_id, - db_session=db_session, - ) - if deletion_task and check_task_is_live_and_not_timed_out( - deletion_task, - db_session, - # 1 hour timeout - timeout=60 * 60, - ): - return False - - return True - - -def should_sync_doc_set(document_set: DocumentSet, db_session: Session) -> bool: - if document_set.is_up_to_date: - return False + """task should be the latest prune task for this cc_pair""" + if not ALLOW_SIMULTANEOUS_PRUNING: + # if only one prune is allowed at any time, then check to see if any prune + # is active + pruning_type_task_name = name_cc_prune_task() + last_pruning_type_task = get_latest_task_by_type( + pruning_type_task_name, db_session + ) - task_name = name_document_set_sync_task(document_set.id) - latest_sync = get_latest_task(task_name, db_session) + if last_pruning_type_task and check_task_is_live_and_not_timed_out( + last_pruning_type_task, db_session + ): + return True - if latest_sync and check_task_is_live_and_not_timed_out(latest_sync, db_session): - logger.info(f"Document set '{document_set.id}' is already syncing. 
Skipping.") - return False + if pruning_task and check_task_is_live_and_not_timed_out(pruning_task, db_session): + # if the last task is live right now, we shouldn't start a new one + return True - logger.info(f"Document set {document_set.id} syncing now.") - return True + return False def should_prune_cc_pair( @@ -106,31 +104,26 @@ def should_prune_cc_pair( connector_id=connector.id, credential_id=credential.id ) last_pruning_task = get_latest_task(pruning_task_name, db_session) + + if skip_cc_pair_pruning_by_task(last_pruning_task, db_session): + return False + current_db_time = get_db_current_time(db_session) if not last_pruning_task: + # If the connector has never been pruned, then compare vs when the connector + # was created time_since_initialization = current_db_time - connector.time_created if time_since_initialization.total_seconds() >= connector.prune_freq: return True return False - if not ALLOW_SIMULTANEOUS_PRUNING: - pruning_type_task_name = name_cc_prune_task() - last_pruning_type_task = get_latest_task_by_type( - pruning_type_task_name, db_session - ) - - if last_pruning_type_task and check_task_is_live_and_not_timed_out( - last_pruning_type_task, db_session - ): - return False - - if check_task_is_live_and_not_timed_out(last_pruning_task, db_session): - return False - if not last_pruning_task.start_time: + # if the last prune task hasn't started, we shouldn't start a new one return False + # if the last prune task has a start time, then compare against it to determine + # if we should start time_since_last_pruning = current_db_time - last_pruning_task.start_time return time_since_last_pruning.total_seconds() >= connector.prune_freq @@ -168,3 +161,30 @@ def extract_ids_from_runnable_connector(runnable_connector: BaseConnector) -> se all_connector_doc_ids.update(doc_batch_processing_func(doc_batch)) return all_connector_doc_ids + + +def celery_is_listening_to_queue(worker: Any, name: str) -> bool: + """Checks to see if we're listening to the named queue""" + + # how to get a list of queues this worker is listening to + # https://stackoverflow.com/questions/29790523/how-to-determine-which-queues-a-celery-worker-is-consuming-at-runtime + queue_names = list(worker.app.amqp.queues.consume_from.keys()) + for queue_name in queue_names: + if queue_name == name: + return True + + return False + + +def celery_is_worker_primary(worker: Any) -> bool: + """There are multiple approaches that could be taken, but the way we do it is to + check the hostname set for the celery worker, either in celeryconfig.py or on the + command line.""" + hostname = worker.hostname + if hostname.startswith("light"): + return False + + if hostname.startswith("heavy"): + return False + + return True diff --git a/backend/danswer/background/celery/celeryconfig.py b/backend/danswer/background/celery/celeryconfig.py new file mode 100644 index 00000000000..1b1aa092d17 --- /dev/null +++ b/backend/danswer/background/celery/celeryconfig.py @@ -0,0 +1,97 @@ +# docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html +from danswer.configs.app_configs import CELERY_BROKER_POOL_LIMIT +from danswer.configs.app_configs import CELERY_RESULT_EXPIRES +from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY +from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY_RESULT_BACKEND +from danswer.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL +from danswer.configs.app_configs import REDIS_HOST +from danswer.configs.app_configs import REDIS_PASSWORD +from danswer.configs.app_configs import 
REDIS_PORT +from danswer.configs.app_configs import REDIS_SSL +from danswer.configs.app_configs import REDIS_SSL_CA_CERTS +from danswer.configs.app_configs import REDIS_SSL_CERT_REQS +from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS + +CELERY_SEPARATOR = ":" + +CELERY_PASSWORD_PART = "" +if REDIS_PASSWORD: + CELERY_PASSWORD_PART = f":{REDIS_PASSWORD}@" + +REDIS_SCHEME = "redis" + +# SSL-specific query parameters for Redis URL +SSL_QUERY_PARAMS = "" +if REDIS_SSL: + REDIS_SCHEME = "rediss" + SSL_QUERY_PARAMS = f"?ssl_cert_reqs={REDIS_SSL_CERT_REQS}" + if REDIS_SSL_CA_CERTS: + SSL_QUERY_PARAMS += f"&ssl_ca_certs={REDIS_SSL_CA_CERTS}" + +# example celery_broker_url: "redis://:password@localhost:6379/15" +broker_url = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}{SSL_QUERY_PARAMS}" + +result_backend = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY_RESULT_BACKEND}{SSL_QUERY_PARAMS}" + +# NOTE: prefetch 4 is significantly faster than prefetch 1 for small tasks +# however, prefetching is bad when tasks are lengthy as those tasks +# can stall other tasks. +worker_prefetch_multiplier = 4 + +broker_connection_retry_on_startup = True +broker_pool_limit = CELERY_BROKER_POOL_LIMIT + +# redis broker settings +# https://docs.celeryq.dev/projects/kombu/en/stable/reference/kombu.transport.redis.html +broker_transport_options = { + "priority_steps": list(range(len(DanswerCeleryPriority))), + "sep": CELERY_SEPARATOR, + "queue_order_strategy": "priority", + "retry_on_timeout": True, + "health_check_interval": REDIS_HEALTH_CHECK_INTERVAL, + "socket_keepalive": True, + "socket_keepalive_options": REDIS_SOCKET_KEEPALIVE_OPTIONS, +} + +# redis backend settings +# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings + +# there doesn't appear to be a way to set socket_keepalive_options on the redis result backend +redis_socket_keepalive = True +redis_retry_on_timeout = True +redis_backend_health_check_interval = REDIS_HEALTH_CHECK_INTERVAL + + +task_default_priority = DanswerCeleryPriority.MEDIUM +task_acks_late = True + +# It's possible we don't even need celery's result backend, in which case all of the optimization below +# might be irrelevant +result_expires = CELERY_RESULT_EXPIRES # 86400 seconds is the default + +# Option 0: Defaults (json serializer, no compression) +# about 1.5 KB per queued task. 1KB in queue, 400B for result, 100 as a child entry in generator result + +# Option 1: Reduces generator task result sizes by roughly 20% +# task_compression = "bzip2" +# task_serializer = "pickle" +# result_compression = "bzip2" +# result_serializer = "pickle" +# accept_content=["pickle"] + +# Option 2: this significantly reduces the size of the result for generator tasks since the list of children +# can be large. 
small tasks change very little +# def pickle_bz2_encoder(data): +# return bz2.compress(pickle.dumps(data)) + +# def pickle_bz2_decoder(data): +# return pickle.loads(bz2.decompress(data)) + +# from kombu import serialization # To register custom serialization with Celery/Kombu + +# serialization.register('pickle-bzip2', pickle_bz2_encoder, pickle_bz2_decoder, 'application/x-pickle-bz2', 'binary') + +# task_serializer = "pickle-bzip2" +# result_serializer = "pickle-bzip2" +# accept_content=["pickle", "pickle-bzip2"] diff --git a/backend/danswer/background/celery/tasks/connector_deletion/tasks.py b/backend/danswer/background/celery/tasks/connector_deletion/tasks.py new file mode 100644 index 00000000000..655487f7168 --- /dev/null +++ b/backend/danswer/background/celery/tasks/connector_deletion/tasks.py @@ -0,0 +1,133 @@ +import redis +from celery import shared_task +from celery.exceptions import SoftTimeLimitExceeded +from celery.utils.log import get_task_logger +from redis import Redis +from sqlalchemy.orm import Session +from sqlalchemy.orm.exc import ObjectDeletedError + +from danswer.background.celery.celery_app import celery_app +from danswer.background.celery.celery_redis import RedisConnectorDeletion +from danswer.configs.app_configs import JOB_TIMEOUT +from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DanswerRedisLocks +from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.enums import ConnectorCredentialPairStatus +from danswer.db.enums import IndexingStatus +from danswer.db.index_attempt import get_last_attempt +from danswer.db.models import ConnectorCredentialPair +from danswer.db.search_settings import get_current_search_settings +from danswer.redis.redis_pool import RedisPool + +redis_pool = RedisPool() + +# use this within celery tasks to get celery task specific logging +task_logger = get_task_logger(__name__) + + +@shared_task( + name="check_for_connector_deletion_task", + soft_time_limit=JOB_TIMEOUT, + trail=False, +) +def check_for_connector_deletion_task() -> None: + r = redis_pool.get_client() + + lock_beat = r.lock( + DanswerRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK, + timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT, + ) + + try: + # these tasks should never overlap + if not lock_beat.acquire(blocking=False): + return + + with Session(get_sqlalchemy_engine()) as db_session: + cc_pairs = get_connector_credential_pairs(db_session) + for cc_pair in cc_pairs: + try_generate_document_cc_pair_cleanup_tasks( + cc_pair, db_session, r, lock_beat + ) + except SoftTimeLimitExceeded: + task_logger.info( + "Soft time limit exceeded, task is being terminated gracefully." + ) + except Exception: + task_logger.exception("Unexpected exception") + finally: + if lock_beat.owned(): + lock_beat.release() + + +def try_generate_document_cc_pair_cleanup_tasks( + cc_pair: ConnectorCredentialPair, + db_session: Session, + r: Redis, + lock_beat: redis.lock.Lock, +) -> int | None: + """Returns an int if syncing is needed. The int represents the number of sync tasks generated. + Note that syncing can still be required even if the number of sync tasks generated is zero. + Returns None if no syncing is required. 
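+
+    Note that the fence key is only set after every cleanup task has been queued;
+    if generation is interrupted, the fence stays absent and a later run of this
+    check will simply regenerate the tasks.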
+ """ + + lock_beat.reacquire() + + rcd = RedisConnectorDeletion(cc_pair.id) + + # don't generate sync tasks if tasks are still pending + if r.exists(rcd.fence_key): + return None + + # we need to refresh the state of the object inside the fence + # to avoid a race condition with db.commit/fence deletion + # at the end of this taskset + try: + db_session.refresh(cc_pair) + except ObjectDeletedError: + return None + + if cc_pair.status != ConnectorCredentialPairStatus.DELETING: + return None + + search_settings = get_current_search_settings(db_session) + + last_indexing = get_last_attempt( + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + search_settings_id=search_settings.id, + db_session=db_session, + ) + if last_indexing: + if ( + last_indexing.status == IndexingStatus.IN_PROGRESS + or last_indexing.status == IndexingStatus.NOT_STARTED + ): + return None + + # add tasks to celery and build up the task set to monitor in redis + r.delete(rcd.taskset_key) + + # Add all documents that need to be updated into the queue + task_logger.info( + f"RedisConnectorDeletion.generate_tasks starting. cc_pair_id={cc_pair.id}" + ) + tasks_generated = rcd.generate_tasks(celery_app, db_session, r, lock_beat) + if tasks_generated is None: + return None + + # Currently we are allowing the sync to proceed with 0 tasks. + # It's possible for sets/groups to be generated initially with no entries + # and they still need to be marked as up to date. + # if tasks_generated == 0: + # return 0 + + task_logger.info( + f"RedisConnectorDeletion.generate_tasks finished. " + f"cc_pair_id={cc_pair.id} tasks_generated={tasks_generated}" + ) + + # set this only after all tasks have been added + r.set(rcd.fence_key, tasks_generated) + return tasks_generated diff --git a/backend/danswer/background/celery/tasks/periodic/tasks.py b/backend/danswer/background/celery/tasks/periodic/tasks.py new file mode 100644 index 00000000000..bd3b082aeb8 --- /dev/null +++ b/backend/danswer/background/celery/tasks/periodic/tasks.py @@ -0,0 +1,140 @@ +##### +# Periodic Tasks +##### +import json +from typing import Any + +from celery import shared_task +from celery.contrib.abortable import AbortableTask # type: ignore +from celery.exceptions import TaskRevokedError +from celery.utils.log import get_task_logger +from sqlalchemy import inspect +from sqlalchemy import text +from sqlalchemy.orm import Session + +from danswer.configs.app_configs import JOB_TIMEOUT +from danswer.configs.constants import PostgresAdvisoryLocks +from danswer.db.engine import get_sqlalchemy_engine # type: ignore + +# use this within celery tasks to get celery task specific logging +task_logger = get_task_logger(__name__) + + +@shared_task( + name="kombu_message_cleanup_task", + soft_time_limit=JOB_TIMEOUT, + bind=True, + base=AbortableTask, +) +def kombu_message_cleanup_task(self: Any) -> int: + """Runs periodically to clean up the kombu_message table""" + + # we will select messages older than this amount to clean up + KOMBU_MESSAGE_CLEANUP_AGE = 7 # days + KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT = 1000 + + ctx = {} + ctx["last_processed_id"] = 0 + ctx["deleted"] = 0 + ctx["cleanup_age"] = KOMBU_MESSAGE_CLEANUP_AGE + ctx["page_limit"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT + with Session(get_sqlalchemy_engine()) as db_session: + # Exit the task if we can't take the advisory lock + result = db_session.execute( + text("SELECT pg_try_advisory_lock(:id)"), + {"id": PostgresAdvisoryLocks.KOMBU_MESSAGE_CLEANUP_LOCK_ID.value}, + ).scalar() + if not result: + 
return 0 + + while True: + if self.is_aborted(): + raise TaskRevokedError("kombu_message_cleanup_task was aborted.") + + b = kombu_message_cleanup_task_helper(ctx, db_session) + if not b: + break + + db_session.commit() + + if ctx["deleted"] > 0: + task_logger.info( + f"Deleted {ctx['deleted']} orphaned messages from kombu_message." + ) + + return ctx["deleted"] + + +def kombu_message_cleanup_task_helper(ctx: dict, db_session: Session) -> bool: + """ + Helper function to clean up old messages from the `kombu_message` table that are no longer relevant. + + This function retrieves messages from the `kombu_message` table that are no longer visible and + older than a specified interval. It checks if the corresponding task_id exists in the + `celery_taskmeta` table. If the task_id does not exist, the message is deleted. + + Args: + ctx (dict): A context dictionary containing configuration parameters such as: + - 'cleanup_age' (int): The age in days after which messages are considered old. + - 'page_limit' (int): The maximum number of messages to process in one batch. + - 'last_processed_id' (int): The ID of the last processed message to handle pagination. + - 'deleted' (int): A counter to track the number of deleted messages. + db_session (Session): The SQLAlchemy database session for executing queries. + + Returns: + bool: Returns True if there are more rows to process, False if not. + """ + + inspector = inspect(db_session.bind) + if not inspector: + return False + + # With the move to redis as celery's broker and backend, kombu tables may not even exist. + # We can fail silently. + if not inspector.has_table("kombu_message"): + return False + + query = text( + """ + SELECT id, timestamp, payload + FROM kombu_message WHERE visible = 'false' + AND timestamp < CURRENT_TIMESTAMP - INTERVAL :interval_days + AND id > :last_processed_id + ORDER BY id + LIMIT :page_limit +""" + ) + kombu_messages = db_session.execute( + query, + { + "interval_days": f"{ctx['cleanup_age']} days", + "page_limit": ctx["page_limit"], + "last_processed_id": ctx["last_processed_id"], + }, + ).fetchall() + + if len(kombu_messages) == 0: + return False + + for msg in kombu_messages: + payload = json.loads(msg[2]) + task_id = payload["headers"]["id"] + + # Check if task_id exists in celery_taskmeta + task_exists = db_session.execute( + text("SELECT 1 FROM celery_taskmeta WHERE task_id = :task_id"), + {"task_id": task_id}, + ).fetchone() + + # If task_id does not exist, delete the message + if not task_exists: + result = db_session.execute( + text("DELETE FROM kombu_message WHERE id = :message_id"), + {"message_id": msg[0]}, + ) + if result.rowcount > 0: # type: ignore + ctx["deleted"] += 1 + + ctx["last_processed_id"] = msg[0] + + return True diff --git a/backend/danswer/background/celery/tasks/pruning/tasks.py b/backend/danswer/background/celery/tasks/pruning/tasks.py new file mode 100644 index 00000000000..2f840e430ae --- /dev/null +++ b/backend/danswer/background/celery/tasks/pruning/tasks.py @@ -0,0 +1,120 @@ +from celery import shared_task +from celery.utils.log import get_task_logger +from sqlalchemy.orm import Session + +from danswer.background.celery.celery_app import celery_app +from danswer.background.celery.celery_utils import extract_ids_from_runnable_connector +from danswer.background.celery.celery_utils import should_prune_cc_pair +from danswer.background.connector_deletion import delete_connector_credential_pair_batch +from danswer.background.task_utils import build_celery_task_wrapper +from 
danswer.background.task_utils import name_cc_prune_task +from danswer.configs.app_configs import JOB_TIMEOUT +from danswer.connectors.factory import instantiate_connector +from danswer.connectors.models import InputType +from danswer.db.connector_credential_pair import get_connector_credential_pair +from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.document import get_documents_for_connector_credential_pair +from danswer.db.engine import get_sqlalchemy_engine +from danswer.document_index.document_index_utils import get_both_index_names +from danswer.document_index.factory import get_default_document_index + + +# use this within celery tasks to get celery task specific logging +task_logger = get_task_logger(__name__) + + +@shared_task( + name="check_for_prune_task", + soft_time_limit=JOB_TIMEOUT, +) +def check_for_prune_task() -> None: + """Runs periodically to check if any prune tasks should be run and adds them + to the queue""" + + with Session(get_sqlalchemy_engine()) as db_session: + all_cc_pairs = get_connector_credential_pairs(db_session) + + for cc_pair in all_cc_pairs: + if should_prune_cc_pair( + connector=cc_pair.connector, + credential=cc_pair.credential, + db_session=db_session, + ): + task_logger.info(f"Pruning the {cc_pair.connector.name} connector") + + prune_documents_task.apply_async( + kwargs=dict( + connector_id=cc_pair.connector.id, + credential_id=cc_pair.credential.id, + ) + ) + + +@build_celery_task_wrapper(name_cc_prune_task) +@celery_app.task(name="prune_documents_task", soft_time_limit=JOB_TIMEOUT) +def prune_documents_task(connector_id: int, credential_id: int) -> None: + """connector pruning task. For a cc pair, this task pulls all document IDs from the source + and compares those IDs to locally stored documents and deletes all locally stored IDs missing + from the most recently pulled document ID list""" + with Session(get_sqlalchemy_engine()) as db_session: + try: + cc_pair = get_connector_credential_pair( + db_session=db_session, + connector_id=connector_id, + credential_id=credential_id, + ) + + if not cc_pair: + task_logger.warning( + f"ccpair not found for {connector_id} {credential_id}" + ) + return + + runnable_connector = instantiate_connector( + db_session, + cc_pair.connector.source, + InputType.PRUNE, + cc_pair.connector.connector_specific_config, + cc_pair.credential, + ) + + all_connector_doc_ids: set[str] = extract_ids_from_runnable_connector( + runnable_connector + ) + + all_indexed_document_ids = { + doc.id + for doc in get_documents_for_connector_credential_pair( + db_session=db_session, + connector_id=connector_id, + credential_id=credential_id, + ) + } + + doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids) + + curr_ind_name, sec_ind_name = get_both_index_names(db_session) + document_index = get_default_document_index( + primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name + ) + + if len(doc_ids_to_remove) == 0: + task_logger.info( + f"No docs to prune from {cc_pair.connector.source} connector" + ) + return + + task_logger.info( + f"pruning {len(doc_ids_to_remove)} doc(s) from {cc_pair.connector.source} connector" + ) + delete_connector_credential_pair_batch( + document_ids=doc_ids_to_remove, + connector_id=connector_id, + credential_id=credential_id, + document_index=document_index, + ) + except Exception as e: + task_logger.exception( + f"Failed to run pruning for connector id {connector_id}." 
+ ) + raise e diff --git a/backend/danswer/background/celery/tasks/vespa/tasks.py b/backend/danswer/background/celery/tasks/vespa/tasks.py new file mode 100644 index 00000000000..d11d317d0b1 --- /dev/null +++ b/backend/danswer/background/celery/tasks/vespa/tasks.py @@ -0,0 +1,526 @@ +import traceback +from typing import cast + +import redis +from celery import shared_task +from celery import Task +from celery.exceptions import SoftTimeLimitExceeded +from celery.utils.log import get_task_logger +from redis import Redis +from sqlalchemy.orm import Session + +from danswer.access.access import get_access_for_document +from danswer.background.celery.celery_app import celery_app +from danswer.background.celery.celery_redis import RedisConnectorCredentialPair +from danswer.background.celery.celery_redis import RedisConnectorDeletion +from danswer.background.celery.celery_redis import RedisDocumentSet +from danswer.background.celery.celery_redis import RedisUserGroup +from danswer.configs.app_configs import JOB_TIMEOUT +from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DanswerRedisLocks +from danswer.db.connector import fetch_connector_by_id +from danswer.db.connector_credential_pair import add_deletion_failure_message +from danswer.db.connector_credential_pair import ( + delete_connector_credential_pair__no_commit, +) +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id +from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.document import count_documents_by_needs_sync +from danswer.db.document import get_document +from danswer.db.document import mark_document_as_synced +from danswer.db.document_set import delete_document_set +from danswer.db.document_set import delete_document_set_cc_pair_relationship__no_commit +from danswer.db.document_set import fetch_document_sets +from danswer.db.document_set import fetch_document_sets_for_document +from danswer.db.document_set import get_document_set_by_id +from danswer.db.document_set import mark_document_set_as_synced +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.index_attempt import delete_index_attempts +from danswer.db.models import DocumentSet +from danswer.db.models import UserGroup +from danswer.document_index.document_index_utils import get_both_index_names +from danswer.document_index.factory import get_default_document_index +from danswer.document_index.interfaces import UpdateRequest +from danswer.redis.redis_pool import RedisPool +from danswer.utils.variable_functionality import fetch_versioned_implementation +from danswer.utils.variable_functionality import ( + fetch_versioned_implementation_with_fallback, +) +from danswer.utils.variable_functionality import noop_fallback + +redis_pool = RedisPool() + +# use this within celery tasks to get celery task specific logging +task_logger = get_task_logger(__name__) + + +# celery auto associates tasks created inside another task, +# which bloats the result metadata considerably. trail=False prevents this. +@shared_task( + name="check_for_vespa_sync_task", + soft_time_limit=JOB_TIMEOUT, + trail=False, +) +def check_for_vespa_sync_task() -> None: + """Runs periodically to check if any document needs syncing. 
+ Generates sets of tasks for Celery if syncing is needed.""" + + r = redis_pool.get_client() + + lock_beat = r.lock( + DanswerRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK, + timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT, + ) + + try: + # these tasks should never overlap + if not lock_beat.acquire(blocking=False): + return + + with Session(get_sqlalchemy_engine()) as db_session: + try_generate_stale_document_sync_tasks(db_session, r, lock_beat) + + # check if any document sets are not synced + document_set_info = fetch_document_sets( + user_id=None, db_session=db_session, include_outdated=True + ) + for document_set, _ in document_set_info: + try_generate_document_set_sync_tasks( + document_set, db_session, r, lock_beat + ) + + # check if any user groups are not synced + try: + fetch_user_groups = fetch_versioned_implementation( + "danswer.db.user_group", "fetch_user_groups" + ) + + user_groups = fetch_user_groups( + db_session=db_session, only_up_to_date=False + ) + for usergroup in user_groups: + try_generate_user_group_sync_tasks( + usergroup, db_session, r, lock_beat + ) + except ModuleNotFoundError: + # Always exceptions on the MIT version, which is expected + pass + except SoftTimeLimitExceeded: + task_logger.info( + "Soft time limit exceeded, task is being terminated gracefully." + ) + except Exception: + task_logger.exception("Unexpected exception") + finally: + if lock_beat.owned(): + lock_beat.release() + + +def try_generate_stale_document_sync_tasks( + db_session: Session, r: Redis, lock_beat: redis.lock.Lock +) -> int | None: + # the fence is up, do nothing + if r.exists(RedisConnectorCredentialPair.get_fence_key()): + return None + + r.delete(RedisConnectorCredentialPair.get_taskset_key()) # delete the taskset + + # add tasks to celery and build up the task set to monitor in redis + stale_doc_count = count_documents_by_needs_sync(db_session) + if stale_doc_count == 0: + return None + + task_logger.info( + f"Stale documents found (at least {stale_doc_count}). Generating sync tasks by cc pair." + ) + + task_logger.info("RedisConnector.generate_tasks starting by cc_pair.") + + # rkuo: we could technically sync all stale docs in one big pass. + # but I feel it's more understandable to group the docs by cc_pair + total_tasks_generated = 0 + cc_pairs = get_connector_credential_pairs(db_session) + for cc_pair in cc_pairs: + rc = RedisConnectorCredentialPair(cc_pair.id) + tasks_generated = rc.generate_tasks(celery_app, db_session, r, lock_beat) + + if tasks_generated is None: + continue + + if tasks_generated == 0: + continue + + task_logger.info( + f"RedisConnector.generate_tasks finished for single cc_pair. " + f"cc_pair_id={cc_pair.id} tasks_generated={tasks_generated}" + ) + + total_tasks_generated += tasks_generated + + task_logger.info( + f"RedisConnector.generate_tasks finished for all cc_pairs. total_tasks_generated={total_tasks_generated}" + ) + + r.set(RedisConnectorCredentialPair.get_fence_key(), total_tasks_generated) + return total_tasks_generated + + +def try_generate_document_set_sync_tasks( + document_set: DocumentSet, db_session: Session, r: Redis, lock_beat: redis.lock.Lock +) -> int | None: + lock_beat.reacquire() + + rds = RedisDocumentSet(document_set.id) + + # don't generate document set sync tasks if tasks are still pending + if r.exists(rds.fence_key): + return None + + # don't generate sync tasks if we're up to date + # race condition with the monitor/cleanup function if we use a cached result! 
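+    # refresh re-reads is_up_to_date from Postgres so the check below isn't made
+    # against a value this session cached before the monitor task last updated it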
+ db_session.refresh(document_set) + if document_set.is_up_to_date: + return None + + # add tasks to celery and build up the task set to monitor in redis + r.delete(rds.taskset_key) + + task_logger.info( + f"RedisDocumentSet.generate_tasks starting. document_set_id={document_set.id}" + ) + + # Add all documents that need to be updated into the queue + tasks_generated = rds.generate_tasks(celery_app, db_session, r, lock_beat) + if tasks_generated is None: + return None + + # Currently we are allowing the sync to proceed with 0 tasks. + # It's possible for sets/groups to be generated initially with no entries + # and they still need to be marked as up to date. + # if tasks_generated == 0: + # return 0 + + task_logger.info( + f"RedisDocumentSet.generate_tasks finished. " + f"document_set_id={document_set.id} tasks_generated={tasks_generated}" + ) + + # set this only after all tasks have been added + r.set(rds.fence_key, tasks_generated) + return tasks_generated + + +def try_generate_user_group_sync_tasks( + usergroup: UserGroup, db_session: Session, r: Redis, lock_beat: redis.lock.Lock +) -> int | None: + lock_beat.reacquire() + + rug = RedisUserGroup(usergroup.id) + + # don't generate sync tasks if tasks are still pending + if r.exists(rug.fence_key): + return None + + # race condition with the monitor/cleanup function if we use a cached result! + db_session.refresh(usergroup) + if usergroup.is_up_to_date: + return None + + # add tasks to celery and build up the task set to monitor in redis + r.delete(rug.taskset_key) + + # Add all documents that need to be updated into the queue + task_logger.info( + f"RedisUserGroup.generate_tasks starting. usergroup_id={usergroup.id}" + ) + tasks_generated = rug.generate_tasks(celery_app, db_session, r, lock_beat) + if tasks_generated is None: + return None + + # Currently we are allowing the sync to proceed with 0 tasks. + # It's possible for sets/groups to be generated initially with no entries + # and they still need to be marked as up to date. + # if tasks_generated == 0: + # return 0 + + task_logger.info( + f"RedisUserGroup.generate_tasks finished. " + f"usergroup_id={usergroup.id} tasks_generated={tasks_generated}" + ) + + # set this only after all tasks have been added + r.set(rug.fence_key, tasks_generated) + return tasks_generated + + +def monitor_connector_taskset(r: Redis) -> None: + fence_value = r.get(RedisConnectorCredentialPair.get_fence_key()) + if fence_value is None: + return + + try: + initial_count = int(cast(int, fence_value)) + except ValueError: + task_logger.error("The value is not an integer.") + return + + count = r.scard(RedisConnectorCredentialPair.get_taskset_key()) + task_logger.info( + f"Stale document sync progress: remaining={count} initial={initial_count}" + ) + if count == 0: + r.delete(RedisConnectorCredentialPair.get_taskset_key()) + r.delete(RedisConnectorCredentialPair.get_fence_key()) + task_logger.info(f"Successfully synced stale documents. 
count={initial_count}") + + +def monitor_document_set_taskset( + key_bytes: bytes, r: Redis, db_session: Session +) -> None: + fence_key = key_bytes.decode("utf-8") + document_set_id = RedisDocumentSet.get_id_from_fence_key(fence_key) + if document_set_id is None: + task_logger.warning("could not parse document set id from {key}") + return + + rds = RedisDocumentSet(document_set_id) + + fence_value = r.get(rds.fence_key) + if fence_value is None: + return + + try: + initial_count = int(cast(int, fence_value)) + except ValueError: + task_logger.error("The value is not an integer.") + return + + count = cast(int, r.scard(rds.taskset_key)) + task_logger.info( + f"Document set sync progress: document_set_id={document_set_id} remaining={count} initial={initial_count}" + ) + if count > 0: + return + + document_set = cast( + DocumentSet, + get_document_set_by_id(db_session=db_session, document_set_id=document_set_id), + ) # casting since we "know" a document set with this ID exists + if document_set: + if not document_set.connector_credential_pairs: + # if there are no connectors, then delete the document set. + delete_document_set(document_set_row=document_set, db_session=db_session) + task_logger.info( + f"Successfully deleted document set with ID: '{document_set_id}'!" + ) + else: + mark_document_set_as_synced(document_set_id, db_session) + task_logger.info( + f"Successfully synced document set with ID: '{document_set_id}'!" + ) + + r.delete(rds.taskset_key) + r.delete(rds.fence_key) + + +def monitor_connector_deletion_taskset(key_bytes: bytes, r: Redis) -> None: + fence_key = key_bytes.decode("utf-8") + cc_pair_id = RedisConnectorDeletion.get_id_from_fence_key(fence_key) + if cc_pair_id is None: + task_logger.warning("could not parse document set id from {key}") + return + + rcd = RedisConnectorDeletion(cc_pair_id) + + fence_value = r.get(rcd.fence_key) + if fence_value is None: + return + + try: + initial_count = int(cast(int, fence_value)) + except ValueError: + task_logger.error("The value is not an integer.") + return + + count = cast(int, r.scard(rcd.taskset_key)) + task_logger.info( + f"Connector deletion progress: cc_pair_id={cc_pair_id} remaining={count} initial={initial_count}" + ) + if count > 0: + return + + with Session(get_sqlalchemy_engine()) as db_session: + cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session) + if not cc_pair: + return + + try: + # clean up the rest of the related Postgres entities + # index attempts + delete_index_attempts( + db_session=db_session, + cc_pair_id=cc_pair.id, + ) + + # document sets + delete_document_set_cc_pair_relationship__no_commit( + db_session=db_session, + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + ) + + # user groups + cleanup_user_groups = fetch_versioned_implementation_with_fallback( + "danswer.db.user_group", + "delete_user_group_cc_pair_relationship__no_commit", + noop_fallback, + ) + cleanup_user_groups( + cc_pair_id=cc_pair.id, + db_session=db_session, + ) + + # finally, delete the cc-pair + delete_connector_credential_pair__no_commit( + db_session=db_session, + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + ) + # if there are no credentials left, delete the connector + connector = fetch_connector_by_id( + db_session=db_session, + connector_id=cc_pair.connector_id, + ) + if not connector or not len(connector.credentials): + task_logger.info( + "Found no credentials left for connector, deleting connector" + ) + db_session.delete(connector) + 
db_session.commit()
+        except Exception as e:
+            stack_trace = traceback.format_exc()
+            error_message = f"Error: {str(e)}\n\nStack Trace:\n{stack_trace}"
+            add_deletion_failure_message(db_session, cc_pair.id, error_message)
+            task_logger.exception(
+                f"Failed to run connector_deletion. "
+                f"connector_id={cc_pair.connector_id} credential_id={cc_pair.credential_id}"
+            )
+            raise e
+
+        task_logger.info(
+            f"Successfully deleted connector_credential_pair with connector_id: '{cc_pair.connector_id}' "
+            f"and credential_id: '{cc_pair.credential_id}'. "
+            f"Deleted {initial_count} docs."
+        )
+
+    r.delete(rcd.taskset_key)
+    r.delete(rcd.fence_key)
+
+
+@shared_task(name="monitor_vespa_sync", soft_time_limit=300)
+def monitor_vespa_sync() -> None:
+    """This is a celery beat task that monitors and finalizes metadata sync tasksets.
+    It scans for fence values and then gets the counts of any associated tasksets.
+    If the count is 0, that means all tasks finished and we should clean up.
+
+    This task lock timeout is CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT seconds, so don't
+    do anything too expensive in this function!
+    """
+    r = redis_pool.get_client()
+
+    lock_beat = r.lock(
+        DanswerRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK,
+        timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
+    )
+
+    try:
+        # prevent overlapping tasks
+        if not lock_beat.acquire(blocking=False):
+            return
+
+        if r.exists(RedisConnectorCredentialPair.get_fence_key()):
+            monitor_connector_taskset(r)
+
+        for key_bytes in r.scan_iter(RedisConnectorDeletion.FENCE_PREFIX + "*"):
+            monitor_connector_deletion_taskset(key_bytes, r)
+
+        with Session(get_sqlalchemy_engine()) as db_session:
+            for key_bytes in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"):
+                monitor_document_set_taskset(key_bytes, r, db_session)
+
+            for key_bytes in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"):
+                monitor_usergroup_taskset = (
+                    fetch_versioned_implementation_with_fallback(
+                        "danswer.background.celery.tasks.vespa.tasks",
+                        "monitor_usergroup_taskset",
+                        noop_fallback,
+                    )
+                )
+                monitor_usergroup_taskset(key_bytes, r, db_session)
+
+        # uncomment for debugging if needed
+        # r_celery = celery_app.broker_connection().channel().client
+        # length = celery_get_queue_length(DanswerCeleryQueues.VESPA_METADATA_SYNC, r_celery)
+        # task_logger.warning(f"queue={DanswerCeleryQueues.VESPA_METADATA_SYNC} length={length}")
+    except SoftTimeLimitExceeded:
+        task_logger.info(
+            "Soft time limit exceeded, task is being terminated gracefully."
+ ) + finally: + if lock_beat.owned(): + lock_beat.release() + + +@shared_task( + name="vespa_metadata_sync_task", + bind=True, + soft_time_limit=45, + time_limit=60, + max_retries=3, +) +def vespa_metadata_sync_task(self: Task, document_id: str) -> bool: + task_logger.info(f"document_id={document_id}") + + try: + with Session(get_sqlalchemy_engine()) as db_session: + curr_ind_name, sec_ind_name = get_both_index_names(db_session) + document_index = get_default_document_index( + primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name + ) + + doc = get_document(document_id, db_session) + if not doc: + return False + + # document set sync + doc_sets = fetch_document_sets_for_document(document_id, db_session) + update_doc_sets: set[str] = set(doc_sets) + + # User group sync + doc_access = get_access_for_document( + document_id=document_id, db_session=db_session + ) + update_request = UpdateRequest( + document_ids=[document_id], + document_sets=update_doc_sets, + access=doc_access, + boost=doc.boost, + hidden=doc.hidden, + ) + + # update Vespa + document_index.update(update_requests=[update_request]) + + # update db last. Worst case = we crash right before this and + # the sync might repeat again later + mark_document_as_synced(document_id, db_session) + except SoftTimeLimitExceeded: + task_logger.info(f"SoftTimeLimitExceeded exception. doc_id={document_id}") + except Exception as e: + task_logger.exception("Unexpected exception") + + # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64 + countdown = 2 ** (self.request.retries + 4) + self.retry(exc=e, countdown=countdown) + + return True diff --git a/backend/danswer/background/connector_deletion.py b/backend/danswer/background/connector_deletion.py index 90883564910..84b696dd8e4 100644 --- a/backend/danswer/background/connector_deletion.py +++ b/backend/danswer/background/connector_deletion.py @@ -10,34 +10,37 @@ connector / credential pair from the access list (6) delete all relevant entries from postgres """ +from celery import shared_task +from celery import Task +from celery.exceptions import SoftTimeLimitExceeded +from celery.utils.log import get_task_logger from sqlalchemy.orm import Session +from danswer.access.access import get_access_for_document from danswer.access.access import get_access_for_documents -from danswer.db.connector import fetch_connector_by_id -from danswer.db.connector_credential_pair import ( - delete_connector_credential_pair__no_commit, -) from danswer.db.document import delete_document_by_connector_credential_pair__no_commit +from danswer.db.document import delete_documents_by_connector_credential_pair__no_commit from danswer.db.document import delete_documents_complete__no_commit -from danswer.db.document import get_document_connector_cnts -from danswer.db.document import get_documents_for_connector_credential_pair +from danswer.db.document import get_document +from danswer.db.document import get_document_connector_count +from danswer.db.document import get_document_connector_counts +from danswer.db.document import mark_document_as_synced from danswer.db.document import prepare_to_modify_documents -from danswer.db.document_set import delete_document_set_cc_pair_relationship__no_commit +from danswer.db.document_set import fetch_document_sets_for_document from danswer.db.document_set import fetch_document_sets_for_documents from danswer.db.engine import get_sqlalchemy_engine -from danswer.db.index_attempt import delete_index_attempts -from danswer.db.models import ConnectorCredentialPair +from 
danswer.document_index.document_index_utils import get_both_index_names +from danswer.document_index.factory import get_default_document_index from danswer.document_index.interfaces import DocumentIndex from danswer.document_index.interfaces import UpdateRequest from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.utils.logger import setup_logger -from danswer.utils.variable_functionality import ( - fetch_versioned_implementation_with_fallback, -) -from danswer.utils.variable_functionality import noop_fallback logger = setup_logger() +# use this within celery tasks to get celery task specific logging +task_logger = get_task_logger(__name__) + _DELETION_BATCH_SIZE = 1000 @@ -57,13 +60,15 @@ def delete_connector_credential_pair_batch( with prepare_to_modify_documents( db_session=db_session, document_ids=document_ids ): - document_connector_cnts = get_document_connector_cnts( + document_connector_counts = get_document_connector_counts( db_session=db_session, document_ids=document_ids ) # figure out which docs need to be completely deleted document_ids_to_delete = [ - document_id for document_id, cnt in document_connector_cnts if cnt == 1 + document_id + for document_id, cnt in document_connector_counts + if cnt == 1 ] logger.debug(f"Deleting documents: {document_ids_to_delete}") @@ -76,7 +81,7 @@ def delete_connector_credential_pair_batch( # figure out which docs need to be updated document_ids_to_update = [ - document_id for document_id, cnt in document_connector_cnts if cnt > 1 + document_id for document_id, cnt in document_connector_counts if cnt > 1 ] # maps document id to list of document set names @@ -109,7 +114,7 @@ def delete_connector_credential_pair_batch( document_index.update(update_requests=update_requests) # clean up Postgres - delete_document_by_connector_credential_pair__no_commit( + delete_documents_by_connector_credential_pair__no_commit( db_session=db_session, document_ids=document_ids_to_update, connector_credential_pair_identifier=ConnectorCredentialPairIdentifier( @@ -120,77 +125,87 @@ def delete_connector_credential_pair_batch( db_session.commit() -def delete_connector_credential_pair( - db_session: Session, - document_index: DocumentIndex, - cc_pair: ConnectorCredentialPair, -) -> int: - connector_id = cc_pair.connector_id - credential_id = cc_pair.credential_id - - num_docs_deleted = 0 - while True: - documents = get_documents_for_connector_credential_pair( - db_session=db_session, - connector_id=connector_id, - credential_id=credential_id, - limit=_DELETION_BATCH_SIZE, - ) - if not documents: - break - - delete_connector_credential_pair_batch( - document_ids=[document.id for document in documents], - connector_id=connector_id, - credential_id=credential_id, - document_index=document_index, - ) - num_docs_deleted += len(documents) - - # clean up the rest of the related Postgres entities - # index attempts - delete_index_attempts( - db_session=db_session, - connector_id=connector_id, - credential_id=credential_id, - ) - - # document sets - delete_document_set_cc_pair_relationship__no_commit( - db_session=db_session, - connector_id=connector_id, - credential_id=credential_id, - ) - - # user groups - cleanup_user_groups = fetch_versioned_implementation_with_fallback( - "danswer.db.user_group", - "delete_user_group_cc_pair_relationship__no_commit", - noop_fallback, - ) - cleanup_user_groups( - cc_pair_id=cc_pair.id, - db_session=db_session, - ) - - # finally, delete the cc-pair - delete_connector_credential_pair__no_commit( - 
db_session=db_session, - connector_id=connector_id, - credential_id=credential_id, - ) - # if there are no credentials left, delete the connector - connector = fetch_connector_by_id( - db_session=db_session, - connector_id=connector_id, - ) - if not connector or not len(connector.credentials): - logger.info("Found no credentials left for connector, deleting connector") - db_session.delete(connector) - db_session.commit() - - logger.notice( - "Successfully deleted connector_credential_pair with connector_id:" - f" '{connector_id}' and credential_id: '{credential_id}'. Deleted {num_docs_deleted} docs." - ) - return num_docs_deleted +@shared_task( + name="document_by_cc_pair_cleanup_task", + bind=True, + soft_time_limit=45, + time_limit=60, + max_retries=3, +) +def document_by_cc_pair_cleanup_task( + self: Task, document_id: str, connector_id: int, credential_id: int +) -> bool: + task_logger.info(f"document_id={document_id}") + + try: + with Session(get_sqlalchemy_engine()) as db_session: + curr_ind_name, sec_ind_name = get_both_index_names(db_session) + document_index = get_default_document_index( + primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name + ) + + count = get_document_connector_count(db_session, document_id) + if count == 1: + # count == 1 means this is the only remaining cc_pair reference to the doc + # delete it from vespa and the db + document_index.delete_single(doc_id=document_id) + delete_documents_complete__no_commit( + db_session=db_session, + document_ids=[document_id], + ) + elif count > 1: + # count > 1 means the document still has cc_pair references + doc = get_document(document_id, db_session) + if not doc: + return False + + # the below functions do not include cc_pairs being deleted. + # i.e. they will correctly omit access for the current cc_pair + doc_access = get_access_for_document( + document_id=document_id, db_session=db_session + ) + + doc_sets = fetch_document_sets_for_document(document_id, db_session) + update_doc_sets: set[str] = set(doc_sets) + + update_request = UpdateRequest( + document_ids=[document_id], + document_sets=update_doc_sets, + access=doc_access, + boost=doc.boost, + hidden=doc.hidden, + ) + + # update Vespa. OK if doc doesn't exist. Raises exception otherwise. + document_index.update_single(update_request=update_request) + + # there are still other cc_pair references to the doc, so just resync to Vespa + delete_document_by_connector_credential_pair__no_commit( + db_session=db_session, + document_id=document_id, + connector_credential_pair_identifier=ConnectorCredentialPairIdentifier( + connector_id=connector_id, + credential_id=credential_id, + ), + ) + + mark_document_as_synced(document_id, db_session) + else: + pass + + # update_docs_last_modified__no_commit( + # db_session=db_session, + # document_ids=[document_id], + # ) + + db_session.commit() + except SoftTimeLimitExceeded: + task_logger.info(f"SoftTimeLimitExceeded exception. doc_id={document_id}") + except Exception as e: + task_logger.exception("Unexpected exception") + + # Exponential backoff from 2^4 to 2^6 ... i.e. 
16, 32, 64 + countdown = 2 ** (self.request.retries + 4) + self.retry(exc=e, countdown=countdown) + + return True diff --git a/backend/danswer/background/indexing/run_indexing.py b/backend/danswer/background/indexing/run_indexing.py index a98f4e1f5ad..499899ac225 100644 --- a/backend/danswer/background/indexing/run_indexing.py +++ b/backend/danswer/background/indexing/run_indexing.py @@ -29,6 +29,7 @@ from danswer.db.models import IndexModelStatus from danswer.document_index.factory import get_default_document_index from danswer.indexing.embedder import DefaultIndexingEmbedder +from danswer.indexing.indexing_heartbeat import IndexingHeartbeat from danswer.indexing.indexing_pipeline import build_indexing_pipeline from danswer.utils.logger import IndexAttemptSingleton from danswer.utils.logger import setup_logger @@ -56,11 +57,11 @@ def _get_connector_runner( try: runnable_connector = instantiate_connector( - attempt.connector_credential_pair.connector.source, - task, - attempt.connector_credential_pair.connector.connector_specific_config, - attempt.connector_credential_pair.credential, - db_session, + db_session=db_session, + source=attempt.connector_credential_pair.connector.source, + input_type=task, + connector_specific_config=attempt.connector_credential_pair.connector.connector_specific_config, + credential=attempt.connector_credential_pair.credential, ) except Exception as e: logger.exception(f"Unable to instantiate connector due to {e}") @@ -103,34 +104,43 @@ def _run_indexing( ) embedding_model = DefaultIndexingEmbedder.from_db_search_settings( - search_settings=search_settings + search_settings=search_settings, + heartbeat=IndexingHeartbeat( + index_attempt_id=index_attempt.id, + db_session=db_session, + # let the world know we're still making progress after + # every 10 batches + freq=10, + ), ) indexing_pipeline = build_indexing_pipeline( attempt_id=index_attempt.id, embedder=embedding_model, document_index=document_index, - ignore_time_skip=index_attempt.from_beginning - or (search_settings.status == IndexModelStatus.FUTURE), + ignore_time_skip=( + index_attempt.from_beginning + or (search_settings.status == IndexModelStatus.FUTURE) + ), db_session=db_session, ) db_cc_pair = index_attempt.connector_credential_pair db_connector = index_attempt.connector_credential_pair.connector db_credential = index_attempt.connector_credential_pair.credential + earliest_index_time = ( + db_connector.indexing_start.timestamp() if db_connector.indexing_start else 0 + ) last_successful_index_time = ( - db_connector.indexing_start.timestamp() - if index_attempt.from_beginning and db_connector.indexing_start is not None - else ( - 0.0 - if index_attempt.from_beginning - else get_last_successful_attempt_time( - connector_id=db_connector.id, - credential_id=db_credential.id, - search_settings=index_attempt.search_settings, - db_session=db_session, - ) + earliest_index_time + if index_attempt.from_beginning + else get_last_successful_attempt_time( + connector_id=db_connector.id, + credential_id=db_credential.id, + earliest_index=earliest_index_time, + search_settings=index_attempt.search_settings, + db_session=db_session, ) ) @@ -384,17 +394,22 @@ def _prepare_index_attempt(db_session: Session, index_attempt_id: int) -> IndexA return attempt -def run_indexing_entrypoint(index_attempt_id: int, is_ee: bool = False) -> None: +def run_indexing_entrypoint( + index_attempt_id: int, connector_credential_pair_id: int, is_ee: bool = False +) -> None: """Entrypoint for indexing run when using dask distributed. 
Wraps the actual logic in a `try` block so that we can catch any exceptions and mark the attempt as failed.""" + try: if is_ee: global_version.set_ee() # set the indexing attempt ID so that all log messages from this process # will have it added as a prefix - IndexAttemptSingleton.set_index_attempt_id(index_attempt_id) + IndexAttemptSingleton.set_cc_and_index_id( + index_attempt_id, connector_credential_pair_id + ) with Session(get_sqlalchemy_engine()) as db_session: # make sure that it is valid to run this indexing attempt + mark it diff --git a/backend/danswer/background/task_utils.py b/backend/danswer/background/task_utils.py index 6e122678813..c1c24bf92a1 100644 --- a/backend/danswer/background/task_utils.py +++ b/backend/danswer/background/task_utils.py @@ -14,14 +14,6 @@ from danswer.db.tasks import register_task -def name_cc_cleanup_task(connector_id: int, credential_id: int) -> str: - return f"cleanup_connector_credential_pair_{connector_id}_{credential_id}" - - -def name_document_set_sync_task(document_set_id: int) -> str: - return f"sync_doc_set_{document_set_id}" - - def name_cc_prune_task( connector_id: int | None = None, credential_id: int | None = None ) -> str: @@ -93,9 +85,16 @@ def wrapped_fn( kwargs_for_build_name = kwargs or {} task_name = build_name_fn(*args_for_build_name, **kwargs_for_build_name) with Session(get_sqlalchemy_engine()) as db_session: - # mark the task as started + # register_task must come before fn = apply_async or else the task + # might run mark_task_start (and crash) before the task row exists + db_task = register_task(task_name, db_session) + task = fn(args, kwargs, *other_args, **other_kwargs) - register_task(task.id, task_name, db_session) + + # we update the celery task id for diagnostic purposes + # but it isn't currently used by any code + db_task.task_id = task.id + db_session.commit() return task diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index 28abb481143..94e703635ee 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -17,6 +17,7 @@ from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP from danswer.configs.app_configs import NUM_INDEXING_WORKERS from danswer.configs.app_configs import NUM_SECONDARY_INDEXING_WORKERS +from danswer.configs.constants import DocumentSource from danswer.configs.constants import POSTGRES_INDEXER_APP_NAME from danswer.db.connector import fetch_connectors from danswer.db.connector_credential_pair import fetch_connector_credential_pairs @@ -46,7 +47,6 @@ from shared_configs.configs import LOG_LEVEL from shared_configs.configs import MODEL_SERVER_PORT - logger = setup_logger() # If the indexing dies, it's most likely due to resource constraints, @@ -67,6 +67,10 @@ def _should_create_new_indexing( ) -> bool: connector = cc_pair.connector + # don't kick off indexing for `NOT_APPLICABLE` sources + if connector.source == DocumentSource.NOT_APPLICABLE: + return False + # User can still manually create single indexing attempts via the UI for the # currently in use index if DISABLE_INDEX_UPDATE_ON_SWAP: @@ -207,7 +211,6 @@ def cleanup_indexing_jobs( timeout_hours: int = CLEANUP_INDEXING_JOBS_TIMEOUT, ) -> dict[int, Future | SimpleJob]: existing_jobs_copy = existing_jobs.copy() - # clean up completed jobs with Session(get_sqlalchemy_engine()) as db_session: for attempt_id, job in existing_jobs.items(): @@ -308,7 +311,12 @@ def kickoff_indexing_jobs( indexing_attempt_count = 0 + primary_client_full = False + 
secondary_client_full = False for attempt, search_settings in new_indexing_attempts: + if primary_client_full and secondary_client_full: + break + use_secondary_index = ( search_settings.status == IndexModelStatus.FUTURE if search_settings is not None @@ -333,20 +341,28 @@ def kickoff_indexing_jobs( ) continue - if use_secondary_index: - run = secondary_client.submit( - run_indexing_entrypoint, - attempt.id, - global_version.get_is_ee_version(), - pure=False, - ) + if not use_secondary_index: + if not primary_client_full: + run = client.submit( + run_indexing_entrypoint, + attempt.id, + attempt.connector_credential_pair_id, + global_version.get_is_ee_version(), + pure=False, + ) + if not run: + primary_client_full = True else: - run = client.submit( - run_indexing_entrypoint, - attempt.id, - global_version.get_is_ee_version(), - pure=False, - ) + if not secondary_client_full: + run = secondary_client.submit( + run_indexing_entrypoint, + attempt.id, + attempt.connector_credential_pair_id, + global_version.get_is_ee_version(), + pure=False, + ) + if not run: + secondary_client_full = True if run: if indexing_attempt_count == 0: @@ -400,6 +416,7 @@ def update_loop( warm_up_bi_encoder( embedding_model=embedding_model, ) + logger.notice("First inference complete.") client_primary: Client | SimpleJobClient client_secondary: Client | SimpleJobClient @@ -428,6 +445,7 @@ def update_loop( existing_jobs: dict[int, Future | SimpleJob] = {} + logger.notice("Startup complete. Waiting for indexing jobs...") while True: start = time.time() start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S") diff --git a/backend/danswer/chat/load_yamls.py b/backend/danswer/chat/load_yamls.py index 0690f08b759..8d0fd34d8da 100644 --- a/backend/danswer/chat/load_yamls.py +++ b/backend/danswer/chat/load_yamls.py @@ -122,7 +122,7 @@ def load_personas_from_yaml( prompt_ids=prompt_ids, document_set_ids=doc_set_ids, tool_ids=tool_ids, - default_persona=True, + builtin_persona=True, is_public=True, display_priority=existing_persona.display_priority if existing_persona is not None diff --git a/backend/danswer/chat/models.py b/backend/danswer/chat/models.py index 6d12d68df08..97d5b9e7275 100644 --- a/backend/danswer/chat/models.py +++ b/backend/danswer/chat/models.py @@ -1,5 +1,6 @@ from collections.abc import Iterator from datetime import datetime +from enum import Enum from typing import Any from pydantic import BaseModel @@ -44,8 +45,26 @@ def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: return initial_dict +class StreamStopReason(Enum): + CONTEXT_LENGTH = "context_length" + CANCELLED = "cancelled" + + +class StreamStopInfo(BaseModel): + stop_reason: StreamStopReason + + def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore + data = super().model_dump(mode="json", *args, **kwargs) # type: ignore + data["stop_reason"] = self.stop_reason.name + return data + + class LLMRelevanceFilterResponse(BaseModel): - relevant_chunk_indices: list[int] + llm_selected_doc_indices: list[int] + + +class FinalUsedContextDocsResponse(BaseModel): + final_context_docs: list[LlmDoc] class RelevanceAnalysis(BaseModel): @@ -78,6 +97,16 @@ class CitationInfo(BaseModel): document_id: str +class AllCitations(BaseModel): + citations: list[CitationInfo] + + +# This is a mapping of the citation number to the document index within +# the result search doc set +class MessageSpecificCitations(BaseModel): + citation_map: dict[int, int] + + class 
MessageResponseIDInfo(BaseModel): user_message_id: int | None reserved_assistant_message_id: int @@ -123,7 +152,7 @@ class QAResponse(SearchResponse, DanswerAnswer): predicted_flow: QueryFlow predicted_search: SearchType eval_res_valid: bool | None = None - llm_chunks_indices: list[int] | None = None + llm_selected_doc_indices: list[int] | None = None error_msg: str | None = None @@ -144,6 +173,7 @@ class CustomToolResponse(BaseModel): | ImageGenerationDisplay | CustomToolResponse | StreamingError + | StreamStopInfo ) diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py index 2eea2cfc20f..f09ac18f32a 100644 --- a/backend/danswer/chat/process_message.py +++ b/backend/danswer/chat/process_message.py @@ -7,12 +7,15 @@ from sqlalchemy.orm import Session from danswer.chat.chat_utils import create_chat_chain +from danswer.chat.models import AllCitations from danswer.chat.models import CitationInfo from danswer.chat.models import CustomToolResponse from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import FinalUsedContextDocsResponse from danswer.chat.models import ImageGenerationDisplay from danswer.chat.models import LLMRelevanceFilterResponse from danswer.chat.models import MessageResponseIDInfo +from danswer.chat.models import MessageSpecificCitations from danswer.chat.models import QADocsResponse from danswer.chat.models import StreamingError from danswer.configs.chat_configs import BING_API_KEY @@ -70,7 +73,9 @@ from danswer.server.query_and_chat.models import CreateChatMessageRequest from danswer.server.utils import get_json_line from danswer.tools.built_in_tools import get_built_in_tool_by_id -from danswer.tools.custom.custom_tool import build_custom_tools_from_openapi_schema +from danswer.tools.custom.custom_tool import ( + build_custom_tools_from_openapi_schema_and_headers, +) from danswer.tools.custom.custom_tool import CUSTOM_TOOL_RESPONSE_ID from danswer.tools.custom.custom_tool import CustomToolCallSummary from danswer.tools.force import ForceUseTool @@ -85,6 +90,8 @@ ) from danswer.tools.internet_search.internet_search_tool import InternetSearchResponse from danswer.tools.internet_search.internet_search_tool import InternetSearchTool +from danswer.tools.models import DynamicSchemaInfo +from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS_ID from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID from danswer.tools.search.search_tool import SearchResponseSummary from danswer.tools.search.search_tool import SearchTool @@ -100,9 +107,9 @@ logger = setup_logger() -def translate_citations( +def _translate_citations( citations_list: list[CitationInfo], db_docs: list[DbSearchDoc] -) -> dict[int, int]: +) -> MessageSpecificCitations: """Always cites the first instance of the document_id, assumes the db_docs are sorted in the order displayed in the UI""" doc_id_to_saved_doc_id_map: dict[str, int] = {} @@ -117,7 +124,7 @@ def translate_citations( citation.citation_num ] = doc_id_to_saved_doc_id_map[citation.document_id] - return citation_to_saved_doc_id_map + return MessageSpecificCitations(citation_map=citation_to_saved_doc_id_map) def _handle_search_tool_response_summary( @@ -239,11 +246,14 @@ def _get_force_search_settings( StreamingError | QADocsResponse | LLMRelevanceFilterResponse + | FinalUsedContextDocsResponse | ChatMessageDetail | DanswerAnswerPiece + | AllCitations | CitationInfo | ImageGenerationDisplay | CustomToolResponse + | MessageSpecificCitations | MessageResponseIDInfo ) 
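(Aside, not part of the diff itself: the `StreamStopInfo` model added in `chat/models.py` above overrides `model_dump` so the stop reason is serialized by enum *name* rather than by value. A minimal standalone sketch of the expected behaviour, assuming pydantic v2:)

```python
from enum import Enum
from typing import Any

from pydantic import BaseModel


class StreamStopReason(Enum):
    CONTEXT_LENGTH = "context_length"
    CANCELLED = "cancelled"


class StreamStopInfo(BaseModel):
    stop_reason: StreamStopReason

    def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
        # default JSON-mode dump would give the enum *value* ("cancelled");
        # override it with the enum *name* ("CANCELLED") as the diff does
        data = super().model_dump(mode="json", *args, **kwargs)
        data["stop_reason"] = self.stop_reason.name
        return data


print(StreamStopInfo(stop_reason=StreamStopReason.CANCELLED).model_dump())
# -> {'stop_reason': 'CANCELLED'}
```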
ChatPacketStream = Iterator[ChatPacket] @@ -263,6 +273,7 @@ def stream_chat_message_objects( use_existing_user_message: bool = False, litellm_additional_headers: dict[str, str] | None = None, is_connected: Callable[[], bool] | None = None, + enforce_chat_session_id_for_search_docs: bool = True, ) -> ChatPacketStream: """Streams in order: 1. [conditional] Retrieved documents if a search needs to be run @@ -434,6 +445,7 @@ def stream_chat_message_objects( chat_session=chat_session, user_id=user_id, db_session=db_session, + enforce_chat_session_id_for_search_docs=enforce_chat_session_id_for_search_docs, ) # Generates full documents currently @@ -597,8 +609,13 @@ def stream_chat_message_objects( if db_tool_model.openapi_schema: tool_dict[db_tool_model.id] = cast( list[Tool], - build_custom_tools_from_openapi_schema( - db_tool_model.openapi_schema + build_custom_tools_from_openapi_schema_and_headers( + db_tool_model.openapi_schema, + dynamic_schema_info=DynamicSchemaInfo( + chat_session_id=chat_session_id, + message_id=user_message.id if user_message else None, + ), + custom_headers=db_tool_model.custom_headers, ), ) @@ -663,9 +680,11 @@ def stream_chat_message_objects( db_session=db_session, selected_search_docs=selected_db_search_docs, # Deduping happens at the last step to avoid harming quality by dropping content early on - dedupe_docs=retrieval_options.dedupe_docs - if retrieval_options - else False, + dedupe_docs=( + retrieval_options.dedupe_docs + if retrieval_options + else False + ), ) yield qa_docs_response elif packet.id == SECTION_RELEVANCE_LIST_ID: @@ -688,9 +707,14 @@ def stream_chat_message_objects( ) yield LLMRelevanceFilterResponse( - relevant_chunk_indices=llm_indices + llm_selected_doc_indices=llm_indices ) + elif packet.id == FINAL_CONTEXT_DOCUMENTS_ID: + yield FinalUsedContextDocsResponse( + final_context_docs=packet.response + ) + elif packet.id == IMAGE_GENERATION_RESPONSE_ID: img_generation_response = cast( list[ImageGenerationResponse], packet.response @@ -727,10 +751,18 @@ def stream_chat_message_objects( tool_result = packet yield cast(ChatPacket, packet) logger.debug("Reached end of stream") - except Exception as e: + except ValueError as e: + logger.exception("Failed to process chat message.") + error_msg = str(e) - logger.exception(f"Failed to process chat message: {error_msg}") + yield StreamingError(error=error_msg) + db_session.rollback() + return + + except Exception as e: + logger.exception("Failed to process chat message.") + error_msg = str(e) stack_trace = traceback.format_exc() client_error_msg = litellm_exception_to_error_msg(e, llm) if llm.config.api_key and len(llm.config.api_key) > 2: @@ -743,12 +775,13 @@ def stream_chat_message_objects( # Post-LLM answer processing try: - db_citations = None + message_specific_citations: MessageSpecificCitations | None = None if reference_db_search_docs: - db_citations = translate_citations( + message_specific_citations = _translate_citations( citations_list=answer.citations, db_docs=reference_db_search_docs, ) + yield AllCitations(citations=answer.citations) # Saving Gen AI answer and responding with message info tool_name_to_tool_id: dict[str, int] = {} @@ -765,18 +798,22 @@ def stream_chat_message_objects( reference_docs=reference_db_search_docs, files=ai_message_files, token_count=len(llm_tokenizer_encode_func(answer.llm_answer)), - citations=db_citations, + citations=message_specific_citations.citation_map + if message_specific_citations + else None, error=None, - tool_calls=[ - ToolCall( - 
tool_id=tool_name_to_tool_id[tool_result.tool_name], - tool_name=tool_result.tool_name, - tool_arguments=tool_result.tool_args, - tool_result=tool_result.tool_result, - ) - ] - if tool_result - else [], + tool_calls=( + [ + ToolCall( + tool_id=tool_name_to_tool_id[tool_result.tool_name], + tool_name=tool_result.tool_name, + tool_arguments=tool_result.tool_args, + tool_result=tool_result.tool_result, + ) + ] + if tool_result + else [] + ), ) logger.debug("Committing messages") diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index f6b218c5f56..aa3cccc512f 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -126,6 +126,7 @@ except ValueError: INDEX_BATCH_SIZE = 16 + # Below are intended to match the env variables names used by the official postgres docker image # https://hub.docker.com/_/postgres POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres" @@ -149,6 +150,43 @@ except ValueError: POSTGRES_POOL_RECYCLE = POSTGRES_POOL_RECYCLE_DEFAULT +REDIS_SSL = os.getenv("REDIS_SSL", "").lower() == "true" +REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost" +REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379)) +REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") or "" + +# Used for general redis things +REDIS_DB_NUMBER = int(os.environ.get("REDIS_DB_NUMBER", 0)) + +# Used by celery as broker and backend +REDIS_DB_NUMBER_CELERY_RESULT_BACKEND = int( + os.environ.get("REDIS_DB_NUMBER_CELERY_RESULT_BACKEND", 14) +) +REDIS_DB_NUMBER_CELERY = int(os.environ.get("REDIS_DB_NUMBER_CELERY", 15)) # broker + +# will propagate to both our redis client as well as celery's redis client +REDIS_HEALTH_CHECK_INTERVAL = int(os.environ.get("REDIS_HEALTH_CHECK_INTERVAL", 60)) + +# our redis client only, not celery's +REDIS_POOL_MAX_CONNECTIONS = int(os.environ.get("REDIS_POOL_MAX_CONNECTIONS", 128)) + +# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings +# should be one of "required", "optional", or "none" +REDIS_SSL_CERT_REQS = os.getenv("REDIS_SSL_CERT_REQS", "none") +REDIS_SSL_CA_CERTS = os.getenv("REDIS_SSL_CA_CERTS", None) + +CELERY_RESULT_EXPIRES = int(os.environ.get("CELERY_RESULT_EXPIRES", 86400)) # seconds + +# https://docs.celeryq.dev/en/stable/userguide/configuration.html#broker-pool-limit +# Setting to None may help when there is a proxy in the way closing idle connections +CELERY_BROKER_POOL_LIMIT_DEFAULT = 10 +try: + CELERY_BROKER_POOL_LIMIT = int( + os.environ.get("CELERY_BROKER_POOL_LIMIT", CELERY_BROKER_POOL_LIMIT_DEFAULT) + ) +except ValueError: + CELERY_BROKER_POOL_LIMIT = CELERY_BROKER_POOL_LIMIT_DEFAULT + ##### # Connector Configs ##### @@ -225,6 +263,10 @@ for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",") if ignored_tag ] +# Maximum size for Jira tickets in bytes (default: 100KB) +JIRA_CONNECTOR_MAX_TICKET_SIZE = int( + os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE", 100 * 1024) +) GONG_CONNECTOR_START_TIME = os.environ.get("GONG_CONNECTOR_START_TIME") @@ -248,7 +290,7 @@ os.environ.get("ALLOW_SIMULTANEOUS_PRUNING", "").lower() == "true" ) -# This is the maxiumum rate at which documents are queried for a pruning job. 0 disables the limitation. +# This is the maximum rate at which documents are queried for a pruning job. 0 disables the limitation. 
MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE = int( os.environ.get("MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE", 0) ) diff --git a/backend/danswer/configs/chat_configs.py b/backend/danswer/configs/chat_configs.py index 2b6b0990e1d..e67e4258fec 100644 --- a/backend/danswer/configs/chat_configs.py +++ b/backend/danswer/configs/chat_configs.py @@ -83,8 +83,15 @@ # Stops streaming answers back to the UI if this pattern is seen: STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None -# The backend logic for this being True isn't fully supported yet -HARD_DELETE_CHATS = False +# Set this to "true" to hard delete chats +# This will make chats unviewable by admins after a user deletes them +# As opposed to soft deleting them, which just hides them from non-admin users +HARD_DELETE_CHATS = os.environ.get("HARD_DELETE_CHATS", "").lower() == "true" # Internet Search BING_API_KEY = os.environ.get("BING_API_KEY") or None + +# Enable in-house model for detecting connector-based filtering in queries +ENABLE_CONNECTOR_CLASSIFIER = os.environ.get("ENABLE_CONNECTOR_CLASSIFIER", False) + +VESPA_SEARCHER_THREADS = int(os.environ.get("VESPA_SEARCHER_THREADS") or 2) diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 64c162d7bef..678b3a5499d 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -1,3 +1,5 @@ +import platform +import socket from enum import auto from enum import Enum @@ -34,7 +36,9 @@ POSTGRES_INDEXER_APP_NAME = "indexer" POSTGRES_CELERY_APP_NAME = "celery" POSTGRES_CELERY_BEAT_APP_NAME = "celery_beat" -POSTGRES_CELERY_WORKER_APP_NAME = "celery_worker" +POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME = "celery_worker_primary" +POSTGRES_CELERY_WORKER_LIGHT_APP_NAME = "celery_worker_light" +POSTGRES_CELERY_WORKER_HEAVY_APP_NAME = "celery_worker_heavy" POSTGRES_PERMISSIONS_APP_NAME = "permissions" POSTGRES_UNKNOWN_APP_NAME = "unknown" @@ -57,9 +61,13 @@ KV_GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time" KV_SETTINGS_KEY = "danswer_settings" KV_CUSTOMER_UUID_KEY = "customer_uuid" +KV_INSTANCE_DOMAIN_KEY = "instance_domain" KV_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings" KV_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__" +CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 60 +CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120 + class DocumentSource(str, Enum): # Special case, document passed in via Danswer APIs without specifying a source type @@ -96,10 +104,12 @@ class DocumentSource(str, Enum): CLICKUP = "clickup" MEDIAWIKI = "mediawiki" WIKIPEDIA = "wikipedia" + ASANA = "asana" S3 = "s3" R2 = "r2" GOOGLE_CLOUD_STORAGE = "google_cloud_storage" OCI_STORAGE = "oci_storage" + XENFORO = "xenforo" NOT_APPLICABLE = "not_applicable" @@ -130,6 +140,12 @@ class AuthType(str, Enum): SAML = "saml" +class SessionType(str, Enum): + CHAT = "Chat" + SEARCH = "Search" + SLACK = "Slack" + + class QAFeedbackType(str, Enum): LIKE = "like" # User likes the answer, used for metrics DISLIKE = "dislike" # User dislikes the answer, used for metrics @@ -166,3 +182,36 @@ class FileOrigin(str, Enum): class PostgresAdvisoryLocks(Enum): KOMBU_MESSAGE_CLEANUP_LOCK_ID = auto() + + +class DanswerCeleryQueues: + VESPA_DOCSET_SYNC_GENERATOR = "vespa_docset_sync_generator" + VESPA_USERGROUP_SYNC_GENERATOR = "vespa_usergroup_sync_generator" + VESPA_METADATA_SYNC = "vespa_metadata_sync" + CONNECTOR_DELETION = "connector_deletion" + + +class DanswerRedisLocks: + PRIMARY_WORKER = "da_lock:primary_worker" + CHECK_VESPA_SYNC_BEAT_LOCK = 
"da_lock:check_vespa_sync_beat" + MONITOR_VESPA_SYNC_BEAT_LOCK = "da_lock:monitor_vespa_sync_beat" + CHECK_CONNECTOR_DELETION_BEAT_LOCK = "da_lock:check_connector_deletion_beat" + MONITOR_CONNECTOR_DELETION_BEAT_LOCK = "da_lock:monitor_connector_deletion_beat" + + +class DanswerCeleryPriority(int, Enum): + HIGHEST = 0 + HIGH = auto() + MEDIUM = auto() + LOW = auto() + LOWEST = auto() + + +REDIS_SOCKET_KEEPALIVE_OPTIONS = {} +REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPINTVL] = 15 +REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPCNT] = 3 + +if platform.system() == "Darwin": + REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPALIVE] = 60 # type: ignore +else: + REDIS_SOCKET_KEEPALIVE_OPTIONS[socket.TCP_KEEPIDLE] = 60 # type: ignore diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py index e5fa5e74a28..0f6febe4130 100644 --- a/backend/danswer/configs/model_configs.py +++ b/backend/danswer/configs/model_configs.py @@ -39,9 +39,13 @@ ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "search_query: ") ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "search_document: ") # Purely an optimization, memory limitation consideration -BATCH_SIZE_ENCODE_CHUNKS = 8 + +# User's set embedding batch size overrides the default encoding batch sizes +EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE") or 0) or None + +BATCH_SIZE_ENCODE_CHUNKS = EMBEDDING_BATCH_SIZE or 8 # don't send over too many chunks at once, as sending too many could cause timeouts -BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = 512 +BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES = EMBEDDING_BATCH_SIZE or 512 # For score display purposes, only way is to know the expected ranges CROSS_ENCODER_RANGE_MAX = 1 CROSS_ENCODER_RANGE_MIN = 0 @@ -51,35 +55,14 @@ # Generative AI Model Configs ##### -# If changing GEN_AI_MODEL_PROVIDER or GEN_AI_MODEL_VERSION from the default, -# be sure to use one that is LiteLLM compatible: -# https://litellm.vercel.app/docs/providers/azure#completion---using-env-variables -# The provider is the prefix before / in the model argument - -# Additionally Danswer supports GPT4All and custom request library based models -# Set GEN_AI_MODEL_PROVIDER to "custom" to use the custom requests approach -# Set GEN_AI_MODEL_PROVIDER to "gpt4all" to use gpt4all models running locally -GEN_AI_MODEL_PROVIDER = os.environ.get("GEN_AI_MODEL_PROVIDER") or "openai" -# If using Azure, it's the engine name, for example: Danswer +# NOTE: the 3 below should only be used for dev. 
+GEN_AI_API_KEY = os.environ.get("GEN_AI_API_KEY") GEN_AI_MODEL_VERSION = os.environ.get("GEN_AI_MODEL_VERSION") # The fallback display name to use for default model when using a custom model provider GEN_AI_DISPLAY_NAME = os.environ.get("GEN_AI_DISPLAY_NAME") or "Custom LLM" -# For secondary flows like extracting filters or deciding if a chunk is useful, we don't need -# as powerful of a model as say GPT-4 so we can use an alternative that is faster and cheaper FAST_GEN_AI_MODEL_VERSION = os.environ.get("FAST_GEN_AI_MODEL_VERSION") -# If the Generative AI model requires an API key for access, otherwise can leave blank -GEN_AI_API_KEY = ( - os.environ.get("GEN_AI_API_KEY", os.environ.get("OPENAI_API_KEY")) or None -) - -# API Base, such as (for Azure): https://danswer.openai.azure.com/ -GEN_AI_API_ENDPOINT = os.environ.get("GEN_AI_API_ENDPOINT") or None -# API Version, such as (for Azure): 2023-09-15-preview -GEN_AI_API_VERSION = os.environ.get("GEN_AI_API_VERSION") or None -# LiteLLM custom_llm_provider -GEN_AI_LLM_PROVIDER_TYPE = os.environ.get("GEN_AI_LLM_PROVIDER_TYPE") or None # Override the auto-detection of LLM max context length GEN_AI_MAX_TOKENS = int(os.environ.get("GEN_AI_MAX_TOKENS") or 0) or None diff --git a/backend/danswer/connectors/README.md b/backend/danswer/connectors/README.md index b50232fa256..ef6c63d2697 100644 --- a/backend/danswer/connectors/README.md +++ b/backend/danswer/connectors/README.md @@ -59,6 +59,8 @@ if __name__ == "__main__": latest_docs = test_connector.poll_source(one_day_ago, current) ``` +> Note: Be sure to set PYTHONPATH to danswer/backend before running the above main. + ### Additional Required Changes: #### Backend Changes @@ -68,17 +70,16 @@ if __name__ == "__main__": [here](https://github.com/danswer-ai/danswer/blob/main/backend/danswer/connectors/factory.py#L33) #### Frontend Changes -- Create the new connector directory and admin page under `danswer/web/src/app/admin/connectors/` -- Create the new icon, type, source, and filter changes -(refer to existing [PR](https://github.com/danswer-ai/danswer/pull/139)) +- Add the new Connector definition to the `SOURCE_METADATA_MAP` [here](https://github.com/danswer-ai/danswer/blob/main/web/src/lib/sources.ts#L59). +- Add the definition for the new Form to the `connectorConfigs` object [here](https://github.com/danswer-ai/danswer/blob/main/web/src/lib/connectors/connectors.ts#L79). #### Docs Changes Create the new connector page (with guiding images!) with how to get the connector credentials and how to set up the -connector in Danswer. Then create a Pull Request in https://github.com/danswer-ai/danswer-docs - +connector in Danswer. Then create a Pull Request in https://github.com/danswer-ai/danswer-docs. ### Before opening PR 1. Be sure to fully test changes end to end with setting up the connector and updating the index with new docs from the -new connector. -2. Be sure to run the linting/formatting, refer to the formatting and linting section in +new connector. To make it easier to review, please attach a video showing the successful creation of the connector via the UI (starting from the `Add Connector` page). +2. Add a folder + tests under the `backend/tests/daily/connectors` directory. For an example, check out the [test for Confluence](https://github.com/danswer-ai/danswer/blob/main/backend/tests/daily/connectors/confluence/test_confluence_basic.py). In the PR description, include a guide on how to set up the new source to pass the test.
Before merging, we will re-create the environment and make sure the test(s) pass. +3. Be sure to run the linting/formatting, refer to the formatting and linting section in [CONTRIBUTING.md](https://github.com/danswer-ai/danswer/blob/main/CONTRIBUTING.md#formatting-and-linting) diff --git a/backend/ee/danswer/connectors/__init__.py b/backend/danswer/connectors/asana/__init__.py similarity index 100% rename from backend/ee/danswer/connectors/__init__.py rename to backend/danswer/connectors/asana/__init__.py diff --git a/backend/danswer/connectors/asana/asana_api.py b/backend/danswer/connectors/asana/asana_api.py new file mode 100755 index 00000000000..57c470c4531 --- /dev/null +++ b/backend/danswer/connectors/asana/asana_api.py @@ -0,0 +1,233 @@ +import time +from collections.abc import Iterator +from datetime import datetime +from typing import Dict + +import asana # type: ignore + +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +# https://github.com/Asana/python-asana/tree/master?tab=readme-ov-file#documentation-for-api-endpoints +class AsanaTask: + def __init__( + self, + id: str, + title: str, + text: str, + link: str, + last_modified: datetime, + project_gid: str, + project_name: str, + ) -> None: + self.id = id + self.title = title + self.text = text + self.link = link + self.last_modified = last_modified + self.project_gid = project_gid + self.project_name = project_name + + def __str__(self) -> str: + return f"ID: {self.id}\nTitle: {self.title}\nLast modified: {self.last_modified}\nText: {self.text}" + + +class AsanaAPI: + def __init__( + self, api_token: str, workspace_gid: str, team_gid: str | None + ) -> None: + self._user = None # type: ignore + self.workspace_gid = workspace_gid + self.team_gid = team_gid + + self.configuration = asana.Configuration() + self.api_client = asana.ApiClient(self.configuration) + self.tasks_api = asana.TasksApi(self.api_client) + self.stories_api = asana.StoriesApi(self.api_client) + self.users_api = asana.UsersApi(self.api_client) + self.project_api = asana.ProjectsApi(self.api_client) + self.workspaces_api = asana.WorkspacesApi(self.api_client) + + self.api_error_count = 0 + self.configuration.access_token = api_token + self.task_count = 0 + + def get_tasks( + self, project_gids: list[str] | None, start_date: str + ) -> Iterator[AsanaTask]: + """Get all tasks from the projects with the given gids that were modified since the given date. 
+ If project_gids is None, get all tasks from all projects in the workspace.""" + logger.info("Starting to fetch Asana projects") + projects = self.project_api.get_projects( + opts={ + "workspace": self.workspace_gid, + "opt_fields": "gid,name,archived,modified_at", + } + ) + start_seconds = int(time.mktime(datetime.now().timetuple())) + projects_list = [] + project_count = 0 + for project_info in projects: + project_gid = project_info["gid"] + if project_gids is None or project_gid in project_gids: + projects_list.append(project_gid) + else: + logger.debug( + f"Skipping project: {project_gid} - not in accepted project_gids" + ) + project_count += 1 + if project_count % 100 == 0: + logger.info(f"Processed {project_count} projects") + + logger.info(f"Found {len(projects_list)} projects to process") + for project_gid in projects_list: + for task in self._get_tasks_for_project( + project_gid, start_date, start_seconds + ): + yield task + logger.info(f"Completed fetching {self.task_count} tasks from Asana") + if self.api_error_count > 0: + logger.warning( + f"Encountered {self.api_error_count} API errors during task fetching" + ) + + def _get_tasks_for_project( + self, project_gid: str, start_date: str, start_seconds: int + ) -> Iterator[AsanaTask]: + project = self.project_api.get_project(project_gid, opts={}) + if project["archived"]: + logger.info(f"Skipping archived project: {project['name']} ({project_gid})") + return [] + if not project["team"] or not project["team"]["gid"]: + logger.info( + f"Skipping project without a team: {project['name']} ({project_gid})" + ) + return [] + if project["privacy_setting"] == "private": + if self.team_gid and project["team"]["gid"] != self.team_gid: + logger.info( + f"Skipping private project not in configured team: {project['name']} ({project_gid})" + ) + return [] + else: + logger.info( + f"Processing private project in configured team: {project['name']} ({project_gid})" + ) + + simple_start_date = start_date.split(".")[0].split("+")[0] + logger.info( + f"Fetching tasks modified since {simple_start_date} for project: {project['name']} ({project_gid})" + ) + + opts = { + "opt_fields": "name,memberships,memberships.project,completed_at,completed_by,created_at," + "created_by,custom_fields,dependencies,due_at,due_on,external,html_notes,liked,likes," + "modified_at,notes,num_hearts,parent,projects,resource_subtype,resource_type,start_on," + "workspace,permalink_url", + "modified_since": start_date, + } + tasks_from_api = self.tasks_api.get_tasks_for_project(project_gid, opts) + for data in tasks_from_api: + self.task_count += 1 + if self.task_count % 10 == 0: + end_seconds = time.mktime(datetime.now().timetuple()) + runtime_seconds = end_seconds - start_seconds + if runtime_seconds > 0: + logger.info( + f"Processed {self.task_count} tasks in {runtime_seconds:.0f} seconds " + f"({self.task_count / runtime_seconds:.2f} tasks/second)" + ) + + logger.debug(f"Processing Asana task: {data['name']}") + + text = self._construct_task_text(data) + + try: + text += self._fetch_and_add_comments(data["gid"]) + + last_modified_date = self.format_date(data["modified_at"]) + text += f"Last modified: {last_modified_date}\n" + + task = AsanaTask( + id=data["gid"], + title=data["name"], + text=text, + link=data["permalink_url"], + last_modified=datetime.fromisoformat(data["modified_at"]), + project_gid=project_gid, + project_name=project["name"], + ) + yield task + except Exception: + logger.error( + f"Error processing task {data['gid']} in project {project_gid}", + 
exc_info=True, + ) + self.api_error_count += 1 + + def _construct_task_text(self, data: Dict) -> str: + text = f"{data['name']}\n\n" + + if data["notes"]: + text += f"{data['notes']}\n\n" + + if data["created_by"] and data["created_by"]["gid"]: + creator = self.get_user(data["created_by"]["gid"])["name"] + created_date = self.format_date(data["created_at"]) + text += f"Created by: {creator} on {created_date}\n" + + if data["due_on"]: + due_date = self.format_date(data["due_on"]) + text += f"Due date: {due_date}\n" + + if data["completed_at"]: + completed_date = self.format_date(data["completed_at"]) + text += f"Completed on: {completed_date}\n" + + text += "\n" + return text + + def _fetch_and_add_comments(self, task_gid: str) -> str: + text = "" + stories_opts: Dict[str, str] = {} + story_start = time.time() + stories = self.stories_api.get_stories_for_task(task_gid, stories_opts) + + story_count = 0 + comment_count = 0 + + for story in stories: + story_count += 1 + if story["resource_subtype"] == "comment_added": + comment = self.stories_api.get_story( + story["gid"], opts={"opt_fields": "text,created_by,created_at"} + ) + commenter = self.get_user(comment["created_by"]["gid"])["name"] + text += f"Comment by {commenter}: {comment['text']}\n\n" + comment_count += 1 + + story_duration = time.time() - story_start + logger.debug( + f"Processed {story_count} stories (including {comment_count} comments) in {story_duration:.2f} seconds" + ) + + return text + + def get_user(self, user_gid: str) -> Dict: + if self._user is not None: + return self._user + self._user = self.users_api.get_user(user_gid, {"opt_fields": "name,email"}) + + if not self._user: + logger.warning(f"Unable to fetch user information for user_gid: {user_gid}") + return {"name": "Unknown"} + return self._user + + def format_date(self, date_str: str) -> str: + date = datetime.fromisoformat(date_str) + return time.strftime("%Y-%m-%d", date.timetuple()) + + def get_time(self) -> str: + return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) diff --git a/backend/danswer/connectors/asana/connector.py b/backend/danswer/connectors/asana/connector.py new file mode 100755 index 00000000000..3e2c9a8aaf6 --- /dev/null +++ b/backend/danswer/connectors/asana/connector.py @@ -0,0 +1,120 @@ +import datetime +from typing import Any + +from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE +from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.constants import DocumentSource +from danswer.connectors.asana import asana_api +from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import LoadConnector +from danswer.connectors.interfaces import PollConnector +from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.models import Document +from danswer.connectors.models import Section +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +class AsanaConnector(LoadConnector, PollConnector): + def __init__( + self, + asana_workspace_id: str, + asana_project_ids: str | None = None, + asana_team_id: str | None = None, + batch_size: int = INDEX_BATCH_SIZE, + continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE, + ) -> None: + self.workspace_id = asana_workspace_id + self.project_ids_to_index: list[str] | None = ( + asana_project_ids.split(",") if asana_project_ids is not None else None + ) + self.asana_team_id = asana_team_id + self.batch_size = batch_size + self.continue_on_failure = continue_on_failure + 
logger.info( + f"AsanaConnector initialized with workspace_id: {asana_workspace_id}" + ) + + def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + self.api_token = credentials["asana_api_token_secret"] + self.asana_client = asana_api.AsanaAPI( + api_token=self.api_token, + workspace_gid=self.workspace_id, + team_gid=self.asana_team_id, + ) + logger.info("Asana credentials loaded and API client initialized") + return None + + def poll_source( + self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch | None + ) -> GenerateDocumentsOutput: + start_time = datetime.datetime.fromtimestamp(start).isoformat() + logger.info(f"Starting Asana poll from {start_time}") + asana = asana_api.AsanaAPI( + api_token=self.api_token, + workspace_gid=self.workspace_id, + team_gid=self.asana_team_id, + ) + docs_batch: list[Document] = [] + tasks = asana.get_tasks(self.project_ids_to_index, start_time) + + for task in tasks: + doc = self._message_to_doc(task) + docs_batch.append(doc) + + if len(docs_batch) >= self.batch_size: + logger.info(f"Yielding batch of {len(docs_batch)} documents") + yield docs_batch + docs_batch = [] + + if docs_batch: + logger.info(f"Yielding final batch of {len(docs_batch)} documents") + yield docs_batch + + logger.info("Asana poll completed") + + def load_from_state(self) -> GenerateDocumentsOutput: + logger.notice("Starting full index of all Asana tasks") + return self.poll_source(start=0, end=None) + + def _message_to_doc(self, task: asana_api.AsanaTask) -> Document: + logger.debug(f"Converting Asana task {task.id} to Document") + return Document( + id=task.id, + sections=[Section(link=task.link, text=task.text)], + doc_updated_at=task.last_modified, + source=DocumentSource.ASANA, + semantic_identifier=task.title, + metadata={ + "group": task.project_gid, + "project": task.project_name, + }, + ) + + +if __name__ == "__main__": + import time + import os + + logger.notice("Starting Asana connector test") + connector = AsanaConnector( + os.environ["WORKSPACE_ID"], + os.environ["PROJECT_IDS"], + os.environ["TEAM_ID"], + ) + connector.load_credentials( + { + "asana_api_token_secret": os.environ["API_TOKEN"], + } + ) + logger.info("Loading all documents from Asana") + all_docs = connector.load_from_state() + current = time.time() + one_day_ago = current - 24 * 60 * 60 # 1 day + logger.info("Polling for documents updated in the last 24 hours") + latest_docs = connector.poll_source(one_day_ago, current) + for docs in latest_docs: + for doc in docs: + print(doc.id) + logger.notice("Asana connector test completed") diff --git a/backend/danswer/connectors/confluence/confluence_utils.py b/backend/danswer/connectors/confluence/confluence_utils.py new file mode 100644 index 00000000000..927e989bf3f --- /dev/null +++ b/backend/danswer/connectors/confluence/confluence_utils.py @@ -0,0 +1,32 @@ +import bs4 + + +def build_confluence_document_id(base_url: str, content_url: str) -> str: + """For confluence, the document id is the page url for a page based document + or the attachment download url for an attachment based document + + Args: + base_url (str): The base url of the Confluence instance + content_url (str): The url of the page or attachment download url + + Returns: + str: The document id + """ + return f"{base_url}{content_url}" + + +def get_used_attachments(text: str) -> list[str]: + """Parse a Confluence html page to generate a list of current + attachment in used + + Args: + text (str): The page content + + Returns: + list[str]: List of filenames 
currently in use by the page text + """ + files_in_used = [] + soup = bs4.BeautifulSoup(text, "html.parser") + for attachment in soup.findAll("ri:attachment"): + files_in_used.append(attachment.attrs["ri:filename"]) + return files_in_used diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index b8dc967a3d9..d3caf66cc14 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -7,7 +7,6 @@ from functools import lru_cache from typing import Any from typing import cast -from urllib.parse import urlparse import bs4 from atlassian import Confluence # type:ignore @@ -23,6 +22,10 @@ from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource +from danswer.connectors.confluence.confluence_utils import ( + build_confluence_document_id, +) +from danswer.connectors.confluence.confluence_utils import get_used_attachments from danswer.connectors.confluence.rate_limit_handler import ( make_confluence_call_handle_rate_limit, ) @@ -53,79 +56,6 @@ ) -def _extract_confluence_keys_from_cloud_url(wiki_url: str) -> tuple[str, str, str]: - """Sample - URL w/ page: https://danswer.atlassian.net/wiki/spaces/1234abcd/pages/5678efgh/overview - URL w/o page: https://danswer.atlassian.net/wiki/spaces/ASAM/overview - - wiki_base is https://danswer.atlassian.net/wiki - space is 1234abcd - page_id is 5678efgh - """ - parsed_url = urlparse(wiki_url) - wiki_base = ( - parsed_url.scheme - + "://" - + parsed_url.netloc - + parsed_url.path.split("/spaces")[0] - ) - - path_parts = parsed_url.path.split("/") - space = path_parts[3] - - page_id = path_parts[5] if len(path_parts) > 5 else "" - return wiki_base, space, page_id - - -def _extract_confluence_keys_from_datacenter_url(wiki_url: str) -> tuple[str, str, str]: - """Sample - URL w/ page https://danswer.ai/confluence/display/1234abcd/pages/5678efgh/overview - URL w/o page https://danswer.ai/confluence/display/1234abcd/overview - wiki_base is https://danswer.ai/confluence - space is 1234abcd - page_id is 5678efgh - """ - # /display/ is always right before the space and at the end of the base print() - DISPLAY = "/display/" - PAGE = "/pages/" - - parsed_url = urlparse(wiki_url) - wiki_base = ( - parsed_url.scheme - + "://" - + parsed_url.netloc - + parsed_url.path.split(DISPLAY)[0] - ) - space = DISPLAY.join(parsed_url.path.split(DISPLAY)[1:]).split("/")[0] - page_id = "" - if (content := parsed_url.path.split(PAGE)) and len(content) > 1: - page_id = content[1] - return wiki_base, space, page_id - - -def extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str, str, bool]: - is_confluence_cloud = ( - ".atlassian.net/wiki/spaces/" in wiki_url - or ".jira.com/wiki/spaces/" in wiki_url - ) - - try: - if is_confluence_cloud: - wiki_base, space, page_id = _extract_confluence_keys_from_cloud_url( - wiki_url - ) - else: - wiki_base, space, page_id = _extract_confluence_keys_from_datacenter_url( - wiki_url - ) - except Exception as e: - error_msg = f"Not a valid Confluence Wiki Link, unable to extract wiki base, space, and page id. 
Exception: {e}" - logger.error(error_msg) - raise ValueError(error_msg) - - return wiki_base, space, page_id, is_confluence_cloud - - @lru_cache() def _get_user(user_id: str, confluence_client: Confluence) -> str: """Get Confluence Display Name based on the account-id or userkey value @@ -179,24 +109,6 @@ def parse_html_page(text: str, confluence_client: Confluence) -> str: return format_document_soup(soup) -def get_used_attachments(text: str, confluence_client: Confluence) -> list[str]: - """Parse a Confluence html page to generate a list of current - attachment in used - - Args: - text (str): The page content - confluence_client (Confluence): Confluence client - - Returns: - list[str]: List of filename currently in used - """ - files_in_used = [] - soup = bs4.BeautifulSoup(text, "html.parser") - for attachment in soup.findAll("ri:attachment"): - files_in_used.append(attachment.attrs["ri:filename"]) - return files_in_used - - def _comment_dfs( comments_str: str, comment_pages: Collection[dict[str, Any]], @@ -372,7 +284,10 @@ def _fetch_single_depth_child_pages( class ConfluenceConnector(LoadConnector, PollConnector): def __init__( self, - wiki_page_url: str, + wiki_base: str, + space: str, + is_cloud: bool, + page_id: str = "", index_recursively: bool = True, batch_size: int = INDEX_BATCH_SIZE, continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE, @@ -386,15 +301,15 @@ def __init__( self.labels_to_skip = set(labels_to_skip) self.recursive_indexer: RecursiveIndexer | None = None self.index_recursively = index_recursively - ( - self.wiki_base, - self.space, - self.page_id, - self.is_cloud, - ) = extract_confluence_keys_from_url(wiki_page_url) - self.space_level_scan = False + # Remove trailing slash from wiki_base if present + self.wiki_base = wiki_base.rstrip("/") + self.space = space + self.page_id = page_id + + self.is_cloud = is_cloud + self.space_level_scan = False self.confluence_client: Confluence | None = None if self.page_id is None or self.page_id == "": @@ -414,7 +329,6 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None username=username if self.is_cloud else None, password=access_token if self.is_cloud else None, token=access_token if not self.is_cloud else None, - cloud=self.is_cloud, ) return None @@ -696,13 +610,16 @@ def _get_doc_batch( page_html = ( page["body"].get("storage", page["body"].get("view", {})).get("value") ) - page_url = self.wiki_base + page["_links"]["webui"] + # The url and the id are the same + page_url = build_confluence_document_id( + self.wiki_base, page["_links"]["webui"] + ) if not page_html: logger.debug("Page is empty, skipping: %s", page_url) continue page_text = parse_html_page(page_html, self.confluence_client) - files_in_used = get_used_attachments(page_html, self.confluence_client) + files_in_used = get_used_attachments(page_html) attachment_text, unused_page_attachments = self._fetch_attachments( self.confluence_client, page_id, files_in_used ) @@ -755,8 +672,9 @@ def _get_attachment_batch( if time_filter and not time_filter(last_updated): continue - attachment_url = self._attachment_to_download_link( - self.confluence_client, attachment + # The url and the id are the same + attachment_url = build_confluence_document_id( + self.wiki_base, attachment["_links"]["download"] ) attachment_content = self._attachment_to_content( self.confluence_client, attachment @@ -866,7 +784,13 @@ def poll_source( if __name__ == "__main__": - connector = ConfluenceConnector(os.environ["CONFLUENCE_TEST_SPACE_URL"]) + connector = 
ConfluenceConnector( + wiki_base=os.environ["CONFLUENCE_TEST_SPACE_URL"], + space=os.environ["CONFLUENCE_TEST_SPACE"], + is_cloud=os.environ.get("CONFLUENCE_IS_CLOUD", "true").lower() == "true", + page_id=os.environ.get("CONFLUENCE_TEST_PAGE_ID", ""), + index_recursively=True, + ) connector.load_credentials( { "confluence_username": os.environ["CONFLUENCE_USER_NAME"], diff --git a/backend/danswer/connectors/confluence/rate_limit_handler.py b/backend/danswer/connectors/confluence/rate_limit_handler.py index 8755b78f3f4..ea0e46800ff 100644 --- a/backend/danswer/connectors/confluence/rate_limit_handler.py +++ b/backend/danswer/connectors/confluence/rate_limit_handler.py @@ -23,7 +23,7 @@ class ConfluenceRateLimitError(Exception): def make_confluence_call_handle_rate_limit(confluence_call: F) -> F: def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: - max_retries = 10 + max_retries = 5 starting_delay = 5 backoff = 2 max_delay = 600 @@ -32,17 +32,30 @@ def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: try: return confluence_call(*args, **kwargs) except HTTPError as e: + # Check if the response or headers are None to avoid potential AttributeError + if e.response is None or e.response.headers is None: + logger.warning("HTTPError with `None` as response or as headers") + raise e + + retry_after_header = e.response.headers.get("Retry-After") if ( e.response.status_code == 429 or RATE_LIMIT_MESSAGE_LOWERCASE in e.response.text.lower() ): retry_after = None - try: - retry_after = int(e.response.headers.get("Retry-After")) - except (ValueError, TypeError): - pass + if retry_after_header is not None: + try: + retry_after = int(retry_after_header) + except ValueError: + pass + + if retry_after is not None: + if retry_after > 600: + logger.warning( + f"Clamping retry_after from {retry_after} to {max_delay} seconds..." + ) + retry_after = max_delay - if retry_after: logger.warning( f"Rate limit hit. Retrying after {retry_after} seconds..." ) diff --git a/backend/danswer/connectors/danswer_jira/connector.py b/backend/danswer/connectors/danswer_jira/connector.py index 9a8fbb31501..097aa41c372 100644 --- a/backend/danswer/connectors/danswer_jira/connector.py +++ b/backend/danswer/connectors/danswer_jira/connector.py @@ -9,6 +9,7 @@ from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP +from danswer.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc from danswer.connectors.interfaces import GenerateDocumentsOutput @@ -45,10 +46,15 @@ def extract_jira_project(url: str) -> tuple[str, str]: return jira_base, jira_project -def extract_text_from_content(content: dict) -> str: +def extract_text_from_adf(adf: dict | None) -> str: + """Extracts plain text from Atlassian Document Format: + https://developer.atlassian.com/cloud/jira/platform/apis/document/structure/ + + WARNING: This function is incomplete and will e.g. skip lists! 
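# Illustrative sketch of the retry behaviour hardened above in
# rate_limit_handler.py: guard against a missing response, parse Retry-After
# defensively, and clamp it to max_delay before sleeping. This is a simplified
# standalone version, not the project's wrapper; the function name and the use
# of `requests.HTTPError` are assumptions made for the example.

import time
from collections.abc import Callable
from typing import Any

import requests


def call_with_rate_limit_retries(
    fn: Callable[[], Any], max_retries: int = 5, max_delay: int = 600
) -> Any:
    """Call fn() and retry on HTTP 429, honouring a clamped Retry-After header."""
    delay = 5
    for _ in range(max_retries):
        try:
            return fn()
        except requests.HTTPError as e:
            # Mirror the None-guard added above: without a response we cannot
            # inspect the status code or headers, so just re-raise.
            if e.response is None or e.response.status_code != 429:
                raise
            retry_after_header = e.response.headers.get("Retry-After")
            try:
                retry_after = int(retry_after_header) if retry_after_header else delay
            except ValueError:
                retry_after = delay
            time.sleep(min(retry_after, max_delay))  # clamp, as the patch does
            delay = min(delay * 2, max_delay)  # exponential backoff for the next miss
    return fn()  # one final attempt; any remaining error propagates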
+ """ texts = [] - if "content" in content: - for block in content["content"]: + if adf is not None and "content" in adf: + for block in adf["content"]: if "content" in block: for item in block["content"]: if item["type"] == "text": @@ -72,18 +78,15 @@ def _get_comment_strs( comment_strs = [] for comment in jira.fields.comment.comments: try: - if hasattr(comment, "body"): - body_text = extract_text_from_content(comment.raw["body"]) - elif hasattr(comment, "raw"): - body = comment.raw.get("body", "No body content available") - body_text = ( - extract_text_from_content(body) if isinstance(body, dict) else body - ) - else: - body_text = "No body attribute found" + body_text = ( + comment.body + if JIRA_API_VERSION == "2" + else extract_text_from_adf(comment.raw["body"]) + ) if ( hasattr(comment, "author") + and hasattr(comment.author, "emailAddress") and comment.author.emailAddress in comment_email_blacklist ): continue # Skip adding comment if author's email is in blacklist @@ -126,13 +129,24 @@ def fetch_jira_issues_batch( ) continue + description = ( + jira.fields.description + if JIRA_API_VERSION == "2" + else extract_text_from_adf(jira.raw["fields"]["description"]) + ) comments = _get_comment_strs(jira, comment_email_blacklist) - semantic_rep = ( - f"{jira.fields.description}\n" - if jira.fields.description - else "" + "\n".join([f"Comment: {comment}" for comment in comments]) + ticket_content = f"{description}\n" + "\n".join( + [f"Comment: {comment}" for comment in comments if comment] ) + # Check ticket size + if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE: + logger.info( + f"Skipping {jira.key} because it exceeds the maximum size of " + f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes." + ) + continue + page_url = f"{jira_client.client_info()}/browse/{jira.key}" people = set() @@ -175,7 +189,7 @@ def fetch_jira_issues_batch( doc_batch.append( Document( id=page_url, - sections=[Section(link=page_url, text=semantic_rep)], + sections=[Section(link=page_url, text=ticket_content)], source=DocumentSource.JIRA, semantic_identifier=jira.fields.summary, doc_updated_at=time_str_to_utc(jira.fields.updated), @@ -231,10 +245,12 @@ def load_from_state(self) -> GenerateDocumentsOutput: if self.jira_client is None: raise ConnectorMissingCredentialError("Jira") + # Quote the project name to handle reserved words + quoted_project = f'"{self.jira_project}"' start_ind = 0 while True: doc_batch, fetched_batch_size = fetch_jira_issues_batch( - jql=f"project = {self.jira_project}", + jql=f"project = {quoted_project}", start_index=start_ind, jira_client=self.jira_client, batch_size=self.batch_size, @@ -262,8 +278,10 @@ def poll_source( "%Y-%m-%d %H:%M" ) + # Quote the project name to handle reserved words + quoted_project = f'"{self.jira_project}"' jql = ( - f"project = {self.jira_project} AND " + f"project = {quoted_project} AND " f"updated >= '{start_date_str}' AND " f"updated <= '{end_date_str}'" ) diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py index 1a3d605d3a5..75e0d9bb238 100644 --- a/backend/danswer/connectors/factory.py +++ b/backend/danswer/connectors/factory.py @@ -4,6 +4,7 @@ from sqlalchemy.orm import Session from danswer.configs.constants import DocumentSource +from danswer.connectors.asana.connector import AsanaConnector from danswer.connectors.axero.connector import AxeroConnector from danswer.connectors.blob.connector import BlobStorageConnector from danswer.connectors.bookstack.connector import BookstackConnector @@ -41,6 +42,7 @@ 
from danswer.connectors.teams.connector import TeamsConnector from danswer.connectors.web.connector import WebConnector from danswer.connectors.wikipedia.connector import WikipediaConnector +from danswer.connectors.xenforo.connector import XenforoConnector from danswer.connectors.zendesk.connector import ZendeskConnector from danswer.connectors.zulip.connector import ZulipConnector from danswer.db.credentials import backend_update_credential_json @@ -61,6 +63,7 @@ def identify_connector_class( DocumentSource.SLACK: { InputType.LOAD_STATE: SlackLoadConnector, InputType.POLL: SlackPollConnector, + InputType.PRUNE: SlackPollConnector, }, DocumentSource.GITHUB: GithubConnector, DocumentSource.GMAIL: GmailConnector, @@ -91,10 +94,12 @@ def identify_connector_class( DocumentSource.CLICKUP: ClickupConnector, DocumentSource.MEDIAWIKI: MediaWikiConnector, DocumentSource.WIKIPEDIA: WikipediaConnector, + DocumentSource.ASANA: AsanaConnector, DocumentSource.S3: BlobStorageConnector, DocumentSource.R2: BlobStorageConnector, DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector, DocumentSource.OCI_STORAGE: BlobStorageConnector, + DocumentSource.XENFORO: XenforoConnector, } connector_by_source = connector_map.get(source, {}) @@ -124,11 +129,11 @@ def identify_connector_class( def instantiate_connector( + db_session: Session, source: DocumentSource, input_type: InputType, connector_specific_config: dict[str, Any], credential: Credential, - db_session: Session, ) -> BaseConnector: connector_class = identify_connector_class(source, input_type) connector = connector_class(**connector_specific_config) diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py index 6c5501734b0..83d0af2c12e 100644 --- a/backend/danswer/connectors/file/connector.py +++ b/backend/danswer/connectors/file/connector.py @@ -23,7 +23,7 @@ from danswer.file_processing.extract_file_text import get_file_ext from danswer.file_processing.extract_file_text import is_text_file_extension from danswer.file_processing.extract_file_text import load_files_from_zip -from danswer.file_processing.extract_file_text import pdf_to_text +from danswer.file_processing.extract_file_text import read_pdf_file from danswer.file_processing.extract_file_text import read_text_file from danswer.file_store.file_store import get_default_file_store from danswer.utils.logger import setup_logger @@ -75,7 +75,7 @@ def _process_file( # Using the PDF reader function directly to pass in password cleanly elif extension == ".pdf": - file_content_raw = pdf_to_text(file=file, pdf_pass=pdf_pass) + file_content_raw, file_metadata = read_pdf_file(file=file, pdf_pass=pdf_pass) else: file_content_raw = extract_file_text( diff --git a/backend/danswer/connectors/google_drive/connector.py b/backend/danswer/connectors/google_drive/connector.py index 40a9b73432f..bf267ab7786 100644 --- a/backend/danswer/connectors/google_drive/connector.py +++ b/backend/danswer/connectors/google_drive/connector.py @@ -6,7 +6,6 @@ from enum import Enum from itertools import chain from typing import Any -from typing import cast from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore @@ -21,19 +20,13 @@ from danswer.configs.constants import DocumentSource from danswer.configs.constants import IGNORE_FOR_QA from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder -from 
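# The factory changes above grow a source-to-connector map in which most
# entries are a class and a few (Slack) are a nested dict keyed by input type,
# now including PRUNE. A stripped-down sketch of that dispatch pattern, with
# stand-in names throughout:

from enum import Enum


class InputTypeSketch(str, Enum):
    LOAD_STATE = "load_state"
    POLL = "poll"
    PRUNE = "prune"


class _StubConnector:
    """Stand-in for a real connector class."""


_CONNECTOR_MAP: dict[str, type | dict[InputTypeSketch, type]] = {
    "github": _StubConnector,
    "slack": {InputTypeSketch.POLL: _StubConnector, InputTypeSketch.PRUNE: _StubConnector},
}


def identify_connector_class_sketch(source: str, input_type: InputTypeSketch) -> type:
    entry = _CONNECTOR_MAP[source]
    return entry[input_type] if isinstance(entry, dict) else entry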
danswer.connectors.google_drive.connector_auth import ( - get_google_drive_creds_for_authorized_user, -) -from danswer.connectors.google_drive.connector_auth import ( - get_google_drive_creds_for_service_account, -) +from danswer.connectors.google_drive.connector_auth import get_google_drive_creds from danswer.connectors.google_drive.constants import ( DB_CREDENTIALS_DICT_DELEGATED_USER_KEY, ) from danswer.connectors.google_drive.constants import ( DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, ) -from danswer.connectors.google_drive.constants import DB_CREDENTIALS_DICT_TOKEN_KEY from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector @@ -41,8 +34,8 @@ from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.file_processing.extract_file_text import docx_to_text -from danswer.file_processing.extract_file_text import pdf_to_text from danswer.file_processing.extract_file_text import pptx_to_text +from danswer.file_processing.extract_file_text import read_pdf_file from danswer.utils.batching import batch_generator from danswer.utils.logger import setup_logger @@ -62,6 +55,8 @@ class GDriveMimeType(str, Enum): POWERPOINT = ( "application/vnd.openxmlformats-officedocument.presentationml.presentation" ) + PLAIN_TEXT = "text/plain" + MARKDOWN = "text/markdown" GoogleDriveFileType = dict[str, Any] @@ -316,25 +311,29 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str: GDriveMimeType.PPT.value, GDriveMimeType.SPREADSHEET.value, ]: - export_mime_type = "text/plain" - if mime_type == GDriveMimeType.SPREADSHEET.value: - export_mime_type = "text/csv" - elif mime_type == GDriveMimeType.PPT.value: - export_mime_type = "text/plain" - - response = ( + export_mime_type = ( + "text/plain" + if mime_type != GDriveMimeType.SPREADSHEET.value + else "text/csv" + ) + return ( service.files() .export(fileId=file["id"], mimeType=export_mime_type) .execute() + .decode("utf-8") ) - return response.decode("utf-8") - + elif mime_type in [ + GDriveMimeType.PLAIN_TEXT.value, + GDriveMimeType.MARKDOWN.value, + ]: + return service.files().get_media(fileId=file["id"]).execute().decode("utf-8") elif mime_type == GDriveMimeType.WORD_DOC.value: response = service.files().get_media(fileId=file["id"]).execute() return docx_to_text(file=io.BytesIO(response)) elif mime_type == GDriveMimeType.PDF.value: response = service.files().get_media(fileId=file["id"]).execute() - return pdf_to_text(file=io.BytesIO(response)) + text, _ = read_pdf_file(file=io.BytesIO(response)) + return text elif mime_type == GDriveMimeType.POWERPOINT.value: response = service.files().get_media(fileId=file["id"]).execute() return pptx_to_text(file=io.BytesIO(response)) @@ -401,42 +400,7 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None (2) A credential which holds a service account key JSON file, which can then be used to impersonate any user in the workspace. """ - creds: OAuthCredentials | ServiceAccountCredentials | None = None - new_creds_dict = None - if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials: - access_token_json_str = cast( - str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY] - ) - creds = get_google_drive_creds_for_authorized_user( - token_json_str=access_token_json_str - ) - - # tell caller to update token stored in DB if it has changed - # (e.g. 
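# The extract_text changes above branch on Google-native versus uploaded mime
# types: native Docs/Slides export to text/plain, Sheets to text/csv, while
# plain text, markdown, docx, pdf and pptx are downloaded and parsed locally.
# A table-driven sketch of that decision; the Google mime strings below are the
# standard ones and are assumed to match the enum, everything else is illustrative.

_EXPORT_AS = {
    "application/vnd.google-apps.document": "text/plain",
    "application/vnd.google-apps.presentation": "text/plain",
    "application/vnd.google-apps.spreadsheet": "text/csv",
}
_DOWNLOAD_AS_TEXT = {"text/plain", "text/markdown"}


def plan_drive_extraction(mime_type: str) -> tuple[str, str | None]:
    """Return ("export", target_mime) for native files or ("download", None) otherwise."""
    if mime_type in _EXPORT_AS:
        return "export", _EXPORT_AS[mime_type]
    if mime_type in _DOWNLOAD_AS_TEXT:
        return "download", None
    # docx / pdf / pptx also come back via get_media, then go to local parsers
    return "download", None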
the token has been refreshed) - new_creds_json_str = creds.to_json() if creds else "" - if new_creds_json_str != access_token_json_str: - new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str} - - if DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials: - service_account_key_json_str = credentials[ - DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY - ] - creds = get_google_drive_creds_for_service_account( - service_account_key_json_str=service_account_key_json_str - ) - - # "Impersonate" a user if one is specified - delegated_user_email = cast( - str | None, credentials.get(DB_CREDENTIALS_DICT_DELEGATED_USER_KEY) - ) - if delegated_user_email: - creds = creds.with_subject(delegated_user_email) if creds else None # type: ignore - - if creds is None: - raise PermissionError( - "Unable to access Google Drive - unknown credential structure." - ) - + creds, new_creds_dict = get_google_drive_creds(credentials) self.creds = creds return new_creds_dict @@ -503,6 +467,7 @@ def _fetch_docs_from_drive( file["modifiedTime"] ).astimezone(timezone.utc), metadata={} if text_contents else {IGNORE_FOR_QA: "True"}, + additional_info=file.get("id"), ) ) except Exception as e: diff --git a/backend/danswer/connectors/google_drive/connector_auth.py b/backend/danswer/connectors/google_drive/connector_auth.py index 0f47727e6ee..cc68fec54ea 100644 --- a/backend/danswer/connectors/google_drive/connector_auth.py +++ b/backend/danswer/connectors/google_drive/connector_auth.py @@ -10,11 +10,13 @@ from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore from sqlalchemy.orm import Session +from danswer.configs.app_configs import ENTERPRISE_EDITION_ENABLED from danswer.configs.app_configs import WEB_DOMAIN from danswer.configs.constants import DocumentSource from danswer.configs.constants import KV_CRED_KEY from danswer.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY from danswer.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY +from danswer.connectors.google_drive.constants import BASE_SCOPES from danswer.connectors.google_drive.constants import ( DB_CREDENTIALS_DICT_DELEGATED_USER_KEY, ) @@ -22,7 +24,8 @@ DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, ) from danswer.connectors.google_drive.constants import DB_CREDENTIALS_DICT_TOKEN_KEY -from danswer.connectors.google_drive.constants import SCOPES +from danswer.connectors.google_drive.constants import FETCH_GROUPS_SCOPES +from danswer.connectors.google_drive.constants import FETCH_PERMISSIONS_SCOPES from danswer.db.credentials import update_credential_json from danswer.db.models import User from danswer.dynamic_configs.factory import get_dynamic_config_store @@ -34,15 +37,25 @@ logger = setup_logger() +def build_gdrive_scopes() -> list[str]: + base_scopes: list[str] = BASE_SCOPES + permissions_scopes: list[str] = FETCH_PERMISSIONS_SCOPES + groups_scopes: list[str] = FETCH_GROUPS_SCOPES + + if ENTERPRISE_EDITION_ENABLED: + return base_scopes + permissions_scopes + groups_scopes + return base_scopes + permissions_scopes + + def _build_frontend_google_drive_redirect() -> str: return f"{WEB_DOMAIN}/admin/connectors/google-drive/auth/callback" def get_google_drive_creds_for_authorized_user( - token_json_str: str, + token_json_str: str, scopes: list[str] = build_gdrive_scopes() ) -> OAuthCredentials | None: creds_json = json.loads(token_json_str) - creds = OAuthCredentials.from_authorized_user_info(creds_json, SCOPES) + creds = OAuthCredentials.from_authorized_user_info(creds_json, scopes) if creds.valid: return creds @@ -59,18 +72,67 @@ def 
get_google_drive_creds_for_authorized_user( return None -def get_google_drive_creds_for_service_account( - service_account_key_json_str: str, +def _get_google_drive_creds_for_service_account( + service_account_key_json_str: str, scopes: list[str] = build_gdrive_scopes() ) -> ServiceAccountCredentials | None: service_account_key = json.loads(service_account_key_json_str) creds = ServiceAccountCredentials.from_service_account_info( - service_account_key, scopes=SCOPES + service_account_key, scopes=scopes ) if not creds.valid or not creds.expired: creds.refresh(Request()) return creds if creds.valid else None +def get_google_drive_creds( + credentials: dict[str, str], scopes: list[str] = build_gdrive_scopes() +) -> tuple[ServiceAccountCredentials | OAuthCredentials, dict[str, str] | None]: + oauth_creds = None + service_creds = None + new_creds_dict = None + if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials: + access_token_json_str = cast(str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY]) + oauth_creds = get_google_drive_creds_for_authorized_user( + token_json_str=access_token_json_str, scopes=scopes + ) + + # tell caller to update token stored in DB if it has changed + # (e.g. the token has been refreshed) + new_creds_json_str = oauth_creds.to_json() if oauth_creds else "" + if new_creds_json_str != access_token_json_str: + new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str} + + elif DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials: + service_account_key_json_str = credentials[ + DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY + ] + service_creds = _get_google_drive_creds_for_service_account( + service_account_key_json_str=service_account_key_json_str, + scopes=scopes, + ) + + # "Impersonate" a user if one is specified + delegated_user_email = cast( + str | None, credentials.get(DB_CREDENTIALS_DICT_DELEGATED_USER_KEY) + ) + if delegated_user_email: + service_creds = ( + service_creds.with_subject(delegated_user_email) + if service_creds + else None + ) + + creds: ServiceAccountCredentials | OAuthCredentials | None = ( + oauth_creds or service_creds + ) + if creds is None: + raise PermissionError( + "Unable to access Google Drive - unknown credential structure." 
+ ) + + return creds, new_creds_dict + + def verify_csrf(credential_id: int, state: str) -> None: csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id))) if csrf != state: @@ -84,7 +146,7 @@ def get_auth_url(credential_id: int) -> str: credential_json = json.loads(creds_str) flow = InstalledAppFlow.from_client_config( credential_json, - scopes=SCOPES, + scopes=build_gdrive_scopes(), redirect_uri=_build_frontend_google_drive_redirect(), ) auth_url, _ = flow.authorization_url(prompt="consent") @@ -107,7 +169,7 @@ def update_credential_access_tokens( app_credentials = get_google_app_cred() flow = InstalledAppFlow.from_client_config( app_credentials.model_dump(), - scopes=SCOPES, + scopes=build_gdrive_scopes(), redirect_uri=_build_frontend_google_drive_redirect(), ) flow.fetch_token(code=auth_code) diff --git a/backend/danswer/connectors/google_drive/constants.py b/backend/danswer/connectors/google_drive/constants.py index 214bfd5cb97..0cca65c13df 100644 --- a/backend/danswer/connectors/google_drive/constants.py +++ b/backend/danswer/connectors/google_drive/constants.py @@ -1,7 +1,7 @@ DB_CREDENTIALS_DICT_TOKEN_KEY = "google_drive_tokens" DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key" DB_CREDENTIALS_DICT_DELEGATED_USER_KEY = "google_drive_delegated_user" -SCOPES = [ - "https://www.googleapis.com/auth/drive.readonly", - "https://www.googleapis.com/auth/drive.metadata.readonly", -] + +BASE_SCOPES = ["https://www.googleapis.com/auth/drive.readonly"] +FETCH_PERMISSIONS_SCOPES = ["https://www.googleapis.com/auth/drive.metadata.readonly"] +FETCH_GROUPS_SCOPES = ["https://www.googleapis.com/auth/cloud-identity.groups.readonly"] diff --git a/backend/danswer/connectors/models.py b/backend/danswer/connectors/models.py index 192aa1b206a..7d86d21980d 100644 --- a/backend/danswer/connectors/models.py +++ b/backend/danswer/connectors/models.py @@ -113,6 +113,9 @@ class DocumentBase(BaseModel): # The default title is semantic_identifier though unless otherwise specified title: str | None = None from_ingestion_api: bool = False + # Anything else that may be useful that is specific to this particular connector type that other + # parts of the code may need. 
If you're unsure, this can be left as None + additional_info: Any = None def get_title_for_document_index( self, diff --git a/backend/danswer/connectors/notion/connector.py b/backend/danswer/connectors/notion/connector.py index fd607e4f97a..7878434da04 100644 --- a/backend/danswer/connectors/notion/connector.py +++ b/backend/danswer/connectors/notion/connector.py @@ -237,6 +237,14 @@ def _read_blocks( ) continue + if result_type == "external_object_instance_page": + logger.warning( + f"Skipping 'external_object_instance_page' ('{result_block_id}') for base block '{base_block_id}': " + f"Notion API does not currently support reading external blocks (as of 24/07/03) " + f"(discussion: https://github.com/danswer-ai/danswer/issues/1761)" + ) + continue + cur_result_text_arr = [] if "rich_text" in result_obj: for rich_text in result_obj["rich_text"]: diff --git a/backend/danswer/connectors/productboard/connector.py b/backend/danswer/connectors/productboard/connector.py index 9ef301aa76d..c7a2d45cae8 100644 --- a/backend/danswer/connectors/productboard/connector.py +++ b/backend/danswer/connectors/productboard/connector.py @@ -98,6 +98,15 @@ def _get_features(self) -> Generator[Document, None, None]: owner = self._get_owner_email(feature) experts = [BasicExpertInfo(email=owner)] if owner else None + metadata: dict[str, str | list[str]] = {} + entity_type = feature.get("type", "feature") + if entity_type: + metadata["entity_type"] = str(entity_type) + + status = feature.get("status", {}).get("name") + if status: + metadata["status"] = str(status) + yield Document( id=feature["id"], sections=[ @@ -110,10 +119,7 @@ def _get_features(self) -> Generator[Document, None, None]: source=DocumentSource.PRODUCTBOARD, doc_updated_at=time_str_to_utc(feature["updatedAt"]), primary_owners=experts, - metadata={ - "entity_type": feature["type"], - "status": feature["status"]["name"], - }, + metadata=metadata, ) def _get_components(self) -> Generator[Document, None, None]: @@ -174,6 +180,12 @@ def _get_objectives(self) -> Generator[Document, None, None]: owner = self._get_owner_email(objective) experts = [BasicExpertInfo(email=owner)] if owner else None + metadata: dict[str, str | list[str]] = { + "entity_type": "objective", + } + if objective.get("state"): + metadata["state"] = str(objective["state"]) + yield Document( id=objective["id"], sections=[ @@ -186,10 +198,7 @@ def _get_objectives(self) -> Generator[Document, None, None]: source=DocumentSource.PRODUCTBOARD, doc_updated_at=time_str_to_utc(objective["updatedAt"]), primary_owners=experts, - metadata={ - "entity_type": "release", - "state": objective["state"], - }, + metadata=metadata, ) def _is_updated_at_out_of_time_range( diff --git a/backend/danswer/connectors/sharepoint/connector.py b/backend/danswer/connectors/sharepoint/connector.py index b66c010d77f..e74dcbf7edd 100644 --- a/backend/danswer/connectors/sharepoint/connector.py +++ b/backend/danswer/connectors/sharepoint/connector.py @@ -25,7 +25,6 @@ from danswer.file_processing.extract_file_text import extract_file_text from danswer.utils.logger import setup_logger - logger = setup_logger() @@ -137,7 +136,7 @@ def _populate_sitedata_sites(self) -> None: .execute_query() ] else: - sites = self.graph_client.sites.get().execute_query() + sites = self.graph_client.sites.get_all().execute_query() self.site_data = [ SiteData(url=None, folder=None, sites=sites, driveitems=[]) ] diff --git a/backend/danswer/connectors/slack/connector.py b/backend/danswer/connectors/slack/connector.py index 
6c451389932..d7a23714a37 100644 --- a/backend/danswer/connectors/slack/connector.py +++ b/backend/danswer/connectors/slack/connector.py @@ -8,13 +8,12 @@ from slack_sdk import WebClient from slack_sdk.errors import SlackApiError -from slack_sdk.web import SlackResponse from danswer.configs.app_configs import ENABLE_EXPENSIVE_EXPERT_CALLS from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import IdConnector from danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch from danswer.connectors.models import BasicExpertInfo @@ -23,12 +22,12 @@ from danswer.connectors.models import Section from danswer.connectors.slack.utils import expert_info_from_slack_id from danswer.connectors.slack.utils import get_message_link -from danswer.connectors.slack.utils import make_slack_api_call_logged -from danswer.connectors.slack.utils import make_slack_api_call_paginated -from danswer.connectors.slack.utils import make_slack_api_rate_limited +from danswer.connectors.slack.utils import make_paginated_slack_api_call_w_retries +from danswer.connectors.slack.utils import make_slack_api_call_w_retries from danswer.connectors.slack.utils import SlackTextCleaner from danswer.utils.logger import setup_logger + logger = setup_logger() @@ -37,47 +36,18 @@ # list of messages in a thread ThreadType = list[MessageType] -basic_retry_wrapper = retry_builder() - - -def _make_paginated_slack_api_call( - call: Callable[..., SlackResponse], **kwargs: Any -) -> Generator[dict[str, Any], None, None]: - return make_slack_api_call_paginated( - basic_retry_wrapper( - make_slack_api_rate_limited(make_slack_api_call_logged(call)) - ) - )(**kwargs) - - -def _make_slack_api_call( - call: Callable[..., SlackResponse], **kwargs: Any -) -> SlackResponse: - return basic_retry_wrapper( - make_slack_api_rate_limited(make_slack_api_call_logged(call)) - )(**kwargs) - -def get_channel_info(client: WebClient, channel_id: str) -> ChannelType: - """Get information about a channel. 
Needed to convert channel ID to channel name""" - return _make_slack_api_call(client.conversations_info, channel=channel_id)[0][ - "channel" - ] - - -def _get_channels( +def _collect_paginated_channels( client: WebClient, exclude_archived: bool, - get_private: bool, + channel_types: list[str], ) -> list[ChannelType]: channels: list[dict[str, Any]] = [] - for result in _make_paginated_slack_api_call( + for result in make_paginated_slack_api_call_w_retries( client.conversations_list, exclude_archived=exclude_archived, # also get private channels the bot is added to - types=["public_channel", "private_channel"] - if get_private - else ["public_channel"], + types=channel_types, ): channels.extend(result["channels"]) @@ -87,19 +57,38 @@ def _get_channels( def get_channels( client: WebClient, exclude_archived: bool = True, + get_public: bool = True, + get_private: bool = True, ) -> list[ChannelType]: """Get all channels in the workspace""" + channels: list[dict[str, Any]] = [] + channel_types = [] + if get_public: + channel_types.append("public_channel") + if get_private: + channel_types.append("private_channel") # try getting private channels as well at first try: - return _get_channels( - client=client, exclude_archived=exclude_archived, get_private=True + channels = _collect_paginated_channels( + client=client, + exclude_archived=exclude_archived, + channel_types=channel_types, ) except SlackApiError as e: logger.info(f"Unable to fetch private channels due to - {e}") + logger.info("trying again without private channels") + if get_public: + channel_types = ["public_channel"] + else: + logger.warning("No channels to fetch") + return [] + channels = _collect_paginated_channels( + client=client, + exclude_archived=exclude_archived, + channel_types=channel_types, + ) - return _get_channels( - client=client, exclude_archived=exclude_archived, get_private=False - ) + return channels def get_channel_messages( @@ -111,14 +100,14 @@ def get_channel_messages( """Get all messages in a channel""" # join so that the bot can access messages if not channel["is_member"]: - _make_slack_api_call( + make_slack_api_call_w_retries( client.conversations_join, channel=channel["id"], is_private=channel["is_private"], ) logger.info(f"Successfully joined '{channel['name']}'") - for result in _make_paginated_slack_api_call( + for result in make_paginated_slack_api_call_w_retries( client.conversations_history, channel=channel["id"], oldest=oldest, @@ -130,7 +119,7 @@ def get_channel_messages( def get_thread(client: WebClient, channel_id: str, thread_id: str) -> ThreadType: """Get all messages in a thread""" threads: list[MessageType] = [] - for result in _make_paginated_slack_api_call( + for result in make_paginated_slack_api_call_w_retries( client.conversations_replies, channel=channel_id, ts=thread_id ): threads.extend(result["messages"]) @@ -265,7 +254,7 @@ def filter_channels( ] -def get_all_docs( +def _get_all_docs( client: WebClient, workspace: str, channels: list[str] | None = None, @@ -327,7 +316,44 @@ def get_all_docs( ) -class SlackPollConnector(PollConnector): +def _get_all_doc_ids( + client: WebClient, + channels: list[str] | None = None, + channel_name_regex_enabled: bool = False, + msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter, +) -> set[str]: + """ + Get all document ids in the workspace, channel by channel + This is pretty identical to get_all_docs, but it returns a set of ids instead of documents + This makes it an order of magnitude faster than get_all_docs + """ + + all_channels 
= get_channels(client) + filtered_channels = filter_channels( + all_channels, channels, channel_name_regex_enabled + ) + + all_doc_ids = set() + for channel in filtered_channels: + channel_message_batches = get_channel_messages( + client=client, + channel=channel, + ) + + for message_batch in channel_message_batches: + for message in message_batch: + if msg_filter_func(message): + continue + + # The document id is the channel id and the ts of the first message in the thread + # Since we already have the first message of the thread, we dont have to + # fetch the thread for id retrieval, saving time and API calls + all_doc_ids.add(f"{channel['id']}__{message['ts']}") + + return all_doc_ids + + +class SlackPollConnector(PollConnector, IdConnector): def __init__( self, workspace: str, @@ -348,6 +374,16 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None self.client = WebClient(token=bot_token) return None + def retrieve_all_source_ids(self) -> set[str]: + if self.client is None: + raise ConnectorMissingCredentialError("Slack") + + return _get_all_doc_ids( + client=self.client, + channels=self.channels, + channel_name_regex_enabled=self.channel_regex_enabled, + ) + def poll_source( self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch ) -> GenerateDocumentsOutput: @@ -355,7 +391,7 @@ def poll_source( raise ConnectorMissingCredentialError("Slack") documents: list[Document] = [] - for document in get_all_docs( + for document in _get_all_docs( client=self.client, workspace=self.workspace, channels=self.channels, diff --git a/backend/danswer/connectors/slack/utils.py b/backend/danswer/connectors/slack/utils.py index 8650ce9ddc9..20e859a6d44 100644 --- a/backend/danswer/connectors/slack/utils.py +++ b/backend/danswer/connectors/slack/utils.py @@ -10,11 +10,13 @@ from slack_sdk.errors import SlackApiError from slack_sdk.web import SlackResponse +from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder from danswer.connectors.models import BasicExpertInfo from danswer.utils.logger import setup_logger logger = setup_logger() +basic_retry_wrapper = retry_builder() # number of messages we request per page when fetching paginated slack messages _SLACK_LIMIT = 900 @@ -34,7 +36,7 @@ def get_message_link( ) -def make_slack_api_call_logged( +def _make_slack_api_call_logged( call: Callable[..., SlackResponse], ) -> Callable[..., SlackResponse]: @wraps(call) @@ -47,7 +49,7 @@ def logged_call(**kwargs: Any) -> SlackResponse: return logged_call -def make_slack_api_call_paginated( +def _make_slack_api_call_paginated( call: Callable[..., SlackResponse], ) -> Callable[..., Generator[dict[str, Any], None, None]]: """Wraps calls to slack API so that they automatically handle pagination""" @@ -116,6 +118,24 @@ def rate_limited_call(**kwargs: Any) -> SlackResponse: return rate_limited_call +def make_slack_api_call_w_retries( + call: Callable[..., SlackResponse], **kwargs: Any +) -> SlackResponse: + return basic_retry_wrapper( + make_slack_api_rate_limited(_make_slack_api_call_logged(call)) + )(**kwargs) + + +def make_paginated_slack_api_call_w_retries( + call: Callable[..., SlackResponse], **kwargs: Any +) -> Generator[dict[str, Any], None, None]: + return _make_slack_api_call_paginated( + basic_retry_wrapper( + make_slack_api_rate_limited(_make_slack_api_call_logged(call)) + ) + )(**kwargs) + + def expert_info_from_slack_id( user_id: str | None, client: WebClient, diff --git a/backend/danswer/connectors/web/connector.py 
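# The new _get_all_doc_ids / retrieve_all_source_ids path above exists so the
# pruning flow (note the PRUNE entry added to the connector factory) can compare
# what is indexed against what still exists in Slack without fetching whole
# threads. The id format is "{channel_id}__{message_ts}". A sketch of how a
# pruner might consume it; the function names here are assumptions, not the
# project's actual pruning code:

def slack_doc_id(channel_id: str, message_ts: str) -> str:
    """Mirror the id format above: channel id plus the ts of the thread's first message."""
    return f"{channel_id}__{message_ts}"


def compute_ids_to_prune(indexed_ids: set[str], source_ids: set[str]) -> set[str]:
    """Documents that are indexed but no longer present at the source."""
    return indexed_ids - source_ids


# e.g. compute_ids_to_prune({"C1__1.0", "C1__2.0"}, {slack_doc_id("C1", "1.0")}) == {"C1__2.0"}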
b/backend/danswer/connectors/web/connector.py index 6e76e404acd..bb1f64efdfe 100644 --- a/backend/danswer/connectors/web/connector.py +++ b/backend/danswer/connectors/web/connector.py @@ -1,6 +1,8 @@ import io import ipaddress import socket +from datetime import datetime +from datetime import timezone from enum import Enum from typing import Any from typing import cast @@ -27,7 +29,7 @@ from danswer.connectors.interfaces import LoadConnector from danswer.connectors.models import Document from danswer.connectors.models import Section -from danswer.file_processing.extract_file_text import pdf_to_text +from danswer.file_processing.extract_file_text import read_pdf_file from danswer.file_processing.html_utils import web_html_cleanup from danswer.utils.logger import setup_logger from danswer.utils.sitemap import list_pages_for_site @@ -85,7 +87,8 @@ def check_internet_connection(url: str) -> None: response = requests.get(url, timeout=3) response.raise_for_status() except requests.exceptions.HTTPError as e: - status_code = e.response.status_code + # Extract status code from the response, defaulting to -1 if response is None + status_code = e.response.status_code if e.response is not None else -1 error_msg = { 400: "Bad Request", 401: "Unauthorized", @@ -202,6 +205,15 @@ def _read_urls_file(location: str) -> list[str]: return urls +def _get_datetime_from_last_modified_header(last_modified: str) -> datetime | None: + try: + return datetime.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace( + tzinfo=timezone.utc + ) + except (ValueError, TypeError): + return None + + class WebConnector(LoadConnector): def __init__( self, @@ -284,7 +296,10 @@ def load_from_state(self) -> GenerateDocumentsOutput: if current_url.split(".")[-1] == "pdf": # PDF files are not checked for links response = requests.get(current_url) - page_text = pdf_to_text(file=io.BytesIO(response.content)) + page_text, metadata = read_pdf_file( + file=io.BytesIO(response.content) + ) + last_modified = response.headers.get("Last-Modified") doc_batch.append( Document( @@ -292,13 +307,23 @@ def load_from_state(self) -> GenerateDocumentsOutput: sections=[Section(link=current_url, text=page_text)], source=DocumentSource.WEB, semantic_identifier=current_url.split("/")[-1], - metadata={}, + metadata=metadata, + doc_updated_at=_get_datetime_from_last_modified_header( + last_modified + ) + if last_modified + else None, ) ) continue page = context.new_page() page_response = page.goto(current_url) + last_modified = ( + page_response.header_value("Last-Modified") + if page_response + else None + ) final_page = page.url if final_page != current_url: logger.info(f"Redirected to {final_page}") @@ -334,6 +359,11 @@ def load_from_state(self) -> GenerateDocumentsOutput: source=DocumentSource.WEB, semantic_identifier=parsed_html.title or current_url, metadata={}, + doc_updated_at=_get_datetime_from_last_modified_header( + last_modified + ) + if last_modified + else None, ) ) diff --git a/backend/ee/danswer/connectors/confluence/__init__.py b/backend/danswer/connectors/xenforo/__init__.py similarity index 100% rename from backend/ee/danswer/connectors/confluence/__init__.py rename to backend/danswer/connectors/xenforo/__init__.py diff --git a/backend/danswer/connectors/xenforo/connector.py b/backend/danswer/connectors/xenforo/connector.py new file mode 100644 index 00000000000..7f5221543f1 --- /dev/null +++ b/backend/danswer/connectors/xenforo/connector.py @@ -0,0 +1,244 @@ +""" +This is the XenforoConnector class. 
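# The web connector above now derives doc_updated_at from the Last-Modified
# header via strptime on the "%a, %d %b %Y %H:%M:%S %Z" format, returning None
# when parsing fails. For reference, the standard library's
# email.utils.parsedate_to_datetime handles the same RFC 7231 date format; a
# variant sketch (not the code in the patch) using it:

from datetime import datetime, timezone
from email.utils import parsedate_to_datetime


def last_modified_to_utc(header_value: str | None) -> datetime | None:
    """Parse an HTTP Last-Modified header into an aware UTC datetime, or None."""
    if not header_value:
        return None
    try:
        return parsedate_to_datetime(header_value).astimezone(timezone.utc)
    except (TypeError, ValueError):
        return None


# last_modified_to_utc("Wed, 21 Oct 2015 07:28:00 GMT")
# -> datetime(2015, 10, 21, 7, 28, tzinfo=timezone.utc)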
It is used to connect to a Xenforo forum and load or update documents from the forum. + +To use this class, you need to provide the URL of the Xenforo forum board you want to connect to when creating an instance +of the class. The URL should be a string that starts with 'http://' or 'https://', followed by the domain name of the +forum, followed by the board name. For example: + + base_url = 'https://www.example.com/forum/boards/some-topic/' + +The `load_from_state` method is used to load documents from the forum. It takes an optional `state` parameter, which +can be used to specify a state from which to start loading documents. +""" +import re +from datetime import datetime +from datetime import timedelta +from datetime import timezone +from typing import Any +from urllib.parse import urlparse + +import pytz +import requests +from bs4 import BeautifulSoup +from bs4 import Tag + +from danswer.configs.constants import DocumentSource +from danswer.connectors.cross_connector_utils.miscellaneous_utils import datetime_to_utc +from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import LoadConnector +from danswer.connectors.models import BasicExpertInfo +from danswer.connectors.models import Document +from danswer.connectors.models import Section +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def get_title(soup: BeautifulSoup) -> str: + el = soup.find("h1", "p-title-value") + if not el: + return "" + title = el.text + for char in (";", ":", "!", "*", "/", "\\", "?", '"', "<", ">", "|"): + title = title.replace(char, "_") + return title + + +def get_pages(soup: BeautifulSoup, url: str) -> list[str]: + page_tags = soup.select("li.pageNav-page") + page_numbers = [] + for button in page_tags: + if re.match(r"^\d+$", button.text): + page_numbers.append(button.text) + + max_pages = int(max(page_numbers, key=int)) if page_numbers else 1 + + all_pages = [] + for x in range(1, int(max_pages) + 1): + all_pages.append(f"{url}page-{x}") + return all_pages + + +def parse_post_date(post_element: BeautifulSoup) -> datetime: + el = post_element.find("time") + if not isinstance(el, Tag) or "datetime" not in el.attrs: + return datetime.utcfromtimestamp(0).replace(tzinfo=timezone.utc) + + date_value = el["datetime"] + + # Ensure date_value is a string (if it's a list, take the first element) + if isinstance(date_value, list): + date_value = date_value[0] + + post_date = datetime.strptime(date_value, "%Y-%m-%dT%H:%M:%S%z") + return datetime_to_utc(post_date) + + +def scrape_page_posts( + soup: BeautifulSoup, + page_index: int, + url: str, + initial_run: bool, + start_time: datetime, +) -> list: + title = get_title(soup) + + documents = [] + for post in soup.find_all("div", class_="message-inner"): + post_date = parse_post_date(post) + if initial_run or post_date > start_time: + el = post.find("div", class_="bbWrapper") + if not el: + continue + post_text = el.get_text(strip=True) + "\n" + author_tag = post.find("a", class_="username") + if author_tag is None: + author_tag = post.find("span", class_="username") + author = author_tag.get_text(strip=True) if author_tag else "Deleted author" + formatted_time = post_date.strftime("%Y-%m-%d %H:%M:%S") + + # TODO: if a caller calls this for each page of a thread, it may see the + # same post multiple times if there is a sticky post + # that appears on each page of a thread. + # it's important to generate unique doc id's, so page index is part of the + # id. 
We may want to de-dupe this stuff inside the indexing service. + document = Document( + id=f"{DocumentSource.XENFORO.value}_{title}_{page_index}_{formatted_time}", + sections=[Section(link=url, text=post_text)], + title=title, + source=DocumentSource.XENFORO, + semantic_identifier=title, + primary_owners=[BasicExpertInfo(display_name=author)], + metadata={ + "type": "post", + "author": author, + "time": formatted_time, + }, + doc_updated_at=post_date, + ) + + documents.append(document) + return documents + + +class XenforoConnector(LoadConnector): + # Class variable to track if the connector has been run before + has_been_run_before = False + + def __init__(self, base_url: str) -> None: + self.base_url = base_url + self.initial_run = not XenforoConnector.has_been_run_before + self.start = datetime.utcnow().replace(tzinfo=pytz.utc) - timedelta(days=1) + self.cookies: dict[str, str] = {} + # mimic user browser to avoid being blocked by the website (see: https://www.useragents.me/) + self.headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/121.0.0.0 Safari/537.36" + } + + def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + if credentials: + logger.warning("Unexpected credentials provided for Xenforo Connector") + return None + + def load_from_state(self) -> GenerateDocumentsOutput: + # Standardize URL to always end in /. + if self.base_url[-1] != "/": + self.base_url += "/" + + # Remove all extra parameters from the end such as page, post. + matches = ("threads/", "boards/", "forums/") + for each in matches: + if each in self.base_url: + try: + self.base_url = self.base_url[ + 0 : self.base_url.index( + "/", self.base_url.index(each) + len(each) + ) + + 1 + ] + except ValueError: + pass + + doc_batch: list[Document] = [] + all_threads = [] + + # If the URL contains "boards/" or "forums/", find all threads. + if "boards/" in self.base_url or "forums/" in self.base_url: + pages = get_pages(self.requestsite(self.base_url), self.base_url) + + # Get all pages on thread_list_page + for pre_count, thread_list_page in enumerate(pages, start=1): + logger.info( + f"Getting pages from thread_list_page.. Current: {pre_count}/{len(pages)}\r" + ) + all_threads += self.get_threads(thread_list_page) + # If the URL contains "threads/", add the thread to the list. 
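# load_from_state above normalizes the configured URL: force a trailing slash,
# then trim everything after the boards/..., forums/... or threads/... segment.
# A standalone sketch of the same normalization, written with a regex instead of
# index arithmetic (behaviourally equivalent for typical URLs, but illustrative
# rather than the code in the patch):

import re


def normalize_xenforo_url(base_url: str) -> str:
    """Keep only '.../threads/<x>/', '.../boards/<x>/' or '.../forums/<x>/' with a trailing slash."""
    if not base_url.endswith("/"):
        base_url += "/"
    match = re.search(r"^(.*?/(?:threads|boards|forums)/[^/]+/)", base_url)
    return match.group(1) if match else base_url


# normalize_xenforo_url("https://xenforo.com/community/threads/some-topic.123/page-4")
# -> "https://xenforo.com/community/threads/some-topic.123/"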
+ elif "threads/" in self.base_url: + all_threads.append(self.base_url) + + # Process all threads + for thread_count, thread_url in enumerate(all_threads, start=1): + soup = self.requestsite(thread_url) + if soup is None: + logger.error(f"Failed to load page: {self.base_url}") + continue + pages = get_pages(soup, thread_url) + # Getting all pages for all threads + for page_index, page in enumerate(pages, start=1): + logger.info( + f"Progress: Page {page_index}/{len(pages)} - Thread {thread_count}/{len(all_threads)}\r" + ) + soup_page = self.requestsite(page) + doc_batch.extend( + scrape_page_posts( + soup_page, page_index, thread_url, self.initial_run, self.start + ) + ) + if doc_batch: + yield doc_batch + + # Mark the initial run finished after all threads and pages have been processed + XenforoConnector.has_been_run_before = True + + def get_threads(self, url: str) -> list[str]: + soup = self.requestsite(url) + thread_tags = soup.find_all(class_="structItem-title") + base_url = "{uri.scheme}://{uri.netloc}".format(uri=urlparse(url)) + threads = [] + for x in thread_tags: + y = x.find_all(href=True) + for element in y: + link = element["href"] + if "threads/" in link: + stripped = link[0 : link.rfind("/") + 1] + if base_url + stripped not in threads: + threads.append(base_url + stripped) + return threads + + def requestsite(self, url: str) -> BeautifulSoup: + try: + response = requests.get( + url, cookies=self.cookies, headers=self.headers, timeout=10 + ) + if response.status_code != 200: + logger.error( + f"<{url}> Request Error: {response.status_code} - {response.reason}" + ) + return BeautifulSoup(response.text, "html.parser") + except TimeoutError: + logger.error("Timed out Error.") + except Exception as e: + logger.error(f"Error on {url}") + logger.exception(e) + return BeautifulSoup("", "html.parser") + + +if __name__ == "__main__": + connector = XenforoConnector( + # base_url="https://cassiopaea.org/forum/threads/how-to-change-your-emotional-state.41381/" + base_url="https://xenforo.com/community/threads/whats-new-with-enhanced-search-resource-manager-and-media-gallery-in-xenforo-2-3.220935/" + ) + document_batches = connector.load_from_state() + print(next(document_batches)) diff --git a/backend/danswer/connectors/zendesk/connector.py b/backend/danswer/connectors/zendesk/connector.py index b6d4220b9ce..f85f2efff57 100644 --- a/backend/danswer/connectors/zendesk/connector.py +++ b/backend/danswer/connectors/zendesk/connector.py @@ -3,6 +3,7 @@ import requests from retry import retry from zenpy import Zenpy # type: ignore +from zenpy.lib.api_objects import Ticket # type: ignore from zenpy.lib.api_objects.help_centre_objects import Article # type: ignore from danswer.configs.app_configs import INDEX_BATCH_SIZE @@ -59,10 +60,15 @@ def __init__(self) -> None: class ZendeskConnector(LoadConnector, PollConnector): - def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None: + def __init__( + self, + batch_size: int = INDEX_BATCH_SIZE, + content_type: str = "articles", + ) -> None: self.batch_size = batch_size self.zendesk_client: Zenpy | None = None self.content_tags: dict[str, str] = {} + self.content_type = content_type @retry(tries=3, delay=2, backoff=2) def _set_content_tags( @@ -122,16 +128,86 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None def load_from_state(self) -> GenerateDocumentsOutput: return self.poll_source(None, None) + def _ticket_to_document(self, ticket: Ticket) -> Document: + if self.zendesk_client is None: + raise 
ZendeskClientNotSetUpError() + + owner = None + if ticket.requester and ticket.requester.name and ticket.requester.email: + owner = [ + BasicExpertInfo( + display_name=ticket.requester.name, email=ticket.requester.email + ) + ] + update_time = time_str_to_utc(ticket.updated_at) if ticket.updated_at else None + + metadata: dict[str, str | list[str]] = {} + if ticket.status is not None: + metadata["status"] = ticket.status + if ticket.priority is not None: + metadata["priority"] = ticket.priority + if ticket.tags: + metadata["tags"] = ticket.tags + if ticket.type is not None: + metadata["ticket_type"] = ticket.type + + # Fetch comments for the ticket + comments = self.zendesk_client.tickets.comments(ticket=ticket) + + # Combine all comments into a single text + comments_text = "\n\n".join( + [ + f"Comment{f' by {comment.author.name}' if comment.author and comment.author.name else ''}" + f"{f' at {comment.created_at}' if comment.created_at else ''}:\n{comment.body}" + for comment in comments + if comment.body + ] + ) + + # Combine ticket description and comments + description = ( + ticket.description + if hasattr(ticket, "description") and ticket.description + else "" + ) + full_text = f"Ticket Description:\n{description}\n\nComments:\n{comments_text}" + + # Extract subdomain from ticket.url + subdomain = ticket.url.split("//")[1].split(".zendesk.com")[0] + + # Build the html url for the ticket + ticket_url = f"https://{subdomain}.zendesk.com/agent/tickets/{ticket.id}" + + return Document( + id=f"zendesk_ticket_{ticket.id}", + sections=[Section(link=ticket_url, text=full_text)], + source=DocumentSource.ZENDESK, + semantic_identifier=f"Ticket #{ticket.id}: {ticket.subject or 'No Subject'}", + doc_updated_at=update_time, + primary_owners=owner, + metadata=metadata, + ) + def poll_source( self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None ) -> GenerateDocumentsOutput: if self.zendesk_client is None: raise ZendeskClientNotSetUpError() + if self.content_type == "articles": + yield from self._poll_articles(start) + elif self.content_type == "tickets": + yield from self._poll_tickets(start) + else: + raise ValueError(f"Unsupported content_type: {self.content_type}") + + def _poll_articles( + self, start: SecondsSinceUnixEpoch | None + ) -> GenerateDocumentsOutput: articles = ( - self.zendesk_client.help_center.articles(cursor_pagination=True) + self.zendesk_client.help_center.articles(cursor_pagination=True) # type: ignore if start is None - else self.zendesk_client.help_center.articles.incremental( + else self.zendesk_client.help_center.articles.incremental( # type: ignore start_time=int(start) ) ) @@ -155,9 +231,43 @@ def poll_source( if doc_batch: yield doc_batch + def _poll_tickets( + self, start: SecondsSinceUnixEpoch | None + ) -> GenerateDocumentsOutput: + if self.zendesk_client is None: + raise ZendeskClientNotSetUpError() + + ticket_generator = self.zendesk_client.tickets.incremental(start_time=start) + + while True: + doc_batch = [] + for _ in range(self.batch_size): + try: + ticket = next(ticket_generator) + + # Check if the ticket status is deleted and skip it if so + if ticket.status == "deleted": + continue + + doc_batch.append(self._ticket_to_document(ticket)) + + if len(doc_batch) >= self.batch_size: + yield doc_batch + doc_batch.clear() + + except StopIteration: + # No more tickets to process + if doc_batch: + yield doc_batch + return + + if doc_batch: + yield doc_batch + if __name__ == "__main__": import os + import time connector = ZendeskConnector() diff 
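# _poll_tickets above drains Zenpy's incremental ticket generator in
# batch_size chunks, skipping deleted tickets and flushing a partial batch at
# the end. The batching pattern in isolation (generic, not Zendesk-specific):

from collections.abc import Iterable, Iterator
from itertools import islice
from typing import TypeVar

T = TypeVar("T")


def batched_sketch(items: Iterable[T], batch_size: int) -> Iterator[list[T]]:
    """Yield lists of up to batch_size items until the iterable is exhausted."""
    iterator = iter(items)
    while batch := list(islice(iterator, batch_size)):
        yield batch


# e.g. list(batched_sketch((t for t in range(5) if t != 2), 2)) == [[0, 1], [3, 4]]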
--git a/backend/danswer/danswerbot/slack/blocks.py b/backend/danswer/danswerbot/slack/blocks.py index da4a867e233..4107a381554 100644 --- a/backend/danswer/danswerbot/slack/blocks.py +++ b/backend/danswer/danswerbot/slack/blocks.py @@ -25,7 +25,6 @@ from danswer.danswerbot.slack.constants import FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID -from danswer.danswerbot.slack.constants import GENERATE_ANSWER_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID from danswer.danswerbot.slack.icons import source_to_github_img_link @@ -360,22 +359,6 @@ def build_quotes_block( return [SectionBlock(text="*Relevant Snippets*\n" + "\n".join(quote_lines))] -def build_standard_answer_blocks( - answer_message: str, -) -> list[Block]: - generate_button_block = ButtonElement( - action_id=GENERATE_ANSWER_BUTTON_ACTION_ID, - text="Generate Full Answer", - ) - answer_block = SectionBlock(text=answer_message) - return [ - answer_block, - ActionsBlock( - elements=[generate_button_block], - ), - ] - - def build_qa_response_blocks( message_id: int | None, answer: str | None, diff --git a/backend/danswer/danswerbot/slack/handlers/handle_buttons.py b/backend/danswer/danswerbot/slack/handlers/handle_buttons.py index 732be8df9db..9e1c171ee4f 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_buttons.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_buttons.py @@ -11,6 +11,7 @@ from danswer.configs.constants import MessageType from danswer.configs.constants import SearchFeedbackType from danswer.configs.danswerbot_configs import DANSWER_FOLLOWUP_EMOJI +from danswer.connectors.slack.utils import expert_info_from_slack_id from danswer.connectors.slack.utils import make_slack_api_rate_limited from danswer.danswerbot.slack.blocks import build_follow_up_resolved_blocks from danswer.danswerbot.slack.blocks import get_document_feedback_blocks @@ -87,6 +88,8 @@ def handle_generate_answer_button( message_ts = req.payload["message"]["ts"] thread_ts = req.payload["container"]["thread_ts"] user_id = req.payload["user"]["id"] + expert_info = expert_info_from_slack_id(user_id, client.web_client, user_cache={}) + email = expert_info.email if expert_info else None if not thread_ts: raise ValueError("Missing thread_ts in the payload") @@ -125,6 +128,7 @@ def handle_generate_answer_button( msg_to_respond=cast(str, message_ts or thread_ts), thread_to_respond=cast(str, thread_ts or message_ts), sender=user_id or None, + email=email or None, bypass_filters=True, is_bot_msg=False, is_bot_dm=False, diff --git a/backend/danswer/danswerbot/slack/handlers/handle_message.py b/backend/danswer/danswerbot/slack/handlers/handle_message.py index 2edbd973553..0882796204d 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_message.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_message.py @@ -21,6 +21,7 @@ from danswer.danswerbot.slack.utils import update_emote_react from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import SlackBotConfig +from danswer.db.users import add_non_web_user_if_not_exists from danswer.utils.logger import setup_logger from shared_configs.configs import SLACK_CHANNEL_ID @@ -209,6 +210,9 @@ def handle_message( logger.error(f"Was not able to react to user message due to: {e}") with 
Session(get_sqlalchemy_engine()) as db_session: + if message_info.email: + add_non_web_user_if_not_exists(db_session, message_info.email) + # first check if we need to respond with a standard answer used_standard_answer = handle_standard_answers( message_info=message_info, diff --git a/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py b/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py index e3a78917a76..f1c9bd077cf 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py @@ -5,6 +5,7 @@ from typing import Optional from typing import TypeVar +from fastapi import HTTPException from retry import retry from slack_sdk import WebClient from slack_sdk.models.blocks import DividerBlock @@ -38,6 +39,7 @@ from danswer.db.models import SlackBotResponseType from danswer.db.persona import fetch_persona_by_id from danswer.db.search_settings import get_current_search_settings +from danswer.db.users import get_user_by_email from danswer.llm.answering.prompts.citations_prompt import ( compute_max_document_tokens_for_persona, ) @@ -99,6 +101,12 @@ def handle_regular_answer( messages = message_info.thread_messages message_ts_to_respond_to = message_info.msg_to_respond is_bot_msg = message_info.is_bot_msg + user = None + if message_info.is_bot_dm: + if message_info.email: + engine = get_sqlalchemy_engine() + with Session(engine) as db_session: + user = get_user_by_email(message_info.email, db_session) document_set_names: list[str] | None = None persona = slack_bot_config.persona if slack_bot_config else None @@ -128,7 +136,8 @@ def handle_regular_answer( else slack_bot_config.response_type == SlackBotResponseType.CITATIONS ) - if not message_ts_to_respond_to: + if not message_ts_to_respond_to and not is_bot_msg: + # if the message is not "/danswer" command, then it should have a message ts to respond to raise RuntimeError( "No message timestamp to respond to in `handle_message`. This should never happen." 
) @@ -145,15 +154,23 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non with Session(get_sqlalchemy_engine()) as db_session: if len(new_message_request.messages) > 1: - persona = cast( - Persona, - fetch_persona_by_id( - db_session, - new_message_request.persona_id, - user=None, - get_editable=False, - ), - ) + if new_message_request.persona_config: + raise HTTPException( + status_code=403, + detail="Slack bot does not support persona config", + ) + + elif new_message_request.persona_id: + persona = cast( + Persona, + fetch_persona_by_id( + db_session, + new_message_request.persona_id, + user=None, + get_editable=False, + ), + ) + llm, _ = get_llms_for_persona(persona) # In cases of threads, split the available tokens between docs and thread context @@ -185,7 +202,7 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non # This also handles creating the query event in postgres answer = get_search_answer( query_req=new_message_request, - user=None, + user=user, max_document_tokens=max_document_tokens, max_history_tokens=max_history_tokens, db_session=db_session, @@ -412,7 +429,7 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non ) # Get the chunks fed to the LLM only, then fill with other docs - llm_doc_inds = answer.llm_chunks_indices or [] + llm_doc_inds = answer.llm_selected_doc_indices or [] llm_docs = [top_docs[i] for i in llm_doc_inds] remaining_docs = [ doc for idx, doc in enumerate(top_docs) if idx not in llm_doc_inds @@ -463,7 +480,9 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non # For DM (ephemeral message), we need to create a thread via a normal message so the user can see # the ephemeral message. This also will give the user a notification which ephemeral message does not. 
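# The rename to llm_selected_doc_indices above keeps the same downstream split:
# documents actually shown to the LLM come first, then the remaining retrieved
# documents in their original order. The split in isolation (names illustrative):

def split_llm_docs(top_docs: list[str], llm_indices: list[int]) -> tuple[list[str], list[str]]:
    """Separate docs fed to the LLM from the rest of the retrieved docs."""
    llm_docs = [top_docs[i] for i in llm_indices]
    remaining = [doc for idx, doc in enumerate(top_docs) if idx not in llm_indices]
    return llm_docs, remaining


# split_llm_docs(["a", "b", "c"], [2, 0]) == (["c", "a"], ["b"])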
- if receiver_ids: + # if there is no message_ts_to_respond_to, and we have made it this far, then this is a /danswer message + # so we shouldn't send_team_member_message + if receiver_ids and message_ts_to_respond_to is not None: send_team_member_message( client=client, channel=channel, diff --git a/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py b/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py index 8e1663c1a4c..58a2101588d 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_standard_answers.py @@ -1,61 +1,43 @@ from slack_sdk import WebClient from sqlalchemy.orm import Session -from danswer.configs.constants import MessageType -from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI -from danswer.danswerbot.slack.blocks import build_standard_answer_blocks -from danswer.danswerbot.slack.blocks import get_restate_blocks -from danswer.danswerbot.slack.handlers.utils import send_team_member_message from danswer.danswerbot.slack.models import SlackMessageInfo -from danswer.danswerbot.slack.utils import respond_in_thread -from danswer.danswerbot.slack.utils import update_emote_react -from danswer.db.chat import create_chat_session -from danswer.db.chat import create_new_chat_message -from danswer.db.chat import get_chat_messages_by_sessions -from danswer.db.chat import get_chat_sessions_by_slack_thread_id -from danswer.db.chat import get_or_create_root_message from danswer.db.models import Prompt from danswer.db.models import SlackBotConfig -from danswer.db.standard_answer import fetch_standard_answer_categories_by_names -from danswer.db.standard_answer import find_matching_standard_answers -from danswer.server.manage.models import StandardAnswer from danswer.utils.logger import DanswerLoggingAdapter from danswer.utils.logger import setup_logger +from danswer.utils.variable_functionality import fetch_versioned_implementation logger = setup_logger() -def oneoff_standard_answers( - message: str, - slack_bot_categories: list[str], +def handle_standard_answers( + message_info: SlackMessageInfo, + receiver_ids: list[str] | None, + slack_bot_config: SlackBotConfig | None, + prompt: Prompt | None, + logger: DanswerLoggingAdapter, + client: WebClient, db_session: Session, -) -> list[StandardAnswer]: - """ - Respond to the user message if it matches any configured standard answers. - - Returns a list of matching StandardAnswers if found, otherwise None. 
- """ - configured_standard_answers = { - standard_answer - for category in fetch_standard_answer_categories_by_names( - slack_bot_categories, db_session=db_session - ) - for standard_answer in category.standard_answers - } - - matching_standard_answers = find_matching_standard_answers( - query=message, - id_in=[answer.id for answer in configured_standard_answers], +) -> bool: + """Returns whether one or more Standard Answer message blocks were + emitted by the Slack bot""" + versioned_handle_standard_answers = fetch_versioned_implementation( + "danswer.danswerbot.slack.handlers.handle_standard_answers", + "_handle_standard_answers", + ) + return versioned_handle_standard_answers( + message_info=message_info, + receiver_ids=receiver_ids, + slack_bot_config=slack_bot_config, + prompt=prompt, + logger=logger, + client=client, db_session=db_session, ) - server_standard_answers = [ - StandardAnswer.from_model(db_answer) for db_answer in matching_standard_answers - ] - return server_standard_answers - -def handle_standard_answers( +def _handle_standard_answers( message_info: SlackMessageInfo, receiver_ids: list[str] | None, slack_bot_config: SlackBotConfig | None, @@ -65,151 +47,10 @@ def handle_standard_answers( db_session: Session, ) -> bool: """ - Potentially respond to the user message depending on whether the user's message matches - any of the configured standard answers and also whether those answers have already been - provided in the current thread. + Standard Answers are a paid Enterprise Edition feature. This is the fallback + function handling the case where EE features are not enabled. - Returns True if standard answers are found to match the user's message and therefore, - we still need to respond to the users. + Always returns false i.e. since EE features are not enabled, we NEVER create any + Slack message blocks. 
""" - # if no channel config, then no standard answers are configured - if not slack_bot_config: - return False - - slack_thread_id = message_info.thread_to_respond - configured_standard_answer_categories = ( - slack_bot_config.standard_answer_categories if slack_bot_config else [] - ) - configured_standard_answers = set( - [ - standard_answer - for standard_answer_category in configured_standard_answer_categories - for standard_answer in standard_answer_category.standard_answers - ] - ) - query_msg = message_info.thread_messages[-1] - - if slack_thread_id is None: - used_standard_answer_ids = set([]) - else: - chat_sessions = get_chat_sessions_by_slack_thread_id( - slack_thread_id=slack_thread_id, - user_id=None, - db_session=db_session, - ) - chat_messages = get_chat_messages_by_sessions( - chat_session_ids=[chat_session.id for chat_session in chat_sessions], - user_id=None, - db_session=db_session, - skip_permission_check=True, - ) - used_standard_answer_ids = set( - [ - standard_answer.id - for chat_message in chat_messages - for standard_answer in chat_message.standard_answers - ] - ) - - usable_standard_answers = configured_standard_answers.difference( - used_standard_answer_ids - ) - if usable_standard_answers: - matching_standard_answers = find_matching_standard_answers( - query=query_msg.message, - id_in=[standard_answer.id for standard_answer in usable_standard_answers], - db_session=db_session, - ) - else: - matching_standard_answers = [] - if matching_standard_answers: - chat_session = create_chat_session( - db_session=db_session, - description="", - user_id=None, - persona_id=slack_bot_config.persona.id if slack_bot_config.persona else 0, - danswerbot_flow=True, - slack_thread_id=slack_thread_id, - one_shot=True, - ) - - root_message = get_or_create_root_message( - chat_session_id=chat_session.id, db_session=db_session - ) - - new_user_message = create_new_chat_message( - chat_session_id=chat_session.id, - parent_message=root_message, - prompt_id=prompt.id if prompt else None, - message=query_msg.message, - token_count=0, - message_type=MessageType.USER, - db_session=db_session, - commit=True, - ) - - formatted_answers = [] - for standard_answer in matching_standard_answers: - block_quotified_answer = ">" + standard_answer.answer.replace("\n", "\n> ") - formatted_answer = ( - f'Since you mentioned _"{standard_answer.keyword}"_, ' - f"I thought this might be useful: \n\n{block_quotified_answer}" - ) - formatted_answers.append(formatted_answer) - answer_message = "\n\n".join(formatted_answers) - - _ = create_new_chat_message( - chat_session_id=chat_session.id, - parent_message=new_user_message, - prompt_id=prompt.id if prompt else None, - message=answer_message, - token_count=0, - message_type=MessageType.ASSISTANT, - error=None, - db_session=db_session, - commit=True, - ) - - update_emote_react( - emoji=DANSWER_REACT_EMOJI, - channel=message_info.channel_to_respond, - message_ts=message_info.msg_to_respond, - remove=True, - client=client, - ) - - restate_question_blocks = get_restate_blocks( - msg=query_msg.message, - is_bot_msg=message_info.is_bot_msg, - ) - - answer_blocks = build_standard_answer_blocks( - answer_message=answer_message, - ) - - all_blocks = restate_question_blocks + answer_blocks - - try: - respond_in_thread( - client=client, - channel=message_info.channel_to_respond, - receiver_ids=receiver_ids, - text="Hello! 
Danswer has some results for you!", - blocks=all_blocks, - thread_ts=message_info.msg_to_respond, - unfurl=False, - ) - - if receiver_ids and slack_thread_id: - send_team_member_message( - client=client, - channel=message_info.channel_to_respond, - thread_ts=slack_thread_id, - ) - - return True - except Exception as e: - logger.exception(f"Unable to send standard answer message: {e}") - return False - else: - return False + return False diff --git a/backend/danswer/danswerbot/slack/listener.py b/backend/danswer/danswerbot/slack/listener.py index c59f4caf1aa..c430f1b31b7 100644 --- a/backend/danswer/danswerbot/slack/listener.py +++ b/backend/danswer/danswerbot/slack/listener.py @@ -13,6 +13,7 @@ from danswer.configs.danswerbot_configs import DANSWER_BOT_REPHRASE_MESSAGE from danswer.configs.danswerbot_configs import DANSWER_BOT_RESPOND_EVERY_CHANNEL from danswer.configs.danswerbot_configs import NOTIFY_SLACKBOT_NO_ANSWER +from danswer.connectors.slack.utils import expert_info_from_slack_id from danswer.danswerbot.slack.config import get_slack_bot_config_for_channel from danswer.danswerbot.slack.constants import DISLIKE_BLOCK_ACTION_ID from danswer.danswerbot.slack.constants import FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID @@ -55,6 +56,7 @@ from danswer.search.retrieval.search_runner import download_nltk_data from danswer.server.manage.models import SlackBotTokens from danswer.utils.logger import setup_logger +from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable from shared_configs.configs import MODEL_SERVER_HOST from shared_configs.configs import MODEL_SERVER_PORT from shared_configs.configs import SLACK_CHANNEL_ID @@ -256,6 +258,11 @@ def build_request_details( tagged = event.get("type") == "app_mention" message_ts = event.get("ts") thread_ts = event.get("thread_ts") + sender = event.get("user") or None + expert_info = expert_info_from_slack_id( + sender, client.web_client, user_cache={} + ) + email = expert_info.email if expert_info else None msg = remove_danswer_bot_tag(msg, client=client.web_client) @@ -286,7 +293,8 @@ def build_request_details( channel_to_respond=channel, msg_to_respond=cast(str, message_ts or thread_ts), thread_to_respond=cast(str, thread_ts or message_ts), - sender=event.get("user") or None, + sender=sender, + email=email, bypass_filters=tagged, is_bot_msg=False, is_bot_dm=event.get("channel_type") == "im", @@ -296,6 +304,10 @@ def build_request_details( channel = req.payload["channel_id"] msg = req.payload["text"] sender = req.payload["user_id"] + expert_info = expert_info_from_slack_id( + sender, client.web_client, user_cache={} + ) + email = expert_info.email if expert_info else None single_msg = ThreadMessage(message=msg, sender=None, role=MessageType.USER) @@ -305,6 +317,7 @@ def build_request_details( msg_to_respond=None, thread_to_respond=None, sender=sender, + email=email, bypass_filters=True, is_bot_msg=True, is_bot_dm=False, @@ -469,6 +482,8 @@ def _initialize_socket_client(socket_client: SocketModeClient) -> None: slack_bot_tokens: SlackBotTokens | None = None socket_client: SocketModeClient | None = None + set_is_ee_based_on_env_variable() + logger.notice("Verifying query preprocessing (NLTK) data is downloaded") download_nltk_data() diff --git a/backend/danswer/danswerbot/slack/models.py b/backend/danswer/danswerbot/slack/models.py index e4521a759a7..6394eab562d 100644 --- a/backend/danswer/danswerbot/slack/models.py +++ b/backend/danswer/danswerbot/slack/models.py @@ -9,6 +9,7 @@ class SlackMessageInfo(BaseModel): msg_to_respond: 
str | None thread_to_respond: str | None sender: str | None + email: str | None bypass_filters: bool # User has tagged @DanswerBot is_bot_msg: bool # User is using /DanswerBot is_bot_dm: bool # User is direct messaging to DanswerBot diff --git a/backend/danswer/db/auth.py b/backend/danswer/db/auth.py index 7710232d01f..9b54e82cc1f 100644 --- a/backend/danswer/db/auth.py +++ b/backend/danswer/db/auth.py @@ -28,7 +28,7 @@ def get_default_admin_user_emails() -> list[str]: get_default_admin_user_emails_fn: Callable[ [], list[str] ] = fetch_versioned_implementation_with_fallback( - "danswer.auth.users", "get_default_admin_user_emails_", lambda: [] + "danswer.auth.users", "get_default_admin_user_emails_", lambda: list[str]() ) return get_default_admin_user_emails_fn() diff --git a/backend/danswer/db/chat.py b/backend/danswer/db/chat.py index 3cb991dd43b..feb2e2b4b51 100644 --- a/backend/danswer/db/chat.py +++ b/backend/danswer/db/chat.py @@ -3,7 +3,6 @@ from datetime import timedelta from uuid import UUID -from sqlalchemy import and_ from sqlalchemy import delete from sqlalchemy import desc from sqlalchemy import func @@ -87,29 +86,57 @@ def get_chat_sessions_by_slack_thread_id( return db_session.scalars(stmt).all() -def get_first_messages_for_chat_sessions( - chat_session_ids: list[int], db_session: Session +def get_valid_messages_from_query_sessions( + chat_session_ids: list[int], + db_session: Session, ) -> dict[int, str]: - subquery = ( - select(ChatMessage.chat_session_id, func.min(ChatMessage.id).label("min_id")) + user_message_subquery = ( + select( + ChatMessage.chat_session_id, func.min(ChatMessage.id).label("user_msg_id") + ) .where( - and_( - ChatMessage.chat_session_id.in_(chat_session_ids), - ChatMessage.message_type == MessageType.USER, # Select USER messages - ) + ChatMessage.chat_session_id.in_(chat_session_ids), + ChatMessage.message_type == MessageType.USER, + ) + .group_by(ChatMessage.chat_session_id) + .subquery() + ) + + assistant_message_subquery = ( + select( + ChatMessage.chat_session_id, + func.min(ChatMessage.id).label("assistant_msg_id"), + ) + .where( + ChatMessage.chat_session_id.in_(chat_session_ids), + ChatMessage.message_type == MessageType.ASSISTANT, ) .group_by(ChatMessage.chat_session_id) .subquery() ) - query = select(ChatMessage.chat_session_id, ChatMessage.message).join( - subquery, - (ChatMessage.chat_session_id == subquery.c.chat_session_id) - & (ChatMessage.id == subquery.c.min_id), + query = ( + select(ChatMessage.chat_session_id, ChatMessage.message) + .join( + user_message_subquery, + ChatMessage.chat_session_id == user_message_subquery.c.chat_session_id, + ) + .join( + assistant_message_subquery, + ChatMessage.chat_session_id == assistant_message_subquery.c.chat_session_id, + ) + .join( + ChatMessage__SearchDoc, + ChatMessage__SearchDoc.chat_message_id + == assistant_message_subquery.c.assistant_msg_id, + ) + .where(ChatMessage.id == user_message_subquery.c.user_msg_id) ) first_messages = db_session.execute(query).all() - return dict([(row.chat_session_id, row.message) for row in first_messages]) + logger.info(f"Retrieved {len(first_messages)} first messages with documents") + + return {row.chat_session_id: row.message for row in first_messages} def get_chat_sessions_by_user( @@ -199,7 +226,7 @@ def create_chat_session( db_session: Session, description: str, user_id: UUID | None, - persona_id: int, + persona_id: int | None, # Can be none if temporary persona is used llm_override: LLMOverride | None = None, prompt_override: PromptOverride | None = 
None, one_shot: bool = False, @@ -253,6 +280,13 @@ def delete_chat_session( db_session: Session, hard_delete: bool = HARD_DELETE_CHATS, ) -> None: + chat_session = get_chat_session_by_id( + chat_session_id=chat_session_id, user_id=user_id, db_session=db_session + ) + + if chat_session.deleted: + raise ValueError("Cannot delete an already deleted chat session") + if hard_delete: delete_messages_and_files_from_chat_session(chat_session_id, db_session) db_session.execute(delete(ChatSession).where(ChatSession.id == chat_session_id)) @@ -564,6 +598,7 @@ def get_doc_query_identifiers_from_model( chat_session: ChatSession, user_id: UUID | None, db_session: Session, + enforce_chat_session_id_for_search_docs: bool, ) -> list[tuple[str, int]]: """Given a list of search_doc_ids""" search_docs = ( @@ -583,7 +618,8 @@ def get_doc_query_identifiers_from_model( for doc in search_docs ] ): - raise ValueError("Invalid reference doc, not from this chat session.") + if enforce_chat_session_id_for_search_docs: + raise ValueError("Invalid reference doc, not from this chat session.") except IndexError: # This happens when the doc has no chat_messages associated with it. # which happens as an edge case where the chat message failed to save diff --git a/backend/danswer/db/connector_credential_pair.py b/backend/danswer/db/connector_credential_pair.py index a6848232caf..f9d79df96ae 100644 --- a/backend/danswer/db/connector_credential_pair.py +++ b/backend/danswer/db/connector_credential_pair.py @@ -12,6 +12,7 @@ from danswer.configs.constants import DocumentSource from danswer.db.connector import fetch_connector_by_id from danswer.db.credentials import fetch_credential_by_id +from danswer.db.enums import AccessType from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.models import ConnectorCredentialPair from danswer.db.models import IndexAttempt @@ -24,6 +25,8 @@ from danswer.db.models import UserRole from danswer.server.models import StatusResponse from danswer.utils.logger import setup_logger +from ee.danswer.db.external_perm import delete_user__ext_group_for_cc_pair__no_commit +from ee.danswer.external_permissions.sync_params import check_if_valid_sync_source logger = setup_logger() @@ -74,7 +77,7 @@ def _add_user_filters( .correlate(ConnectorCredentialPair) ) else: - where_clause |= ConnectorCredentialPair.is_public == True # noqa: E712 + where_clause |= ConnectorCredentialPair.access_type == AccessType.PUBLIC return stmt.where(where_clause) @@ -94,8 +97,19 @@ def get_connector_credential_pairs( ) # noqa if ids: stmt = stmt.where(ConnectorCredentialPair.id.in_(ids)) - results = db_session.scalars(stmt) - return list(results.all()) + return list(db_session.scalars(stmt).all()) + + +def add_deletion_failure_message( + db_session: Session, + cc_pair_id: int, + failure_message: str, +) -> None: + cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session) + if not cc_pair: + return + cc_pair.deletion_failure_message = failure_message + db_session.commit() def get_cc_pair_groups_for_ids( @@ -159,6 +173,7 @@ def get_connector_credential_pair_from_id( def get_last_successful_attempt_time( connector_id: int, credential_id: int, + earliest_index: float, search_settings: SearchSettings, db_session: Session, ) -> float: @@ -172,7 +187,7 @@ def get_last_successful_attempt_time( connector_credential_pair is None or connector_credential_pair.last_successful_index_time is None ): - return 0.0 + return earliest_index return connector_credential_pair.last_successful_index_time.timestamp() @@ 
-192,11 +207,9 @@ def get_last_successful_attempt_time( .order_by(IndexAttempt.time_started.desc()) .first() ) + if not attempt or not attempt.time_started: - connector = fetch_connector_by_id(connector_id, db_session) - if connector and connector.indexing_start: - return connector.indexing_start.timestamp() - return 0.0 + return earliest_index return attempt.time_started.timestamp() @@ -298,9 +311,9 @@ def associate_default_cc_pair(db_session: Session) -> None: association = ConnectorCredentialPair( connector_id=0, credential_id=0, + access_type=AccessType.PUBLIC, name="DefaultCCPair", status=ConnectorCredentialPairStatus.ACTIVE, - is_public=True, ) db_session.add(association) db_session.commit() @@ -325,8 +338,9 @@ def add_credential_to_connector( connector_id: int, credential_id: int, cc_pair_name: str | None, - is_public: bool, + access_type: AccessType, groups: list[int] | None, + auto_sync_options: dict | None = None, ) -> StatusResponse: connector = fetch_connector_by_id(connector_id, db_session) credential = fetch_credential_by_id(credential_id, user, db_session) @@ -334,10 +348,21 @@ def add_credential_to_connector( if connector is None: raise HTTPException(status_code=404, detail="Connector does not exist") + if access_type == AccessType.SYNC: + if not check_if_valid_sync_source(connector.source): + raise HTTPException( + status_code=400, + detail=f"Connector of type {connector.source} does not support SYNC access type", + ) + if credential is None: + error_msg = ( + f"Credential {credential_id} does not exist or does not belong to user" + ) + logger.error(error_msg) raise HTTPException( status_code=401, - detail="Credential does not exist or does not belong to user", + detail=error_msg, ) existing_association = ( @@ -351,7 +376,7 @@ def add_credential_to_connector( if existing_association is not None: return StatusResponse( success=False, - message=f"Connector already has Credential {credential_id}", + message=f"Connector {connector_id} already has Credential {credential_id}", data=connector_id, ) @@ -360,12 +385,13 @@ def add_credential_to_connector( credential_id=credential_id, name=cc_pair_name, status=ConnectorCredentialPairStatus.ACTIVE, - is_public=is_public, + access_type=access_type, + auto_sync_options=auto_sync_options, ) db_session.add(association) db_session.flush() # make sure the association has an id - if groups: + if groups and access_type != AccessType.SYNC: _relate_groups_to_cc_pair__no_commit( db_session=db_session, cc_pair_id=association.id, @@ -375,8 +401,8 @@ def add_credential_to_connector( db_session.commit() return StatusResponse( - success=False, - message=f"Connector already has Credential {credential_id}", + success=True, + message=f"Creating new association between Connector {connector_id} and Credential {credential_id}", data=association.id, ) @@ -408,6 +434,10 @@ def remove_credential_from_connector( ) if association is not None: + delete_user__ext_group_for_cc_pair__no_commit( + db_session=db_session, + cc_pair_id=association.id, + ) db_session.delete(association) db_session.commit() return StatusResponse( diff --git a/backend/danswer/db/document.py b/backend/danswer/db/document.py index 77ea4e3dd9d..8aee28aef05 100644 --- a/backend/danswer/db/document.py +++ b/backend/danswer/db/document.py @@ -3,26 +3,30 @@ from collections.abc import Generator from collections.abc import Sequence from datetime import datetime -from uuid import UUID +from datetime import timezone from sqlalchemy import and_ from sqlalchemy import delete from sqlalchemy import 
exists from sqlalchemy import func from sqlalchemy import or_ +from sqlalchemy import Select from sqlalchemy import select from sqlalchemy.dialects.postgresql import insert from sqlalchemy.engine.util import TransactionalContext from sqlalchemy.exc import OperationalError from sqlalchemy.orm import Session +from sqlalchemy.sql.expression import null from danswer.configs.constants import DEFAULT_BOOST +from danswer.db.enums import AccessType from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.feedback import delete_document_feedback_for_documents__no_commit from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Credential from danswer.db.models import Document as DbDocument from danswer.db.models import DocumentByConnectorCredentialPair +from danswer.db.models import User from danswer.db.tag import delete_document_tags_for_documents__no_commit from danswer.db.utils import model_to_dict from danswer.document_index.interfaces import DocumentMetadata @@ -38,6 +42,80 @@ def check_docs_exist(db_session: Session) -> bool: return result.scalar() or False +def count_documents_by_needs_sync(session: Session) -> int: + """Get the count of all documents where: + 1. last_modified is newer than last_synced + 2. last_synced is null (meaning we've never synced) + + This function executes the query and returns the count of + documents matching the criteria.""" + + count = ( + session.query(func.count()) + .select_from(DbDocument) + .filter( + or_( + DbDocument.last_modified > DbDocument.last_synced, + DbDocument.last_synced.is_(None), + ) + ) + .scalar() + ) + + return count + + +def construct_document_select_for_connector_credential_pair_by_needs_sync( + connector_id: int, credential_id: int +) -> Select: + initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where( + and_( + DocumentByConnectorCredentialPair.connector_id == connector_id, + DocumentByConnectorCredentialPair.credential_id == credential_id, + ) + ) + + stmt = ( + select(DbDocument) + .where( + DbDocument.id.in_(initial_doc_ids_stmt), + or_( + DbDocument.last_modified + > DbDocument.last_synced, # last_modified is newer than last_synced + DbDocument.last_synced.is_(None), # never synced + ), + ) + .distinct() + ) + + return stmt + + +def construct_document_select_for_connector_credential_pair( + connector_id: int, credential_id: int | None = None +) -> Select: + initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where( + and_( + DocumentByConnectorCredentialPair.connector_id == connector_id, + DocumentByConnectorCredentialPair.credential_id == credential_id, + ) + ) + stmt = select(DbDocument).where(DbDocument.id.in_(initial_doc_ids_stmt)).distinct() + return stmt + + +def get_document_ids_for_connector_credential_pair( + db_session: Session, connector_id: int, credential_id: int, limit: int | None = None +) -> list[str]: + doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where( + and_( + DocumentByConnectorCredentialPair.connector_id == connector_id, + DocumentByConnectorCredentialPair.credential_id == credential_id, + ) + ) + return list(db_session.execute(doc_ids_stmt).scalars().all()) + + def get_documents_for_connector_credential_pair( db_session: Session, connector_id: int, credential_id: int, limit: int | None = None ) -> Sequence[DbDocument]: @@ -54,15 +132,26 @@ def get_documents_for_connector_credential_pair( def get_documents_by_ids( - document_ids: list[str], db_session: Session, + document_ids: list[str], ) -> list[DbDocument]: stmt = 
select(DbDocument).where(DbDocument.id.in_(document_ids)) documents = db_session.execute(stmt).scalars().all() return list(documents) -def get_document_connector_cnts( +def get_document_connector_count( + db_session: Session, + document_id: str, +) -> int: + results = get_document_connector_counts(db_session, [document_id]) + if not results or len(results) == 0: + return 0 + + return results[0][1] + + +def get_document_connector_counts( db_session: Session, document_ids: list[str], ) -> Sequence[tuple[str, int]]: @@ -77,7 +166,7 @@ def get_document_connector_cnts( return db_session.execute(stmt).all() # type: ignore -def get_document_cnts_for_cc_pairs( +def get_document_counts_for_cc_pairs( db_session: Session, cc_pair_identifiers: list[ConnectorCredentialPairIdentifier] ) -> Sequence[tuple[int, int, int]]: stmt = ( @@ -108,22 +197,50 @@ def get_document_cnts_for_cc_pairs( return db_session.execute(stmt).all() # type: ignore -def get_acccess_info_for_documents( +def get_access_info_for_document( + db_session: Session, + document_id: str, +) -> tuple[str, list[str | None], bool] | None: + """Gets access info for a single document by calling the get_access_info_for_documents function + and passing a list with a single document ID. + Args: + db_session (Session): The database session to use. + document_id (str): The document ID to fetch access info for. + Returns: + Optional[Tuple[str, List[str | None], bool]]: A tuple containing the document ID, a list of user emails, + and a boolean indicating if the document is globally public, or None if no results are found. + """ + results = get_access_info_for_documents(db_session, [document_id]) + if not results: + return None + + return results[0] + + +def get_access_info_for_documents( db_session: Session, document_ids: list[str], -) -> Sequence[tuple[str, list[UUID | None], bool]]: +) -> Sequence[tuple[str, list[str | None], bool]]: """Gets back all relevant access info for the given documents. This includes the user_ids for cc pairs that the document is associated with + whether any of the associated cc pairs are intending to make the document globally public. 
+ Returns the list where each element contains: + - Document ID (which is also the ID of the DocumentByConnectorCredentialPair) + - List of emails of Danswer users with direct access to the doc (includes a "None" element if + the connector was set up by an admin when auth was off + - bool for whether the document is public (the document later can also be marked public by + automatic permission sync step) """ + stmt = select( + DocumentByConnectorCredentialPair.id, + func.array_agg(func.coalesce(User.email, null())).label("user_emails"), + func.bool_or(ConnectorCredentialPair.access_type == AccessType.PUBLIC).label( + "public_doc" + ), + ).where(DocumentByConnectorCredentialPair.id.in_(document_ids)) + stmt = ( - select( - DocumentByConnectorCredentialPair.id, - func.array_agg(Credential.user_id).label("user_ids"), - func.bool_or(ConnectorCredentialPair.is_public).label("public_doc"), - ) - .where(DocumentByConnectorCredentialPair.id.in_(document_ids)) - .join( + stmt.join( Credential, DocumentByConnectorCredentialPair.credential_id == Credential.id, ) @@ -136,6 +253,13 @@ def get_acccess_info_for_documents( == ConnectorCredentialPair.credential_id, ), ) + .outerjoin( + User, + and_( + Credential.user_id == User.id, + ConnectorCredentialPair.access_type != AccessType.SYNC, + ), + ) # don't include CC pairs that are being deleted # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them .where(ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING) @@ -173,6 +297,7 @@ def upsert_documents( semantic_id=doc.semantic_identifier, link=doc.first_link, doc_updated_at=None, # this is intentional + last_modified=datetime.now(timezone.utc), primary_owners=doc.primary_owners, secondary_owners=doc.secondary_owners, ) @@ -180,9 +305,19 @@ def upsert_documents( for doc in seen_documents.values() ] ) - # for now, there are no columns to update. 
If more metadata is added, then this - # needs to change to an `on_conflict_do_update` - on_conflict_stmt = insert_stmt.on_conflict_do_nothing() + + on_conflict_stmt = insert_stmt.on_conflict_do_update( + index_elements=["id"], # Conflict target + set_={ + "from_ingestion_api": insert_stmt.excluded.from_ingestion_api, + "boost": insert_stmt.excluded.boost, + "hidden": insert_stmt.excluded.hidden, + "semantic_id": insert_stmt.excluded.semantic_id, + "link": insert_stmt.excluded.link, + "primary_owners": insert_stmt.excluded.primary_owners, + "secondary_owners": insert_stmt.excluded.secondary_owners, + }, + ) db_session.execute(on_conflict_stmt) db_session.commit() @@ -214,7 +349,7 @@ def upsert_document_by_connector_credential_pair( db_session.commit() -def update_docs_updated_at( +def update_docs_updated_at__no_commit( ids_to_new_updated_at: dict[str, datetime], db_session: Session, ) -> None: @@ -226,6 +361,28 @@ def update_docs_updated_at( for document in documents_to_update: document.doc_updated_at = ids_to_new_updated_at[document.id] + +def update_docs_last_modified__no_commit( + document_ids: list[str], + db_session: Session, +) -> None: + documents_to_update = ( + db_session.query(DbDocument).filter(DbDocument.id.in_(document_ids)).all() + ) + + now = datetime.now(timezone.utc) + for doc in documents_to_update: + doc.last_modified = now + + +def mark_document_as_synced(document_id: str, db_session: Session) -> None: + stmt = select(DbDocument).where(DbDocument.id == document_id) + doc = db_session.scalar(stmt) + if doc is None: + raise ValueError(f"No document with ID: {document_id}") + + # update last_synced + doc.last_synced = datetime.now(timezone.utc) db_session.commit() @@ -241,11 +398,34 @@ def upsert_documents_complete( def delete_document_by_connector_credential_pair__no_commit( + db_session: Session, + document_id: str, + connector_credential_pair_identifier: ConnectorCredentialPairIdentifier + | None = None, +) -> None: + """Deletes a single document by cc pair relationship entry. + Foreign key rows are left in place. + The implicit assumption is that the document itself still has other cc_pair + references and needs to continue existing. + """ + delete_documents_by_connector_credential_pair__no_commit( + db_session=db_session, + document_ids=[document_id], + connector_credential_pair_identifier=connector_credential_pair_identifier, + ) + + +def delete_documents_by_connector_credential_pair__no_commit( db_session: Session, document_ids: list[str], connector_credential_pair_identifier: ConnectorCredentialPairIdentifier | None = None, ) -> None: + """This deletes just the document by cc pair entries for a particular cc pair. + Foreign key rows are left in place. + The implicit assumption is that the document itself still has other cc_pair + references and needs to continue existing. 
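Editor's note: the upsert above switches from on_conflict_do_nothing to on_conflict_do_update so re-indexed documents refresh their metadata in place. A minimal standalone example of that PostgreSQL insert pattern in SQLAlchemy (toy table, not the real Document model):

```python
from sqlalchemy import Column, Integer, MetaData, String, Table
from sqlalchemy.dialects.postgresql import insert

metadata = MetaData()
items = Table(
    "items",
    metadata,
    Column("id", String, primary_key=True),
    Column("boost", Integer),
    Column("semantic_id", String),
)

insert_stmt = insert(items).values(id="doc-1", boost=2, semantic_id="Quarterly report")

# On primary-key conflict, update the listed columns from the attempted insert (EXCLUDED row).
upsert_stmt = insert_stmt.on_conflict_do_update(
    index_elements=["id"],
    set_={
        "boost": insert_stmt.excluded.boost,
        "semantic_id": insert_stmt.excluded.semantic_id,
    },
)

# db_session.execute(upsert_stmt); db_session.commit()
```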
+ """ stmt = delete(DocumentByConnectorCredentialPair).where( DocumentByConnectorCredentialPair.id.in_(document_ids) ) @@ -268,8 +448,9 @@ def delete_documents__no_commit(db_session: Session, document_ids: list[str]) -> def delete_documents_complete__no_commit( db_session: Session, document_ids: list[str] ) -> None: + """This completely deletes the documents from the db, including all foreign key relationships""" logger.info(f"Deleting {len(document_ids)} documents from the DB") - delete_document_by_connector_credential_pair__no_commit(db_session, document_ids) + delete_documents_by_connector_credential_pair__no_commit(db_session, document_ids) delete_document_feedback_for_documents__no_commit( document_ids=document_ids, db_session=db_session ) @@ -379,3 +560,12 @@ def get_documents_by_cc_pair( .filter(ConnectorCredentialPair.id == cc_pair_id) .all() ) + + +def get_document( + document_id: str, + db_session: Session, +) -> DbDocument | None: + stmt = select(DbDocument).where(DbDocument.id == document_id) + doc: DbDocument | None = db_session.execute(stmt).scalar_one_or_none() + return doc diff --git a/backend/danswer/db/document_set.py b/backend/danswer/db/document_set.py index 2de61a491f9..0ba6c4e9ab3 100644 --- a/backend/danswer/db/document_set.py +++ b/backend/danswer/db/document_set.py @@ -14,6 +14,7 @@ from danswer.db.connector_credential_pair import get_cc_pair_groups_for_ids from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.enums import AccessType from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Document @@ -180,7 +181,7 @@ def _check_if_cc_pairs_are_owned_by_groups( ids=missing_cc_pair_ids, ) for cc_pair in cc_pairs: - if not cc_pair.is_public: + if cc_pair.access_type != AccessType.PUBLIC: raise ValueError( f"Connector Credential Pair with ID: '{cc_pair.id}'" " is not owned by the specified groups" @@ -248,6 +249,10 @@ def update_document_set( document_set_update_request: DocumentSetUpdateRequest, user: User | None = None, ) -> tuple[DocumentSetDBModel, list[DocumentSet__ConnectorCredentialPair]]: + """If successful, this sets document_set_row.is_up_to_date = False. + That will be processed via Celery in check_for_vespa_sync_task + and trigger a long running background sync to Vespa. + """ if not document_set_update_request.cc_pair_ids: # It's cc-pairs in actuality but the UI displays this error raise ValueError("Cannot create a document set with no Connectors") @@ -519,42 +524,135 @@ def fetch_documents_for_document_set_paginated( return documents, documents[-1].id if documents else None -def fetch_document_sets_for_documents( - document_ids: list[str], - db_session: Session, -) -> Sequence[tuple[str, list[str]]]: - """Gives back a list of (document_id, list[document_set_names]) tuples""" +def construct_document_select_by_docset( + document_set_id: int, + current_only: bool = True, +) -> Select: + """This returns a statement that should be executed using + .yield_per() to minimize overhead. 
The primary consumers of this function + are background processing task generators.""" + stmt = ( - select(Document.id, func.array_agg(DocumentSetDBModel.name)) + select(Document) .join( - DocumentSet__ConnectorCredentialPair, - DocumentSetDBModel.id - == DocumentSet__ConnectorCredentialPair.document_set_id, + DocumentByConnectorCredentialPair, + DocumentByConnectorCredentialPair.id == Document.id, ) .join( ConnectorCredentialPair, - ConnectorCredentialPair.id - == DocumentSet__ConnectorCredentialPair.connector_credential_pair_id, + and_( + ConnectorCredentialPair.connector_id + == DocumentByConnectorCredentialPair.connector_id, + ConnectorCredentialPair.credential_id + == DocumentByConnectorCredentialPair.credential_id, + ), + ) + .join( + DocumentSet__ConnectorCredentialPair, + DocumentSet__ConnectorCredentialPair.connector_credential_pair_id + == ConnectorCredentialPair.id, ) .join( + DocumentSetDBModel, + DocumentSetDBModel.id + == DocumentSet__ConnectorCredentialPair.document_set_id, + ) + .where(DocumentSetDBModel.id == document_set_id) + .order_by(Document.id) + ) + + if current_only: + stmt = stmt.where( + DocumentSet__ConnectorCredentialPair.is_current == True # noqa: E712 + ) + + stmt = stmt.distinct() + return stmt + + +def fetch_document_sets_for_document( + document_id: str, + db_session: Session, +) -> list[str]: + """ + Fetches the document set names for a single document ID. + + :param document_id: The ID of the document to fetch sets for. + :param db_session: The SQLAlchemy session to use for the query. + :return: A list of document set names, or None if no result is found. + """ + result = fetch_document_sets_for_documents([document_id], db_session) + if not result: + return [] + + return result[0][1] + + +def fetch_document_sets_for_documents( + document_ids: list[str], + db_session: Session, +) -> Sequence[tuple[str, list[str]]]: + """Gives back a list of (document_id, list[document_set_names]) tuples""" + + """Building subqueries""" + # NOTE: have to build these subqueries first in order to guarantee that we get one + # returned row for each specified document_id. Basically, we want to do the filters first, + # then the outer joins. 
+ + # don't include CC pairs that are being deleted + # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them + # as we can assume their document sets are no longer relevant + valid_cc_pairs_subquery = aliased( + ConnectorCredentialPair, + select(ConnectorCredentialPair) + .where( + ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING + ) # noqa: E712 + .subquery(), + ) + + valid_document_set__cc_pairs_subquery = aliased( + DocumentSet__ConnectorCredentialPair, + select(DocumentSet__ConnectorCredentialPair) + .where(DocumentSet__ConnectorCredentialPair.is_current == True) # noqa: E712 + .subquery(), + ) + """End building subqueries""" + + stmt = ( + select( + Document.id, + func.coalesce( + func.array_remove(func.array_agg(DocumentSetDBModel.name), None), [] + ).label("document_set_names"), + ) + # Here we select document sets by relation: + # Document -> DocumentByConnectorCredentialPair -> ConnectorCredentialPair -> + # DocumentSet__ConnectorCredentialPair -> DocumentSet + .outerjoin( DocumentByConnectorCredentialPair, + Document.id == DocumentByConnectorCredentialPair.id, + ) + .outerjoin( + valid_cc_pairs_subquery, and_( DocumentByConnectorCredentialPair.connector_id - == ConnectorCredentialPair.connector_id, + == valid_cc_pairs_subquery.connector_id, DocumentByConnectorCredentialPair.credential_id - == ConnectorCredentialPair.credential_id, + == valid_cc_pairs_subquery.credential_id, ), ) - .join( - Document, - Document.id == DocumentByConnectorCredentialPair.id, + .outerjoin( + valid_document_set__cc_pairs_subquery, + valid_cc_pairs_subquery.id + == valid_document_set__cc_pairs_subquery.connector_credential_pair_id, + ) + .outerjoin( + DocumentSetDBModel, + DocumentSetDBModel.id + == valid_document_set__cc_pairs_subquery.document_set_id, ) .where(Document.id.in_(document_ids)) - # don't include CC pairs that are being deleted - # NOTE: CC pairs can never go from DELETING to any other state -> it's safe to ignore them - # as we can assume their document sets are no longer relevant - .where(ConnectorCredentialPair.status != ConnectorCredentialPairStatus.DELETING) - .where(DocumentSet__ConnectorCredentialPair.is_current == True) # noqa: E712 .group_by(Document.id) ) return db_session.execute(stmt).all() # type: ignore @@ -607,7 +705,7 @@ def check_document_sets_are_public( ConnectorCredentialPair.id.in_( connector_credential_pair_ids # type:ignore ), - ConnectorCredentialPair.is_public.is_(False), + ConnectorCredentialPair.access_type != AccessType.PUBLIC, ) .limit(1) .first() diff --git a/backend/danswer/db/engine.py b/backend/danswer/db/engine.py index 94b5d0123cc..af44498be24 100644 --- a/backend/danswer/db/engine.py +++ b/backend/danswer/db/engine.py @@ -1,8 +1,10 @@ import contextlib +import threading import time from collections.abc import AsyncGenerator from collections.abc import Generator from datetime import datetime +from typing import Any from typing import ContextManager from sqlalchemy import event @@ -32,14 +34,9 @@ SYNC_DB_API = "psycopg2" ASYNC_DB_API = "asyncpg" -POSTGRES_APP_NAME = ( - POSTGRES_UNKNOWN_APP_NAME # helps to diagnose open connections in postgres -) - # global so we don't create more than one engine per process # outside of being best practice, this is needed so we can properly pool # connections and not create a new pool on every request -_SYNC_ENGINE: Engine | None = None _ASYNC_ENGINE: AsyncEngine | None = None SessionFactory: sessionmaker[Session] | None = None @@ -108,6 +105,67 @@ def 
get_db_current_time(db_session: Session) -> datetime: return result +class SqlEngine: + """Class to manage a global sql alchemy engine (needed for proper resource control) + Will eventually subsume most of the standalone functions in this file. + Sync only for now""" + + _engine: Engine | None = None + _lock: threading.Lock = threading.Lock() + _app_name: str = POSTGRES_UNKNOWN_APP_NAME + + # Default parameters for engine creation + DEFAULT_ENGINE_KWARGS = { + "pool_size": 40, + "max_overflow": 10, + "pool_pre_ping": POSTGRES_POOL_PRE_PING, + "pool_recycle": POSTGRES_POOL_RECYCLE, + } + + def __init__(self) -> None: + pass + + @classmethod + def _init_engine(cls, **engine_kwargs: Any) -> Engine: + """Private helper method to create and return an Engine.""" + connection_string = build_connection_string( + db_api=SYNC_DB_API, app_name=cls._app_name + "_sync" + ) + merged_kwargs = {**cls.DEFAULT_ENGINE_KWARGS, **engine_kwargs} + return create_engine(connection_string, **merged_kwargs) + + @classmethod + def init_engine(cls, **engine_kwargs: Any) -> None: + """Allow the caller to init the engine with extra params. Different clients + such as the API server and different celery workers and tasks + need different settings.""" + with cls._lock: + if not cls._engine: + cls._engine = cls._init_engine(**engine_kwargs) + + @classmethod + def get_engine(cls) -> Engine: + """Gets the sql alchemy engine. Will init a default engine if init hasn't + already been called. You probably want to init first!""" + if not cls._engine: + with cls._lock: + if not cls._engine: + cls._engine = cls._init_engine() + return cls._engine + + @classmethod + def set_app_name(cls, app_name: str) -> None: + """Class method to set the app name.""" + cls._app_name = app_name + + @classmethod + def get_app_name(cls) -> str: + """Class method to get current app name.""" + if not cls._app_name: + return "" + return cls._app_name + + def build_connection_string( *, db_api: str = ASYNC_DB_API, @@ -125,24 +183,11 @@ def build_connection_string( def init_sqlalchemy_engine(app_name: str) -> None: - global POSTGRES_APP_NAME - POSTGRES_APP_NAME = app_name + SqlEngine.set_app_name(app_name) def get_sqlalchemy_engine() -> Engine: - global _SYNC_ENGINE - if _SYNC_ENGINE is None: - connection_string = build_connection_string( - db_api=SYNC_DB_API, app_name=POSTGRES_APP_NAME + "_sync" - ) - _SYNC_ENGINE = create_engine( - connection_string, - pool_size=40, - max_overflow=10, - pool_pre_ping=POSTGRES_POOL_PRE_PING, - pool_recycle=POSTGRES_POOL_RECYCLE, - ) - return _SYNC_ENGINE + return SqlEngine.get_engine() def get_sqlalchemy_async_engine() -> AsyncEngine: @@ -154,7 +199,9 @@ def get_sqlalchemy_async_engine() -> AsyncEngine: _ASYNC_ENGINE = create_async_engine( connection_string, connect_args={ - "server_settings": {"application_name": POSTGRES_APP_NAME + "_async"} + "server_settings": { + "application_name": SqlEngine.get_app_name() + "_async" + } }, pool_size=40, max_overflow=10, diff --git a/backend/danswer/db/enums.py b/backend/danswer/db/enums.py index eac048e10ab..8d9515d387a 100644 --- a/backend/danswer/db/enums.py +++ b/backend/danswer/db/enums.py @@ -51,3 +51,9 @@ class ConnectorCredentialPairStatus(str, PyEnum): def is_active(self) -> bool: return self == ConnectorCredentialPairStatus.ACTIVE + + +class AccessType(str, PyEnum): + PUBLIC = "public" + PRIVATE = "private" + SYNC = "sync" diff --git a/backend/danswer/db/feedback.py b/backend/danswer/db/feedback.py index 79557f209dc..219e2474729 100644 --- 
a/backend/danswer/db/feedback.py +++ b/backend/danswer/db/feedback.py @@ -1,3 +1,5 @@ +from datetime import datetime +from datetime import timezone from uuid import UUID from fastapi import HTTPException @@ -14,6 +16,7 @@ from danswer.configs.constants import MessageType from danswer.configs.constants import SearchFeedbackType from danswer.db.chat import get_chat_message +from danswer.db.enums import AccessType from danswer.db.models import ChatMessageFeedback from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Document as DbDocument @@ -24,7 +27,6 @@ from danswer.db.models import UserGroup__ConnectorCredentialPair from danswer.db.models import UserRole from danswer.document_index.interfaces import DocumentIndex -from danswer.document_index.interfaces import UpdateRequest from danswer.utils.logger import setup_logger logger = setup_logger() @@ -93,7 +95,7 @@ def _add_user_filters( .correlate(CCPair) ) else: - where_clause |= CCPair.is_public == True # noqa: E712 + where_clause |= CCPair.access_type == AccessType.PUBLIC return stmt.where(where_clause) @@ -123,12 +125,11 @@ def update_document_boost( db_session: Session, document_id: str, boost: int, - document_index: DocumentIndex, user: User | None = None, ) -> None: stmt = select(DbDocument).where(DbDocument.id == document_id) stmt = _add_user_filters(stmt, user, get_editable=True) - result = db_session.execute(stmt).scalar_one_or_none() + result: DbDocument | None = db_session.execute(stmt).scalar_one_or_none() if result is None: raise HTTPException( status_code=400, detail="Document is not editable by this user" @@ -136,13 +137,9 @@ def update_document_boost( result.boost = boost - update = UpdateRequest( - document_ids=[document_id], - boost=boost, - ) - - document_index.update(update_requests=[update]) - + # updating last_modified triggers sync + # TODO: Should this submit to the queue directly so that the UI can update? + result.last_modified = datetime.now(timezone.utc) db_session.commit() @@ -163,13 +160,9 @@ def update_document_hidden( result.hidden = hidden - update = UpdateRequest( - document_ids=[document_id], - hidden=hidden, - ) - - document_index.update(update_requests=[update]) - + # updating last_modified triggers sync + # TODO: Should this submit to the queue directly so that the UI can update? + result.last_modified = datetime.now(timezone.utc) db_session.commit() @@ -210,11 +203,9 @@ def create_doc_retrieval_feedback( SearchFeedbackType.REJECT, SearchFeedbackType.HIDE, ]: - update = UpdateRequest( - document_ids=[document_id], boost=db_doc.boost, hidden=db_doc.hidden - ) - # Updates are generally batched for efficiency, this case only 1 doc/value is updated - document_index.update(update_requests=[update]) + # updating last_modified triggers sync + # TODO: Should this submit to the queue directly so that the UI can update? + db_doc.last_modified = datetime.now(timezone.utc) db_session.add(retrieval_feedback) db_session.commit() diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py index 0932d500bbd..32e20d065c0 100644 --- a/backend/danswer/db/index_attempt.py +++ b/backend/danswer/db/index_attempt.py @@ -181,6 +181,45 @@ def get_last_attempt( return db_session.execute(stmt).scalars().first() +def get_latest_index_attempts_by_status( + secondary_index: bool, + db_session: Session, + status: IndexingStatus, +) -> Sequence[IndexAttempt]: + """ + Retrieves the most recent index attempt with the specified status for each connector_credential_pair. 
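Editor's note on the feedback changes above: boost and hidden updates no longer push an UpdateRequest to the document index inline; they only bump last_modified, deferring the Vespa write to the background sync that watches that column. The write side of that pattern in isolation (a sketch; the real handlers also apply user permission filters):

```python
from datetime import datetime, timezone

from sqlalchemy import select
from sqlalchemy.orm import Session

from danswer.db.models import Document as DbDocument


def set_document_boost(db_session: Session, document_id: str, boost: int) -> None:
    doc = db_session.scalar(select(DbDocument).where(DbDocument.id == document_id))
    if doc is None:
        raise ValueError(f"No document with ID: {document_id}")

    doc.boost = boost
    # Marking the row as modified is what schedules the eventual index update;
    # a background job syncs any doc with last_modified newer than last_synced.
    doc.last_modified = datetime.now(timezone.utc)
    db_session.commit()
```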
+ Filters attempts based on the secondary_index flag to get either future or present index attempts. + Returns a sequence of IndexAttempt objects, one for each unique connector_credential_pair. + """ + latest_failed_attempts = ( + select( + IndexAttempt.connector_credential_pair_id, + func.max(IndexAttempt.id).label("max_failed_id"), + ) + .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id) + .where( + SearchSettings.status + == ( + IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT + ), + IndexAttempt.status == status, + ) + .group_by(IndexAttempt.connector_credential_pair_id) + .subquery() + ) + + stmt = select(IndexAttempt).join( + latest_failed_attempts, + ( + IndexAttempt.connector_credential_pair_id + == latest_failed_attempts.c.connector_credential_pair_id + ) + & (IndexAttempt.id == latest_failed_attempts.c.max_failed_id), + ) + + return db_session.execute(stmt).scalars().all() + + def get_latest_index_attempts( secondary_index: bool, db_session: Session, @@ -211,12 +250,12 @@ def get_latest_index_attempts( return db_session.execute(stmt).scalars().all() -def get_index_attempts_for_connector( +def count_index_attempts_for_connector( db_session: Session, connector_id: int, only_current: bool = True, disinclude_finished: bool = False, -) -> Sequence[IndexAttempt]: +) -> int: stmt = ( select(IndexAttempt) .join(ConnectorCredentialPair) @@ -232,23 +271,60 @@ def get_index_attempts_for_connector( stmt = stmt.join(SearchSettings).where( SearchSettings.status == IndexModelStatus.PRESENT ) + # Count total items for pagination + count_stmt = stmt.with_only_columns(func.count()).order_by(None) + total_count = db_session.execute(count_stmt).scalar_one() + return total_count - stmt = stmt.order_by(IndexAttempt.time_created.desc()) - return db_session.execute(stmt).scalars().all() +def get_paginated_index_attempts_for_cc_pair_id( + db_session: Session, + connector_id: int, + page: int, + page_size: int, + only_current: bool = True, + disinclude_finished: bool = False, +) -> list[IndexAttempt]: + stmt = ( + select(IndexAttempt) + .join(ConnectorCredentialPair) + .where(ConnectorCredentialPair.connector_id == connector_id) + ) + if disinclude_finished: + stmt = stmt.where( + IndexAttempt.status.in_( + [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS] + ) + ) + if only_current: + stmt = stmt.join(SearchSettings).where( + SearchSettings.status == IndexModelStatus.PRESENT + ) + + stmt = stmt.order_by(IndexAttempt.time_started.desc()) + + # Apply pagination + stmt = stmt.offset((page - 1) * page_size).limit(page_size) -def get_latest_finished_index_attempt_for_cc_pair( + return list(db_session.execute(stmt).scalars().all()) + + +def get_latest_index_attempt_for_cc_pair_id( + db_session: Session, connector_credential_pair_id: int, secondary_index: bool, - db_session: Session, + only_finished: bool = True, ) -> IndexAttempt | None: - stmt = select(IndexAttempt).distinct() + stmt = select(IndexAttempt) stmt = stmt.where( IndexAttempt.connector_credential_pair_id == connector_credential_pair_id, - IndexAttempt.status.not_in( - [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS] - ), ) + if only_finished: + stmt = stmt.where( + IndexAttempt.status.not_in( + [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS] + ), + ) if secondary_index: stmt = stmt.join(SearchSettings).where( SearchSettings.status == IndexModelStatus.FUTURE @@ -295,14 +371,21 @@ def get_index_attempts_for_cc_pair( def delete_index_attempts( - connector_id: int, - 
credential_id: int, + cc_pair_id: int, db_session: Session, ) -> None: + # First, delete related entries in IndexAttemptErrors + stmt_errors = delete(IndexAttemptError).where( + IndexAttemptError.index_attempt_id.in_( + select(IndexAttempt.id).where( + IndexAttempt.connector_credential_pair_id == cc_pair_id + ) + ) + ) + db_session.execute(stmt_errors) + stmt = delete(IndexAttempt).where( - IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id, - ConnectorCredentialPair.connector_id == connector_id, - ConnectorCredentialPair.credential_id == credential_id, + IndexAttempt.connector_credential_pair_id == cc_pair_id, ) db_session.execute(stmt) diff --git a/backend/danswer/db/llm.py b/backend/danswer/db/llm.py index 152cb130573..af2ded9562a 100644 --- a/backend/danswer/db/llm.py +++ b/backend/danswer/db/llm.py @@ -4,8 +4,11 @@ from sqlalchemy.orm import Session from danswer.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel +from danswer.db.models import DocumentSet from danswer.db.models import LLMProvider as LLMProviderModel from danswer.db.models import LLMProvider__UserGroup +from danswer.db.models import SearchSettings +from danswer.db.models import Tool as ToolModel from danswer.db.models import User from danswer.db.models import User__UserGroup from danswer.server.manage.embedding.models import CloudEmbeddingProvider @@ -50,6 +53,7 @@ def upsert_cloud_embedding_provider( setattr(existing_provider, key, value) else: new_provider = CloudEmbeddingProviderModel(**provider.model_dump()) + db_session.add(new_provider) existing_provider = new_provider db_session.commit() @@ -58,7 +62,8 @@ def upsert_cloud_embedding_provider( def upsert_llm_provider( - db_session: Session, llm_provider: LLMProviderUpsertRequest + llm_provider: LLMProviderUpsertRequest, + db_session: Session, ) -> FullLLMProvider: existing_llm_provider = db_session.scalar( select(LLMProviderModel).where(LLMProviderModel.name == llm_provider.name) @@ -101,6 +106,20 @@ def fetch_existing_embedding_providers( return list(db_session.scalars(select(CloudEmbeddingProviderModel)).all()) +def fetch_existing_doc_sets( + db_session: Session, doc_ids: list[int] +) -> list[DocumentSet]: + return list( + db_session.scalars(select(DocumentSet).where(DocumentSet.id.in_(doc_ids))).all() + ) + + +def fetch_existing_tools(db_session: Session, tool_ids: list[int]) -> list[ToolModel]: + return list( + db_session.scalars(select(ToolModel).where(ToolModel.id.in_(tool_ids))).all() + ) + + def fetch_existing_llm_providers( db_session: Session, user: User | None = None, @@ -157,12 +176,19 @@ def fetch_provider(db_session: Session, provider_name: str) -> FullLLMProvider | def remove_embedding_provider( db_session: Session, provider_type: EmbeddingProvider ) -> None: + db_session.execute( + delete(SearchSettings).where(SearchSettings.provider_type == provider_type) + ) + + # Delete the embedding provider db_session.execute( delete(CloudEmbeddingProviderModel).where( CloudEmbeddingProviderModel.provider_type == provider_type ) ) + db_session.commit() + def remove_llm_provider(db_session: Session, provider_id: int) -> None: # Remove LLMProvider's dependent relationships @@ -178,7 +204,7 @@ def remove_llm_provider(db_session: Session, provider_id: int) -> None: db_session.commit() -def update_default_provider(db_session: Session, provider_id: int) -> None: +def update_default_provider(provider_id: int, db_session: Session) -> None: new_default = db_session.scalar( select(LLMProviderModel).where(LLMProviderModel.id == 
provider_id) ) diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index 3cdec323961..fff6b12336d 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -39,6 +39,7 @@ from danswer.configs.constants import DocumentSource from danswer.configs.constants import FileOrigin from danswer.configs.constants import MessageType +from danswer.db.enums import AccessType from danswer.configs.constants import NotificationType from danswer.configs.constants import SearchFeedbackType from danswer.configs.constants import TokenRateLimitScope @@ -61,7 +62,7 @@ class Base(DeclarativeBase): - pass + __abstract__ = True class EncryptedString(TypeDecorator): @@ -108,7 +109,7 @@ class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base): class User(SQLAlchemyBaseUserTableUUID, Base): oauth_accounts: Mapped[list[OAuthAccount]] = relationship( - "OAuthAccount", lazy="joined" + "OAuthAccount", lazy="joined", cascade="all, delete-orphan" ) role: Mapped[UserRole] = mapped_column( Enum(UserRole, native_enum=False, default=UserRole.BASIC) @@ -122,7 +123,13 @@ class User(SQLAlchemyBaseUserTableUUID, Base): # if specified, controls the assistants that are shown to the user + their order # if not specified, all assistants are shown chosen_assistants: Mapped[list[int]] = mapped_column( - postgresql.JSONB(), nullable=True + postgresql.JSONB(), nullable=False, default=[-2, -1, 0] + ) + visible_assistants: Mapped[list[int]] = mapped_column( + postgresql.JSONB(), nullable=False, default=[] + ) + hidden_assistants: Mapped[list[int]] = mapped_column( + postgresql.JSONB(), nullable=False, default=[] ) oidc_expiry: Mapped[datetime.datetime] = mapped_column( @@ -157,6 +164,8 @@ class User(SQLAlchemyBaseUserTableUUID, Base): notifications: Mapped[list["Notification"]] = relationship( "Notification", back_populates="user" ) + # Whether the user has logged in via web. 
False if user has only used Danswer through Slack bot + has_web_login: Mapped[bool] = mapped_column(Boolean, default=True) class InputPrompt(Base): @@ -168,7 +177,9 @@ class InputPrompt(Base): active: Mapped[bool] = mapped_column(Boolean) user: Mapped[User | None] = relationship("User", back_populates="input_prompts") is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) - user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id", ondelete="CASCADE"), nullable=True + ) class InputPrompt__User(Base): @@ -212,7 +223,9 @@ class Notification(Base): notif_type: Mapped[NotificationType] = mapped_column( Enum(NotificationType, native_enum=False) ) - user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id", ondelete="CASCADE"), nullable=True + ) dismissed: Mapped[bool] = mapped_column(Boolean, default=False) last_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True)) first_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True)) @@ -247,7 +260,7 @@ class Persona__User(Base): persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True) user_id: Mapped[UUID | None] = mapped_column( - ForeignKey("user.id"), primary_key=True, nullable=True + ForeignKey("user.id", ondelete="CASCADE"), primary_key=True, nullable=True ) @@ -258,7 +271,7 @@ class DocumentSet__User(Base): ForeignKey("document_set.id"), primary_key=True ) user_id: Mapped[UUID | None] = mapped_column( - ForeignKey("user.id"), primary_key=True, nullable=True + ForeignKey("user.id", ondelete="CASCADE"), primary_key=True, nullable=True ) @@ -373,16 +386,29 @@ class ConnectorCredentialPair(Base): connector_id: Mapped[int] = mapped_column( ForeignKey("connector.id"), primary_key=True ) + + deletion_failure_message: Mapped[str | None] = mapped_column(String, nullable=True) + credential_id: Mapped[int] = mapped_column( ForeignKey("credential.id"), primary_key=True ) # controls whether the documents indexed by this CC pair are visible to all # or if they are only visible to those with that are given explicit access # (e.g. via owning the credential or being a part of a group that is given access) - is_public: Mapped[bool] = mapped_column( - Boolean, - default=True, - nullable=False, + access_type: Mapped[AccessType] = mapped_column( + Enum(AccessType, native_enum=False), nullable=False + ) + + # special info needed for the auto-sync feature. The exact structure depends on the + + # source type (defined in the connector's `source` field) + # E.g. 
for google_drive perm sync:
+    # {"customer_id": "123567", "company_domain": "@danswer.ai"}
+    auto_sync_options: Mapped[dict[str, Any] | None] = mapped_column(
+        postgresql.JSONB(), nullable=True
+    )
+    last_time_perm_sync: Mapped[datetime.datetime | None] = mapped_column(
+        DateTime(timezone=True), nullable=True
     )
     # Time finished, not used for calculating backend jobs which uses time started (created)
     last_successful_index_time: Mapped[datetime.datetime | None] = mapped_column(
@@ -413,6 +439,7 @@ class ConnectorCredentialPair(Base):
 class Document(Base):
     __tablename__ = "document"
+    # NOTE: if more sensitive data is added here for display, make sure to add user/group permission
     # this should correspond to the ID of the document
     # (as is passed around in Danswer)
@@ -426,12 +453,27 @@ class Document(Base):
     semantic_id: Mapped[str] = mapped_column(String)
     # First Section's link
     link: Mapped[str | None] = mapped_column(String, nullable=True)
+
     # The updated time is also used as a measure of the last successful state of the doc
     # pulled from the source (to help skip reindexing already updated docs in case of
     # connector retries)
+    # TODO: rename this column because it conflates the time of the source doc
+    # with the local last modified time of the doc and any associated metadata
+    # it should just be the server timestamp of the source doc
     doc_updated_at: Mapped[datetime.datetime | None] = mapped_column(
         DateTime(timezone=True), nullable=True
     )
+
+    # last time any vespa relevant row metadata or the doc changed.
+    # does not include last_synced
+    last_modified: Mapped[datetime.datetime | None] = mapped_column(
+        DateTime(timezone=True), nullable=False, index=True, default=func.now()
+    )
+
+    # last successful sync to vespa
+    last_synced: Mapped[datetime.datetime | None] = mapped_column(
+        DateTime(timezone=True), nullable=True, index=True
+    )
     # The following are not attached to User because the account/email may not be known
     # within Danswer
     # Something like the document creator
@@ -441,14 +483,25 @@ class Document(Base):
     secondary_owners: Mapped[list[str] | None] = mapped_column(
         postgresql.ARRAY(String), nullable=True
     )
-    # TODO if more sensitive data is added here for display, make sure to add user/group permission
+    # Permission sync columns
+    # Email addresses are saved at the document level for externally synced permissions
+    # This is because the normal flow of assigning permissions (through the cc_pair)
+    # doesn't apply here
+    external_user_emails: Mapped[list[str] | None] = mapped_column(
+        postgresql.ARRAY(String), nullable=True
+    )
+    # These group ids have been prefixed by the source type
+    external_user_group_ids: Mapped[list[str] | None] = mapped_column(
+        postgresql.ARRAY(String), nullable=True
+    )
+    is_public: Mapped[bool] = mapped_column(Boolean, default=False)
     retrieval_feedbacks: Mapped[list["DocumentRetrievalFeedback"]] = relationship(
         "DocumentRetrievalFeedback", back_populates="document"
     )
     tags = relationship(
         "Tag",
-        secondary="document__tag",
+        secondary=Document__Tag.__table__,
         back_populates="documents",
     )
@@ -465,7 +518,7 @@ class Tag(Base):
     documents = relationship(
         "Document",
-        secondary="document__tag",
+        secondary=Document__Tag.__table__,
         back_populates="tags",
     )
@@ -521,7 +574,9 @@ class Credential(Base):
     id: Mapped[int] = mapped_column(primary_key=True)
     credential_json: Mapped[dict[str, Any]] = mapped_column(EncryptedJson())
-    user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True)
+    user_id: Mapped[UUID | None] = mapped_column(
+
ForeignKey("user.id", ondelete="CASCADE"), nullable=True + ) # if `true`, then all Admins will have access to the credential admin_public: Mapped[bool] = mapped_column(Boolean, default=True) time_created: Mapped[datetime.datetime] = mapped_column( @@ -576,6 +631,8 @@ class SearchSettings(Base): Enum(RerankerProvider, native_enum=False), nullable=True ) rerank_api_key: Mapped[str | None] = mapped_column(String, nullable=True) + rerank_api_url: Mapped[str | None] = mapped_column(String, nullable=True) + num_rerank: Mapped[int] = mapped_column(Integer, default=NUM_POSTPROCESSED_RESULTS) cloud_provider: Mapped["CloudEmbeddingProvider"] = relationship( @@ -607,6 +664,10 @@ def __repr__(self) -> str: return f"" + @property + def api_url(self) -> str | None: + return self.cloud_provider.api_url if self.cloud_provider is not None else None + @property def api_key(self) -> str | None: return self.cloud_provider.api_key if self.cloud_provider is not None else None @@ -671,7 +732,11 @@ class IndexAttempt(Base): "SearchSettings", back_populates="index_attempts" ) - error_rows = relationship("IndexAttemptError", back_populates="index_attempt") + error_rows = relationship( + "IndexAttemptError", + back_populates="index_attempt", + cascade="all, delete-orphan", + ) __table_args__ = ( Index( @@ -806,7 +871,7 @@ class SearchDoc(Base): chat_messages = relationship( "ChatMessage", - secondary="chat_message__search_doc", + secondary=ChatMessage__SearchDoc.__table__, back_populates="search_docs", ) @@ -835,8 +900,12 @@ class ChatSession(Base): __tablename__ = "chat_session" id: Mapped[int] = mapped_column(primary_key=True) - user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) - persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id")) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id", ondelete="CASCADE"), nullable=True + ) + persona_id: Mapped[int | None] = mapped_column( + ForeignKey("persona.id"), nullable=True + ) description: Mapped[str] = mapped_column(Text) # One-shot direct answering, currently the two types of chats are not mixed one_shot: Mapped[bool] = mapped_column(Boolean, default=False) @@ -870,7 +939,6 @@ class ChatSession(Base): prompt_override: Mapped[PromptOverride | None] = mapped_column( PydanticType(PromptOverride), nullable=True ) - time_updated: Mapped[datetime.datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), @@ -879,7 +947,6 @@ class ChatSession(Base): time_created: Mapped[datetime.datetime] = mapped_column( DateTime(timezone=True), server_default=func.now() ) - user: Mapped[User] = relationship("User", back_populates="chat_sessions") folder: Mapped["ChatFolder"] = relationship( "ChatFolder", back_populates="chat_sessions" @@ -949,7 +1016,7 @@ class ChatMessage(Base): ) search_docs: Mapped[list["SearchDoc"]] = relationship( "SearchDoc", - secondary="chat_message__search_doc", + secondary=ChatMessage__SearchDoc.__table__, back_populates="chat_messages", ) # NOTE: Should always be attached to the `assistant` message. 
@@ -972,7 +1039,9 @@ class ChatFolder(Base): id: Mapped[int] = mapped_column(primary_key=True) # Only null if auth is off - user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id", ondelete="CASCADE"), nullable=True + ) name: Mapped[str | None] = mapped_column(String, nullable=True) display_priority: Mapped[int] = mapped_column(Integer, nullable=True, default=0) @@ -1085,6 +1154,7 @@ class CloudEmbeddingProvider(Base): provider_type: Mapped[EmbeddingProvider] = mapped_column( Enum(EmbeddingProvider), primary_key=True ) + api_url: Mapped[str | None] = mapped_column(String, nullable=True) api_key: Mapped[str | None] = mapped_column(EncryptedString()) search_settings: Mapped[list["SearchSettings"]] = relationship( "SearchSettings", @@ -1102,7 +1172,9 @@ class DocumentSet(Base): id: Mapped[int] = mapped_column(Integer, primary_key=True) name: Mapped[str] = mapped_column(String, unique=True) description: Mapped[str] = mapped_column(String) - user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id", ondelete="CASCADE"), nullable=True + ) # Whether changes to the document set have been propagated is_up_to_date: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) # If `False`, then the document set is not visible to users who are not explicitly @@ -1146,7 +1218,9 @@ class Prompt(Base): __tablename__ = "prompt" id: Mapped[int] = mapped_column(primary_key=True) - user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id", ondelete="CASCADE"), nullable=True + ) name: Mapped[str] = mapped_column(String) description: Mapped[str] = mapped_column(String) system_prompt: Mapped[str] = mapped_column(Text) @@ -1181,9 +1255,13 @@ class Tool(Base): openapi_schema: Mapped[dict[str, Any] | None] = mapped_column( postgresql.JSONB(), nullable=True ) - + custom_headers: Mapped[list[dict[str, str]] | None] = mapped_column( + postgresql.JSONB(), nullable=True + ) # user who created / owns the tool. Will be None for built-in tools. - user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id", ondelete="CASCADE"), nullable=True + ) user: Mapped[User | None] = relationship("User", back_populates="custom_tools") # Relationship to Persona through the association table @@ -1207,7 +1285,9 @@ class Persona(Base): __tablename__ = "persona" id: Mapped[int] = mapped_column(primary_key=True) - user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + user_id: Mapped[UUID | None] = mapped_column( + ForeignKey("user.id", ondelete="CASCADE"), nullable=True + ) name: Mapped[str] = mapped_column(String) description: Mapped[str] = mapped_column(String) # Number of chunks to pass to the LLM for generation. @@ -1236,9 +1316,18 @@ class Persona(Base): starter_messages: Mapped[list[StarterMessage] | None] = mapped_column( postgresql.JSONB(), nullable=True ) - # Default personas are configured via backend during deployment + search_start_date: Mapped[datetime.datetime | None] = mapped_column( + DateTime(timezone=True), default=None + ) + # Built-in personas are configured via backend during deployment # Treated specially (cannot be user edited etc.) 
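The hunk continuing below splits the old default_persona flag into builtin_persona (seeded from yaml, not user-editable) and is_default_persona (admin-created and auto-added to users' assistant lists). A hedged sketch of how a caller might separate the two; only the column names come from this diff, the query shape and helper are illustrative:

from sqlalchemy import select
from sqlalchemy.orm import Session

from danswer.db.models import Persona


def list_admin_default_personas(db_session: Session) -> list[Persona]:
    # Admin-created "default" assistants remain editable and user-facing, while
    # builtin personas ship with the backend and are excluded here.
    stmt = (
        select(Persona)
        .where(Persona.builtin_persona.is_(False))
        .where(Persona.is_default_persona.is_(True))
        .where(Persona.deleted.is_(False))
    )
    return list(db_session.scalars(stmt).all())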
- default_persona: Mapped[bool] = mapped_column(Boolean, default=False) + builtin_persona: Mapped[bool] = mapped_column(Boolean, default=False) + + # Default personas are personas created by admins and are automatically added + # to all users' assistants list. + is_default_persona: Mapped[bool] = mapped_column( + Boolean, default=False, nullable=False + ) # controls whether the persona is available to be selected by users is_visible: Mapped[bool] = mapped_column(Boolean, default=True) # controls the ordering of personas in the UI @@ -1289,10 +1378,10 @@ class Persona(Base): # Default personas loaded via yaml cannot have the same name __table_args__ = ( Index( - "_default_persona_name_idx", + "_builtin_persona_name_idx", "name", unique=True, - postgresql_where=(default_persona == True), # noqa: E712 + postgresql_where=(builtin_persona == True), # noqa: E712 ), ) @@ -1316,53 +1405,6 @@ class ChannelConfig(TypedDict): follow_up_tags: NotRequired[list[str]] -class StandardAnswerCategory(Base): - __tablename__ = "standard_answer_category" - - id: Mapped[int] = mapped_column(primary_key=True) - name: Mapped[str] = mapped_column(String, unique=True) - standard_answers: Mapped[list["StandardAnswer"]] = relationship( - "StandardAnswer", - secondary=StandardAnswer__StandardAnswerCategory.__table__, - back_populates="categories", - ) - slack_bot_configs: Mapped[list["SlackBotConfig"]] = relationship( - "SlackBotConfig", - secondary=SlackBotConfig__StandardAnswerCategory.__table__, - back_populates="standard_answer_categories", - ) - - -class StandardAnswer(Base): - __tablename__ = "standard_answer" - - id: Mapped[int] = mapped_column(primary_key=True) - keyword: Mapped[str] = mapped_column(String) - answer: Mapped[str] = mapped_column(String) - active: Mapped[bool] = mapped_column(Boolean) - - __table_args__ = ( - Index( - "unique_keyword_active", - keyword, - active, - unique=True, - postgresql_where=(active == True), # noqa: E712 - ), - ) - - categories: Mapped[list[StandardAnswerCategory]] = relationship( - "StandardAnswerCategory", - secondary=StandardAnswer__StandardAnswerCategory.__table__, - back_populates="standard_answers", - ) - chat_messages: Mapped[list[ChatMessage]] = relationship( - "ChatMessage", - secondary=ChatMessage__StandardAnswer.__table__, - back_populates="standard_answers", - ) - - class SlackBotResponseType(str, PyEnum): QUOTES = "quotes" CITATIONS = "citations" @@ -1388,7 +1430,7 @@ class SlackBotConfig(Base): ) persona: Mapped[Persona | None] = relationship("Persona") - standard_answer_categories: Mapped[list[StandardAnswerCategory]] = relationship( + standard_answer_categories: Mapped[list["StandardAnswerCategory"]] = relationship( "StandardAnswerCategory", secondary=SlackBotConfig__StandardAnswerCategory.__table__, back_populates="slack_bot_configs", @@ -1400,7 +1442,7 @@ class TaskQueueState(Base): __tablename__ = "task_queue_jobs" id: Mapped[int] = mapped_column(primary_key=True) - # Celery task id + # Celery task id. 
currently only for readability/diagnostics task_id: Mapped[str] = mapped_column(String) # For any job type, this would be the same task_name: Mapped[str] = mapped_column(String) @@ -1450,7 +1492,9 @@ class SamlAccount(Base): __tablename__ = "saml" id: Mapped[int] = mapped_column(primary_key=True) - user_id: Mapped[int] = mapped_column(ForeignKey("user.id"), unique=True) + user_id: Mapped[int] = mapped_column( + ForeignKey("user.id", ondelete="CASCADE"), unique=True + ) encrypted_cookie: Mapped[str] = mapped_column(Text, unique=True) expires_at: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True)) updated_at: Mapped[datetime.datetime] = mapped_column( @@ -1469,7 +1513,7 @@ class User__UserGroup(Base): ForeignKey("user_group.id"), primary_key=True ) user_id: Mapped[UUID | None] = mapped_column( - ForeignKey("user.id"), primary_key=True, nullable=True + ForeignKey("user.id", ondelete="CASCADE"), primary_key=True, nullable=True ) @@ -1618,95 +1662,73 @@ class TokenRateLimit__UserGroup(Base): ) -"""Tables related to Permission Sync""" - - -class PermissionSyncStatus(str, PyEnum): - IN_PROGRESS = "in_progress" - SUCCESS = "success" - FAILED = "failed" - +class StandardAnswerCategory(Base): + __tablename__ = "standard_answer_category" -class PermissionSyncJobType(str, PyEnum): - USER_LEVEL = "user_level" - GROUP_LEVEL = "group_level" + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] = mapped_column(String, unique=True) + standard_answers: Mapped[list["StandardAnswer"]] = relationship( + "StandardAnswer", + secondary=StandardAnswer__StandardAnswerCategory.__table__, + back_populates="categories", + ) + slack_bot_configs: Mapped[list["SlackBotConfig"]] = relationship( + "SlackBotConfig", + secondary=SlackBotConfig__StandardAnswerCategory.__table__, + back_populates="standard_answer_categories", + ) -class PermissionSyncRun(Base): - """Represents one run of a permission sync job. 
For some given cc_pair, it is either sync-ing - the users or it is sync-ing the groups""" +class StandardAnswer(Base): + __tablename__ = "standard_answer" - __tablename__ = "permission_sync_run" + id: Mapped[int] = mapped_column(primary_key=True) + keyword: Mapped[str] = mapped_column(String) + answer: Mapped[str] = mapped_column(String) + active: Mapped[bool] = mapped_column(Boolean) + match_regex: Mapped[bool] = mapped_column(Boolean) + match_any_keywords: Mapped[bool] = mapped_column(Boolean) - id: Mapped[int] = mapped_column(Integer, primary_key=True) - # Not strictly needed but makes it easy to use without fetching from cc_pair - source_type: Mapped[DocumentSource] = mapped_column( - Enum(DocumentSource, native_enum=False) - ) - # Currently all sync jobs are handled as a group permission sync or a user permission sync - update_type: Mapped[PermissionSyncJobType] = mapped_column( - Enum(PermissionSyncJobType) + __table_args__ = ( + Index( + "unique_keyword_active", + keyword, + active, + unique=True, + postgresql_where=(active == True), # noqa: E712 + ), ) - cc_pair_id: Mapped[int | None] = mapped_column( - ForeignKey("connector_credential_pair.id"), nullable=True + + categories: Mapped[list[StandardAnswerCategory]] = relationship( + "StandardAnswerCategory", + secondary=StandardAnswer__StandardAnswerCategory.__table__, + back_populates="standard_answers", ) - status: Mapped[PermissionSyncStatus] = mapped_column(Enum(PermissionSyncStatus)) - error_msg: Mapped[str | None] = mapped_column(Text, default=None) - updated_at: Mapped[datetime.datetime] = mapped_column( - DateTime(timezone=True), server_default=func.now(), onupdate=func.now() + chat_messages: Mapped[list[ChatMessage]] = relationship( + "ChatMessage", + secondary=ChatMessage__StandardAnswer.__table__, + back_populates="standard_answers", ) - cc_pair: Mapped[ConnectorCredentialPair] = relationship("ConnectorCredentialPair") + +"""Tables related to Permission Sync""" -class ExternalPermission(Base): +class User__ExternalUserGroupId(Base): """Maps user info both internal and external to the name of the external group This maps the user to all of their external groups so that the external group name can be attached to the ACL list matching during query time. User level permissions can be handled by directly adding the Danswer user to the doc ACL list""" - __tablename__ = "external_permission" - - id: Mapped[int] = mapped_column(Integer, primary_key=True) - user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) - # Email is needed because we want to keep track of users not in Danswer to simplify process - # when the user joins - user_email: Mapped[str] = mapped_column(String) - source_type: Mapped[DocumentSource] = mapped_column( - Enum(DocumentSource, native_enum=False) - ) - external_permission_group: Mapped[str] = mapped_column(String) - user = relationship("User") - - -class EmailToExternalUserCache(Base): - """A way to map users IDs in the external tool to a user in Danswer or at least an email for - when the user joins. Used as a cache for when fetching external groups which have their own - user ids, this can easily be mapped back to users already known in Danswer without needing - to call external APIs to get the user emails. + __tablename__ = "user__external_user_group_id" - This way when groups are updated in the external tool and we need to update the mapping of - internal users to the groups, we can sync the internal users to the external groups they are - part of using this. - - Ie. 
User Chris is part of groups alpha, beta, and we can update this if Chris is no longer - part of alpha in some external tool. - """ - - __tablename__ = "email_to_external_user_cache" - - id: Mapped[int] = mapped_column(Integer, primary_key=True) - external_user_id: Mapped[str] = mapped_column(String) - user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) - # Email is needed because we want to keep track of users not in Danswer to simplify process - # when the user joins - user_email: Mapped[str] = mapped_column(String) - source_type: Mapped[DocumentSource] = mapped_column( - Enum(DocumentSource, native_enum=False) + user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), primary_key=True) + # These group ids have been prefixed by the source type + external_user_group_id: Mapped[str] = mapped_column(String, primary_key=True) + cc_pair_id: Mapped[int] = mapped_column( + ForeignKey("connector_credential_pair.id"), primary_key=True ) - user = relationship("User") - class UsageReport(Base): """This stores metadata about usage reports generated by admin including user who generated @@ -1721,7 +1743,7 @@ class UsageReport(Base): # if None, report was auto-generated requestor_user_id: Mapped[UUID | None] = mapped_column( - ForeignKey("user.id"), nullable=True + ForeignKey("user.id", ondelete="CASCADE"), nullable=True ) time_created: Mapped[datetime.datetime] = mapped_column( DateTime(timezone=True), server_default=func.now() diff --git a/backend/danswer/db/persona.py b/backend/danswer/db/persona.py index bbf45a1d9ad..36d2d25c402 100644 --- a/backend/danswer/db/persona.py +++ b/backend/danswer/db/persona.py @@ -1,4 +1,5 @@ from collections.abc import Sequence +from datetime import datetime from functools import lru_cache from uuid import UUID @@ -178,6 +179,7 @@ def create_update_persona( except ValueError as e: logger.exception("Failed to create persona") raise HTTPException(status_code=400, detail=str(e)) + return PersonaSnapshot.from_model(persona) @@ -210,6 +212,22 @@ def update_persona_shared_users( ) +def update_persona_public_status( + persona_id: int, + is_public: bool, + db_session: Session, + user: User | None, +) -> None: + persona = fetch_persona_by_id( + db_session=db_session, persona_id=persona_id, user=user, get_editable=True + ) + if user and user.role != UserRole.ADMIN and persona.user_id != user.id: + raise ValueError("You don't have permission to modify this persona") + + persona.is_public = is_public + db_session.commit() + + def get_prompts( user_id: UUID | None, db_session: Session, @@ -242,7 +260,7 @@ def get_personas( stmt = _add_user_filters(stmt=stmt, user=user, get_editable=get_editable) if not include_default: - stmt = stmt.where(Persona.default_persona.is_(False)) + stmt = stmt.where(Persona.builtin_persona.is_(False)) if not include_slack_bot_personas: stmt = stmt.where(not_(Persona.name.startswith(SLACK_BOT_PERSONA_PREFIX))) if not include_deleted: @@ -290,7 +308,7 @@ def mark_delete_persona_by_name( ) -> None: stmt = ( update(Persona) - .where(Persona.name == persona_name, Persona.default_persona == is_default) + .where(Persona.name == persona_name, Persona.builtin_persona == is_default) .values(deleted=True) ) @@ -390,7 +408,6 @@ def upsert_persona( document_set_ids: list[int] | None = None, tool_ids: list[int] | None = None, persona_id: int | None = None, - default_persona: bool = False, commit: bool = True, icon_color: str | None = None, icon_shape: int | None = None, @@ -398,6 +415,9 @@ def upsert_persona( display_priority: int 
| None = None, is_visible: bool = True, remove_image: bool | None = None, + search_start_date: datetime | None = None, + builtin_persona: bool = False, + is_default_persona: bool = False, chunks_above: int = CONTEXT_CHUNKS_ABOVE, chunks_below: int = CONTEXT_CHUNKS_BELOW, ) -> Persona: @@ -438,8 +458,8 @@ def upsert_persona( validate_persona_tools(tools) if persona: - if not default_persona and persona.default_persona: - raise ValueError("Cannot update default persona with non-default.") + if not builtin_persona and persona.builtin_persona: + raise ValueError("Cannot update builtin persona with non-builtin.") # this checks if the user has permission to edit the persona persona = fetch_persona_by_id( @@ -454,7 +474,7 @@ def upsert_persona( persona.llm_relevance_filter = llm_relevance_filter persona.llm_filter_extraction = llm_filter_extraction persona.recency_bias = recency_bias - persona.default_persona = default_persona + persona.builtin_persona = builtin_persona persona.llm_model_provider_override = llm_model_provider_override persona.llm_model_version_override = llm_model_version_override persona.starter_messages = starter_messages @@ -466,6 +486,8 @@ def upsert_persona( persona.uploaded_image_id = uploaded_image_id persona.display_priority = display_priority persona.is_visible = is_visible + persona.search_start_date = search_start_date + persona.is_default_persona = is_default_persona # Do not delete any associations manually added unless # a new updated list is provided @@ -493,7 +515,7 @@ def upsert_persona( llm_relevance_filter=llm_relevance_filter, llm_filter_extraction=llm_filter_extraction, recency_bias=recency_bias, - default_persona=default_persona, + builtin_persona=builtin_persona, prompts=prompts or [], document_sets=document_sets or [], llm_model_provider_override=llm_model_provider_override, @@ -505,6 +527,8 @@ def upsert_persona( uploaded_image_id=uploaded_image_id, display_priority=display_priority, is_visible=is_visible, + search_start_date=search_start_date, + is_default_persona=is_default_persona, ) db_session.add(persona) @@ -534,7 +558,7 @@ def delete_old_default_personas( Need a more graceful fix later or those need to never have IDs""" stmt = ( update(Persona) - .where(Persona.default_persona, Persona.id > 0) + .where(Persona.builtin_persona, Persona.id > 0) .values(deleted=True, name=func.concat(Persona.name, "_old")) ) @@ -551,6 +575,7 @@ def update_persona_visibility( persona = fetch_persona_by_id( db_session=db_session, persona_id=persona_id, user=user, get_editable=True ) + persona.is_visible = is_visible db_session.commit() @@ -563,13 +588,15 @@ def validate_persona_tools(tools: list[Tool]) -> None: ) -def get_prompts_by_ids(prompt_ids: list[int], db_session: Session) -> Sequence[Prompt]: +def get_prompts_by_ids(prompt_ids: list[int], db_session: Session) -> list[Prompt]: """Unsafe, can fetch prompts from all users""" if not prompt_ids: return [] - prompts = db_session.scalars(select(Prompt).where(Prompt.id.in_(prompt_ids))).all() + prompts = db_session.scalars( + select(Prompt).where(Prompt.id.in_(prompt_ids)).where(Prompt.deleted.is_(False)) + ).all() - return prompts + return list(prompts) def get_prompt_by_id( @@ -650,9 +677,7 @@ def get_persona_by_id( result = db_session.execute(persona_stmt) persona = result.scalar_one_or_none() if persona is None: - raise ValueError( - f"Persona with ID {persona_id} does not exist or does not belong to user" - ) + raise ValueError(f"Persona with ID {persona_id} does not exist") return persona # or check if user owns 
persona @@ -715,7 +740,7 @@ def delete_persona_by_name( persona_name: str, db_session: Session, is_default: bool = True ) -> None: stmt = delete(Persona).where( - Persona.name == persona_name, Persona.default_persona == is_default + Persona.name == persona_name, Persona.builtin_persona == is_default ) db_session.execute(stmt) diff --git a/backend/danswer/db/search_settings.py b/backend/danswer/db/search_settings.py index 1d0c218e10a..e3f35e31007 100644 --- a/backend/danswer/db/search_settings.py +++ b/backend/danswer/db/search_settings.py @@ -1,3 +1,5 @@ +from sqlalchemy import and_ +from sqlalchemy import delete from sqlalchemy import select from sqlalchemy.orm import Session @@ -13,10 +15,12 @@ from danswer.db.engine import get_sqlalchemy_engine from danswer.db.llm import fetch_embedding_provider from danswer.db.models import CloudEmbeddingProvider +from danswer.db.models import IndexAttempt from danswer.db.models import IndexModelStatus from danswer.db.models import SearchSettings from danswer.indexing.models import IndexingSetting from danswer.natural_language_processing.search_nlp_models import clean_model_name +from danswer.natural_language_processing.search_nlp_models import warm_up_cross_encoder from danswer.search.models import SavedSearchSettings from danswer.server.manage.embedding.models import ( CloudEmbeddingProvider as ServerCloudEmbeddingProvider, @@ -89,6 +93,30 @@ def get_current_db_embedding_provider( return current_embedding_provider +def delete_search_settings(db_session: Session, search_settings_id: int) -> None: + current_settings = get_current_search_settings(db_session) + + if current_settings.id == search_settings_id: + raise ValueError("Cannot delete currently active search settings") + + # First, delete associated index attempts + index_attempts_query = delete(IndexAttempt).where( + IndexAttempt.search_settings_id == search_settings_id + ) + db_session.execute(index_attempts_query) + + # Then, delete the search settings + search_settings_query = delete(SearchSettings).where( + and_( + SearchSettings.id == search_settings_id, + SearchSettings.status != IndexModelStatus.PRESENT, + ) + ) + + db_session.execute(search_settings_query) + db_session.commit() + + def get_current_search_settings(db_session: Session) -> SearchSettings: query = ( select(SearchSettings) @@ -115,6 +143,13 @@ def get_secondary_search_settings(db_session: Session) -> SearchSettings | None: return latest_settings +def get_all_search_settings(db_session: Session) -> list[SearchSettings]: + query = select(SearchSettings).order_by(SearchSettings.id.desc()) + result = db_session.execute(query) + all_settings = result.scalars().all() + return list(all_settings) + + def get_multilingual_expansion(db_session: Session | None = None) -> list[str]: if db_session is None: with Session(get_sqlalchemy_engine()) as db_session: @@ -146,6 +181,14 @@ def update_current_search_settings( logger.warning("No current search settings found to update") return + # Whenever we update the current search settings, we should ensure that the local reranking model is warmed up. 
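Earlier in this file, delete_search_settings refuses to drop the currently active settings row (raising ValueError) and removes dependent IndexAttempt rows first. A hedged caller-side sketch built only on the signatures shown above; the pruning helper itself is hypothetical:

from sqlalchemy.orm import Session

from danswer.db.search_settings import (
    delete_search_settings,
    get_all_search_settings,
)


def prune_old_search_settings(db_session: Session, keep_latest: int = 2) -> None:
    # get_all_search_settings returns rows newest-first (ordered by id desc),
    # so anything past the first `keep_latest` entries is a pruning candidate.
    for settings in get_all_search_settings(db_session)[keep_latest:]:
        try:
            delete_search_settings(db_session, settings.id)
        except ValueError:
            # Raised when the row is the currently active search settings.
            continue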
+ if ( + search_settings.rerank_provider_type is None + and search_settings.rerank_model_name is not None + and current_settings.rerank_model_name != search_settings.rerank_model_name + ): + warm_up_cross_encoder(search_settings.rerank_model_name) + update_search_settings(current_settings, search_settings, preserved_fields) db_session.commit() logger.info("Current search settings updated successfully") @@ -234,6 +277,7 @@ def get_old_default_embedding_model() -> IndexingSetting: passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else ""), index_name="danswer_chunk", multipass_indexing=False, + api_url=None, ) @@ -246,4 +290,5 @@ def get_new_default_embedding_model() -> IndexingSetting: passage_prefix=ASYM_PASSAGE_PREFIX, index_name=f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}", multipass_indexing=False, + api_url=None, ) diff --git a/backend/danswer/db/slack_bot_config.py b/backend/danswer/db/slack_bot_config.py index 322dc4c4ed9..1398057cfc8 100644 --- a/backend/danswer/db/slack_bot_config.py +++ b/backend/danswer/db/slack_bot_config.py @@ -1,4 +1,5 @@ from collections.abc import Sequence +from typing import Any from sqlalchemy import select from sqlalchemy.orm import Session @@ -14,8 +15,11 @@ from danswer.db.persona import get_default_prompt from danswer.db.persona import mark_persona_as_deleted from danswer.db.persona import upsert_persona -from danswer.db.standard_answer import fetch_standard_answer_categories_by_ids from danswer.search.enums import RecencyBiasSetting +from danswer.utils.errors import EERequiredError +from danswer.utils.variable_functionality import ( + fetch_versioned_implementation_with_fallback, +) def _build_persona_name(channel_names: list[str]) -> str: @@ -62,7 +66,7 @@ def create_slack_bot_persona( llm_model_version_override=None, starter_messages=None, is_public=True, - default_persona=False, + is_default_persona=False, db_session=db_session, commit=False, ) @@ -70,6 +74,10 @@ def create_slack_bot_persona( return persona +def _no_ee_standard_answer_categories(*args: Any, **kwargs: Any) -> list: + return [] + + def insert_slack_bot_config( persona_id: int | None, channel_config: ChannelConfig, @@ -78,14 +86,29 @@ def insert_slack_bot_config( enable_auto_filters: bool, db_session: Session, ) -> SlackBotConfig: - existing_standard_answer_categories = fetch_standard_answer_categories_by_ids( - standard_answer_category_ids=standard_answer_category_ids, - db_session=db_session, + versioned_fetch_standard_answer_categories_by_ids = ( + fetch_versioned_implementation_with_fallback( + "danswer.db.standard_answer", + "fetch_standard_answer_categories_by_ids", + _no_ee_standard_answer_categories, + ) ) - if len(existing_standard_answer_categories) != len(standard_answer_category_ids): - raise ValueError( - f"Some or all categories with ids {standard_answer_category_ids} do not exist" + existing_standard_answer_categories = ( + versioned_fetch_standard_answer_categories_by_ids( + standard_answer_category_ids=standard_answer_category_ids, + db_session=db_session, ) + ) + + if len(existing_standard_answer_categories) != len(standard_answer_category_ids): + if len(existing_standard_answer_categories) == 0: + raise EERequiredError( + "Standard answers are a paid Enterprise Edition feature - enable EE or remove standard answer categories" + ) + else: + raise ValueError( + f"Some or all categories with ids {standard_answer_category_ids} do not exist" + ) slack_bot_config = SlackBotConfig( persona_id=persona_id, @@ -117,9 +140,18 @@ def update_slack_bot_config( 
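Both insert_slack_bot_config above and update_slack_bot_config below now resolve the standard-answer category lookup through fetch_versioned_implementation_with_fallback, so an OSS build degrades to an empty category list instead of importing Enterprise-only code. A rough standard-library sketch of that resolution pattern; the helper here is illustrative and is not Danswer's actual utility:

import importlib
from collections.abc import Callable
from typing import Any


def resolve_with_fallback(
    module_name: str, attribute: str, fallback: Callable[..., Any]
) -> Callable[..., Any]:
    # Prefer the (possibly EE-only) implementation, but degrade gracefully
    # when the module or attribute is not shipped in this build.
    try:
        return getattr(importlib.import_module(module_name), attribute)
    except (ImportError, AttributeError):
        return fallback


def _no_ee_standard_answer_categories(*args: Any, **kwargs: Any) -> list:
    return []


fetch_categories = resolve_with_fallback(
    "danswer.db.standard_answer",
    "fetch_standard_answer_categories_by_ids",
    _no_ee_standard_answer_categories,
)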
f"Unable to find slack bot config with ID {slack_bot_config_id}" ) - existing_standard_answer_categories = fetch_standard_answer_categories_by_ids( - standard_answer_category_ids=standard_answer_category_ids, - db_session=db_session, + versioned_fetch_standard_answer_categories_by_ids = ( + fetch_versioned_implementation_with_fallback( + "danswer.db.standard_answer", + "fetch_standard_answer_categories_by_ids", + _no_ee_standard_answer_categories, + ) + ) + existing_standard_answer_categories = ( + versioned_fetch_standard_answer_categories_by_ids( + standard_answer_category_ids=standard_answer_category_ids, + db_session=db_session, + ) ) if len(existing_standard_answer_categories) != len(standard_answer_category_ids): raise ValueError( diff --git a/backend/danswer/db/tag.py b/backend/danswer/db/tag.py index 688b8a11272..6f19859087f 100644 --- a/backend/danswer/db/tag.py +++ b/backend/danswer/db/tag.py @@ -1,3 +1,4 @@ +from sqlalchemy import and_ from sqlalchemy import delete from sqlalchemy import func from sqlalchemy import or_ @@ -107,12 +108,14 @@ def create_or_add_document_tag_list( return all_tags -def get_tags_by_value_prefix_for_source_types( +def find_tags( tag_key_prefix: str | None, tag_value_prefix: str | None, sources: list[DocumentSource] | None, limit: int | None, db_session: Session, + # if set, both tag_key_prefix and tag_value_prefix must be a match + require_both_to_match: bool = False, ) -> list[Tag]: query = select(Tag) @@ -122,7 +125,11 @@ def get_tags_by_value_prefix_for_source_types( conditions.append(Tag.tag_key.ilike(f"{tag_key_prefix}%")) if tag_value_prefix: conditions.append(Tag.tag_value.ilike(f"{tag_value_prefix}%")) - query = query.where(or_(*conditions)) + + final_prefix_condition = ( + and_(*conditions) if require_both_to_match else or_(*conditions) + ) + query = query.where(final_prefix_condition) if sources: query = query.where(Tag.source.in_(sources)) diff --git a/backend/danswer/db/tasks.py b/backend/danswer/db/tasks.py index 23a7edc9882..a7aec90d260 100644 --- a/backend/danswer/db/tasks.py +++ b/backend/danswer/db/tasks.py @@ -44,12 +44,11 @@ def get_latest_task_by_type( def register_task( - task_id: str, task_name: str, db_session: Session, ) -> TaskQueueState: new_task = TaskQueueState( - task_id=task_id, task_name=task_name, status=TaskStatus.PENDING + task_id="", task_name=task_name, status=TaskStatus.PENDING ) db_session.add(new_task) diff --git a/backend/danswer/db/tools.py b/backend/danswer/db/tools.py index 1e75b1c4901..248744b5639 100644 --- a/backend/danswer/db/tools.py +++ b/backend/danswer/db/tools.py @@ -5,6 +5,7 @@ from sqlalchemy.orm import Session from danswer.db.models import Tool +from danswer.server.features.tool.models import Header from danswer.utils.logger import setup_logger logger = setup_logger() @@ -25,6 +26,7 @@ def create_tool( name: str, description: str | None, openapi_schema: dict[str, Any] | None, + custom_headers: list[Header] | None, user_id: UUID | None, db_session: Session, ) -> Tool: @@ -33,6 +35,9 @@ def create_tool( description=description, in_code_tool_id=None, openapi_schema=openapi_schema, + custom_headers=[header.dict() for header in custom_headers] + if custom_headers + else [], user_id=user_id, ) db_session.add(new_tool) @@ -45,6 +50,7 @@ def update_tool( name: str | None, description: str | None, openapi_schema: dict[str, Any] | None, + custom_headers: list[Header] | None, user_id: UUID | None, db_session: Session, ) -> Tool: @@ -60,6 +66,8 @@ def update_tool( tool.openapi_schema = openapi_schema if 
user_id is not None: tool.user_id = user_id + if custom_headers is not None: + tool.custom_headers = [header.dict() for header in custom_headers] db_session.commit() return tool diff --git a/backend/danswer/db/users.py b/backend/danswer/db/users.py index d824ccfd921..1ff21b71006 100644 --- a/backend/danswer/db/users.py +++ b/backend/danswer/db/users.py @@ -1,9 +1,12 @@ from collections.abc import Sequence from uuid import UUID +from fastapi_users.password import PasswordHelper +from sqlalchemy import func from sqlalchemy import select from sqlalchemy.orm import Session +from danswer.auth.schemas import UserRole from danswer.db.models import User @@ -20,8 +23,23 @@ def list_users( return db_session.scalars(stmt).unique().all() +def get_users_by_emails( + db_session: Session, emails: list[str] +) -> tuple[list[User], list[str]]: + # Use distinct to avoid duplicates + stmt = select(User).filter(User.email.in_(emails)) # type: ignore + found_users = list(db_session.scalars(stmt).unique().all()) # Convert to list + found_users_emails = [user.email for user in found_users] + missing_user_emails = [email for email in emails if email not in found_users_emails] + return found_users, missing_user_emails + + def get_user_by_email(email: str, db_session: Session) -> User | None: - user = db_session.query(User).filter(User.email == email).first() # type: ignore + user = ( + db_session.query(User) + .filter(func.lower(User.email) == func.lower(email)) + .first() + ) return user @@ -30,3 +48,52 @@ def fetch_user_by_id(db_session: Session, user_id: UUID) -> User | None: user = db_session.query(User).filter(User.id == user_id).first() # type: ignore return user + + +def _generate_non_web_user(email: str) -> User: + fastapi_users_pw_helper = PasswordHelper() + password = fastapi_users_pw_helper.generate() + hashed_pass = fastapi_users_pw_helper.hash(password) + return User( + email=email, + hashed_password=hashed_pass, + has_web_login=False, + role=UserRole.BASIC, + ) + + +def add_non_web_user_if_not_exists(db_session: Session, email: str) -> User: + user = get_user_by_email(email, db_session) + if user is not None: + return user + + user = _generate_non_web_user(email=email) + db_session.add(user) + db_session.commit() + return user + + +def add_non_web_user_if_not_exists__no_commit(db_session: Session, email: str) -> User: + user = get_user_by_email(email, db_session) + if user is not None: + return user + + user = _generate_non_web_user(email=email) + db_session.add(user) + db_session.flush() # generate id + return user + + +def batch_add_non_web_user_if_not_exists__no_commit( + db_session: Session, emails: list[str] +) -> list[User]: + found_users, missing_user_emails = get_users_by_emails(db_session, emails) + + new_users: list[User] = [] + for email in missing_user_emails: + new_users.append(_generate_non_web_user(email=email)) + + db_session.add_all(new_users) + db_session.flush() # generate ids + + return found_users + new_users diff --git a/backend/danswer/document_index/factory.py b/backend/danswer/document_index/factory.py index 17701d98e04..aedaec147d0 100644 --- a/backend/danswer/document_index/factory.py +++ b/backend/danswer/document_index/factory.py @@ -1,3 +1,6 @@ +from sqlalchemy.orm import Session + +from danswer.db.search_settings import get_current_search_settings from danswer.document_index.interfaces import DocumentIndex from danswer.document_index.vespa.index import VespaIndex @@ -13,3 +16,14 @@ def get_default_document_index( return VespaIndex( index_name=primary_index_name, 
secondary_index_name=secondary_index_name
     )
+
+
+def get_current_primary_default_document_index(db_session: Session) -> DocumentIndex:
+    """
+    TODO: Use redis to cache this or something
+    """
+    search_settings = get_current_search_settings(db_session)
+    return get_default_document_index(
+        primary_index_name=search_settings.index_name,
+        secondary_index_name=None,
+    )
diff --git a/backend/danswer/document_index/interfaces.py b/backend/danswer/document_index/interfaces.py
index 2acd0977959..b499d696743 100644
--- a/backend/danswer/document_index/interfaces.py
+++ b/backend/danswer/document_index/interfaces.py
@@ -156,6 +156,16 @@ class Deletable(abc.ABC):
     Class must implement the ability to delete document by their unique document ids.
     """
 
+    @abc.abstractmethod
+    def delete_single(self, doc_id: str) -> None:
+        """
+        Given a single document id, hard delete it from the document index
+
+        Parameters:
+        - doc_id: document id as specified by the connector
+        """
+        raise NotImplementedError
+
     @abc.abstractmethod
     def delete(self, doc_ids: list[str]) -> None:
         """
@@ -177,6 +187,30 @@ class Updatable(abc.ABC):
     - Whether the document is hidden or not, hidden documents are not returned from search
     """
 
+    @abc.abstractmethod
+    def update_single(self, update_request: UpdateRequest) -> None:
+        """
+        Updates some set of chunks for a document. The document and fields to update
+        are specified in the update request. The update request applies the same changes
+        to each of its document ids.
+        None values mean that the field does not need an update.
+
+        The rationale for a single update function is that it allows retries and parallelism
+        to happen at a higher / more strategic level, is simpler to read, and allows
+        us to individually handle error conditions per document.
+
+        Parameters:
+        - update_request: for a list of document ids in the update request, apply the same updates
+          to all of the documents with those ids.
+
+        Return:
+        - an HTTPStatus code. The code can be used to decide whether to fail immediately,
+          retry, etc. Although this method likely hits an HTTP API behind the
+          scenes, the usage of HTTPStatus is a convenience and the interface is not
+          actually HTTP specific.
+ """ + raise NotImplementedError + @abc.abstractmethod def update(self, update_requests: list[UpdateRequest]) -> None: """ diff --git a/backend/danswer/document_index/vespa/app_config/services.xml b/backend/danswer/document_index/vespa/app_config/services.xml index 01f2c191ac6..03604d1070c 100644 --- a/backend/danswer/document_index/vespa/app_config/services.xml +++ b/backend/danswer/document_index/vespa/app_config/services.xml @@ -26,6 +26,17 @@ 0.75 + + + + + + SEARCH_THREAD_NUMBER + + + + + 3 750 @@ -33,4 +44,4 @@ 300 - + \ No newline at end of file diff --git a/backend/danswer/document_index/vespa/chunk_retrieval.py b/backend/danswer/document_index/vespa/chunk_retrieval.py index 6a7427630b8..e4b2ad83ce2 100644 --- a/backend/danswer/document_index/vespa/chunk_retrieval.py +++ b/backend/danswer/document_index/vespa/chunk_retrieval.py @@ -30,6 +30,7 @@ from danswer.document_index.vespa_constants import HIDDEN from danswer.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS from danswer.document_index.vespa_constants import MAX_ID_SEARCH_QUERY_SIZE +from danswer.document_index.vespa_constants import MAX_OR_CONDITIONS from danswer.document_index.vespa_constants import METADATA from danswer.document_index.vespa_constants import METADATA_SUFFIX from danswer.document_index.vespa_constants import PRIMARY_OWNERS @@ -292,12 +293,11 @@ def query_vespa( if LOG_VESPA_TIMING_INFORMATION else {}, ) - - response = requests.post( - SEARCH_ENDPOINT, - json=params, - ) try: + response = requests.post( + SEARCH_ENDPOINT, + json=params, + ) response.raise_for_status() except requests.HTTPError as e: request_info = f"Headers: {response.request.headers}\nPayload: {params}" @@ -319,6 +319,12 @@ def query_vespa( logger.debug("Vespa timing info: %s", response_json.get("timing")) hits = response_json["root"].get("children", []) + if not hits: + logger.warning( + f"No hits found for YQL Query: {query_params.get('yql', 'No YQL Query')}" + ) + logger.debug(f"Vespa Response: {response.text}") + for hit in hits: if hit["fields"].get(CONTENT) is None: identifier = hit["fields"].get("documentid") or hit["id"] @@ -379,7 +385,7 @@ def batch_search_api_retrieval( capped_requests: list[VespaChunkRequest] = [] uncapped_requests: list[VespaChunkRequest] = [] chunk_count = 0 - for request in chunk_requests: + for req_ind, request in enumerate(chunk_requests, start=1): # All requests without a chunk range are uncapped # Uncapped requests are retrieved using the Visit API range = request.range @@ -387,9 +393,10 @@ def batch_search_api_retrieval( uncapped_requests.append(request) continue - # If adding the range to the chunk count is greater than the - # max query size, we need to perform a retrieval to avoid hitting the limit - if chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE: + if ( + chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE + or req_ind % MAX_OR_CONDITIONS == 0 + ): retrieved_chunks.extend( _get_chunks_via_batch_search( index_name=index_name, diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py index d07da5b06bb..972841bd636 100644 --- a/backend/danswer/document_index/vespa/index.py +++ b/backend/danswer/document_index/vespa/index.py @@ -13,9 +13,11 @@ import httpx import requests +from danswer.configs.app_configs import DOCUMENT_INDEX_NAME from danswer.configs.chat_configs import DOC_TIME_DECAY from danswer.configs.chat_configs import NUM_RETURNED_HITS from danswer.configs.chat_configs import TITLE_CONTENT_RATIO +from danswer.configs.chat_configs import 
VESPA_SEARCHER_THREADS from danswer.configs.constants import KV_REINDEX_KEY from danswer.document_index.interfaces import DocumentIndex from danswer.document_index.interfaces import DocumentInsertionRecord @@ -52,6 +54,7 @@ from danswer.document_index.vespa_constants import DOCUMENT_SETS from danswer.document_index.vespa_constants import HIDDEN from danswer.document_index.vespa_constants import NUM_THREADS +from danswer.document_index.vespa_constants import SEARCH_THREAD_NUMBER_PAT from danswer.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT from danswer.document_index.vespa_constants import VESPA_DIM_REPLACEMENT_PAT from danswer.document_index.vespa_constants import VESPA_TIMEOUT @@ -118,7 +121,7 @@ def ensure_indices_exist( secondary_index_embedding_dim: int | None, ) -> None: deploy_url = f"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate" - logger.debug(f"Sending Vespa zip to {deploy_url}") + logger.info(f"Deploying Vespa application package to {deploy_url}") vespa_schema_path = os.path.join( os.getcwd(), "danswer", "document_index", "vespa", "app_config" @@ -134,6 +137,10 @@ def ensure_indices_exist( doc_lines = _create_document_xml_lines(schema_names) services = services_template.replace(DOCUMENT_REPLACEMENT_PAT, doc_lines) + services = services.replace( + SEARCH_THREAD_NUMBER_PAT, str(VESPA_SEARCHER_THREADS) + ) + kv_store = get_dynamic_config_store() needs_reindexing = False @@ -282,7 +289,7 @@ def _update_chunk( raise requests.HTTPError(failure_msg) from e def update(self, update_requests: list[UpdateRequest]) -> None: - logger.info(f"Updating {len(update_requests)} documents in Vespa") + logger.debug(f"Updating {len(update_requests)} documents in Vespa") # Handle Vespa character limitations # Mutating update_requests but it's not used later anyway @@ -371,6 +378,91 @@ def update(self, update_requests: list[UpdateRequest]) -> None: time.monotonic() - update_start, ) + def update_single(self, update_request: UpdateRequest) -> None: + """Note: if the document id does not exist, the update will be a no-op and the + function will complete with no errors or exceptions. 
+ Handle other exceptions if you wish to implement retry behavior + """ + if len(update_request.document_ids) != 1: + raise ValueError("update_request must contain a single document id") + + # Handle Vespa character limitations + # Mutating update_request but it's not used later anyway + update_request.document_ids = [ + replace_invalid_doc_id_characters(doc_id) + for doc_id in update_request.document_ids + ] + + # update_start = time.monotonic() + + # Fetch all chunks for each document ahead of time + index_names = [self.index_name] + if self.secondary_index_name: + index_names.append(self.secondary_index_name) + + chunk_id_start_time = time.monotonic() + all_doc_chunk_ids: list[str] = [] + for index_name in index_names: + for document_id in update_request.document_ids: + # this calls vespa and can raise http exceptions + doc_chunk_ids = get_all_vespa_ids_for_document_id( + document_id=document_id, + index_name=index_name, + filters=None, + get_large_chunks=True, + ) + all_doc_chunk_ids.extend(doc_chunk_ids) + logger.debug( + f"Took {time.monotonic() - chunk_id_start_time:.2f} seconds to fetch all Vespa chunk IDs" + ) + + # Build the _VespaUpdateRequest objects + update_dict: dict[str, dict] = {"fields": {}} + if update_request.boost is not None: + update_dict["fields"][BOOST] = {"assign": update_request.boost} + if update_request.document_sets is not None: + update_dict["fields"][DOCUMENT_SETS] = { + "assign": { + document_set: 1 for document_set in update_request.document_sets + } + } + if update_request.access is not None: + update_dict["fields"][ACCESS_CONTROL_LIST] = { + "assign": {acl_entry: 1 for acl_entry in update_request.access.to_acl()} + } + if update_request.hidden is not None: + update_dict["fields"][HIDDEN] = {"assign": update_request.hidden} + + if not update_dict["fields"]: + logger.error("Update request received but nothing to update") + return + + processed_update_requests: list[_VespaUpdateRequest] = [] + for document_id in update_request.document_ids: + for doc_chunk_id in all_doc_chunk_ids: + processed_update_requests.append( + _VespaUpdateRequest( + document_id=document_id, + url=f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}/{doc_chunk_id}", + update_request=update_dict, + ) + ) + + with httpx.Client(http2=True) as http_client: + for update in processed_update_requests: + http_client.put( + update.url, + headers={"Content-Type": "application/json"}, + json=update.update_request, + ) + + # logger.debug( + # "Finished updating Vespa documents in %.2f seconds", + # time.monotonic() - update_start, + # ) + + return + def delete(self, doc_ids: list[str]) -> None: logger.info(f"Deleting {len(doc_ids)} documents from Vespa") @@ -388,6 +480,66 @@ def delete(self, doc_ids: list[str]) -> None: document_ids=doc_ids, index_name=index_name, http_client=http_client ) + def delete_single(self, doc_id: str) -> None: + """Possibly faster overall than the delete method due to using a single + delete call with a selection query.""" + + # Vespa deletion is poorly documented ... luckily we found this + # https://docs.vespa.ai/en/operations/batch-delete.html#example + + doc_id = replace_invalid_doc_id_characters(doc_id) + + # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for + # indexing / updates / deletes since we have to make a large volume of requests. 
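These two single-document entry points, update_single above and delete_single here, are designed so callers can retry or skip on a per-document basis. A hedged caller-side sketch; the import path for UpdateRequest and the constructor defaults are assumptions, not taken from this diff:

from danswer.document_index.interfaces import DocumentIndex, UpdateRequest


def hide_or_remove_document(
    document_index: DocumentIndex, doc_id: str, hide_only: bool
) -> None:
    if hide_only:
        # Fields left as None are ignored by update_single; only `hidden` changes,
        # and a missing document id is documented above as a silent no-op.
        document_index.update_single(
            UpdateRequest(document_ids=[doc_id], hidden=True)
        )
    else:
        # Removes every chunk whose document_id matches the selection query.
        document_index.delete_single(doc_id)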
+ index_names = [self.index_name] + if self.secondary_index_name: + index_names.append(self.secondary_index_name) + + with httpx.Client(http2=True) as http_client: + for index_name in index_names: + params = httpx.QueryParams( + { + "selection": f"{index_name}.document_id=='{doc_id}'", + "cluster": DOCUMENT_INDEX_NAME, + } + ) + + total_chunks_deleted = 0 + while True: + try: + resp = http_client.delete( + f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}", + params=params, + ) + resp.raise_for_status() + except httpx.HTTPStatusError as e: + logger.error( + f"Failed to delete chunk, details: {e.response.text}" + ) + raise + + resp_data = resp.json() + + if "documentCount" in resp_data: + chunks_deleted = resp_data["documentCount"] + total_chunks_deleted += chunks_deleted + + # Check for continuation token to handle pagination + if "continuation" not in resp_data: + break # Exit loop if no continuation token + + if not resp_data["continuation"]: + break # Exit loop if continuation token is empty + + params = params.set("continuation", resp_data["continuation"]) + + logger.debug( + f"VespaIndex.delete_single: " + f"index={index_name} " + f"doc={doc_id} " + f"chunks_deleted={total_chunks_deleted}" + ) + def id_based_retrieval( self, chunk_requests: list[VespaChunkRequest], diff --git a/backend/danswer/document_index/vespa/indexing_utils.py b/backend/danswer/document_index/vespa/indexing_utils.py index 1b16cfc4947..6b6ba8709d5 100644 --- a/backend/danswer/document_index/vespa/indexing_utils.py +++ b/backend/danswer/document_index/vespa/indexing_utils.py @@ -162,14 +162,16 @@ def _index_vespa_chunk( METADATA_SUFFIX: chunk.metadata_suffix_keyword, EMBEDDINGS: embeddings_name_vector_map, TITLE_EMBEDDING: chunk.title_embedding, - BOOST: chunk.boost, DOC_UPDATED_AT: _vespa_get_updated_at_attribute(document.doc_updated_at), PRIMARY_OWNERS: get_experts_stores_representations(document.primary_owners), SECONDARY_OWNERS: get_experts_stores_representations(document.secondary_owners), # the only `set` vespa has is `weightedset`, so we have to give each # element an arbitrary weight + # rkuo: acl, docset and boost metadata are also updated through the metadata sync queue + # which only calls VespaIndex.update ACCESS_CONTROL_LIST: {acl_entry: 1 for acl_entry in chunk.access.to_acl()}, DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets}, + BOOST: chunk.boost, } vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_chunk_id}" diff --git a/backend/danswer/document_index/vespa_constants.py b/backend/danswer/document_index/vespa_constants.py index 0b8949b4264..8409efe1dea 100644 --- a/backend/danswer/document_index/vespa_constants.py +++ b/backend/danswer/document_index/vespa_constants.py @@ -7,6 +7,7 @@ VESPA_DIM_REPLACEMENT_PAT = "VARIABLE_DIM" DANSWER_CHUNK_REPLACEMENT_PAT = "DANSWER_CHUNK_NAME" DOCUMENT_REPLACEMENT_PAT = "DOCUMENT_REPLACEMENT" +SEARCH_THREAD_NUMBER_PAT = "SEARCH_THREAD_NUMBER" DATE_REPLACEMENT = "DATE_REPLACEMENT" # config server @@ -25,6 +26,9 @@ 32 # since Vespa doesn't allow batching of inserts / updates, we use threads ) MAX_ID_SEARCH_QUERY_SIZE = 400 +# Suspect that adding too many "or" conditions will cause Vespa to timeout and return +# an empty list of hits (with no error status and coverage: 0 and degraded) +MAX_OR_CONDITIONS = 10 # up from 500ms for now, since we've seen quite a few timeouts # in the long term, we are looking to improve the performance of Vespa # so that we can bring this back to default diff --git 
a/backend/danswer/file_processing/extract_file_text.py b/backend/danswer/file_processing/extract_file_text.py index 7143b428714..36df08ac465 100644 --- a/backend/danswer/file_processing/extract_file_text.py +++ b/backend/danswer/file_processing/extract_file_text.py @@ -8,6 +8,7 @@ from email.parser import Parser as EmailParser from pathlib import Path from typing import Any +from typing import Dict from typing import IO import chardet @@ -178,6 +179,17 @@ def read_text_file( def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str: + """Extract text from a PDF file.""" + # Return only the extracted text from read_pdf_file + text, _ = read_pdf_file(file, pdf_pass) + return text + + +def read_pdf_file( + file: IO[Any], + pdf_pass: str | None = None, +) -> tuple[str, dict]: + metadata: Dict[str, Any] = {} try: pdf_reader = PdfReader(file) @@ -189,16 +201,33 @@ def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str: decrypt_success = pdf_reader.decrypt(pdf_pass) != 0 except Exception: logger.error("Unable to decrypt pdf") - else: - logger.warning("No Password available to to decrypt pdf") if not decrypt_success: # By user request, keep files that are unreadable just so they # can be discoverable by title. - return "" - - return TEXT_SECTION_SEPARATOR.join( - page.extract_text() for page in pdf_reader.pages + return "", metadata + else: + logger.warning("No Password available to to decrypt pdf") + + # Extract metadata from the PDF, removing leading '/' from keys if present + # This standardizes the metadata keys for consistency + metadata = {} + if pdf_reader.metadata is not None: + for key, value in pdf_reader.metadata.items(): + clean_key = key.lstrip("/") + if isinstance(value, str) and value.strip(): + metadata[clean_key] = value + + elif isinstance(value, list) and all( + isinstance(item, str) for item in value + ): + metadata[clean_key] = ", ".join(value) + + return ( + TEXT_SECTION_SEPARATOR.join( + page.extract_text() for page in pdf_reader.pages + ), + metadata, ) except PdfStreamError: logger.exception("PDF file is not a valid PDF") @@ -207,13 +236,47 @@ def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str: # File is still discoverable by title # but the contents are not included as they cannot be parsed - return "" + return "", metadata def docx_to_text(file: IO[Any]) -> str: + def is_simple_table(table: docx.table.Table) -> bool: + for row in table.rows: + # No omitted cells + if row.grid_cols_before > 0 or row.grid_cols_after > 0: + return False + + # No nested tables + if any(cell.tables for cell in row.cells): + return False + + return True + + def extract_cell_text(cell: docx.table._Cell) -> str: + cell_paragraphs = [para.text.strip() for para in cell.paragraphs] + return " ".join(p for p in cell_paragraphs if p) or "N/A" + + paragraphs = [] doc = docx.Document(file) - full_text = [para.text for para in doc.paragraphs] - return TEXT_SECTION_SEPARATOR.join(full_text) + for item in doc.iter_inner_content(): + if isinstance(item, docx.text.paragraph.Paragraph): + paragraphs.append(item.text) + + elif isinstance(item, docx.table.Table): + if not item.rows or not is_simple_table(item): + continue + + # Every row is a new line, joined with a single newline + table_content = "\n".join( + [ + ",\t".join(extract_cell_text(cell) for cell in row.cells) + for row in item.rows + ] + ) + paragraphs.append(table_content) + + # Docx already has good spacing between paragraphs + return "\n".join(paragraphs) def pptx_to_text(file: IO[Any]) -> str: diff --git 
a/backend/danswer/file_store/utils.py b/backend/danswer/file_store/utils.py index 4b849f70d96..b71d20bbbb4 100644 --- a/backend/danswer/file_store/utils.py +++ b/backend/danswer/file_store/utils.py @@ -1,4 +1,6 @@ +from collections.abc import Callable from io import BytesIO +from typing import Any from typing import cast from uuid import uuid4 @@ -73,5 +75,7 @@ def save_file_from_url(url: str) -> str: def save_files_from_urls(urls: list[str]) -> list[str]: - funcs = [(save_file_from_url, (url,)) for url in urls] + funcs: list[tuple[Callable[..., Any], tuple[Any, ...]]] = [ + (save_file_from_url, (url,)) for url in urls + ] return run_functions_tuples_in_parallel(funcs) diff --git a/backend/danswer/indexing/chunker.py b/backend/danswer/indexing/chunker.py index 03a03f30f49..a25cfc3d32b 100644 --- a/backend/danswer/indexing/chunker.py +++ b/backend/danswer/indexing/chunker.py @@ -10,6 +10,7 @@ get_metadata_keys_to_ignore, ) from danswer.connectors.models import Document +from danswer.indexing.indexing_heartbeat import Heartbeat from danswer.indexing.models import DocAwareChunk from danswer.natural_language_processing.utils import BaseTokenizer from danswer.utils.logger import setup_logger @@ -123,6 +124,7 @@ def __init__( chunk_token_limit: int = DOC_EMBEDDING_CONTEXT_SIZE, chunk_overlap: int = CHUNK_OVERLAP, mini_chunk_size: int = MINI_CHUNK_SIZE, + heartbeat: Heartbeat | None = None, ) -> None: from llama_index.text_splitter import SentenceSplitter @@ -131,6 +133,7 @@ def __init__( self.enable_multipass = enable_multipass self.enable_large_chunks = enable_large_chunks self.tokenizer = tokenizer + self.heartbeat = heartbeat self.blurb_splitter = SentenceSplitter( tokenizer=tokenizer.tokenize, @@ -255,7 +258,7 @@ def _create_chunk( # If the chunk does not have any useable content, it will not be indexed return chunks - def chunk(self, document: Document) -> list[DocAwareChunk]: + def _handle_single_document(self, document: Document) -> list[DocAwareChunk]: # Specifically for reproducing an issue with gmail if document.source == DocumentSource.GMAIL: logger.debug(f"Chunking {document.semantic_identifier}") @@ -302,3 +305,13 @@ def chunk(self, document: Document) -> list[DocAwareChunk]: normal_chunks.extend(large_chunks) return normal_chunks + + def chunk(self, documents: list[Document]) -> list[DocAwareChunk]: + final_chunks: list[DocAwareChunk] = [] + for document in documents: + final_chunks.extend(self._handle_single_document(document)) + + if self.heartbeat: + self.heartbeat.heartbeat() + + return final_chunks diff --git a/backend/danswer/indexing/embedder.py b/backend/danswer/indexing/embedder.py index f7d8f4e7400..259bebd3fd9 100644 --- a/backend/danswer/indexing/embedder.py +++ b/backend/danswer/indexing/embedder.py @@ -1,12 +1,8 @@ from abc import ABC from abc import abstractmethod -from sqlalchemy.orm import Session - -from danswer.db.models import IndexModelStatus from danswer.db.models import SearchSettings -from danswer.db.search_settings import get_current_search_settings -from danswer.db.search_settings import get_secondary_search_settings +from danswer.indexing.indexing_heartbeat import Heartbeat from danswer.indexing.models import ChunkEmbedding from danswer.indexing.models import DocAwareChunk from danswer.indexing.models import IndexChunk @@ -24,6 +20,9 @@ class IndexingEmbedder(ABC): + """Converts chunks into chunks with embeddings. 
Note that one chunk may have + multiple embeddings associated with it.""" + def __init__( self, model_name: str, @@ -32,6 +31,8 @@ def __init__( passage_prefix: str | None, provider_type: EmbeddingProvider | None, api_key: str | None, + api_url: str | None, + heartbeat: Heartbeat | None, ): self.model_name = model_name self.normalize = normalize @@ -39,6 +40,7 @@ def __init__( self.passage_prefix = passage_prefix self.provider_type = provider_type self.api_key = api_key + self.api_url = api_url self.embedding_model = EmbeddingModel( model_name=model_name, @@ -47,10 +49,12 @@ def __init__( normalize=normalize, api_key=api_key, provider_type=provider_type, + api_url=api_url, # The below are globally set, this flow always uses the indexing one server_host=INDEXING_MODEL_SERVER_HOST, server_port=INDEXING_MODEL_SERVER_PORT, retrim_content=True, + heartbeat=heartbeat, ) @abstractmethod @@ -70,9 +74,18 @@ def __init__( passage_prefix: str | None, provider_type: EmbeddingProvider | None = None, api_key: str | None = None, + api_url: str | None = None, + heartbeat: Heartbeat | None = None, ): super().__init__( - model_name, normalize, query_prefix, passage_prefix, provider_type, api_key + model_name, + normalize, + query_prefix, + passage_prefix, + provider_type, + api_key, + api_url, + heartbeat, ) @log_function_time() @@ -170,7 +183,7 @@ def embed_chunks( @classmethod def from_db_search_settings( - cls, search_settings: SearchSettings + cls, search_settings: SearchSettings, heartbeat: Heartbeat | None = None ) -> "DefaultIndexingEmbedder": return cls( model_name=search_settings.model_name, @@ -179,27 +192,6 @@ def from_db_search_settings( passage_prefix=search_settings.passage_prefix, provider_type=search_settings.provider_type, api_key=search_settings.api_key, + api_url=search_settings.api_url, + heartbeat=heartbeat, ) - - -def get_embedding_model_from_search_settings( - db_session: Session, index_model_status: IndexModelStatus = IndexModelStatus.PRESENT -) -> IndexingEmbedder: - search_settings: SearchSettings | None - if index_model_status == IndexModelStatus.PRESENT: - search_settings = get_current_search_settings(db_session) - elif index_model_status == IndexModelStatus.FUTURE: - search_settings = get_secondary_search_settings(db_session) - if not search_settings: - raise RuntimeError("No secondary index configured") - else: - raise RuntimeError("Not supporting embedding model rollbacks") - - return DefaultIndexingEmbedder( - model_name=search_settings.model_name, - normalize=search_settings.normalize, - query_prefix=search_settings.query_prefix, - passage_prefix=search_settings.passage_prefix, - provider_type=search_settings.provider_type, - api_key=search_settings.api_key, - ) diff --git a/backend/danswer/indexing/indexing_heartbeat.py b/backend/danswer/indexing/indexing_heartbeat.py new file mode 100644 index 00000000000..c500a0ad559 --- /dev/null +++ b/backend/danswer/indexing/indexing_heartbeat.py @@ -0,0 +1,41 @@ +import abc +from typing import Any + +from sqlalchemy import func +from sqlalchemy.orm import Session + +from danswer.db.index_attempt import get_index_attempt +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +class Heartbeat(abc.ABC): + """Useful for any long-running work that goes through a bunch of items + and needs to occasionally give updates on progress. + e.g. 
chunking, embedding, updating vespa, etc.""" + + @abc.abstractmethod + def heartbeat(self, metadata: Any = None) -> None: + raise NotImplementedError + + +class IndexingHeartbeat(Heartbeat): + def __init__(self, index_attempt_id: int, db_session: Session, freq: int): + self.cnt = 0 + + self.index_attempt_id = index_attempt_id + self.db_session = db_session + self.freq = freq + + def heartbeat(self, metadata: Any = None) -> None: + self.cnt += 1 + if self.cnt % self.freq == 0: + index_attempt = get_index_attempt( + db_session=self.db_session, index_attempt_id=self.index_attempt_id + ) + if index_attempt: + index_attempt.time_updated = func.now() + self.db_session.commit() + else: + logger.error("Index attempt not found, this should not happen!") diff --git a/backend/danswer/indexing/indexing_pipeline.py b/backend/danswer/indexing/indexing_pipeline.py index 3517b55767d..992bce2dccf 100644 --- a/backend/danswer/indexing/indexing_pipeline.py +++ b/backend/danswer/indexing/indexing_pipeline.py @@ -7,6 +7,7 @@ from sqlalchemy.orm import Session from danswer.access.access import get_access_for_documents +from danswer.access.models import DocumentAccess from danswer.configs.app_configs import ENABLE_MULTIPASS_INDEXING from danswer.configs.app_configs import INDEXING_EXCEPTION_LIMIT from danswer.configs.constants import DEFAULT_BOOST @@ -17,7 +18,8 @@ from danswer.connectors.models import IndexAttemptMetadata from danswer.db.document import get_documents_by_ids from danswer.db.document import prepare_to_modify_documents -from danswer.db.document import update_docs_updated_at +from danswer.db.document import update_docs_last_modified__no_commit +from danswer.db.document import update_docs_updated_at__no_commit from danswer.db.document import upsert_documents_complete from danswer.db.document_set import fetch_document_sets_for_documents from danswer.db.index_attempt import create_index_attempt_error @@ -29,6 +31,7 @@ from danswer.document_index.interfaces import DocumentMetadata from danswer.indexing.chunker import Chunker from danswer.indexing.embedder import IndexingEmbedder +from danswer.indexing.indexing_heartbeat import IndexingHeartbeat from danswer.indexing.models import DocAwareChunk from danswer.indexing.models import DocMetadataAwareIndexChunk from danswer.utils.logger import setup_logger @@ -218,8 +221,8 @@ def index_doc_batch_prepare( document_ids = [document.id for document in documents] db_docs: list[DBDocument] = get_documents_by_ids( - document_ids=document_ids, db_session=db_session, + document_ids=document_ids, ) # Skip indexing docs that don't have a newer updated at @@ -263,6 +266,14 @@ def index_doc_batch( Note that the documents should already be batched at this point so that it does not inflate the memory requirements""" + no_access = DocumentAccess.build( + user_emails=[], + user_groups=[], + external_user_emails=[], + external_user_group_ids=[], + is_public=False, + ) + ctx = index_doc_batch_prepare( document_batch=document_batch, index_attempt_metadata=index_attempt_metadata, @@ -273,18 +284,10 @@ def index_doc_batch( return 0, 0 logger.debug("Starting chunking") - chunks: list[DocAwareChunk] = [] - for document in ctx.updatable_docs: - chunks.extend(chunker.chunk(document=document)) + chunks: list[DocAwareChunk] = chunker.chunk(ctx.updatable_docs) logger.debug("Starting embedding") - chunks_with_embeddings = ( - embedder.embed_chunks( - chunks=chunks, - ) - if chunks - else [] - ) + chunks_with_embeddings = embedder.embed_chunks(chunks) if chunks else [] updatable_ids = 
[doc.id for doc in ctx.updatable_docs] @@ -292,9 +295,6 @@ def index_doc_batch( # NOTE: don't need to acquire till here, since this is when the actual race condition # with Vespa can occur. with prepare_to_modify_documents(db_session=db_session, document_ids=updatable_ids): - # Attach the latest status from Postgres (source of truth for access) to each - # chunk. This access status will be attached to each chunk in the document index - # TODO: attach document sets to the chunk based on the status of Postgres as well document_id_to_access_info = get_access_for_documents( document_ids=updatable_ids, db_session=db_session ) @@ -304,10 +304,18 @@ def index_doc_batch( document_ids=updatable_ids, db_session=db_session ) } + + # we're concerned about race conditions where multiple simultaneous indexings might result + # in one set of metadata overwriting another one in vespa. + # we still write data here for immediate and most likely correct sync, but + # to resolve this, an update of the last modified field at the end of this loop + # always triggers a final metadata sync access_aware_chunks = [ DocMetadataAwareIndexChunk.from_index_chunk( index_chunk=chunk, - access=document_id_to_access_info[chunk.source_document.id], + access=document_id_to_access_info.get( + chunk.source_document.id, no_access + ), document_sets=set( document_id_to_document_set.get(chunk.source_document.id, []) ), @@ -333,17 +341,25 @@ def index_doc_batch( doc for doc in ctx.updatable_docs if doc.id in successful_doc_ids ] - # Update the time of latest version of the doc successfully indexed + last_modified_ids = [] ids_to_new_updated_at = {} for doc in successful_docs: + last_modified_ids.append(doc.id) + # doc_updated_at is the connector source's idea of when the doc was last modified if doc.doc_updated_at is None: continue ids_to_new_updated_at[doc.id] = doc.doc_updated_at - update_docs_updated_at( + update_docs_updated_at__no_commit( ids_to_new_updated_at=ids_to_new_updated_at, db_session=db_session ) + update_docs_last_modified__no_commit( + document_ids=last_modified_ids, db_session=db_session + ) + + db_session.commit() + return len([r for r in insertion_records if r.already_existed is False]), len( access_aware_chunks ) @@ -383,6 +399,13 @@ def build_indexing_pipeline( tokenizer=embedder.embedding_model.tokenizer, enable_multipass=multipass, enable_large_chunks=enable_large_chunks, + # after every doc, update status in case there are a bunch of + # really long docs + heartbeat=IndexingHeartbeat( + index_attempt_id=attempt_id, db_session=db_session, freq=1 + ) + if attempt_id + else None, ) return partial( diff --git a/backend/danswer/indexing/models.py b/backend/danswer/indexing/models.py index b23de0eb477..c789a2b351b 100644 --- a/backend/danswer/indexing/models.py +++ b/backend/danswer/indexing/models.py @@ -61,6 +61,8 @@ class IndexChunk(DocAwareChunk): title_embedding: Embedding | None +# TODO(rkuo): currently, this extra metadata sent during indexing is just for speed, +# but full consistency happens on background sync class DocMetadataAwareIndexChunk(IndexChunk): """An `IndexChunk` that contains all necessary metadata to be indexed. 
This includes the following: @@ -95,10 +97,12 @@ def from_index_chunk( class EmbeddingModelDetail(BaseModel): + id: int | None = None model_name: str normalize: bool query_prefix: str | None passage_prefix: str | None + api_url: str | None = None provider_type: EmbeddingProvider | None = None api_key: str | None = None @@ -111,12 +115,14 @@ def from_db_model( search_settings: "SearchSettings", ) -> "EmbeddingModelDetail": return cls( + id=search_settings.id, model_name=search_settings.model_name, normalize=search_settings.normalize, query_prefix=search_settings.query_prefix, passage_prefix=search_settings.passage_prefix, provider_type=search_settings.provider_type, api_key=search_settings.api_key, + api_url=search_settings.api_url, ) diff --git a/backend/danswer/llm/answering/answer.py b/backend/danswer/llm/answering/answer.py index a664db217af..5cddadeb2db 100644 --- a/backend/danswer/llm/answering/answer.py +++ b/backend/danswer/llm/answering/answer.py @@ -1,5 +1,7 @@ +import itertools from collections.abc import Callable from collections.abc import Iterator +from typing import Any from typing import cast from uuid import uuid4 @@ -12,6 +14,8 @@ from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import LlmDoc +from danswer.chat.models import StreamStopInfo +from danswer.chat.models import StreamStopReason from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE from danswer.file_store.utils import InMemoryChatFile from danswer.llm.answering.models import AnswerStyleConfig @@ -35,7 +39,7 @@ from danswer.llm.answering.stream_processing.utils import DocumentIdOrderMapping from danswer.llm.answering.stream_processing.utils import map_document_id_order from danswer.llm.interfaces import LLM -from danswer.llm.utils import message_generator_to_string_generator +from danswer.llm.interfaces import ToolChoiceOptions from danswer.natural_language_processing.utils import get_tokenizer from danswer.tools.custom.custom_tool_prompt_builder import ( build_user_message_for_custom_tool_for_non_tool_calling_llm, @@ -49,7 +53,7 @@ from danswer.tools.internet_search.internet_search_tool import InternetSearchTool from danswer.tools.message import build_tool_message from danswer.tools.message import ToolCallSummary -from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS +from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS_ID from danswer.tools.search.search_tool import SEARCH_DOC_CONTENT_ID from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID from danswer.tools.search.search_tool import SearchResponseSummary @@ -176,6 +180,7 @@ def _update_prompt_builder_for_search_tool( if self.answer_style_config.citation_config else False ), + history_message=self.single_message_history or "", ) ) elif self.answer_style_config.quotes_config: @@ -190,7 +195,9 @@ def _update_prompt_builder_for_search_tool( def _raw_output_for_explicit_tool_calling_llms( self, - ) -> Iterator[str | ToolCallKickoff | ToolResponse | ToolCallFinalResult]: + ) -> Iterator[ + str | StreamStopInfo | ToolCallKickoff | ToolResponse | ToolCallFinalResult + ]: prompt_builder = AnswerPromptBuilder(self.message_history, self.llm.config) tool_call_chunk: AIMessageChunk | None = None @@ -225,6 +232,7 @@ def _raw_output_for_explicit_tool_calling_llms( self.tools, self.force_use_tool ) ] + for message in self.llm.stream( prompt=prompt, tools=final_tool_definitions if final_tool_definitions else None, @@ -242,6 +250,13 @@ def 
_raw_output_for_explicit_tool_calling_llms( if self.is_cancelled: return yield cast(str, message.content) + if ( + message.additional_kwargs.get("usage_metadata", {}).get("stop") + == "length" + ): + yield StreamStopInfo( + stop_reason=StreamStopReason.CONTEXT_LENGTH + ) if not tool_call_chunk: return # no tool call needed @@ -298,21 +313,43 @@ def _raw_output_for_explicit_tool_calling_llms( yield tool_runner.tool_final_result() prompt = prompt_builder.build(tool_call_summary=tool_call_summary) - for token in message_generator_to_string_generator( - self.llm.stream( - prompt=prompt, - tools=[tool.tool_definition() for tool in self.tools], - ) - ): - if self.is_cancelled: - return - yield token + + yield from self._process_llm_stream( + prompt=prompt, + # as of now, we don't support multiple tool calls in sequence, which is why + # we don't need to pass this in here + # tools=[tool.tool_definition() for tool in self.tools], + ) return + # This method processes the LLM stream and yields the content or stop information + def _process_llm_stream( + self, + prompt: Any, + tools: list[dict] | None = None, + tool_choice: ToolChoiceOptions | None = None, + ) -> Iterator[str | StreamStopInfo]: + for message in self.llm.stream( + prompt=prompt, tools=tools, tool_choice=tool_choice + ): + if isinstance(message, AIMessageChunk): + if message.content: + if self.is_cancelled: + return StreamStopInfo(stop_reason=StreamStopReason.CANCELLED) + yield cast(str, message.content) + + if ( + message.additional_kwargs.get("usage_metadata", {}).get("stop") + == "length" + ): + yield StreamStopInfo(stop_reason=StreamStopReason.CONTEXT_LENGTH) + def _raw_output_for_non_explicit_tool_calling_llms( self, - ) -> Iterator[str | ToolCallKickoff | ToolResponse | ToolCallFinalResult]: + ) -> Iterator[ + str | StreamStopInfo | ToolCallKickoff | ToolResponse | ToolCallFinalResult + ]: prompt_builder = AnswerPromptBuilder(self.message_history, self.llm.config) chosen_tool_and_args: tuple[Tool, dict] | None = None @@ -387,13 +424,10 @@ def _raw_output_for_non_explicit_tool_calling_llms( ) ) prompt = prompt_builder.build() - for token in message_generator_to_string_generator( - self.llm.stream(prompt=prompt) - ): - if self.is_cancelled: - return - yield token - + yield from self._process_llm_stream( + prompt=prompt, + tools=None, + ) return tool, tool_args = chosen_tool_and_args @@ -403,7 +437,7 @@ def _raw_output_for_non_explicit_tool_calling_llms( if tool.name in {SearchTool._NAME, InternetSearchTool._NAME}: final_context_documents = None for response in tool_runner.tool_responses(): - if response.id == FINAL_CONTEXT_DOCUMENTS: + if response.id == FINAL_CONTEXT_DOCUMENTS_ID: final_context_documents = cast(list[LlmDoc], response.response) yield response @@ -447,12 +481,8 @@ def _raw_output_for_non_explicit_tool_calling_llms( yield final prompt = prompt_builder.build() - for token in message_generator_to_string_generator( - self.llm.stream(prompt=prompt) - ): - if self.is_cancelled: - return - yield token + + yield from self._process_llm_stream(prompt=prompt, tools=None) @property def processed_streamed_output(self) -> AnswerStream: @@ -470,17 +500,15 @@ def processed_streamed_output(self) -> AnswerStream: ) def _process_stream( - stream: Iterator[ToolCallKickoff | ToolResponse | str], + stream: Iterator[ToolCallKickoff | ToolResponse | str | StreamStopInfo], ) -> AnswerStream: message = None # special things we need to keep track of for the SearchTool - search_results: list[ - LlmDoc - ] | None = None # raw results that will 
be displayed to the user - final_context_docs: list[ - LlmDoc - ] | None = None # processed docs to feed into the LLM + # raw results that will be displayed to the user + search_results: list[LlmDoc] | None = None + # processed docs to feed into the LLM + final_context_docs: list[LlmDoc] | None = None for message in stream: if isinstance(message, ToolCallKickoff) or isinstance( @@ -499,8 +527,9 @@ def _process_stream( SearchResponseSummary, message.response ).top_sections ] - elif message.id == FINAL_CONTEXT_DOCUMENTS: + elif message.id == FINAL_CONTEXT_DOCUMENTS_ID: final_context_docs = cast(list[LlmDoc], message.response) + yield message elif ( message.id == SEARCH_DOC_CONTENT_ID @@ -524,13 +553,29 @@ def _process_stream( answer_style_configs=self.answer_style_config, ) + stream_stop_info = None + def _stream() -> Iterator[str]: - if message: - yield cast(str, message) - yield from cast(Iterator[str], stream) + nonlocal stream_stop_info + for item in itertools.chain([message], stream): + if isinstance(item, StreamStopInfo): + stream_stop_info = item + return + + # this should never happen, but we're seeing weird behavior here so handling for now + if not isinstance(item, str): + logger.error( + f"Received non-string item in answer stream: {item}. Skipping." + ) + continue + + yield item yield from process_answer_stream_fn(_stream()) + if stream_stop_info: + yield stream_stop_info + processed_stream = [] for processed_packet in _process_stream(output_generator): processed_stream.append(processed_packet) diff --git a/backend/danswer/llm/answering/prompts/citations_prompt.py b/backend/danswer/llm/answering/prompts/citations_prompt.py index eddae9badb4..52345f3e587 100644 --- a/backend/danswer/llm/answering/prompts/citations_prompt.py +++ b/backend/danswer/llm/answering/prompts/citations_prompt.py @@ -29,6 +29,9 @@ from danswer.prompts.token_counts import CITATION_STATEMENT_TOKEN_CNT from danswer.prompts.token_counts import LANGUAGE_HINT_TOKEN_CNT from danswer.search.models import InferenceChunk +from danswer.utils.logger import setup_logger + +logger = setup_logger() def get_prompt_tokens(prompt_config: PromptConfig) -> int: @@ -156,6 +159,7 @@ def build_citations_user_message( user_prompt = CITATIONS_PROMPT_FOR_TOOL_CALLING.format( task_prompt=task_prompt_with_reminder, user_query=question, + history_block=history_message, ) user_prompt = user_prompt.strip() diff --git a/backend/danswer/llm/answering/stream_processing/citation_processing.py b/backend/danswer/llm/answering/stream_processing/citation_processing.py index de80b6f6756..f1e5489550d 100644 --- a/backend/danswer/llm/answering/stream_processing/citation_processing.py +++ b/backend/danswer/llm/answering/stream_processing/citation_processing.py @@ -11,7 +11,6 @@ from danswer.prompts.constants import TRIPLE_BACKTICK from danswer.utils.logger import setup_logger - logger = setup_logger() @@ -86,6 +85,15 @@ def extract_citations_from_stream( curr_segment += token llm_out += token + # Handle code blocks without language tags + if "`" in curr_segment: + if curr_segment.endswith("`"): + continue + elif "```" in curr_segment: + piece_that_comes_after = curr_segment.split("```")[1][0] + if piece_that_comes_after == "\n" and in_code_block(llm_out): + curr_segment = curr_segment.replace("```", "```plaintext") + citation_pattern = r"\[(\d+)\]" citations_found = list(re.finditer(citation_pattern, curr_segment)) @@ -204,7 +212,9 @@ def extract_citations_from_stream( def build_citation_processor( context_docs: list[LlmDoc], doc_id_to_rank_map: 
DocumentIdOrderMapping ) -> StreamProcessor: - def stream_processor(tokens: Iterator[str]) -> AnswerQuestionStreamReturn: + def stream_processor( + tokens: Iterator[str], + ) -> AnswerQuestionStreamReturn: yield from extract_citations_from_stream( tokens=tokens, context_docs=context_docs, diff --git a/backend/danswer/llm/answering/stream_processing/quotes_processing.py b/backend/danswer/llm/answering/stream_processing/quotes_processing.py index 74f37b85264..501a56b5aa7 100644 --- a/backend/danswer/llm/answering/stream_processing/quotes_processing.py +++ b/backend/danswer/llm/answering/stream_processing/quotes_processing.py @@ -285,7 +285,9 @@ def process_model_tokens( def build_quotes_processor( context_docs: list[LlmDoc], is_json_prompt: bool ) -> Callable[[Iterator[str]], AnswerQuestionStreamReturn]: - def stream_processor(tokens: Iterator[str]) -> AnswerQuestionStreamReturn: + def stream_processor( + tokens: Iterator[str], + ) -> AnswerQuestionStreamReturn: yield from process_model_tokens( tokens=tokens, context_docs=context_docs, diff --git a/backend/danswer/llm/chat_llm.py b/backend/danswer/llm/chat_llm.py index 359e3239b9d..b65498a2ab1 100644 --- a/backend/danswer/llm/chat_llm.py +++ b/backend/danswer/llm/chat_llm.py @@ -25,9 +25,6 @@ from danswer.configs.app_configs import LOG_ALL_MODEL_INTERACTIONS from danswer.configs.app_configs import LOG_DANSWER_MODEL_INTERACTIONS from danswer.configs.model_configs import DISABLE_LITELLM_STREAMING -from danswer.configs.model_configs import GEN_AI_API_ENDPOINT -from danswer.configs.model_configs import GEN_AI_API_VERSION -from danswer.configs.model_configs import GEN_AI_LLM_PROVIDER_TYPE from danswer.configs.model_configs import GEN_AI_TEMPERATURE from danswer.llm.interfaces import LLM from danswer.llm.interfaces import LLMConfig @@ -141,7 +138,9 @@ def _convert_message_to_dict(message: BaseMessage) -> dict: def _convert_delta_to_message_chunk( - _dict: dict[str, Any], curr_msg: BaseMessage | None + _dict: dict[str, Any], + curr_msg: BaseMessage | None, + stop_reason: str | None = None, ) -> BaseMessageChunk: """Adapted from langchain_community.chat_models.litellm._convert_delta_to_message_chunk""" role = _dict.get("role") or (_base_msg_to_role(curr_msg) if curr_msg else None) @@ -166,12 +165,23 @@ def _convert_delta_to_message_chunk( args=tool_call.function.arguments, index=0, # only support a single tool call atm ) + return AIMessageChunk( content=content, - additional_kwargs=additional_kwargs, tool_call_chunks=[tool_call_chunk], + additional_kwargs={ + "usage_metadata": {"stop": stop_reason}, + **additional_kwargs, + }, ) - return AIMessageChunk(content=content, additional_kwargs=additional_kwargs) + + return AIMessageChunk( + content=content, + additional_kwargs={ + "usage_metadata": {"stop": stop_reason}, + **additional_kwargs, + }, + ) elif role == "system": return SystemMessageChunk(content=content) elif role == "function": @@ -192,10 +202,10 @@ def __init__( timeout: int, model_provider: str, model_name: str, + api_base: str | None = None, + api_version: str | None = None, max_output_tokens: int | None = None, - api_base: str | None = GEN_AI_API_ENDPOINT, - api_version: str | None = GEN_AI_API_VERSION, - custom_llm_provider: str | None = GEN_AI_LLM_PROVIDER_TYPE, + custom_llm_provider: str | None = None, temperature: float = GEN_AI_TEMPERATURE, custom_config: dict[str, str] | None = None, extra_headers: dict[str, str] | None = None, @@ -209,7 +219,7 @@ def __init__( self._api_version = api_version self._custom_llm_provider = 
custom_llm_provider - # This can be used to store the maximum output tkoens for this model. + # This can be used to store the maximum output tokens for this model. # self._max_output_tokens = ( # max_output_tokens # if max_output_tokens is not None @@ -277,15 +287,17 @@ def _completion( prompt = [_convert_message_to_dict(HumanMessage(content=prompt))] try: - # When custom LLM provider is supplied, model name doesn't require prefix in LiteLLM + # NOTE(sd109): When custom LLM provider is supplied, model name doesn't require prefix in LiteLLM prefix = f"{self.config.model_provider}/" if not self._custom_llm_provider else "" return litellm.completion( # model choice model=f"{prefix}{self.config.model_name}", - api_key=self._api_key, - base_url=self._api_base, - api_version=self._api_version, - custom_llm_provider=self._custom_llm_provider, + # NOTE: have to pass in None instead of empty string for these + # otherwise litellm can have some issues with bedrock + api_key=self._api_key or None, + base_url=self._api_base or None, + api_version=self._api_version or None, + custom_llm_provider=self._custom_llm_provider or None, # actual input messages=prompt, tools=tools, @@ -354,10 +366,16 @@ def _stream_implementation( ) try: for part in response: - if len(part["choices"]) == 0: + if not part["choices"]: continue - delta = part["choices"][0]["delta"] - message_chunk = _convert_delta_to_message_chunk(delta, output) + + choice = part["choices"][0] + message_chunk = _convert_delta_to_message_chunk( + choice["delta"], + output, + stop_reason=choice["finish_reason"], + ) + if output is None: output = message_chunk else: diff --git a/backend/danswer/llm/custom_llm.py b/backend/danswer/llm/custom_llm.py index 967e014a903..4a5ba7857c3 100644 --- a/backend/danswer/llm/custom_llm.py +++ b/backend/danswer/llm/custom_llm.py @@ -7,7 +7,6 @@ from langchain_core.messages import BaseMessage from requests import Timeout -from danswer.configs.model_configs import GEN_AI_API_ENDPOINT from danswer.configs.model_configs import GEN_AI_NUM_RESERVED_OUTPUT_TOKENS from danswer.llm.interfaces import LLM from danswer.llm.interfaces import ToolChoiceOptions @@ -37,7 +36,7 @@ def __init__( # Not used here but you probably want a model server that isn't completely open api_key: str | None, timeout: int, - endpoint: str | None = GEN_AI_API_ENDPOINT, + endpoint: str, max_output_tokens: int = GEN_AI_NUM_RESERVED_OUTPUT_TOKENS, ): if not endpoint: diff --git a/backend/danswer/llm/llm_initialization.py b/backend/danswer/llm/llm_initialization.py deleted file mode 100644 index db59b836d7f..00000000000 --- a/backend/danswer/llm/llm_initialization.py +++ /dev/null @@ -1,113 +0,0 @@ -from sqlalchemy.orm import Session - -from danswer.configs.app_configs import DISABLE_GENERATIVE_AI -from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION -from danswer.configs.model_configs import GEN_AI_API_ENDPOINT -from danswer.configs.model_configs import GEN_AI_API_KEY -from danswer.configs.model_configs import GEN_AI_API_VERSION -from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION -from danswer.configs.model_configs import GEN_AI_LLM_PROVIDER_TYPE -from danswer.configs.model_configs import GEN_AI_DISPLAY_NAME -from danswer.db.llm import fetch_existing_llm_providers -from danswer.db.llm import update_default_provider -from danswer.db.llm import upsert_llm_provider -from danswer.llm.llm_provider_options import AZURE_PROVIDER_NAME -from 
danswer.llm.llm_provider_options import BEDROCK_PROVIDER_NAME -from danswer.llm.llm_provider_options import fetch_available_well_known_llms -from danswer.server.manage.llm.models import LLMProviderUpsertRequest -from danswer.utils.logger import setup_logger - - -logger = setup_logger() - - -def load_llm_providers(db_session: Session) -> None: - existing_providers = fetch_existing_llm_providers(db_session) - if existing_providers: - return - - if not GEN_AI_API_KEY or DISABLE_GENERATIVE_AI: - return - - if GEN_AI_MODEL_PROVIDER == "custom": - # Validate that all required env vars are present - for var in ( - GEN_AI_LLM_PROVIDER_TYPE, - GEN_AI_API_ENDPOINT, - GEN_AI_MODEL_VERSION, - GEN_AI_DISPLAY_NAME, - ): - if not var: - logger.error( - "Cannot auto-transition custom LLM provider due to missing env vars." - "The following env vars must all be set:" - "GEN_AI_LLM_PROVIDER_TYPE, GEN_AI_API_ENDPOINT, GEN_AI_MODEL_VERSION, GEN_AI_DISPLAY_NAME" - ) - return None - llm_provider_request = LLMProviderUpsertRequest( - name=GEN_AI_DISPLAY_NAME, - provider=GEN_AI_MODEL_PROVIDER, - api_key=GEN_AI_API_KEY, - api_base=GEN_AI_API_ENDPOINT, - api_version=GEN_AI_API_VERSION, - custom_config={}, - default_model_name=GEN_AI_MODEL_VERSION, - fast_default_model_name=FAST_GEN_AI_MODEL_VERSION, - ) - - else: - - well_known_provider_name_to_provider = { - provider.name: provider - for provider in fetch_available_well_known_llms() - if provider.name != BEDROCK_PROVIDER_NAME - } - - if GEN_AI_MODEL_PROVIDER not in well_known_provider_name_to_provider: - logger.error( - f"Cannot auto-transition LLM provider: {GEN_AI_MODEL_PROVIDER}" - ) - return None - - # Azure provider requires custom model names, - # OpenAI / anthropic can just use the defaults - model_names = ( - [ - name - for name in [ - GEN_AI_MODEL_VERSION, - FAST_GEN_AI_MODEL_VERSION, - ] - if name - ] - if GEN_AI_MODEL_PROVIDER == AZURE_PROVIDER_NAME - else None - ) - - well_known_provider = well_known_provider_name_to_provider[ - GEN_AI_MODEL_PROVIDER - ] - llm_provider_request = LLMProviderUpsertRequest( - name=well_known_provider.display_name, - provider=GEN_AI_MODEL_PROVIDER, - api_key=GEN_AI_API_KEY, - api_base=GEN_AI_API_ENDPOINT, - api_version=GEN_AI_API_VERSION, - custom_config={}, - default_model_name=( - GEN_AI_MODEL_VERSION - or well_known_provider.default_model - or well_known_provider.llm_names[0] - ), - fast_default_model_name=( - FAST_GEN_AI_MODEL_VERSION or well_known_provider.default_fast_model - ), - model_names=model_names, - ) - - llm_provider = upsert_llm_provider(db_session, llm_provider_request) - update_default_provider(db_session, llm_provider.id) - logger.notice( - f"Migrated LLM provider from env variables for provider '{GEN_AI_MODEL_PROVIDER}'" - ) diff --git a/backend/danswer/llm/llm_provider_options.py b/backend/danswer/llm/llm_provider_options.py index 24feeb2f27c..8fc1de73955 100644 --- a/backend/danswer/llm/llm_provider_options.py +++ b/backend/danswer/llm/llm_provider_options.py @@ -24,6 +24,8 @@ class WellKnownLLMProviderDescriptor(BaseModel): OPENAI_PROVIDER_NAME = "openai" OPEN_AI_MODEL_NAMES = [ + "o1-mini", + "o1-preview", "gpt-4", "gpt-4o", "gpt-4o-mini", @@ -95,8 +97,8 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]: api_version_required=False, custom_config_keys=[], llm_names=fetch_models_for_provider(ANTHROPIC_PROVIDER_NAME), - default_model="claude-3-opus-20240229", - default_fast_model="claude-3-sonnet-20240229", + default_model="claude-3-5-sonnet-20240620", + 
default_fast_model="claude-3-5-sonnet-20240620", ), WellKnownLLMProviderDescriptor( name=AZURE_PROVIDER_NAME, @@ -128,8 +130,8 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]: ), ], llm_names=fetch_models_for_provider(BEDROCK_PROVIDER_NAME), - default_model="anthropic.claude-3-sonnet-20240229-v1:0", - default_fast_model="anthropic.claude-3-haiku-20240307-v1:0", + default_model="anthropic.claude-3-5-sonnet-20240620-v1:0", + default_fast_model="anthropic.claude-3-5-sonnet-20240620-v1:0", ), ] diff --git a/backend/danswer/llm/utils.py b/backend/danswer/llm/utils.py index 82617f3f05b..3a5e40875f1 100644 --- a/backend/danswer/llm/utils.py +++ b/backend/danswer/llm/utils.py @@ -32,7 +32,6 @@ from danswer.configs.constants import MessageType from danswer.configs.model_configs import GEN_AI_MAX_TOKENS from danswer.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS -from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER from danswer.configs.model_configs import GEN_AI_NUM_RESERVED_OUTPUT_TOKENS from danswer.db.models import ChatMessage from danswer.file_store.models import ChatFileType @@ -48,7 +47,9 @@ logger = setup_logger() -def litellm_exception_to_error_msg(e: Exception, llm: LLM) -> str: +def litellm_exception_to_error_msg( + e: Exception, llm: LLM, fallback_to_error_msg: bool = False +) -> str: error_msg = str(e) if isinstance(e, BadRequestError): @@ -95,7 +96,7 @@ def litellm_exception_to_error_msg(e: Exception, llm: LLM) -> str: error_msg = "Request timed out: The operation took too long to complete. Please try again." elif isinstance(e, APIError): error_msg = f"API error: An error occurred while communicating with the API. Details: {str(e)}" - else: + elif not fallback_to_error_msg: error_msg = "An unexpected error occurred while processing your request. Please try again later." 
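As a usage illustration of the new fallback_to_error_msg flag in this hunk, a minimal sketch; the _DummyLLM placeholder assumes the llm argument is only consulted for the specific litellm error types handled earlier in the function, not for generic exceptions.

from danswer.llm.utils import litellm_exception_to_error_msg

class _DummyLLM:
    """Placeholder stand-in; assumed not to be inspected for unrecognized exception types."""

err = ValueError("boom")

# Default behaviour: unrecognized exceptions are masked behind the generic user-facing message.
print(litellm_exception_to_error_msg(err, llm=_DummyLLM()))

# With fallback_to_error_msg=True, the original exception text (str(e)) is returned instead.
print(litellm_exception_to_error_msg(err, llm=_DummyLLM(), fallback_to_error_msg=True))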
return error_msg @@ -331,7 +332,7 @@ def test_llm(llm: LLM) -> str | None: def get_llm_max_tokens( model_map: dict, model_name: str, - model_provider: str = GEN_AI_MODEL_PROVIDER, + model_provider: str, ) -> int: """Best effort attempt to get the max tokens for the LLM""" if GEN_AI_MAX_TOKENS: @@ -371,7 +372,7 @@ def get_llm_max_tokens( def get_llm_max_output_tokens( model_map: dict, model_name: str, - model_provider: str = GEN_AI_MODEL_PROVIDER, + model_provider: str, ) -> int: """Best effort attempt to get the max output tokens for the LLM""" try: diff --git a/backend/danswer/main.py b/backend/danswer/main.py index 6652e5d3c39..a5abb8f28c2 100644 --- a/backend/danswer/main.py +++ b/backend/danswer/main.py @@ -1,4 +1,5 @@ import time +import traceback from collections.abc import AsyncGenerator from contextlib import asynccontextmanager from typing import Any @@ -7,7 +8,9 @@ import uvicorn from fastapi import APIRouter from fastapi import FastAPI +from fastapi import HTTPException from fastapi import Request +from fastapi import status from fastapi.exceptions import RequestValidationError from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse @@ -36,6 +39,9 @@ from danswer.configs.constants import KV_REINDEX_KEY from danswer.configs.constants import KV_SEARCH_SETTINGS from danswer.configs.constants import POSTGRES_WEB_APP_NAME +from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION +from danswer.configs.model_configs import GEN_AI_API_KEY +from danswer.configs.model_configs import GEN_AI_MODEL_VERSION from danswer.db.connector import check_connectors_exist from danswer.db.connector import create_initial_default_connector from danswer.db.connector_credential_pair import associate_default_cc_pair @@ -48,19 +54,20 @@ from danswer.db.engine import warm_up_connections from danswer.db.index_attempt import cancel_indexing_attempts_past_model from danswer.db.index_attempt import expire_index_attempts +from danswer.db.llm import fetch_default_provider +from danswer.db.llm import update_default_provider +from danswer.db.llm import upsert_llm_provider from danswer.db.persona import delete_old_default_personas from danswer.db.search_settings import get_current_search_settings from danswer.db.search_settings import get_secondary_search_settings from danswer.db.search_settings import update_current_search_settings from danswer.db.search_settings import update_secondary_search_settings -from danswer.db.standard_answer import create_initial_default_standard_answer_category from danswer.db.swap_index import check_index_swap from danswer.document_index.factory import get_default_document_index from danswer.document_index.interfaces import DocumentIndex from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.indexing.models import IndexingSetting -from danswer.llm.llm_initialization import load_llm_providers from danswer.natural_language_processing.search_nlp_models import EmbeddingModel from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder from danswer.natural_language_processing.search_nlp_models import warm_up_cross_encoder @@ -91,9 +98,9 @@ from danswer.server.manage.get_state import router as state_router from danswer.server.manage.llm.api import admin_router as llm_admin_router from danswer.server.manage.llm.api import basic_router as llm_router +from danswer.server.manage.llm.models import LLMProviderUpsertRequest from 
danswer.server.manage.search_settings import router as search_settings_router from danswer.server.manage.slack_bot import router as slack_bot_management_router -from danswer.server.manage.standard_answer import router as standard_answer_router from danswer.server.manage.users import router as user_router from danswer.server.middleware.latency_logging import add_latency_logging_middleware from danswer.server.query_and_chat.chat_backend import router as chat_router @@ -103,22 +110,26 @@ from danswer.server.query_and_chat.query_backend import basic_router as query_router from danswer.server.settings.api import admin_router as settings_admin_router from danswer.server.settings.api import basic_router as settings_router +from danswer.server.settings.store import load_settings +from danswer.server.settings.store import store_settings from danswer.server.token_rate_limits.api import ( router as token_rate_limit_settings_router, ) from danswer.tools.built_in_tools import auto_add_search_tool_to_personas from danswer.tools.built_in_tools import load_builtin_tools from danswer.tools.built_in_tools import refresh_built_in_tools_cache +from danswer.utils.gpu_utils import gpu_status_request from danswer.utils.logger import setup_logger +from danswer.utils.telemetry import get_or_generate_uuid from danswer.utils.telemetry import optional_telemetry from danswer.utils.telemetry import RecordType from danswer.utils.variable_functionality import fetch_versioned_implementation from danswer.utils.variable_functionality import global_version from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable +from shared_configs.configs import CORS_ALLOWED_ORIGIN from shared_configs.configs import MODEL_SERVER_HOST from shared_configs.configs import MODEL_SERVER_PORT - logger = setup_logger() @@ -176,12 +187,6 @@ def setup_postgres(db_session: Session) -> None: create_initial_default_connector(db_session) associate_default_cc_pair(db_session) - logger.notice("Verifying default standard answer category exists.") - create_initial_default_standard_answer_category(db_session) - - logger.notice("Loading LLM providers from env variables") - load_llm_providers(db_session) - logger.notice("Loading default Prompts and Personas") delete_old_default_personas(db_session) load_chat_yamls() @@ -191,6 +196,64 @@ def setup_postgres(db_session: Session) -> None: refresh_built_in_tools_cache(db_session) auto_add_search_tool_to_personas(db_session) + if GEN_AI_API_KEY and fetch_default_provider(db_session) is None: + # Only for dev flows + logger.notice("Setting up default OpenAI LLM for dev.") + llm_model = GEN_AI_MODEL_VERSION or "gpt-4o-mini" + fast_model = FAST_GEN_AI_MODEL_VERSION or "gpt-4o-mini" + model_req = LLMProviderUpsertRequest( + name="DevEnvPresetOpenAI", + provider="openai", + api_key=GEN_AI_API_KEY, + api_base=None, + api_version=None, + custom_config=None, + default_model_name=llm_model, + fast_default_model_name=fast_model, + is_public=True, + groups=[], + display_model_names=[llm_model, fast_model], + model_names=[llm_model, fast_model], + ) + new_llm_provider = upsert_llm_provider( + llm_provider=model_req, db_session=db_session + ) + update_default_provider(provider_id=new_llm_provider.id, db_session=db_session) + + +def update_default_multipass_indexing(db_session: Session) -> None: + docs_exist = check_docs_exist(db_session) + connectors_exist = check_connectors_exist(db_session) + logger.debug(f"Docs exist: {docs_exist}, Connectors exist: {connectors_exist}") + + if not docs_exist and not 
connectors_exist: + logger.info( + "No existing docs or connectors found. Checking GPU availability for multipass indexing." + ) + gpu_available = gpu_status_request() + logger.info(f"GPU available: {gpu_available}") + + current_settings = get_current_search_settings(db_session) + + logger.notice(f"Updating multipass indexing setting to: {gpu_available}") + updated_settings = SavedSearchSettings.from_db_model(current_settings) + # Enable multipass indexing if GPU is available or if using a cloud provider + updated_settings.multipass_indexing = ( + gpu_available or current_settings.cloud_provider is not None + ) + update_current_search_settings(db_session, updated_settings) + + # Update settings with GPU availability + settings = load_settings() + settings.gpu_enabled = gpu_available + store_settings(settings) + logger.notice(f"Updated settings with GPU availability: {gpu_available}") + + else: + logger.debug( + "Existing docs or connectors found. Skipping multipass indexing update." + ) + def translate_saved_search_settings(db_session: Session) -> None: kv_store = get_dynamic_config_store() @@ -260,21 +323,32 @@ def setup_vespa( document_index: DocumentIndex, index_setting: IndexingSetting, secondary_index_setting: IndexingSetting | None, -) -> None: +) -> bool: # Vespa startup is a bit slow, so give it a few seconds - wait_time = 5 - for _ in range(5): + WAIT_SECONDS = 5 + VESPA_ATTEMPTS = 5 + for x in range(VESPA_ATTEMPTS): try: + logger.notice(f"Setting up Vespa (attempt {x+1}/{VESPA_ATTEMPTS})...") document_index.ensure_indices_exist( index_embedding_dim=index_setting.model_dim, secondary_index_embedding_dim=secondary_index_setting.model_dim if secondary_index_setting else None, ) - break + + logger.notice("Vespa setup complete.") + return True except Exception: - logger.notice(f"Waiting on Vespa, retrying in {wait_time} seconds...") - time.sleep(wait_time) + logger.notice( + f"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds." + ) + time.sleep(WAIT_SECONDS) + + logger.error( + f"Vespa setup did not succeed. Attempt limit reached. ({VESPA_ATTEMPTS})" + ) + return False @asynccontextmanager @@ -297,6 +371,9 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: # fill up Postgres connection pools await warm_up_connections() + # We cache this at the beginning so there is no delay in the first telemetry + get_or_generate_uuid() + with Session(engine) as db_session: check_index_swap(db_session=db_session) search_settings = get_current_search_settings(db_session) @@ -329,8 +406,11 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: logger.notice( f"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}." ) - - if search_settings.rerank_model_name and not search_settings.provider_type: + if ( + search_settings.rerank_model_name + and not search_settings.provider_type + and not search_settings.rerank_provider_type + ): warm_up_cross_encoder(search_settings.rerank_model_name) logger.notice("Verifying query preprocessing (NLTK) data is downloaded") @@ -353,13 +433,18 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: if secondary_search_settings else None, ) - setup_vespa( + + success = setup_vespa( document_index, IndexingSetting.from_db_model(search_settings), IndexingSetting.from_db_model(secondary_search_settings) if secondary_search_settings else None, ) + if not success: + raise RuntimeError( + "Could not connect to Vespa within the specified timeout." 
+ ) logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}") if search_settings.provider_type is None: @@ -371,15 +456,41 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: ), ) + # update multipass indexing setting based on GPU availability + update_default_multipass_indexing(db_session) + optional_telemetry(record_type=RecordType.VERSION, data={"version": __version__}) yield +def log_http_error(_: Request, exc: Exception) -> JSONResponse: + status_code = getattr(exc, "status_code", 500) + if status_code >= 400: + error_msg = f"{str(exc)}\n" + error_msg += "".join(traceback.format_tb(exc.__traceback__)) + logger.error(error_msg) + + detail = exc.detail if isinstance(exc, HTTPException) else str(exc) + return JSONResponse( + status_code=status_code, + content={"detail": detail}, + ) + + def get_application() -> FastAPI: application = FastAPI( title="Danswer Backend", version=__version__, lifespan=lifespan ) + # Add the custom exception handler + application.add_exception_handler(status.HTTP_400_BAD_REQUEST, log_http_error) + application.add_exception_handler(status.HTTP_401_UNAUTHORIZED, log_http_error) + application.add_exception_handler(status.HTTP_403_FORBIDDEN, log_http_error) + application.add_exception_handler(status.HTTP_404_NOT_FOUND, log_http_error) + application.add_exception_handler( + status.HTTP_500_INTERNAL_SERVER_ERROR, log_http_error + ) + include_router_with_global_prefix_prepended(application, chat_router) include_router_with_global_prefix_prepended(application, query_router) include_router_with_global_prefix_prepended(application, document_router) @@ -395,7 +506,6 @@ def get_application() -> FastAPI: include_router_with_global_prefix_prepended( application, slack_bot_management_router ) - include_router_with_global_prefix_prepended(application, standard_answer_router) include_router_with_global_prefix_prepended(application, persona_router) include_router_with_global_prefix_prepended(application, admin_persona_router) include_router_with_global_prefix_prepended(application, input_prompt_router) @@ -485,7 +595,7 @@ def get_application() -> FastAPI: application.add_middleware( CORSMiddleware, - allow_origins=["*"], # Change this to the list of allowed origins if needed + allow_origins=CORS_ALLOWED_ORIGIN, # Configurable via environment variable allow_credentials=True, allow_methods=["*"], allow_headers=["*"], diff --git a/backend/danswer/natural_language_processing/search_nlp_models.py b/backend/danswer/natural_language_processing/search_nlp_models.py index b7835c4e906..2fbf94a5be2 100644 --- a/backend/danswer/natural_language_processing/search_nlp_models.py +++ b/backend/danswer/natural_language_processing/search_nlp_models.py @@ -16,6 +16,7 @@ ) from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE from danswer.db.models import SearchSettings +from danswer.indexing.indexing_heartbeat import Heartbeat from danswer.natural_language_processing.utils import get_tokenizer from danswer.natural_language_processing.utils import tokenizer_trim_content from danswer.utils.logger import setup_logger @@ -24,6 +25,8 @@ from shared_configs.enums import EmbeddingProvider from shared_configs.enums import EmbedTextType from shared_configs.enums import RerankerProvider +from shared_configs.model_server_models import ConnectorClassificationRequest +from shared_configs.model_server_models import ConnectorClassificationResponse from shared_configs.model_server_models import Embedding from shared_configs.model_server_models import EmbedRequest from 
shared_configs.model_server_models import EmbedResponse @@ -90,8 +93,10 @@ def __init__( query_prefix: str | None, passage_prefix: str | None, api_key: str | None, + api_url: str | None, provider_type: EmbeddingProvider | None, retrim_content: bool = False, + heartbeat: Heartbeat | None = None, ) -> None: self.api_key = api_key self.provider_type = provider_type @@ -100,9 +105,11 @@ def __init__( self.normalize = normalize self.model_name = model_name self.retrim_content = retrim_content + self.api_url = api_url self.tokenizer = get_tokenizer( model_name=model_name, provider_type=provider_type ) + self.heartbeat = heartbeat model_server_url = build_model_server_url(server_host, server_port) self.embed_server_endpoint = f"{model_server_url}/encoder/bi-encoder-embed" @@ -157,10 +164,14 @@ def _batch_encode_texts( text_type=text_type, manual_query_prefix=self.query_prefix, manual_passage_prefix=self.passage_prefix, + api_url=self.api_url, ) response = self._make_model_server_request(embed_request) embeddings.extend(response.embeddings) + + if self.heartbeat: + self.heartbeat.heartbeat() return embeddings def encode( @@ -226,6 +237,7 @@ def from_db_model( passage_prefix=search_settings.passage_prefix, api_key=search_settings.api_key, provider_type=search_settings.provider_type, + api_url=search_settings.api_url, retrim_content=retrim_content, ) @@ -236,6 +248,7 @@ def __init__( model_name: str, provider_type: RerankerProvider | None, api_key: str | None, + api_url: str | None, model_server_host: str = MODEL_SERVER_HOST, model_server_port: int = MODEL_SERVER_PORT, ) -> None: @@ -244,6 +257,7 @@ def __init__( self.model_name = model_name self.provider_type = provider_type self.api_key = api_key + self.api_url = api_url def predict(self, query: str, passages: list[str]) -> list[float]: rerank_request = RerankRequest( @@ -252,6 +266,7 @@ def predict(self, query: str, passages: list[str]) -> list[float]: model_name=self.model_name, provider_type=self.provider_type, api_key=self.api_key, + api_url=self.api_url, ) response = requests.post( @@ -297,6 +312,37 @@ def predict( return response_model.is_keyword, response_model.keywords +class ConnectorClassificationModel: + def __init__( + self, + model_server_host: str = MODEL_SERVER_HOST, + model_server_port: int = MODEL_SERVER_PORT, + ): + model_server_url = build_model_server_url(model_server_host, model_server_port) + self.connector_classification_endpoint = ( + model_server_url + "/custom/connector-classification" + ) + + def predict( + self, + query: str, + available_connectors: list[str], + ) -> list[str]: + connector_classification_request = ConnectorClassificationRequest( + available_connectors=available_connectors, + query=query, + ) + response = requests.post( + self.connector_classification_endpoint, + json=connector_classification_request.dict(), + ) + response.raise_for_status() + + response_model = ConnectorClassificationResponse(**response.json()) + + return response_model.connectors + + def warm_up_retry( func: Callable[..., Any], tries: int = 20, @@ -312,8 +358,8 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: return func(*args, **kwargs) except Exception as e: exceptions.append(e) - logger.exception( - f"Attempt {attempt + 1} failed; retrying in {delay} seconds..." + logger.info( + f"Attempt {attempt + 1}/{tries} failed; retrying in {delay} seconds..." 
) time.sleep(delay) raise Exception(f"All retries failed: {exceptions}") @@ -363,6 +409,7 @@ def warm_up_cross_encoder( reranking_model = RerankingModel( model_name=rerank_model_name, provider_type=None, + api_url=None, api_key=None, ) diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py index a5a0fe0dad5..f051da82f14 100644 --- a/backend/danswer/one_shot_answer/answer_question.py +++ b/backend/danswer/one_shot_answer/answer_question.py @@ -26,6 +26,7 @@ from danswer.db.chat import translate_db_search_doc_to_server_search_doc from danswer.db.chat import update_search_docs_table_with_relevance from danswer.db.engine import get_session_context_manager +from danswer.db.models import Persona from danswer.db.models import User from danswer.db.persona import get_prompt_by_id from danswer.llm.answering.answer import Answer @@ -60,7 +61,7 @@ from danswer.tools.tool_runner import ToolCallKickoff from danswer.utils.logger import setup_logger from danswer.utils.timing import log_generator_function_time - +from ee.danswer.server.query_and_chat.utils import create_temporary_persona logger = setup_logger() @@ -118,7 +119,17 @@ def stream_answer_objects( one_shot=True, danswerbot_flow=danswerbot_flow, ) - llm, fast_llm = get_llms_for_persona(persona=chat_session.persona) + + temporary_persona: Persona | None = None + if query_req.persona_config is not None: + new_persona = create_temporary_persona( + db_session=db_session, persona_config=query_req.persona_config, user=user + ) + temporary_persona = new_persona + + persona = temporary_persona if temporary_persona else chat_session.persona + + llm, fast_llm = get_llms_for_persona(persona=persona) llm_tokenizer = get_tokenizer( model_name=llm.config.model_name, @@ -153,11 +164,11 @@ def stream_answer_objects( prompt_id=query_req.prompt_id, user=None, db_session=db_session ) if prompt is None: - if not chat_session.persona.prompts: + if not persona.prompts: raise RuntimeError( "Persona does not have any prompts - this should never happen" ) - prompt = chat_session.persona.prompts[0] + prompt = persona.prompts[0] # Create the first User query message new_user_message = create_new_chat_message( @@ -174,9 +185,7 @@ def stream_answer_objects( prompt_config = PromptConfig.from_model(prompt) document_pruning_config = DocumentPruningConfig( max_chunks=int( - chat_session.persona.num_chunks - if chat_session.persona.num_chunks is not None - else default_num_chunks + persona.num_chunks if persona.num_chunks is not None else default_num_chunks ), max_tokens=max_document_tokens, ) @@ -187,16 +196,16 @@ def stream_answer_objects( evaluation_type=LLMEvaluationType.SKIP if DISABLE_LLM_DOC_RELEVANCE else query_req.evaluation_type, - persona=chat_session.persona, + persona=persona, retrieval_options=query_req.retrieval_options, prompt_config=prompt_config, llm=llm, fast_llm=fast_llm, pruning_config=document_pruning_config, + bypass_acl=bypass_acl, chunks_above=query_req.chunks_above, chunks_below=query_req.chunks_below, full_doc=query_req.full_doc, - bypass_acl=bypass_acl, ) answer_config = AnswerStyleConfig( @@ -209,13 +218,15 @@ def stream_answer_objects( question=query_msg.message, answer_style_config=answer_config, prompt_config=PromptConfig.from_model(prompt), - llm=get_main_llm_from_tuple(get_llms_for_persona(persona=chat_session.persona)), + llm=get_main_llm_from_tuple(get_llms_for_persona(persona=persona)), single_message_history=history_str, - tools=[search_tool], - force_use_tool=ForceUseTool( - 
force_use=True, - tool_name=search_tool.name, - args={"query": rephrased_query}, + tools=[search_tool] if search_tool else [], + force_use_tool=( + ForceUseTool( + tool_name=search_tool.name, + args={"query": rephrased_query}, + force_use=True, + ) ), # for now, don't use tool calling for this flow, as we haven't # tested quotes with tool calling too much yet @@ -223,9 +234,7 @@ def stream_answer_objects( return_contexts=query_req.return_contexts, skip_gen_ai_answer_generation=query_req.skip_gen_ai_answer_generation, ) - # won't be any ImageGenerationDisplay responses since that tool is never passed in - for packet in cast(AnswerObjectIterator, answer.processed_streamed_output): # for one-shot flow, don't currently do anything with these if isinstance(packet, ToolResponse): @@ -261,6 +270,7 @@ def stream_answer_objects( applied_time_cutoff=search_response_summary.final_filters.time_cutoff, recency_bias_multiplier=search_response_summary.recency_bias_multiplier, ) + yield initial_response elif packet.id == SEARCH_DOC_CONTENT_ID: @@ -287,6 +297,7 @@ def stream_answer_objects( relevance_summary=evaluation_response, ) yield evaluation_response + else: yield packet @@ -371,7 +382,7 @@ def get_search_answer( elif isinstance(packet, QADocsResponse): qa_response.docs = packet elif isinstance(packet, LLMRelevanceFilterResponse): - qa_response.llm_chunks_indices = packet.relevant_chunk_indices + qa_response.llm_selected_doc_indices = packet.llm_selected_doc_indices elif isinstance(packet, DanswerQuotes): qa_response.quotes = packet elif isinstance(packet, CitationInfo): diff --git a/backend/danswer/one_shot_answer/models.py b/backend/danswer/one_shot_answer/models.py index d7e81975630..735fc12bbb9 100644 --- a/backend/danswer/one_shot_answer/models.py +++ b/backend/danswer/one_shot_answer/models.py @@ -1,3 +1,5 @@ +from typing import Any + from pydantic import BaseModel from pydantic import Field from pydantic import model_validator @@ -8,6 +10,8 @@ from danswer.chat.models import QADocsResponse from danswer.configs.constants import MessageType from danswer.search.enums import LLMEvaluationType +from danswer.search.enums import RecencyBiasSetting +from danswer.search.enums import SearchType from danswer.search.models import ChunkContext from danswer.search.models import RerankingDetails from danswer.search.models import RetrievalDetails @@ -23,10 +27,49 @@ class ThreadMessage(BaseModel): role: MessageType = MessageType.USER +class PromptConfig(BaseModel): + name: str + description: str = "" + system_prompt: str + task_prompt: str = "" + include_citations: bool = True + datetime_aware: bool = True + + +class DocumentSetConfig(BaseModel): + id: int + + +class ToolConfig(BaseModel): + id: int + + +class PersonaConfig(BaseModel): + name: str + description: str + search_type: SearchType = SearchType.SEMANTIC + num_chunks: float | None = None + llm_relevance_filter: bool = False + llm_filter_extraction: bool = False + recency_bias: RecencyBiasSetting = RecencyBiasSetting.AUTO + llm_model_provider_override: str | None = None + llm_model_version_override: str | None = None + + prompts: list[PromptConfig] = Field(default_factory=list) + prompt_ids: list[int] = Field(default_factory=list) + + document_set_ids: list[int] = Field(default_factory=list) + tools: list[ToolConfig] = Field(default_factory=list) + tool_ids: list[int] = Field(default_factory=list) + custom_tools_openapi: list[dict[str, Any]] = Field(default_factory=list) + + class DirectQARequest(ChunkContext): + persona_config: PersonaConfig | None = 
None + persona_id: int | None = None + messages: list[ThreadMessage] - prompt_id: int | None - persona_id: int + prompt_id: int | None = None multilingual_query_expansion: list[str] | None = None retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails) rerank_settings: RerankingDetails | None = None @@ -43,6 +86,12 @@ class DirectQARequest(ChunkContext): # If True, skips generative an AI response to the search query skip_gen_ai_answer_generation: bool = False + @model_validator(mode="after") + def check_persona_fields(self) -> "DirectQARequest": + if (self.persona_config is None) == (self.persona_id is None): + raise ValueError("Exactly one of persona_config or persona_id must be set") + return self + @model_validator(mode="after") def check_chain_of_thought_and_prompt_id(self) -> "DirectQARequest": if self.chain_of_thought and self.prompt_id is not None: @@ -62,7 +111,7 @@ class OneShotQAResponse(BaseModel): quotes: DanswerQuotes | None = None citations: list[CitationInfo] | None = None docs: QADocsResponse | None = None - llm_chunks_indices: list[int] | None = None + llm_selected_doc_indices: list[int] | None = None error_msg: str | None = None answer_valid: bool = True # Reflexion result, default True if Reflexion not run chat_message_id: int | None = None diff --git a/backend/danswer/prompts/direct_qa_prompts.py b/backend/danswer/prompts/direct_qa_prompts.py index 16768963931..0139da13e88 100644 --- a/backend/danswer/prompts/direct_qa_prompts.py +++ b/backend/danswer/prompts/direct_qa_prompts.py @@ -109,6 +109,9 @@ Refer to the provided context documents when responding to me.{DEFAULT_IGNORE_STATEMENT} \ You should always get right to the point, and never use extraneous language. +CHAT HISTORY: +{{history_block}} + {{task_prompt}} {QUESTION_PAT.upper()} diff --git a/backend/danswer/redis/redis_pool.py b/backend/danswer/redis/redis_pool.py new file mode 100644 index 00000000000..54cb8d918e4 --- /dev/null +++ b/backend/danswer/redis/redis_pool.py @@ -0,0 +1,91 @@ +import threading +from typing import Optional + +import redis +from redis.client import Redis + +from danswer.configs.app_configs import REDIS_DB_NUMBER +from danswer.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL +from danswer.configs.app_configs import REDIS_HOST +from danswer.configs.app_configs import REDIS_PASSWORD +from danswer.configs.app_configs import REDIS_POOL_MAX_CONNECTIONS +from danswer.configs.app_configs import REDIS_PORT +from danswer.configs.app_configs import REDIS_SSL +from danswer.configs.app_configs import REDIS_SSL_CA_CERTS +from danswer.configs.app_configs import REDIS_SSL_CERT_REQS +from danswer.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS + + +class RedisPool: + _instance: Optional["RedisPool"] = None + _lock: threading.Lock = threading.Lock() + _pool: redis.BlockingConnectionPool + + def __new__(cls) -> "RedisPool": + if not cls._instance: + with cls._lock: + if not cls._instance: + cls._instance = super(RedisPool, cls).__new__(cls) + cls._instance._init_pool() + return cls._instance + + def _init_pool(self) -> None: + self._pool = RedisPool.create_pool(ssl=REDIS_SSL) + + def get_client(self) -> Redis: + return redis.Redis(connection_pool=self._pool) + + @staticmethod + def create_pool( + host: str = REDIS_HOST, + port: int = REDIS_PORT, + db: int = REDIS_DB_NUMBER, + password: str = REDIS_PASSWORD, + max_connections: int = REDIS_POOL_MAX_CONNECTIONS, + ssl_ca_certs: str | None = REDIS_SSL_CA_CERTS, + ssl_cert_reqs: str = REDIS_SSL_CERT_REQS, + ssl: bool = 
False, + ) -> redis.BlockingConnectionPool: + """We use BlockingConnectionPool because it will block and wait for a connection + rather than error if max_connections is reached. This is far more deterministic + behavior and aligned with how we want to use Redis.""" + + # Using ConnectionPool is not well documented. + # Useful examples: https://github.com/redis/redis-py/issues/780 + if ssl: + return redis.BlockingConnectionPool( + host=host, + port=port, + db=db, + password=password, + max_connections=max_connections, + timeout=None, + health_check_interval=REDIS_HEALTH_CHECK_INTERVAL, + socket_keepalive=True, + socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS, + connection_class=redis.SSLConnection, + ssl_ca_certs=ssl_ca_certs, + ssl_cert_reqs=ssl_cert_reqs, + ) + + return redis.BlockingConnectionPool( + host=host, + port=port, + db=db, + password=password, + max_connections=max_connections, + timeout=None, + health_check_interval=REDIS_HEALTH_CHECK_INTERVAL, + socket_keepalive=True, + socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS, + ) + + +# # Usage example +# redis_pool = RedisPool() +# redis_client = redis_pool.get_client() + +# # Example of setting and getting a value +# redis_client.set('key', 'value') +# value = redis_client.get('key') +# print(value.decode()) # Output: 'value' diff --git a/backend/danswer/search/models.py b/backend/danswer/search/models.py index 15387e6c63e..503b07653ef 100644 --- a/backend/danswer/search/models.py +++ b/backend/danswer/search/models.py @@ -26,6 +26,7 @@ class RerankingDetails(BaseModel): # If model is None (or num_rerank is 0), then reranking is turned off rerank_model_name: str | None + rerank_api_url: str | None rerank_provider_type: RerankerProvider | None rerank_api_key: str | None = None @@ -42,6 +43,7 @@ def from_db_model(cls, search_settings: SearchSettings) -> "RerankingDetails": rerank_provider_type=search_settings.rerank_provider_type, rerank_api_key=search_settings.rerank_api_key, num_rerank=search_settings.num_rerank, + rerank_api_url=search_settings.rerank_api_url, ) @@ -81,6 +83,8 @@ def from_db_model(cls, search_settings: SearchSettings) -> "SavedSearchSettings" num_rerank=search_settings.num_rerank, # Multilingual Expansion multilingual_expansion=search_settings.multilingual_expansion, + rerank_api_url=search_settings.rerank_api_url, + disable_rerank_for_streaming=search_settings.disable_rerank_for_streaming, ) diff --git a/backend/danswer/search/pipeline.py b/backend/danswer/search/pipeline.py index ad3e19e149d..183c8729d67 100644 --- a/backend/danswer/search/pipeline.py +++ b/backend/danswer/search/pipeline.py @@ -209,7 +209,9 @@ def _get_sections(self) -> list[InferenceSection]: if inference_section is not None: expanded_inference_sections.append(inference_section) else: - logger.warning("Skipped creation of section, no chunks found") + logger.warning( + "Skipped creation of section for full docs, no chunks found" + ) self._retrieved_sections = expanded_inference_sections return expanded_inference_sections @@ -270,6 +272,11 @@ def _get_sections(self) -> list[InferenceSection]: (chunk.document_id, chunk.chunk_id): chunk for chunk in inference_chunks } + # In case of failed parallel calls to Vespa, at least we should have the initial retrieved chunks + doc_chunk_ind_to_chunk.update( + {(chunk.document_id, chunk.chunk_id): chunk for chunk in retrieved_chunks} + ) + # Build the surroundings for all of the initial retrieved chunks for chunk in retrieved_chunks: start_ind = max(0, chunk.chunk_id - above) @@ -360,10 
+367,10 @@ def section_relevance(self) -> list[SectionRelevancePiece] | None: try: results = run_functions_in_parallel(function_calls=functions) self._section_relevance = list(results.values()) - except Exception: + except Exception as e: raise ValueError( - "An issue occured during the agentic evaluation proecss." - ) + "An issue occurred during the agentic evaluation process." + ) from e elif self.search_query.evaluation_type == LLMEvaluationType.BASIC: if DISABLE_LLM_DOC_RELEVANCE: diff --git a/backend/danswer/search/postprocessing/postprocessing.py b/backend/danswer/search/postprocessing/postprocessing.py index 6a3d2dc2dcd..b4a1e48bd39 100644 --- a/backend/danswer/search/postprocessing/postprocessing.py +++ b/backend/danswer/search/postprocessing/postprocessing.py @@ -100,6 +100,7 @@ def semantic_reranking( model_name=rerank_settings.rerank_model_name, provider_type=rerank_settings.rerank_provider_type, api_key=rerank_settings.rerank_api_key, + api_url=rerank_settings.rerank_api_url, ) passages = [ @@ -253,8 +254,8 @@ def search_postprocessing( if not retrieved_sections: # Avoids trying to rerank an empty list which throws an error - yield [] - yield [] + yield cast(list[InferenceSection], []) + yield cast(list[SectionRelevancePiece], []) return rerank_task_id = None diff --git a/backend/danswer/search/preprocessing/preprocessing.py b/backend/danswer/search/preprocessing/preprocessing.py index 43a6a43ce88..37fb254884a 100644 --- a/backend/danswer/search/preprocessing/preprocessing.py +++ b/backend/danswer/search/preprocessing/preprocessing.py @@ -67,6 +67,9 @@ def retrieval_preprocessing( ] time_filter = preset_filters.time_cutoff + if time_filter is None and persona: + time_filter = persona.search_start_date + source_filter = preset_filters.source_type auto_detect_time_filter = True @@ -154,7 +157,7 @@ def retrieval_preprocessing( final_filters = IndexFilters( source_type=preset_filters.source_type or predicted_source_filters, document_set=preset_filters.document_set, - time_cutoff=preset_filters.time_cutoff or predicted_time_cutoff, + time_cutoff=time_filter or predicted_time_cutoff, tags=preset_filters.tags, # Tags are never auto-extracted access_control_list=user_acl_filters, ) diff --git a/backend/danswer/search/retrieval/search_runner.py b/backend/danswer/search/retrieval/search_runner.py index 31582f90819..30347464ff8 100644 --- a/backend/danswer/search/retrieval/search_runner.py +++ b/backend/danswer/search/retrieval/search_runner.py @@ -3,7 +3,6 @@ import nltk # type:ignore from nltk.corpus import stopwords # type:ignore -from nltk.stem import WordNetLemmatizer # type:ignore from nltk.tokenize import word_tokenize # type:ignore from sqlalchemy.orm import Session @@ -40,7 +39,7 @@ def download_nltk_data() -> None: resources = { "stopwords": "corpora/stopwords", - "wordnet": "corpora/wordnet", + # "wordnet": "corpora/wordnet", # Not in use "punkt": "tokenizers/punkt", } @@ -58,15 +57,16 @@ def download_nltk_data() -> None: def lemmatize_text(keywords: list[str]) -> list[str]: - try: - query = " ".join(keywords) - lemmatizer = WordNetLemmatizer() - word_tokens = word_tokenize(query) - lemmatized_words = [lemmatizer.lemmatize(word) for word in word_tokens] - combined_keywords = list(set(keywords + lemmatized_words)) - return combined_keywords - except Exception: - return keywords + raise NotImplementedError("Lemmatization should not be used currently") + # try: + # query = " ".join(keywords) + # lemmatizer = WordNetLemmatizer() + # word_tokens = word_tokenize(query) + # 
lemmatized_words = [lemmatizer.lemmatize(word) for word in word_tokens] + # combined_keywords = list(set(keywords + lemmatized_words)) + # return combined_keywords + # except Exception: + # return keywords def remove_stop_words_and_punctuation(keywords: list[str]) -> list[str]: diff --git a/backend/danswer/secondary_llm_flows/agentic_evaluation.py b/backend/danswer/secondary_llm_flows/agentic_evaluation.py index 3de9db00be6..03121e3cf1d 100644 --- a/backend/danswer/secondary_llm_flows/agentic_evaluation.py +++ b/backend/danswer/secondary_llm_flows/agentic_evaluation.py @@ -58,25 +58,30 @@ def _get_metadata_str(metadata: dict[str, str | list[str]]) -> str: center_metadata=center_metadata_str, ) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) - model_output = message_to_string(llm.invoke(filled_llm_prompt)) + try: + model_output = message_to_string(llm.invoke(filled_llm_prompt)) - # Search for the "Useful Analysis" section in the model output - # This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon, - # then any text up to "3. Final Relevance" - # The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines - # If no match is found, the entire model output is used as the analysis - analysis_match = re.search( - r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance", - model_output, - re.DOTALL, - ) - analysis = analysis_match.group(1).strip() if analysis_match else model_output + # Search for the "Useful Analysis" section in the model output + # This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon, + # then any text up to "3. Final Relevance" + # The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines + # If no match is found, the entire model output is used as the analysis + analysis_match = re.search( + r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance", + model_output, + re.DOTALL, + ) + analysis = analysis_match.group(1).strip() if analysis_match else model_output - # Get the last non-empty line - last_line = next( - (line for line in reversed(model_output.split("\n")) if line.strip()), "" - ) - relevant = last_line.strip().lower().startswith("true") + # Get the last non-empty line + last_line = next( + (line for line in reversed(model_output.split("\n")) if line.strip()), "" + ) + relevant = last_line.strip().lower().startswith("true") + except Exception as e: + logger.exception(f"An issue occurred during the agentic evaluation process. 
{e}") + relevant = False + analysis = "" return SectionRelevancePiece( document_id=document_id, diff --git a/backend/danswer/secondary_llm_flows/source_filter.py b/backend/danswer/secondary_llm_flows/source_filter.py index 802a14f42fa..f58a91016e0 100644 --- a/backend/danswer/secondary_llm_flows/source_filter.py +++ b/backend/danswer/secondary_llm_flows/source_filter.py @@ -3,12 +3,16 @@ from sqlalchemy.orm import Session +from danswer.configs.chat_configs import ENABLE_CONNECTOR_CLASSIFIER from danswer.configs.constants import DocumentSource from danswer.db.connector import fetch_unique_document_sources from danswer.db.engine import get_sqlalchemy_engine from danswer.llm.interfaces import LLM from danswer.llm.utils import dict_based_prompt_to_langchain_prompt from danswer.llm.utils import message_to_string +from danswer.natural_language_processing.search_nlp_models import ( + ConnectorClassificationModel, +) from danswer.prompts.constants import SOURCES_KEY from danswer.prompts.filter_extration import FILE_SOURCE_WARNING from danswer.prompts.filter_extration import SOURCE_FILTER_PROMPT @@ -42,11 +46,38 @@ def _sample_document_sources( return random.sample(valid_sources, num_sample) +def _sample_documents_using_custom_connector_classifier( + query: str, + valid_sources: list[DocumentSource], +) -> list[DocumentSource] | None: + query_joined = "".join(ch for ch in query.lower() if ch.isalnum()) + available_connectors = list( + filter( + lambda conn: conn.lower() in query_joined, + [item.value for item in valid_sources], + ) + ) + + if not available_connectors: + return None + + connectors = ConnectorClassificationModel().predict(query, available_connectors) + + return strings_to_document_sources(connectors) if connectors else None + + def extract_source_filter( query: str, llm: LLM, db_session: Session ) -> list[DocumentSource] | None: """Returns a list of valid sources for search or None if no specific sources were detected""" + valid_sources = fetch_unique_document_sources(db_session) + if not valid_sources: + return None + + if ENABLE_CONNECTOR_CLASSIFIER: + return _sample_documents_using_custom_connector_classifier(query, valid_sources) + def _get_source_filter_messages( query: str, valid_sources: list[DocumentSource], @@ -146,10 +177,6 @@ def _extract_source_filters_from_llm_out( logger.warning("LLM failed to provide a valid Source Filter output") return None - valid_sources = fetch_unique_document_sources(db_session) - if not valid_sources: - return None - messages = _get_source_filter_messages(query=query, valid_sources=valid_sources) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) model_output = message_to_string(llm.invoke(filled_llm_prompt)) diff --git a/backend/danswer/server/auth_check.py b/backend/danswer/server/auth_check.py index 12258eba29b..8a35a560a24 100644 --- a/backend/danswer/server/auth_check.py +++ b/backend/danswer/server/auth_check.py @@ -7,6 +7,7 @@ from danswer.auth.users import current_admin_user from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user +from danswer.auth.users import current_user_with_expired_token from danswer.configs.app_configs import APP_API_PREFIX from danswer.server.danswer_api.ingestion import api_key_dep @@ -96,6 +97,7 @@ def check_router_auth( or depends_fn == current_admin_user or depends_fn == current_curator_or_admin_user or depends_fn == api_key_dep + or depends_fn == current_user_with_expired_token ): found_auth = True break diff --git 
a/backend/danswer/server/documents/cc_pair.py b/backend/danswer/server/documents/cc_pair.py index 69ae9916348..428666751a4 100644 --- a/backend/danswer/server/documents/cc_pair.py +++ b/backend/danswer/server/documents/cc_pair.py @@ -1,38 +1,81 @@ +import math +from http import HTTPStatus + from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException -from pydantic import BaseModel +from fastapi import Query from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user from danswer.background.celery.celery_utils import get_deletion_attempt_snapshot +from danswer.background.celery.celery_utils import skip_cc_pair_pruning_by_task +from danswer.background.task_utils import name_cc_prune_task from danswer.db.connector_credential_pair import add_credential_to_connector from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id from danswer.db.connector_credential_pair import remove_credential_from_connector from danswer.db.connector_credential_pair import ( update_connector_credential_pair_from_id, ) -from danswer.db.document import get_document_cnts_for_cc_pairs +from danswer.db.document import get_document_counts_for_cc_pairs from danswer.db.engine import get_session +from danswer.db.enums import AccessType from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.index_attempt import cancel_indexing_attempts_for_ccpair from danswer.db.index_attempt import cancel_indexing_attempts_past_model -from danswer.db.index_attempt import get_index_attempts_for_connector +from danswer.db.index_attempt import count_index_attempts_for_connector +from danswer.db.index_attempt import get_latest_index_attempt_for_cc_pair_id +from danswer.db.index_attempt import get_paginated_index_attempts_for_cc_pair_id from danswer.db.models import User -from danswer.db.models import UserRole +from danswer.db.tasks import get_latest_task from danswer.server.documents.models import CCPairFullInfo +from danswer.server.documents.models import CCPairPruningTask +from danswer.server.documents.models import CCStatusUpdateRequest from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.server.documents.models import ConnectorCredentialPairMetadata +from danswer.server.documents.models import PaginatedIndexAttempts from danswer.server.models import StatusResponse from danswer.utils.logger import setup_logger +from ee.danswer.db.user_group import validate_user_creation_permissions logger = setup_logger() - router = APIRouter(prefix="/manage") +@router.get("/admin/cc-pair/{cc_pair_id}/index-attempts") +def get_cc_pair_index_attempts( + cc_pair_id: int, + page: int = Query(1, ge=1), + page_size: int = Query(10, ge=1, le=1000), + user: User | None = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> PaginatedIndexAttempts: + cc_pair = get_connector_credential_pair_from_id( + cc_pair_id, db_session, user, get_editable=False + ) + if not cc_pair: + raise HTTPException( + status_code=400, detail="CC Pair not found for current user permissions" + ) + total_count = count_index_attempts_for_connector( + db_session=db_session, + connector_id=cc_pair.connector_id, + ) + index_attempts = get_paginated_index_attempts_for_cc_pair_id( + db_session=db_session, + connector_id=cc_pair.connector_id, + page=page, + page_size=page_size, + ) + return PaginatedIndexAttempts.from_models( 
+ index_attempt_models=index_attempts, + page=page, + total_pages=math.ceil(total_count / page_size), + ) + + @router.get("/admin/cc-pair/{cc_pair_id}") def get_cc_pair_full_info( cc_pair_id: int, @@ -56,13 +99,8 @@ def get_cc_pair_full_info( credential_id=cc_pair.credential_id, ) - index_attempts = get_index_attempts_for_connector( - db_session, - cc_pair.connector_id, - ) - document_count_info_list = list( - get_document_cnts_for_cc_pairs( + get_document_counts_for_cc_pairs( db_session=db_session, cc_pair_identifiers=[cc_pair_identifier], ) @@ -71,9 +109,20 @@ def get_cc_pair_full_info( document_count_info_list[0][-1] if document_count_info_list else 0 ) + latest_attempt = get_latest_index_attempt_for_cc_pair_id( + db_session=db_session, + connector_credential_pair_id=cc_pair.id, + secondary_index=False, + only_finished=False, + ) + return CCPairFullInfo.from_models( cc_pair_model=cc_pair, - index_attempt_models=list(index_attempts), + number_of_index_attempts=count_index_attempts_for_connector( + db_session=db_session, + connector_id=cc_pair.connector_id, + ), + last_index_attempt=latest_attempt, latest_deletion_attempt=get_deletion_attempt_snapshot( connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id, @@ -84,10 +133,6 @@ def get_cc_pair_full_info( ) -class CCStatusUpdateRequest(BaseModel): - status: ConnectorCredentialPairStatus - - @router.put("/admin/cc-pair/{cc_pair_id}/status") def update_cc_pair_status( cc_pair_id: int, @@ -149,6 +194,92 @@ def update_cc_pair_name( raise HTTPException(status_code=400, detail="Name must be unique") +@router.get("/admin/cc-pair/{cc_pair_id}/prune") +def get_cc_pair_latest_prune( + cc_pair_id: int, + user: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> CCPairPruningTask: + cc_pair = get_connector_credential_pair_from_id( + cc_pair_id=cc_pair_id, + db_session=db_session, + user=user, + get_editable=False, + ) + if not cc_pair: + raise HTTPException( + status_code=400, + detail="Connection not found for current user's permissions", + ) + + # look up the last prune task for this connector (if it exists) + pruning_task_name = name_cc_prune_task( + connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id + ) + last_pruning_task = get_latest_task(pruning_task_name, db_session) + if not last_pruning_task: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, + detail="No pruning task found.", + ) + + return CCPairPruningTask( + id=last_pruning_task.task_id, + name=last_pruning_task.task_name, + status=last_pruning_task.status, + start_time=last_pruning_task.start_time, + register_time=last_pruning_task.register_time, + ) + + +@router.post("/admin/cc-pair/{cc_pair_id}/prune") +def prune_cc_pair( + cc_pair_id: int, + user: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> StatusResponse[list[int]]: + # avoiding circular refs + from danswer.background.celery.tasks.pruning.tasks import prune_documents_task + + cc_pair = get_connector_credential_pair_from_id( + cc_pair_id=cc_pair_id, + db_session=db_session, + user=user, + get_editable=False, + ) + if not cc_pair: + raise HTTPException( + status_code=400, + detail="Connection not found for current user's permissions", + ) + + pruning_task_name = name_cc_prune_task( + connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id + ) + last_pruning_task = get_latest_task(pruning_task_name, db_session) + if skip_cc_pair_pruning_by_task( + last_pruning_task, + 
db_session=db_session, + ): + raise HTTPException( + status_code=HTTPStatus.CONFLICT, + detail="Pruning task already in progress.", + ) + + logger.info(f"Pruning the {cc_pair.connector.name} connector.") + prune_documents_task.apply_async( + kwargs=dict( + connector_id=cc_pair.connector.id, + credential_id=cc_pair.credential.id, + ) + ) + + return StatusResponse( + success=True, + message="Successfully created the pruning task.", + ) + + @router.put("/connector/{connector_id}/credential/{credential_id}") def associate_credential_to_connector( connector_id: int, @@ -157,11 +288,12 @@ def associate_credential_to_connector( user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> StatusResponse[int]: - if user and user.role != UserRole.ADMIN and metadata.is_public: - raise HTTPException( - status_code=400, - detail="Public connections cannot be created by non-admin users", - ) + validate_user_creation_permissions( + db_session=db_session, + user=user, + target_group_ids=metadata.groups, + object_is_public=metadata.access_type == AccessType.PUBLIC, + ) try: response = add_credential_to_connector( @@ -170,7 +302,8 @@ def associate_credential_to_connector( connector_id=connector_id, credential_id=credential_id, cc_pair_name=metadata.name, - is_public=metadata.is_public or True, + access_type=metadata.access_type, + auto_sync_options=metadata.auto_sync_options, groups=metadata.groups, ) diff --git a/backend/danswer/server/documents/connector.py b/backend/danswer/server/documents/connector.py index 8d6b0ffc773..58dcf7e7691 100644 --- a/backend/danswer/server/documents/connector.py +++ b/backend/danswer/server/documents/connector.py @@ -62,12 +62,15 @@ from danswer.db.credentials import delete_google_drive_service_account_credentials from danswer.db.credentials import fetch_credential_by_id from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed -from danswer.db.document import get_document_cnts_for_cc_pairs +from danswer.db.document import get_document_counts_for_cc_pairs from danswer.db.engine import get_session +from danswer.db.enums import AccessType from danswer.db.index_attempt import create_index_attempt from danswer.db.index_attempt import get_index_attempts_for_cc_pair -from danswer.db.index_attempt import get_latest_finished_index_attempt_for_cc_pair +from danswer.db.index_attempt import get_latest_index_attempt_for_cc_pair_id from danswer.db.index_attempt import get_latest_index_attempts +from danswer.db.index_attempt import get_latest_index_attempts_by_status +from danswer.db.models import IndexingStatus from danswer.db.models import User from danswer.db.models import UserRole from danswer.db.search_settings import get_current_search_settings @@ -75,13 +78,13 @@ from danswer.file_store.file_store import get_default_file_store from danswer.server.documents.models import AuthStatus from danswer.server.documents.models import AuthUrl -from danswer.server.documents.models import ConnectorBase from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.server.documents.models import ConnectorIndexingStatus from danswer.server.documents.models import ConnectorSnapshot from danswer.server.documents.models import ConnectorUpdateRequest from danswer.server.documents.models import CredentialBase from danswer.server.documents.models import CredentialSnapshot +from danswer.server.documents.models import FailedConnectorIndexingStatus from danswer.server.documents.models import FileUploadResponse from 
danswer.server.documents.models import GDriveCallback from danswer.server.documents.models import GmailCallback @@ -93,6 +96,7 @@ from danswer.server.documents.models import RunConnectorRequest from danswer.server.models import StatusResponse from danswer.utils.logger import setup_logger +from ee.danswer.db.user_group import validate_user_creation_permissions logger = setup_logger() @@ -376,6 +380,95 @@ def upload_files( return FileUploadResponse(file_paths=deduped_file_paths) +# Retrieves most recent failure cases for connectors that are currently failing +@router.get("/admin/connector/failed-indexing-status") +def get_currently_failed_indexing_status( + secondary_index: bool = False, + user: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), + get_editable: bool = Query( + False, description="If true, return editable document sets" + ), +) -> list[FailedConnectorIndexingStatus]: + # Get the latest failed indexing attempts + latest_failed_indexing_attempts = get_latest_index_attempts_by_status( + secondary_index=secondary_index, + db_session=db_session, + status=IndexingStatus.FAILED, + ) + + # Get the latest successful indexing attempts + latest_successful_indexing_attempts = get_latest_index_attempts_by_status( + secondary_index=secondary_index, + db_session=db_session, + status=IndexingStatus.SUCCESS, + ) + + # Get all connector credential pairs + cc_pairs = get_connector_credential_pairs( + db_session=db_session, + user=user, + get_editable=get_editable, + ) + + # Filter out failed attempts that have a more recent successful attempt + filtered_failed_attempts = [ + failed_attempt + for failed_attempt in latest_failed_indexing_attempts + if not any( + success_attempt.connector_credential_pair_id + == failed_attempt.connector_credential_pair_id + and success_attempt.time_updated > failed_attempt.time_updated + for success_attempt in latest_successful_indexing_attempts + ) + ] + + # Filter cc_pairs to include only those with failed attempts + cc_pairs = [ + cc_pair + for cc_pair in cc_pairs + if any( + attempt.connector_credential_pair == cc_pair + for attempt in filtered_failed_attempts + ) + ] + + # Create a mapping of cc_pair_id to its latest failed index attempt + cc_pair_to_latest_index_attempt = { + attempt.connector_credential_pair_id: attempt + for attempt in filtered_failed_attempts + } + + indexing_statuses = [] + + for cc_pair in cc_pairs: + # Skip DefaultCCPair + if cc_pair.name == "DefaultCCPair": + continue + + latest_index_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id) + + indexing_statuses.append( + FailedConnectorIndexingStatus( + cc_pair_id=cc_pair.id, + name=cc_pair.name, + error_msg=( + latest_index_attempt.error_msg if latest_index_attempt else None + ), + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + is_deletable=check_deletion_attempt_is_allowed( + connector_credential_pair=cc_pair, + db_session=db_session, + allow_scheduled=True, + ) + is None, + ) + ) + + return indexing_statuses + + @router.get("/admin/connector/indexing-status") def get_connector_indexing_status( secondary_index: bool = False, @@ -387,7 +480,12 @@ def get_connector_indexing_status( ) -> list[ConnectorIndexingStatus]: indexing_statuses: list[ConnectorIndexingStatus] = [] - # TODO: make this one query + # NOTE: If the connector is deleting behind the scenes, + # accessing cc_pairs can be inconsistent and members like + # connector or credential may be None. 
+ # Additional checks are done to make sure the connector and credential still exists. + # TODO: make this one query ... possibly eager load or wrap in a read transaction + # to avoid the complexity of trying to error check throughout the function cc_pairs = get_connector_credential_pairs( db_session=db_session, user=user, @@ -414,7 +512,7 @@ def get_connector_indexing_status( for index_attempt in latest_index_attempts } - document_count_info = get_document_cnts_for_cc_pairs( + document_count_info = get_document_counts_for_cc_pairs( db_session=db_session, cc_pair_identifiers=cc_pair_identifiers, ) @@ -440,14 +538,19 @@ def get_connector_indexing_status( connector = cc_pair.connector credential = cc_pair.credential + if not connector or not credential: + # This may happen if background deletion is happening + continue + latest_index_attempt = cc_pair_to_latest_index_attempt.get( (connector.id, credential.id) ) - latest_finished_attempt = get_latest_finished_index_attempt_for_cc_pair( + latest_finished_attempt = get_latest_index_attempt_for_cc_pair_id( + db_session=db_session, connector_credential_pair_id=cc_pair.id, secondary_index=secondary_index, - db_session=db_session, + only_finished=True, ) indexing_statuses.append( @@ -457,7 +560,7 @@ def get_connector_indexing_status( cc_pair_status=cc_pair.status, connector=ConnectorSnapshot.from_connector_db_model(connector), credential=CredentialSnapshot.from_credential_db_model(credential), - public_doc=cc_pair.is_public, + access_type=cc_pair.access_type, owner=credential.user.email if credential.user else "", groups=group_cc_pair_relationships_dict.get(cc_pair.id, []), last_finished_status=( @@ -514,35 +617,6 @@ def _validate_connector_allowed(source: DocumentSource) -> None: ) -def _check_connector_permissions( - connector_data: ConnectorUpdateRequest, user: User | None -) -> ConnectorBase: - """ - This is not a proper permission check, but this should prevent curators creating bad situations - until a long-term solution is implemented (Replacing CC pairs/Connectors with Connections) - """ - if user and user.role != UserRole.ADMIN: - if connector_data.is_public: - raise HTTPException( - status_code=400, - detail="Public connectors can only be created by admins", - ) - if not connector_data.groups: - raise HTTPException( - status_code=400, - detail="Connectors created by curators must have groups", - ) - return ConnectorBase( - name=connector_data.name, - source=connector_data.source, - input_type=connector_data.input_type, - connector_specific_config=connector_data.connector_specific_config, - refresh_freq=connector_data.refresh_freq, - prune_freq=connector_data.prune_freq, - indexing_start=connector_data.indexing_start, - ) - - @router.post("/admin/connector") def create_connector_from_model( connector_data: ConnectorUpdateRequest, @@ -551,12 +625,19 @@ def create_connector_from_model( ) -> ObjectCreationIdResponse: try: _validate_connector_allowed(connector_data.source) - connector_base = _check_connector_permissions(connector_data, user) + validate_user_creation_permissions( + db_session=db_session, + user=user, + target_group_ids=connector_data.groups, + object_is_public=connector_data.is_public, + ) + connector_base = connector_data.to_connector_base() return create_connector( db_session=db_session, connector_data=connector_base, ) except ValueError as e: + logger.error(f"Error creating connector: {e}") raise HTTPException(status_code=400, detail=str(e)) @@ -588,12 +669,15 @@ def create_connector_with_mock_credential( credential = 
create_credential( mock_credential, user=user, db_session=db_session ) + access_type = ( + AccessType.PUBLIC if connector_data.is_public else AccessType.PRIVATE + ) response = add_credential_to_connector( db_session=db_session, user=user, connector_id=cast(int, connector_response.id), # will aways be an int credential_id=credential.id, - is_public=connector_data.is_public or False, + access_type=access_type, cc_pair_name=connector_data.name, groups=connector_data.groups, ) @@ -607,12 +691,18 @@ def create_connector_with_mock_credential( def update_connector_from_model( connector_id: int, connector_data: ConnectorUpdateRequest, - user: User = Depends(current_admin_user), + user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> ConnectorSnapshot | StatusResponse[int]: try: _validate_connector_allowed(connector_data.source) - connector_base = _check_connector_permissions(connector_data, user) + validate_user_creation_permissions( + db_session=db_session, + user=user, + target_group_ids=connector_data.groups, + object_is_public=connector_data.is_public, + ) + connector_base = connector_data.to_connector_base() except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) @@ -642,7 +732,7 @@ def update_connector_from_model( @router.delete("/admin/connector/{connector_id}", response_model=StatusResponse[int]) def delete_connector_by_id( connector_id: int, - _: User = Depends(current_admin_user), + _: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> StatusResponse[int]: try: @@ -882,7 +972,7 @@ def get_basic_connector_indexing_status( ) for cc_pair in cc_pairs ] - document_count_info = get_document_cnts_for_cc_pairs( + document_count_info = get_document_counts_for_cc_pairs( db_session=db_session, cc_pair_identifiers=cc_pair_identifiers, ) diff --git a/backend/danswer/server/documents/credential.py b/backend/danswer/server/documents/credential.py index ba30b65f2f9..3d965481bf5 100644 --- a/backend/danswer/server/documents/credential.py +++ b/backend/danswer/server/documents/credential.py @@ -7,7 +7,6 @@ from danswer.auth.users import current_admin_user from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user -from danswer.auth.users import validate_curator_request from danswer.db.credentials import alter_credential from danswer.db.credentials import create_credential from danswer.db.credentials import CREDENTIAL_PERMISSIONS_TO_IGNORE @@ -20,7 +19,6 @@ from danswer.db.engine import get_session from danswer.db.models import DocumentSource from danswer.db.models import User -from danswer.db.models import UserRole from danswer.server.documents.models import CredentialBase from danswer.server.documents.models import CredentialDataUpdateRequest from danswer.server.documents.models import CredentialSnapshot @@ -28,6 +26,7 @@ from danswer.server.documents.models import ObjectCreationIdResponse from danswer.server.models import StatusResponse from danswer.utils.logger import setup_logger +from ee.danswer.db.user_group import validate_user_creation_permissions logger = setup_logger() @@ -80,7 +79,7 @@ def get_cc_source_full_info( ] -@router.get("/credentials/{id}") +@router.get("/credential/{id}") def list_credentials_by_id( user: User | None = Depends(current_user), db_session: Session = Depends(get_session), @@ -105,7 +104,7 @@ def delete_credential_by_id_admin( ) -@router.put("/admin/credentials/swap") +@router.put("/admin/credential/swap") def 
swap_credentials_for_connector( credential_swap_req: CredentialSwapRequest, user: User | None = Depends(current_user), @@ -131,14 +130,12 @@ def create_credential_from_model( user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> ObjectCreationIdResponse: - if ( - user - and user.role != UserRole.ADMIN - and not _ignore_credential_permissions(credential_info.source) - ): - validate_curator_request( - groups=credential_info.groups, - is_public=credential_info.curator_public, + if not _ignore_credential_permissions(credential_info.source): + validate_user_creation_permissions( + db_session=db_session, + user=user, + target_group_ids=credential_info.groups, + object_is_public=credential_info.curator_public, ) credential = create_credential(credential_info, user, db_session) @@ -179,7 +176,7 @@ def get_credential_by_id( return CredentialSnapshot.from_credential_db_model(credential) -@router.put("/admin/credentials/{credential_id}") +@router.put("/admin/credential/{credential_id}") def update_credential_data( credential_id: int, credential_update: CredentialDataUpdateRequest, diff --git a/backend/danswer/server/documents/models.py b/backend/danswer/server/documents/models.py index ba011afc196..ee266eca8b8 100644 --- a/backend/danswer/server/documents/models.py +++ b/backend/danswer/server/documents/models.py @@ -4,11 +4,13 @@ from pydantic import BaseModel from pydantic import Field +from pydantic import model_validator from danswer.configs.app_configs import MASK_CREDENTIAL_PREFIX from danswer.configs.constants import DocumentSource from danswer.connectors.models import DocumentErrorSummary from danswer.connectors.models import InputType +from danswer.db.enums import AccessType from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.models import Connector from danswer.db.models import ConnectorCredentialPair @@ -48,9 +50,12 @@ class ConnectorBase(BaseModel): class ConnectorUpdateRequest(ConnectorBase): - is_public: bool | None = None + is_public: bool = True groups: list[int] = Field(default_factory=list) + def to_connector_base(self) -> ConnectorBase: + return ConnectorBase(**self.model_dump(exclude={"is_public", "groups"})) + class ConnectorSnapshot(ConnectorBase): id: int @@ -103,11 +108,6 @@ class CredentialSnapshot(CredentialBase): user_id: UUID | None time_created: datetime time_updated: datetime - name: str | None - source: DocumentSource - credential_json: dict[str, Any] - admin_public: bool - curator_public: bool @classmethod def from_credential_db_model(cls, credential: Credential) -> "CredentialSnapshot": @@ -187,6 +187,28 @@ def from_db_model(cls, error: DbIndexAttemptError) -> "IndexAttemptError": ) +class PaginatedIndexAttempts(BaseModel): + index_attempts: list[IndexAttemptSnapshot] + page: int + total_pages: int + + @classmethod + def from_models( + cls, + index_attempt_models: list[IndexAttempt], + page: int, + total_pages: int, + ) -> "PaginatedIndexAttempts": + return cls( + index_attempts=[ + IndexAttemptSnapshot.from_index_attempt_db_model(index_attempt_model) + for index_attempt_model in index_attempt_models + ], + page=page, + total_pages=total_pages, + ) + + class CCPairFullInfo(BaseModel): id: int name: str @@ -194,20 +216,38 @@ class CCPairFullInfo(BaseModel): num_docs_indexed: int connector: ConnectorSnapshot credential: CredentialSnapshot - index_attempts: list[IndexAttemptSnapshot] + number_of_index_attempts: int + last_index_attempt_status: IndexingStatus | None latest_deletion_attempt: 
DeletionAttemptSnapshot | None - is_public: bool + access_type: AccessType is_editable_for_current_user: bool + deletion_failure_message: str | None @classmethod def from_models( cls, cc_pair_model: ConnectorCredentialPair, - index_attempt_models: list[IndexAttempt], latest_deletion_attempt: DeletionAttemptSnapshot | None, + number_of_index_attempts: int, + last_index_attempt: IndexAttempt | None, num_docs_indexed: int, # not ideal, but this must be computed separately is_editable_for_current_user: bool, ) -> "CCPairFullInfo": + # figure out if we need to artificially deflate the number of docs indexed. + # This is required since the total number of docs indexed by a CC Pair is + # updated before the new docs for an indexing attempt. If we don't do this, + # there is a mismatch between these two numbers which may confuse users. + last_indexing_status = last_index_attempt.status if last_index_attempt else None + if ( + last_indexing_status == IndexingStatus.SUCCESS + and number_of_index_attempts == 1 + and last_index_attempt + and last_index_attempt.new_docs_indexed + ): + num_docs_indexed = ( + last_index_attempt.new_docs_indexed if last_index_attempt else 0 + ) + return cls( id=cc_pair_model.id, name=cc_pair_model.name, @@ -219,16 +259,34 @@ def from_models( credential=CredentialSnapshot.from_credential_db_model( cc_pair_model.credential ), - index_attempts=[ - IndexAttemptSnapshot.from_index_attempt_db_model(index_attempt_model) - for index_attempt_model in index_attempt_models - ], + number_of_index_attempts=number_of_index_attempts, + last_index_attempt_status=last_indexing_status, latest_deletion_attempt=latest_deletion_attempt, - is_public=cc_pair_model.is_public, + access_type=cc_pair_model.access_type, is_editable_for_current_user=is_editable_for_current_user, + deletion_failure_message=cc_pair_model.deletion_failure_message, ) +class CCPairPruningTask(BaseModel): + id: str + name: str + status: TaskStatus + start_time: datetime | None + register_time: datetime | None + + +class FailedConnectorIndexingStatus(BaseModel): + """Simplified version of ConnectorIndexingStatus for failed indexing attempts""" + + cc_pair_id: int + name: str | None + error_msg: str | None + is_deletable: bool + connector_id: int + credential_id: int + + class ConnectorIndexingStatus(BaseModel): """Represents the latest indexing status of a connector""" @@ -239,7 +297,7 @@ class ConnectorIndexingStatus(BaseModel): credential: CredentialSnapshot owner: str groups: list[int] - public_doc: bool + access_type: AccessType last_finished_status: IndexingStatus | None last_status: IndexingStatus | None last_success: datetime | None @@ -257,10 +315,15 @@ class ConnectorCredentialPairIdentifier(BaseModel): class ConnectorCredentialPairMetadata(BaseModel): name: str | None = None - is_public: bool | None = None + access_type: AccessType + auto_sync_options: dict[str, Any] | None = None groups: list[int] = Field(default_factory=list) +class CCStatusUpdateRequest(BaseModel): + status: ConnectorCredentialPairStatus + + class ConnectorCredentialPairDescriptor(BaseModel): id: int name: str | None = None @@ -307,8 +370,18 @@ class GoogleServiceAccountKey(BaseModel): class GoogleServiceAccountCredentialRequest(BaseModel): - google_drive_delegated_user: str | None # email of user to impersonate - gmail_delegated_user: str | None # email of user to impersonate + google_drive_delegated_user: str | None = None # email of user to impersonate + gmail_delegated_user: str | None = None # email of user to impersonate + + 
@model_validator(mode="after") + def check_user_delegation(self) -> "GoogleServiceAccountCredentialRequest": + if (self.google_drive_delegated_user is None) == ( + self.gmail_delegated_user is None + ): + raise ValueError( + "Exactly one of google_drive_delegated_user or gmail_delegated_user must be set" + ) + return self class FileUploadResponse(BaseModel): diff --git a/backend/danswer/server/features/document_set/api.py b/backend/danswer/server/features/document_set/api.py index d1eff082891..c9cea2cf2a2 100644 --- a/backend/danswer/server/features/document_set/api.py +++ b/backend/danswer/server/features/document_set/api.py @@ -6,7 +6,6 @@ from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user -from danswer.auth.users import validate_curator_request from danswer.db.document_set import check_document_sets_are_public from danswer.db.document_set import fetch_all_document_sets_for_user from danswer.db.document_set import insert_document_set @@ -14,12 +13,12 @@ from danswer.db.document_set import update_document_set from danswer.db.engine import get_session from danswer.db.models import User -from danswer.db.models import UserRole from danswer.server.features.document_set.models import CheckDocSetPublicRequest from danswer.server.features.document_set.models import CheckDocSetPublicResponse from danswer.server.features.document_set.models import DocumentSet from danswer.server.features.document_set.models import DocumentSetCreationRequest from danswer.server.features.document_set.models import DocumentSetUpdateRequest +from ee.danswer.db.user_group import validate_user_creation_permissions router = APIRouter(prefix="/manage") @@ -31,11 +30,12 @@ def create_document_set( user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> int: - if user and user.role != UserRole.ADMIN: - validate_curator_request( - groups=document_set_creation_request.groups, - is_public=document_set_creation_request.is_public, - ) + validate_user_creation_permissions( + db_session=db_session, + user=user, + target_group_ids=document_set_creation_request.groups, + object_is_public=document_set_creation_request.is_public, + ) try: document_set_db_model, _ = insert_document_set( document_set_creation_request=document_set_creation_request, @@ -53,11 +53,12 @@ def patch_document_set( user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> None: - if user and user.role != UserRole.ADMIN: - validate_curator_request( - groups=document_set_update_request.groups, - is_public=document_set_update_request.is_public, - ) + validate_user_creation_permissions( + db_session=db_session, + user=user, + target_group_ids=document_set_update_request.groups, + object_is_public=document_set_update_request.is_public, + ) try: update_document_set( document_set_update_request=document_set_update_request, diff --git a/backend/danswer/server/features/document_set/models.py b/backend/danswer/server/features/document_set/models.py index 55f3376545f..740cb6906cf 100644 --- a/backend/danswer/server/features/document_set/models.py +++ b/backend/danswer/server/features/document_set/models.py @@ -47,7 +47,6 @@ class DocumentSet(BaseModel): description: str cc_pair_descriptors: list[ConnectorCredentialPairDescriptor] is_up_to_date: bool - contains_non_public: bool is_public: bool # For Private Document Sets, who should be able to access these users: list[UUID] @@ -59,12 +58,6 @@ def from_model(cls, document_set_model: 
DocumentSetDBModel) -> "DocumentSet": id=document_set_model.id, name=document_set_model.name, description=document_set_model.description, - contains_non_public=any( - [ - not cc_pair.is_public - for cc_pair in document_set_model.connector_credential_pairs - ] - ), cc_pair_descriptors=[ ConnectorCredentialPairDescriptor( id=cc_pair.id, diff --git a/backend/danswer/server/features/persona/api.py b/backend/danswer/server/features/persona/api.py index 72b16d719ff..bcc4800b860 100644 --- a/backend/danswer/server/features/persona/api.py +++ b/backend/danswer/server/features/persona/api.py @@ -3,6 +3,7 @@ from fastapi import APIRouter from fastapi import Depends +from fastapi import HTTPException from fastapi import Query from fastapi import UploadFile from pydantic import BaseModel @@ -20,6 +21,7 @@ from danswer.db.persona import mark_persona_as_deleted from danswer.db.persona import mark_persona_as_not_deleted from danswer.db.persona import update_all_personas_display_priority +from danswer.db.persona import update_persona_public_status from danswer.db.persona import update_persona_shared_users from danswer.db.persona import update_persona_visibility from danswer.file_store.file_store import get_default_file_store @@ -43,6 +45,10 @@ class IsVisibleRequest(BaseModel): is_visible: bool +class IsPublicRequest(BaseModel): + is_public: bool + + @admin_router.patch("/{persona_id}/visible") def patch_persona_visibility( persona_id: int, @@ -58,6 +64,25 @@ def patch_persona_visibility( ) +@basic_router.patch("/{persona_id}/public") +def patch_user_presona_public_status( + persona_id: int, + is_public_request: IsPublicRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + try: + update_persona_public_status( + persona_id=persona_id, + is_public=is_public_request.is_public, + db_session=db_session, + user=user, + ) + except ValueError as e: + logger.exception("Failed to update persona public status") + raise HTTPException(status_code=403, detail=str(e)) + + @admin_router.put("/display-priority") def patch_persona_display_priority( display_priority_request: DisplayPriorityRequest, diff --git a/backend/danswer/server/features/persona/models.py b/backend/danswer/server/features/persona/models.py index 777ef2037ee..016defda369 100644 --- a/backend/danswer/server/features/persona/models.py +++ b/backend/danswer/server/features/persona/models.py @@ -1,3 +1,4 @@ +from datetime import datetime from uuid import UUID from pydantic import BaseModel @@ -12,7 +13,6 @@ from danswer.server.models import MinimalUserSnapshot from danswer.utils.logger import setup_logger - logger = setup_logger() @@ -38,6 +38,9 @@ class CreatePersonaRequest(BaseModel): icon_shape: int | None = None uploaded_image_id: str | None = None # New field for uploaded image remove_image: bool | None = None + is_default_persona: bool = False + display_priority: int | None = None + search_start_date: datetime | None = None class PersonaSnapshot(BaseModel): @@ -54,7 +57,7 @@ class PersonaSnapshot(BaseModel): llm_model_provider_override: str | None llm_model_version_override: str | None starter_messages: list[StarterMessage] | None - default_persona: bool + builtin_persona: bool prompts: list[PromptSnapshot] tools: list[ToolSnapshot] document_sets: list[DocumentSet] @@ -63,6 +66,8 @@ class PersonaSnapshot(BaseModel): icon_color: str | None icon_shape: int | None uploaded_image_id: str | None = None + is_default_persona: bool + search_start_date: datetime | None = None @classmethod def 
from_model( @@ -93,7 +98,8 @@ def from_model( llm_model_provider_override=persona.llm_model_provider_override, llm_model_version_override=persona.llm_model_version_override, starter_messages=persona.starter_messages, - default_persona=persona.default_persona, + builtin_persona=persona.builtin_persona, + is_default_persona=persona.is_default_persona, prompts=[PromptSnapshot.from_model(prompt) for prompt in persona.prompts], tools=[ToolSnapshot.from_model(tool) for tool in persona.tools], document_sets=[ @@ -108,6 +114,7 @@ def from_model( icon_color=persona.icon_color, icon_shape=persona.icon_shape, uploaded_image_id=persona.uploaded_image_id, + search_start_date=persona.search_start_date, ) diff --git a/backend/danswer/server/features/tool/api.py b/backend/danswer/server/features/tool/api.py index 9635a276507..1d441593784 100644 --- a/backend/danswer/server/features/tool/api.py +++ b/backend/danswer/server/features/tool/api.py @@ -15,6 +15,8 @@ from danswer.db.tools import get_tool_by_id from danswer.db.tools import get_tools from danswer.db.tools import update_tool +from danswer.server.features.tool.models import CustomToolCreate +from danswer.server.features.tool.models import CustomToolUpdate from danswer.server.features.tool.models import ToolSnapshot from danswer.tools.custom.openapi_parsing import MethodSpec from danswer.tools.custom.openapi_parsing import openapi_to_method_specs @@ -24,18 +26,6 @@ admin_router = APIRouter(prefix="/admin/tool") -class CustomToolCreate(BaseModel): - name: str - description: str | None = None - definition: dict[str, Any] - - -class CustomToolUpdate(BaseModel): - name: str | None = None - description: str | None = None - definition: dict[str, Any] | None = None - - def _validate_tool_definition(definition: dict[str, Any]) -> None: try: validate_openapi_schema(definition) @@ -54,6 +44,7 @@ def create_custom_tool( name=tool_data.name, description=tool_data.description, openapi_schema=tool_data.definition, + custom_headers=tool_data.custom_headers, user_id=user.id if user else None, db_session=db_session, ) @@ -74,6 +65,7 @@ def update_custom_tool( name=tool_data.name, description=tool_data.description, openapi_schema=tool_data.definition, + custom_headers=tool_data.custom_headers, user_id=user.id if user else None, db_session=db_session, ) diff --git a/backend/danswer/server/features/tool/models.py b/backend/danswer/server/features/tool/models.py index 0c1da965d4f..bf3e4d159b6 100644 --- a/backend/danswer/server/features/tool/models.py +++ b/backend/danswer/server/features/tool/models.py @@ -12,6 +12,7 @@ class ToolSnapshot(BaseModel): definition: dict[str, Any] | None display_name: str in_code_tool_id: str | None + custom_headers: list[Any] | None @classmethod def from_model(cls, tool: Tool) -> "ToolSnapshot": @@ -22,4 +23,24 @@ def from_model(cls, tool: Tool) -> "ToolSnapshot": definition=tool.openapi_schema, display_name=tool.display_name or tool.name, in_code_tool_id=tool.in_code_tool_id, + custom_headers=tool.custom_headers, ) + + +class Header(BaseModel): + key: str + value: str + + +class CustomToolCreate(BaseModel): + name: str + description: str | None = None + definition: dict[str, Any] + custom_headers: list[Header] | None = None + + +class CustomToolUpdate(BaseModel): + name: str | None = None + description: str | None = None + definition: dict[str, Any] | None = None + custom_headers: list[Header] | None = None diff --git a/backend/danswer/server/manage/administrative.py b/backend/danswer/server/manage/administrative.py index 
0ac90ba8d11..1ebe5bd0691 100644 --- a/backend/danswer/server/manage/administrative.py +++ b/backend/danswer/server/manage/administrative.py @@ -10,7 +10,9 @@ from danswer.auth.users import current_admin_user from danswer.auth.users import current_curator_or_admin_user +from danswer.background.celery.celery_app import celery_app from danswer.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ +from danswer.configs.constants import DanswerCeleryPriority from danswer.configs.constants import DocumentSource from danswer.configs.constants import KV_GEN_AI_KEY_CHECK_TIME from danswer.db.connector_credential_pair import get_connector_credential_pair @@ -77,16 +79,10 @@ def document_boost_update( user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> StatusResponse: - curr_ind_name, sec_ind_name = get_both_index_names(db_session) - document_index = get_default_document_index( - primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name - ) - update_document_boost( db_session=db_session, document_id=boost_update.document_id, boost=boost_update.boost, - document_index=document_index, user=user, ) return StatusResponse(success=True, message="Updated document boost") @@ -151,10 +147,6 @@ def create_deletion_attempt_for_connector_id( user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> None: - from danswer.background.celery.celery_app import ( - cleanup_connector_credential_pair_task, - ) - connector_id = connector_credential_pair_identifier.connector_id credential_id = connector_credential_pair_identifier.credential_id @@ -166,10 +158,14 @@ def create_deletion_attempt_for_connector_id( get_editable=True, ) if cc_pair is None: + error = ( + f"Connector with ID '{connector_id}' and credential ID " + f"'{credential_id}' does not exist. Has it already been deleted?" + ) + logger.error(error) raise HTTPException( status_code=404, - detail=f"Connector with ID '{connector_id}' and credential ID " - f"'{credential_id}' does not exist. 
Has it already been deleted?", + detail=error, ) # Cancel any scheduled indexing attempts @@ -193,9 +189,13 @@ def create_deletion_attempt_for_connector_id( cc_pair_id=cc_pair.id, status=ConnectorCredentialPairStatus.DELETING, ) - # actually kick off the deletion - cleanup_connector_credential_pair_task.apply_async( - kwargs=dict(connector_id=connector_id, credential_id=credential_id), + + db_session.commit() + + # run the beat task to pick up this deletion from the db immediately + celery_app.send_task( + "check_for_connector_deletion_task", + priority=DanswerCeleryPriority.HIGH, ) if cc_pair.connector.source == DocumentSource.FILE: diff --git a/backend/danswer/server/manage/embedding/api.py b/backend/danswer/server/manage/embedding/api.py index 90fa69401c2..eac872810ef 100644 --- a/backend/danswer/server/manage/embedding/api.py +++ b/backend/danswer/server/manage/embedding/api.py @@ -9,7 +9,9 @@ from danswer.db.llm import remove_embedding_provider from danswer.db.llm import upsert_cloud_embedding_provider from danswer.db.models import User +from danswer.db.search_settings import get_all_search_settings from danswer.db.search_settings import get_current_db_embedding_provider +from danswer.indexing.models import EmbeddingModelDetail from danswer.natural_language_processing.search_nlp_models import EmbeddingModel from danswer.server.manage.embedding.models import CloudEmbeddingProvider from danswer.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest @@ -20,6 +22,7 @@ from shared_configs.enums import EmbeddingProvider from shared_configs.enums import EmbedTextType + logger = setup_logger() @@ -37,11 +40,12 @@ def test_embedding_configuration( server_host=MODEL_SERVER_HOST, server_port=MODEL_SERVER_PORT, api_key=test_llm_request.api_key, + api_url=test_llm_request.api_url, provider_type=test_llm_request.provider_type, + model_name=test_llm_request.model_name, normalize=False, query_prefix=None, passage_prefix=None, - model_name=None, ) test_model.encode(["Testing Embedding"], text_type=EmbedTextType.QUERY) @@ -56,6 +60,15 @@ def test_embedding_configuration( raise HTTPException(status_code=400, detail=error_msg) +@admin_router.get("", response_model=list[EmbeddingModelDetail]) +def list_embedding_models( + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[EmbeddingModelDetail]: + search_settings = get_all_search_settings(db_session) + return [EmbeddingModelDetail.from_db_model(setting) for setting in search_settings] + + @admin_router.get("/embedding-provider") def list_embedding_providers( _: User | None = Depends(current_admin_user), diff --git a/backend/danswer/server/manage/embedding/models.py b/backend/danswer/server/manage/embedding/models.py index 132d311413c..d6210118df5 100644 --- a/backend/danswer/server/manage/embedding/models.py +++ b/backend/danswer/server/manage/embedding/models.py @@ -8,14 +8,24 @@ from danswer.db.models import CloudEmbeddingProvider as CloudEmbeddingProviderModel +class SearchSettingsDeleteRequest(BaseModel): + search_settings_id: int + + class TestEmbeddingRequest(BaseModel): provider_type: EmbeddingProvider api_key: str | None = None + api_url: str | None = None + model_name: str | None = None + + # This disables the "model_" protected namespace for pydantic + model_config = {"protected_namespaces": ()} class CloudEmbeddingProvider(BaseModel): provider_type: EmbeddingProvider api_key: str | None = None + api_url: str | None = None @classmethod def from_request( @@ -24,9 +34,11 @@ 
def from_request( return cls( provider_type=cloud_provider_model.provider_type, api_key=cloud_provider_model.api_key, + api_url=cloud_provider_model.api_url, ) class CloudEmbeddingProviderCreationRequest(BaseModel): provider_type: EmbeddingProvider api_key: str | None = None + api_url: str | None = None diff --git a/backend/danswer/server/manage/llm/api.py b/backend/danswer/server/manage/llm/api.py index 9ea9fe927db..23f16047e91 100644 --- a/backend/danswer/server/manage/llm/api.py +++ b/backend/danswer/server/manage/llm/api.py @@ -3,12 +3,14 @@ from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException +from fastapi import Query from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user from danswer.auth.users import current_user from danswer.db.engine import get_session from danswer.db.llm import fetch_existing_llm_providers +from danswer.db.llm import fetch_provider from danswer.db.llm import remove_llm_provider from danswer.db.llm import update_default_provider from danswer.db.llm import upsert_llm_provider @@ -17,6 +19,7 @@ from danswer.llm.factory import get_llm from danswer.llm.llm_provider_options import fetch_available_well_known_llms from danswer.llm.llm_provider_options import WellKnownLLMProviderDescriptor +from danswer.llm.utils import litellm_exception_to_error_msg from danswer.llm.utils import test_llm from danswer.server.manage.llm.models import FullLLMProvider from danswer.server.manage.llm.models import LLMProviderDescriptor @@ -77,7 +80,10 @@ def test_llm_configuration( ) if error: - raise HTTPException(status_code=400, detail=error) + client_error_msg = litellm_exception_to_error_msg( + error, llm, fallback_to_error_msg=True + ) + raise HTTPException(status_code=400, detail=client_error_msg) @admin_router.post("/test/default") @@ -118,10 +124,31 @@ def list_llm_providers( @admin_router.put("/provider") def put_llm_provider( llm_provider: LLMProviderUpsertRequest, + is_creation: bool = Query( + False, + description="True if creating a new provider, False if updating an existing one", + ), _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> FullLLMProvider: - return upsert_llm_provider(db_session, llm_provider) + # validate request (e.g.
if we're intending to create but the name already exists we should throw an error) + # NOTE: may involve duplicate fetching to Postgres, but we're assuming SQLAlchemy is smart enough to cache + # the result + existing_provider = fetch_provider(db_session, llm_provider.name) + if existing_provider and is_creation: + raise HTTPException( + status_code=400, + detail=f"LLM Provider with name {llm_provider.name} already exists", + ) + + try: + return upsert_llm_provider( + llm_provider=llm_provider, + db_session=db_session, + ) + except ValueError as e: + logger.exception("Failed to upsert LLM Provider") + raise HTTPException(status_code=400, detail=str(e)) @admin_router.delete("/provider/{provider_id}") @@ -139,7 +166,7 @@ def set_provider_as_default( _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> None: - update_default_provider(db_session, provider_id) + update_default_provider(provider_id=provider_id, db_session=db_session) """Endpoints for all""" diff --git a/backend/danswer/server/manage/models.py b/backend/danswer/server/manage/models.py index 160c90bdb78..e3be4c4891d 100644 --- a/backend/danswer/server/manage/models.py +++ b/backend/danswer/server/manage/models.py @@ -15,13 +15,12 @@ from danswer.db.models import ChannelConfig from danswer.db.models import SlackBotConfig as SlackBotConfigModel from danswer.db.models import SlackBotResponseType -from danswer.db.models import StandardAnswer as StandardAnswerModel -from danswer.db.models import StandardAnswerCategory as StandardAnswerCategoryModel from danswer.db.models import User from danswer.search.models import SavedSearchSettings from danswer.server.features.persona.models import PersonaSnapshot from danswer.server.models import FullUserSnapshot from danswer.server.models import InvitedUserSnapshot +from ee.danswer.server.manage.models import StandardAnswerCategory if TYPE_CHECKING: @@ -41,6 +40,9 @@ class AuthTypeResponse(BaseModel): class UserPreferences(BaseModel): chosen_assistants: list[int] | None = None + hidden_assistants: list[int] = [] + visible_assistants: list[int] = [] + default_model: str | None = None @@ -74,6 +76,8 @@ def from_model( UserPreferences( chosen_assistants=user.chosen_assistants, default_model=user.default_model, + hidden_assistants=user.hidden_assistants, + visible_assistants=user.visible_assistants, ) ), # set to None if TRACK_EXTERNAL_IDP_EXPIRY is False so that we avoid cases @@ -117,58 +121,6 @@ class HiddenUpdateRequest(BaseModel): hidden: bool -class StandardAnswerCategoryCreationRequest(BaseModel): - name: str - - -class StandardAnswerCategory(BaseModel): - id: int - name: str - - @classmethod - def from_model( - cls, standard_answer_category: StandardAnswerCategoryModel - ) -> "StandardAnswerCategory": - return cls( - id=standard_answer_category.id, - name=standard_answer_category.name, - ) - - -class StandardAnswer(BaseModel): - id: int - keyword: str - answer: str - categories: list[StandardAnswerCategory] - - @classmethod - def from_model(cls, standard_answer_model: StandardAnswerModel) -> "StandardAnswer": - return cls( - id=standard_answer_model.id, - keyword=standard_answer_model.keyword, - answer=standard_answer_model.answer, - categories=[ - StandardAnswerCategory.from_model(standard_answer_category_model) - for standard_answer_category_model in standard_answer_model.categories - ], - ) - - -class StandardAnswerCreationRequest(BaseModel): - keyword: str - answer: str - categories: list[int] - - @field_validator("categories", mode="before") - 
@classmethod - def validate_categories(cls, value: list[int]) -> list[int]: - if len(value) < 1: - raise ValueError( - "At least one category must be attached to a standard answer" - ) - return value - - class SlackBotTokens(BaseModel): bot_token: str app_token: str @@ -194,6 +146,7 @@ class SlackBotConfigCreationRequest(BaseModel): # list of user emails follow_up_tags: list[str] | None = None response_type: SlackBotResponseType + # XXX this is going away soon standard_answer_categories: list[int] = Field(default_factory=list) @field_validator("answer_filters", mode="before") @@ -218,6 +171,7 @@ class SlackBotConfig(BaseModel): persona: PersonaSnapshot | None channel_config: ChannelConfig response_type: SlackBotResponseType + # XXX this is going away soon standard_answer_categories: list[StandardAnswerCategory] enable_auto_filters: bool @@ -236,6 +190,7 @@ def from_model( ), channel_config=slack_bot_config_model.channel_config, response_type=slack_bot_config_model.response_type, + # XXX this is going away soon standard_answer_categories=[ StandardAnswerCategory.from_model(standard_answer_category_model) for standard_answer_category_model in slack_bot_config_model.standard_answer_categories diff --git a/backend/danswer/server/manage/search_settings.py b/backend/danswer/server/manage/search_settings.py index db483eff5da..c8433467f6c 100644 --- a/backend/danswer/server/manage/search_settings.py +++ b/backend/danswer/server/manage/search_settings.py @@ -14,6 +14,7 @@ from danswer.db.models import IndexModelStatus from danswer.db.models import User from danswer.db.search_settings import create_search_settings +from danswer.db.search_settings import delete_search_settings from danswer.db.search_settings import get_current_search_settings from danswer.db.search_settings import get_embedding_provider_from_provider_type from danswer.db.search_settings import get_secondary_search_settings @@ -23,6 +24,7 @@ from danswer.natural_language_processing.search_nlp_models import clean_model_name from danswer.search.models import SavedSearchSettings from danswer.search.models import SearchSettingsCreationRequest +from danswer.server.manage.embedding.models import SearchSettingsDeleteRequest from danswer.server.manage.models import FullModelVersionResponse from danswer.server.models import IdReturn from danswer.utils.logger import setup_logger @@ -45,7 +47,7 @@ def set_new_search_settings( if search_settings_new.index_name: logger.warning("Index name was specified by request, this is not suggested") - # Validate cloud provider exists + # Validate cloud provider exists or create new LiteLLM provider if search_settings_new.provider_type is not None: cloud_provider = get_embedding_provider_from_provider_type( db_session, provider_type=search_settings_new.provider_type @@ -97,6 +99,7 @@ def set_new_search_settings( primary_index_name=search_settings.index_name, secondary_index_name=new_search_settings.index_name, ) + document_index.ensure_indices_exist( index_embedding_dim=search_settings.model_dim, secondary_index_embedding_dim=new_search_settings.model_dim, @@ -132,8 +135,23 @@ def cancel_new_embedding( ) +@router.delete("/delete-search-settings") +def delete_search_settings_endpoint( + deletion_request: SearchSettingsDeleteRequest, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> None: + try: + delete_search_settings( + db_session=db_session, + search_settings_id=deletion_request.search_settings_id, + ) + except ValueError as e: + raise 
HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + + @router.get("/get-current-search-settings") -def get_curr_search_settings( +def get_current_search_settings_endpoint( _: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> SavedSearchSettings: @@ -142,7 +160,7 @@ def get_curr_search_settings( @router.get("/get-secondary-search-settings") -def get_sec_search_settings( +def get_secondary_search_settings_endpoint( _: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> SavedSearchSettings | None: diff --git a/backend/danswer/server/manage/slack_bot.py b/backend/danswer/server/manage/slack_bot.py index 0fb1459072b..9a06b225cce 100644 --- a/backend/danswer/server/manage/slack_bot.py +++ b/backend/danswer/server/manage/slack_bot.py @@ -108,6 +108,7 @@ def create_slack_bot_config( persona_id=persona_id, channel_config=channel_config, response_type=slack_bot_config_creation_request.response_type, + # XXX this is going away soon standard_answer_category_ids=slack_bot_config_creation_request.standard_answer_categories, db_session=db_session, enable_auto_filters=slack_bot_config_creation_request.enable_auto_filters, diff --git a/backend/danswer/server/manage/users.py b/backend/danswer/server/manage/users.py index d2fd981b5b5..e72b85dedad 100644 --- a/backend/danswer/server/manage/users.py +++ b/backend/danswer/server/manage/users.py @@ -31,13 +31,18 @@ from danswer.configs.constants import AuthType from danswer.db.engine import get_session from danswer.db.models import AccessToken +from danswer.db.models import DocumentSet__User +from danswer.db.models import Persona__User +from danswer.db.models import SamlAccount from danswer.db.models import User +from danswer.db.models import User__UserGroup from danswer.db.users import get_user_by_email from danswer.db.users import list_users from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.server.manage.models import AllUsersResponse from danswer.server.manage.models import UserByEmail from danswer.server.manage.models import UserInfo +from danswer.server.manage.models import UserPreferences from danswer.server.manage.models import UserRoleResponse from danswer.server.manage.models import UserRoleUpdateRequest from danswer.server.models import FullUserSnapshot @@ -45,6 +50,7 @@ from danswer.server.models import MinimalUserSnapshot from danswer.utils.logger import setup_logger from ee.danswer.db.api_key import is_api_key_email_address +from ee.danswer.db.external_perm import delete_user__ext_group_for_user__no_commit from ee.danswer.db.user_group import remove_curator_status__no_commit logger = setup_logger() @@ -213,6 +219,71 @@ def deactivate_user( db_session.commit() +@router.delete("/manage/admin/delete-user") +async def delete_user( + user_email: UserByEmail, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> None: + user_to_delete = get_user_by_email( + email=user_email.user_email, db_session=db_session + ) + if not user_to_delete: + raise HTTPException(status_code=404, detail="User not found") + + if user_to_delete.is_active is True: + logger.warning( + "{} must be deactivated before deleting".format(user_to_delete.email) + ) + raise HTTPException( + status_code=400, detail="User must be deactivated before deleting" + ) + + # Detach the user from the current session + db_session.expunge(user_to_delete) + + try: + for oauth_account in user_to_delete.oauth_accounts: + 
db_session.delete(oauth_account) + + delete_user__ext_group_for_user__no_commit( + db_session=db_session, + user_id=user_to_delete.id, + ) + db_session.query(SamlAccount).filter( + SamlAccount.user_id == user_to_delete.id + ).delete() + db_session.query(DocumentSet__User).filter( + DocumentSet__User.user_id == user_to_delete.id + ).delete() + db_session.query(Persona__User).filter( + Persona__User.user_id == user_to_delete.id + ).delete() + db_session.query(User__UserGroup).filter( + User__UserGroup.user_id == user_to_delete.id + ).delete() + db_session.delete(user_to_delete) + db_session.commit() + + # NOTE: edge case may exist with race conditions + # with this `invited user` scheme generally. + user_emails = get_invited_users() + remaining_users = [ + user for user in user_emails if user != user_email.user_email + ] + write_invited_users(remaining_users) + + logger.info(f"Deleted user {user_to_delete.email}") + except Exception as e: + import traceback + + full_traceback = traceback.format_exc() + logger.error(f"Full stack trace:\n{full_traceback}") + db_session.rollback() + logger.error(f"Error deleting user {user_to_delete.email}: {str(e)}") + raise HTTPException(status_code=500, detail="Error deleting user") + + @router.patch("/manage/admin/activate-user") def activate_user( user_email: UserByEmail, @@ -377,3 +448,64 @@ def update_user_assistant_list( .values(chosen_assistants=request.chosen_assistants) ) db_session.commit() + + +def update_assistant_list( + preferences: UserPreferences, assistant_id: int, show: bool +) -> UserPreferences: + visible_assistants = preferences.visible_assistants or [] + hidden_assistants = preferences.hidden_assistants or [] + chosen_assistants = preferences.chosen_assistants or [] + + if show: + if assistant_id not in visible_assistants: + visible_assistants.append(assistant_id) + if assistant_id in hidden_assistants: + hidden_assistants.remove(assistant_id) + if assistant_id not in chosen_assistants: + chosen_assistants.append(assistant_id) + else: + if assistant_id in visible_assistants: + visible_assistants.remove(assistant_id) + if assistant_id not in hidden_assistants: + hidden_assistants.append(assistant_id) + if assistant_id in chosen_assistants: + chosen_assistants.remove(assistant_id) + + preferences.visible_assistants = visible_assistants + preferences.hidden_assistants = hidden_assistants + preferences.chosen_assistants = chosen_assistants + return preferences + + +@router.patch("/user/assistant-list/update/{assistant_id}") +def update_user_assistant_visibility( + assistant_id: int, + show: bool, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + if user is None: + if AUTH_TYPE == AuthType.DISABLED: + store = get_dynamic_config_store() + no_auth_user = fetch_no_auth_user(store) + preferences = no_auth_user.preferences + updated_preferences = update_assistant_list(preferences, assistant_id, show) + set_no_auth_user_preferences(store, updated_preferences) + return + else: + raise RuntimeError("This should never happen") + + user_preferences = UserInfo.from_model(user).preferences + updated_preferences = update_assistant_list(user_preferences, assistant_id, show) + + db_session.execute( + update(User) + .where(User.id == user.id) # type: ignore + .values( + hidden_assistants=updated_preferences.hidden_assistants, + visible_assistants=updated_preferences.visible_assistants, + chosen_assistants=updated_preferences.chosen_assistants, + ) + ) + db_session.commit() diff --git 
a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py index a37758336a2..c7f5983417d 100644 --- a/backend/danswer/server/query_and_chat/chat_backend.py +++ b/backend/danswer/server/query_and_chat/chat_backend.py @@ -164,7 +164,7 @@ def get_chat_session( chat_session_id=session_id, description=chat_session.description, persona_id=chat_session.persona_id, - persona_name=chat_session.persona.name, + persona_name=chat_session.persona.name if chat_session.persona else None, current_alternate_model=chat_session.current_alternate_model, messages=[ translate_db_message_to_chat_message_detail( @@ -269,7 +269,10 @@ def delete_chat_session_by_id( db_session: Session = Depends(get_session), ) -> None: user_id = user.id if user is not None else None - delete_chat_session(user_id, session_id, db_session) + try: + delete_chat_session(user_id, session_id, db_session) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) async def is_disconnected(request: Request) -> Callable[[], bool]: diff --git a/backend/danswer/server/query_and_chat/models.py b/backend/danswer/server/query_and_chat/models.py index 55d1094ea86..c9109b141c3 100644 --- a/backend/danswer/server/query_and_chat/models.py +++ b/backend/danswer/server/query_and_chat/models.py @@ -136,7 +136,7 @@ class RenameChatSessionResponse(BaseModel): class ChatSessionDetails(BaseModel): id: int name: str - persona_id: int + persona_id: int | None = None time_created: str shared_status: ChatSessionSharedStatus folder_id: int | None = None @@ -196,8 +196,8 @@ class SearchSessionDetailResponse(BaseModel): class ChatSessionDetailResponse(BaseModel): chat_session_id: int description: str - persona_id: int - persona_name: str + persona_id: int | None = None + persona_name: str | None messages: list[ChatMessageDetail] time_created: datetime shared_status: ChatSessionSharedStatus diff --git a/backend/danswer/server/query_and_chat/query_backend.py b/backend/danswer/server/query_and_chat/query_backend.py index 704b16d5eaa..96f674276f4 100644 --- a/backend/danswer/server/query_and_chat/query_backend.py +++ b/backend/danswer/server/query_and_chat/query_backend.py @@ -11,14 +11,14 @@ from danswer.db.chat import get_chat_messages_by_session from danswer.db.chat import get_chat_session_by_id from danswer.db.chat import get_chat_sessions_by_user -from danswer.db.chat import get_first_messages_for_chat_sessions from danswer.db.chat import get_search_docs_for_chat_message +from danswer.db.chat import get_valid_messages_from_query_sessions from danswer.db.chat import translate_db_message_to_chat_message_detail from danswer.db.chat import translate_db_search_doc_to_server_search_doc from danswer.db.engine import get_session from danswer.db.models import User from danswer.db.search_settings import get_current_search_settings -from danswer.db.tag import get_tags_by_value_prefix_for_source_types +from danswer.db.tag import find_tags from danswer.document_index.factory import get_default_document_index from danswer.document_index.vespa.index import VespaIndex from danswer.one_shot_answer.answer_question import stream_search_answer @@ -99,12 +99,25 @@ def get_tags( if not allow_prefix: raise NotImplementedError("Cannot disable prefix match for now") - db_tags = get_tags_by_value_prefix_for_source_types( - tag_key_prefix=match_pattern, - tag_value_prefix=match_pattern, + key_prefix = match_pattern + value_prefix = match_pattern + require_both_to_match = False + + # split on = to allow the user to 
type in "author=bob" + EQUAL_PAT = "=" + if match_pattern and EQUAL_PAT in match_pattern: + split_pattern = match_pattern.split(EQUAL_PAT) + key_prefix = split_pattern[0] + value_prefix = EQUAL_PAT.join(split_pattern[1:]) + require_both_to_match = True + + db_tags = find_tags( + tag_key_prefix=key_prefix, + tag_value_prefix=value_prefix, sources=sources, limit=limit, db_session=db_session, + require_both_to_match=require_both_to_match, ) server_tags = [ SourceTag( @@ -142,18 +155,20 @@ def get_user_search_sessions( raise HTTPException( status_code=404, detail="Chat session does not exist or has been deleted" ) - + # Extract IDs from search sessions search_session_ids = [chat.id for chat in search_sessions] - first_messages = get_first_messages_for_chat_sessions( + # Fetch first messages for each session, only including those with documents + sessions_with_documents = get_valid_messages_from_query_sessions( search_session_ids, db_session ) - first_messages_dict = dict(first_messages) + sessions_with_documents_dict = dict(sessions_with_documents) + # Prepare response with detailed information for each valid search session response = ChatSessionsResponse( sessions=[ ChatSessionDetails( id=search.id, - name=first_messages_dict.get(search.id, search.description), + name=sessions_with_documents_dict[search.id], persona_id=search.persona_id, time_created=search.time_created.isoformat(), shared_status=search.shared_status, @@ -161,8 +176,11 @@ def get_user_search_sessions( current_alternate_model=search.current_alternate_model, ) for search in search_sessions + if search.id + in sessions_with_documents_dict # Only include sessions with documents ] ) + return response diff --git a/backend/danswer/server/settings/api.py b/backend/danswer/server/settings/api.py index 3330f6cc5ff..5b8564c3d3a 100644 --- a/backend/danswer/server/settings/api.py +++ b/backend/danswer/server/settings/api.py @@ -66,7 +66,7 @@ def fetch_settings( return UserSettings( **general_settings.model_dump(), notifications=user_notifications, - needs_reindexing=needs_reindexing + needs_reindexing=needs_reindexing, ) diff --git a/backend/danswer/server/settings/models.py b/backend/danswer/server/settings/models.py index e999e7294e9..ae7e7236c8d 100644 --- a/backend/danswer/server/settings/models.py +++ b/backend/danswer/server/settings/models.py @@ -37,6 +37,7 @@ class Settings(BaseModel): search_page_enabled: bool = True default_page: PageType = PageType.SEARCH maximum_chat_retention_days: int | None = None + gpu_enabled: bool | None = None def check_validity(self) -> None: chat_page_enabled = self.chat_page_enabled diff --git a/backend/danswer/server/utils.py b/backend/danswer/server/utils.py index bf535661878..53ed5b426ba 100644 --- a/backend/danswer/server/utils.py +++ b/backend/danswer/server/utils.py @@ -1,9 +1,31 @@ import json +from datetime import datetime from typing import Any -def get_json_line(json_dict: dict) -> str: - return json.dumps(json_dict) + "\n" +class DateTimeEncoder(json.JSONEncoder): + """Custom JSON encoder that converts datetime objects to ISO format strings.""" + + def default(self, obj: Any) -> Any: + if isinstance(obj, datetime): + return obj.isoformat() + return super().default(obj) + + +def get_json_line( + json_dict: dict[str, Any], encoder: type[json.JSONEncoder] = DateTimeEncoder +) -> str: + """ + Convert a dictionary to a JSON string with datetime handling, and add a newline. + + Args: + json_dict: The dictionary to be converted to JSON. 
+ encoder: JSON encoder class to use, defaults to DateTimeEncoder. + + Returns: + A JSON string representation of the input dictionary with a newline character. + """ + return json.dumps(json_dict, cls=encoder) + "\n" def mask_string(sensitive_str: str) -> str: diff --git a/backend/danswer/tools/custom/custom_tool.py b/backend/danswer/tools/custom/custom_tool.py index f7cbf236f2b..3d36d7bb055 100644 --- a/backend/danswer/tools/custom/custom_tool.py +++ b/backend/danswer/tools/custom/custom_tool.py @@ -24,6 +24,9 @@ from danswer.tools.custom.openapi_parsing import openapi_to_url from danswer.tools.custom.openapi_parsing import REQUEST_BODY from danswer.tools.custom.openapi_parsing import validate_openapi_schema +from danswer.tools.models import CHAT_SESSION_ID_PLACEHOLDER +from danswer.tools.models import DynamicSchemaInfo +from danswer.tools.models import MESSAGE_ID_PLACEHOLDER from danswer.tools.tool import Tool from danswer.tools.tool import ToolResponse from danswer.utils.logger import setup_logger @@ -39,13 +42,23 @@ class CustomToolCallSummary(BaseModel): class CustomTool(Tool): - def __init__(self, method_spec: MethodSpec, base_url: str) -> None: + def __init__( + self, + method_spec: MethodSpec, + base_url: str, + custom_headers: list[dict[str, str]] | None = [], + ) -> None: self._base_url = base_url self._method_spec = method_spec self._tool_definition = self._method_spec.to_tool_definition() self._name = self._method_spec.name self._description = self._method_spec.summary + self.headers = ( + {header["key"]: header["value"] for header in custom_headers} + if custom_headers + else {} + ) @property def name(self) -> str: @@ -141,6 +154,7 @@ def run(self, **kwargs: Any) -> Generator[ToolResponse, None, None]: request_body = kwargs.get(REQUEST_BODY) path_params = {} + for path_param_schema in self._method_spec.get_path_param_schemas(): path_params[path_param_schema["name"]] = kwargs[path_param_schema["name"]] @@ -153,8 +167,10 @@ def run(self, **kwargs: Any) -> Generator[ToolResponse, None, None]: url = self._method_spec.build_url(self._base_url, path_params, query_params) method = self._method_spec.method - - response = requests.request(method, url, json=request_body) + # Send the request, attaching any configured custom headers + response = requests.request( + method, url, json=request_body, headers=self.headers + ) yield ToolResponse( id=CUSTOM_TOOL_RESPONSE_ID, @@ -167,12 +183,30 @@ def final_result(self, *args: ToolResponse) -> JSON_ro: return cast(CustomToolCallSummary, args[0].response).tool_result -def build_custom_tools_from_openapi_schema( - openapi_schema: dict[str, Any] +def build_custom_tools_from_openapi_schema_and_headers( + openapi_schema: dict[str, Any], + custom_headers: list[dict[str, str]] | None = [], + dynamic_schema_info: DynamicSchemaInfo | None = None, ) -> list[CustomTool]: + if dynamic_schema_info: + # Process dynamic schema information + schema_str = json.dumps(openapi_schema) + placeholders = { + CHAT_SESSION_ID_PLACEHOLDER: dynamic_schema_info.chat_session_id, + MESSAGE_ID_PLACEHOLDER: dynamic_schema_info.message_id, + } + + for placeholder, value in placeholders.items(): + if value: + schema_str = schema_str.replace(placeholder, str(value)) + + openapi_schema = json.loads(schema_str) + url = openapi_to_url(openapi_schema) method_specs = openapi_to_method_specs(openapi_schema) - return [CustomTool(method_spec, url) for method_spec in method_specs] + return [ + CustomTool(method_spec, url, custom_headers) for method_spec in method_specs + ] if __name__ == "__main__": @@ -223,7 +257,9 @@ def
build_custom_tools_from_openapi_schema( } validate_openapi_schema(openapi_schema) - tools = build_custom_tools_from_openapi_schema(openapi_schema) + tools = build_custom_tools_from_openapi_schema_and_headers( + openapi_schema, dynamic_schema_info=None + ) openai_client = openai.OpenAI() response = openai_client.chat.completions.create( diff --git a/backend/danswer/tools/images/image_generation_tool.py b/backend/danswer/tools/images/image_generation_tool.py index fe839b7d68c..6e2515a8e9f 100644 --- a/backend/danswer/tools/images/image_generation_tool.py +++ b/backend/danswer/tools/images/image_generation_tool.py @@ -200,6 +200,7 @@ def _generate_image( revised_prompt=response.data[0]["revised_prompt"], url=response.data[0]["url"], ) + except Exception as e: logger.debug(f"Error occured during image generation: {e}") diff --git a/backend/danswer/tools/internet_search/internet_search_tool.py b/backend/danswer/tools/internet_search/internet_search_tool.py index 2640afcdf83..3012eb465f4 100644 --- a/backend/danswer/tools/internet_search/internet_search_tool.py +++ b/backend/danswer/tools/internet_search/internet_search_tool.py @@ -20,7 +20,7 @@ from danswer.secondary_llm_flows.query_expansion import history_based_query_rephrase from danswer.tools.internet_search.models import InternetSearchResponse from danswer.tools.internet_search.models import InternetSearchResult -from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS +from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS_ID from danswer.tools.tool import Tool from danswer.tools.tool import ToolResponse from danswer.utils.logger import setup_logger @@ -224,7 +224,7 @@ def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: ] yield ToolResponse( - id=FINAL_CONTEXT_DOCUMENTS, + id=FINAL_CONTEXT_DOCUMENTS_ID, response=llm_docs, ) diff --git a/backend/danswer/tools/models.py b/backend/danswer/tools/models.py index 052e4293a53..6317a95e2d3 100644 --- a/backend/danswer/tools/models.py +++ b/backend/danswer/tools/models.py @@ -37,3 +37,12 @@ class ToolCallFinalResult(ToolCallKickoff): tool_result: Any = ( None # we would like to use JSON_ro, but can't due to its recursive nature ) + + +class DynamicSchemaInfo(BaseModel): + chat_session_id: int | None + message_id: int | None + + +CHAT_SESSION_ID_PLACEHOLDER = "CHAT_SESSION_ID" +MESSAGE_ID_PLACEHOLDER = "MESSAGE_ID" diff --git a/backend/danswer/tools/search/search_tool.py b/backend/danswer/tools/search/search_tool.py index 13d3a304b06..cbfaf4f3d92 100644 --- a/backend/danswer/tools/search/search_tool.py +++ b/backend/danswer/tools/search/search_tool.py @@ -45,7 +45,7 @@ SEARCH_RESPONSE_SUMMARY_ID = "search_response_summary" SEARCH_DOC_CONTENT_ID = "search_doc_content" SECTION_RELEVANCE_LIST_ID = "section_relevance_list" -FINAL_CONTEXT_DOCUMENTS = "final_context_documents" +FINAL_CONTEXT_DOCUMENTS_ID = "final_context_documents" SEARCH_EVALUATION_ID = "llm_doc_eval" @@ -179,7 +179,7 @@ def build_tool_message_content( self, *args: ToolResponse ) -> str | list[str | dict[str, Any]]: final_context_docs_response = next( - response for response in args if response.id == FINAL_CONTEXT_DOCUMENTS + response for response in args if response.id == FINAL_CONTEXT_DOCUMENTS_ID ) final_context_docs = cast(list[LlmDoc], final_context_docs_response.response) @@ -260,7 +260,7 @@ def _build_response_for_specified_sections( for section in final_context_sections ] - yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS, response=llm_docs) + yield 
ToolResponse(id=FINAL_CONTEXT_DOCUMENTS_ID, response=llm_docs) def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: query = cast(str, kwargs["query"]) @@ -343,12 +343,12 @@ def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: llm_doc_from_inference_section(section) for section in pruned_sections ] - yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS, response=llm_docs) + yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS_ID, response=llm_docs) def final_result(self, *args: ToolResponse) -> JSON_ro: final_docs = cast( list[LlmDoc], - next(arg.response for arg in args if arg.id == FINAL_CONTEXT_DOCUMENTS), + next(arg.response for arg in args if arg.id == FINAL_CONTEXT_DOCUMENTS_ID), ) # NOTE: need to do this json.loads(doc.json()) stuff because there are some # subfields that are not serializable by default (datetime) diff --git a/backend/danswer/tools/tool_runner.py b/backend/danswer/tools/tool_runner.py index f962c214a03..58b94bdb0c8 100644 --- a/backend/danswer/tools/tool_runner.py +++ b/backend/danswer/tools/tool_runner.py @@ -1,3 +1,4 @@ +from collections.abc import Callable from collections.abc import Generator from typing import Any @@ -47,7 +48,7 @@ def tool_final_result(self) -> ToolCallFinalResult: def check_which_tools_should_run_for_non_tool_calling_llm( tools: list[Tool], query: str, history: list[PreviousMessage], llm: LLM ) -> list[dict[str, Any] | None]: - tool_args_list = [ + tool_args_list: list[tuple[Callable[..., Any], tuple[Any, ...]]] = [ (tool.get_args_for_non_tool_calling_llm, (query, history, llm)) for tool in tools ] diff --git a/backend/danswer/utils/errors.py b/backend/danswer/utils/errors.py new file mode 100644 index 00000000000..86b9d4252f3 --- /dev/null +++ b/backend/danswer/utils/errors.py @@ -0,0 +1,3 @@ +class EERequiredError(Exception): + """This error is thrown if an Enterprise Edition feature or API is + requested but the Enterprise Edition flag is not set.""" diff --git a/backend/danswer/utils/gpu_utils.py b/backend/danswer/utils/gpu_utils.py new file mode 100644 index 00000000000..70a3dbc2c95 --- /dev/null +++ b/backend/danswer/utils/gpu_utils.py @@ -0,0 +1,30 @@ +import requests +from retry import retry + +from danswer.utils.logger import setup_logger +from shared_configs.configs import INDEXING_MODEL_SERVER_HOST +from shared_configs.configs import INDEXING_MODEL_SERVER_PORT +from shared_configs.configs import MODEL_SERVER_HOST +from shared_configs.configs import MODEL_SERVER_PORT + +logger = setup_logger() + + +@retry(tries=5, delay=5) +def gpu_status_request(indexing: bool = True) -> bool: + if indexing: + model_server_url = f"{INDEXING_MODEL_SERVER_HOST}:{INDEXING_MODEL_SERVER_PORT}" + else: + model_server_url = f"{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}" + + if "http" not in model_server_url: + model_server_url = f"http://{model_server_url}" + + try: + response = requests.get(f"{model_server_url}/api/gpu-status", timeout=10) + response.raise_for_status() + gpu_status = response.json() + return gpu_status["gpu_available"] + except requests.RequestException as e: + logger.error(f"Error: Unable to fetch GPU status. Error: {str(e)}") + raise # Re-raise exception to trigger a retry diff --git a/backend/danswer/utils/logger.py b/backend/danswer/utils/logger.py index a7751ca3dc7..96d4ae2a25e 100644 --- a/backend/danswer/utils/logger.py +++ b/backend/danswer/utils/logger.py @@ -19,14 +19,22 @@ class IndexAttemptSingleton: main background job (scheduler), etc. 
this will not be used.""" _INDEX_ATTEMPT_ID: None | int = None + _CONNECTOR_CREDENTIAL_PAIR_ID: None | int = None @classmethod def get_index_attempt_id(cls) -> None | int: return cls._INDEX_ATTEMPT_ID @classmethod - def set_index_attempt_id(cls, index_attempt_id: int) -> None: + def get_connector_credential_pair_id(cls) -> None | int: + return cls._CONNECTOR_CREDENTIAL_PAIR_ID + + @classmethod + def set_cc_and_index_id( + cls, index_attempt_id: int, connector_credential_pair_id: int + ) -> None: cls._INDEX_ATTEMPT_ID = index_attempt_id + cls._CONNECTOR_CREDENTIAL_PAIR_ID = connector_credential_pair_id def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int: @@ -50,9 +58,14 @@ def process( # If this is an indexing job, add the attempt ID to the log message # This helps filter the logs for this specific indexing attempt_id = IndexAttemptSingleton.get_index_attempt_id() + cc_pair_id = IndexAttemptSingleton.get_connector_credential_pair_id() + if attempt_id is not None: msg = f"[Attempt ID: {attempt_id}] {msg}" + if cc_pair_id is not None: + msg = f"[CC Pair ID: {cc_pair_id}] {msg}" + # For Slack Bot, logs the channel relevant to the request channel_id = self.extra.get(SLACK_CHANNEL_ID) if self.extra else None if channel_id: @@ -67,6 +80,16 @@ def notice(self, msg: Any, *args: Any, **kwargs: Any) -> None: ) +class PlainFormatter(logging.Formatter): + """Adds log levels.""" + + def format(self, record: logging.LogRecord) -> str: + levelname = record.levelname + level_display = f"{levelname}:" + formatted_message = super().format(record) + return f"{level_display.ljust(9)} {formatted_message}" + + class ColoredFormatter(logging.Formatter): """Custom formatter to add colors to log levels.""" @@ -101,6 +124,13 @@ def get_standard_formatter() -> ColoredFormatter: ) +DANSWER_DOCKER_ENV_STR = "DANSWER_RUNNING_IN_DOCKER" + + +def is_running_in_container() -> bool: + return os.getenv(DANSWER_DOCKER_ENV_STR) == "true" + + def setup_logger( name: str = __name__, log_level: int = get_log_level_from_str(), @@ -128,7 +158,7 @@ def setup_logger( uvicorn_logger.addHandler(handler) uvicorn_logger.setLevel(log_level) - is_containerized = os.path.exists("/.dockerenv") + is_containerized = is_running_in_container() if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED): log_levels = ["debug", "info", "notice"] for level in log_levels: diff --git a/backend/danswer/utils/telemetry.py b/backend/danswer/utils/telemetry.py index 80fcba65a16..d8a021877e6 100644 --- a/backend/danswer/utils/telemetry.py +++ b/backend/danswer/utils/telemetry.py @@ -4,13 +4,20 @@ from typing import cast import requests +from sqlalchemy.orm import Session from danswer.configs.app_configs import DISABLE_TELEMETRY +from danswer.configs.app_configs import ENTERPRISE_EDITION_ENABLED from danswer.configs.constants import KV_CUSTOMER_UUID_KEY +from danswer.configs.constants import KV_INSTANCE_DOMAIN_KEY +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.models import User from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError -DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry" +_DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry" +_CACHED_UUID: str | None = None +_CACHED_INSTANCE_DOMAIN: str | None = None class RecordType(str, Enum): @@ -22,13 +29,42 @@ class RecordType(str, Enum): def get_or_generate_uuid() -> str: + global _CACHED_UUID + + if _CACHED_UUID is not None: + 
return _CACHED_UUID + + kv_store = get_dynamic_config_store() + + try: + _CACHED_UUID = cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY)) + except ConfigNotFoundError: + _CACHED_UUID = str(uuid.uuid4()) + kv_store.store(KV_CUSTOMER_UUID_KEY, _CACHED_UUID, encrypt=True) + + return _CACHED_UUID + + +def _get_or_generate_instance_domain() -> str | None: + global _CACHED_INSTANCE_DOMAIN + + if _CACHED_INSTANCE_DOMAIN is not None: + return _CACHED_INSTANCE_DOMAIN + kv_store = get_dynamic_config_store() + try: - return cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY)) + _CACHED_INSTANCE_DOMAIN = cast(str, kv_store.load(KV_INSTANCE_DOMAIN_KEY)) except ConfigNotFoundError: - customer_id = str(uuid.uuid4()) - kv_store.store(KV_CUSTOMER_UUID_KEY, customer_id, encrypt=True) - return customer_id + with Session(get_sqlalchemy_engine()) as db_session: + first_user = db_session.query(User).first() + if first_user: + _CACHED_INSTANCE_DOMAIN = first_user.email.split("@")[-1] + kv_store.store( + KV_INSTANCE_DOMAIN_KEY, _CACHED_INSTANCE_DOMAIN, encrypt=True + ) + + return _CACHED_INSTANCE_DOMAIN def optional_telemetry( @@ -41,16 +77,19 @@ def optional_telemetry( def telemetry_logic() -> None: try: + customer_uuid = get_or_generate_uuid() payload = { "data": data, "record": record_type, # If None then it's a flow that doesn't include a user # For cases where the User itself is None, a string is provided instead "user_id": user_id, - "customer_uuid": get_or_generate_uuid(), + "customer_uuid": customer_uuid, } + if ENTERPRISE_EDITION_ENABLED: + payload["instance_domain"] = _get_or_generate_instance_domain() requests.post( - DANSWER_TELEMETRY_ENDPOINT, + _DANSWER_TELEMETRY_ENDPOINT, headers={"Content-Type": "application/json"}, json=payload, ) diff --git a/backend/danswer/utils/text_processing.py b/backend/danswer/utils/text_processing.py index b0fbcdfa1e9..134859d4e74 100644 --- a/backend/danswer/utils/text_processing.py +++ b/backend/danswer/utils/text_processing.py @@ -43,6 +43,35 @@ def replace_whitespaces_w_space(s: str) -> str: return re.sub(r"\s", " ", s) +# Function to remove punctuation from a string +def remove_punctuation(s: str) -> str: + return s.translate(str.maketrans("", "", string.punctuation)) + + +def escape_quotes(original_json_str: str) -> str: + result = [] + in_string = False + for i, char in enumerate(original_json_str): + if char == '"': + if not in_string: + in_string = True + result.append(char) + else: + next_char = ( + original_json_str[i + 1] if i + 1 < len(original_json_str) else None + ) + if result and result[-1] == "\\": + result.append(char) + elif next_char not in [",", ":", "}", "\n"]: + result.append("\\" + char) + else: + result.append(char) + in_string = False + else: + result.append(char) + return "".join(result) + + def extract_embedded_json(s: str) -> dict: first_brace_index = s.find("{") last_brace_index = s.rfind("}") @@ -50,7 +79,15 @@ def extract_embedded_json(s: str) -> dict: if first_brace_index == -1 or last_brace_index == -1: raise ValueError("No valid json found") - return json.loads(s[first_brace_index : last_brace_index + 1], strict=False) + json_str = s[first_brace_index : last_brace_index + 1] + try: + return json.loads(json_str, strict=False) + + except json.JSONDecodeError: + try: + return json.loads(escape_quotes(json_str), strict=False) + except json.JSONDecodeError as e: + raise ValueError("Failed to parse JSON, even after escaping quotes") from e def clean_up_code_blocks(model_out_raw: str) -> str: diff --git 
a/backend/danswer/utils/variable_functionality.py b/backend/danswer/utils/variable_functionality.py index 97c6592601e..55f296aa8e7 100644 --- a/backend/danswer/utils/variable_functionality.py +++ b/backend/danswer/utils/variable_functionality.py @@ -31,6 +31,28 @@ def set_is_ee_based_on_env_variable() -> None: @functools.lru_cache(maxsize=128) def fetch_versioned_implementation(module: str, attribute: str) -> Any: + """ + Fetches a versioned implementation of a specified attribute from a given module. + This function first checks if the application is running in an Enterprise Edition (EE) + context. If so, it attempts to import the attribute from the EE-specific module. + If the module or attribute is not found, it falls back to the default module or + raises the appropriate exception depending on the context. + + Args: + module (str): The name of the module from which to fetch the attribute. + attribute (str): The name of the attribute to fetch from the module. + + Returns: + Any: The fetched implementation of the attribute. + + Raises: + ModuleNotFoundError: If the module cannot be found and the error is not related to + the Enterprise Edition fallback logic. + + Logs: + Logs debug information about the fetching process and warnings if the versioned + implementation cannot be found or loaded. + """ logger.debug("Fetching versioned implementation for %s.%s", module, attribute) is_ee = global_version.get_is_ee_version() @@ -66,6 +88,19 @@ def fetch_versioned_implementation(module: str, attribute: str) -> Any: def fetch_versioned_implementation_with_fallback( module: str, attribute: str, fallback: T ) -> T: + """ + Attempts to fetch a versioned implementation of a specified attribute from a given module. + If the attempt fails (e.g., due to an import error or missing attribute), the function logs + a warning and returns the provided fallback implementation. + + Args: + module (str): The name of the module from which to fetch the attribute. + attribute (str): The name of the attribute to fetch from the module. + fallback (T): The fallback implementation to return if fetching the attribute fails. + + Returns: + T: The fetched implementation if successful, otherwise the provided fallback. + """ try: return fetch_versioned_implementation(module, attribute) except Exception: @@ -73,4 +108,14 @@ def fetch_versioned_implementation_with_fallback( def noop_fallback(*args: Any, **kwargs: Any) -> None: - pass + """ + A no-op (no operation) fallback function that accepts any arguments but does nothing. + This is often used as a default or placeholder callback function. + + Args: + *args (Any): Positional arguments, which are ignored. + **kwargs (Any): Keyword arguments, which are ignored. 
+ + Returns: + None + """ diff --git a/backend/ee/danswer/access/access.py b/backend/ee/danswer/access/access.py index c2b05ee881f..094298677a5 100644 --- a/backend/ee/danswer/access/access.py +++ b/backend/ee/danswer/access/access.py @@ -5,12 +5,32 @@ ) from danswer.access.access import _get_acl_for_user as get_acl_for_user_without_groups from danswer.access.models import DocumentAccess +from danswer.access.utils import prefix_external_group from danswer.access.utils import prefix_user_group +from danswer.db.document import get_documents_by_ids from danswer.db.models import User +from ee.danswer.db.external_perm import fetch_external_groups_for_user from ee.danswer.db.user_group import fetch_user_groups_for_documents from ee.danswer.db.user_group import fetch_user_groups_for_user +def _get_access_for_document( + document_id: str, + db_session: Session, +) -> DocumentAccess: + id_to_access = _get_access_for_documents([document_id], db_session) + if len(id_to_access) == 0: + return DocumentAccess.build( + user_emails=[], + user_groups=[], + external_user_emails=[], + external_user_group_ids=[], + is_public=False, + ) + + return next(iter(id_to_access.values())) + + def _get_access_for_documents( document_ids: list[str], db_session: Session, @@ -19,22 +39,48 @@ def _get_access_for_documents( document_ids=document_ids, db_session=db_session, ) - user_group_info = { + user_group_info: dict[str, list[str]] = { document_id: group_names for document_id, group_names in fetch_user_groups_for_documents( db_session=db_session, document_ids=document_ids, ) } + documents = get_documents_by_ids( + db_session=db_session, + document_ids=document_ids, + ) + doc_id_map = {doc.id: doc for doc in documents} - return { - document_id: DocumentAccess( - user_ids=non_ee_access.user_ids, - user_groups=user_group_info.get(document_id, []), # type: ignore - is_public=non_ee_access.is_public, + access_map = {} + for document_id, non_ee_access in non_ee_access_dict.items(): + document = doc_id_map[document_id] + + ext_u_emails = ( + set(document.external_user_emails) + if document.external_user_emails + else set() ) - for document_id, non_ee_access in non_ee_access_dict.items() - } + + ext_u_groups = ( + set(document.external_user_group_ids) + if document.external_user_group_ids + else set() + ) + + # If the document is determined to be "public" externally (through a SYNC connector) + # then it's given the same access level as if it were marked public within Danswer + is_public_anywhere = document.is_public or non_ee_access.is_public + + # To avoid collisions of group namings between connectors, they need to be prefixed + access_map[document_id] = DocumentAccess( + user_emails=non_ee_access.user_emails, + user_groups=set(user_group_info.get(document_id, [])), + is_public=is_public_anywhere, + external_user_emails=ext_u_emails, + external_user_group_ids=ext_u_groups, + ) + return access_map def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]: @@ -45,7 +91,20 @@ def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]: NOTE: is imported in danswer.access.access by `fetch_versioned_implementation` DO NOT REMOVE.""" - user_groups = fetch_user_groups_for_user(db_session, user.id) if user else [] - return set( - [prefix_user_group(user_group.name) for user_group in user_groups] - ).union(get_acl_for_user_without_groups(user, db_session)) + db_user_groups = fetch_user_groups_for_user(db_session, user.id) if user else [] + prefixed_user_groups = [ + prefix_user_group(db_user_group.name) 
for db_user_group in db_user_groups + ] + + db_external_groups = ( + fetch_external_groups_for_user(db_session, user.id) if user else [] + ) + prefixed_external_groups = [ + prefix_external_group(db_external_group.external_user_group_id) + for db_external_group in db_external_groups + ] + + user_acl = set(prefixed_user_groups + prefixed_external_groups) + user_acl.update(get_acl_for_user_without_groups(user, db_session)) + + return user_acl diff --git a/backend/ee/danswer/background/celery/celery_app.py b/backend/ee/danswer/background/celery/celery_app.py index 403adbd74e1..5dd0f72009f 100644 --- a/backend/ee/danswer/background/celery/celery_app.py +++ b/backend/ee/danswer/background/celery/celery_app.py @@ -1,28 +1,37 @@ from datetime import timedelta -from typing import Any -from celery.signals import beat_init -from celery.signals import worker_init from sqlalchemy.orm import Session from danswer.background.celery.celery_app import celery_app from danswer.background.task_utils import build_celery_task_wrapper from danswer.configs.app_configs import JOB_TIMEOUT -from danswer.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME -from danswer.configs.constants import POSTGRES_CELERY_WORKER_APP_NAME from danswer.db.chat import delete_chat_sessions_older_than from danswer.db.engine import get_sqlalchemy_engine -from danswer.db.engine import init_sqlalchemy_engine from danswer.server.settings.store import load_settings from danswer.utils.logger import setup_logger from danswer.utils.variable_functionality import global_version from ee.danswer.background.celery_utils import should_perform_chat_ttl_check -from ee.danswer.background.celery_utils import should_sync_user_groups +from ee.danswer.background.celery_utils import ( + should_perform_external_doc_permissions_check, +) +from ee.danswer.background.celery_utils import ( + should_perform_external_group_permissions_check, +) from ee.danswer.background.task_name_builders import name_chat_ttl_task -from ee.danswer.background.task_name_builders import name_user_group_sync_task -from ee.danswer.db.user_group import fetch_user_groups +from ee.danswer.background.task_name_builders import ( + name_sync_external_doc_permissions_task, +) +from ee.danswer.background.task_name_builders import ( + name_sync_external_group_permissions_task, +) +from ee.danswer.db.connector_credential_pair import get_all_auto_sync_cc_pairs +from ee.danswer.external_permissions.permission_sync import ( + run_external_doc_permission_sync, +) +from ee.danswer.external_permissions.permission_sync import ( + run_external_group_permission_sync, +) from ee.danswer.server.reporting.usage_export_generation import create_new_usage_report -from ee.danswer.user_groups.sync import sync_user_groups logger = setup_logger() @@ -30,15 +39,18 @@ global_version.set_ee() -@build_celery_task_wrapper(name_user_group_sync_task) +@build_celery_task_wrapper(name_sync_external_doc_permissions_task) @celery_app.task(soft_time_limit=JOB_TIMEOUT) -def sync_user_group_task(user_group_id: int) -> None: +def sync_external_doc_permissions_task(cc_pair_id: int) -> None: with Session(get_sqlalchemy_engine()) as db_session: - # actual sync logic - try: - sync_user_groups(user_group_id=user_group_id, db_session=db_session) - except Exception as e: - logger.exception(f"Failed to sync user group - {e}") + run_external_doc_permission_sync(db_session=db_session, cc_pair_id=cc_pair_id) + + +@build_celery_task_wrapper(name_sync_external_group_permissions_task) +@celery_app.task(soft_time_limit=JOB_TIMEOUT) +def 
sync_external_group_permissions_task(cc_pair_id: int) -> None: + with Session(get_sqlalchemy_engine()) as db_session: + run_external_group_permission_sync(db_session=db_session, cc_pair_id=cc_pair_id) @build_celery_task_wrapper(name_chat_ttl_task) @@ -51,6 +63,38 @@ def perform_ttl_management_task(retention_limit_days: int) -> None: ##### # Periodic Tasks ##### +@celery_app.task( + name="check_sync_external_doc_permissions_task", + soft_time_limit=JOB_TIMEOUT, +) +def check_sync_external_doc_permissions_task() -> None: + """Runs periodically to sync external permissions""" + with Session(get_sqlalchemy_engine()) as db_session: + cc_pairs = get_all_auto_sync_cc_pairs(db_session) + for cc_pair in cc_pairs: + if should_perform_external_doc_permissions_check( + cc_pair=cc_pair, db_session=db_session + ): + sync_external_doc_permissions_task.apply_async( + kwargs=dict(cc_pair_id=cc_pair.id), + ) + + +@celery_app.task( + name="check_sync_external_group_permissions_task", + soft_time_limit=JOB_TIMEOUT, +) +def check_sync_external_group_permissions_task() -> None: + """Runs periodically to sync external group permissions""" + with Session(get_sqlalchemy_engine()) as db_session: + cc_pairs = get_all_auto_sync_cc_pairs(db_session) + for cc_pair in cc_pairs: + if should_perform_external_group_permissions_check( + cc_pair=cc_pair, db_session=db_session + ): + sync_external_group_permissions_task.apply_async( + kwargs=dict(cc_pair_id=cc_pair.id), + ) @celery_app.task( @@ -69,24 +113,6 @@ def check_ttl_management_task() -> None: ) -@celery_app.task( - name="check_for_user_groups_sync_task", - soft_time_limit=JOB_TIMEOUT, -) -def check_for_user_groups_sync_task() -> None: - """Runs periodically to check if any user groups are out of sync - Creates a task to sync the user group if needed""" - with Session(get_sqlalchemy_engine()) as db_session: - # check if any document sets are not synced - user_groups = fetch_user_groups(db_session=db_session, only_current=False) - for user_group in user_groups: - if should_sync_user_groups(user_group, db_session): - logger.info(f"User Group {user_group.id} is not synced. 
Syncing now!") - sync_user_group_task.apply_async( - kwargs=dict(user_group_id=user_group.id), - ) - - @celery_app.task( name="autogenerate_usage_report_task", soft_time_limit=JOB_TIMEOUT, @@ -101,23 +127,17 @@ def autogenerate_usage_report_task() -> None: ) -@beat_init.connect -def on_beat_init(sender: Any, **kwargs: Any) -> None: - init_sqlalchemy_engine(POSTGRES_CELERY_BEAT_APP_NAME) - - -@worker_init.connect -def on_worker_init(sender: Any, **kwargs: Any) -> None: - init_sqlalchemy_engine(POSTGRES_CELERY_WORKER_APP_NAME) - - ##### # Celery Beat (Periodic Tasks) Settings ##### celery_app.conf.beat_schedule = { - "check-for-user-group-sync": { - "task": "check_for_user_groups_sync_task", - "schedule": timedelta(seconds=5), + "sync-external-doc-permissions": { + "task": "check_sync_external_doc_permissions_task", + "schedule": timedelta(seconds=5), # TODO: optimize this + }, + "sync-external-group-permissions": { + "task": "check_sync_external_group_permissions_task", + "schedule": timedelta(seconds=5), # TODO: optimize this }, "autogenerate_usage_report": { "task": "autogenerate_usage_report_task", diff --git a/backend/ee/danswer/background/celery/tasks/vespa/tasks.py b/backend/ee/danswer/background/celery/tasks/vespa/tasks.py new file mode 100644 index 00000000000..d194b2ef9a9 --- /dev/null +++ b/backend/ee/danswer/background/celery/tasks/vespa/tasks.py @@ -0,0 +1,52 @@ +from typing import cast + +from redis import Redis +from sqlalchemy.orm import Session + +from danswer.background.celery.celery_app import task_logger +from danswer.background.celery.celery_redis import RedisUserGroup +from danswer.utils.logger import setup_logger +from ee.danswer.db.user_group import delete_user_group +from ee.danswer.db.user_group import fetch_user_group +from ee.danswer.db.user_group import mark_user_group_as_synced + +logger = setup_logger() + + +def monitor_usergroup_taskset(key_bytes: bytes, r: Redis, db_session: Session) -> None: + """This function is likely to move in the worker refactor happening next.""" + key = key_bytes.decode("utf-8") + usergroup_id = RedisUserGroup.get_id_from_fence_key(key) + if not usergroup_id: + task_logger.warning("Could not parse usergroup id from {key}") + return + + rug = RedisUserGroup(usergroup_id) + fence_value = r.get(rug.fence_key) + if fence_value is None: + return + + try: + initial_count = int(cast(int, fence_value)) + except ValueError: + task_logger.error("The value is not an integer.") + return + + count = cast(int, r.scard(rug.taskset_key)) + task_logger.info( + f"User group sync: usergroup_id={usergroup_id} remaining={count} initial={initial_count}" + ) + if count > 0: + return + + user_group = fetch_user_group(db_session=db_session, user_group_id=usergroup_id) + if user_group: + if user_group.is_up_for_deletion: + delete_user_group(db_session=db_session, user_group=user_group) + task_logger.info(f"Deleted usergroup. id='{usergroup_id}'") + else: + mark_user_group_as_synced(db_session=db_session, user_group=user_group) + task_logger.info(f"Synced usergroup. 
id='{usergroup_id}'") + + r.delete(rug.taskset_key) + r.delete(rug.fence_key) diff --git a/backend/ee/danswer/background/celery_utils.py b/backend/ee/danswer/background/celery_utils.py index 0134f6642f7..c42812f81c3 100644 --- a/backend/ee/danswer/background/celery_utils.py +++ b/backend/ee/danswer/background/celery_utils.py @@ -1,27 +1,21 @@ from sqlalchemy.orm import Session -from danswer.db.models import UserGroup +from danswer.db.enums import AccessType +from danswer.db.models import ConnectorCredentialPair from danswer.db.tasks import check_task_is_live_and_not_timed_out from danswer.db.tasks import get_latest_task from danswer.utils.logger import setup_logger from ee.danswer.background.task_name_builders import name_chat_ttl_task -from ee.danswer.background.task_name_builders import name_user_group_sync_task +from ee.danswer.background.task_name_builders import ( + name_sync_external_doc_permissions_task, +) +from ee.danswer.background.task_name_builders import ( + name_sync_external_group_permissions_task, +) logger = setup_logger() -def should_sync_user_groups(user_group: UserGroup, db_session: Session) -> bool: - if user_group.is_up_to_date: - return False - task_name = name_user_group_sync_task(user_group.id) - latest_sync = get_latest_task(task_name, db_session) - - if latest_sync and check_task_is_live_and_not_timed_out(latest_sync, db_session): - logger.info("TTL check is already being performed. Skipping.") - return False - return True - - def should_perform_chat_ttl_check( retention_limit_days: int | None, db_session: Session ) -> bool: @@ -35,6 +29,44 @@ def should_perform_chat_ttl_check( return True if latest_task and check_task_is_live_and_not_timed_out(latest_task, db_session): - logger.info("TTL check is already being performed. Skipping.") + logger.debug(f"{task_name} is already being performed. Skipping.") + return False + return True + + +def should_perform_external_doc_permissions_check( + cc_pair: ConnectorCredentialPair, db_session: Session +) -> bool: + if cc_pair.access_type != AccessType.SYNC: + return False + + task_name = name_sync_external_doc_permissions_task(cc_pair_id=cc_pair.id) + + latest_task = get_latest_task(task_name, db_session) + if not latest_task: + return True + + if check_task_is_live_and_not_timed_out(latest_task, db_session): + logger.debug(f"{task_name} is already being performed. Skipping.") + return False + + return True + + +def should_perform_external_group_permissions_check( + cc_pair: ConnectorCredentialPair, db_session: Session +) -> bool: + if cc_pair.access_type != AccessType.SYNC: return False + + task_name = name_sync_external_group_permissions_task(cc_pair_id=cc_pair.id) + + latest_task = get_latest_task(task_name, db_session) + if not latest_task: + return True + + if check_task_is_live_and_not_timed_out(latest_task, db_session): + logger.debug(f"{task_name} is already being performed. 
Skipping.") + return False + return True diff --git a/backend/ee/danswer/background/permission_sync.py b/backend/ee/danswer/background/permission_sync.py deleted file mode 100644 index c14094b6042..00000000000 --- a/backend/ee/danswer/background/permission_sync.py +++ /dev/null @@ -1,224 +0,0 @@ -import logging -import time -from datetime import datetime - -import dask -from dask.distributed import Client -from dask.distributed import Future -from distributed import LocalCluster -from sqlalchemy.orm import Session - -from danswer.background.indexing.dask_utils import ResourceLogger -from danswer.background.indexing.job_client import SimpleJob -from danswer.background.indexing.job_client import SimpleJobClient -from danswer.configs.app_configs import CLEANUP_INDEXING_JOBS_TIMEOUT -from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED -from danswer.configs.constants import DocumentSource -from danswer.configs.constants import POSTGRES_PERMISSIONS_APP_NAME -from danswer.db.engine import get_sqlalchemy_engine -from danswer.db.engine import init_sqlalchemy_engine -from danswer.db.models import PermissionSyncStatus -from danswer.utils.logger import setup_logger -from ee.danswer.configs.app_configs import NUM_PERMISSION_WORKERS -from ee.danswer.connectors.factory import CONNECTOR_PERMISSION_FUNC_MAP -from ee.danswer.db.connector import fetch_sources_with_connectors -from ee.danswer.db.connector_credential_pair import get_cc_pairs_by_source -from ee.danswer.db.permission_sync import create_perm_sync -from ee.danswer.db.permission_sync import expire_perm_sync_timed_out -from ee.danswer.db.permission_sync import get_perm_sync_attempt -from ee.danswer.db.permission_sync import mark_all_inprogress_permission_sync_failed -from shared_configs.configs import LOG_LEVEL - -logger = setup_logger() - -# If the indexing dies, it's most likely due to resource constraints, -# restarting just delays the eventual failure, not useful to the user -dask.config.set({"distributed.scheduler.allowed-failures": 0}) - - -def cleanup_perm_sync_jobs( - existing_jobs: dict[tuple[int, int | DocumentSource], Future | SimpleJob], - # Just reusing the same timeout, fine for now - timeout_hours: int = CLEANUP_INDEXING_JOBS_TIMEOUT, -) -> dict[tuple[int, int | DocumentSource], Future | SimpleJob]: - existing_jobs_copy = existing_jobs.copy() - - with Session(get_sqlalchemy_engine()) as db_session: - # clean up completed jobs - for (attempt_id, details), job in existing_jobs.items(): - perm_sync_attempt = get_perm_sync_attempt( - attempt_id=attempt_id, db_session=db_session - ) - - # do nothing for ongoing jobs that haven't been stopped - if ( - not job.done() - and perm_sync_attempt.status == PermissionSyncStatus.IN_PROGRESS - ): - continue - - if job.status == "error": - logger.error(job.exception()) - - job.release() - del existing_jobs_copy[(attempt_id, details)] - - # clean up in-progress jobs that were never completed - expire_perm_sync_timed_out( - timeout_hours=timeout_hours, - db_session=db_session, - ) - - return existing_jobs_copy - - -def create_group_sync_jobs( - existing_jobs: dict[tuple[int, int | DocumentSource], Future | SimpleJob], - client: Client | SimpleJobClient, -) -> dict[tuple[int, int | DocumentSource], Future | SimpleJob]: - """Creates new relational DB group permission sync job for each source that: - - has permission sync enabled - - has at least 1 connector (enabled or paused) - - has no sync already running - """ - existing_jobs_copy = existing_jobs.copy() - sources_w_runs = [ - key[1] - for 
key in existing_jobs_copy.keys() - if isinstance(key[1], DocumentSource) - ] - with Session(get_sqlalchemy_engine()) as db_session: - sources_w_connector = fetch_sources_with_connectors(db_session) - for source_type in sources_w_connector: - if source_type not in CONNECTOR_PERMISSION_FUNC_MAP: - continue - if source_type in sources_w_runs: - continue - - db_group_fnc, _ = CONNECTOR_PERMISSION_FUNC_MAP[source_type] - perm_sync = create_perm_sync( - source_type=source_type, - group_update=True, - cc_pair_id=None, - db_session=db_session, - ) - - run = client.submit(db_group_fnc, pure=False) - - logger.info( - f"Kicked off group permission sync for source type {source_type}" - ) - - if run: - existing_jobs_copy[(perm_sync.id, source_type)] = run - - return existing_jobs_copy - - -def create_connector_perm_sync_jobs( - existing_jobs: dict[tuple[int, int | DocumentSource], Future | SimpleJob], - client: Client | SimpleJobClient, -) -> dict[tuple[int, int | DocumentSource], Future | SimpleJob]: - """Update Document Index ACL sync job for each cc-pair where: - - source type has permission sync enabled - - has no sync already running - """ - existing_jobs_copy = existing_jobs.copy() - cc_pairs_w_runs = [ - key[1] - for key in existing_jobs_copy.keys() - if isinstance(key[1], DocumentSource) - ] - with Session(get_sqlalchemy_engine()) as db_session: - sources_w_connector = fetch_sources_with_connectors(db_session) - for source_type in sources_w_connector: - if source_type not in CONNECTOR_PERMISSION_FUNC_MAP: - continue - - _, index_sync_fnc = CONNECTOR_PERMISSION_FUNC_MAP[source_type] - - cc_pairs = get_cc_pairs_by_source(source_type, db_session) - - for cc_pair in cc_pairs: - if cc_pair.id in cc_pairs_w_runs: - continue - - perm_sync = create_perm_sync( - source_type=source_type, - group_update=False, - cc_pair_id=cc_pair.id, - db_session=db_session, - ) - - run = client.submit(index_sync_fnc, cc_pair.id, pure=False) - - logger.info(f"Kicked off ACL sync for cc-pair {cc_pair.id}") - - if run: - existing_jobs_copy[(perm_sync.id, cc_pair.id)] = run - - return existing_jobs_copy - - -def permission_loop(delay: int = 60, num_workers: int = NUM_PERMISSION_WORKERS) -> None: - client: Client | SimpleJobClient - if DASK_JOB_CLIENT_ENABLED: - cluster_primary = LocalCluster( - n_workers=num_workers, - threads_per_worker=1, - # there are warning about high memory usage + "Event loop unresponsive" - # which are not relevant to us since our workers are expected to use a - # lot of memory + involve CPU intensive tasks that will not relinquish - # the event loop - silence_logs=logging.ERROR, - ) - client = Client(cluster_primary) - if LOG_LEVEL.lower() == "debug": - client.register_worker_plugin(ResourceLogger()) - else: - client = SimpleJobClient(n_workers=num_workers) - - existing_jobs: dict[tuple[int, int | DocumentSource], Future | SimpleJob] = {} - engine = get_sqlalchemy_engine() - - with Session(engine) as db_session: - # Any jobs still in progress on restart must have died - mark_all_inprogress_permission_sync_failed(db_session) - - while True: - start = time.time() - start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S") - logger.info(f"Running Permission Sync, current UTC time: {start_time_utc}") - - if existing_jobs: - logger.debug( - "Found existing permission sync jobs: " - f"{[(attempt_id, job.status) for attempt_id, job in existing_jobs.items()]}" - ) - - try: - # TODO turn this on when it works - """ - existing_jobs = cleanup_perm_sync_jobs(existing_jobs=existing_jobs) - 
existing_jobs = create_group_sync_jobs( - existing_jobs=existing_jobs, client=client - ) - existing_jobs = create_connector_perm_sync_jobs( - existing_jobs=existing_jobs, client=client - ) - """ - except Exception as e: - logger.exception(f"Failed to run update due to {e}") - sleep_time = delay - (time.time() - start) - if sleep_time > 0: - time.sleep(sleep_time) - - -def update__main() -> None: - logger.notice("Starting Permission Syncing Loop") - init_sqlalchemy_engine(POSTGRES_PERMISSIONS_APP_NAME) - permission_loop() - - -if __name__ == "__main__": - update__main() diff --git a/backend/ee/danswer/background/task_name_builders.py b/backend/ee/danswer/background/task_name_builders.py index 4f1046adbbb..c494329d366 100644 --- a/backend/ee/danswer/background/task_name_builders.py +++ b/backend/ee/danswer/background/task_name_builders.py @@ -1,6 +1,10 @@ -def name_user_group_sync_task(user_group_id: int) -> str: - return f"user_group_sync_task__{user_group_id}" - - def name_chat_ttl_task(retention_limit_days: int) -> str: return f"chat_ttl_{retention_limit_days}_days" + + +def name_sync_external_doc_permissions_task(cc_pair_id: int) -> str: + return f"sync_external_doc_permissions_task__{cc_pair_id}" + + +def name_sync_external_group_permissions_task(cc_pair_id: int) -> str: + return f"sync_external_group_permissions_task__{cc_pair_id}" diff --git a/backend/ee/danswer/connectors/confluence/perm_sync.py b/backend/ee/danswer/connectors/confluence/perm_sync.py deleted file mode 100644 index 2985b47b0d1..00000000000 --- a/backend/ee/danswer/connectors/confluence/perm_sync.py +++ /dev/null @@ -1,12 +0,0 @@ -from danswer.utils.logger import setup_logger - - -logger = setup_logger() - - -def confluence_update_db_group() -> None: - logger.debug("Not yet implemented group sync for confluence, no-op") - - -def confluence_update_index_acl(cc_pair_id: int) -> None: - logger.debug("Not yet implemented ACL sync for confluence, no-op") diff --git a/backend/ee/danswer/connectors/factory.py b/backend/ee/danswer/connectors/factory.py deleted file mode 100644 index 52f9324948b..00000000000 --- a/backend/ee/danswer/connectors/factory.py +++ /dev/null @@ -1,8 +0,0 @@ -from danswer.configs.constants import DocumentSource -from ee.danswer.connectors.confluence.perm_sync import confluence_update_db_group -from ee.danswer.connectors.confluence.perm_sync import confluence_update_index_acl - - -CONNECTOR_PERMISSION_FUNC_MAP = { - DocumentSource.CONFLUENCE: (confluence_update_db_group, confluence_update_index_acl) -} diff --git a/deployment/helm/templates/NOTES.txt b/backend/ee/danswer/danswerbot/slack/handlers/__init__.py similarity index 100% rename from deployment/helm/templates/NOTES.txt rename to backend/ee/danswer/danswerbot/slack/handlers/__init__.py diff --git a/backend/ee/danswer/danswerbot/slack/handlers/handle_standard_answers.py b/backend/ee/danswer/danswerbot/slack/handlers/handle_standard_answers.py new file mode 100644 index 00000000000..6807e77135a --- /dev/null +++ b/backend/ee/danswer/danswerbot/slack/handlers/handle_standard_answers.py @@ -0,0 +1,238 @@ +from slack_sdk import WebClient +from slack_sdk.models.blocks import ActionsBlock +from slack_sdk.models.blocks import Block +from slack_sdk.models.blocks import ButtonElement +from slack_sdk.models.blocks import SectionBlock +from sqlalchemy.orm import Session + +from danswer.configs.constants import MessageType +from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI +from danswer.danswerbot.slack.blocks import get_restate_blocks +from 
danswer.danswerbot.slack.constants import GENERATE_ANSWER_BUTTON_ACTION_ID +from danswer.danswerbot.slack.handlers.utils import send_team_member_message +from danswer.danswerbot.slack.models import SlackMessageInfo +from danswer.danswerbot.slack.utils import respond_in_thread +from danswer.danswerbot.slack.utils import update_emote_react +from danswer.db.chat import create_chat_session +from danswer.db.chat import create_new_chat_message +from danswer.db.chat import get_chat_messages_by_sessions +from danswer.db.chat import get_chat_sessions_by_slack_thread_id +from danswer.db.chat import get_or_create_root_message +from danswer.db.models import Prompt +from danswer.db.models import SlackBotConfig +from danswer.db.models import StandardAnswer as StandardAnswerModel +from danswer.utils.logger import DanswerLoggingAdapter +from danswer.utils.logger import setup_logger +from ee.danswer.db.standard_answer import fetch_standard_answer_categories_by_names +from ee.danswer.db.standard_answer import find_matching_standard_answers +from ee.danswer.server.manage.models import StandardAnswer as PydanticStandardAnswer + +logger = setup_logger() + + +def build_standard_answer_blocks( + answer_message: str, +) -> list[Block]: + generate_button_block = ButtonElement( + action_id=GENERATE_ANSWER_BUTTON_ACTION_ID, + text="Generate Full Answer", + ) + answer_block = SectionBlock(text=answer_message) + return [ + answer_block, + ActionsBlock( + elements=[generate_button_block], + ), + ] + + +def oneoff_standard_answers( + message: str, + slack_bot_categories: list[str], + db_session: Session, +) -> list[PydanticStandardAnswer]: + """ + Respond to the user message if it matches any configured standard answers. + + Returns a list of matching StandardAnswers if found, otherwise None. + """ + configured_standard_answers = { + standard_answer + for category in fetch_standard_answer_categories_by_names( + slack_bot_categories, db_session=db_session + ) + for standard_answer in category.standard_answers + } + + matching_standard_answers = find_matching_standard_answers( + query=message, + id_in=[answer.id for answer in configured_standard_answers], + db_session=db_session, + ) + + server_standard_answers = [ + PydanticStandardAnswer.from_model(standard_answer_model) + for (standard_answer_model, _) in matching_standard_answers + ] + return server_standard_answers + + +def _handle_standard_answers( + message_info: SlackMessageInfo, + receiver_ids: list[str] | None, + slack_bot_config: SlackBotConfig | None, + prompt: Prompt | None, + logger: DanswerLoggingAdapter, + client: WebClient, + db_session: Session, +) -> bool: + """ + Potentially respond to the user message depending on whether the user's message matches + any of the configured standard answers and also whether those answers have already been + provided in the current thread. + + Returns True if standard answers are found to match the user's message and therefore, + we still need to respond to the users. 
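The `oneoff_standard_answers` helper above can also be exercised outside the Slack handler flow. A hedged usage sketch, assuming a session is opened the same way the Celery tasks in this PR open theirs; the message and category name are placeholders:

```python
# Hedged usage sketch -- the message text and category name are made-up examples.
from sqlalchemy.orm import Session

from danswer.db.engine import get_sqlalchemy_engine
from ee.danswer.danswerbot.slack.handlers.handle_standard_answers import (
    oneoff_standard_answers,
)

with Session(get_sqlalchemy_engine()) as db_session:
    matching = oneoff_standard_answers(
        message="how do I reset my password?",
        slack_bot_categories=["IT"],
        db_session=db_session,
    )
    for standard_answer in matching:
        # each entry is a Pydantic StandardAnswer built from the matching DB row
        print(standard_answer)
```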
+ """ + # if no channel config, then no standard answers are configured + if not slack_bot_config: + return False + + slack_thread_id = message_info.thread_to_respond + configured_standard_answer_categories = ( + slack_bot_config.standard_answer_categories if slack_bot_config else [] + ) + configured_standard_answers = set( + [ + standard_answer + for standard_answer_category in configured_standard_answer_categories + for standard_answer in standard_answer_category.standard_answers + ] + ) + query_msg = message_info.thread_messages[-1] + + if slack_thread_id is None: + used_standard_answer_ids = set([]) + else: + chat_sessions = get_chat_sessions_by_slack_thread_id( + slack_thread_id=slack_thread_id, + user_id=None, + db_session=db_session, + ) + chat_messages = get_chat_messages_by_sessions( + chat_session_ids=[chat_session.id for chat_session in chat_sessions], + user_id=None, + db_session=db_session, + skip_permission_check=True, + ) + used_standard_answer_ids = set( + [ + standard_answer.id + for chat_message in chat_messages + for standard_answer in chat_message.standard_answers + ] + ) + + usable_standard_answers = configured_standard_answers.difference( + used_standard_answer_ids + ) + + matching_standard_answers: list[tuple[StandardAnswerModel, str]] = [] + if usable_standard_answers: + matching_standard_answers = find_matching_standard_answers( + query=query_msg.message, + id_in=[standard_answer.id for standard_answer in usable_standard_answers], + db_session=db_session, + ) + + if matching_standard_answers: + chat_session = create_chat_session( + db_session=db_session, + description="", + user_id=None, + persona_id=slack_bot_config.persona.id if slack_bot_config.persona else 0, + danswerbot_flow=True, + slack_thread_id=slack_thread_id, + one_shot=True, + ) + + root_message = get_or_create_root_message( + chat_session_id=chat_session.id, db_session=db_session + ) + + new_user_message = create_new_chat_message( + chat_session_id=chat_session.id, + parent_message=root_message, + prompt_id=prompt.id if prompt else None, + message=query_msg.message, + token_count=0, + message_type=MessageType.USER, + db_session=db_session, + commit=True, + ) + + formatted_answers = [] + for standard_answer, match_str in matching_standard_answers: + since_you_mentioned_pretext = ( + f'Since your question contains "_{match_str}_"' + ) + block_quotified_answer = ">" + standard_answer.answer.replace("\n", "\n> ") + formatted_answer = f"{since_you_mentioned_pretext}, I thought this might be useful: \n\n{block_quotified_answer}" + formatted_answers.append(formatted_answer) + answer_message = "\n\n".join(formatted_answers) + + _ = create_new_chat_message( + chat_session_id=chat_session.id, + parent_message=new_user_message, + prompt_id=prompt.id if prompt else None, + message=answer_message, + token_count=0, + message_type=MessageType.ASSISTANT, + error=None, + db_session=db_session, + commit=True, + ) + + update_emote_react( + emoji=DANSWER_REACT_EMOJI, + channel=message_info.channel_to_respond, + message_ts=message_info.msg_to_respond, + remove=True, + client=client, + ) + + restate_question_blocks = get_restate_blocks( + msg=query_msg.message, + is_bot_msg=message_info.is_bot_msg, + ) + + answer_blocks = build_standard_answer_blocks( + answer_message=answer_message, + ) + + all_blocks = restate_question_blocks + answer_blocks + + try: + respond_in_thread( + client=client, + channel=message_info.channel_to_respond, + receiver_ids=receiver_ids, + text="Hello! 
Danswer has some results for you!", + blocks=all_blocks, + thread_ts=message_info.msg_to_respond, + unfurl=False, + ) + + if receiver_ids and slack_thread_id: + send_team_member_message( + client=client, + channel=message_info.channel_to_respond, + thread_ts=slack_thread_id, + ) + + return True + except Exception as e: + logger.exception(f"Unable to send standard answer message: {e}") + return False + else: + return False diff --git a/backend/ee/danswer/db/connector_credential_pair.py b/backend/ee/danswer/db/connector_credential_pair.py index a2172913476..bb91c0de74f 100644 --- a/backend/ee/danswer/db/connector_credential_pair.py +++ b/backend/ee/danswer/db/connector_credential_pair.py @@ -3,6 +3,7 @@ from danswer.configs.constants import DocumentSource from danswer.db.connector_credential_pair import get_connector_credential_pair +from danswer.db.enums import AccessType from danswer.db.models import Connector from danswer.db.models import ConnectorCredentialPair from danswer.db.models import UserGroup__ConnectorCredentialPair @@ -32,14 +33,30 @@ def _delete_connector_credential_pair_user_groups_relationship__no_commit( def get_cc_pairs_by_source( - source_type: DocumentSource, db_session: Session, + source_type: DocumentSource, + only_sync: bool, ) -> list[ConnectorCredentialPair]: - cc_pairs = ( + query = ( db_session.query(ConnectorCredentialPair) .join(ConnectorCredentialPair.connector) .filter(Connector.source == source_type) - .all() ) + if only_sync: + query = query.filter(ConnectorCredentialPair.access_type == AccessType.SYNC) + + cc_pairs = query.all() return cc_pairs + + +def get_all_auto_sync_cc_pairs( + db_session: Session, +) -> list[ConnectorCredentialPair]: + return ( + db_session.query(ConnectorCredentialPair) + .where( + ConnectorCredentialPair.access_type == AccessType.SYNC, + ) + .all() + ) diff --git a/backend/ee/danswer/db/document.py b/backend/ee/danswer/db/document.py index 5a368ea170e..d67bc0e57e7 100644 --- a/backend/ee/danswer/db/document.py +++ b/backend/ee/danswer/db/document.py @@ -1,14 +1,47 @@ -from collections.abc import Sequence - from sqlalchemy import select from sqlalchemy.orm import Session -from danswer.db.models import Document +from danswer.access.models import ExternalAccess +from danswer.access.utils import prefix_group_w_source +from danswer.configs.constants import DocumentSource +from danswer.db.models import Document as DbDocument + + +def upsert_document_external_perms__no_commit( + db_session: Session, + doc_id: str, + external_access: ExternalAccess, + source_type: DocumentSource, +) -> None: + """ + This sets the permissions for a document in postgres. 
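The new `upsert_document_external_perms__no_commit` helper is what the connector permission syncs later in this patch write into. A hedged usage sketch with placeholder ids, emails, and group names; as the `__no_commit` suffix suggests, committing is left to the caller:

```python
# Hedged usage sketch -- the doc id, email, and group id are placeholders.
from sqlalchemy.orm import Session

from danswer.access.models import ExternalAccess
from danswer.configs.constants import DocumentSource
from danswer.db.engine import get_sqlalchemy_engine
from ee.danswer.db.document import upsert_document_external_perms__no_commit

with Session(get_sqlalchemy_engine()) as db_session:
    access = ExternalAccess(
        external_user_emails={"alice@example.com"},
        external_user_group_ids={"engineering"},
        is_public=False,
    )
    upsert_document_external_perms__no_commit(
        db_session=db_session,
        doc_id="some-document-id",
        external_access=access,
        source_type=DocumentSource.CONFLUENCE,
    )
    # the helper deliberately does not commit; the sync job owns the transaction
    db_session.commit()
```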
+ NOTE: this will replace any existing external access, it will not do a union + """ + document = db_session.scalars( + select(DbDocument).where(DbDocument.id == doc_id) + ).first() + + prefixed_external_groups = [ + prefix_group_w_source( + ext_group_name=group_id, + source=source_type, + ) + for group_id in external_access.external_user_group_ids + ] + if not document: + # If the document does not exist, still store the external access + # So that if the document is added later, the external access is already stored + document = DbDocument( + id=doc_id, + semantic_id="", + external_user_emails=external_access.external_user_emails, + external_user_group_ids=prefixed_external_groups, + is_public=external_access.is_public, + ) + db_session.add(document) + return -def fetch_documents_from_ids( - db_session: Session, document_ids: list[str] -) -> Sequence[Document]: - return db_session.scalars( - select(Document).where(Document.id.in_(document_ids)) - ).all() + document.external_user_emails = list(external_access.external_user_emails) + document.external_user_group_ids = prefixed_external_groups + document.is_public = external_access.is_public diff --git a/backend/ee/danswer/db/external_perm.py b/backend/ee/danswer/db/external_perm.py new file mode 100644 index 00000000000..25881df55d3 --- /dev/null +++ b/backend/ee/danswer/db/external_perm.py @@ -0,0 +1,77 @@ +from collections.abc import Sequence +from uuid import UUID + +from pydantic import BaseModel +from sqlalchemy import delete +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.access.utils import prefix_group_w_source +from danswer.configs.constants import DocumentSource +from danswer.db.models import User__ExternalUserGroupId + + +class ExternalUserGroup(BaseModel): + id: str + user_ids: list[UUID] + + +def delete_user__ext_group_for_user__no_commit( + db_session: Session, + user_id: UUID, +) -> None: + db_session.execute( + delete(User__ExternalUserGroupId).where( + User__ExternalUserGroupId.user_id == user_id + ) + ) + + +def delete_user__ext_group_for_cc_pair__no_commit( + db_session: Session, + cc_pair_id: int, +) -> None: + db_session.execute( + delete(User__ExternalUserGroupId).where( + User__ExternalUserGroupId.cc_pair_id == cc_pair_id + ) + ) + + +def replace_user__ext_group_for_cc_pair__no_commit( + db_session: Session, + cc_pair_id: int, + group_defs: list[ExternalUserGroup], + source: DocumentSource, +) -> None: + """ + This function clears all existing external user group relations for a given cc_pair_id + and replaces them with the new group definitions. 
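The group-side counterpart, `replace_user__ext_group_for_cc_pair__no_commit`, takes the same shape: a group sync builds `ExternalUserGroup` entries and swaps them in wholesale for a cc-pair. A hedged sketch with a placeholder group id and user UUID:

```python
# Hedged usage sketch -- the cc_pair_id, group id, and UUID are placeholders.
from uuid import UUID

from sqlalchemy.orm import Session

from danswer.configs.constants import DocumentSource
from danswer.db.engine import get_sqlalchemy_engine
from ee.danswer.db.external_perm import ExternalUserGroup
from ee.danswer.db.external_perm import replace_user__ext_group_for_cc_pair__no_commit

with Session(get_sqlalchemy_engine()) as db_session:
    replace_user__ext_group_for_cc_pair__no_commit(
        db_session=db_session,
        cc_pair_id=1,
        group_defs=[
            ExternalUserGroup(
                id="engineering",
                user_ids=[UUID("00000000-0000-0000-0000-000000000001")],
            )
        ],
        source=DocumentSource.CONFLUENCE,
    )
    db_session.commit()  # again, the "__no_commit" helper leaves the commit to the caller
```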
+ """ + delete_user__ext_group_for_cc_pair__no_commit( + db_session=db_session, + cc_pair_id=cc_pair_id, + ) + + new_external_permissions = [ + User__ExternalUserGroupId( + user_id=user_id, + external_user_group_id=prefix_group_w_source(external_group.id, source), + cc_pair_id=cc_pair_id, + ) + for external_group in group_defs + for user_id in external_group.user_ids + ] + + db_session.add_all(new_external_permissions) + + +def fetch_external_groups_for_user( + db_session: Session, + user_id: UUID, +) -> Sequence[User__ExternalUserGroupId]: + return db_session.scalars( + select(User__ExternalUserGroupId).where( + User__ExternalUserGroupId.user_id == user_id + ) + ).all() diff --git a/backend/ee/danswer/db/permission_sync.py b/backend/ee/danswer/db/permission_sync.py deleted file mode 100644 index 7642bb65321..00000000000 --- a/backend/ee/danswer/db/permission_sync.py +++ /dev/null @@ -1,72 +0,0 @@ -from datetime import timedelta - -from sqlalchemy import func -from sqlalchemy import select -from sqlalchemy import update -from sqlalchemy.exc import NoResultFound -from sqlalchemy.orm import Session - -from danswer.configs.constants import DocumentSource -from danswer.db.models import PermissionSyncRun -from danswer.db.models import PermissionSyncStatus -from danswer.utils.logger import setup_logger - -logger = setup_logger() - - -def mark_all_inprogress_permission_sync_failed( - db_session: Session, -) -> None: - stmt = ( - update(PermissionSyncRun) - .where(PermissionSyncRun.status == PermissionSyncStatus.IN_PROGRESS) - .values(status=PermissionSyncStatus.FAILED) - ) - db_session.execute(stmt) - db_session.commit() - - -def get_perm_sync_attempt(attempt_id: int, db_session: Session) -> PermissionSyncRun: - stmt = select(PermissionSyncRun).where(PermissionSyncRun.id == attempt_id) - try: - return db_session.scalars(stmt).one() - except NoResultFound: - raise ValueError(f"No PermissionSyncRun found with id {attempt_id}") - - -def expire_perm_sync_timed_out( - timeout_hours: int, - db_session: Session, -) -> None: - cutoff_time = func.now() - timedelta(hours=timeout_hours) - - update_stmt = ( - update(PermissionSyncRun) - .where( - PermissionSyncRun.status == PermissionSyncStatus.IN_PROGRESS, - PermissionSyncRun.updated_at < cutoff_time, - ) - .values(status=PermissionSyncStatus.FAILED, error_msg="timed out") - ) - - db_session.execute(update_stmt) - db_session.commit() - - -def create_perm_sync( - source_type: DocumentSource, - group_update: bool, - cc_pair_id: int | None, - db_session: Session, -) -> PermissionSyncRun: - new_run = PermissionSyncRun( - source_type=source_type, - status=PermissionSyncStatus.IN_PROGRESS, - group_update=group_update, - cc_pair_id=cc_pair_id, - ) - - db_session.add(new_run) - db_session.commit() - - return new_run diff --git a/backend/danswer/db/standard_answer.py b/backend/ee/danswer/db/standard_answer.py similarity index 72% rename from backend/danswer/db/standard_answer.py rename to backend/ee/danswer/db/standard_answer.py index 064a5fa59ef..0fa074e36a7 100644 --- a/backend/danswer/db/standard_answer.py +++ b/backend/ee/danswer/db/standard_answer.py @@ -1,3 +1,4 @@ +import re import string from collections.abc import Sequence @@ -41,6 +42,8 @@ def insert_standard_answer( keyword: str, answer: str, category_ids: list[int], + match_regex: bool, + match_any_keywords: bool, db_session: Session, ) -> StandardAnswer: existing_categories = fetch_standard_answer_categories_by_ids( @@ -55,6 +58,8 @@ def insert_standard_answer( answer=answer, 
categories=existing_categories, active=True, + match_regex=match_regex, + match_any_keywords=match_any_keywords, ) db_session.add(standard_answer) db_session.commit() @@ -66,6 +71,8 @@ def update_standard_answer( keyword: str, answer: str, category_ids: list[int], + match_regex: bool, + match_any_keywords: bool, db_session: Session, ) -> StandardAnswer: standard_answer = db_session.scalar( @@ -84,6 +91,8 @@ def update_standard_answer( standard_answer.keyword = keyword standard_answer.answer = answer standard_answer.categories = list(existing_categories) + standard_answer.match_regex = match_regex + standard_answer.match_any_keywords = match_any_keywords db_session.commit() @@ -140,17 +149,6 @@ def fetch_standard_answer_category( ) -def fetch_standard_answer_categories_by_names( - standard_answer_category_names: list[str], - db_session: Session, -) -> Sequence[StandardAnswerCategory]: - return db_session.scalars( - select(StandardAnswerCategory).where( - StandardAnswerCategory.name.in_(standard_answer_category_names) - ) - ).all() - - def fetch_standard_answer_categories_by_ids( standard_answer_category_ids: list[int], db_session: Session, @@ -177,39 +175,6 @@ def fetch_standard_answer( ) -def find_matching_standard_answers( - id_in: list[int], - query: str, - db_session: Session, -) -> list[StandardAnswer]: - stmt = ( - select(StandardAnswer) - .where(StandardAnswer.active.is_(True)) - .where(StandardAnswer.id.in_(id_in)) - ) - possible_standard_answers = db_session.scalars(stmt).all() - - matching_standard_answers: list[StandardAnswer] = [] - for standard_answer in possible_standard_answers: - # Remove punctuation and split the keyword into individual words - keyword_words = "".join( - char - for char in standard_answer.keyword.lower() - if char not in string.punctuation - ).split() - - # Remove punctuation and split the query into individual words - query_words = "".join( - char for char in query.lower() if char not in string.punctuation - ).split() - - # Check if all of the keyword words are in the query words - if all(word in query_words for word in keyword_words): - matching_standard_answers.append(standard_answer) - - return matching_standard_answers - - def fetch_standard_answers(db_session: Session) -> Sequence[StandardAnswer]: return db_session.scalars( select(StandardAnswer).where(StandardAnswer.active.is_(True)) @@ -237,3 +202,78 @@ def create_initial_default_standard_answer_category(db_session: Session) -> None ) db_session.add(standard_answer_category) db_session.commit() + + +def fetch_standard_answer_categories_by_names( + standard_answer_category_names: list[str], + db_session: Session, +) -> Sequence[StandardAnswerCategory]: + return db_session.scalars( + select(StandardAnswerCategory).where( + StandardAnswerCategory.name.in_(standard_answer_category_names) + ) + ).all() + + +def find_matching_standard_answers( + id_in: list[int], + query: str, + db_session: Session, +) -> list[tuple[StandardAnswer, str]]: + """ + Returns a list of tuples, where each tuple is a StandardAnswer definition matching + the query and a string representing the match (either the regex match group or the + set of keywords). + + If `answer_instance.match_regex` is true, the definition is considered "matched" + if the query matches the `answer_instance.keyword` using `re.search`. 
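A standalone illustration of the regex branch described here: the stored keyword is treated as a pattern, matched case-insensitively, and the matched substring is what gets reported back alongside the standard answer. The pattern and query below are made up:

```python
# Standalone illustration -- the keyword pattern and query are made-up examples.
import re

keyword_pattern = r"reset\s+password"  # a standard answer keyword used as a regex
query = "How do I Reset  Password for my account?"

match = re.search(keyword_pattern, query, re.IGNORECASE)
if match is not None:
    # group(0) is the matched substring returned with the standard answer
    print(f"matched: {match.group(0)!r}")  # -> matched: 'Reset  Password'
```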
+ + Otherwise, the definition is considered "matched" if the space-delimited tokens + in `keyword` exists in `query`, depending on the state of `match_any_keywords` + """ + stmt = ( + select(StandardAnswer) + .where(StandardAnswer.active.is_(True)) + .where(StandardAnswer.id.in_(id_in)) + ) + possible_standard_answers: Sequence[StandardAnswer] = db_session.scalars(stmt).all() + + matching_standard_answers: list[tuple[StandardAnswer, str]] = [] + for standard_answer in possible_standard_answers: + if standard_answer.match_regex: + maybe_matches = re.search(standard_answer.keyword, query, re.IGNORECASE) + if maybe_matches is not None: + match_group = maybe_matches.group(0) + matching_standard_answers.append((standard_answer, match_group)) + + else: + # Remove punctuation and split the keyword into individual words + keyword_words = set( + "".join( + char + for char in standard_answer.keyword.lower() + if char not in string.punctuation + ).split() + ) + + # Remove punctuation and split the query into individual words + query_words = "".join( + char for char in query.lower() if char not in string.punctuation + ).split() + + # Check if all of the keyword words are in the query words + if standard_answer.match_any_keywords: + for word in query_words: + if word in keyword_words: + matching_standard_answers.append((standard_answer, word)) + break + else: + if all(word in query_words for word in keyword_words): + matching_standard_answers.append( + ( + standard_answer, + re.sub(r"\s+?", ", ", standard_answer.keyword), + ) + ) + + return matching_standard_answers diff --git a/backend/ee/danswer/db/user_group.py b/backend/ee/danswer/db/user_group.py index 9d172c5d716..863b9170e3f 100644 --- a/backend/ee/danswer/db/user_group.py +++ b/backend/ee/danswer/db/user_group.py @@ -2,8 +2,10 @@ from operator import and_ from uuid import UUID +from fastapi import HTTPException from sqlalchemy import delete from sqlalchemy import func +from sqlalchemy import Select from sqlalchemy import select from sqlalchemy import update from sqlalchemy.orm import Session @@ -14,7 +16,9 @@ from danswer.db.models import Credential__UserGroup from danswer.db.models import Document from danswer.db.models import DocumentByConnectorCredentialPair +from danswer.db.models import DocumentSet__UserGroup from danswer.db.models import LLMProvider__UserGroup +from danswer.db.models import Persona__UserGroup from danswer.db.models import TokenRateLimit__UserGroup from danswer.db.models import User from danswer.db.models import User__UserGroup @@ -30,16 +34,166 @@ logger = setup_logger() +def _cleanup_user__user_group_relationships__no_commit( + db_session: Session, + user_group_id: int, + user_ids: list[UUID] | None = None, +) -> None: + """NOTE: does not commit the transaction.""" + where_clause = User__UserGroup.user_group_id == user_group_id + if user_ids: + where_clause &= User__UserGroup.user_id.in_(user_ids) + + user__user_group_relationships = db_session.scalars( + select(User__UserGroup).where(where_clause) + ).all() + for user__user_group_relationship in user__user_group_relationships: + db_session.delete(user__user_group_relationship) + + +def _cleanup_credential__user_group_relationships__no_commit( + db_session: Session, + user_group_id: int, +) -> None: + """NOTE: does not commit the transaction.""" + db_session.query(Credential__UserGroup).filter( + Credential__UserGroup.user_group_id == user_group_id + ).delete(synchronize_session=False) + + +def _cleanup_llm_provider__user_group_relationships__no_commit( + db_session: 
Session, user_group_id: int +) -> None: + """NOTE: does not commit the transaction.""" + db_session.query(LLMProvider__UserGroup).filter( + LLMProvider__UserGroup.user_group_id == user_group_id + ).delete(synchronize_session=False) + + +def _cleanup_persona__user_group_relationships__no_commit( + db_session: Session, user_group_id: int +) -> None: + """NOTE: does not commit the transaction.""" + db_session.query(Persona__UserGroup).filter( + Persona__UserGroup.user_group_id == user_group_id + ).delete(synchronize_session=False) + + +def _cleanup_token_rate_limit__user_group_relationships__no_commit( + db_session: Session, user_group_id: int +) -> None: + """NOTE: does not commit the transaction.""" + token_rate_limit__user_group_relationships = db_session.scalars( + select(TokenRateLimit__UserGroup).where( + TokenRateLimit__UserGroup.user_group_id == user_group_id + ) + ).all() + for ( + token_rate_limit__user_group_relationship + ) in token_rate_limit__user_group_relationships: + db_session.delete(token_rate_limit__user_group_relationship) + + +def _cleanup_user_group__cc_pair_relationships__no_commit( + db_session: Session, user_group_id: int, outdated_only: bool +) -> None: + """NOTE: does not commit the transaction.""" + stmt = select(UserGroup__ConnectorCredentialPair).where( + UserGroup__ConnectorCredentialPair.user_group_id == user_group_id + ) + if outdated_only: + stmt = stmt.where( + UserGroup__ConnectorCredentialPair.is_current == False # noqa: E712 + ) + user_group__cc_pair_relationships = db_session.scalars(stmt) + for user_group__cc_pair_relationship in user_group__cc_pair_relationships: + db_session.delete(user_group__cc_pair_relationship) + + +def _cleanup_document_set__user_group_relationships__no_commit( + db_session: Session, user_group_id: int +) -> None: + """NOTE: does not commit the transaction.""" + db_session.execute( + delete(DocumentSet__UserGroup).where( + DocumentSet__UserGroup.user_group_id == user_group_id + ) + ) + + +def validate_user_creation_permissions( + db_session: Session, + user: User | None, + target_group_ids: list[int] | None, + object_is_public: bool | None, +) -> None: + """ + All admin actions are allowed. 
+ Prevents non-admins from creating/editing: + - public objects + - objects with no groups + - objects that belong to a group they don't curate + """ + if not user or user.role == UserRole.ADMIN: + return + + if object_is_public: + detail = "User does not have permission to create public credentials" + logger.error(detail) + raise HTTPException( + status_code=400, + detail=detail, + ) + if not target_group_ids: + detail = "Curators must specify 1+ groups" + logger.error(detail) + raise HTTPException( + status_code=400, + detail=detail, + ) + + user_curated_groups = fetch_user_groups_for_user( + db_session=db_session, + user_id=user.id, + # Global curators can curate all groups they are member of + only_curator_groups=user.role != UserRole.GLOBAL_CURATOR, + ) + user_curated_group_ids = set([group.id for group in user_curated_groups]) + target_group_ids_set = set(target_group_ids) + if not target_group_ids_set.issubset(user_curated_group_ids): + detail = "Curators cannot control groups they don't curate" + logger.error(detail) + raise HTTPException( + status_code=400, + detail=detail, + ) + + def fetch_user_group(db_session: Session, user_group_id: int) -> UserGroup | None: stmt = select(UserGroup).where(UserGroup.id == user_group_id) return db_session.scalar(stmt) def fetch_user_groups( - db_session: Session, only_current: bool = True + db_session: Session, only_up_to_date: bool = True ) -> Sequence[UserGroup]: + """ + Fetches user groups from the database. + + This function retrieves a sequence of `UserGroup` objects from the database. + If `only_up_to_date` is set to `True`, it filters the user groups to return only those + that are marked as up-to-date (`is_up_to_date` is `True`). + + Args: + db_session (Session): The SQLAlchemy session used to query the database. + only_up_to_date (bool, optional): Flag to determine whether to filter the results + to include only up to date user groups. Defaults to `True`. + + Returns: + Sequence[UserGroup]: A sequence of `UserGroup` objects matching the query criteria. + """ stmt = select(UserGroup) - if only_current: + if only_up_to_date: stmt = stmt.where(UserGroup.is_up_to_date == True) # noqa: E712 return db_session.scalars(stmt).all() @@ -58,6 +212,42 @@ def fetch_user_groups_for_user( return db_session.scalars(stmt).all() +def construct_document_select_by_usergroup( + user_group_id: int, +) -> Select: + """This returns a statement that should be executed using + .yield_per() to minimize overhead. 
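The `.yield_per()` guidance in the `construct_document_select_by_usergroup` docstring translates to something like the sketch below. The wrapper function and batch size are hypothetical, and passing `yield_per` as an execution option assumes a SQLAlchemy version that supports it (1.4.40+ / 2.0):

```python
# Hypothetical consumer sketch -- the batch size is arbitrary.
from collections.abc import Iterator

from sqlalchemy.orm import Session

from danswer.db.models import Document
from ee.danswer.db.user_group import construct_document_select_by_usergroup


def iter_user_group_documents(
    db_session: Session, user_group_id: int
) -> Iterator[Document]:
    stmt = construct_document_select_by_usergroup(user_group_id).execution_options(
        yield_per=100
    )
    # stream Document rows in batches instead of materializing them all at once
    yield from db_session.scalars(stmt)
```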
The primary consumers of this function + are background processing task generators.""" + stmt = ( + select(Document) + .join( + DocumentByConnectorCredentialPair, + Document.id == DocumentByConnectorCredentialPair.id, + ) + .join( + ConnectorCredentialPair, + and_( + DocumentByConnectorCredentialPair.connector_id + == ConnectorCredentialPair.connector_id, + DocumentByConnectorCredentialPair.credential_id + == ConnectorCredentialPair.credential_id, + ), + ) + .join( + UserGroup__ConnectorCredentialPair, + UserGroup__ConnectorCredentialPair.cc_pair_id == ConnectorCredentialPair.id, + ) + .join( + UserGroup, + UserGroup__ConnectorCredentialPair.user_group_id == UserGroup.id, + ) + .where(UserGroup.id == user_group_id) + .order_by(Document.id) + ) + stmt = stmt.distinct() + return stmt + + def fetch_documents_for_user_group_paginated( db_session: Session, user_group_id: int, @@ -102,7 +292,7 @@ def fetch_documents_for_user_group_paginated( def fetch_user_groups_for_documents( db_session: Session, document_ids: list[str], -) -> Sequence[tuple[int, list[str]]]: +) -> Sequence[tuple[str, list[str]]]: stmt = ( select(Document.id, func.array_agg(UserGroup.name)) .join( @@ -188,42 +378,6 @@ def insert_user_group(db_session: Session, user_group: UserGroupCreate) -> UserG return db_user_group -def _cleanup_user__user_group_relationships__no_commit( - db_session: Session, - user_group_id: int, - user_ids: list[UUID] | None = None, -) -> None: - """NOTE: does not commit the transaction.""" - where_clause = User__UserGroup.user_group_id == user_group_id - if user_ids: - where_clause &= User__UserGroup.user_id.in_(user_ids) - - user__user_group_relationships = db_session.scalars( - select(User__UserGroup).where(where_clause) - ).all() - for user__user_group_relationship in user__user_group_relationships: - db_session.delete(user__user_group_relationship) - - -def _cleanup_credential__user_group_relationships__no_commit( - db_session: Session, - user_group_id: int, -) -> None: - """NOTE: does not commit the transaction.""" - db_session.query(Credential__UserGroup).filter( - Credential__UserGroup.user_group_id == user_group_id - ).delete(synchronize_session=False) - - -def _cleanup_llm_provider__user_group_relationships__no_commit( - db_session: Session, user_group_id: int -) -> None: - """NOTE: does not commit the transaction.""" - db_session.query(LLMProvider__UserGroup).filter( - LLMProvider__UserGroup.user_group_id == user_group_id - ).delete(synchronize_session=False) - - def _mark_user_group__cc_pair_relationships_outdated__no_commit( db_session: Session, user_group_id: int ) -> None: @@ -316,6 +470,10 @@ def update_user_group( user_group_id: int, user_group_update: UserGroupUpdate, ) -> UserGroup: + """If successful, this can set db_user_group.is_up_to_date = False. + That will be processed by check_for_vespa_user_groups_sync_task and trigger + a long running background sync to Vespa. 
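The docstring above points at `check_for_vespa_user_groups_sync_task`; completion of that background sync is what `monitor_usergroup_taskset` (added earlier in this patch) detects through a Redis "fence" key holding the initial task count plus a task-set key of outstanding work. A standalone sketch of that completion check, with hypothetical key names:

```python
# Standalone sketch of the fence/taskset completion check -- key names are hypothetical.
from redis import Redis


def is_sync_finished(r: Redis, fence_key: str, taskset_key: str) -> bool:
    fence_value = r.get(fence_key)
    if fence_value is None:
        return False  # no sync currently in flight
    remaining = r.scard(taskset_key)
    if remaining > 0:
        return False  # workers are still draining the task set
    # sync finished: clear both keys so the next run starts clean
    r.delete(taskset_key)
    r.delete(fence_key)
    return True
```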
+ """ stmt = select(UserGroup).where(UserGroup.id == user_group_id) db_user_group = db_session.scalar(stmt) if db_user_group is None: @@ -374,21 +532,6 @@ def update_user_group( return db_user_group -def _cleanup_token_rate_limit__user_group_relationships__no_commit( - db_session: Session, user_group_id: int -) -> None: - """NOTE: does not commit the transaction.""" - token_rate_limit__user_group_relationships = db_session.scalars( - select(TokenRateLimit__UserGroup).where( - TokenRateLimit__UserGroup.user_group_id == user_group_id - ) - ).all() - for ( - token_rate_limit__user_group_relationship - ) in token_rate_limit__user_group_relationships: - db_session.delete(token_rate_limit__user_group_relationship) - - def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) -> None: stmt = select(UserGroup).where(UserGroup.id == user_group_id) db_user_group = db_session.scalar(stmt) @@ -397,16 +540,31 @@ def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) -> _check_user_group_is_modifiable(db_user_group) + _mark_user_group__cc_pair_relationships_outdated__no_commit( + db_session=db_session, user_group_id=user_group_id + ) + _cleanup_credential__user_group_relationships__no_commit( db_session=db_session, user_group_id=user_group_id ) _cleanup_user__user_group_relationships__no_commit( db_session=db_session, user_group_id=user_group_id ) - _mark_user_group__cc_pair_relationships_outdated__no_commit( + _cleanup_token_rate_limit__user_group_relationships__no_commit( db_session=db_session, user_group_id=user_group_id ) - _cleanup_token_rate_limit__user_group_relationships__no_commit( + _cleanup_document_set__user_group_relationships__no_commit( + db_session=db_session, user_group_id=user_group_id + ) + _cleanup_persona__user_group_relationships__no_commit( + db_session=db_session, user_group_id=user_group_id + ) + _cleanup_user_group__cc_pair_relationships__no_commit( + db_session=db_session, + user_group_id=user_group_id, + outdated_only=False, + ) + _cleanup_llm_provider__user_group_relationships__no_commit( db_session=db_session, user_group_id=user_group_id ) @@ -415,20 +573,12 @@ def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) -> db_session.commit() -def _cleanup_user_group__cc_pair_relationships__no_commit( - db_session: Session, user_group_id: int, outdated_only: bool -) -> None: - """NOTE: does not commit the transaction.""" - stmt = select(UserGroup__ConnectorCredentialPair).where( - UserGroup__ConnectorCredentialPair.user_group_id == user_group_id - ) - if outdated_only: - stmt = stmt.where( - UserGroup__ConnectorCredentialPair.is_current == False # noqa: E712 - ) - user_group__cc_pair_relationships = db_session.scalars(stmt) - for user_group__cc_pair_relationship in user_group__cc_pair_relationships: - db_session.delete(user_group__cc_pair_relationship) +def delete_user_group(db_session: Session, user_group: UserGroup) -> None: + """ + This assumes that all the fk cleanup has already been done. 
+ """ + db_session.delete(user_group) + db_session.commit() def mark_user_group_as_synced(db_session: Session, user_group: UserGroup) -> None: @@ -440,26 +590,6 @@ def mark_user_group_as_synced(db_session: Session, user_group: UserGroup) -> Non db_session.commit() -def delete_user_group(db_session: Session, user_group: UserGroup) -> None: - _cleanup_llm_provider__user_group_relationships__no_commit( - db_session=db_session, user_group_id=user_group.id - ) - _cleanup_user__user_group_relationships__no_commit( - db_session=db_session, user_group_id=user_group.id - ) - _cleanup_user_group__cc_pair_relationships__no_commit( - db_session=db_session, - user_group_id=user_group.id, - outdated_only=False, - ) - - # need to flush so that we don't get a foreign key error when deleting the user group row - db_session.flush() - - db_session.delete(user_group) - db_session.commit() - - def delete_user_group_cc_pair_relationship__no_commit( cc_pair_id: int, db_session: Session ) -> None: diff --git a/backend/ee/danswer/external_permissions/__init__.py b/backend/ee/danswer/external_permissions/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/ee/danswer/external_permissions/confluence/__init__.py b/backend/ee/danswer/external_permissions/confluence/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/ee/danswer/external_permissions/confluence/confluence_sync_utils.py b/backend/ee/danswer/external_permissions/confluence/confluence_sync_utils.py new file mode 100644 index 00000000000..e911e2649ba --- /dev/null +++ b/backend/ee/danswer/external_permissions/confluence/confluence_sync_utils.py @@ -0,0 +1,18 @@ +from typing import Any + +from atlassian import Confluence # type:ignore + + +def build_confluence_client( + connector_specific_config: dict[str, Any], raw_credentials_json: dict[str, Any] +) -> Confluence: + is_cloud = connector_specific_config.get("is_cloud", False) + return Confluence( + api_version="cloud" if is_cloud else "latest", + # Remove trailing slash from wiki_base if present + url=connector_specific_config["wiki_base"].rstrip("/"), + # passing in username causes issues for Confluence data center + username=raw_credentials_json["confluence_username"] if is_cloud else None, + password=raw_credentials_json["confluence_access_token"] if is_cloud else None, + token=raw_credentials_json["confluence_access_token"] if not is_cloud else None, + ) diff --git a/backend/ee/danswer/external_permissions/confluence/doc_sync.py b/backend/ee/danswer/external_permissions/confluence/doc_sync.py new file mode 100644 index 00000000000..b6812adb9e7 --- /dev/null +++ b/backend/ee/danswer/external_permissions/confluence/doc_sync.py @@ -0,0 +1,254 @@ +from typing import Any + +from atlassian import Confluence # type:ignore +from sqlalchemy.orm import Session + +from danswer.access.models import ExternalAccess +from danswer.connectors.confluence.confluence_utils import ( + build_confluence_document_id, +) +from danswer.connectors.confluence.rate_limit_handler import ( + make_confluence_call_handle_rate_limit, +) +from danswer.db.models import ConnectorCredentialPair +from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit +from danswer.utils.logger import setup_logger +from ee.danswer.db.document import upsert_document_external_perms__no_commit +from ee.danswer.external_permissions.confluence.confluence_sync_utils import ( + build_confluence_client, +) + + +logger = setup_logger() + +_REQUEST_PAGINATION_LIMIT = 100 + + +def 
_get_space_permissions( + db_session: Session, + confluence_client: Confluence, + space_id: str, +) -> ExternalAccess: + get_space_permissions = make_confluence_call_handle_rate_limit( + confluence_client.get_space_permissions + ) + + space_permissions = get_space_permissions(space_id).get("permissions", []) + user_emails = set() + # Confluence enforces that group names are unique + group_names = set() + is_externally_public = False + for permission in space_permissions: + subs = permission.get("subjects") + if subs: + # If there are subjects, then there are explicit users or groups with access + if email := subs.get("user", {}).get("results", [{}])[0].get("email"): + user_emails.add(email) + if group_name := subs.get("group", {}).get("results", [{}])[0].get("name"): + group_names.add(group_name) + else: + # If there are no subjects, then the permission is for everyone + if permission.get("operation", {}).get( + "operation" + ) == "read" and permission.get("anonymousAccess", False): + # If the permission specifies read access for anonymous users, then + # the space is publicly accessible + is_externally_public = True + batch_add_non_web_user_if_not_exists__no_commit( + db_session=db_session, emails=list(user_emails) + ) + return ExternalAccess( + external_user_emails=user_emails, + external_user_group_ids=group_names, + is_public=is_externally_public, + ) + + +def _get_restrictions_for_page( + db_session: Session, + page: dict[str, Any], + space_permissions: ExternalAccess, +) -> ExternalAccess: + """ + WARNING: This function includes no pagination. So if a page is private within + the space and has over 200 users or over 200 groups with explicitly read access, + this function will leave out some users or groups. + 200 is a large amount so it is unlikely, but just be aware. + """ + restrictions_json = page.get("restrictions", {}) + read_access_dict = restrictions_json.get("read", {}).get("restrictions", {}) + + read_access_user_jsons = read_access_dict.get("user", {}).get("results", []) + read_access_group_jsons = read_access_dict.get("group", {}).get("results", []) + + is_space_public = read_access_user_jsons == [] and read_access_group_jsons == [] + + if not is_space_public: + read_access_user_emails = [ + user["email"] for user in read_access_user_jsons if user.get("email") + ] + read_access_groups = [group["name"] for group in read_access_group_jsons] + batch_add_non_web_user_if_not_exists__no_commit( + db_session=db_session, emails=list(read_access_user_emails) + ) + external_access = ExternalAccess( + external_user_emails=set(read_access_user_emails), + external_user_group_ids=set(read_access_groups), + is_public=False, + ) + else: + external_access = space_permissions + + return external_access + + +def _fetch_attachment_document_ids_for_page_paginated( + confluence_client: Confluence, page: dict[str, Any] +) -> list[str]: + """ + Starts by just extracting the first page of attachments from + the page. If all attachments are in the first page, then + no calls to the api are made from this function. 
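These Confluence helpers all lean on the same start/limit pagination idiom: advance `start` by the size of each batch and stop when the response no longer advertises a next page. A standalone sketch of that idiom; `fetch_page` is a stand-in for the rate-limited Confluence call, not a danswer API:

```python
# Standalone sketch of the start/limit pagination loop -- fetch_page is an assumption.
from collections.abc import Callable
from typing import Any

_PAGE_LIMIT = 100


def collect_all_results(
    fetch_page: Callable[[int, int], dict[str, Any]],
) -> list[dict[str, Any]]:
    results: list[dict[str, Any]] = []
    start = 0
    while True:
        page = fetch_page(start, _PAGE_LIMIT)
        batch = page["results"]
        results.extend(batch)
        # Confluence signals more pages via a "next" link; stop when it is absent
        if "next" not in page.get("_links", {}):
            break
        start += len(batch)
    return results
```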
+ """ + get_attachments_from_content = make_confluence_call_handle_rate_limit( + confluence_client.get_attachments_from_content + ) + + attachment_doc_ids = [] + attachments_dict = page["children"]["attachment"] + start = 0 + + while True: + attachments_list = attachments_dict["results"] + attachment_doc_ids.extend( + [ + build_confluence_document_id( + base_url=confluence_client.url, + content_url=attachment["_links"]["download"], + ) + for attachment in attachments_list + ] + ) + + if "next" not in attachments_dict["_links"]: + break + + start += len(attachments_list) + attachments_dict = get_attachments_from_content( + page_id=page["id"], + start=start, + limit=_REQUEST_PAGINATION_LIMIT, + ) + + return attachment_doc_ids + + +def _fetch_all_pages_paginated( + confluence_client: Confluence, + space_id: str, +) -> list[dict[str, Any]]: + get_all_pages_from_space = make_confluence_call_handle_rate_limit( + confluence_client.get_all_pages_from_space + ) + + # For each page, this fetches the page's attachments and restrictions. + expansion_strings = [ + "children.attachment", + "restrictions.read.restrictions.user", + "restrictions.read.restrictions.group", + ] + expansion_string = ",".join(expansion_strings) + + all_pages = [] + start = 0 + while True: + pages_dict = get_all_pages_from_space( + space=space_id, + start=start, + limit=_REQUEST_PAGINATION_LIMIT, + expand=expansion_string, + ) + all_pages.extend(pages_dict) + + response_size = len(pages_dict) + if response_size < _REQUEST_PAGINATION_LIMIT: + break + start += response_size + + return all_pages + + +def _fetch_all_page_restrictions_for_space( + db_session: Session, + confluence_client: Confluence, + space_id: str, + space_permissions: ExternalAccess, +) -> dict[str, ExternalAccess]: + all_pages = _fetch_all_pages_paginated( + confluence_client=confluence_client, + space_id=space_id, + ) + + document_restrictions: dict[str, ExternalAccess] = {} + for page in all_pages: + """ + This assigns the same permissions to all attachments of a page and + the page itself. + This is because the attachments are stored in the same Confluence space as the page. + WARNING: We create a dbDocument entry for all attachments, even though attachments + may not be their own standalone documents. This is likely fine as we just upsert a + document with just permissions. 
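+        Note: attachment-level restrictions that differ from the parent page
+        are not captured here; the page's permissions are applied to them.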
+ """ + attachment_document_ids = [ + build_confluence_document_id( + base_url=confluence_client.url, + content_url=page["_links"]["webui"], + ) + ] + attachment_document_ids.extend( + _fetch_attachment_document_ids_for_page_paginated( + confluence_client=confluence_client, page=page + ) + ) + page_permissions = _get_restrictions_for_page( + db_session=db_session, + page=page, + space_permissions=space_permissions, + ) + for attachment_document_id in attachment_document_ids: + document_restrictions[attachment_document_id] = page_permissions + + return document_restrictions + + +def confluence_doc_sync( + db_session: Session, + cc_pair: ConnectorCredentialPair, +) -> None: + """ + Adds the external permissions to the documents in postgres + if the document doesn't already exists in postgres, we create + it in postgres so that when it gets created later, the permissions are + already populated + """ + confluence_client = build_confluence_client( + cc_pair.connector.connector_specific_config, cc_pair.credential.credential_json + ) + space_permissions = _get_space_permissions( + db_session=db_session, + confluence_client=confluence_client, + space_id=cc_pair.connector.connector_specific_config["space"], + ) + fresh_doc_permissions = _fetch_all_page_restrictions_for_space( + db_session=db_session, + confluence_client=confluence_client, + space_id=cc_pair.connector.connector_specific_config["space"], + space_permissions=space_permissions, + ) + for doc_id, ext_access in fresh_doc_permissions.items(): + upsert_document_external_perms__no_commit( + db_session=db_session, + doc_id=doc_id, + external_access=ext_access, + source_type=cc_pair.connector.source, + ) diff --git a/backend/ee/danswer/external_permissions/confluence/group_sync.py b/backend/ee/danswer/external_permissions/confluence/group_sync.py new file mode 100644 index 00000000000..33bc60cc6d5 --- /dev/null +++ b/backend/ee/danswer/external_permissions/confluence/group_sync.py @@ -0,0 +1,107 @@ +from collections.abc import Iterator + +from atlassian import Confluence # type:ignore +from requests import HTTPError +from sqlalchemy.orm import Session + +from danswer.connectors.confluence.rate_limit_handler import ( + make_confluence_call_handle_rate_limit, +) +from danswer.db.models import ConnectorCredentialPair +from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit +from danswer.utils.logger import setup_logger +from ee.danswer.db.external_perm import ExternalUserGroup +from ee.danswer.db.external_perm import replace_user__ext_group_for_cc_pair__no_commit +from ee.danswer.external_permissions.confluence.confluence_sync_utils import ( + build_confluence_client, +) + + +logger = setup_logger() + +_PAGE_SIZE = 100 + + +def _get_confluence_group_names_paginated( + confluence_client: Confluence, +) -> Iterator[str]: + get_all_groups = make_confluence_call_handle_rate_limit( + confluence_client.get_all_groups + ) + + start = 0 + while True: + try: + groups = get_all_groups(start=start, limit=_PAGE_SIZE) + except HTTPError as e: + if e.response.status_code in (403, 404): + return + raise e + + for group in groups: + if group_name := group.get("name"): + yield group_name + + if len(groups) < _PAGE_SIZE: + break + start += _PAGE_SIZE + + +def _get_group_members_email_paginated( + confluence_client: Confluence, + group_name: str, +) -> list[str]: + get_group_members = make_confluence_call_handle_rate_limit( + confluence_client.get_group_members + ) + group_member_emails: list[str] = [] + start = 0 + while True: + try: + 
members = get_group_members( + group_name=group_name, start=start, limit=_PAGE_SIZE + ) + except HTTPError as e: + if e.response.status_code == 403 or e.response.status_code == 404: + return group_member_emails + raise e + + group_member_emails.extend( + [member.get("email") for member in members if member.get("email")] + ) + if len(members) < _PAGE_SIZE: + break + start += _PAGE_SIZE + return group_member_emails + + +def confluence_group_sync( + db_session: Session, + cc_pair: ConnectorCredentialPair, +) -> None: + confluence_client = build_confluence_client( + cc_pair.connector.connector_specific_config, cc_pair.credential.credential_json + ) + + danswer_groups: list[ExternalUserGroup] = [] + # Confluence enforces that group names are unique + for group_name in _get_confluence_group_names_paginated(confluence_client): + group_member_emails = _get_group_members_email_paginated( + confluence_client, group_name + ) + group_members = batch_add_non_web_user_if_not_exists__no_commit( + db_session=db_session, emails=group_member_emails + ) + if group_members: + danswer_groups.append( + ExternalUserGroup( + id=group_name, user_ids=[user.id for user in group_members] + ) + ) + + replace_user__ext_group_for_cc_pair__no_commit( + db_session=db_session, + cc_pair_id=cc_pair.id, + group_defs=danswer_groups, + source=cc_pair.connector.source, + ) diff --git a/backend/ee/danswer/external_permissions/google_drive/__init__.py b/backend/ee/danswer/external_permissions/google_drive/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/ee/danswer/external_permissions/google_drive/doc_sync.py b/backend/ee/danswer/external_permissions/google_drive/doc_sync.py new file mode 100644 index 00000000000..a957558a99e --- /dev/null +++ b/backend/ee/danswer/external_permissions/google_drive/doc_sync.py @@ -0,0 +1,200 @@ +from collections.abc import Iterator +from datetime import datetime +from datetime import timezone +from typing import Any +from typing import cast + +from googleapiclient.discovery import build # type: ignore +from googleapiclient.errors import HttpError # type: ignore +from sqlalchemy.orm import Session + +from danswer.access.models import ExternalAccess +from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder +from danswer.connectors.factory import instantiate_connector +from danswer.connectors.google_drive.connector_auth import ( + get_google_drive_creds, +) +from danswer.connectors.google_drive.constants import FETCH_PERMISSIONS_SCOPES +from danswer.connectors.interfaces import PollConnector +from danswer.connectors.models import InputType +from danswer.db.models import ConnectorCredentialPair +from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit +from danswer.utils.logger import setup_logger +from ee.danswer.db.document import upsert_document_external_perms__no_commit + +# Google Drive APIs are quite flakey and may 500 for an +# extended period of time. 
Trying to combat here by adding a very +# long retry period (~20 minutes of trying every minute) +add_retries = retry_builder(tries=5, delay=5, max_delay=30) + + +logger = setup_logger() + + +def _get_docs_with_additional_info( + db_session: Session, + cc_pair: ConnectorCredentialPair, +) -> dict[str, Any]: + # Get all document ids that need their permissions updated + runnable_connector = instantiate_connector( + db_session=db_session, + source=cc_pair.connector.source, + input_type=InputType.POLL, + connector_specific_config=cc_pair.connector.connector_specific_config, + credential=cc_pair.credential, + ) + + assert isinstance(runnable_connector, PollConnector) + + current_time = datetime.now(timezone.utc) + start_time = ( + cc_pair.last_time_perm_sync.replace(tzinfo=timezone.utc).timestamp() + if cc_pair.last_time_perm_sync + else 0.0 + ) + cc_pair.last_time_perm_sync = current_time + + doc_batch_generator = runnable_connector.poll_source( + start=start_time, end=current_time.timestamp() + ) + + docs_with_additional_info = { + doc.id: doc.additional_info + for doc_batch in doc_batch_generator + for doc in doc_batch + } + + return docs_with_additional_info + + +def _fetch_permissions_paginated( + drive_service: Any, drive_file_id: str +) -> Iterator[dict[str, Any]]: + next_token = None + + # Check if the file is trashed + # Returning nothing here will cause the external permissions to + # be empty which will get written to vespa (failing shut) + try: + file_metadata = add_retries( + lambda: drive_service.files() + .get(fileId=drive_file_id, fields="id, trashed") + .execute() + )() + except HttpError as e: + if e.resp.status == 404 or e.resp.status == 403: + return + logger.error(f"Failed to fetch permissions: {e}") + raise + + if file_metadata.get("trashed", False): + logger.debug(f"File with ID {drive_file_id} is trashed") + return + + # Get paginated permissions for the file id + while True: + try: + permissions_resp: dict[str, Any] = add_retries( + lambda: ( + drive_service.permissions() + .list( + fileId=drive_file_id, + fields="permissions(id, emailAddress, role, type, domain)", + supportsAllDrives=True, + pageToken=next_token, + ) + .execute() + ) + )() + except HttpError as e: + if e.resp.status == 404 or e.resp.status == 403: + break + logger.error(f"Failed to fetch permissions: {e}") + raise + + for permission in permissions_resp.get("permissions", []): + yield permission + + next_token = permissions_resp.get("nextPageToken") + if not next_token: + break + + +def _fetch_google_permissions_for_document_id( + db_session: Session, + drive_file_id: str, + raw_credentials_json: dict[str, str], + company_google_domains: list[str], +) -> ExternalAccess: + # Authenticate and construct service + google_drive_creds, _ = get_google_drive_creds( + raw_credentials_json, scopes=FETCH_PERMISSIONS_SCOPES + ) + if not google_drive_creds.valid: + raise ValueError("Invalid Google Drive credentials") + + drive_service = build("drive", "v3", credentials=google_drive_creds) + + user_emails: set[str] = set() + group_emails: set[str] = set() + public = False + for permission in _fetch_permissions_paginated(drive_service, drive_file_id): + permission_type = permission["type"] + if permission_type == "user": + user_emails.add(permission["emailAddress"]) + elif permission_type == "group": + group_emails.add(permission["emailAddress"]) + elif permission_type == "domain": + if permission["domain"] in company_google_domains: + public = True + elif permission_type == "anyone": + public = True + + 
batch_add_non_web_user_if_not_exists__no_commit(db_session, list(user_emails)) + + return ExternalAccess( + external_user_emails=user_emails, + external_user_group_ids=group_emails, + is_public=public, + ) + + +def gdrive_doc_sync( + db_session: Session, + cc_pair: ConnectorCredentialPair, +) -> None: + """ + Adds the external permissions to the documents in postgres + if the document doesn't already exists in postgres, we create + it in postgres so that when it gets created later, the permissions are + already populated + """ + sync_details = cc_pair.auto_sync_options + if sync_details is None: + logger.error("Sync details not found for Google Drive") + raise ValueError("Sync details not found for Google Drive") + + # Here we run the connector to grab all the ids + # this may grab ids before they are indexed but that is fine because + # we create a document in postgres to hold the permissions info + # until the indexing job has a chance to run + docs_with_additional_info = _get_docs_with_additional_info( + db_session=db_session, + cc_pair=cc_pair, + ) + + for doc_id, doc_additional_info in docs_with_additional_info.items(): + ext_access = _fetch_google_permissions_for_document_id( + db_session=db_session, + drive_file_id=doc_additional_info, + raw_credentials_json=cc_pair.credential.credential_json, + company_google_domains=[ + cast(dict[str, str], sync_details)["company_domain"] + ], + ) + upsert_document_external_perms__no_commit( + db_session=db_session, + doc_id=doc_id, + external_access=ext_access, + source_type=cc_pair.connector.source, + ) diff --git a/backend/ee/danswer/external_permissions/google_drive/group_sync.py b/backend/ee/danswer/external_permissions/google_drive/group_sync.py new file mode 100644 index 00000000000..7bb919d4686 --- /dev/null +++ b/backend/ee/danswer/external_permissions/google_drive/group_sync.py @@ -0,0 +1,149 @@ +from collections.abc import Iterator +from typing import Any + +from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore +from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore +from googleapiclient.discovery import build # type: ignore +from googleapiclient.errors import HttpError # type: ignore +from sqlalchemy.orm import Session + +from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder +from danswer.connectors.google_drive.connector_auth import ( + get_google_drive_creds, +) +from danswer.connectors.google_drive.constants import FETCH_GROUPS_SCOPES +from danswer.db.models import ConnectorCredentialPair +from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit +from danswer.utils.logger import setup_logger +from ee.danswer.db.external_perm import ExternalUserGroup +from ee.danswer.db.external_perm import replace_user__ext_group_for_cc_pair__no_commit + +logger = setup_logger() + + +# Google Drive APIs are quite flakey and may 500 for an +# extended period of time. 
Trying to combat here by adding a very +# long retry period (~20 minutes of trying every minute) +add_retries = retry_builder(tries=5, delay=5, max_delay=30) + + +def _fetch_groups_paginated( + google_drive_creds: ServiceAccountCredentials | OAuthCredentials, + identity_source: str | None = None, + customer_id: str | None = None, +) -> Iterator[dict[str, Any]]: + # Note that Google Drive does not use of update the user_cache as the user email + # comes directly with the call to fetch the groups, therefore this is not a valid + # place to save on requests + if identity_source is None and customer_id is None: + raise ValueError( + "Either identity_source or customer_id must be provided to fetch groups" + ) + + cloud_identity_service = build( + "cloudidentity", "v1", credentials=google_drive_creds + ) + parent = ( + f"identitysources/{identity_source}" + if identity_source + else f"customers/{customer_id}" + ) + + while True: + try: + groups_resp: dict[str, Any] = add_retries( + lambda: (cloud_identity_service.groups().list(parent=parent).execute()) + )() + for group in groups_resp.get("groups", []): + yield group + + next_token = groups_resp.get("nextPageToken") + if not next_token: + break + except HttpError as e: + if e.resp.status == 404 or e.resp.status == 403: + break + logger.error(f"Error fetching groups: {e}") + raise + + +def _fetch_group_members_paginated( + google_drive_creds: ServiceAccountCredentials | OAuthCredentials, + group_name: str, +) -> Iterator[dict[str, Any]]: + cloud_identity_service = build( + "cloudidentity", "v1", credentials=google_drive_creds + ) + next_token = None + while True: + try: + membership_info = add_retries( + lambda: ( + cloud_identity_service.groups() + .memberships() + .searchTransitiveMemberships( + parent=group_name, pageToken=next_token + ) + .execute() + ) + )() + + for member in membership_info.get("memberships", []): + yield member + + next_token = membership_info.get("nextPageToken") + if not next_token: + break + except HttpError as e: + if e.resp.status == 404 or e.resp.status == 403: + break + logger.error(f"Error fetching group members: {e}") + raise + + +def gdrive_group_sync( + db_session: Session, + cc_pair: ConnectorCredentialPair, +) -> None: + sync_details = cc_pair.auto_sync_options + if sync_details is None: + logger.error("Sync details not found for Google Drive") + raise ValueError("Sync details not found for Google Drive") + + google_drive_creds, _ = get_google_drive_creds( + cc_pair.credential.credential_json, + scopes=FETCH_GROUPS_SCOPES, + ) + + danswer_groups: list[ExternalUserGroup] = [] + for group in _fetch_groups_paginated( + google_drive_creds, + identity_source=sync_details.get("identity_source"), + customer_id=sync_details.get("customer_id"), + ): + # The id is the group email + group_email = group["groupKey"]["id"] + + group_member_emails: list[str] = [] + for member in _fetch_group_members_paginated(google_drive_creds, group["name"]): + member_keys = member["preferredMemberKey"] + member_emails = [member_key["id"] for member_key in member_keys] + for member_email in member_emails: + group_member_emails.append(member_email) + + group_members = batch_add_non_web_user_if_not_exists__no_commit( + db_session=db_session, emails=group_member_emails + ) + if group_members: + danswer_groups.append( + ExternalUserGroup( + id=group_email, user_ids=[user.id for user in group_members] + ) + ) + + replace_user__ext_group_for_cc_pair__no_commit( + db_session=db_session, + cc_pair_id=cc_pair.id, + group_defs=danswer_groups, + 
source=cc_pair.connector.source, + ) diff --git a/backend/ee/danswer/external_permissions/permission_sync.py b/backend/ee/danswer/external_permissions/permission_sync.py new file mode 100644 index 00000000000..3a4357f7c10 --- /dev/null +++ b/backend/ee/danswer/external_permissions/permission_sync.py @@ -0,0 +1,141 @@ +from datetime import datetime +from datetime import timezone + +from sqlalchemy.orm import Session + +from danswer.access.access import get_access_for_documents +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id +from danswer.db.document import get_document_ids_for_connector_credential_pair +from danswer.db.models import ConnectorCredentialPair +from danswer.document_index.factory import get_current_primary_default_document_index +from danswer.document_index.interfaces import UpdateRequest +from danswer.utils.logger import setup_logger +from ee.danswer.external_permissions.sync_params import DOC_PERMISSIONS_FUNC_MAP +from ee.danswer.external_permissions.sync_params import GROUP_PERMISSIONS_FUNC_MAP +from ee.danswer.external_permissions.sync_params import PERMISSION_SYNC_PERIODS + +logger = setup_logger() + + +def _is_time_to_run_sync(cc_pair: ConnectorCredentialPair) -> bool: + source_sync_period = PERMISSION_SYNC_PERIODS.get(cc_pair.connector.source) + + # If RESTRICTED_FETCH_PERIOD[source] is None, we always run the sync. + if not source_sync_period: + return True + + # If the last sync is None, it has never been run so we run the sync + if cc_pair.last_time_perm_sync is None: + return True + + last_sync = cc_pair.last_time_perm_sync.replace(tzinfo=timezone.utc) + current_time = datetime.now(timezone.utc) + + # If the last sync is greater than the full fetch period, we run the sync + if (current_time - last_sync).total_seconds() > source_sync_period: + return True + + return False + + +def run_external_group_permission_sync( + db_session: Session, + cc_pair_id: int, +) -> None: + cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session) + if cc_pair is None: + raise ValueError(f"No connector credential pair found for id: {cc_pair_id}") + + source_type = cc_pair.connector.source + group_sync_func = GROUP_PERMISSIONS_FUNC_MAP.get(source_type) + + if group_sync_func is None: + # Not all sync connectors support group permissions so this is fine + return + + if not _is_time_to_run_sync(cc_pair): + return + + try: + # This function updates: + # - the user_email <-> external_user_group_id mapping + # in postgres without committing + logger.debug(f"Syncing groups for {source_type}") + if group_sync_func is not None: + group_sync_func( + db_session, + cc_pair, + ) + + # update postgres + db_session.commit() + except Exception as e: + logger.error(f"Error updating document index: {e}") + db_session.rollback() + + +def run_external_doc_permission_sync( + db_session: Session, + cc_pair_id: int, +) -> None: + cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session) + if cc_pair is None: + raise ValueError(f"No connector credential pair found for id: {cc_pair_id}") + + source_type = cc_pair.connector.source + + doc_sync_func = DOC_PERMISSIONS_FUNC_MAP.get(source_type) + + if doc_sync_func is None: + raise ValueError( + f"No permission sync function found for source type: {source_type}" + ) + + if not _is_time_to_run_sync(cc_pair): + return + + try: + # This function updates: + # - the user_email <-> document mapping + # - the external_user_group_id <-> document mapping + # in postgres without committing + 
logger.debug(f"Syncing docs for {source_type}") + doc_sync_func( + db_session, + cc_pair, + ) + + # Get the document ids for the cc pair + document_ids_for_cc_pair = get_document_ids_for_connector_credential_pair( + db_session=db_session, + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + ) + + # This function fetches the updated access for the documents + # and returns a dictionary of document_ids and access + # This is the access we want to update vespa with + docs_access = get_access_for_documents( + document_ids=document_ids_for_cc_pair, + db_session=db_session, + ) + + # Then we build the update requests to update vespa + update_reqs = [ + UpdateRequest(document_ids=[doc_id], access=doc_access) + for doc_id, doc_access in docs_access.items() + ] + + # Don't bother sync-ing secondary, it will be sync-ed after switch anyway + document_index = get_current_primary_default_document_index(db_session) + + # update vespa + document_index.update(update_reqs) + + cc_pair.last_time_perm_sync = datetime.now(timezone.utc) + + # update postgres + db_session.commit() + except Exception as e: + logger.error(f"Error Syncing Permissions: {e}") + db_session.rollback() diff --git a/backend/ee/danswer/external_permissions/slack/doc_sync.py b/backend/ee/danswer/external_permissions/slack/doc_sync.py new file mode 100644 index 00000000000..fe731746a44 --- /dev/null +++ b/backend/ee/danswer/external_permissions/slack/doc_sync.py @@ -0,0 +1,192 @@ +from slack_sdk import WebClient +from sqlalchemy.orm import Session + +from danswer.access.models import ExternalAccess +from danswer.connectors.factory import instantiate_connector +from danswer.connectors.interfaces import IdConnector +from danswer.connectors.models import InputType +from danswer.connectors.slack.connector import get_channels +from danswer.connectors.slack.connector import make_paginated_slack_api_call_w_retries +from danswer.db.models import ConnectorCredentialPair +from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit +from danswer.utils.logger import setup_logger +from ee.danswer.db.document import upsert_document_external_perms__no_commit +from ee.danswer.external_permissions.slack.utils import fetch_user_id_to_email_map + + +logger = setup_logger() + + +def _extract_channel_id_from_doc_id(doc_id: str) -> str: + """ + Extracts the channel ID from a document ID string. + + The document ID is expected to be in the format: "{channel_id}__{message_ts}" + + Args: + doc_id (str): The document ID string. + + Returns: + str: The extracted channel ID. + + Raises: + ValueError: If the doc_id doesn't contain the expected separator. 
+ """ + try: + channel_id, _ = doc_id.split("__", 1) + return channel_id + except ValueError: + raise ValueError(f"Invalid doc_id format: {doc_id}") + + +def _get_slack_document_ids_and_channels( + db_session: Session, + cc_pair: ConnectorCredentialPair, +) -> dict[str, list[str]]: + # Get all document ids that need their permissions updated + runnable_connector = instantiate_connector( + db_session=db_session, + source=cc_pair.connector.source, + input_type=InputType.PRUNE, + connector_specific_config=cc_pair.connector.connector_specific_config, + credential=cc_pair.credential, + ) + + assert isinstance(runnable_connector, IdConnector) + + channel_doc_map: dict[str, list[str]] = {} + for doc_id in runnable_connector.retrieve_all_source_ids(): + channel_id = _extract_channel_id_from_doc_id(doc_id) + if channel_id not in channel_doc_map: + channel_doc_map[channel_id] = [] + channel_doc_map[channel_id].append(doc_id) + + return channel_doc_map + + +def _fetch_worspace_permissions( + db_session: Session, + user_id_to_email_map: dict[str, str], +) -> ExternalAccess: + user_emails = set() + for email in user_id_to_email_map.values(): + user_emails.add(email) + batch_add_non_web_user_if_not_exists__no_commit(db_session, list(user_emails)) + return ExternalAccess( + external_user_emails=user_emails, + # No group<->document mapping for slack + external_user_group_ids=set(), + # No way to determine if slack is invite only without enterprise liscense + is_public=False, + ) + + +def _fetch_channel_permissions( + db_session: Session, + slack_client: WebClient, + workspace_permissions: ExternalAccess, + user_id_to_email_map: dict[str, str], +) -> dict[str, ExternalAccess]: + channel_permissions = {} + public_channels = get_channels( + client=slack_client, + get_public=True, + get_private=False, + ) + public_channel_ids = [ + channel["id"] for channel in public_channels if "id" in channel + ] + for channel_id in public_channel_ids: + channel_permissions[channel_id] = workspace_permissions + + private_channels = get_channels( + client=slack_client, + get_public=False, + get_private=True, + ) + private_channel_ids = [ + channel["id"] for channel in private_channels if "id" in channel + ] + + for channel_id in private_channel_ids: + # Collect all member ids for the channel pagination calls + member_ids = [] + for result in make_paginated_slack_api_call_w_retries( + slack_client.conversations_members, + channel=channel_id, + ): + member_ids.extend(result.get("members", [])) + + # Collect all member emails for the channel + member_emails = set() + for member_id in member_ids: + member_email = user_id_to_email_map.get(member_id) + + if not member_email: + # If the user is an external user, they wont get returned from the + # conversations_members call so we need to make a separate call to users_info + # and add them to the user_id_to_email_map + member_info = slack_client.users_info(user=member_id) + member_email = member_info["user"]["profile"].get("email") + if not member_email: + # If no email is found, we skip the user + continue + user_id_to_email_map[member_id] = member_email + batch_add_non_web_user_if_not_exists__no_commit( + db_session, [member_email] + ) + + member_emails.add(member_email) + + channel_permissions[channel_id] = ExternalAccess( + external_user_emails=member_emails, + # No group<->document mapping for slack + external_user_group_ids=set(), + # No way to determine if slack is invite only without enterprise liscense + is_public=False, + ) + + return channel_permissions + + +def 
slack_doc_sync( + db_session: Session, + cc_pair: ConnectorCredentialPair, +) -> None: + """ + Adds the external permissions to the documents in postgres + if the document doesn't already exists in postgres, we create + it in postgres so that when it gets created later, the permissions are + already populated + """ + slack_client = WebClient( + token=cc_pair.credential.credential_json["slack_bot_token"] + ) + user_id_to_email_map = fetch_user_id_to_email_map(slack_client) + channel_doc_map = _get_slack_document_ids_and_channels( + db_session=db_session, + cc_pair=cc_pair, + ) + workspace_permissions = _fetch_worspace_permissions( + db_session=db_session, + user_id_to_email_map=user_id_to_email_map, + ) + channel_permissions = _fetch_channel_permissions( + db_session=db_session, + slack_client=slack_client, + workspace_permissions=workspace_permissions, + user_id_to_email_map=user_id_to_email_map, + ) + for channel_id, ext_access in channel_permissions.items(): + doc_ids = channel_doc_map.get(channel_id) + if not doc_ids: + # No documents found for channel the channel_id + continue + + for doc_id in doc_ids: + upsert_document_external_perms__no_commit( + db_session=db_session, + doc_id=doc_id, + external_access=ext_access, + source_type=cc_pair.connector.source, + ) diff --git a/backend/ee/danswer/external_permissions/slack/group_sync.py b/backend/ee/danswer/external_permissions/slack/group_sync.py new file mode 100644 index 00000000000..80838895219 --- /dev/null +++ b/backend/ee/danswer/external_permissions/slack/group_sync.py @@ -0,0 +1,92 @@ +""" +THIS IS NOT USEFUL OR USED FOR PERMISSION SYNCING +WHEN USERGROUPS ARE ADDED TO A CHANNEL, IT JUST RESOLVES ALL THE USERS TO THAT CHANNEL +SO WHEN CHECKING IF A USER CAN ACCESS A DOCUMENT, WE ONLY NEED TO CHECK THEIR EMAIL +THERE IS NO USERGROUP <-> DOCUMENT PERMISSION MAPPING +""" +from slack_sdk import WebClient +from sqlalchemy.orm import Session + +from danswer.connectors.slack.connector import make_paginated_slack_api_call_w_retries +from danswer.db.models import ConnectorCredentialPair +from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit +from danswer.utils.logger import setup_logger +from ee.danswer.db.external_perm import ExternalUserGroup +from ee.danswer.db.external_perm import replace_user__ext_group_for_cc_pair__no_commit +from ee.danswer.external_permissions.slack.utils import fetch_user_id_to_email_map + +logger = setup_logger() + + +def _get_slack_group_ids( + slack_client: WebClient, +) -> list[str]: + group_ids = [] + for result in make_paginated_slack_api_call_w_retries(slack_client.usergroups_list): + for group in result.get("usergroups", []): + group_ids.append(group.get("id")) + return group_ids + + +def _get_slack_group_members_email( + db_session: Session, + slack_client: WebClient, + group_name: str, + user_id_to_email_map: dict[str, str], +) -> list[str]: + group_member_emails = [] + for result in make_paginated_slack_api_call_w_retries( + slack_client.usergroups_users_list, usergroup=group_name + ): + for member_id in result.get("users", []): + member_email = user_id_to_email_map.get(member_id) + if not member_email: + # If the user is an external user, they wont get returned from the + # conversations_members call so we need to make a separate call to users_info + member_info = slack_client.users_info(user=member_id) + member_email = member_info["user"]["profile"].get("email") + if not member_email: + # If no email is found, we skip the user + continue + user_id_to_email_map[member_id] = 
member_email + batch_add_non_web_user_if_not_exists__no_commit( + db_session, [member_email] + ) + group_member_emails.append(member_email) + + return group_member_emails + + +def slack_group_sync( + db_session: Session, + cc_pair: ConnectorCredentialPair, +) -> None: + slack_client = WebClient( + token=cc_pair.credential.credential_json["slack_bot_token"] + ) + user_id_to_email_map = fetch_user_id_to_email_map(slack_client) + + danswer_groups: list[ExternalUserGroup] = [] + for group_name in _get_slack_group_ids(slack_client): + group_member_emails = _get_slack_group_members_email( + db_session=db_session, + slack_client=slack_client, + group_name=group_name, + user_id_to_email_map=user_id_to_email_map, + ) + group_members = batch_add_non_web_user_if_not_exists__no_commit( + db_session=db_session, emails=group_member_emails + ) + if group_members: + danswer_groups.append( + ExternalUserGroup( + id=group_name, user_ids=[user.id for user in group_members] + ) + ) + + replace_user__ext_group_for_cc_pair__no_commit( + db_session=db_session, + cc_pair_id=cc_pair.id, + group_defs=danswer_groups, + source=cc_pair.connector.source, + ) diff --git a/backend/ee/danswer/external_permissions/slack/utils.py b/backend/ee/danswer/external_permissions/slack/utils.py new file mode 100644 index 00000000000..a6a049aee03 --- /dev/null +++ b/backend/ee/danswer/external_permissions/slack/utils.py @@ -0,0 +1,18 @@ +from slack_sdk import WebClient + +from danswer.connectors.slack.connector import make_paginated_slack_api_call_w_retries + + +def fetch_user_id_to_email_map( + slack_client: WebClient, +) -> dict[str, str]: + user_id_to_email_map = {} + for user_info in make_paginated_slack_api_call_w_retries( + slack_client.users_list, + ): + for user in user_info.get("members", []): + if user.get("profile", {}).get("email"): + user_id_to_email_map[user.get("id")] = user.get("profile", {}).get( + "email" + ) + return user_id_to_email_map diff --git a/backend/ee/danswer/external_permissions/sync_params.py b/backend/ee/danswer/external_permissions/sync_params.py new file mode 100644 index 00000000000..10b080cd7f4 --- /dev/null +++ b/backend/ee/danswer/external_permissions/sync_params.py @@ -0,0 +1,52 @@ +from collections.abc import Callable + +from sqlalchemy.orm import Session + +from danswer.configs.constants import DocumentSource +from danswer.db.models import ConnectorCredentialPair +from ee.danswer.external_permissions.confluence.doc_sync import confluence_doc_sync +from ee.danswer.external_permissions.confluence.group_sync import confluence_group_sync +from ee.danswer.external_permissions.google_drive.doc_sync import gdrive_doc_sync +from ee.danswer.external_permissions.google_drive.group_sync import gdrive_group_sync +from ee.danswer.external_permissions.slack.doc_sync import slack_doc_sync + +# Defining the input/output types for the sync functions +SyncFuncType = Callable[ + [ + Session, + ConnectorCredentialPair, + ], + None, +] + +# These functions update: +# - the user_email <-> document mapping +# - the external_user_group_id <-> document mapping +# in postgres without committing +# THIS ONE IS NECESSARY FOR AUTO SYNC TO WORK +DOC_PERMISSIONS_FUNC_MAP: dict[DocumentSource, SyncFuncType] = { + DocumentSource.GOOGLE_DRIVE: gdrive_doc_sync, + DocumentSource.CONFLUENCE: confluence_doc_sync, + DocumentSource.SLACK: slack_doc_sync, +} + +# These functions update: +# - the user_email <-> external_user_group_id mapping +# in postgres without committing +# THIS ONE IS OPTIONAL ON AN APP BY APP BASIS 
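+# Sources without an entry here simply skip group syncing; doc-level
+# permissions from DOC_PERMISSIONS_FUNC_MAP still apply.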
+GROUP_PERMISSIONS_FUNC_MAP: dict[DocumentSource, SyncFuncType] = { + DocumentSource.GOOGLE_DRIVE: gdrive_group_sync, + DocumentSource.CONFLUENCE: confluence_group_sync, +} + + +# If nothing is specified here, we run the doc_sync every time the celery beat runs +PERMISSION_SYNC_PERIODS: dict[DocumentSource, int] = { + # Polling is not supported so we fetch all doc permissions every 5 minutes + DocumentSource.CONFLUENCE: 5 * 60, + DocumentSource.SLACK: 5 * 60, +} + + +def check_if_valid_sync_source(source_type: DocumentSource) -> bool: + return source_type in DOC_PERMISSIONS_FUNC_MAP diff --git a/backend/ee/danswer/main.py b/backend/ee/danswer/main.py index d7d1d6406a3..7d150107c75 100644 --- a/backend/ee/danswer/main.py +++ b/backend/ee/danswer/main.py @@ -23,6 +23,7 @@ from ee.danswer.server.enterprise_settings.api import ( basic_router as enterprise_settings_router, ) +from ee.danswer.server.manage.standard_answer import router as standard_answer_router from ee.danswer.server.query_and_chat.chat_backend import ( router as chat_router, ) @@ -86,6 +87,7 @@ def get_application() -> FastAPI: # EE only backend APIs include_router_with_global_prefix_prepended(application, query_router) include_router_with_global_prefix_prepended(application, chat_router) + include_router_with_global_prefix_prepended(application, standard_answer_router) # Enterprise-only global settings include_router_with_global_prefix_prepended( application, enterprise_settings_admin_router diff --git a/backend/ee/danswer/server/enterprise_settings/api.py b/backend/ee/danswer/server/enterprise_settings/api.py index 736296517db..385adcf689e 100644 --- a/backend/ee/danswer/server/enterprise_settings/api.py +++ b/backend/ee/danswer/server/enterprise_settings/api.py @@ -1,14 +1,26 @@ +from datetime import datetime +from datetime import timezone +from typing import Any + +import httpx from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException from fastapi import Response +from fastapi import status from fastapi import UploadFile +from pydantic import BaseModel +from pydantic import Field from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user +from danswer.auth.users import current_user_with_expired_token +from danswer.auth.users import get_user_manager +from danswer.auth.users import UserManager from danswer.db.engine import get_session from danswer.db.models import User from danswer.file_store.file_store import get_default_file_store +from danswer.utils.logger import setup_logger from ee.danswer.server.enterprise_settings.models import AnalyticsScriptUpload from ee.danswer.server.enterprise_settings.models import EnterpriseSettings from ee.danswer.server.enterprise_settings.store import _LOGO_FILENAME @@ -22,6 +34,80 @@ admin_router = APIRouter(prefix="/admin/enterprise-settings") basic_router = APIRouter(prefix="/enterprise-settings") +logger = setup_logger() + + +class RefreshTokenData(BaseModel): + access_token: str + refresh_token: str + session: dict = Field(..., description="Contains session information") + userinfo: dict = Field(..., description="Contains user information") + + def __init__(self, **data: Any) -> None: + super().__init__(**data) + if "exp" not in self.session: + raise ValueError("'exp' must be set in the session dictionary") + if "userId" not in self.userinfo or "email" not in self.userinfo: + raise ValueError( + "'userId' and 'email' must be set in the userinfo dictionary" + ) + + +@basic_router.post("/refresh-token") +async def 
refresh_access_token( + refresh_token: RefreshTokenData, + user: User = Depends(current_user_with_expired_token), + user_manager: UserManager = Depends(get_user_manager), +) -> None: + try: + logger.debug(f"Received response from Meechum auth URL for user {user.id}") + + # Extract new tokens + new_access_token = refresh_token.access_token + new_refresh_token = refresh_token.refresh_token + + new_expiry = datetime.fromtimestamp( + refresh_token.session["exp"] / 1000, tz=timezone.utc + ) + expires_at_timestamp = int(new_expiry.timestamp()) + + logger.debug(f"Access token has been refreshed for user {user.id}") + + await user_manager.oauth_callback( + oauth_name="custom", + access_token=new_access_token, + account_id=refresh_token.userinfo["userId"], + account_email=refresh_token.userinfo["email"], + expires_at=expires_at_timestamp, + refresh_token=new_refresh_token, + associate_by_email=True, + ) + + logger.info(f"Successfully refreshed tokens for user {user.id}") + + except httpx.HTTPStatusError as e: + if e.response.status_code == 401: + logger.warning(f"Full authentication required for user {user.id}") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Full authentication required", + ) + logger.error( + f"HTTP error occurred while refreshing token for user {user.id}: {str(e)}" + ) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to refresh token", + ) + except Exception as e: + logger.error( + f"Unexpected error occurred while refreshing token for user {user.id}: {str(e)}" + ) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="An unexpected error occurred", + ) + @admin_router.put("") def put_settings( diff --git a/backend/ee/danswer/server/enterprise_settings/models.py b/backend/ee/danswer/server/enterprise_settings/models.py index c9831d87aeb..df8f022a402 100644 --- a/backend/ee/danswer/server/enterprise_settings/models.py +++ b/backend/ee/danswer/server/enterprise_settings/models.py @@ -1,4 +1,26 @@ +from typing import Any +from typing import List + from pydantic import BaseModel +from pydantic import Field + + +class NavigationItem(BaseModel): + link: str + title: str + # Right now must be one of the FA icons + icon: str | None = None + # NOTE: SVG must not have a width / height specified + # This is the actual SVG as a string. 
Done this way to reduce + # complexity / having to store additional "logos" in Postgres + svg_logo: str | None = None + + @classmethod + def model_validate(cls, *args: Any, **kwargs: Any) -> "NavigationItem": + instance = super().model_validate(*args, **kwargs) + if bool(instance.icon) == bool(instance.svg_logo): + raise ValueError("Exactly one of fa_icon or svg_logo must be specified") + return instance class EnterpriseSettings(BaseModel): @@ -10,11 +32,16 @@ class EnterpriseSettings(BaseModel): use_custom_logo: bool = False use_custom_logotype: bool = False + # custom navigation + custom_nav_items: List[NavigationItem] = Field(default_factory=list) + # custom Chat components + two_lines_for_chat_header: bool | None = None custom_lower_disclaimer_content: str | None = None custom_header_content: str | None = None custom_popup_header: str | None = None custom_popup_content: str | None = None + enable_consent_screen: bool | None = None def check_validity(self) -> None: return diff --git a/backend/ee/danswer/server/manage/models.py b/backend/ee/danswer/server/manage/models.py new file mode 100644 index 00000000000..ae2c401a2fa --- /dev/null +++ b/backend/ee/danswer/server/manage/models.py @@ -0,0 +1,98 @@ +import re +from typing import Any + +from pydantic import BaseModel +from pydantic import field_validator +from pydantic import model_validator + +from danswer.db.models import StandardAnswer as StandardAnswerModel +from danswer.db.models import StandardAnswerCategory as StandardAnswerCategoryModel + + +class StandardAnswerCategoryCreationRequest(BaseModel): + name: str + + +class StandardAnswerCategory(BaseModel): + id: int + name: str + + @classmethod + def from_model( + cls, standard_answer_category: StandardAnswerCategoryModel + ) -> "StandardAnswerCategory": + return cls( + id=standard_answer_category.id, + name=standard_answer_category.name, + ) + + +class StandardAnswer(BaseModel): + id: int + keyword: str + answer: str + categories: list[StandardAnswerCategory] + match_regex: bool + match_any_keywords: bool + + @classmethod + def from_model(cls, standard_answer_model: StandardAnswerModel) -> "StandardAnswer": + return cls( + id=standard_answer_model.id, + keyword=standard_answer_model.keyword, + answer=standard_answer_model.answer, + match_regex=standard_answer_model.match_regex, + match_any_keywords=standard_answer_model.match_any_keywords, + categories=[ + StandardAnswerCategory.from_model(standard_answer_category_model) + for standard_answer_category_model in standard_answer_model.categories + ], + ) + + +class StandardAnswerCreationRequest(BaseModel): + keyword: str + answer: str + categories: list[int] + match_regex: bool + match_any_keywords: bool + + @field_validator("categories", mode="before") + @classmethod + def validate_categories(cls, value: list[int]) -> list[int]: + if len(value) < 1: + raise ValueError( + "At least one category must be attached to a standard answer" + ) + return value + + @model_validator(mode="after") + def validate_only_match_any_if_not_regex(self) -> Any: + if self.match_regex and self.match_any_keywords: + raise ValueError( + "Can only match any keywords in keyword mode, not regex mode" + ) + + return self + + @model_validator(mode="after") + def validate_keyword_if_regex(self) -> Any: + if not self.match_regex: + # no validation for keywords + return self + + try: + re.compile(self.keyword) + return self + except re.error as err: + if isinstance(err.pattern, bytes): + raise ValueError( + f'invalid regex pattern r"{err.pattern.decode()}" in 
`keyword`: {err.msg}' + ) + else: + pattern = f'r"{err.pattern}"' if err.pattern is not None else "" + raise ValueError( + " ".join( + ["invalid regex pattern", pattern, f"in `keyword`: {err.msg}"] + ) + ) diff --git a/backend/danswer/server/manage/standard_answer.py b/backend/ee/danswer/server/manage/standard_answer.py similarity index 79% rename from backend/danswer/server/manage/standard_answer.py rename to backend/ee/danswer/server/manage/standard_answer.py index 69f9e8146df..e832fa19078 100644 --- a/backend/danswer/server/manage/standard_answer.py +++ b/backend/ee/danswer/server/manage/standard_answer.py @@ -6,19 +6,19 @@ from danswer.auth.users import current_admin_user from danswer.db.engine import get_session from danswer.db.models import User -from danswer.db.standard_answer import fetch_standard_answer -from danswer.db.standard_answer import fetch_standard_answer_categories -from danswer.db.standard_answer import fetch_standard_answer_category -from danswer.db.standard_answer import fetch_standard_answers -from danswer.db.standard_answer import insert_standard_answer -from danswer.db.standard_answer import insert_standard_answer_category -from danswer.db.standard_answer import remove_standard_answer -from danswer.db.standard_answer import update_standard_answer -from danswer.db.standard_answer import update_standard_answer_category -from danswer.server.manage.models import StandardAnswer -from danswer.server.manage.models import StandardAnswerCategory -from danswer.server.manage.models import StandardAnswerCategoryCreationRequest -from danswer.server.manage.models import StandardAnswerCreationRequest +from ee.danswer.db.standard_answer import fetch_standard_answer +from ee.danswer.db.standard_answer import fetch_standard_answer_categories +from ee.danswer.db.standard_answer import fetch_standard_answer_category +from ee.danswer.db.standard_answer import fetch_standard_answers +from ee.danswer.db.standard_answer import insert_standard_answer +from ee.danswer.db.standard_answer import insert_standard_answer_category +from ee.danswer.db.standard_answer import remove_standard_answer +from ee.danswer.db.standard_answer import update_standard_answer +from ee.danswer.db.standard_answer import update_standard_answer_category +from ee.danswer.server.manage.models import StandardAnswer +from ee.danswer.server.manage.models import StandardAnswerCategory +from ee.danswer.server.manage.models import StandardAnswerCategoryCreationRequest +from ee.danswer.server.manage.models import StandardAnswerCreationRequest router = APIRouter(prefix="/manage") @@ -33,6 +33,8 @@ def create_standard_answer( keyword=standard_answer_creation_request.keyword, answer=standard_answer_creation_request.answer, category_ids=standard_answer_creation_request.categories, + match_regex=standard_answer_creation_request.match_regex, + match_any_keywords=standard_answer_creation_request.match_any_keywords, db_session=db_session, ) return StandardAnswer.from_model(standard_answer_model) @@ -70,6 +72,8 @@ def patch_standard_answer( keyword=standard_answer_creation_request.keyword, answer=standard_answer_creation_request.answer, category_ids=standard_answer_creation_request.categories, + match_regex=standard_answer_creation_request.match_regex, + match_any_keywords=standard_answer_creation_request.match_any_keywords, db_session=db_session, ) return StandardAnswer.from_model(standard_answer_model) diff --git a/backend/ee/danswer/server/query_and_chat/chat_backend.py b/backend/ee/danswer/server/query_and_chat/chat_backend.py 
index 0d5d1987f34..dd637dcf081 100644 --- a/backend/ee/danswer/server/query_and_chat/chat_backend.py +++ b/backend/ee/danswer/server/query_and_chat/chat_backend.py @@ -7,10 +7,14 @@ from danswer.auth.users import current_user from danswer.chat.chat_utils import create_chat_chain +from danswer.chat.models import AllCitations from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import FinalUsedContextDocsResponse +from danswer.chat.models import LlmDoc from danswer.chat.models import LLMRelevanceFilterResponse from danswer.chat.models import QADocsResponse from danswer.chat.models import StreamingError +from danswer.chat.process_message import ChatPacketStream from danswer.chat.process_message import stream_chat_message_objects from danswer.configs.constants import MessageType from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE @@ -25,6 +29,7 @@ from danswer.one_shot_answer.qa_utils import combine_message_thread from danswer.search.models import OptionalSearchSetting from danswer.search.models import RetrievalDetails +from danswer.search.models import SavedSearchDoc from danswer.secondary_llm_flows.query_expansion import thread_based_query_rephrase from danswer.server.query_and_chat.models import ChatMessageDetail from danswer.server.query_and_chat.models import CreateChatMessageRequest @@ -41,7 +46,7 @@ router = APIRouter(prefix="/chat") -def translate_doc_response_to_simple_doc( +def _translate_doc_response_to_simple_doc( doc_response: QADocsResponse, ) -> list[SimpleDoc]: return [ @@ -60,6 +65,66 @@ def translate_doc_response_to_simple_doc( ] +def _get_final_context_doc_indices( + final_context_docs: list[LlmDoc] | None, + top_docs: list[SavedSearchDoc] | None, +) -> list[int] | None: + """ + this function returns a list of indices of the simple search docs + that were actually fed to the LLM. 
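+    Returns None if either input is None. For example, if top_docs holds
+    documents [A, B, C] and only A and C appear in final_context_docs,
+    the result is [0, 2].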
+ """ + if final_context_docs is None or top_docs is None: + return None + + final_context_doc_ids = {doc.document_id for doc in final_context_docs} + return [ + i for i, doc in enumerate(top_docs) if doc.document_id in final_context_doc_ids + ] + + +def _convert_packet_stream_to_response( + packets: ChatPacketStream, +) -> ChatBasicResponse: + response = ChatBasicResponse() + final_context_docs: list[LlmDoc] = [] + + answer = "" + for packet in packets: + if isinstance(packet, DanswerAnswerPiece) and packet.answer_piece: + answer += packet.answer_piece + elif isinstance(packet, QADocsResponse): + response.top_documents = packet.top_documents + + # TODO: deprecate `simple_search_docs` + response.simple_search_docs = _translate_doc_response_to_simple_doc(packet) + elif isinstance(packet, StreamingError): + response.error_msg = packet.error + elif isinstance(packet, ChatMessageDetail): + response.message_id = packet.message_id + elif isinstance(packet, LLMRelevanceFilterResponse): + response.llm_selected_doc_indices = packet.llm_selected_doc_indices + + # TODO: deprecate `llm_chunks_indices` + response.llm_chunks_indices = packet.llm_selected_doc_indices + elif isinstance(packet, FinalUsedContextDocsResponse): + final_context_docs = packet.final_context_docs + elif isinstance(packet, AllCitations): + response.cited_documents = { + citation.citation_num: citation.document_id + for citation in packet.citations + } + + response.final_context_doc_indices = _get_final_context_doc_indices( + final_context_docs, response.top_documents + ) + + response.answer = answer + if answer: + response.answer_citationless = remove_answer_citations(answer) + + return response + + def remove_answer_citations(answer: str) -> str: pattern = r"\s*\[\[\d+\]\]\(http[s]?://[^\s]+\)" @@ -117,26 +182,10 @@ def handle_simplified_chat_message( new_msg_req=full_chat_msg_info, user=user, db_session=db_session, + enforce_chat_session_id_for_search_docs=False, ) - response = ChatBasicResponse() - - answer = "" - for packet in packets: - if isinstance(packet, DanswerAnswerPiece) and packet.answer_piece: - answer += packet.answer_piece - elif isinstance(packet, QADocsResponse): - response.simple_search_docs = translate_doc_response_to_simple_doc(packet) - elif isinstance(packet, StreamingError): - response.error_msg = packet.error - elif isinstance(packet, ChatMessageDetail): - response.message_id = packet.message_id - - response.answer = answer - if answer: - response.answer_citationless = remove_answer_citations(answer) - - return response + return _convert_packet_stream_to_response(packets) @router.post("/send-message-simple-with-history") @@ -152,6 +201,8 @@ def handle_send_message_simple_with_history( if len(req.messages) == 0: raise HTTPException(status_code=400, detail="Messages cannot be zero length") + # This is a sanity check to make sure the chat history is valid + # It must start with a user message and alternate between user and assistant expected_role = MessageType.USER for msg in req.messages: if not msg.message: @@ -225,14 +276,22 @@ def handle_send_message_simple_with_history( history_str=history_str, ) + if req.retrieval_options is None and req.search_doc_ids is None: + retrieval_options: RetrievalDetails | None = RetrievalDetails( + run_search=OptionalSearchSetting.ALWAYS, + real_time=False, + ) + else: + retrieval_options = req.retrieval_options + full_chat_msg_info = CreateChatMessageRequest( chat_session_id=chat_session.id, parent_message_id=chat_message.id, message=query, file_descriptors=[], 
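+        # If search_doc_ids is provided, retrieval_options is unused downstream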
prompt_id=req.prompt_id, - search_doc_ids=None, - retrieval_options=req.retrieval_options, + search_doc_ids=req.search_doc_ids, + retrieval_options=retrieval_options, query_override=rephrased_query, chunks_above=0, chunks_below=0, @@ -243,25 +302,7 @@ def handle_send_message_simple_with_history( new_msg_req=full_chat_msg_info, user=user, db_session=db_session, + enforce_chat_session_id_for_search_docs=False, ) - response = ChatBasicResponse() - - answer = "" - for packet in packets: - if isinstance(packet, DanswerAnswerPiece) and packet.answer_piece: - answer += packet.answer_piece - elif isinstance(packet, QADocsResponse): - response.simple_search_docs = translate_doc_response_to_simple_doc(packet) - elif isinstance(packet, StreamingError): - response.error_msg = packet.error - elif isinstance(packet, ChatMessageDetail): - response.message_id = packet.message_id - elif isinstance(packet, LLMRelevanceFilterResponse): - response.llm_chunks_indices = packet.relevant_chunk_indices - - response.answer = answer - if answer: - response.answer_citationless = remove_answer_citations(answer) - - return response + return _convert_packet_stream_to_response(packets) diff --git a/backend/ee/danswer/server/query_and_chat/models.py b/backend/ee/danswer/server/query_and_chat/models.py index b0ce553ebe0..ec9db73ecff 100644 --- a/backend/ee/danswer/server/query_and_chat/models.py +++ b/backend/ee/danswer/server/query_and_chat/models.py @@ -8,7 +8,8 @@ from danswer.search.models import ChunkContext from danswer.search.models import RerankingDetails from danswer.search.models import RetrievalDetails -from danswer.server.manage.models import StandardAnswer +from danswer.search.models import SavedSearchDoc +from ee.danswer.server.manage.models import StandardAnswer class StandardAnswerRequest(BaseModel): @@ -52,9 +53,11 @@ class BasicCreateChatMessageWithHistoryRequest(ChunkContext): messages: list[ThreadMessage] prompt_id: int | None persona_id: int - retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails) + retrieval_options: RetrievalDetails | None = None query_override: str | None = None skip_rerank: bool | None = None + # If search_doc_ids provided, then retrieval options are unused + search_doc_ids: list[int] | None = None class SimpleDoc(BaseModel): @@ -71,7 +74,17 @@ class ChatBasicResponse(BaseModel): # This is built piece by piece, any of these can be None as the flow could break answer: str | None = None answer_citationless: str | None = None - simple_search_docs: list[SimpleDoc] | None = None + + top_documents: list[SavedSearchDoc] | None = None + error_msg: str | None = None message_id: int | None = None + llm_selected_doc_indices: list[int] | None = None + final_context_doc_indices: list[int] | None = None + # this is a map of the citation number to the document id + cited_documents: dict[int, str] | None = None + + # FOR BACKWARDS COMPATIBILITY + # TODO: deprecate both of these + simple_search_docs: list[SimpleDoc] | None = None llm_chunks_indices: list[int] | None = None diff --git a/backend/ee/danswer/server/query_and_chat/query_backend.py b/backend/ee/danswer/server/query_and_chat/query_backend.py index aef3648220e..59e61ba12df 100644 --- a/backend/ee/danswer/server/query_and_chat/query_backend.py +++ b/backend/ee/danswer/server/query_and_chat/query_backend.py @@ -6,9 +6,6 @@ from danswer.auth.users import current_user from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE -from danswer.danswerbot.slack.handlers.handle_standard_answers import ( 
- oneoff_standard_answers, -) from danswer.db.engine import get_session from danswer.db.models import User from danswer.db.persona import get_persona_by_id @@ -29,9 +26,13 @@ from danswer.search.utils import drop_llm_indices from danswer.search.utils import relevant_sections_to_indices from danswer.utils.logger import setup_logger +from ee.danswer.danswerbot.slack.handlers.handle_standard_answers import ( + oneoff_standard_answers, +) from ee.danswer.server.query_and_chat.models import DocumentSearchRequest from ee.danswer.server.query_and_chat.models import StandardAnswerRequest from ee.danswer.server.query_and_chat.models import StandardAnswerResponse +from ee.danswer.server.query_and_chat.utils import create_temporary_persona logger = setup_logger() @@ -133,12 +134,23 @@ def get_answer_with_quote( query = query_request.messages[0].message logger.notice(f"Received query for one shot answer API with quotes: {query}") - persona = get_persona_by_id( - persona_id=query_request.persona_id, - user=user, - db_session=db_session, - is_for_edit=False, - ) + if query_request.persona_config is not None: + new_persona = create_temporary_persona( + db_session=db_session, + persona_config=query_request.persona_config, + user=user, + ) + persona = new_persona + + elif query_request.persona_id is not None: + persona = get_persona_by_id( + persona_id=query_request.persona_id, + user=user, + db_session=db_session, + is_for_edit=False, + ) + else: + raise KeyError("Must provide persona ID or Persona Config") llm = get_main_llm_from_tuple( get_default_llms() if not persona else get_llms_for_persona(persona) diff --git a/backend/ee/danswer/server/query_and_chat/utils.py b/backend/ee/danswer/server/query_and_chat/utils.py new file mode 100644 index 00000000000..a2f7253517a --- /dev/null +++ b/backend/ee/danswer/server/query_and_chat/utils.py @@ -0,0 +1,85 @@ +from typing import cast + +from fastapi import HTTPException +from sqlalchemy.orm import Session + +from danswer.auth.users import is_user_admin +from danswer.db.llm import fetch_existing_doc_sets +from danswer.db.llm import fetch_existing_tools +from danswer.db.models import Persona +from danswer.db.models import Prompt +from danswer.db.models import Tool +from danswer.db.models import User +from danswer.db.persona import get_prompts_by_ids +from danswer.one_shot_answer.models import PersonaConfig +from danswer.tools.custom.custom_tool import ( + build_custom_tools_from_openapi_schema_and_headers, +) + + +def create_temporary_persona( + persona_config: PersonaConfig, db_session: Session, user: User | None = None +) -> Persona: + if not is_user_admin(user): + raise HTTPException( + status_code=403, + detail="User is not authorized to create a persona in one shot queries", + ) + + """Create a temporary Persona object from the provided configuration.""" + persona = Persona( + name=persona_config.name, + description=persona_config.description, + num_chunks=persona_config.num_chunks, + llm_relevance_filter=persona_config.llm_relevance_filter, + llm_filter_extraction=persona_config.llm_filter_extraction, + recency_bias=persona_config.recency_bias, + llm_model_provider_override=persona_config.llm_model_provider_override, + llm_model_version_override=persona_config.llm_model_version_override, + ) + + if persona_config.prompts: + persona.prompts = [ + Prompt( + name=p.name, + description=p.description, + system_prompt=p.system_prompt, + task_prompt=p.task_prompt, + include_citations=p.include_citations, + datetime_aware=p.datetime_aware, + ) + for p in 
persona_config.prompts + ] + elif persona_config.prompt_ids: + persona.prompts = get_prompts_by_ids( + db_session=db_session, prompt_ids=persona_config.prompt_ids + ) + + persona.tools = [] + if persona_config.custom_tools_openapi: + for schema in persona_config.custom_tools_openapi: + tools = cast( + list[Tool], + build_custom_tools_from_openapi_schema_and_headers(schema), + ) + persona.tools.extend(tools) + + if persona_config.tools: + tool_ids = [tool.id for tool in persona_config.tools] + persona.tools.extend( + fetch_existing_tools(db_session=db_session, tool_ids=tool_ids) + ) + + if persona_config.tool_ids: + persona.tools.extend( + fetch_existing_tools( + db_session=db_session, tool_ids=persona_config.tool_ids + ) + ) + + fetched_docs = fetch_existing_doc_sets( + db_session=db_session, doc_ids=persona_config.document_set_ids + ) + persona.document_sets = fetched_docs + + return persona diff --git a/backend/ee/danswer/server/query_history/api.py b/backend/ee/danswer/server/query_history/api.py index ed532a85603..3fc0a98153a 100644 --- a/backend/ee/danswer/server/query_history/api.py +++ b/backend/ee/danswer/server/query_history/api.py @@ -17,6 +17,7 @@ from danswer.chat.chat_utils import create_chat_chain from danswer.configs.constants import MessageType from danswer.configs.constants import QAFeedbackType +from danswer.configs.constants import SessionType from danswer.db.chat import get_chat_session_by_id from danswer.db.engine import get_session from danswer.db.models import ChatMessage @@ -87,9 +88,10 @@ class ChatSessionMinimal(BaseModel): name: str | None first_user_message: str first_ai_message: str - persona_name: str + persona_name: str | None time_created: datetime feedback_type: QAFeedbackType | Literal["mixed"] | None + flow_type: SessionType class ChatSessionSnapshot(BaseModel): @@ -97,8 +99,9 @@ class ChatSessionSnapshot(BaseModel): user_email: str name: str | None messages: list[MessageSnapshot] - persona_name: str + persona_name: str | None time_created: datetime + flow_type: SessionType class QuestionAnswerPairSnapshot(BaseModel): @@ -111,9 +114,10 @@ class QuestionAnswerPairSnapshot(BaseModel): retrieved_documents: list[AbridgedSearchDoc] feedback_type: QAFeedbackType | None feedback_text: str | None - persona_name: str + persona_name: str | None user_email: str time_created: datetime + flow_type: SessionType @classmethod def from_chat_session_snapshot( @@ -141,11 +145,12 @@ def from_chat_session_snapshot( persona_name=chat_session_snapshot.persona_name, user_email=get_display_email(chat_session_snapshot.user_email), time_created=user_message.time_created, + flow_type=chat_session_snapshot.flow_type, ) for ind, (user_message, ai_message) in enumerate(message_pairs) ] - def to_json(self) -> dict[str, str]: + def to_json(self) -> dict[str, str | None]: return { "chat_session_id": str(self.chat_session_id), "message_pair_num": str(self.message_pair_num), @@ -162,9 +167,20 @@ def to_json(self) -> dict[str, str]: "persona_name": self.persona_name, "user_email": self.user_email, "time_created": str(self.time_created), + "flow_type": self.flow_type, } +def determine_flow_type(chat_session: ChatSession) -> SessionType: + return ( + SessionType.SLACK + if chat_session.danswerbot_flow + else SessionType.SEARCH + if chat_session.one_shot + else SessionType.CHAT + ) + + def fetch_and_process_chat_session_history_minimal( db_session: Session, start: datetime, @@ -226,6 +242,8 @@ def fetch_and_process_chat_session_history_minimal( if feedback_filter == QAFeedbackType.DISLIKE and 
not has_negative_feedback: continue + flow_type = determine_flow_type(chat_session) + minimal_sessions.append( ChatSessionMinimal( id=chat_session.id, @@ -235,9 +253,12 @@ def fetch_and_process_chat_session_history_minimal( name=chat_session.description, first_user_message=first_user_message, first_ai_message=first_ai_message, - persona_name=chat_session.persona.name, + persona_name=chat_session.persona.name + if chat_session.persona + else None, time_created=chat_session.time_created, feedback_type=feedback_type, + flow_type=flow_type, ) ) @@ -289,6 +310,8 @@ def snapshot_from_chat_session( except RuntimeError: return None + flow_type = determine_flow_type(chat_session) + return ChatSessionSnapshot( id=chat_session.id, user_email=get_display_email( @@ -300,8 +323,9 @@ def snapshot_from_chat_session( for message in messages if message.message_type != MessageType.SYSTEM ], - persona_name=chat_session.persona.name, + persona_name=chat_session.persona.name if chat_session.persona else None, time_created=chat_session.time_created, + flow_type=flow_type, ) diff --git a/backend/ee/danswer/server/saml.py b/backend/ee/danswer/server/saml.py index 5bc62e98d61..7ff385d1377 100644 --- a/backend/ee/danswer/server/saml.py +++ b/backend/ee/danswer/server/saml.py @@ -12,7 +12,6 @@ from fastapi_users.password import PasswordHelper from onelogin.saml2.auth import OneLogin_Saml2_Auth # type: ignore from pydantic import BaseModel -from pydantic import EmailStr from sqlalchemy.orm import Session from danswer.auth.schemas import UserCreate @@ -61,10 +60,11 @@ async def upsert_saml_user(email: str) -> User: user: User = await user_manager.create( UserCreate( - email=EmailStr(email), + email=email, password=hashed_pass, is_verified=True, role=role, + has_web_login=True, ) ) diff --git a/backend/ee/danswer/server/seeding.py b/backend/ee/danswer/server/seeding.py index bbca5acc20a..feb10cc19ce 100644 --- a/backend/ee/danswer/server/seeding.py +++ b/backend/ee/danswer/server/seeding.py @@ -1,4 +1,8 @@ +import json import os +from copy import deepcopy +from typing import List +from typing import Optional from pydantic import BaseModel from sqlalchemy.orm import Session @@ -6,6 +10,7 @@ from danswer.db.engine import get_session_context_manager from danswer.db.llm import update_default_provider from danswer.db.llm import upsert_llm_provider +from danswer.db.models import Tool from danswer.db.persona import upsert_persona from danswer.search.enums import RecencyBiasSetting from danswer.server.features.persona.models import CreatePersonaRequest @@ -13,19 +18,41 @@ from danswer.server.settings.models import Settings from danswer.server.settings.store import store_settings as store_base_settings from danswer.utils.logger import setup_logger +from ee.danswer.db.standard_answer import ( + create_initial_default_standard_answer_category, +) from ee.danswer.server.enterprise_settings.models import AnalyticsScriptUpload from ee.danswer.server.enterprise_settings.models import EnterpriseSettings +from ee.danswer.server.enterprise_settings.models import NavigationItem from ee.danswer.server.enterprise_settings.store import store_analytics_script from ee.danswer.server.enterprise_settings.store import ( store_settings as store_ee_settings, ) from ee.danswer.server.enterprise_settings.store import upload_logo + +class CustomToolSeed(BaseModel): + name: str + description: str + definition_path: str + custom_headers: Optional[List[dict]] = None + display_name: Optional[str] = None + in_code_tool_id: Optional[str] = None + user_id: 
Optional[str] = None + + logger = setup_logger() _SEED_CONFIG_ENV_VAR_NAME = "ENV_SEED_CONFIGURATION" +class NavigationItemSeed(BaseModel): + link: str + title: str + # NOTE: SVG at this path must not have a width / height specified + svg_path: str + + class SeedConfiguration(BaseModel): llms: list[LLMProviderUpsertRequest] | None = None admin_user_emails: list[str] | None = None @@ -33,28 +60,72 @@ class SeedConfiguration(BaseModel): personas: list[CreatePersonaRequest] | None = None settings: Settings | None = None enterprise_settings: EnterpriseSettings | None = None + + # allows for specifying custom navigation items that have your own custom SVG logos + nav_item_overrides: list[NavigationItemSeed] | None = None + # Use existing `CUSTOM_ANALYTICS_SECRET_KEY` for reference analytics_script_path: str | None = None + custom_tools: List[CustomToolSeed] | None = None def _parse_env() -> SeedConfiguration | None: seed_config_str = os.getenv(_SEED_CONFIG_ENV_VAR_NAME) if not seed_config_str: return None - seed_config = SeedConfiguration.parse_raw(seed_config_str) + seed_config = SeedConfiguration.model_validate_json(seed_config_str) return seed_config +def _seed_custom_tools(db_session: Session, tools: List[CustomToolSeed]) -> None: + if tools: + logger.notice("Seeding Custom Tools") + for tool in tools: + try: + logger.debug(f"Attempting to seed tool: {tool.name}") + logger.debug(f"Reading definition from: {tool.definition_path}") + with open(tool.definition_path, "r") as file: + file_content = file.read() + if not file_content.strip(): + raise ValueError("File is empty") + openapi_schema = json.loads(file_content) + db_tool = Tool( + name=tool.name, + description=tool.description, + openapi_schema=openapi_schema, + custom_headers=tool.custom_headers, + display_name=tool.display_name, + in_code_tool_id=tool.in_code_tool_id, + user_id=tool.user_id, + ) + db_session.add(db_tool) + logger.debug(f"Successfully added tool: {tool.name}") + except FileNotFoundError: + logger.error( + f"Definition file not found for tool {tool.name}: {tool.definition_path}" + ) + except json.JSONDecodeError as e: + logger.error( + f"Invalid JSON in definition file for tool {tool.name}: {str(e)}" + ) + except Exception as e: + logger.error(f"Failed to seed tool {tool.name}: {str(e)}") + db_session.commit() + logger.notice(f"Successfully seeded {len(tools)} Custom Tools") + + def _seed_llms( db_session: Session, llm_upsert_requests: list[LLMProviderUpsertRequest] ) -> None: if llm_upsert_requests: logger.notice("Seeding LLMs") seeded_providers = [ - upsert_llm_provider(db_session, llm_upsert_request) + upsert_llm_provider(llm_upsert_request, db_session) for llm_upsert_request in llm_upsert_requests ] - update_default_provider(db_session, seeded_providers[0].id) + update_default_provider( + provider_id=seeded_providers[0].id, db_session=db_session + ) def _seed_personas(db_session: Session, personas: list[CreatePersonaRequest]) -> None: @@ -79,6 +150,7 @@ def _seed_personas(db_session: Session, personas: list[CreatePersonaRequest]) -> is_public=persona.is_public, db_session=db_session, tool_ids=persona.tool_ids, + display_priority=persona.display_priority, ) @@ -93,9 +165,35 @@ def _seed_settings(settings: Settings) -> None: def _seed_enterprise_settings(seed_config: SeedConfiguration) -> None: - if seed_config.enterprise_settings is not None: + if ( + seed_config.enterprise_settings is not None + or seed_config.nav_item_overrides is not None + ): + final_enterprise_settings = ( + 
deepcopy(seed_config.enterprise_settings) + if seed_config.enterprise_settings + else EnterpriseSettings() + ) + + final_nav_items = final_enterprise_settings.custom_nav_items + if seed_config.nav_item_overrides is not None: + final_nav_items = [] + for item in seed_config.nav_item_overrides: + with open(item.svg_path, "r") as file: + svg_content = file.read().strip() + + final_nav_items.append( + NavigationItem( + link=item.link, + title=item.title, + svg_logo=svg_content, + ) + ) + + final_enterprise_settings.custom_nav_items = final_nav_items + logger.notice("Seeding enterprise settings") - store_ee_settings(seed_config.enterprise_settings) + store_ee_settings(final_enterprise_settings) def _seed_logo(db_session: Session, logo_path: str | None) -> None: @@ -140,7 +238,12 @@ def seed_db() -> None: _seed_personas(db_session, seed_config.personas) if seed_config.settings is not None: _seed_settings(seed_config.settings) + if seed_config.custom_tools is not None: + _seed_custom_tools(db_session, seed_config.custom_tools) _seed_logo(db_session, seed_config.seeded_logo_path) _seed_enterprise_settings(seed_config) _seed_analytics_script(seed_config) + + logger.notice("Verifying default standard answer category exists.") + create_initial_default_standard_answer_category(db_session) diff --git a/backend/ee/danswer/server/user_group/api.py b/backend/ee/danswer/server/user_group/api.py index e18487d5491..355e59fff1d 100644 --- a/backend/ee/danswer/server/user_group/api.py +++ b/backend/ee/danswer/server/user_group/api.py @@ -9,6 +9,7 @@ from danswer.db.engine import get_session from danswer.db.models import User from danswer.db.models import UserRole +from danswer.utils.logger import setup_logger from ee.danswer.db.user_group import fetch_user_groups from ee.danswer.db.user_group import fetch_user_groups_for_user from ee.danswer.db.user_group import insert_user_group @@ -20,6 +21,8 @@ from ee.danswer.server.user_group.models import UserGroupCreate from ee.danswer.server.user_group.models import UserGroupUpdate +logger = setup_logger() + router = APIRouter(prefix="/manage") @@ -29,7 +32,7 @@ def list_user_groups( db_session: Session = Depends(get_session), ) -> list[UserGroup]: if user is None or user.role == UserRole.ADMIN: - user_groups = fetch_user_groups(db_session, only_current=False) + user_groups = fetch_user_groups(db_session, only_up_to_date=False) else: user_groups = fetch_user_groups_for_user( db_session=db_session, @@ -90,6 +93,7 @@ def set_user_curator( set_curator_request=set_curator_request, ) except ValueError as e: + logger.error(f"Error setting user curator: {e}") raise HTTPException(status_code=404, detail=str(e)) diff --git a/backend/ee/danswer/user_groups/sync.py b/backend/ee/danswer/user_groups/sync.py deleted file mode 100644 index e3bea192670..00000000000 --- a/backend/ee/danswer/user_groups/sync.py +++ /dev/null @@ -1,87 +0,0 @@ -from sqlalchemy.orm import Session - -from danswer.access.access import get_access_for_documents -from danswer.db.document import prepare_to_modify_documents -from danswer.db.search_settings import get_current_search_settings -from danswer.db.search_settings import get_secondary_search_settings -from danswer.document_index.factory import get_default_document_index -from danswer.document_index.interfaces import DocumentIndex -from danswer.document_index.interfaces import UpdateRequest -from danswer.utils.logger import setup_logger -from ee.danswer.db.user_group import delete_user_group -from ee.danswer.db.user_group import 
fetch_documents_for_user_group_paginated -from ee.danswer.db.user_group import fetch_user_group -from ee.danswer.db.user_group import mark_user_group_as_synced - -logger = setup_logger() - -_SYNC_BATCH_SIZE = 100 - - -def _sync_user_group_batch( - document_ids: list[str], document_index: DocumentIndex, db_session: Session -) -> None: - logger.debug(f"Syncing document sets for: {document_ids}") - - # Acquires a lock on the documents so that no other process can modify them - with prepare_to_modify_documents(db_session=db_session, document_ids=document_ids): - # get current state of document sets for these documents - document_id_to_access = get_access_for_documents( - document_ids=document_ids, db_session=db_session - ) - - # update Vespa - document_index.update( - update_requests=[ - UpdateRequest( - document_ids=[document_id], - access=document_id_to_access[document_id], - ) - for document_id in document_ids - ] - ) - - # Finish the transaction and release the locks - db_session.commit() - - -def sync_user_groups(user_group_id: int, db_session: Session) -> None: - """Sync the status of Postgres for the specified user group""" - search_settings = get_current_search_settings(db_session) - secondary_search_settings = get_secondary_search_settings(db_session) - - document_index = get_default_document_index( - primary_index_name=search_settings.index_name, - secondary_index_name=secondary_search_settings.index_name - if secondary_search_settings - else None, - ) - - user_group = fetch_user_group(db_session=db_session, user_group_id=user_group_id) - if user_group is None: - raise ValueError(f"User group '{user_group_id}' does not exist") - - cursor = None - while True: - # NOTE: this may miss some documents, but that is okay. Any new documents added - # will be added with the correct group membership - document_batch, cursor = fetch_documents_for_user_group_paginated( - db_session=db_session, - user_group_id=user_group_id, - last_document_id=cursor, - limit=_SYNC_BATCH_SIZE, - ) - - _sync_user_group_batch( - document_ids=[document.id for document in document_batch], - document_index=document_index, - db_session=db_session, - ) - - if cursor is None: - break - - if user_group.is_up_for_deletion: - delete_user_group(db_session=db_session, user_group=user_group) - else: - mark_user_group_as_synced(db_session=db_session, user_group=user_group) diff --git a/backend/model_server/custom_models.py b/backend/model_server/custom_models.py index 38bf4b077fa..fde3c8d0dc9 100644 --- a/backend/model_server/custom_models.py +++ b/backend/model_server/custom_models.py @@ -3,15 +3,21 @@ from fastapi import APIRouter from huggingface_hub import snapshot_download # type: ignore from transformers import AutoTokenizer # type: ignore -from transformers import BatchEncoding +from transformers import BatchEncoding # type: ignore +from transformers import PreTrainedTokenizer # type: ignore from danswer.utils.logger import setup_logger from model_server.constants import MODEL_WARM_UP_STRING +from model_server.danswer_torch_model import ConnectorClassifier from model_server.danswer_torch_model import HybridClassifier from model_server.utils import simple_log_function_time +from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_REPO +from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_TAG from shared_configs.configs import INDEXING_ONLY from shared_configs.configs import INTENT_MODEL_TAG from shared_configs.configs import INTENT_MODEL_VERSION +from shared_configs.model_server_models import 
ConnectorClassificationRequest +from shared_configs.model_server_models import ConnectorClassificationResponse from shared_configs.model_server_models import IntentRequest from shared_configs.model_server_models import IntentResponse @@ -19,10 +25,55 @@ router = APIRouter(prefix="/custom") +_CONNECTOR_CLASSIFIER_TOKENIZER: AutoTokenizer | None = None +_CONNECTOR_CLASSIFIER_MODEL: ConnectorClassifier | None = None + _INTENT_TOKENIZER: AutoTokenizer | None = None _INTENT_MODEL: HybridClassifier | None = None +def get_connector_classifier_tokenizer() -> AutoTokenizer: + global _CONNECTOR_CLASSIFIER_TOKENIZER + if _CONNECTOR_CLASSIFIER_TOKENIZER is None: + # The tokenizer details are not uploaded to the HF hub since it's just the + # unmodified distilbert tokenizer. + _CONNECTOR_CLASSIFIER_TOKENIZER = AutoTokenizer.from_pretrained( + "distilbert-base-uncased" + ) + return _CONNECTOR_CLASSIFIER_TOKENIZER + + +def get_local_connector_classifier( + model_name_or_path: str = CONNECTOR_CLASSIFIER_MODEL_REPO, + tag: str = CONNECTOR_CLASSIFIER_MODEL_TAG, +) -> ConnectorClassifier: + global _CONNECTOR_CLASSIFIER_MODEL + if _CONNECTOR_CLASSIFIER_MODEL is None: + try: + # Calculate where the cache should be, then load from local if available + local_path = snapshot_download( + repo_id=model_name_or_path, revision=tag, local_files_only=True + ) + _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained( + local_path + ) + except Exception as e: + logger.warning(f"Failed to load model directly: {e}") + try: + # Attempt to download the model snapshot + logger.info(f"Downloading model snapshot for {model_name_or_path}") + local_path = snapshot_download(repo_id=model_name_or_path, revision=tag) + _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained( + local_path + ) + except Exception as e: + logger.error( + f"Failed to load model even after attempted snapshot download: {e}" + ) + raise + return _CONNECTOR_CLASSIFIER_MODEL + + def get_intent_model_tokenizer() -> AutoTokenizer: global _INTENT_TOKENIZER if _INTENT_TOKENIZER is None: @@ -61,6 +112,74 @@ def get_local_intent_model( return _INTENT_MODEL +def tokenize_connector_classification_query( + connectors: list[str], + query: str, + tokenizer: PreTrainedTokenizer, + connector_token_end_id: int, +) -> tuple[torch.Tensor, torch.Tensor]: + """ + Tokenize the connectors & user query into one prompt for the forward pass of ConnectorClassifier models + + The attention mask is just all 1s. The prompt is CLS + each connector name suffixed with the connector end + token and then the user query. 
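+ + Example (illustrative): for connectors ["GitHub", "Confluence"] and the query "pto policy", the + resulting sequence is roughly [CLS] <GitHub tokens> <connector end token> <Confluence tokens> + <connector end token> <query tokens> [SEP].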
+ """ + + input_ids = torch.tensor([tokenizer.cls_token_id], dtype=torch.long) + + for connector in connectors: + connector_token_ids = tokenizer( + connector, + add_special_tokens=False, + return_tensors="pt", + ) + + input_ids = torch.cat( + ( + input_ids, + connector_token_ids["input_ids"].squeeze(dim=0), + torch.tensor([connector_token_end_id], dtype=torch.long), + ), + dim=-1, + ) + query_token_ids = tokenizer( + query, + add_special_tokens=False, + return_tensors="pt", + ) + + input_ids = torch.cat( + ( + input_ids, + query_token_ids["input_ids"].squeeze(dim=0), + torch.tensor([tokenizer.sep_token_id], dtype=torch.long), + ), + dim=-1, + ) + attention_mask = torch.ones(input_ids.numel(), dtype=torch.long) + + return input_ids.unsqueeze(0), attention_mask.unsqueeze(0) + + +def warm_up_connector_classifier_model() -> None: + logger.info( + f"Warming up connector_classifier model {CONNECTOR_CLASSIFIER_MODEL_TAG}" + ) + connector_classifier_tokenizer = get_connector_classifier_tokenizer() + connector_classifier = get_local_connector_classifier() + + input_ids, attention_mask = tokenize_connector_classification_query( + ["GitHub"], + "danswer classifier query google doc", + connector_classifier_tokenizer, + connector_classifier.connector_end_token_id, + ) + input_ids = input_ids.to(connector_classifier.device) + attention_mask = attention_mask.to(connector_classifier.device) + + connector_classifier(input_ids, attention_mask) + + def warm_up_intent_model() -> None: logger.notice(f"Warming up Intent Model: {INTENT_MODEL_VERSION}") intent_tokenizer = get_intent_model_tokenizer() @@ -157,6 +276,35 @@ def clean_keywords(keywords: list[str]) -> list[str]: return cleaned_words +def run_connector_classification(req: ConnectorClassificationRequest) -> list[str]: + tokenizer = get_connector_classifier_tokenizer() + model = get_local_connector_classifier() + + connector_names = req.available_connectors + + input_ids, attention_mask = tokenize_connector_classification_query( + connector_names, + req.query, + tokenizer, + model.connector_end_token_id, + ) + input_ids = input_ids.to(model.device) + attention_mask = attention_mask.to(model.device) + + global_confidence, classifier_confidence = model(input_ids, attention_mask) + + if global_confidence.item() < 0.5: + return [] + + passed_connectors = [] + + for i, connector_name in enumerate(connector_names): + if classifier_confidence.view(-1)[i].item() > 0.5: + passed_connectors.append(connector_name) + + return passed_connectors + + def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]: tokenizer = get_intent_model_tokenizer() model_input = tokenizer( @@ -189,6 +337,22 @@ def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]: return is_keyword_sequence, cleaned_keywords +@router.post("/connector-classification") +async def process_connector_classification_request( + classification_request: ConnectorClassificationRequest, +) -> ConnectorClassificationResponse: + if INDEXING_ONLY: + raise RuntimeError( + "Indexing model server should not call connector classification endpoint" + ) + + if len(classification_request.available_connectors) == 0: + return ConnectorClassificationResponse(connectors=[]) + + connectors = run_connector_classification(classification_request) + return ConnectorClassificationResponse(connectors=connectors) + + @router.post("/query-analysis") async def process_analysis_request( intent_request: IntentRequest, diff --git a/backend/model_server/danswer_torch_model.py 
b/backend/model_server/danswer_torch_model.py index 28554a4fd2d..7390a97e049 100644 --- a/backend/model_server/danswer_torch_model.py +++ b/backend/model_server/danswer_torch_model.py @@ -4,7 +4,8 @@ import torch import torch.nn as nn from transformers import DistilBertConfig # type: ignore -from transformers import DistilBertModel +from transformers import DistilBertModel # type: ignore +from transformers import DistilBertTokenizer # type: ignore class HybridClassifier(nn.Module): @@ -21,7 +22,6 @@ def __init__(self) -> None: self.distilbert.config.dim, self.distilbert.config.dim ) self.intent_classifier = nn.Linear(self.distilbert.config.dim, 2) - self.dropout = nn.Dropout(self.distilbert.config.seq_classif_dropout) self.device = torch.device("cpu") @@ -36,8 +36,7 @@ def forward( # Intent classification on the CLS token cls_token_state = sequence_output[:, 0, :] pre_classifier_out = self.pre_classifier(cls_token_state) - dropout_out = self.dropout(pre_classifier_out) - intent_logits = self.intent_classifier(dropout_out) + intent_logits = self.intent_classifier(pre_classifier_out) # Keyword classification on all tokens token_logits = self.keyword_classifier(sequence_output) @@ -72,3 +71,70 @@ def from_pretrained(cls, load_directory: str) -> "HybridClassifier": param.requires_grad = False return model + + +class ConnectorClassifier(nn.Module): + def __init__(self, config: DistilBertConfig) -> None: + super().__init__() + + self.config = config + self.distilbert = DistilBertModel(config) + self.connector_global_classifier = nn.Linear(self.distilbert.config.dim, 1) + self.connector_match_classifier = nn.Linear(self.distilbert.config.dim, 1) + self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased") + + # Token indicating end of connector name, and on which classifier is used + self.connector_end_token_id = self.tokenizer.get_vocab()[ + self.config.connector_end_token + ] + + self.device = torch.device("cpu") + + def forward( + self, + input_ids: torch.Tensor, + attention_mask: torch.Tensor, + ) -> tuple[torch.Tensor, torch.Tensor]: + hidden_states = self.distilbert( + input_ids=input_ids, attention_mask=attention_mask + ).last_hidden_state + + cls_hidden_states = hidden_states[ + :, 0, : + ] # Take leap of faith that first token is always [CLS] + global_logits = self.connector_global_classifier(cls_hidden_states).view(-1) + global_confidence = torch.sigmoid(global_logits).view(-1) + + connector_end_position_ids = input_ids == self.connector_end_token_id + connector_end_hidden_states = hidden_states[connector_end_position_ids] + classifier_output = self.connector_match_classifier(connector_end_hidden_states) + classifier_confidence = torch.nn.functional.sigmoid(classifier_output).view(-1) + + return global_confidence, classifier_confidence + + @classmethod + def from_pretrained(cls, repo_dir: str) -> "ConnectorClassifier": + config = DistilBertConfig.from_pretrained(os.path.join(repo_dir, "config.json")) + device = ( + torch.device("cuda") + if torch.cuda.is_available() + else torch.device("mps") + if torch.backends.mps.is_available() + else torch.device("cpu") + ) + state_dict = torch.load( + os.path.join(repo_dir, "pytorch_model.pt"), + map_location=device, + weights_only=True, + ) + + model = cls(config) + model.load_state_dict(state_dict) + model.to(device) + model.device = device + model.eval() + + for param in model.parameters(): + param.requires_grad = False + + return model diff --git a/backend/model_server/encoders.py b/backend/model_server/encoders.py index 
4e97bd00f27..860151b3dc4 100644 --- a/backend/model_server/encoders.py +++ b/backend/model_server/encoders.py @@ -2,6 +2,7 @@ from typing import Any from typing import Optional +import httpx import openai import vertexai # type: ignore import voyageai # type: ignore @@ -83,7 +84,7 @@ def __init__( self.client = _initialize_client(api_key, self.provider, model) def _embed_openai(self, texts: list[str], model: str | None) -> list[Embedding]: - if model is None: + if not model: model = DEFAULT_OPENAI_MODEL # OpenAI does not seem to provide truncation option, however @@ -110,7 +111,7 @@ def _embed_openai(self, texts: list[str], model: str | None) -> list[Embedding]: def _embed_cohere( self, texts: list[str], model: str | None, embedding_type: str ) -> list[Embedding]: - if model is None: + if not model: model = DEFAULT_COHERE_MODEL final_embeddings: list[Embedding] = [] @@ -129,7 +130,7 @@ def _embed_cohere( def _embed_voyage( self, texts: list[str], model: str | None, embedding_type: str ) -> list[Embedding]: - if model is None: + if not model: model = DEFAULT_VOYAGE_MODEL # Similar to Cohere, the API server will do approximate size chunking @@ -145,7 +146,7 @@ def _embed_voyage( def _embed_vertex( self, texts: list[str], model: str | None, embedding_type: str ) -> list[Embedding]: - if model is None: + if not model: model = DEFAULT_VERTEX_MODEL embeddings = self.client.get_embeddings( @@ -171,7 +172,6 @@ def embed( try: if self.provider == EmbeddingProvider.OPENAI: return self._embed_openai(texts, model_name) - embedding_type = EmbeddingModelTextType.get_type(self.provider, text_type) if self.provider == EmbeddingProvider.COHERE: return self._embed_cohere(texts, model_name, embedding_type) @@ -235,6 +235,25 @@ def get_local_reranking_model( return _RERANK_MODEL +def embed_with_litellm_proxy( + texts: list[str], api_url: str, model_name: str, api_key: str | None +) -> list[Embedding]: + headers = {} if not api_key else {"Authorization": f"Bearer {api_key}"} + + with httpx.Client() as client: + response = client.post( + api_url, + json={ + "model": model_name, + "input": texts, + }, + headers=headers, + ) + response.raise_for_status() + result = response.json() + return [embedding["embedding"] for embedding in result["data"]] + + @simple_log_function_time() def embed_text( texts: list[str], @@ -245,21 +264,42 @@ def embed_text( api_key: str | None, provider_type: EmbeddingProvider | None, prefix: str | None, + api_url: str | None, ) -> list[Embedding]: + logger.info(f"Embedding {len(texts)} texts with provider: {provider_type}") + if not all(texts): + logger.error("Empty strings provided for embedding") raise ValueError("Empty strings are not allowed for embedding.") - # Third party API based embedding model if not texts: + logger.error("No texts provided for embedding") raise ValueError("No texts provided for embedding.") + + if provider_type == EmbeddingProvider.LITELLM: + logger.debug(f"Using LiteLLM proxy for embedding with URL: {api_url}") + if not api_url: + logger.error("API URL not provided for LiteLLM proxy") + raise ValueError("API URL is required for LiteLLM proxy embedding.") + try: + return embed_with_litellm_proxy( + texts=texts, + api_url=api_url, + model_name=model_name or "", + api_key=api_key, + ) + except Exception as e: + logger.exception(f"Error during LiteLLM proxy embedding: {str(e)}") + raise + elif provider_type is not None: - logger.debug(f"Embedding text with provider: {provider_type}") + logger.debug(f"Using cloud provider {provider_type} for embedding") if api_key 
is None: + logger.error("API key not provided for cloud model") raise RuntimeError("API key not provided for cloud model") if prefix: - # This may change in the future if some providers require the user - # to manually append a prefix but this is not the case currently + logger.warning("Prefix provided for cloud model, which is not supported") raise ValueError( "Prefix string is not valid for cloud models. " "Cloud models take an explicit text type instead." @@ -274,14 +314,15 @@ def embed_text( text_type=text_type, ) - # Check for None values in embeddings if any(embedding is None for embedding in embeddings): error_message = "Embeddings contain None values\n" error_message += "Corresponding texts:\n" error_message += "\n".join(texts) + logger.error(error_message) raise ValueError(error_message) elif model_name is not None: + logger.debug(f"Using local model {model_name} for embedding") prefixed_texts = [f"{prefix}{text}" for text in texts] if prefix else texts local_model = get_embedding_model( @@ -296,10 +337,12 @@ def embed_text( ] else: + logger.error("Neither model name nor provider specified for embedding") raise ValueError( "Either model name or provider must be provided to run embeddings." ) + logger.info(f"Successfully embedded {len(texts)} texts") return embeddings @@ -319,6 +362,28 @@ def cohere_rerank( return [result.relevance_score for result in sorted_results] +def litellm_rerank( + query: str, docs: list[str], api_url: str, model_name: str, api_key: str | None +) -> list[float]: + headers = {} if not api_key else {"Authorization": f"Bearer {api_key}"} + with httpx.Client() as client: + response = client.post( + api_url, + json={ + "model": model_name, + "query": query, + "documents": docs, + }, + headers=headers, + ) + response.raise_for_status() + result = response.json() + return [ + item["relevance_score"] + for item in sorted(result["results"], key=lambda x: x["index"]) + ] + + @router.post("/bi-encoder-embed") async def process_embed_request( embed_request: EmbedRequest, @@ -344,6 +409,7 @@ async def process_embed_request( api_key=embed_request.api_key, provider_type=embed_request.provider_type, text_type=embed_request.text_type, + api_url=embed_request.api_url, prefix=prefix, ) return EmbedResponse(embeddings=embeddings) @@ -374,6 +440,20 @@ async def process_rerank_request(rerank_request: RerankRequest) -> RerankRespons model_name=rerank_request.model_name, ) return RerankResponse(scores=sim_scores) + elif rerank_request.provider_type == RerankerProvider.LITELLM: + if rerank_request.api_url is None: + raise ValueError("API URL is required for LiteLLM reranking.") + + sim_scores = litellm_rerank( + query=rerank_request.query, + docs=rerank_request.documents, + api_url=rerank_request.api_url, + model_name=rerank_request.model_name, + api_key=rerank_request.api_key, + ) + + return RerankResponse(scores=sim_scores) + elif rerank_request.provider_type == RerankerProvider.COHERE: if rerank_request.api_key is None: raise RuntimeError("Cohere Rerank Requires an API Key") diff --git a/backend/pytest.ini b/backend/pytest.ini index db3dbf8b00d..954a0274064 100644 --- a/backend/pytest.ini +++ b/backend/pytest.ini @@ -1,4 +1,8 @@ [pytest] pythonpath = . 
markers = - slow: marks tests as slow \ No newline at end of file + slow: marks tests as slow +filterwarnings = + ignore::DeprecationWarning + ignore::cryptography.utils.CryptographyDeprecationWarning + \ No newline at end of file diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index 9427335c47d..2b7da8b84d4 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -1,10 +1,10 @@ -aiohttp==3.9.4 +aiohttp==3.10.2 alembic==1.10.4 asyncpg==0.27.0 atlassian-python-api==3.37.0 beautifulsoup4==4.12.2 boto3==1.34.84 -celery==5.3.4 +celery==5.5.0b4 chardet==5.2.0 dask==2023.8.1 ddtrace==2.6.5 @@ -12,7 +12,7 @@ distributed==2023.8.1 fastapi==0.109.2 fastapi-users==12.1.3 fastapi-users-db-sqlalchemy==5.0.0 -filelock==3.12.0 +filelock==3.15.4 google-api-python-client==2.86.0 google-auth-httplib2==0.1.0 google-auth-oauthlib==1.0.0 @@ -26,17 +26,16 @@ huggingface-hub==0.20.1 jira==3.5.1 jsonref==1.1.0 langchain==0.1.17 -langchain-community==0.0.36 langchain-core==0.1.50 langchain-text-splitters==0.0.1 -litellm==1.43.18 +litellm==1.48.7 llama-index==0.9.45 Mako==1.2.4 -msal==1.26.0 +msal==1.28.0 nltk==3.8.1 Office365-REST-Python-Client==2.5.9 oauthlib==3.2.2 -openai==1.41.1 +openai==1.47.0 openpyxl==3.1.2 playwright==1.41.2 psutil==5.9.5 @@ -50,10 +49,11 @@ python-pptx==0.6.23 pypdf==3.17.0 pytest-mock==3.12.0 pytest-playwright==0.3.2 -python-docx==1.1.0 +python-docx==1.1.2 python-dotenv==1.0.0 python-multipart==0.0.7 pywikibot==9.0.0 +redis==5.0.8 requests==2.32.2 requests-oauthlib==1.3.1 retry==0.9.2 # This pulls in py which is in CVE-2022-42969, must remove py from image @@ -70,6 +70,7 @@ transformers==4.39.2 uvicorn==0.21.1 zulip==0.8.2 hubspot-api-client==8.1.0 +asana==5.0.8 zenpy==2.0.41 dropbox==11.36.2 boto3-stubs[s3]==1.34.133 diff --git a/backend/requirements/model_server.txt b/backend/requirements/model_server.txt index 0fb0e74b67b..1e7baa415ee 100644 --- a/backend/requirements/model_server.txt +++ b/backend/requirements/model_server.txt @@ -3,12 +3,12 @@ einops==0.8.0 fastapi==0.109.2 google-cloud-aiplatform==1.58.0 numpy==1.26.4 -openai==1.41.1 +openai==1.47.0 pydantic==2.8.2 retry==0.9.2 safetensors==0.4.2 sentence-transformers==2.6.1 -torch==2.0.1 +torch==2.2.0 transformers==4.39.2 uvicorn==0.21.1 voyageai==0.2.3 diff --git a/backend/scripts/dev_run_background_jobs.py b/backend/scripts/dev_run_background_jobs.py index 3a917fbed1a..a4a253a10df 100644 --- a/backend/scripts/dev_run_background_jobs.py +++ b/backend/scripts/dev_run_background_jobs.py @@ -18,7 +18,8 @@ def monitor_process(process_name: str, process: subprocess.Popen) -> None: def run_jobs(exclude_indexing: bool) -> None: - cmd_worker = [ + # command setup + cmd_worker_primary = [ "celery", "-A", "ee.danswer.background.celery.celery_app", @@ -26,6 +27,38 @@ def run_jobs(exclude_indexing: bool) -> None: "--pool=threads", "--concurrency=6", "--loglevel=INFO", + "-n", + "primary@%n", + "-Q", + "celery", + ] + + cmd_worker_light = [ + "celery", + "-A", + "ee.danswer.background.celery.celery_app", + "worker", + "--pool=threads", + "--concurrency=16", + "--loglevel=INFO", + "-n", + "light@%n", + "-Q", + "vespa_metadata_sync,connector_deletion", + ] + + cmd_worker_heavy = [ + "celery", + "-A", + "ee.danswer.background.celery.celery_app", + "worker", + "--pool=threads", + "--concurrency=6", + "--loglevel=INFO", + "-n", + "heavy@%n", + "-Q", + "connector_pruning", ] cmd_beat = [ @@ -36,19 +69,38 @@ def run_jobs(exclude_indexing: bool) -> None: "--loglevel=INFO", ] - 
worker_process = subprocess.Popen( - cmd_worker, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True + # spawn processes + worker_primary_process = subprocess.Popen( + cmd_worker_primary, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True + ) + + worker_light_process = subprocess.Popen( + cmd_worker_light, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True + ) + + worker_heavy_process = subprocess.Popen( + cmd_worker_heavy, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True ) + beat_process = subprocess.Popen( cmd_beat, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True ) - worker_thread = threading.Thread( - target=monitor_process, args=("WORKER", worker_process) + # monitor threads + worker_primary_thread = threading.Thread( + target=monitor_process, args=("PRIMARY", worker_primary_process) + ) + worker_light_thread = threading.Thread( + target=monitor_process, args=("LIGHT", worker_light_process) + ) + worker_heavy_thread = threading.Thread( + target=monitor_process, args=("HEAVY", worker_heavy_process) ) beat_thread = threading.Thread(target=monitor_process, args=("BEAT", beat_process)) - worker_thread.start() + worker_primary_thread.start() + worker_light_thread.start() + worker_heavy_thread.start() beat_thread.start() if not exclude_indexing: @@ -91,7 +143,9 @@ def run_jobs(exclude_indexing: bool) -> None: except Exception: pass - worker_thread.join() + worker_primary_thread.join() + worker_light_thread.join() + worker_heavy_thread.join() beat_thread.join() diff --git a/backend/scripts/force_delete_connector_by_id.py b/backend/scripts/force_delete_connector_by_id.py index 118a4dfa4b4..0a9857304c8 100755 --- a/backend/scripts/force_delete_connector_by_id.py +++ b/backend/scripts/force_delete_connector_by_id.py @@ -83,8 +83,7 @@ def _unsafe_deletion( # Delete index attempts delete_index_attempts( db_session=db_session, - connector_id=connector_id, - credential_id=credential_id, + cc_pair_id=cc_pair.id, ) # Delete document sets diff --git a/backend/scripts/query_time_check/seed_dummy_docs.py b/backend/scripts/query_time_check/seed_dummy_docs.py new file mode 100644 index 00000000000..96b6b4a0133 --- /dev/null +++ b/backend/scripts/query_time_check/seed_dummy_docs.py @@ -0,0 +1,166 @@ +""" +launch: +- api server +- postgres +- vespa +- model server (this is only needed so the api server can startup, no embedding is done) + +Run this script to seed the database with dummy documents. +Then run test_query_times.py to test query times. 
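+ +Example invocation (illustrative; assumes the commands are run from the backend/ directory with +PYTHONPATH=. so the danswer and scripts packages resolve): + PYTHONPATH=. python scripts/query_time_check/seed_dummy_docs.py + PYTHONPATH=. python scripts/query_time_check/test_query_times.py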
+""" +import random +from datetime import datetime + +from danswer.access.models import DocumentAccess +from danswer.configs.constants import DocumentSource +from danswer.connectors.models import Document +from danswer.db.engine import get_session_context_manager +from danswer.db.search_settings import get_current_search_settings +from danswer.document_index.vespa.index import VespaIndex +from danswer.indexing.models import ChunkEmbedding +from danswer.indexing.models import DocMetadataAwareIndexChunk +from danswer.indexing.models import IndexChunk +from danswer.utils.timing import log_function_time +from shared_configs.model_server_models import Embedding + + +TOTAL_DOC_SETS = 8 +TOTAL_ACL_ENTRIES_PER_CATEGORY = 80 + + +def generate_random_embedding(dim: int) -> Embedding: + return [random.uniform(-1, 1) for _ in range(dim)] + + +def generate_random_identifier() -> str: + return f"dummy_doc_{random.randint(1, 1000)}" + + +def generate_dummy_chunk( + doc_id: str, + chunk_id: int, + embedding_dim: int, + number_of_acl_entries: int, + number_of_document_sets: int, +) -> DocMetadataAwareIndexChunk: + document = Document( + id=doc_id, + source=DocumentSource.GOOGLE_DRIVE, + sections=[], + metadata={}, + semantic_identifier=generate_random_identifier(), + ) + + chunk = IndexChunk( + chunk_id=chunk_id, + blurb=f"Blurb for chunk {chunk_id} of document {doc_id}.", + content=f"Content for chunk {chunk_id} of document {doc_id}. This is dummy text for testing purposes.", + source_links={}, + section_continuation=False, + source_document=document, + title_prefix=f"Title prefix for doc {doc_id}", + metadata_suffix_semantic="", + metadata_suffix_keyword="", + mini_chunk_texts=None, + embeddings=ChunkEmbedding( + full_embedding=generate_random_embedding(embedding_dim), + mini_chunk_embeddings=[], + ), + title_embedding=generate_random_embedding(embedding_dim), + ) + + document_set_names = [] + for i in range(number_of_document_sets): + document_set_names.append(f"Document Set {i}") + + user_emails: set[str | None] = set() + user_groups: set[str] = set() + external_user_emails: set[str] = set() + external_user_group_ids: set[str] = set() + for i in range(number_of_acl_entries): + user_emails.add(f"user_{i}@example.com") + user_groups.add(f"group_{i}") + external_user_emails.add(f"external_user_{i}@example.com") + external_user_group_ids.add(f"external_group_{i}") + + return DocMetadataAwareIndexChunk.from_index_chunk( + index_chunk=chunk, + access=DocumentAccess( + user_emails=user_emails, + user_groups=user_groups, + external_user_emails=external_user_emails, + external_user_group_ids=external_user_group_ids, + is_public=random.choice([True, False]), + ), + document_sets={document_set for document_set in document_set_names}, + boost=random.randint(-1, 1), + ) + + +@log_function_time() +def do_insertion( + vespa_index: VespaIndex, all_chunks: list[DocMetadataAwareIndexChunk] +) -> None: + insertion_records = vespa_index.index(all_chunks) + print(f"Indexed {len(insertion_records)} documents.") + print( + f"New documents: {sum(1 for record in insertion_records if not record.already_existed)}" + ) + print( + f"Existing documents updated: {sum(1 for record in insertion_records if record.already_existed)}" + ) + + +@log_function_time() +def seed_dummy_docs( + number_of_document_sets: int, + number_of_acl_entries: int, + num_docs: int = 1000, + chunks_per_doc: int = 5, + batch_size: int = 100, +) -> None: + with get_session_context_manager() as db_session: + search_settings = 
get_current_search_settings(db_session) + index_name = search_settings.index_name + embedding_dim = search_settings.model_dim + + vespa_index = VespaIndex(index_name=index_name, secondary_index_name=None) + print(index_name) + + all_chunks = [] + chunk_count = 0 + for doc_num in range(num_docs): + doc_id = f"dummy_doc_{doc_num}_{datetime.now().isoformat()}" + for chunk_num in range(chunks_per_doc): + chunk = generate_dummy_chunk( + doc_id=doc_id, + chunk_id=chunk_num, + embedding_dim=embedding_dim, + number_of_acl_entries=number_of_acl_entries, + number_of_document_sets=number_of_document_sets, + ) + all_chunks.append(chunk) + chunk_count += 1 + + if len(all_chunks) >= chunks_per_doc * batch_size: + do_insertion(vespa_index, all_chunks) + print( + f"Indexed {chunk_count} chunks out of {num_docs * chunks_per_doc}." + ) + print( + f"percentage: {chunk_count / (num_docs * chunks_per_doc) * 100:.2f}% \n" + ) + all_chunks = [] + + if all_chunks: + do_insertion(vespa_index, all_chunks) + + +if __name__ == "__main__": + seed_dummy_docs( + number_of_document_sets=TOTAL_DOC_SETS, + number_of_acl_entries=TOTAL_ACL_ENTRIES_PER_CATEGORY, + num_docs=100000, + chunks_per_doc=5, + batch_size=1000, + ) diff --git a/backend/scripts/query_time_check/test_query_times.py b/backend/scripts/query_time_check/test_query_times.py new file mode 100644 index 00000000000..c839fc610e1 --- /dev/null +++ b/backend/scripts/query_time_check/test_query_times.py @@ -0,0 +1,122 @@ +""" +RUN THIS AFTER SEED_DUMMY_DOCS.PY +""" +import random +import time + +from danswer.configs.constants import DocumentSource +from danswer.configs.model_configs import DOC_EMBEDDING_DIM +from danswer.db.engine import get_session_context_manager +from danswer.db.search_settings import get_current_search_settings +from danswer.document_index.vespa.index import VespaIndex +from danswer.search.models import IndexFilters +from scripts.query_time_check.seed_dummy_docs import TOTAL_ACL_ENTRIES_PER_CATEGORY +from scripts.query_time_check.seed_dummy_docs import TOTAL_DOC_SETS +from shared_configs.model_server_models import Embedding + +# make sure these are smaller than TOTAL_ACL_ENTRIES_PER_CATEGORY and TOTAL_DOC_SETS, respectively +NUMBER_OF_ACL_ENTRIES_PER_QUERY = 6 +NUMBER_OF_DOC_SETS_PER_QUERY = 2 + + +def get_slowest_99th_percentile(results: list[float]) -> float: + return sorted(results)[int(0.99 * len(results))] + + +# Generate random filters +def _random_filters() -> IndexFilters: + """ + Generate random filters for the query containing: + - NUMBER_OF_ACL_ENTRIES_PER_QUERY user emails + - NUMBER_OF_ACL_ENTRIES_PER_QUERY groups + - NUMBER_OF_ACL_ENTRIES_PER_QUERY external groups + - NUMBER_OF_DOC_SETS_PER_QUERY document sets + """ + access_control_list = [ + f"user_email:user_{random.randint(0, TOTAL_ACL_ENTRIES_PER_CATEGORY - 1)}@example.com", + ] + acl_indices = random.sample( + range(TOTAL_ACL_ENTRIES_PER_CATEGORY), NUMBER_OF_ACL_ENTRIES_PER_QUERY + ) + for i in acl_indices: + access_control_list.append(f"group:group_{i}") + access_control_list.append(f"external_group:external_group_{i}") + + doc_sets = [] + doc_set_indices = random.sample( + range(TOTAL_DOC_SETS), NUMBER_OF_DOC_SETS_PER_QUERY + ) + for i in doc_set_indices: + doc_sets.append(f"document_set:Document Set {i}") + + return IndexFilters( + source_type=[DocumentSource.GOOGLE_DRIVE], + document_set=doc_sets, + tags=[], + access_control_list=access_control_list, + ) + + +def test_hybrid_retrieval_times( + number_of_queries: int, +) -> 
None: + with get_session_context_manager() as db_session: + search_settings = get_current_search_settings(db_session) + index_name = search_settings.index_name + + vespa_index = VespaIndex(index_name=index_name, secondary_index_name=None) + + # Generate random queries + queries = [f"Random Query {i}" for i in range(number_of_queries)] + + # Generate random embeddings + embeddings = [ + Embedding([random.random() for _ in range(DOC_EMBEDDING_DIM)]) + for _ in range(number_of_queries) + ] + + total_time = 0.0 + results = [] + for i in range(number_of_queries): + start_time = time.time() + + vespa_index.hybrid_retrieval( + query=queries[i], + query_embedding=embeddings[i], + final_keywords=None, + filters=_random_filters(), + hybrid_alpha=0.5, + time_decay_multiplier=1.0, + num_to_retrieve=50, + offset=0, + title_content_ratio=0.5, + ) + + end_time = time.time() + query_time = end_time - start_time + total_time += query_time + results.append(query_time) + + print(f"Query {i+1}: {query_time:.4f} seconds") + + avg_time = total_time / number_of_queries + fast_time = min(results) + slow_time = max(results) + ninety_ninth_percentile = get_slowest_99th_percentile(results) + # Write results to a file + _OUTPUT_PATH = "query_times_results_large_more.txt" + with open(_OUTPUT_PATH, "w") as f: + f.write(f"Average query time: {avg_time:.4f} seconds\n") + f.write(f"Fastest query: {fast_time:.4f} seconds\n") + f.write(f"Slowest query: {slow_time:.4f} seconds\n") + f.write(f"99th percentile: {ninety_ninth_percentile:.4f} seconds\n") + print(f"Results written to {_OUTPUT_PATH}") + + print(f"\nAverage query time: {avg_time:.4f} seconds") + print(f"Fastest query: {fast_time:.4f} seconds") + print(f"Slowest query: {max(results):.4f} seconds") + print(f"99th percentile: {get_slowest_99th_percentile(results):.4f} seconds") + + +if __name__ == "__main__": + test_hybrid_retrieval_times(number_of_queries=1000) diff --git a/backend/scripts/restart_containers.sh b/backend/scripts/restart_containers.sh index c60d1905eb5..838df5b5c79 100755 --- a/backend/scripts/restart_containers.sh +++ b/backend/scripts/restart_containers.sh @@ -1,15 +1,16 @@ #!/bin/bash # Usage of the script with optional volume arguments -# ./restart_containers.sh [vespa_volume] [postgres_volume] +# ./restart_containers.sh [vespa_volume] [postgres_volume] [redis_volume] VESPA_VOLUME=${1:-""} # Default is empty if not provided POSTGRES_VOLUME=${2:-""} # Default is empty if not provided +REDIS_VOLUME=${3:-""} # Default is empty if not provided # Stop and remove the existing containers echo "Stopping and removing existing containers..." -docker stop danswer_postgres danswer_vespa -docker rm danswer_postgres danswer_vespa +docker stop danswer_postgres danswer_vespa danswer_redis +docker rm danswer_postgres danswer_vespa danswer_redis # Start the PostgreSQL container with optional volume echo "Starting PostgreSQL container..." @@ -27,6 +28,14 @@ else docker run --detach --name danswer_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 vespaengine/vespa:8 fi +# Start the Redis container with optional volume +echo "Starting Redis container..." 
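+# Redis is presumed to serve as the Celery broker / result backend for the queue-based background +# workers (vespa_metadata_sync, connector_deletion, connector_pruning).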
+if [[ -n "$REDIS_VOLUME" ]]; then + docker run --detach --name danswer_redis --publish 6379:6379 -v $REDIS_VOLUME:/data redis +else + docker run --detach --name danswer_redis --publish 6379:6379 redis +fi + # Ensure alembic runs in the correct directory SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" PARENT_DIR="$(dirname "$SCRIPT_DIR")" diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py index 5ad36cc93c4..ea37b031c7a 100644 --- a/backend/shared_configs/configs.py +++ b/backend/shared_configs/configs.py @@ -1,4 +1,5 @@ import os +from urllib.parse import urlparse # Used for logging SLACK_CHANNEL_ID = "channel_id" @@ -16,9 +17,12 @@ ) # Danswer custom Deep Learning Models +CONNECTOR_CLASSIFIER_MODEL_REPO = "Danswer/filter-extraction-model" +CONNECTOR_CLASSIFIER_MODEL_TAG = "1.0.0" INTENT_MODEL_VERSION = "danswer/hybrid-intent-token-classifier" INTENT_MODEL_TAG = "v1.0.3" + # Bi-Encoder, other details DOC_EMBEDDING_CONTEXT_SIZE = 512 @@ -58,9 +62,11 @@ # Fields which should only be set on new search setting PRESERVED_SEARCH_FIELDS = [ + "id", "provider_type", "api_key", "model_name", + "api_url", "index_name", "multipass_indexing", "model_dim", @@ -68,3 +74,18 @@ "passage_prefix", "query_prefix", ] + + +# CORS +def validate_cors_origin(origin: str) -> None: + parsed = urlparse(origin) + if parsed.scheme not in ["http", "https"] or not parsed.netloc: + raise ValueError(f"Invalid CORS origin: '{origin}'") + + +CORS_ALLOWED_ORIGIN = os.environ.get("CORS_ALLOWED_ORIGIN", "*").split(",") or ["*"] + +# Validate non-wildcard origins +for origin in CORS_ALLOWED_ORIGIN: + if origin != "*" and (stripped_origin := origin.strip()): + validate_cors_origin(stripped_origin) diff --git a/backend/shared_configs/enums.py b/backend/shared_configs/enums.py index 918872d44b3..b58ac0a8928 100644 --- a/backend/shared_configs/enums.py +++ b/backend/shared_configs/enums.py @@ -6,10 +6,12 @@ class EmbeddingProvider(str, Enum): COHERE = "cohere" VOYAGE = "voyage" GOOGLE = "google" + LITELLM = "litellm" class RerankerProvider(str, Enum): COHERE = "cohere" + LITELLM = "litellm" class EmbedTextType(str, Enum): diff --git a/backend/shared_configs/model_server_models.py b/backend/shared_configs/model_server_models.py index 3014616c620..dd846ed6bad 100644 --- a/backend/shared_configs/model_server_models.py +++ b/backend/shared_configs/model_server_models.py @@ -7,6 +7,15 @@ Embedding = list[float] +class ConnectorClassificationRequest(BaseModel): + available_connectors: list[str] + query: str + + +class ConnectorClassificationResponse(BaseModel): + connectors: list[str] + + class EmbedRequest(BaseModel): texts: list[str] # Can be none for cloud embedding model requests, error handling logic exists for other cases @@ -18,6 +27,7 @@ class EmbedRequest(BaseModel): text_type: EmbedTextType manual_query_prefix: str | None = None manual_passage_prefix: str | None = None + api_url: str | None = None # This disables the "model_" protected namespace for pydantic model_config = {"protected_namespaces": ()} @@ -33,6 +43,7 @@ class RerankRequest(BaseModel): model_name: str provider_type: RerankerProvider | None = None api_key: str | None = None + api_url: str | None = None # This disables the "model_" protected namespace for pydantic model_config = {"protected_namespaces": ()} diff --git a/backend/supervisord.conf b/backend/supervisord.conf index b56c763b94f..3dc2edcc6a5 100644 --- a/backend/supervisord.conf +++ b/backend/supervisord.conf @@ -7,8 +7,10 @@ 
logfile=/var/log/supervisord.log # Cannot place this in Celery for now because Celery must run as a single process (see note below) # Indexing uses multi-processing to speed things up [program:document_indexing] -environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true,LOG_FILE_NAME=document_indexing +environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true command=python danswer/background/update.py +stdout_logfile=/var/log/document_indexing.log +stdout_logfile_maxbytes=16MB redirect_stderr=true autorestart=true @@ -23,18 +25,59 @@ autorestart=true # on a system, but this should be okay for now since all our celery tasks are # relatively compute-light (e.g. they tend to just make a bunch of requests to # Vespa / Postgres) -[program:celery_worker] -command=celery -A danswer.background.celery.celery_run:celery_app worker --pool=threads --concurrency=6 --loglevel=INFO --logfile=/var/log/celery_worker_supervisor.log -environment=LOG_FILE_NAME=celery_worker +[program:celery_worker_primary] +command=celery -A danswer.background.celery.celery_run:celery_app worker + --pool=threads + --concurrency=4 + --prefetch-multiplier=1 + --loglevel=INFO + --hostname=primary@%%n + -Q celery +stdout_logfile=/var/log/celery_worker_primary.log +stdout_logfile_maxbytes=16MB redirect_stderr=true autorestart=true +startsecs=10 +stopasgroup=true + +[program:celery_worker_light] +command=bash -c "celery -A danswer.background.celery.celery_run:celery_app worker \ + --pool=threads \ + --concurrency=${CELERY_WORKER_LIGHT_CONCURRENCY:-24} \ + --prefetch-multiplier=${CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER:-8} \ + --loglevel=INFO \ + --hostname=light@%%n \ + -Q vespa_metadata_sync,connector_deletion" +stdout_logfile=/var/log/celery_worker_light.log +stdout_logfile_maxbytes=16MB +redirect_stderr=true +autorestart=true +startsecs=10 +stopasgroup=true + +[program:celery_worker_heavy] +command=celery -A danswer.background.celery.celery_run:celery_app worker + --pool=threads + --concurrency=4 + --prefetch-multiplier=1 + --loglevel=INFO + --hostname=heavy@%%n + -Q connector_pruning +stdout_logfile=/var/log/celery_worker_heavy.log +stdout_logfile_maxbytes=16MB +redirect_stderr=true +autorestart=true +startsecs=10 +stopasgroup=true # Job scheduler for periodic tasks [program:celery_beat] -command=celery -A danswer.background.celery.celery_run:celery_app beat --loglevel=INFO --logfile=/var/log/celery_beat_supervisor.log -environment=LOG_FILE_NAME=celery_beat +command=celery -A danswer.background.celery.celery_run:celery_app beat +stdout_logfile=/var/log/celery_beat.log +stdout_logfile_maxbytes=16MB redirect_stderr=true -autorestart=true +startsecs=10 +stopasgroup=true # Listens for Slack messages and responds with answers # for all channels that the DanswerBot has been added to. 
@@ -42,7 +85,8 @@ autorestart=true # More details on setup here: https://docs.danswer.dev/slack_bot_setup [program:slack_bot] command=python danswer/danswerbot/slack/listener.py -environment=LOG_FILE_NAME=slack_bot +stdout_logfile=/var/log/slack_bot.log +stdout_logfile_maxbytes=16MB redirect_stderr=true autorestart=true startretries=5 @@ -52,13 +96,12 @@ startsecs=60 # No log rotation here, since it's stdout it's handled by the Docker container logging [program:log-redirect-handler] command=tail -qF - /var/log/document_indexing_info.log - /var/log/celery_beat_supervisor.log - /var/log/celery_worker_supervisor.log - /var/log/celery_beat_debug.log - /var/log/celery_worker_debug.log - /var/log/slack_bot_debug.log + /var/log/celery_beat.log + /var/log/celery_worker_primary.log + /var/log/celery_worker_light.log + /var/log/celery_worker_heavy.log + /var/log/document_indexing.log + /var/log/slack_bot.log stdout_logfile=/dev/stdout -stdout_logfile_maxbytes=0 -redirect_stderr=true +stdout_logfile_maxbytes = 0 # must be set to 0 when stdout_logfile=/dev/stdout autorestart=true diff --git a/backend/tests/api/test_api.py b/backend/tests/api/test_api.py index 059c40824d5..9a3571ef585 100644 --- a/backend/tests/api/test_api.py +++ b/backend/tests/api/test_api.py @@ -101,4 +101,4 @@ def test_handle_send_message_simple_with_history(client: TestClient) -> None: resp_json = response.json() # persona must have LLM relevance enabled for this to pass - assert len(resp_json["llm_chunks_indices"]) > 0 + assert len(resp_json["llm_selected_doc_indices"]) > 0 diff --git a/backend/tests/daily/connectors/confluence/test_confluence_basic.py b/backend/tests/daily/connectors/confluence/test_confluence_basic.py index 7f05242c50b..4eb25207814 100644 --- a/backend/tests/daily/connectors/confluence/test_confluence_basic.py +++ b/backend/tests/daily/connectors/confluence/test_confluence_basic.py @@ -8,7 +8,13 @@ @pytest.fixture def confluence_connector() -> ConfluenceConnector: - connector = ConfluenceConnector(os.environ["CONFLUENCE_TEST_SPACE_URL"]) + connector = ConfluenceConnector( + wiki_base=os.environ["CONFLUENCE_TEST_SPACE_URL"], + space=os.environ["CONFLUENCE_TEST_SPACE"], + is_cloud=os.environ.get("CONFLUENCE_IS_CLOUD", "true").lower() == "true", + page_id=os.environ.get("CONFLUENCE_TEST_PAGE_ID", ""), + ) + connector.load_credentials( { "confluence_username": os.environ["CONFLUENCE_USER_NAME"], diff --git a/backend/tests/daily/connectors/jira/test_jira_basic.py b/backend/tests/daily/connectors/jira/test_jira_basic.py new file mode 100644 index 00000000000..19d69dfadcf --- /dev/null +++ b/backend/tests/daily/connectors/jira/test_jira_basic.py @@ -0,0 +1,48 @@ +import os +import time + +import pytest + +from danswer.configs.constants import DocumentSource +from danswer.connectors.danswer_jira.connector import JiraConnector + + +@pytest.fixture +def jira_connector() -> JiraConnector: + connector = JiraConnector( + "https://danswerai.atlassian.net/jira/software/c/projects/AS/boards/6", + comment_email_blacklist=[], + ) + connector.load_credentials( + { + "jira_user_email": os.environ["JIRA_USER_EMAIL"], + "jira_api_token": os.environ["JIRA_API_TOKEN"], + } + ) + return connector + + +def test_jira_connector_basic(jira_connector: JiraConnector) -> None: + doc_batch_generator = jira_connector.poll_source(0, time.time()) + + doc_batch = next(doc_batch_generator) + with pytest.raises(StopIteration): + next(doc_batch_generator) + + assert len(doc_batch) == 1 + + doc = doc_batch[0] + + assert doc.id == 
"https://danswerai.atlassian.net/browse/AS-2" + assert doc.semantic_identifier == "test123small" + assert doc.source == DocumentSource.JIRA + assert doc.metadata == {"priority": "Medium", "status": "Backlog"} + assert doc.secondary_owners is None + assert doc.title is None + assert doc.from_ingestion_api is False + assert doc.additional_info is None + + assert len(doc.sections) == 1 + section = doc.sections[0] + assert section.text == "example_text\n" + assert section.link == "https://danswerai.atlassian.net/browse/AS-2" diff --git a/backend/tests/daily/embedding/test_embeddings.py b/backend/tests/daily/embedding/test_embeddings.py index a9c12b236cf..b736f374741 100644 --- a/backend/tests/daily/embedding/test_embeddings.py +++ b/backend/tests/daily/embedding/test_embeddings.py @@ -32,6 +32,7 @@ def openai_embedding_model() -> EmbeddingModel: passage_prefix=None, api_key=os.getenv("OPENAI_API_KEY"), provider_type=EmbeddingProvider.OPENAI, + api_url=None, ) @@ -51,6 +52,7 @@ def cohere_embedding_model() -> EmbeddingModel: passage_prefix=None, api_key=os.getenv("COHERE_API_KEY"), provider_type=EmbeddingProvider.COHERE, + api_url=None, ) @@ -70,6 +72,7 @@ def local_nomic_embedding_model() -> EmbeddingModel: passage_prefix="search_document: ", api_key=None, provider_type=None, + api_url=None, ) diff --git a/backend/tests/integration/Dockerfile b/backend/tests/integration/Dockerfile index d4869dd76c2..02cdcad0b44 100644 --- a/backend/tests/integration/Dockerfile +++ b/backend/tests/integration/Dockerfile @@ -31,6 +31,8 @@ RUN apt-get update && \ COPY ./requirements/default.txt /tmp/requirements.txt COPY ./requirements/ee.txt /tmp/ee-requirements.txt RUN pip install --no-cache-dir --upgrade \ + --retries 5 \ + --timeout 30 \ -r /tmp/requirements.txt \ -r /tmp/ee-requirements.txt && \ pip uninstall -y py && \ @@ -70,6 +72,7 @@ COPY ./danswer /app/danswer COPY ./shared_configs /app/shared_configs COPY ./alembic /app/alembic COPY ./alembic.ini /app/alembic.ini +COPY ./pytest.ini /app/pytest.ini COPY supervisord.conf /usr/etc/supervisord.conf # Integration test stuff @@ -78,6 +81,6 @@ RUN pip install --no-cache-dir --upgrade \ -r /tmp/dev-requirements.txt COPY ./tests/integration /app/tests/integration -ENV PYTHONPATH /app +ENV PYTHONPATH=/app CMD ["pytest", "-s", "/app/tests/integration"] diff --git a/backend/tests/integration/README.md b/backend/tests/integration/README.md new file mode 100644 index 00000000000..bc5e388082f --- /dev/null +++ b/backend/tests/integration/README.md @@ -0,0 +1,70 @@ +# Integration Tests + +## General Testing Overview +The integration tests are designed with a "manager" class and a "test" class for each type of object being manipulated (e.g., user, persona, credential): +- **Manager Class**: Contains methods for each type of API call. Responsible for creating, deleting, and verifying the existence of an entity. +- **Test Class**: Stores data for each entity being tested. This is our "expected state" of the object. + +The idea is that each test can use the manager class to create (.create()) a "test_" object. It can then perform an operation on the object (e.g., send a request to the API) and then check if the "test_" object is in the expected state by using the manager class (.verify()) function. + +## Instructions for Running Integration Tests Locally +1. Launch danswer (using Docker or running with a debugger), ensuring the API server is running on port 8080. + a. 
If you'd like to set environment variables, you can do so by creating a `.env` file in the danswer/backend/tests/integration/ directory.
+2. Navigate to `danswer/backend`.
+3. Run the following command in the terminal:
+   ```sh
+   pytest -s tests/integration/tests/
+   ```
+   or to run all tests in a file:
+   ```sh
+   pytest -s tests/integration/tests/path_to/test_file.py
+   ```
+   or to run a single test:
+   ```sh
+   pytest -s tests/integration/tests/path_to/test_file.py::test_function_name
+   ```
+
+## Guidelines for Writing Integration Tests
+- As authentication is currently required for all tests, each test should start by creating a user.
+- Each test should ideally focus on a single API flow.
+- The test writer should consider failure cases and edge cases for the flow and write tests that check for them.
+- Every step of the test should be commented, describing what is being done and what the expected behavior is.
+- A summary of the test should also be given at the top of the test function.
+- When writing new tests, manager classes, manager functions, and test classes, try to copy the style of the ones that have already been written.
+- Be careful of scope creep!
+  - There is no need to overcomplicate every test by verifying after every single API call, as long as the case you would be verifying is covered elsewhere (ideally in a test focused on that case).
+  - For example: creating an admin user is done at the beginning of nearly every test, but we only need to verify that the user is actually an admin in the test focused on checking admin permissions. For every other test, we can just create the admin user and assume that the permissions work as expected.
+
+## Current Testing Limitations
+### Test coverage
+- Most tests do not have as much coverage as they could.
+- The "connector" tests in particular are very bare-bones because connector/cc_pair will be reworked soon.
+- The Global Curator role is not thoroughly tested.
+- The no-auth configuration is not tested at all.
+### Failure checking
+- While we test expected auth failures, we only check that the request failed at all.
+- We don't check that the return codes are what we expect.
+- This means that a test could be failing for a different reason than expected.
+- We should ensure that the proper codes are returned for each failure case.
+- We should also query the DB after each failure to ensure that it is in the expected state.
+### Scope/focus
+- The tests may be scoped sub-optimally.
+- The scopes of individual tests may overlap.
+
+## Current Testing Coverage
+Current testing coverage should be checked by reading the comments at the top of each test file.
+
+
+## TODO: Testing Coverage
+- Persona permissions testing
+- Read-only (and/or basic) user permissions
+  - Ensuring proper permission enforcement using the chat/doc_search endpoints
+- No auth
+
+## Ideas for integration testing design
+### Combine the "test" and "manager" classes
+This could make test writing a bit cleaner by preventing test writers from having to pass around objects into functions with which the objects have a 1:1 relationship.
+
+### Rework VespaClient
+Right now, it's used as a fixture and has to be passed around between manager classes.
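+A rough sketch of the alternative is below; it is purely illustrative, and the zero-argument construction is an assumption rather than the real `vespa_fixture` interface:
+```python
+# Hypothetical sketch: construct the Vespa client inside the helper that needs it,
+# instead of threading a pytest fixture through every manager call site.
+from tests.integration.common_utils.vespa import vespa_fixture
+
+
+def fetch_documents(doc_ids: list[str]) -> list[dict]:
+    vespa_client = vespa_fixture()  # assumed constructor; real setup may differ
+    return vespa_client.get_documents_by_id(doc_ids)["documents"]
+```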
+Could just be built where its used diff --git a/backend/tests/integration/common_utils/connectors.py b/backend/tests/integration/common_utils/connectors.py deleted file mode 100644 index e7734cec3c8..00000000000 --- a/backend/tests/integration/common_utils/connectors.py +++ /dev/null @@ -1,114 +0,0 @@ -import uuid -from typing import cast - -import requests -from pydantic import BaseModel - -from danswer.configs.constants import DocumentSource -from danswer.db.enums import ConnectorCredentialPairStatus -from tests.integration.common_utils.constants import API_SERVER_URL - - -class ConnectorCreationDetails(BaseModel): - connector_id: int - credential_id: int - cc_pair_id: int - - -class ConnectorClient: - @staticmethod - def create_connector( - name_prefix: str = "test_connector", credential_id: int | None = None - ) -> ConnectorCreationDetails: - unique_id = uuid.uuid4() - - connector_name = f"{name_prefix}_{unique_id}" - connector_data = { - "name": connector_name, - "source": DocumentSource.NOT_APPLICABLE, - "input_type": "load_state", - "connector_specific_config": {}, - "refresh_freq": 60, - "disabled": True, - } - response = requests.post( - f"{API_SERVER_URL}/manage/admin/connector", - json=connector_data, - ) - response.raise_for_status() - connector_id = response.json()["id"] - - # associate the credential with the connector - if not credential_id: - print("ID not specified, creating new credential") - # Create a new credential - credential_data = { - "credential_json": {}, - "admin_public": True, - "source": DocumentSource.NOT_APPLICABLE, - } - response = requests.post( - f"{API_SERVER_URL}/manage/credential", - json=credential_data, - ) - response.raise_for_status() - credential_id = cast(int, response.json()["id"]) - - cc_pair_metadata = {"name": f"test_cc_pair_{unique_id}", "is_public": True} - response = requests.put( - f"{API_SERVER_URL}/manage/connector/{connector_id}/credential/{credential_id}", - json=cc_pair_metadata, - ) - response.raise_for_status() - - # fetch the conenector credential pair id using the indexing status API - response = requests.get( - f"{API_SERVER_URL}/manage/admin/connector/indexing-status" - ) - response.raise_for_status() - indexing_statuses = response.json() - - cc_pair_id = None - for status in indexing_statuses: - if ( - status["connector"]["id"] == connector_id - and status["credential"]["id"] == credential_id - ): - cc_pair_id = status["cc_pair_id"] - break - - if cc_pair_id is None: - raise ValueError("Could not find the connector credential pair id") - - print( - f"Created connector with connector_id: {connector_id}, credential_id: {credential_id}, cc_pair_id: {cc_pair_id}" - ) - return ConnectorCreationDetails( - connector_id=int(connector_id), - credential_id=int(credential_id), - cc_pair_id=int(cc_pair_id), - ) - - @staticmethod - def update_connector_status( - cc_pair_id: int, status: ConnectorCredentialPairStatus - ) -> None: - response = requests.put( - f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair_id}/status", - json={"status": status}, - ) - response.raise_for_status() - - @staticmethod - def delete_connector(connector_id: int, credential_id: int) -> None: - response = requests.post( - f"{API_SERVER_URL}/manage/admin/deletion-attempt", - json={"connector_id": connector_id, "credential_id": credential_id}, - ) - response.raise_for_status() - - @staticmethod - def get_connectors() -> list[dict]: - response = requests.get(f"{API_SERVER_URL}/manage/connector") - response.raise_for_status() - return response.json() diff --git 
a/backend/tests/integration/common_utils/constants.py b/backend/tests/integration/common_utils/constants.py index efc98dde7de..7d729191cf6 100644 --- a/backend/tests/integration/common_utils/constants.py +++ b/backend/tests/integration/common_utils/constants.py @@ -5,3 +5,7 @@ API_SERVER_PORT = os.getenv("API_SERVER_PORT") or "8080" API_SERVER_URL = f"{API_SERVER_PROTOCOL}://{API_SERVER_HOST}:{API_SERVER_PORT}" MAX_DELAY = 30 + +GENERAL_HEADERS = {"Content-Type": "application/json"} + +NUM_DOCS = 5 diff --git a/backend/tests/integration/common_utils/document_sets.py b/backend/tests/integration/common_utils/document_sets.py deleted file mode 100644 index dc898611108..00000000000 --- a/backend/tests/integration/common_utils/document_sets.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import cast - -import requests - -from danswer.server.features.document_set.models import DocumentSet -from danswer.server.features.document_set.models import DocumentSetCreationRequest -from tests.integration.common_utils.constants import API_SERVER_URL - - -class DocumentSetClient: - @staticmethod - def create_document_set( - doc_set_creation_request: DocumentSetCreationRequest, - ) -> int: - response = requests.post( - f"{API_SERVER_URL}/manage/admin/document-set", - json=doc_set_creation_request.model_dump(), - ) - response.raise_for_status() - return cast(int, response.json()) - - @staticmethod - def fetch_document_sets() -> list[DocumentSet]: - response = requests.get(f"{API_SERVER_URL}/manage/document-set") - response.raise_for_status() - - document_sets = [ - DocumentSet.parse_obj(doc_set_data) for doc_set_data in response.json() - ] - return document_sets diff --git a/backend/tests/integration/common_utils/llm.py b/backend/tests/integration/common_utils/llm.py deleted file mode 100644 index ba8b89d6b4d..00000000000 --- a/backend/tests/integration/common_utils/llm.py +++ /dev/null @@ -1,62 +0,0 @@ -import os -from typing import cast - -import requests -from pydantic import BaseModel -from pydantic import PrivateAttr - -from danswer.server.manage.llm.models import LLMProviderUpsertRequest -from tests.integration.common_utils.constants import API_SERVER_URL - - -class LLMProvider(BaseModel): - provider: str - api_key: str - default_model_name: str - api_base: str | None = None - api_version: str | None = None - is_default: bool = True - - # only populated after creation - _provider_id: int | None = PrivateAttr() - - def create(self) -> int: - llm_provider = LLMProviderUpsertRequest( - name=self.provider, - provider=self.provider, - default_model_name=self.default_model_name, - api_key=self.api_key, - api_base=self.api_base, - api_version=self.api_version, - custom_config=None, - fast_default_model_name=None, - is_public=True, - groups=[], - display_model_names=None, - model_names=None, - ) - - response = requests.put( - f"{API_SERVER_URL}/admin/llm/provider", - json=llm_provider.dict(), - ) - response.raise_for_status() - - self._provider_id = cast(int, response.json()["id"]) - return self._provider_id - - def delete(self) -> None: - response = requests.delete( - f"{API_SERVER_URL}/admin/llm/provider/{self._provider_id}" - ) - response.raise_for_status() - - -def seed_default_openai_provider() -> LLMProvider: - llm = LLMProvider( - provider="openai", - default_model_name="gpt-4o-mini", - api_key=os.environ["OPENAI_API_KEY"], - ) - llm.create() - return llm diff --git a/backend/tests/integration/common_utils/managers/api_key.py b/backend/tests/integration/common_utils/managers/api_key.py new file mode 
100644 index 00000000000..2a90c22fd76 --- /dev/null +++ b/backend/tests/integration/common_utils/managers/api_key.py @@ -0,0 +1,92 @@ +from uuid import uuid4 + +import requests + +from danswer.db.models import UserRole +from ee.danswer.server.api_key.models import APIKeyArgs +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestAPIKey +from tests.integration.common_utils.test_models import DATestUser + + +class APIKeyManager: + @staticmethod + def create( + name: str | None = None, + api_key_role: UserRole = UserRole.ADMIN, + user_performing_action: DATestUser | None = None, + ) -> DATestAPIKey: + name = f"{name}-api-key" if name else f"test-api-key-{uuid4()}" + api_key_request = APIKeyArgs( + name=name, + role=api_key_role, + ) + api_key_response = requests.post( + f"{API_SERVER_URL}/admin/api-key", + json=api_key_request.model_dump(), + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + api_key_response.raise_for_status() + api_key = api_key_response.json() + result_api_key = DATestAPIKey( + api_key_id=api_key["api_key_id"], + api_key_display=api_key["api_key_display"], + api_key=api_key["api_key"], + api_key_name=name, + api_key_role=api_key_role, + user_id=api_key["user_id"], + headers=GENERAL_HEADERS, + ) + result_api_key.headers["Authorization"] = f"Bearer {result_api_key.api_key}" + return result_api_key + + @staticmethod + def delete( + api_key: DATestAPIKey, + user_performing_action: DATestUser | None = None, + ) -> None: + api_key_response = requests.delete( + f"{API_SERVER_URL}/admin/api-key/{api_key.api_key_id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + api_key_response.raise_for_status() + + @staticmethod + def get_all( + user_performing_action: DATestUser | None = None, + ) -> list[DATestAPIKey]: + api_key_response = requests.get( + f"{API_SERVER_URL}/admin/api-key", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + api_key_response.raise_for_status() + return [DATestAPIKey(**api_key) for api_key in api_key_response.json()] + + @staticmethod + def verify( + api_key: DATestAPIKey, + verify_deleted: bool = False, + user_performing_action: DATestUser | None = None, + ) -> None: + retrieved_keys = APIKeyManager.get_all( + user_performing_action=user_performing_action + ) + for key in retrieved_keys: + if key.api_key_id == api_key.api_key_id: + if verify_deleted: + raise ValueError("API Key found when it should have been deleted") + if ( + key.api_key_name == api_key.api_key_name + and key.api_key_role == api_key.api_key_role + ): + return + + if not verify_deleted: + raise Exception("API Key not found") diff --git a/backend/tests/integration/common_utils/managers/cc_pair.py b/backend/tests/integration/common_utils/managers/cc_pair.py new file mode 100644 index 00000000000..000bbac59d0 --- /dev/null +++ b/backend/tests/integration/common_utils/managers/cc_pair.py @@ -0,0 +1,314 @@ +import time +from datetime import datetime +from typing import Any +from uuid import uuid4 + +import requests + +from danswer.connectors.models import InputType +from danswer.db.enums import AccessType +from danswer.db.enums import ConnectorCredentialPairStatus +from danswer.db.enums import TaskStatus +from danswer.server.documents.models import CCPairPruningTask +from danswer.server.documents.models import 
ConnectorCredentialPairIdentifier +from danswer.server.documents.models import ConnectorIndexingStatus +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.constants import MAX_DELAY +from tests.integration.common_utils.managers.connector import ConnectorManager +from tests.integration.common_utils.managers.credential import CredentialManager +from tests.integration.common_utils.test_models import DATestCCPair +from tests.integration.common_utils.test_models import DATestUser + + +def _cc_pair_creator( + connector_id: int, + credential_id: int, + name: str | None = None, + access_type: AccessType = AccessType.PUBLIC, + groups: list[int] | None = None, + user_performing_action: DATestUser | None = None, +) -> DATestCCPair: + name = f"{name}-cc-pair" if name else f"test-cc-pair-{uuid4()}" + + request = { + "name": name, + "access_type": access_type, + "groups": groups or [], + } + + response = requests.put( + url=f"{API_SERVER_URL}/manage/connector/{connector_id}/credential/{credential_id}", + json=request, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return DATestCCPair( + id=response.json()["data"], + name=name, + connector_id=connector_id, + credential_id=credential_id, + access_type=access_type, + groups=groups or [], + ) + + +class CCPairManager: + @staticmethod + def create_from_scratch( + name: str | None = None, + access_type: AccessType = AccessType.PUBLIC, + groups: list[int] | None = None, + source: DocumentSource = DocumentSource.FILE, + input_type: InputType = InputType.LOAD_STATE, + connector_specific_config: dict[str, Any] | None = None, + credential_json: dict[str, Any] | None = None, + user_performing_action: DATestUser | None = None, + ) -> DATestCCPair: + connector = ConnectorManager.create( + name=name, + source=source, + input_type=input_type, + connector_specific_config=connector_specific_config, + is_public=(access_type == AccessType.PUBLIC), + groups=groups, + user_performing_action=user_performing_action, + ) + credential = CredentialManager.create( + credential_json=credential_json, + name=name, + source=source, + curator_public=(access_type == AccessType.PUBLIC), + groups=groups, + user_performing_action=user_performing_action, + ) + return _cc_pair_creator( + connector_id=connector.id, + credential_id=credential.id, + name=name, + access_type=access_type, + groups=groups, + user_performing_action=user_performing_action, + ) + + @staticmethod + def create( + connector_id: int, + credential_id: int, + name: str | None = None, + access_type: AccessType = AccessType.PUBLIC, + groups: list[int] | None = None, + user_performing_action: DATestUser | None = None, + ) -> DATestCCPair: + return _cc_pair_creator( + connector_id=connector_id, + credential_id=credential_id, + name=name, + access_type=access_type, + groups=groups, + user_performing_action=user_performing_action, + ) + + @staticmethod + def pause_cc_pair( + cc_pair: DATestCCPair, + user_performing_action: DATestUser | None = None, + ) -> None: + result = requests.put( + url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/status", + json={"status": "PAUSED"}, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + result.raise_for_status() + + @staticmethod + def delete( + cc_pair: DATestCCPair, + 
user_performing_action: DATestUser | None = None, + ) -> None: + cc_pair_identifier = ConnectorCredentialPairIdentifier( + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + ) + result = requests.post( + url=f"{API_SERVER_URL}/manage/admin/deletion-attempt", + json=cc_pair_identifier.model_dump(), + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + result.raise_for_status() + + @staticmethod + def get_one( + cc_pair_id: int, + user_performing_action: DATestUser | None = None, + ) -> ConnectorIndexingStatus | None: + response = requests.get( + f"{API_SERVER_URL}/manage/admin/connector/indexing-status", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + for cc_pair_json in response.json(): + cc_pair = ConnectorIndexingStatus(**cc_pair_json) + if cc_pair.cc_pair_id == cc_pair_id: + return cc_pair + + return None + + @staticmethod + def get_all( + user_performing_action: DATestUser | None = None, + ) -> list[ConnectorIndexingStatus]: + response = requests.get( + f"{API_SERVER_URL}/manage/admin/connector/indexing-status", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return [ConnectorIndexingStatus(**cc_pair) for cc_pair in response.json()] + + @staticmethod + def verify( + cc_pair: DATestCCPair, + verify_deleted: bool = False, + user_performing_action: DATestUser | None = None, + ) -> None: + all_cc_pairs = CCPairManager.get_all(user_performing_action) + for retrieved_cc_pair in all_cc_pairs: + if retrieved_cc_pair.cc_pair_id == cc_pair.id: + if verify_deleted: + # We assume that this check will be performed after the deletion is + # already waited for + raise ValueError( + f"CC pair {cc_pair.id} found but should be deleted" + ) + if ( + retrieved_cc_pair.name == cc_pair.name + and retrieved_cc_pair.connector.id == cc_pair.connector_id + and retrieved_cc_pair.credential.id == cc_pair.credential_id + and retrieved_cc_pair.access_type == cc_pair.access_type + and set(retrieved_cc_pair.groups) == set(cc_pair.groups) + ): + return + + if not verify_deleted: + raise ValueError(f"CC pair {cc_pair.id} not found") + + @staticmethod + def wait_for_indexing( + cc_pair_test: DATestCCPair, + after: datetime, + timeout: float = MAX_DELAY, + user_performing_action: DATestUser | None = None, + ) -> None: + """after: Wait for an indexing success time after this time""" + start = time.monotonic() + while True: + cc_pairs = CCPairManager.get_all(user_performing_action) + for cc_pair in cc_pairs: + if cc_pair.cc_pair_id != cc_pair_test.id: + continue + + if cc_pair.last_success and cc_pair.last_success > after: + print(f"cc_pair {cc_pair_test.id} indexing complete.") + return + + elapsed = time.monotonic() - start + if elapsed > timeout: + raise TimeoutError( + f"CC pair indexing was not completed within {timeout} seconds" + ) + + print( + f"Waiting for CC indexing to complete. 
elapsed={elapsed:.2f} timeout={timeout}" + ) + time.sleep(5) + + @staticmethod + def prune( + cc_pair: DATestCCPair, + user_performing_action: DATestUser | None = None, + ) -> None: + result = requests.post( + url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/prune", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + result.raise_for_status() + + @staticmethod + def get_prune_task( + cc_pair: DATestCCPair, + user_performing_action: DATestUser | None = None, + ) -> CCPairPruningTask: + response = requests.get( + url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/prune", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return CCPairPruningTask(**response.json()) + + @staticmethod + def wait_for_prune( + cc_pair_test: DATestCCPair, + after: datetime, + timeout: float = MAX_DELAY, + user_performing_action: DATestUser | None = None, + ) -> None: + """after: The task register time must be after this time.""" + start = time.monotonic() + while True: + task = CCPairManager.get_prune_task(cc_pair_test, user_performing_action) + if not task: + raise ValueError("Prune task not found.") + + if not task.register_time or task.register_time < after: + raise ValueError("Prune task register time is too early.") + + if task.status == TaskStatus.SUCCESS: + # Pruning succeeded + return + + elapsed = time.monotonic() - start + if elapsed > timeout: + raise TimeoutError( + f"CC pair pruning was not completed within {timeout} seconds" + ) + + print( + f"Waiting for CC pruning to complete. elapsed={elapsed:.2f} timeout={timeout}" + ) + time.sleep(5) + + @staticmethod + def wait_for_deletion_completion( + user_performing_action: DATestUser | None = None, + ) -> None: + start = time.monotonic() + while True: + cc_pairs = CCPairManager.get_all(user_performing_action) + if all( + cc_pair.cc_pair_status != ConnectorCredentialPairStatus.DELETING + for cc_pair in cc_pairs + ): + return + + if time.monotonic() - start > MAX_DELAY: + raise TimeoutError( + f"CC pairs deletion was not completed within the {MAX_DELAY} seconds" + ) + else: + print("Some CC pairs are still being deleted, waiting...") + time.sleep(2) diff --git a/backend/tests/integration/common_utils/managers/chat.py b/backend/tests/integration/common_utils/managers/chat.py new file mode 100644 index 00000000000..696baa2ad8b --- /dev/null +++ b/backend/tests/integration/common_utils/managers/chat.py @@ -0,0 +1,160 @@ +import json + +import requests +from requests.models import Response + +from danswer.file_store.models import FileDescriptor +from danswer.llm.override_models import LLMOverride +from danswer.llm.override_models import PromptOverride +from danswer.one_shot_answer.models import DirectQARequest +from danswer.one_shot_answer.models import ThreadMessage +from danswer.search.models import RetrievalDetails +from danswer.server.query_and_chat.models import ChatSessionCreationRequest +from danswer.server.query_and_chat.models import CreateChatMessageRequest +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestChatMessage +from tests.integration.common_utils.test_models import DATestChatSession +from tests.integration.common_utils.test_models import DATestUser +from tests.integration.common_utils.test_models import StreamedResponse + + +class ChatSessionManager: + 
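+    """Test helpers for chat: create sessions, send messages, and parse the streamed response into a StreamedResponse."""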
@staticmethod + def create( + persona_id: int = -1, + description: str = "Test chat session", + user_performing_action: DATestUser | None = None, + ) -> DATestChatSession: + chat_session_creation_req = ChatSessionCreationRequest( + persona_id=persona_id, description=description + ) + response = requests.post( + f"{API_SERVER_URL}/chat/create-chat-session", + json=chat_session_creation_req.model_dump(), + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + chat_session_id = response.json()["chat_session_id"] + return DATestChatSession( + id=chat_session_id, persona_id=persona_id, description=description + ) + + @staticmethod + def send_message( + chat_session_id: int, + message: str, + parent_message_id: int | None = None, + user_performing_action: DATestUser | None = None, + file_descriptors: list[FileDescriptor] = [], + prompt_id: int | None = None, + search_doc_ids: list[int] | None = None, + retrieval_options: RetrievalDetails | None = None, + query_override: str | None = None, + regenerate: bool | None = None, + llm_override: LLMOverride | None = None, + prompt_override: PromptOverride | None = None, + alternate_assistant_id: int | None = None, + use_existing_user_message: bool = False, + ) -> StreamedResponse: + chat_message_req = CreateChatMessageRequest( + chat_session_id=chat_session_id, + parent_message_id=parent_message_id, + message=message, + file_descriptors=file_descriptors or [], + prompt_id=prompt_id, + search_doc_ids=search_doc_ids or [], + retrieval_options=retrieval_options, + query_override=query_override, + regenerate=regenerate, + llm_override=llm_override, + prompt_override=prompt_override, + alternate_assistant_id=alternate_assistant_id, + use_existing_user_message=use_existing_user_message, + ) + + response = requests.post( + f"{API_SERVER_URL}/chat/send-message", + json=chat_message_req.model_dump(), + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + stream=True, + ) + + return ChatSessionManager.analyze_response(response) + + @staticmethod + def get_answer_with_quote( + persona_id: int, + message: str, + user_performing_action: DATestUser | None = None, + ) -> StreamedResponse: + direct_qa_request = DirectQARequest( + messages=[ThreadMessage(message=message)], + prompt_id=None, + persona_id=persona_id, + ) + + response = requests.post( + f"{API_SERVER_URL}/query/stream-answer-with-quote", + json=direct_qa_request.model_dump(), + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + stream=True, + ) + response.raise_for_status() + + return ChatSessionManager.analyze_response(response) + + @staticmethod + def analyze_response(response: Response) -> StreamedResponse: + response_data = [ + json.loads(line.decode("utf-8")) for line in response.iter_lines() if line + ] + + analyzed = StreamedResponse() + + for data in response_data: + if "rephrased_query" in data: + analyzed.rephrased_query = data["rephrased_query"] + elif "tool_name" in data: + analyzed.tool_name = data["tool_name"] + analyzed.tool_result = ( + data.get("tool_result") + if analyzed.tool_name == "run_search" + else None + ) + elif "relevance_summaries" in data: + analyzed.relevance_summaries = data["relevance_summaries"] + elif "answer_piece" in data and data["answer_piece"]: + analyzed.full_message += data["answer_piece"] + + return analyzed + + @staticmethod + def get_chat_history( + chat_session: DATestChatSession, + user_performing_action: 
DATestUser | None = None, + ) -> list[DATestChatMessage]: + response = requests.get( + f"{API_SERVER_URL}/chat/history/{chat_session.id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + return [ + DATestChatMessage( + id=msg["id"], + chat_session_id=chat_session.id, + parent_message_id=msg.get("parent_message_id"), + message=msg["message"], + response=msg.get("response", ""), + ) + for msg in response.json() + ] diff --git a/backend/tests/integration/common_utils/managers/connector.py b/backend/tests/integration/common_utils/managers/connector.py new file mode 100644 index 00000000000..d904bc6f226 --- /dev/null +++ b/backend/tests/integration/common_utils/managers/connector.py @@ -0,0 +1,124 @@ +from typing import Any +from uuid import uuid4 + +import requests + +from danswer.connectors.models import InputType +from danswer.server.documents.models import ConnectorUpdateRequest +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestConnector +from tests.integration.common_utils.test_models import DATestUser + + +class ConnectorManager: + @staticmethod + def create( + name: str | None = None, + source: DocumentSource = DocumentSource.FILE, + input_type: InputType = InputType.LOAD_STATE, + connector_specific_config: dict[str, Any] | None = None, + is_public: bool = True, + groups: list[int] | None = None, + user_performing_action: DATestUser | None = None, + ) -> DATestConnector: + name = f"{name}-connector" if name else f"test-connector-{uuid4()}" + + connector_update_request = ConnectorUpdateRequest( + name=name, + source=source, + input_type=input_type, + connector_specific_config=connector_specific_config or {}, + is_public=is_public, + groups=groups or [], + ) + + response = requests.post( + url=f"{API_SERVER_URL}/manage/admin/connector", + json=connector_update_request.model_dump(), + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + response_data = response.json() + return DATestConnector( + id=response_data.get("id"), + name=name, + source=source, + input_type=input_type, + connector_specific_config=connector_specific_config or {}, + groups=groups, + is_public=is_public, + ) + + @staticmethod + def edit( + connector: DATestConnector, + user_performing_action: DATestUser | None = None, + ) -> None: + response = requests.patch( + url=f"{API_SERVER_URL}/manage/admin/connector/{connector.id}", + json=connector.model_dump(exclude={"id"}), + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + @staticmethod + def delete( + connector: DATestConnector, + user_performing_action: DATestUser | None = None, + ) -> None: + response = requests.delete( + url=f"{API_SERVER_URL}/manage/admin/connector/{connector.id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + @staticmethod + def get_all( + user_performing_action: DATestUser | None = None, + ) -> list[DATestConnector]: + response = requests.get( + url=f"{API_SERVER_URL}/manage/connector", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return [ + 
DATestConnector( + id=conn.get("id"), + name=conn.get("name", ""), + source=conn.get("source", DocumentSource.FILE), + input_type=conn.get("input_type", InputType.LOAD_STATE), + connector_specific_config=conn.get("connector_specific_config", {}), + ) + for conn in response.json() + ] + + @staticmethod + def get( + connector_id: int, user_performing_action: DATestUser | None = None + ) -> DATestConnector: + response = requests.get( + url=f"{API_SERVER_URL}/manage/connector/{connector_id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + conn = response.json() + return DATestConnector( + id=conn.get("id"), + name=conn.get("name", ""), + source=conn.get("source", DocumentSource.FILE), + input_type=conn.get("input_type", InputType.LOAD_STATE), + connector_specific_config=conn.get("connector_specific_config", {}), + ) diff --git a/backend/tests/integration/common_utils/managers/credential.py b/backend/tests/integration/common_utils/managers/credential.py new file mode 100644 index 00000000000..8f729e4b06c --- /dev/null +++ b/backend/tests/integration/common_utils/managers/credential.py @@ -0,0 +1,129 @@ +from typing import Any +from uuid import uuid4 + +import requests + +from danswer.server.documents.models import CredentialSnapshot +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestCredential +from tests.integration.common_utils.test_models import DATestUser + + +class CredentialManager: + @staticmethod + def create( + credential_json: dict[str, Any] | None = None, + admin_public: bool = True, + name: str | None = None, + source: DocumentSource = DocumentSource.FILE, + curator_public: bool = True, + groups: list[int] | None = None, + user_performing_action: DATestUser | None = None, + ) -> DATestCredential: + name = f"{name}-credential" if name else f"test-credential-{uuid4()}" + + credential_request = { + "name": name, + "credential_json": credential_json or {}, + "admin_public": admin_public, + "source": source, + "curator_public": curator_public, + "groups": groups or [], + } + response = requests.post( + url=f"{API_SERVER_URL}/manage/credential", + json=credential_request, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + + response.raise_for_status() + return DATestCredential( + id=response.json()["id"], + name=name, + credential_json=credential_json or {}, + admin_public=admin_public, + source=source, + curator_public=curator_public, + groups=groups or [], + ) + + @staticmethod + def edit( + credential: DATestCredential, + user_performing_action: DATestUser | None = None, + ) -> None: + request = credential.model_dump(include={"name", "credential_json"}) + response = requests.put( + url=f"{API_SERVER_URL}/manage/admin/credential/{credential.id}", + json=request, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + @staticmethod + def delete( + credential: DATestCredential, + user_performing_action: DATestUser | None = None, + ) -> None: + response = requests.delete( + url=f"{API_SERVER_URL}/manage/credential/{credential.id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + @staticmethod + def 
get( + credential_id: int, user_performing_action: DATestUser | None = None + ) -> CredentialSnapshot: + response = requests.get( + url=f"{API_SERVER_URL}/manage/credential/{credential_id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return CredentialSnapshot(**response.json()) + + @staticmethod + def get_all( + user_performing_action: DATestUser | None = None, + ) -> list[CredentialSnapshot]: + response = requests.get( + f"{API_SERVER_URL}/manage/credential", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return [CredentialSnapshot(**cred) for cred in response.json()] + + @staticmethod + def verify( + credential: DATestCredential, + verify_deleted: bool = False, + user_performing_action: DATestUser | None = None, + ) -> None: + all_credentials = CredentialManager.get_all(user_performing_action) + for fetched_credential in all_credentials: + if credential.id == fetched_credential.id: + if verify_deleted: + raise ValueError( + f"Credential {credential.id} found but should be deleted" + ) + if ( + credential.name == fetched_credential.name + and credential.admin_public == fetched_credential.admin_public + and credential.source == fetched_credential.source + and credential.curator_public == fetched_credential.curator_public + ): + return + if not verify_deleted: + raise ValueError(f"Credential {credential.id} not found") diff --git a/backend/tests/integration/common_utils/managers/document.py b/backend/tests/integration/common_utils/managers/document.py new file mode 100644 index 00000000000..eecd75fa157 --- /dev/null +++ b/backend/tests/integration/common_utils/managers/document.py @@ -0,0 +1,184 @@ +from uuid import uuid4 + +import requests + +from danswer.configs.constants import DocumentSource +from danswer.db.enums import AccessType +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.constants import NUM_DOCS +from tests.integration.common_utils.managers.api_key import DATestAPIKey +from tests.integration.common_utils.managers.cc_pair import DATestCCPair +from tests.integration.common_utils.test_models import DATestUser +from tests.integration.common_utils.test_models import SimpleTestDocument +from tests.integration.common_utils.vespa import vespa_fixture + + +def _verify_document_permissions( + retrieved_doc: dict, + cc_pair: DATestCCPair, + doc_set_names: list[str] | None = None, + group_names: list[str] | None = None, + doc_creating_user: DATestUser | None = None, +) -> None: + acl_keys = set(retrieved_doc["access_control_list"].keys()) + print(f"ACL keys: {acl_keys}") + if cc_pair.access_type == AccessType.PUBLIC: + if "PUBLIC" not in acl_keys: + raise ValueError( + f"Document {retrieved_doc['document_id']} is public but" + " does not have the PUBLIC ACL key" + ) + + if doc_creating_user is not None: + if f"user_email:{doc_creating_user.email}" not in acl_keys: + raise ValueError( + f"Document {retrieved_doc['document_id']} was created by user" + f" {doc_creating_user.email} but does not have the user_email:{doc_creating_user.email} ACL key" + ) + + if group_names is not None: + expected_group_keys = {f"group:{group_name}" for group_name in group_names} + found_group_keys = {key for key in acl_keys if key.startswith("group:")} + if found_group_keys != expected_group_keys: + raise ValueError( + 
f"Document {retrieved_doc['document_id']} has incorrect group ACL keys. Found: {found_group_keys}, \n" + f"Expected: {expected_group_keys}" + ) + + if doc_set_names is not None: + found_doc_set_names = set(retrieved_doc.get("document_sets", {}).keys()) + if found_doc_set_names != set(doc_set_names): + raise ValueError( + f"Document set names mismatch. \nFound: {found_doc_set_names}, \n" + f"Expected: {set(doc_set_names)}" + ) + + +def _generate_dummy_document( + document_id: str, + cc_pair_id: int, + content: str | None = None, +) -> dict: + text = content if content else f"This is test document {document_id}" + return { + "document": { + "id": document_id, + "sections": [ + { + "text": text, + "link": f"{document_id}", + } + ], + "source": DocumentSource.NOT_APPLICABLE, + # just for testing metadata + "metadata": {"document_id": document_id}, + "semantic_identifier": f"Test Document {document_id}", + "from_ingestion_api": True, + }, + "cc_pair_id": cc_pair_id, + } + + +class DocumentManager: + @staticmethod + def seed_dummy_docs( + cc_pair: DATestCCPair, + num_docs: int = NUM_DOCS, + document_ids: list[str] | None = None, + api_key: DATestAPIKey | None = None, + ) -> list[SimpleTestDocument]: + # Use provided document_ids if available, otherwise generate random UUIDs + if document_ids is None: + document_ids = [f"test-doc-{uuid4()}" for _ in range(num_docs)] + else: + num_docs = len(document_ids) + # Create and ingest some documents + documents: list[dict] = [] + for document_id in document_ids: + document = _generate_dummy_document(document_id, cc_pair.id) + documents.append(document) + response = requests.post( + f"{API_SERVER_URL}/danswer-api/ingestion", + json=document, + headers=api_key.headers if api_key else GENERAL_HEADERS, + ) + response.raise_for_status() + + print("Seeding completed successfully.") + return [ + SimpleTestDocument( + id=document["document"]["id"], + content=document["document"]["sections"][0]["text"], + ) + for document in documents + ] + + @staticmethod + def seed_doc_with_content( + cc_pair: DATestCCPair, + content: str, + document_id: str | None = None, + api_key: DATestAPIKey | None = None, + ) -> SimpleTestDocument: + # Use provided document_ids if available, otherwise generate random UUIDs + if document_id is None: + document_id = f"test-doc-{uuid4()}" + # Create and ingest some documents + document: dict = _generate_dummy_document(document_id, cc_pair.id, content) + response = requests.post( + f"{API_SERVER_URL}/danswer-api/ingestion", + json=document, + headers=api_key.headers if api_key else GENERAL_HEADERS, + ) + response.raise_for_status() + + print("Seeding completed successfully.") + + return SimpleTestDocument( + id=document["document"]["id"], + content=document["document"]["sections"][0]["text"], + ) + + @staticmethod + def verify( + vespa_client: vespa_fixture, + cc_pair: DATestCCPair, + # If None, will not check doc sets or groups + # If empty list, will check for empty doc sets or groups + doc_set_names: list[str] | None = None, + group_names: list[str] | None = None, + doc_creating_user: DATestUser | None = None, + verify_deleted: bool = False, + ) -> None: + doc_ids = [document.id for document in cc_pair.documents] + retrieved_docs_dict = vespa_client.get_documents_by_id(doc_ids)["documents"] + retrieved_docs = { + doc["fields"]["document_id"]: doc["fields"] for doc in retrieved_docs_dict + } + # Left this here for debugging purposes. 
+ # import json + # for doc in retrieved_docs.values(): + # printable_doc = doc.copy() + # print(printable_doc.keys()) + # printable_doc.pop("embeddings") + # printable_doc.pop("title_embedding") + # print(json.dumps(printable_doc, indent=2)) + + for document in cc_pair.documents: + retrieved_doc = retrieved_docs.get(document.id) + if not retrieved_doc: + if not verify_deleted: + raise ValueError(f"Document not found: {document.id}") + continue + if verify_deleted: + raise ValueError( + f"Document found when it should be deleted: {document.id}" + ) + _verify_document_permissions( + retrieved_doc, + cc_pair, + doc_set_names, + group_names, + doc_creating_user, + ) diff --git a/backend/tests/integration/common_utils/managers/document_set.py b/backend/tests/integration/common_utils/managers/document_set.py new file mode 100644 index 00000000000..cd6936602ea --- /dev/null +++ b/backend/tests/integration/common_utils/managers/document_set.py @@ -0,0 +1,171 @@ +import time +from uuid import uuid4 + +import requests + +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.constants import MAX_DELAY +from tests.integration.common_utils.test_models import DATestDocumentSet +from tests.integration.common_utils.test_models import DATestUser + + +class DocumentSetManager: + @staticmethod + def create( + name: str | None = None, + description: str | None = None, + cc_pair_ids: list[int] | None = None, + is_public: bool = True, + users: list[str] | None = None, + groups: list[int] | None = None, + user_performing_action: DATestUser | None = None, + ) -> DATestDocumentSet: + if name is None: + name = f"test_doc_set_{str(uuid4())}" + + doc_set_creation_request = { + "name": name, + "description": description or name, + "cc_pair_ids": cc_pair_ids or [], + "is_public": is_public, + "users": users or [], + "groups": groups or [], + } + + response = requests.post( + f"{API_SERVER_URL}/manage/admin/document-set", + json=doc_set_creation_request, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + return DATestDocumentSet( + id=int(response.json()), + name=name, + description=description or name, + cc_pair_ids=cc_pair_ids or [], + is_public=is_public, + is_up_to_date=True, + users=users or [], + groups=groups or [], + ) + + @staticmethod + def edit( + document_set: DATestDocumentSet, + user_performing_action: DATestUser | None = None, + ) -> bool: + doc_set_update_request = { + "id": document_set.id, + "description": document_set.description, + "cc_pair_ids": document_set.cc_pair_ids, + "is_public": document_set.is_public, + "users": document_set.users, + "groups": document_set.groups, + } + response = requests.patch( + f"{API_SERVER_URL}/manage/admin/document-set", + json=doc_set_update_request, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return True + + @staticmethod + def delete( + document_set: DATestDocumentSet, + user_performing_action: DATestUser | None = None, + ) -> bool: + response = requests.delete( + f"{API_SERVER_URL}/manage/admin/document-set/{document_set.id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return True + + @staticmethod + def get_all( + user_performing_action: DATestUser | None = None, + ) -> list[DATestDocumentSet]: + 
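+        """Fetch all document sets from the /manage/document-set endpoint as DATestDocumentSet objects."""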
response = requests.get( + f"{API_SERVER_URL}/manage/document-set", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return [ + DATestDocumentSet( + id=doc_set["id"], + name=doc_set["name"], + description=doc_set["description"], + cc_pair_ids=[ + cc_pair["id"] for cc_pair in doc_set["cc_pair_descriptors"] + ], + is_public=doc_set["is_public"], + is_up_to_date=doc_set["is_up_to_date"], + users=doc_set["users"], + groups=doc_set["groups"], + ) + for doc_set in response.json() + ] + + @staticmethod + def wait_for_sync( + document_sets_to_check: list[DATestDocumentSet] | None = None, + user_performing_action: DATestUser | None = None, + ) -> None: + # wait for document sets to be synced + start = time.time() + while True: + doc_sets = DocumentSetManager.get_all(user_performing_action) + if document_sets_to_check: + check_ids = {doc_set.id for doc_set in document_sets_to_check} + doc_set_ids = {doc_set.id for doc_set in doc_sets} + if not check_ids.issubset(doc_set_ids): + raise RuntimeError("Document set not found") + doc_sets = [doc_set for doc_set in doc_sets if doc_set.id in check_ids] + all_up_to_date = all(doc_set.is_up_to_date for doc_set in doc_sets) + + if all_up_to_date: + break + + if time.time() - start > MAX_DELAY: + raise TimeoutError( + f"Document sets were not synced within the {MAX_DELAY} seconds" + ) + else: + print("Document sets were not synced yet, waiting...") + + time.sleep(2) + + @staticmethod + def verify( + document_set: DATestDocumentSet, + verify_deleted: bool = False, + user_performing_action: DATestUser | None = None, + ) -> None: + doc_sets = DocumentSetManager.get_all(user_performing_action) + for doc_set in doc_sets: + if doc_set.id == document_set.id: + if verify_deleted: + raise ValueError( + f"Document set {document_set.id} found but should have been deleted" + ) + if ( + doc_set.name == document_set.name + and set(doc_set.cc_pair_ids) == set(document_set.cc_pair_ids) + and doc_set.is_public == document_set.is_public + and set(doc_set.users) == set(document_set.users) + and set(doc_set.groups) == set(document_set.groups) + ): + return + if not verify_deleted: + raise ValueError(f"Document set {document_set.id} not found") diff --git a/backend/tests/integration/common_utils/managers/llm_provider.py b/backend/tests/integration/common_utils/managers/llm_provider.py new file mode 100644 index 00000000000..cde75284ca8 --- /dev/null +++ b/backend/tests/integration/common_utils/managers/llm_provider.py @@ -0,0 +1,130 @@ +import os +from uuid import uuid4 + +import requests + +from danswer.server.manage.llm.models import FullLLMProvider +from danswer.server.manage.llm.models import LLMProviderUpsertRequest +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestLLMProvider +from tests.integration.common_utils.test_models import DATestUser + + +class LLMProviderManager: + @staticmethod + def create( + name: str | None = None, + provider: str | None = None, + api_key: str | None = None, + default_model_name: str | None = None, + api_base: str | None = None, + api_version: str | None = None, + groups: list[int] | None = None, + is_public: bool | None = None, + user_performing_action: DATestUser | None = None, + ) -> DATestLLMProvider: + print("Seeding LLM Providers...") + + llm_provider = LLMProviderUpsertRequest( + name=name or 
f"test-provider-{uuid4()}", + provider=provider or "openai", + default_model_name=default_model_name or "gpt-4o-mini", + api_key=api_key or os.environ["OPENAI_API_KEY"], + api_base=api_base, + api_version=api_version, + custom_config=None, + fast_default_model_name=default_model_name or "gpt-4o-mini", + is_public=is_public or True, + groups=groups or [], + display_model_names=None, + model_names=None, + ) + + llm_response = requests.put( + f"{API_SERVER_URL}/admin/llm/provider", + json=llm_provider.model_dump(), + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + llm_response.raise_for_status() + response_data = llm_response.json() + import json + + print(json.dumps(response_data, indent=4)) + result_llm = DATestLLMProvider( + id=response_data["id"], + name=response_data["name"], + provider=response_data["provider"], + api_key=response_data["api_key"], + default_model_name=response_data["default_model_name"], + is_public=response_data["is_public"], + groups=response_data["groups"], + api_base=response_data["api_base"], + api_version=response_data["api_version"], + ) + + set_default_response = requests.post( + f"{API_SERVER_URL}/admin/llm/provider/{llm_response.json()['id']}/default", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + set_default_response.raise_for_status() + + return result_llm + + @staticmethod + def delete( + llm_provider: DATestLLMProvider, + user_performing_action: DATestUser | None = None, + ) -> bool: + response = requests.delete( + f"{API_SERVER_URL}/admin/llm/provider/{llm_provider.id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return True + + @staticmethod + def get_all( + user_performing_action: DATestUser | None = None, + ) -> list[FullLLMProvider]: + response = requests.get( + f"{API_SERVER_URL}/admin/llm/provider", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return [FullLLMProvider(**ug) for ug in response.json()] + + @staticmethod + def verify( + llm_provider: DATestLLMProvider, + verify_deleted: bool = False, + user_performing_action: DATestUser | None = None, + ) -> None: + all_llm_providers = LLMProviderManager.get_all(user_performing_action) + for fetched_llm_provider in all_llm_providers: + if llm_provider.id == fetched_llm_provider.id: + if verify_deleted: + raise ValueError( + f"User group {llm_provider.id} found but should be deleted" + ) + fetched_llm_groups = set(fetched_llm_provider.groups) + llm_provider_groups = set(llm_provider.groups) + if ( + fetched_llm_groups == llm_provider_groups + and llm_provider.provider == fetched_llm_provider.provider + and llm_provider.api_key == fetched_llm_provider.api_key + and llm_provider.default_model_name + == fetched_llm_provider.default_model_name + and llm_provider.is_public == fetched_llm_provider.is_public + ): + return + if not verify_deleted: + raise ValueError(f"User group {llm_provider.id} not found") diff --git a/backend/tests/integration/common_utils/managers/persona.py b/backend/tests/integration/common_utils/managers/persona.py new file mode 100644 index 00000000000..4e8e58224fb --- /dev/null +++ b/backend/tests/integration/common_utils/managers/persona.py @@ -0,0 +1,214 @@ +from uuid import uuid4 + +import requests + +from danswer.search.enums import RecencyBiasSetting +from danswer.server.features.persona.models import 
PersonaSnapshot +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestPersona +from tests.integration.common_utils.test_models import DATestUser + + +class PersonaManager: + @staticmethod + def create( + name: str | None = None, + description: str | None = None, + num_chunks: float = 5, + llm_relevance_filter: bool = True, + is_public: bool = True, + llm_filter_extraction: bool = True, + recency_bias: RecencyBiasSetting = RecencyBiasSetting.AUTO, + prompt_ids: list[int] | None = None, + document_set_ids: list[int] | None = None, + tool_ids: list[int] | None = None, + llm_model_provider_override: str | None = None, + llm_model_version_override: str | None = None, + users: list[str] | None = None, + groups: list[int] | None = None, + user_performing_action: DATestUser | None = None, + ) -> DATestPersona: + name = name or f"test-persona-{uuid4()}" + description = description or f"Description for {name}" + + persona_creation_request = { + "name": name, + "description": description, + "num_chunks": num_chunks, + "llm_relevance_filter": llm_relevance_filter, + "is_public": is_public, + "llm_filter_extraction": llm_filter_extraction, + "recency_bias": recency_bias, + "prompt_ids": prompt_ids or [], + "document_set_ids": document_set_ids or [], + "tool_ids": tool_ids or [], + "llm_model_provider_override": llm_model_provider_override, + "llm_model_version_override": llm_model_version_override, + "users": users or [], + "groups": groups or [], + } + + response = requests.post( + f"{API_SERVER_URL}/persona", + json=persona_creation_request, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + persona_data = response.json() + + return DATestPersona( + id=persona_data["id"], + name=name, + description=description, + num_chunks=num_chunks, + llm_relevance_filter=llm_relevance_filter, + is_public=is_public, + llm_filter_extraction=llm_filter_extraction, + recency_bias=recency_bias, + prompt_ids=prompt_ids or [], + document_set_ids=document_set_ids or [], + tool_ids=tool_ids or [], + llm_model_provider_override=llm_model_provider_override, + llm_model_version_override=llm_model_version_override, + users=users or [], + groups=groups or [], + ) + + @staticmethod + def edit( + persona: DATestPersona, + name: str | None = None, + description: str | None = None, + num_chunks: float | None = None, + llm_relevance_filter: bool | None = None, + is_public: bool | None = None, + llm_filter_extraction: bool | None = None, + recency_bias: RecencyBiasSetting | None = None, + prompt_ids: list[int] | None = None, + document_set_ids: list[int] | None = None, + tool_ids: list[int] | None = None, + llm_model_provider_override: str | None = None, + llm_model_version_override: str | None = None, + users: list[str] | None = None, + groups: list[int] | None = None, + user_performing_action: DATestUser | None = None, + ) -> DATestPersona: + persona_update_request = { + "name": name or persona.name, + "description": description or persona.description, + "num_chunks": num_chunks or persona.num_chunks, + "llm_relevance_filter": llm_relevance_filter + or persona.llm_relevance_filter, + "is_public": is_public or persona.is_public, + "llm_filter_extraction": llm_filter_extraction + or persona.llm_filter_extraction, + "recency_bias": recency_bias or persona.recency_bias, + "prompt_ids": prompt_ids or 
persona.prompt_ids, + "document_set_ids": document_set_ids or persona.document_set_ids, + "tool_ids": tool_ids or persona.tool_ids, + "llm_model_provider_override": llm_model_provider_override + or persona.llm_model_provider_override, + "llm_model_version_override": llm_model_version_override + or persona.llm_model_version_override, + "users": users or persona.users, + "groups": groups or persona.groups, + } + + response = requests.patch( + f"{API_SERVER_URL}/persona/{persona.id}", + json=persona_update_request, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + updated_persona_data = response.json() + + return DATestPersona( + id=updated_persona_data["id"], + name=updated_persona_data["name"], + description=updated_persona_data["description"], + num_chunks=updated_persona_data["num_chunks"], + llm_relevance_filter=updated_persona_data["llm_relevance_filter"], + is_public=updated_persona_data["is_public"], + llm_filter_extraction=updated_persona_data["llm_filter_extraction"], + recency_bias=updated_persona_data["recency_bias"], + prompt_ids=updated_persona_data["prompts"], + document_set_ids=updated_persona_data["document_sets"], + tool_ids=updated_persona_data["tools"], + llm_model_provider_override=updated_persona_data[ + "llm_model_provider_override" + ], + llm_model_version_override=updated_persona_data[ + "llm_model_version_override" + ], + users=[user["email"] for user in updated_persona_data["users"]], + groups=updated_persona_data["groups"], + ) + + @staticmethod + def get_all( + user_performing_action: DATestUser | None = None, + ) -> list[PersonaSnapshot]: + response = requests.get( + f"{API_SERVER_URL}/admin/persona", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return [PersonaSnapshot(**persona) for persona in response.json()] + + @staticmethod + def verify( + persona: DATestPersona, + user_performing_action: DATestUser | None = None, + ) -> bool: + all_personas = PersonaManager.get_all(user_performing_action) + for fetched_persona in all_personas: + if fetched_persona.id == persona.id: + return ( + fetched_persona.name == persona.name + and fetched_persona.description == persona.description + and fetched_persona.num_chunks == persona.num_chunks + and fetched_persona.llm_relevance_filter + == persona.llm_relevance_filter + and fetched_persona.is_public == persona.is_public + and fetched_persona.llm_filter_extraction + == persona.llm_filter_extraction + and fetched_persona.llm_model_provider_override + == persona.llm_model_provider_override + and fetched_persona.llm_model_version_override + == persona.llm_model_version_override + and set([prompt.id for prompt in fetched_persona.prompts]) + == set(persona.prompt_ids) + and set( + [ + document_set.id + for document_set in fetched_persona.document_sets + ] + ) + == set(persona.document_set_ids) + and set([tool.id for tool in fetched_persona.tools]) + == set(persona.tool_ids) + and set(user.email for user in fetched_persona.users) + == set(persona.users) + and set(fetched_persona.groups) == set(persona.groups) + ) + return False + + @staticmethod + def delete( + persona: DATestPersona, + user_performing_action: DATestUser | None = None, + ) -> bool: + response = requests.delete( + f"{API_SERVER_URL}/persona/{persona.id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + return response.ok diff --git 
a/backend/tests/integration/common_utils/managers/user.py b/backend/tests/integration/common_utils/managers/user.py new file mode 100644 index 00000000000..c299a5eb38a --- /dev/null +++ b/backend/tests/integration/common_utils/managers/user.py @@ -0,0 +1,124 @@ +from copy import deepcopy +from urllib.parse import urlencode +from uuid import uuid4 + +import requests + +from danswer.db.models import UserRole +from danswer.server.manage.models import AllUsersResponse +from danswer.server.models import FullUserSnapshot +from danswer.server.models import InvitedUserSnapshot +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestUser + + +class UserManager: + @staticmethod + def create( + name: str | None = None, + ) -> DATestUser: + if name is None: + name = f"test{str(uuid4())}" + + email = f"{name}@test.com" + password = "test" + + body = { + "email": email, + "username": email, + "password": password, + } + response = requests.post( + url=f"{API_SERVER_URL}/auth/register", + json=body, + headers=GENERAL_HEADERS, + ) + response.raise_for_status() + + test_user = DATestUser( + id=response.json()["id"], + email=email, + password=password, + headers=deepcopy(GENERAL_HEADERS), + ) + print(f"Created user {test_user.email}") + + test_user.headers["Cookie"] = UserManager.login_as_user(test_user) + + return test_user + + @staticmethod + def login_as_user(test_user: DATestUser) -> str: + data = urlencode( + { + "username": test_user.email, + "password": test_user.password, + } + ) + headers = test_user.headers.copy() + headers["Content-Type"] = "application/x-www-form-urlencoded" + + response = requests.post( + url=f"{API_SERVER_URL}/auth/login", + data=data, + headers=headers, + ) + response.raise_for_status() + result_cookie = next(iter(response.cookies), None) + + if not result_cookie: + raise Exception("Failed to login") + + print(f"Logged in as {test_user.email}") + return f"{result_cookie.name}={result_cookie.value}" + + @staticmethod + def verify_role(user_to_verify: DATestUser, target_role: UserRole) -> bool: + response = requests.get( + url=f"{API_SERVER_URL}/me", + headers=user_to_verify.headers, + ) + response.raise_for_status() + return target_role == UserRole(response.json().get("role", "")) + + @staticmethod + def set_role( + user_to_set: DATestUser, + target_role: UserRole, + user_to_perform_action: DATestUser | None = None, + ) -> None: + if user_to_perform_action is None: + user_to_perform_action = user_to_set + response = requests.patch( + url=f"{API_SERVER_URL}/manage/set-user-role", + json={"user_email": user_to_set.email, "new_role": target_role.value}, + headers=user_to_perform_action.headers, + ) + response.raise_for_status() + + @staticmethod + def verify( + user: DATestUser, user_to_perform_action: DATestUser | None = None + ) -> None: + if user_to_perform_action is None: + user_to_perform_action = user + response = requests.get( + url=f"{API_SERVER_URL}/manage/users", + headers=user_to_perform_action.headers + if user_to_perform_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + data = response.json() + all_users = AllUsersResponse( + accepted=[FullUserSnapshot(**user) for user in data["accepted"]], + invited=[InvitedUserSnapshot(**user) for user in data["invited"]], + accepted_pages=data["accepted_pages"], + invited_pages=data["invited_pages"], + ) + for accepted_user in all_users.accepted: + if 
accepted_user.email == user.email and accepted_user.id == user.id: + return + raise ValueError(f"User {user.email} not found") diff --git a/backend/tests/integration/common_utils/managers/user_group.py b/backend/tests/integration/common_utils/managers/user_group.py new file mode 100644 index 00000000000..baf2008b965 --- /dev/null +++ b/backend/tests/integration/common_utils/managers/user_group.py @@ -0,0 +1,180 @@ +import time +from uuid import uuid4 + +import requests + +from ee.danswer.server.user_group.models import UserGroup +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.constants import MAX_DELAY +from tests.integration.common_utils.test_models import DATestUser +from tests.integration.common_utils.test_models import DATestUserGroup + + +class UserGroupManager: + @staticmethod + def create( + name: str | None = None, + user_ids: list[str] | None = None, + cc_pair_ids: list[int] | None = None, + user_performing_action: DATestUser | None = None, + ) -> DATestUserGroup: + name = f"{name}-user-group" if name else f"test-user-group-{uuid4()}" + + request = { + "name": name, + "user_ids": user_ids or [], + "cc_pair_ids": cc_pair_ids or [], + } + response = requests.post( + f"{API_SERVER_URL}/manage/admin/user-group", + json=request, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + test_user_group = DATestUserGroup( + id=response.json()["id"], + name=response.json()["name"], + user_ids=[user["id"] for user in response.json()["users"]], + cc_pair_ids=[cc_pair["id"] for cc_pair in response.json()["cc_pairs"]], + ) + return test_user_group + + @staticmethod + def edit( + user_group: DATestUserGroup, + user_performing_action: DATestUser | None = None, + ) -> None: + response = requests.patch( + f"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}", + json=user_group.model_dump(), + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + @staticmethod + def delete( + user_group: DATestUserGroup, + user_performing_action: DATestUser | None = None, + ) -> None: + response = requests.delete( + f"{API_SERVER_URL}/manage/admin/user-group/{user_group.id}", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + @staticmethod + def set_curator_status( + test_user_group: DATestUserGroup, + user_to_set_as_curator: DATestUser, + is_curator: bool = True, + user_performing_action: DATestUser | None = None, + ) -> None: + set_curator_request = { + "user_id": user_to_set_as_curator.id, + "is_curator": is_curator, + } + response = requests.post( + f"{API_SERVER_URL}/manage/admin/user-group/{test_user_group.id}/set-curator", + json=set_curator_request, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + @staticmethod + def get_all( + user_performing_action: DATestUser | None = None, + ) -> list[UserGroup]: + response = requests.get( + f"{API_SERVER_URL}/manage/admin/user-group", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return [UserGroup(**ug) for ug in response.json()] + + @staticmethod + def verify( + user_group: DATestUserGroup, + verify_deleted: bool = False, + user_performing_action: 
DATestUser | None = None, + ) -> None: + all_user_groups = UserGroupManager.get_all(user_performing_action) + for fetched_user_group in all_user_groups: + if user_group.id == fetched_user_group.id: + if verify_deleted: + raise ValueError( + f"User group {user_group.id} found but should be deleted" + ) + fetched_cc_ids = {cc_pair.id for cc_pair in fetched_user_group.cc_pairs} + fetched_user_ids = {user.id for user in fetched_user_group.users} + user_group_cc_ids = set(user_group.cc_pair_ids) + user_group_user_ids = set(user_group.user_ids) + if ( + fetched_cc_ids == user_group_cc_ids + and fetched_user_ids == user_group_user_ids + ): + return + if not verify_deleted: + raise ValueError(f"User group {user_group.id} not found") + + @staticmethod + def wait_for_sync( + user_groups_to_check: list[DATestUserGroup] | None = None, + user_performing_action: DATestUser | None = None, + ) -> None: + start = time.time() + while True: + user_groups = UserGroupManager.get_all(user_performing_action) + if user_groups_to_check: + check_ids = {user_group.id for user_group in user_groups_to_check} + user_group_ids = {user_group.id for user_group in user_groups} + if not check_ids.issubset(user_group_ids): + raise RuntimeError("User group not found") + user_groups = [ + user_group + for user_group in user_groups + if user_group.id in check_ids + ] + if all(ug.is_up_to_date for ug in user_groups): + return + + if time.time() - start > MAX_DELAY: + raise TimeoutError( + f"User groups were not synced within the {MAX_DELAY} seconds" + ) + else: + print("User groups were not synced yet, waiting...") + time.sleep(2) + + @staticmethod + def wait_for_deletion_completion( + user_groups_to_check: list[DATestUserGroup], + user_performing_action: DATestUser | None = None, + ) -> None: + start = time.time() + user_group_ids_to_check = {user_group.id for user_group in user_groups_to_check} + while True: + fetched_user_groups = UserGroupManager.get_all(user_performing_action) + fetched_user_group_ids = { + user_group.id for user_group in fetched_user_groups + } + if not user_group_ids_to_check.intersection(fetched_user_group_ids): + return + + if time.time() - start > MAX_DELAY: + raise TimeoutError( + f"User groups deletion was not completed within the {MAX_DELAY} seconds" + ) + else: + print("Some user groups are still being deleted, waiting...") + time.sleep(2) diff --git a/backend/tests/integration/common_utils/reset.py b/backend/tests/integration/common_utils/reset.py index 3815aa9f972..95b3f734ed4 100644 --- a/backend/tests/integration/common_utils/reset.py +++ b/backend/tests/integration/common_utils/reset.py @@ -20,7 +20,9 @@ from danswer.indexing.models import IndexingSetting from danswer.main import setup_postgres from danswer.main import setup_vespa -from tests.integration.common_utils.llm import seed_default_openai_provider +from danswer.utils.logger import setup_logger + +logger = setup_logger() def _run_migrations( @@ -32,6 +34,7 @@ def _run_migrations( # Create an Alembic configuration object alembic_cfg = Config("alembic.ini") alembic_cfg.set_section_option("logger_alembic", "level", "WARN") + alembic_cfg.attributes["configure_logger"] = False # Set the SQLAlchemy URL in the Alembic configuration alembic_cfg.set_main_option("sqlalchemy.url", database_url) @@ -131,11 +134,13 @@ def reset_vespa() -> None: search_settings = get_current_search_settings(db_session) index_name = search_settings.index_name - setup_vespa( + success = setup_vespa( document_index=VespaIndex(index_name=index_name, 
secondary_index_name=None), index_setting=IndexingSetting.from_db_model(search_settings), secondary_index_setting=None, ) + if not success: + raise RuntimeError("Could not connect to Vespa within the specified timeout.") for _ in range(5): try: @@ -163,10 +168,8 @@ def reset_vespa() -> None: def reset_all() -> None: """Reset both Postgres and Vespa.""" - print("Resetting Postgres...") + logger.info("Resetting Postgres...") reset_postgres() - print("Resetting Vespa...") + logger.info("Resetting Vespa...") reset_vespa() - print("Seeding LLM Providers...") - seed_default_openai_provider() - print("Finished resetting all.") + logger.info("Finished resetting all.") diff --git a/backend/tests/integration/common_utils/seed_documents.py b/backend/tests/integration/common_utils/seed_documents.py deleted file mode 100644 index b6720c9aebe..00000000000 --- a/backend/tests/integration/common_utils/seed_documents.py +++ /dev/null @@ -1,72 +0,0 @@ -import uuid - -import requests -from pydantic import BaseModel - -from danswer.configs.constants import DocumentSource -from tests.integration.common_utils.connectors import ConnectorClient -from tests.integration.common_utils.constants import API_SERVER_URL - - -class SimpleTestDocument(BaseModel): - id: str - content: str - - -class SeedDocumentResponse(BaseModel): - cc_pair_id: int - documents: list[SimpleTestDocument] - - -class TestDocumentClient: - @staticmethod - def seed_documents( - num_docs: int = 5, cc_pair_id: int | None = None - ) -> SeedDocumentResponse: - if not cc_pair_id: - connector_details = ConnectorClient.create_connector() - cc_pair_id = connector_details.cc_pair_id - - # Create and ingest some documents - documents: list[dict] = [] - for _ in range(num_docs): - document_id = f"test-doc-{uuid.uuid4()}" - document = { - "document": { - "id": document_id, - "sections": [ - { - "text": f"This is test document {document_id}", - "link": f"{document_id}", - } - ], - "source": DocumentSource.NOT_APPLICABLE, - # just for testing metadata - "metadata": {"document_id": document_id}, - "semantic_identifier": f"Test Document {document_id}", - "from_ingestion_api": True, - }, - "cc_pair_id": cc_pair_id, - } - documents.append(document) - response = requests.post( - f"{API_SERVER_URL}/danswer-api/ingestion", - json=document, - ) - response.raise_for_status() - - print("Seeding completed successfully.") - return SeedDocumentResponse( - cc_pair_id=cc_pair_id, - documents=[ - SimpleTestDocument( - id=document["document"]["id"], - content=document["document"]["sections"][0]["text"], - ) - for document in documents - ], - ) - - -if __name__ == "__main__": - seed_documents_resp = TestDocumentClient.seed_documents() diff --git a/backend/tests/integration/common_utils/test_models.py b/backend/tests/integration/common_utils/test_models.py new file mode 100644 index 00000000000..ca573663e72 --- /dev/null +++ b/backend/tests/integration/common_utils/test_models.py @@ -0,0 +1,146 @@ +from typing import Any +from uuid import UUID + +from pydantic import BaseModel +from pydantic import Field + +from danswer.auth.schemas import UserRole +from danswer.db.enums import AccessType +from danswer.search.enums import RecencyBiasSetting +from danswer.server.documents.models import DocumentSource +from danswer.server.documents.models import InputType + +""" +These data models are used to represent the data on the testing side of things. +This means the flow is: +1. Make request that changes data in db +2. Make a change to the testing model +3. Retrieve data from db +4. 
Compare db data with testing model to verify +""" + + +class DATestAPIKey(BaseModel): + api_key_id: int + api_key_display: str + api_key: str | None = None # only present on initial creation + api_key_name: str | None = None + api_key_role: UserRole + + user_id: UUID + headers: dict + + +class DATestUser(BaseModel): + id: str + email: str + password: str + headers: dict + + +class DATestCredential(BaseModel): + id: int + name: str + credential_json: dict[str, Any] + admin_public: bool + source: DocumentSource + curator_public: bool + groups: list[int] + + +class DATestConnector(BaseModel): + id: int + name: str + source: DocumentSource + input_type: InputType + connector_specific_config: dict[str, Any] + groups: list[int] | None = None + is_public: bool | None = None + + +class SimpleTestDocument(BaseModel): + id: str + content: str + + +class DATestCCPair(BaseModel): + id: int + name: str + connector_id: int + credential_id: int + access_type: AccessType + groups: list[int] + documents: list[SimpleTestDocument] = Field(default_factory=list) + + +class DATestUserGroup(BaseModel): + id: int + name: str + user_ids: list[str] + cc_pair_ids: list[int] + + +class DATestLLMProvider(BaseModel): + id: int + name: str + provider: str + api_key: str + default_model_name: str + is_public: bool + groups: list[int] + api_base: str | None = None + api_version: str | None = None + + +class DATestDocumentSet(BaseModel): + id: int + name: str + description: str + cc_pair_ids: list[int] = Field(default_factory=list) + is_public: bool + is_up_to_date: bool + users: list[str] = Field(default_factory=list) + groups: list[int] = Field(default_factory=list) + + +class DATestPersona(BaseModel): + id: int + name: str + description: str + num_chunks: float + llm_relevance_filter: bool + is_public: bool + llm_filter_extraction: bool + recency_bias: RecencyBiasSetting + prompt_ids: list[int] + document_set_ids: list[int] + tool_ids: list[int] + llm_model_provider_override: str | None + llm_model_version_override: str | None + users: list[str] + groups: list[int] + + +# +class DATestChatSession(BaseModel): + id: int + persona_id: int + description: str + + +class DATestChatMessage(BaseModel): + id: str | None = None + chat_session_id: int + parent_message_id: str | None + message: str + response: str + + +class StreamedResponse(BaseModel): + full_message: str = "" + rephrased_query: str | None = None + tool_name: str | None = None + top_documents: list[dict[str, Any]] | None = None + relevance_summaries: list[dict[str, Any]] | None = None + tool_result: Any | None = None + user: str | None = None diff --git a/backend/tests/integration/common_utils/user_groups.py b/backend/tests/integration/common_utils/user_groups.py deleted file mode 100644 index 0cd44066463..00000000000 --- a/backend/tests/integration/common_utils/user_groups.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import cast - -import requests - -from ee.danswer.server.user_group.models import UserGroup -from ee.danswer.server.user_group.models import UserGroupCreate -from tests.integration.common_utils.constants import API_SERVER_URL - - -class UserGroupClient: - @staticmethod - def create_user_group(user_group_creation_request: UserGroupCreate) -> int: - response = requests.post( - f"{API_SERVER_URL}/manage/admin/user-group", - json=user_group_creation_request.model_dump(), - ) - response.raise_for_status() - return cast(int, response.json()["id"]) - - @staticmethod - def fetch_user_groups() -> list[UserGroup]: - response = 
requests.get(f"{API_SERVER_URL}/manage/admin/user-group") - response.raise_for_status() - return [UserGroup(**ug) for ug in response.json()] diff --git a/backend/tests/integration/common_utils/vespa.py b/backend/tests/integration/common_utils/vespa.py index aff7ef5eca6..1bd0060d89b 100644 --- a/backend/tests/integration/common_utils/vespa.py +++ b/backend/tests/integration/common_utils/vespa.py @@ -3,7 +3,7 @@ from danswer.document_index.vespa.index import DOCUMENT_ID_ENDPOINT -class TestVespaClient: +class vespa_fixture: def __init__(self, index_name: str): self.index_name = index_name self.vespa_document_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name) diff --git a/backend/tests/integration/conftest.py b/backend/tests/integration/conftest.py index 6c46e9f875e..77d9e0e7022 100644 --- a/backend/tests/integration/conftest.py +++ b/backend/tests/integration/conftest.py @@ -1,3 +1,4 @@ +import os from collections.abc import Generator import pytest @@ -6,7 +7,26 @@ from danswer.db.engine import get_session_context_manager from danswer.db.search_settings import get_current_search_settings from tests.integration.common_utils.reset import reset_all -from tests.integration.common_utils.vespa import TestVespaClient +from tests.integration.common_utils.vespa import vespa_fixture + + +def load_env_vars(env_file: str = ".env") -> None: + current_dir = os.path.dirname(os.path.abspath(__file__)) + env_path = os.path.join(current_dir, env_file) + try: + with open(env_path, "r") as f: + for line in f: + line = line.strip() + if line and not line.startswith("#"): + key, value = line.split("=", 1) + os.environ[key] = value.strip() + print("Successfully loaded environment variables") + except FileNotFoundError: + print(f"File {env_file} not found") + + +# Load environment variables at the module level +load_env_vars() @pytest.fixture @@ -16,9 +36,9 @@ def db_session() -> Generator[Session, None, None]: @pytest.fixture -def vespa_client(db_session: Session) -> TestVespaClient: +def vespa_client(db_session: Session) -> vespa_fixture: search_settings = get_current_search_settings(db_session) - return TestVespaClient(index_name=search_settings.index_name) + return vespa_fixture(index_name=search_settings.index_name) @pytest.fixture diff --git a/backend/tests/integration/tests/connector/test_connector_deletion.py b/backend/tests/integration/tests/connector/test_connector_deletion.py new file mode 100644 index 00000000000..46a65f768a9 --- /dev/null +++ b/backend/tests/integration/tests/connector/test_connector_deletion.py @@ -0,0 +1,331 @@ +""" +This file contains tests for the following: +- Ensuring deletion of a connector also: + - deletes the documents in vespa for that connector + - updates the document sets and user groups to remove the connector +- Ensure that deleting a connector that is part of an overlapping document set and/or user group works as expected +""" +from uuid import uuid4 + +from sqlalchemy.orm import Session + +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.enums import IndexingStatus +from danswer.db.index_attempt import create_index_attempt_error +from danswer.db.models import IndexAttempt +from danswer.db.search_settings import get_current_search_settings +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.constants import NUM_DOCS +from tests.integration.common_utils.managers.api_key import APIKeyManager +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from 
tests.integration.common_utils.managers.document import DocumentManager +from tests.integration.common_utils.managers.document_set import DocumentSetManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.managers.user_group import UserGroupManager +from tests.integration.common_utils.test_models import DATestAPIKey +from tests.integration.common_utils.test_models import DATestUser +from tests.integration.common_utils.test_models import DATestUserGroup +from tests.integration.common_utils.vespa import vespa_fixture + + +def test_connector_deletion(reset: None, vespa_client: vespa_fixture) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + # create api key + api_key: DATestAPIKey = APIKeyManager.create( + user_performing_action=admin_user, + ) + + # create connectors + cc_pair_1 = CCPairManager.create_from_scratch( + source=DocumentSource.INGESTION_API, + user_performing_action=admin_user, + ) + cc_pair_2 = CCPairManager.create_from_scratch( + source=DocumentSource.INGESTION_API, + user_performing_action=admin_user, + ) + + # seed documents + cc_pair_1.documents = DocumentManager.seed_dummy_docs( + cc_pair=cc_pair_1, + num_docs=NUM_DOCS, + api_key=api_key, + ) + cc_pair_2.documents = DocumentManager.seed_dummy_docs( + cc_pair=cc_pair_2, + num_docs=NUM_DOCS, + api_key=api_key, + ) + + # create document sets + doc_set_1 = DocumentSetManager.create( + name="Test Document Set 1", + cc_pair_ids=[cc_pair_1.id], + user_performing_action=admin_user, + ) + doc_set_2 = DocumentSetManager.create( + name="Test Document Set 2", + cc_pair_ids=[cc_pair_1.id, cc_pair_2.id], + user_performing_action=admin_user, + ) + + # wait for document sets to be synced + DocumentSetManager.wait_for_sync(user_performing_action=admin_user) + + print("Document sets created and synced") + + # create user groups + user_group_1: DATestUserGroup = UserGroupManager.create( + cc_pair_ids=[cc_pair_1.id], + user_performing_action=admin_user, + ) + user_group_2: DATestUserGroup = UserGroupManager.create( + cc_pair_ids=[cc_pair_1.id, cc_pair_2.id], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync(user_performing_action=admin_user) + + # inject a finished index attempt and index attempt error (exercises foreign key errors) + with Session(get_sqlalchemy_engine()) as db_session: + primary_search_settings = get_current_search_settings(db_session) + new_attempt = IndexAttempt( + connector_credential_pair_id=cc_pair_1.id, + search_settings_id=primary_search_settings.id, + from_beginning=False, + status=IndexingStatus.COMPLETED_WITH_ERRORS, + ) + db_session.add(new_attempt) + db_session.commit() + + create_index_attempt_error( + index_attempt_id=new_attempt.id, + batch=1, + docs=[], + exception_msg="", + exception_traceback="", + db_session=db_session, + ) + + # delete connector 1 + CCPairManager.pause_cc_pair( + cc_pair=cc_pair_1, + user_performing_action=admin_user, + ) + CCPairManager.delete( + cc_pair=cc_pair_1, + user_performing_action=admin_user, + ) + + # Update local records to match the database for later comparison + user_group_1.cc_pair_ids = [] + user_group_2.cc_pair_ids = [cc_pair_2.id] + doc_set_1.cc_pair_ids = [] + doc_set_2.cc_pair_ids = [cc_pair_2.id] + cc_pair_1.groups = [] + cc_pair_2.groups = [user_group_2.id] + + CCPairManager.wait_for_deletion_completion(user_performing_action=admin_user) + + # validate vespa documents + 
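# cc_pair_1's documents should be fully removed from Vespa, while cc_pair_2's documents should remain and carry only doc_set_2 and user_group_2 +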
DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_1, + doc_set_names=[], + group_names=[], + doc_creating_user=admin_user, + verify_deleted=True, + ) + + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_2, + doc_set_names=[doc_set_2.name], + group_names=[user_group_2.name], + doc_creating_user=admin_user, + verify_deleted=False, + ) + + # check that only connector 1 is deleted + CCPairManager.verify( + cc_pair=cc_pair_2, + user_performing_action=admin_user, + ) + + # validate document sets + DocumentSetManager.verify( + document_set=doc_set_1, + user_performing_action=admin_user, + ) + DocumentSetManager.verify( + document_set=doc_set_2, + user_performing_action=admin_user, + ) + + # validate user groups + UserGroupManager.verify( + user_group=user_group_1, + user_performing_action=admin_user, + ) + UserGroupManager.verify( + user_group=user_group_2, + user_performing_action=admin_user, + ) + + +def test_connector_deletion_for_overlapping_connectors( + reset: None, vespa_client: vespa_fixture +) -> None: + """Checks to make sure that connectors with overlapping documents work properly. Specifically, that the overlapping + document (1) still exists and (2) has the right document set / group post-deletion of one of the connectors. + """ + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + # create api key + api_key: DATestAPIKey = APIKeyManager.create( + user_performing_action=admin_user, + ) + + # create connectors + cc_pair_1 = CCPairManager.create_from_scratch( + source=DocumentSource.INGESTION_API, + user_performing_action=admin_user, + ) + cc_pair_2 = CCPairManager.create_from_scratch( + source=DocumentSource.INGESTION_API, + user_performing_action=admin_user, + ) + + doc_ids = [str(uuid4())] + cc_pair_1.documents = DocumentManager.seed_dummy_docs( + cc_pair=cc_pair_1, + document_ids=doc_ids, + api_key=api_key, + ) + cc_pair_2.documents = DocumentManager.seed_dummy_docs( + cc_pair=cc_pair_2, + document_ids=doc_ids, + api_key=api_key, + ) + + # verify vespa document exists and that it is not in any document sets or groups + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_1, + doc_set_names=[], + group_names=[], + doc_creating_user=admin_user, + ) + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_2, + doc_set_names=[], + group_names=[], + doc_creating_user=admin_user, + ) + + # create document set + doc_set_1 = DocumentSetManager.create( + name="Test Document Set 1", + cc_pair_ids=[cc_pair_1.id], + user_performing_action=admin_user, + ) + DocumentSetManager.wait_for_sync( + document_sets_to_check=[doc_set_1], + user_performing_action=admin_user, + ) + + print("Document set 1 created and synced") + + # verify vespa document is in the document set + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_1, + doc_set_names=[doc_set_1.name], + doc_creating_user=admin_user, + ) + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_2, + doc_creating_user=admin_user, + ) + + # create a user group and attach it to connector 1 + user_group_1: DATestUserGroup = UserGroupManager.create( + name="Test User Group 1", + cc_pair_ids=[cc_pair_1.id], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], + user_performing_action=admin_user, + ) + cc_pair_1.groups = [user_group_1.id] + + print("User group 1 created and synced") + + # create 
a user group and attach it to connector 2 + user_group_2: DATestUserGroup = UserGroupManager.create( + name="Test User Group 2", + cc_pair_ids=[cc_pair_2.id], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_2], + user_performing_action=admin_user, + ) + cc_pair_2.groups = [user_group_2.id] + + print("User group 2 created and synced") + + # verify vespa document is in the user group + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_1, + group_names=[user_group_1.name, user_group_2.name], + doc_creating_user=admin_user, + ) + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_2, + group_names=[user_group_1.name, user_group_2.name], + doc_creating_user=admin_user, + ) + + # delete connector 1 + CCPairManager.pause_cc_pair( + cc_pair=cc_pair_1, + user_performing_action=admin_user, + ) + CCPairManager.delete( + cc_pair=cc_pair_1, + user_performing_action=admin_user, + ) + + # wait for deletion to finish + CCPairManager.wait_for_deletion_completion(user_performing_action=admin_user) + + print("Connector 1 deleted") + + # check that only connector 1 is deleted + # TODO: check for the CC pair rather than the connector once the refactor is done + CCPairManager.verify( + cc_pair=cc_pair_1, + verify_deleted=True, + user_performing_action=admin_user, + ) + CCPairManager.verify( + cc_pair=cc_pair_2, + user_performing_action=admin_user, + ) + + # verify the document is not in any document sets + # verify the document is only in user group 2 + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_2, + doc_set_names=[], + group_names=[user_group_2.name], + doc_creating_user=admin_user, + verify_deleted=False, + ) diff --git a/backend/tests/integration/tests/connector/test_deletion.py b/backend/tests/integration/tests/connector/test_deletion.py deleted file mode 100644 index 78ad2378af9..00000000000 --- a/backend/tests/integration/tests/connector/test_deletion.py +++ /dev/null @@ -1,190 +0,0 @@ -import time - -from danswer.db.enums import ConnectorCredentialPairStatus -from danswer.server.features.document_set.models import DocumentSetCreationRequest -from tests.integration.common_utils.connectors import ConnectorClient -from tests.integration.common_utils.constants import MAX_DELAY -from tests.integration.common_utils.document_sets import DocumentSetClient -from tests.integration.common_utils.seed_documents import TestDocumentClient -from tests.integration.common_utils.user_groups import UserGroupClient -from tests.integration.common_utils.user_groups import UserGroupCreate -from tests.integration.common_utils.vespa import TestVespaClient - - -def test_connector_deletion(reset: None, vespa_client: TestVespaClient) -> None: - # create connectors - c1_details = ConnectorClient.create_connector(name_prefix="tc1") - c2_details = ConnectorClient.create_connector(name_prefix="tc2") - c1_seed_res = TestDocumentClient.seed_documents( - num_docs=5, cc_pair_id=c1_details.cc_pair_id - ) - c2_seed_res = TestDocumentClient.seed_documents( - num_docs=5, cc_pair_id=c2_details.cc_pair_id - ) - - # create document sets - doc_set_1_id = DocumentSetClient.create_document_set( - DocumentSetCreationRequest( - name="Test Document Set 1", - description="Intially connector to be deleted, should be empty after test", - cc_pair_ids=[c1_details.cc_pair_id], - is_public=True, - users=[], - groups=[], - ) - ) - - doc_set_2_id = DocumentSetClient.create_document_set( - DocumentSetCreationRequest( - name="Test 
Document Set 2", - description="Intially both connectors, should contain undeleted connector after test", - cc_pair_ids=[c1_details.cc_pair_id, c2_details.cc_pair_id], - is_public=True, - users=[], - groups=[], - ) - ) - - # wait for document sets to be synced - start = time.time() - while True: - doc_sets = DocumentSetClient.fetch_document_sets() - doc_set_1 = next( - (doc_set for doc_set in doc_sets if doc_set.id == doc_set_1_id), None - ) - doc_set_2 = next( - (doc_set for doc_set in doc_sets if doc_set.id == doc_set_2_id), None - ) - - if not doc_set_1 or not doc_set_2: - raise RuntimeError("Document set not found") - - if doc_set_1.is_up_to_date and doc_set_2.is_up_to_date: - break - - if time.time() - start > MAX_DELAY: - raise TimeoutError("Document sets were not synced within the max delay") - - time.sleep(2) - - print("Document sets created and synced") - - # if so, create ACLs - user_group_1 = UserGroupClient.create_user_group( - UserGroupCreate( - name="Test User Group 1", user_ids=[], cc_pair_ids=[c1_details.cc_pair_id] - ) - ) - user_group_2 = UserGroupClient.create_user_group( - UserGroupCreate( - name="Test User Group 2", - user_ids=[], - cc_pair_ids=[c1_details.cc_pair_id, c2_details.cc_pair_id], - ) - ) - - # wait for user groups to be available - start = time.time() - while True: - user_groups = {ug.id: ug for ug in UserGroupClient.fetch_user_groups()} - - if not ( - user_group_1 in user_groups.keys() and user_group_2 in user_groups.keys() - ): - raise RuntimeError("User groups not found") - - if ( - user_groups[user_group_1].is_up_to_date - and user_groups[user_group_2].is_up_to_date - ): - break - - if time.time() - start > MAX_DELAY: - raise TimeoutError("User groups were not synced within the max delay") - - time.sleep(2) - - print("User groups created and synced") - - # delete connector 1 - ConnectorClient.update_connector_status( - cc_pair_id=c1_details.cc_pair_id, status=ConnectorCredentialPairStatus.PAUSED - ) - ConnectorClient.delete_connector( - connector_id=c1_details.connector_id, credential_id=c1_details.credential_id - ) - - start = time.time() - while True: - connectors = ConnectorClient.get_connectors() - - if c1_details.connector_id not in [c["id"] for c in connectors]: - break - - if time.time() - start > MAX_DELAY: - raise TimeoutError("Connector 1 was not deleted within the max delay") - - time.sleep(2) - - print("Connector 1 deleted") - - # validate vespa documents - c1_vespa_docs = vespa_client.get_documents_by_id( - [doc.id for doc in c1_seed_res.documents] - )["documents"] - c2_vespa_docs = vespa_client.get_documents_by_id( - [doc.id for doc in c2_seed_res.documents] - )["documents"] - - assert len(c1_vespa_docs) == 0 - assert len(c2_vespa_docs) == 5 - - for doc in c2_vespa_docs: - assert doc["fields"]["access_control_list"] == { - "PUBLIC": 1, - "group:Test User Group 2": 1, - } - assert doc["fields"]["document_sets"] == {"Test Document Set 2": 1} - - # check that only connector 1 is deleted - # TODO: check for the CC pair rather than the connector once the refactor is done - all_connectors = ConnectorClient.get_connectors() - assert len(all_connectors) == 1 - assert all_connectors[0]["id"] == c2_details.connector_id - - # validate document sets - all_doc_sets = DocumentSetClient.fetch_document_sets() - assert len(all_doc_sets) == 2 - - doc_set_1_found = False - doc_set_2_found = False - for doc_set in all_doc_sets: - if doc_set.id == doc_set_1_id: - doc_set_1_found = True - assert doc_set.cc_pair_descriptors == [] - - if doc_set.id == 
doc_set_2_id: - doc_set_2_found = True - assert len(doc_set.cc_pair_descriptors) == 1 - assert doc_set.cc_pair_descriptors[0].id == c2_details.cc_pair_id - - assert doc_set_1_found - assert doc_set_2_found - - # validate user groups - all_user_groups = UserGroupClient.fetch_user_groups() - assert len(all_user_groups) == 2 - - user_group_1_found = False - user_group_2_found = False - for user_group in all_user_groups: - if user_group.id == user_group_1: - user_group_1_found = True - assert user_group.cc_pairs == [] - if user_group.id == user_group_2: - user_group_2_found = True - assert len(user_group.cc_pairs) == 1 - assert user_group.cc_pairs[0].id == c2_details.cc_pair_id - - assert user_group_1_found - assert user_group_2_found diff --git a/backend/tests/integration/tests/dev_apis/test_knowledge_chat.py b/backend/tests/integration/tests/dev_apis/test_knowledge_chat.py new file mode 100644 index 00000000000..2cf6fd399ea --- /dev/null +++ b/backend/tests/integration/tests/dev_apis/test_knowledge_chat.py @@ -0,0 +1,188 @@ +import requests + +from danswer.configs.constants import MessageType +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.managers.api_key import APIKeyManager +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.document import DocumentManager +from tests.integration.common_utils.managers.llm_provider import LLMProviderManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.test_models import DATestAPIKey +from tests.integration.common_utils.test_models import DATestCCPair +from tests.integration.common_utils.test_models import DATestUser + + +def test_all_stream_chat_message_objects_outputs(reset: None) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + + # create connector + cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch( + user_performing_action=admin_user, + ) + api_key: DATestAPIKey = APIKeyManager.create( + user_performing_action=admin_user, + ) + LLMProviderManager.create(user_performing_action=admin_user) + + # SEEDING DOCUMENTS + cc_pair_1.documents = [] + cc_pair_1.documents.append( + DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_1, + content="Pablo's favorite color is blue", + api_key=api_key, + ) + ) + cc_pair_1.documents.append( + DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_1, + content="Chris's favorite color is red", + api_key=api_key, + ) + ) + cc_pair_1.documents.append( + DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_1, + content="Pika's favorite color is green", + api_key=api_key, + ) + ) + + # TESTING RESPONSE FOR QUESTION 1 + response = requests.post( + f"{API_SERVER_URL}/chat/send-message-simple-with-history", + json={ + "messages": [ + { + "message": "What is Pablo's favorite color?", + "role": MessageType.USER.value, + } + ], + "persona_id": 0, + "prompt_id": 0, + }, + headers=admin_user.headers, + ) + assert response.status_code == 200 + response_json = response.json() + + # check that the answer is correct + answer_1 = response_json["answer"] + assert "blue" in answer_1.lower() + + # check that the llm selected a document + assert 0 in response_json["llm_selected_doc_indices"] + + # check that the final context documents are correct + # (it should contain all documents because there arent enough to exclude any) + assert 0 
in response_json["final_context_doc_indices"] + assert 1 in response_json["final_context_doc_indices"] + assert 2 in response_json["final_context_doc_indices"] + + # check that the cited documents are correct + assert cc_pair_1.documents[0].id in response_json["cited_documents"].values() + + # check that the top documents are correct + assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[0].id + print("response 1/3 passed") + + # TESTING RESPONSE FOR QUESTION 2 + response = requests.post( + f"{API_SERVER_URL}/chat/send-message-simple-with-history", + json={ + "messages": [ + { + "message": "What is Pablo's favorite color?", + "role": MessageType.USER.value, + }, + { + "message": answer_1, + "role": MessageType.ASSISTANT.value, + }, + { + "message": "What is Chris's favorite color?", + "role": MessageType.USER.value, + }, + ], + "persona_id": 0, + "prompt_id": 0, + }, + headers=admin_user.headers, + ) + assert response.status_code == 200 + response_json = response.json() + + # check that the answer is correct + answer_2 = response_json["answer"] + assert "red" in answer_2.lower() + + # check that the llm selected a document + assert 0 in response_json["llm_selected_doc_indices"] + + # check that the final context documents are correct + # (it should contain all documents because there arent enough to exclude any) + assert 0 in response_json["final_context_doc_indices"] + assert 1 in response_json["final_context_doc_indices"] + assert 2 in response_json["final_context_doc_indices"] + + # check that the cited documents are correct + assert cc_pair_1.documents[1].id in response_json["cited_documents"].values() + + # check that the top documents are correct + assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[1].id + print("response 2/3 passed") + + # TESTING RESPONSE FOR QUESTION 3 + response = requests.post( + f"{API_SERVER_URL}/chat/send-message-simple-with-history", + json={ + "messages": [ + { + "message": "What is Pablo's favorite color?", + "role": MessageType.USER.value, + }, + { + "message": answer_1, + "role": MessageType.ASSISTANT.value, + }, + { + "message": "What is Chris's favorite color?", + "role": MessageType.USER.value, + }, + { + "message": answer_2, + "role": MessageType.ASSISTANT.value, + }, + { + "message": "What is Pika's favorite color?", + "role": MessageType.USER.value, + }, + ], + "persona_id": 0, + "prompt_id": 0, + }, + headers=admin_user.headers, + ) + assert response.status_code == 200 + response_json = response.json() + + # check that the answer is correct + answer_3 = response_json["answer"] + assert "green" in answer_3.lower() + + # check that the llm selected a document + assert 0 in response_json["llm_selected_doc_indices"] + + # check that the final context documents are correct + # (it should contain all documents because there arent enough to exclude any) + assert 0 in response_json["final_context_doc_indices"] + assert 1 in response_json["final_context_doc_indices"] + assert 2 in response_json["final_context_doc_indices"] + + # check that the cited documents are correct + assert cc_pair_1.documents[2].id in response_json["cited_documents"].values() + + # check that the top documents are correct + assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[2].id + print("response 3/3 passed") diff --git a/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py b/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py index b00c2e3d1e6..0a4e7b40b57 100644 --- 
a/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py +++ b/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py @@ -1,36 +1,147 @@ import requests -from tests.integration.common_utils.connectors import ConnectorClient +from danswer.configs.constants import MessageType from tests.integration.common_utils.constants import API_SERVER_URL -from tests.integration.common_utils.seed_documents import TestDocumentClient +from tests.integration.common_utils.constants import NUM_DOCS +from tests.integration.common_utils.managers.api_key import APIKeyManager +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.document import DocumentManager +from tests.integration.common_utils.managers.llm_provider import LLMProviderManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.test_models import DATestAPIKey +from tests.integration.common_utils.test_models import DATestCCPair +from tests.integration.common_utils.test_models import DATestUser def test_send_message_simple_with_history(reset: None) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + # create connectors - c1_details = ConnectorClient.create_connector(name_prefix="tc1") - c1_seed_res = TestDocumentClient.seed_documents( - num_docs=5, cc_pair_id=c1_details.cc_pair_id + cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch( + user_performing_action=admin_user, + ) + api_key: DATestAPIKey = APIKeyManager.create( + user_performing_action=admin_user, + ) + LLMProviderManager.create(user_performing_action=admin_user) + cc_pair_1.documents = DocumentManager.seed_dummy_docs( + cc_pair=cc_pair_1, + num_docs=NUM_DOCS, + api_key=api_key, ) response = requests.post( f"{API_SERVER_URL}/chat/send-message-simple-with-history", json={ - "messages": [{"message": c1_seed_res.documents[0].content, "role": "user"}], + "messages": [ + { + "message": cc_pair_1.documents[0].content, + "role": MessageType.USER.value, + } + ], "persona_id": 0, "prompt_id": 0, }, + headers=admin_user.headers, ) assert response.status_code == 200 response_json = response.json() # Check that the top document is the correct document - assert response_json["simple_search_docs"][0]["id"] == c1_seed_res.documents[0].id + assert response_json["simple_search_docs"][0]["id"] == cc_pair_1.documents[0].id + assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[0].id # assert that the metadata is correct - for doc in c1_seed_res.documents: + for doc in cc_pair_1.documents: found_doc = next( (x for x in response_json["simple_search_docs"] if x["id"] == doc.id), None ) assert found_doc assert found_doc["metadata"]["document_id"] == doc.id + + +def test_using_reference_docs_with_simple_with_history_api_flow(reset: None) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + + # create connector + cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch( + user_performing_action=admin_user, + ) + api_key: DATestAPIKey = APIKeyManager.create( + user_performing_action=admin_user, + ) + LLMProviderManager.create(user_performing_action=admin_user) + + # SEEDING DOCUMENTS + cc_pair_1.documents = [] + cc_pair_1.documents.append( + DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_1, + content="Chris's favorite color is blue", + 
api_key=api_key, + ) + ) + cc_pair_1.documents.append( + DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_1, + content="Hagen's favorite color is red", + api_key=api_key, + ) + ) + cc_pair_1.documents.append( + DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_1, + content="Pablo's favorite color is green", + api_key=api_key, + ) + ) + + # SEINDING MESSAGE 1 + response = requests.post( + f"{API_SERVER_URL}/chat/send-message-simple-with-history", + json={ + "messages": [ + { + "message": "What is Pablo's favorite color?", + "role": MessageType.USER.value, + } + ], + "persona_id": 0, + "prompt_id": 0, + }, + headers=admin_user.headers, + ) + assert response.status_code == 200 + response_json = response.json() + # get the db_doc_id of the top document to use as a search doc id for second message + first_db_doc_id = response_json["top_documents"][0]["db_doc_id"] + + # SEINDING MESSAGE 2 + response = requests.post( + f"{API_SERVER_URL}/chat/send-message-simple-with-history", + json={ + "messages": [ + { + "message": "What is Pablo's favorite color?", + "role": MessageType.USER.value, + } + ], + "persona_id": 0, + "prompt_id": 0, + "search_doc_ids": [first_db_doc_id], + }, + headers=admin_user.headers, + ) + assert response.status_code == 200 + response_json = response.json() + + # since we only gave it one search doc, all responses should only contain that doc + assert response_json["final_context_doc_indices"] == [0] + assert response_json["llm_selected_doc_indices"] == [0] + assert cc_pair_1.documents[2].id in response_json["cited_documents"].values() + # This ensures the the document we think we are referencing when we send the search_doc_ids in the second + # message is the document that we expect it to be + assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[2].id diff --git a/backend/tests/integration/tests/document_set/test_syncing.py b/backend/tests/integration/tests/document_set/test_syncing.py index 9a6b42ab5df..ed00870663a 100644 --- a/backend/tests/integration/tests/document_set/test_syncing.py +++ b/backend/tests/integration/tests/document_set/test_syncing.py @@ -1,78 +1,157 @@ -import time - -from danswer.server.features.document_set.models import DocumentSetCreationRequest -from tests.integration.common_utils.document_sets import DocumentSetClient -from tests.integration.common_utils.seed_documents import TestDocumentClient -from tests.integration.common_utils.vespa import TestVespaClient +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.constants import NUM_DOCS +from tests.integration.common_utils.managers.api_key import APIKeyManager +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.document import DocumentManager +from tests.integration.common_utils.managers.document_set import DocumentSetManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.test_models import DATestAPIKey +from tests.integration.common_utils.test_models import DATestUser +from tests.integration.common_utils.vespa import vespa_fixture def test_multiple_document_sets_syncing_same_connnector( - reset: None, vespa_client: TestVespaClient + reset: None, vespa_client: vespa_fixture ) -> None: - # Seed documents - seed_result = TestDocumentClient.seed_documents(num_docs=5) - cc_pair_id = seed_result.cc_pair_id - - # Create first document set - doc_set_1_id = DocumentSetClient.create_document_set( - 
DocumentSetCreationRequest( - name="Test Document Set 1", - description="First test document set", - cc_pair_ids=[cc_pair_id], - is_public=True, - users=[], - groups=[], - ) - ) - - doc_set_2_id = DocumentSetClient.create_document_set( - DocumentSetCreationRequest( - name="Test Document Set 2", - description="Second test document set", - cc_pair_ids=[cc_pair_id], - is_public=True, - users=[], - groups=[], - ) - ) - - # wait for syncing to be complete - max_delay = 45 - start = time.time() - while True: - doc_sets = DocumentSetClient.fetch_document_sets() - doc_set_1 = next( - (doc_set for doc_set in doc_sets if doc_set.id == doc_set_1_id), None - ) - doc_set_2 = next( - (doc_set for doc_set in doc_sets if doc_set.id == doc_set_2_id), None - ) - - if not doc_set_1 or not doc_set_2: - raise RuntimeError("Document set not found") - - if doc_set_1.is_up_to_date and doc_set_2.is_up_to_date: - assert [ccp.id for ccp in doc_set_1.cc_pair_descriptors] == [ - ccp.id for ccp in doc_set_2.cc_pair_descriptors - ] - break - - if time.time() - start > max_delay: - raise TimeoutError("Document sets were not synced within the max delay") - - time.sleep(2) - - # get names so we can compare to what is in vespa - doc_sets = DocumentSetClient.fetch_document_sets() - doc_set_names = {doc_set.name for doc_set in doc_sets} + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + + # create api key + api_key: DATestAPIKey = APIKeyManager.create( + user_performing_action=admin_user, + ) + + # create connector + cc_pair_1 = CCPairManager.create_from_scratch( + source=DocumentSource.INGESTION_API, + user_performing_action=admin_user, + ) + + # seed documents + cc_pair_1.documents = DocumentManager.seed_dummy_docs( + cc_pair=cc_pair_1, + num_docs=NUM_DOCS, + api_key=api_key, + ) + + # Create document sets + doc_set_1 = DocumentSetManager.create( + cc_pair_ids=[cc_pair_1.id], + user_performing_action=admin_user, + ) + doc_set_2 = DocumentSetManager.create( + cc_pair_ids=[cc_pair_1.id], + user_performing_action=admin_user, + ) + + DocumentSetManager.wait_for_sync( + user_performing_action=admin_user, + ) + + DocumentSetManager.verify( + document_set=doc_set_1, + user_performing_action=admin_user, + ) + DocumentSetManager.verify( + document_set=doc_set_2, + user_performing_action=admin_user, + ) # make sure documents are as expected - seeded_document_ids = [doc.id for doc in seed_result.documents] - - result = vespa_client.get_documents_by_id([doc.id for doc in seed_result.documents]) - documents = result["documents"] - assert len(documents) == len(seed_result.documents) - assert all(doc["fields"]["document_id"] in seeded_document_ids for doc in documents) - assert all( - set(doc["fields"]["document_sets"].keys()) == doc_set_names for doc in documents + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_1, + doc_set_names=[doc_set_1.name, doc_set_2.name], + doc_creating_user=admin_user, + ) + + +def test_removing_connector(reset: None, vespa_client: vespa_fixture) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + + # create api key + api_key: DATestAPIKey = APIKeyManager.create( + user_performing_action=admin_user, + ) + + # create connectors + cc_pair_1 = CCPairManager.create_from_scratch( + source=DocumentSource.INGESTION_API, + user_performing_action=admin_user, + ) + cc_pair_2 = CCPairManager.create_from_scratch( + 
source=DocumentSource.INGESTION_API, + user_performing_action=admin_user, + ) + + # seed documents + cc_pair_1.documents = DocumentManager.seed_dummy_docs( + cc_pair=cc_pair_1, + num_docs=NUM_DOCS, + api_key=api_key, + ) + + cc_pair_2.documents = DocumentManager.seed_dummy_docs( + cc_pair=cc_pair_2, + num_docs=NUM_DOCS, + api_key=api_key, + ) + + # Create document sets + doc_set_1 = DocumentSetManager.create( + cc_pair_ids=[cc_pair_1.id, cc_pair_2.id], + user_performing_action=admin_user, + ) + + DocumentSetManager.wait_for_sync( + user_performing_action=admin_user, + ) + + DocumentSetManager.verify( + document_set=doc_set_1, + user_performing_action=admin_user, + ) + + # make sure cc_pair_1 docs are doc_set_1 only + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_1, + doc_set_names=[doc_set_1.name], + doc_creating_user=admin_user, + ) + + # make sure cc_pair_2 docs are doc_set_1 only + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_2, + doc_set_names=[doc_set_1.name], + doc_creating_user=admin_user, + ) + + # remove cc_pair_2 from document set + doc_set_1.cc_pair_ids = [cc_pair_1.id] + DocumentSetManager.edit( + doc_set_1, + user_performing_action=admin_user, + ) + + DocumentSetManager.wait_for_sync( + user_performing_action=admin_user, + ) + + # make sure cc_pair_1 docs are doc_set_1 only + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_1, + doc_set_names=[doc_set_1.name], + doc_creating_user=admin_user, + ) + + # make sure cc_pair_2 docs have no doc set + DocumentManager.verify( + vespa_client=vespa_client, + cc_pair=cc_pair_2, + doc_set_names=[], + doc_creating_user=admin_user, ) diff --git a/backend/tests/integration/tests/permissions/test_cc_pair_permissions.py b/backend/tests/integration/tests/permissions/test_cc_pair_permissions.py new file mode 100644 index 00000000000..5fba8ff64fc --- /dev/null +++ b/backend/tests/integration/tests/permissions/test_cc_pair_permissions.py @@ -0,0 +1,180 @@ +""" +This file takes the happy path to adding a curator to a user group and then tests +the permissions of the curator manipulating connector-credential pairs. 
+""" +import pytest +from requests.exceptions import HTTPError + +from danswer.db.enums import AccessType +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.connector import ConnectorManager +from tests.integration.common_utils.managers.credential import CredentialManager +from tests.integration.common_utils.managers.user import DATestUser +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.managers.user_group import UserGroupManager + + +def test_cc_pair_permissions(reset: None) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + + # Creating a curator + curator: DATestUser = UserManager.create(name="curator") + + # Creating a user group + user_group_1 = UserGroupManager.create( + name="curated_user_group", + user_ids=[curator.id], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + # setting the user as a curator for the user group + UserGroupManager.set_curator_status( + test_user_group=user_group_1, + user_to_set_as_curator=curator, + user_performing_action=admin_user, + ) + + # Creating another user group that the user is not a curator of + user_group_2 = UserGroupManager.create( + name="uncurated_user_group", + user_ids=[curator.id], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + + # Create a credentials that the curator is and is not curator of + connector_1 = ConnectorManager.create( + name="curator_owned_connector", + source=DocumentSource.CONFLUENCE, + groups=[user_group_1.id], + is_public=False, + user_performing_action=admin_user, + ) + # currently we dont enforce permissions at the connector level + # pending cc_pair -> connector rework + # connector_2 = ConnectorManager.create( + # name="curator_visible_connector", + # source=DocumentSource.CONFLUENCE, + # groups=[user_group_2.id], + # is_public=False, + # user_performing_action=admin_user, + # ) + credential_1 = CredentialManager.create( + name="curator_owned_credential", + source=DocumentSource.CONFLUENCE, + groups=[user_group_1.id], + curator_public=False, + user_performing_action=admin_user, + ) + credential_2 = CredentialManager.create( + name="curator_visible_credential", + source=DocumentSource.CONFLUENCE, + groups=[user_group_2.id], + curator_public=False, + user_performing_action=admin_user, + ) + + # END OF HAPPY PATH + + """Tests for things Curators should not be able to do""" + + # Curators should not be able to create a public cc pair + with pytest.raises(HTTPError): + CCPairManager.create( + connector_id=connector_1.id, + credential_id=credential_1.id, + name="invalid_cc_pair_1", + access_type=AccessType.PUBLIC, + groups=[user_group_1.id], + user_performing_action=curator, + ) + + # Curators should not be able to create a cc + # pair for a user group they are not a curator of + with pytest.raises(HTTPError): + CCPairManager.create( + connector_id=connector_1.id, + credential_id=credential_1.id, + name="invalid_cc_pair_2", + access_type=AccessType.PRIVATE, + groups=[user_group_1.id, user_group_2.id], + user_performing_action=curator, + ) + + # Curators should not be able to create a cc + # pair 
without an attached user group + with pytest.raises(HTTPError): + CCPairManager.create( + connector_id=connector_1.id, + credential_id=credential_1.id, + name="invalid_cc_pair_2", + access_type=AccessType.PRIVATE, + groups=[], + user_performing_action=curator, + ) + + # # This test is currently disabled because permissions are + # # not enforced at the connector level + # # Curators should not be able to create a cc pair + # # for a user group that the connector does not belong to (NOT WORKING) + # with pytest.raises(HTTPError): + # CCPairManager.create( + # connector_id=connector_2.id, + # credential_id=credential_1.id, + # name="invalid_cc_pair_3", + # access_type=AccessType.PRIVATE, + # groups=[user_group_1.id], + # user_performing_action=curator, + # ) + + # Curators should not be able to create a cc + # pair for a user group that the credential does not belong to + with pytest.raises(HTTPError): + CCPairManager.create( + connector_id=connector_1.id, + credential_id=credential_2.id, + name="invalid_cc_pair_4", + access_type=AccessType.PRIVATE, + groups=[user_group_1.id], + user_performing_action=curator, + ) + + """Tests for things Curators should be able to do""" + + # Curators should be able to create a private + # cc pair for a user group they are a curator of + valid_cc_pair = CCPairManager.create( + name="valid_cc_pair", + connector_id=connector_1.id, + credential_id=credential_1.id, + access_type=AccessType.PRIVATE, + groups=[user_group_1.id], + user_performing_action=curator, + ) + + # Verify the created cc pair + CCPairManager.verify( + cc_pair=valid_cc_pair, + user_performing_action=curator, + ) + + # Test pausing the cc pair + CCPairManager.pause_cc_pair(valid_cc_pair, user_performing_action=curator) + + # Test deleting the cc pair + CCPairManager.delete(valid_cc_pair, user_performing_action=curator) + CCPairManager.wait_for_deletion_completion(user_performing_action=curator) + + CCPairManager.verify( + cc_pair=valid_cc_pair, + verify_deleted=True, + user_performing_action=curator, + ) diff --git a/backend/tests/integration/tests/permissions/test_connector_permissions.py b/backend/tests/integration/tests/permissions/test_connector_permissions.py new file mode 100644 index 00000000000..292c8b54d95 --- /dev/null +++ b/backend/tests/integration/tests/permissions/test_connector_permissions.py @@ -0,0 +1,136 @@ +""" +This file takes the happy path to adding a curator to a user group and then tests +the permissions of the curator manipulating connectors. 
+""" +import pytest +from requests.exceptions import HTTPError + +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.managers.connector import ConnectorManager +from tests.integration.common_utils.managers.user import DATestUser +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.managers.user_group import UserGroupManager + + +def test_connector_permissions(reset: None) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + + # Creating a curator + curator: DATestUser = UserManager.create(name="curator") + + # Creating a user group + user_group_1 = UserGroupManager.create( + name="user_group_1", + user_ids=[curator.id], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + # setting the user as a curator for the user group + UserGroupManager.set_curator_status( + test_user_group=user_group_1, + user_to_set_as_curator=curator, + user_performing_action=admin_user, + ) + + # Creating another user group that the user is not a curator of + user_group_2 = UserGroupManager.create( + name="user_group_2", + user_ids=[curator.id], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + + # END OF HAPPY PATH + + """Tests for things Curators should not be able to do""" + + # Curators should not be able to create a public connector + with pytest.raises(HTTPError): + ConnectorManager.create( + name="invalid_connector_1", + source=DocumentSource.CONFLUENCE, + groups=[user_group_1.id], + is_public=True, + user_performing_action=curator, + ) + + # Curators should not be able to create a cc pair for a + # user group they are not a curator of + with pytest.raises(HTTPError): + ConnectorManager.create( + name="invalid_connector_2", + source=DocumentSource.CONFLUENCE, + groups=[user_group_1.id, user_group_2.id], + is_public=False, + user_performing_action=curator, + ) + + """Tests for things Curators should be able to do""" + + # Curators should be able to create a private + # connector for a user group they are a curator of + valid_connector = ConnectorManager.create( + name="valid_connector", + source=DocumentSource.CONFLUENCE, + groups=[user_group_1.id], + is_public=False, + user_performing_action=curator, + ) + assert valid_connector.id is not None + + # Verify the created connector + created_connector = ConnectorManager.get( + valid_connector.id, user_performing_action=curator + ) + assert created_connector.name == valid_connector.name + assert created_connector.source == valid_connector.source + + # Verify that the connector can be found in the list of all connectors + all_connectors = ConnectorManager.get_all(user_performing_action=curator) + assert any(conn.id == valid_connector.id for conn in all_connectors) + + # Test editing the connector + valid_connector.name = "updated_valid_connector" + ConnectorManager.edit(valid_connector, user_performing_action=curator) + + # Verify the edit + updated_connector = ConnectorManager.get( + valid_connector.id, user_performing_action=curator + ) + assert updated_connector.name == "updated_valid_connector" + + # Test deleting the connector + ConnectorManager.delete(connector=valid_connector, user_performing_action=curator) + + # Verify the 
deletion + all_connectors_after_delete = ConnectorManager.get_all( + user_performing_action=curator + ) + assert all(conn.id != valid_connector.id for conn in all_connectors_after_delete) + + # Test that curator cannot create a connector for a group they are not a curator of + with pytest.raises(HTTPError): + ConnectorManager.create( + name="invalid_connector_3", + source=DocumentSource.CONFLUENCE, + groups=[user_group_2.id], + is_public=False, + user_performing_action=curator, + ) + + # Test that curator cannot create a public connector + with pytest.raises(HTTPError): + ConnectorManager.create( + name="invalid_connector_4", + source=DocumentSource.CONFLUENCE, + groups=[user_group_1.id], + is_public=True, + user_performing_action=curator, + ) diff --git a/backend/tests/integration/tests/permissions/test_credential_permissions.py b/backend/tests/integration/tests/permissions/test_credential_permissions.py new file mode 100644 index 00000000000..7433389feb2 --- /dev/null +++ b/backend/tests/integration/tests/permissions/test_credential_permissions.py @@ -0,0 +1,108 @@ +""" +This file takes the happy path to adding a curator to a user group and then tests +the permissions of the curator manipulating credentials. +""" +import pytest +from requests.exceptions import HTTPError + +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.managers.credential import CredentialManager +from tests.integration.common_utils.managers.user import DATestUser +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.managers.user_group import UserGroupManager + + +def test_credential_permissions(reset: None) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + + # Creating a curator + curator: DATestUser = UserManager.create(name="curator") + + # Creating a user group + user_group_1 = UserGroupManager.create( + name="user_group_1", + user_ids=[curator.id], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + # setting the user as a curator for the user group + UserGroupManager.set_curator_status( + test_user_group=user_group_1, + user_to_set_as_curator=curator, + user_performing_action=admin_user, + ) + + # Creating another user group that the user is not a curator of + user_group_2 = UserGroupManager.create( + name="user_group_2", + user_ids=[curator.id], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + + # END OF HAPPY PATH + + """Tests for things Curators should not be able to do""" + + # Curators should not be able to create a public credential + with pytest.raises(HTTPError): + CredentialManager.create( + name="invalid_credential_1", + source=DocumentSource.CONFLUENCE, + groups=[user_group_1.id], + curator_public=True, + user_performing_action=curator, + ) + + # Curators should not be able to create a credential for a user group they are not a curator of + with pytest.raises(HTTPError): + CredentialManager.create( + name="invalid_credential_2", + source=DocumentSource.CONFLUENCE, + groups=[user_group_1.id, user_group_2.id], + curator_public=False, + user_performing_action=curator, + ) + + """Tests for things Curators should be able to do""" + # Curators should be able to create a 
private credential for a user group they are a curator of + valid_credential = CredentialManager.create( + name="valid_credential", + source=DocumentSource.CONFLUENCE, + groups=[user_group_1.id], + curator_public=False, + user_performing_action=curator, + ) + + # Verify the created credential + CredentialManager.verify( + credential=valid_credential, + user_performing_action=curator, + ) + + # Test editing the credential + valid_credential.name = "updated_valid_credential" + CredentialManager.edit(valid_credential, user_performing_action=curator) + + # Verify the edit + CredentialManager.verify( + credential=valid_credential, + user_performing_action=curator, + ) + + # Test deleting the credential + CredentialManager.delete(valid_credential, user_performing_action=curator) + + # Verify the deletion + CredentialManager.verify( + credential=valid_credential, + verify_deleted=True, + user_performing_action=curator, + ) diff --git a/backend/tests/integration/tests/permissions/test_doc_set_permissions.py b/backend/tests/integration/tests/permissions/test_doc_set_permissions.py new file mode 100644 index 00000000000..e352d5eb303 --- /dev/null +++ b/backend/tests/integration/tests/permissions/test_doc_set_permissions.py @@ -0,0 +1,191 @@ +import pytest +from requests.exceptions import HTTPError + +from danswer.db.enums import AccessType +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.document_set import DocumentSetManager +from tests.integration.common_utils.managers.user import DATestUser +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.managers.user_group import UserGroupManager + + +def test_doc_set_permissions_setup(reset: None) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + + # Creating a second user (curator) + curator: DATestUser = UserManager.create(name="curator") + + # Creating the first user group + user_group_1 = UserGroupManager.create( + name="curated_user_group", + user_ids=[curator.id], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + + # Setting the curator as a curator for the first user group + UserGroupManager.set_curator_status( + test_user_group=user_group_1, + user_to_set_as_curator=curator, + user_performing_action=admin_user, + ) + + # Creating a second user group + user_group_2 = UserGroupManager.create( + name="uncurated_user_group", + user_ids=[curator.id], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + + # Admin creates a cc_pair + private_cc_pair = CCPairManager.create_from_scratch( + access_type=AccessType.PRIVATE, + source=DocumentSource.INGESTION_API, + user_performing_action=admin_user, + ) + + # Admin creates a public cc_pair + public_cc_pair = CCPairManager.create_from_scratch( + access_type=AccessType.PUBLIC, + source=DocumentSource.INGESTION_API, + user_performing_action=admin_user, + ) + + # END OF HAPPY PATH + + """Tests for things Curators/Admins should not be able to do""" + + # Test that curator cannot create a document set for the group they don't curate + with pytest.raises(HTTPError): + DocumentSetManager.create( + 
name="Invalid Document Set 1", + groups=[user_group_2.id], + cc_pair_ids=[public_cc_pair.id], + user_performing_action=curator, + ) + + # Test that curator cannot create a document set attached to both groups + with pytest.raises(HTTPError): + DocumentSetManager.create( + name="Invalid Document Set 2", + is_public=False, + cc_pair_ids=[public_cc_pair.id], + groups=[user_group_1.id, user_group_2.id], + user_performing_action=curator, + ) + + # Test that curator cannot create a document set with no groups + with pytest.raises(HTTPError): + DocumentSetManager.create( + name="Invalid Document Set 3", + is_public=False, + cc_pair_ids=[public_cc_pair.id], + groups=[], + user_performing_action=curator, + ) + + # Test that curator cannot create a document set with no cc_pairs + with pytest.raises(HTTPError): + DocumentSetManager.create( + name="Invalid Document Set 4", + is_public=False, + cc_pair_ids=[], + groups=[user_group_1.id], + user_performing_action=curator, + ) + + # Test that admin cannot create a document set with no cc_pairs + with pytest.raises(HTTPError): + DocumentSetManager.create( + name="Invalid Document Set 4", + is_public=False, + cc_pair_ids=[], + groups=[user_group_1.id], + user_performing_action=admin_user, + ) + + """Tests for things Curators should be able to do""" + # Test that curator can create a document set for the group they curate + valid_doc_set = DocumentSetManager.create( + name="Valid Document Set", + is_public=False, + cc_pair_ids=[public_cc_pair.id], + groups=[user_group_1.id], + user_performing_action=curator, + ) + + DocumentSetManager.wait_for_sync( + document_sets_to_check=[valid_doc_set], user_performing_action=admin_user + ) + + # Verify that the valid document set was created + DocumentSetManager.verify( + document_set=valid_doc_set, + user_performing_action=admin_user, + ) + + # Verify that only one document set exists + all_doc_sets = DocumentSetManager.get_all(user_performing_action=admin_user) + assert len(all_doc_sets) == 1 + + # Add the private_cc_pair to the doc set on our end for later comparison + valid_doc_set.cc_pair_ids.append(private_cc_pair.id) + + # Confirm the curator can't add the private_cc_pair to the doc set + with pytest.raises(HTTPError): + DocumentSetManager.edit( + document_set=valid_doc_set, + user_performing_action=curator, + ) + # Confirm the admin can't add the private_cc_pair to the doc set + with pytest.raises(HTTPError): + DocumentSetManager.edit( + document_set=valid_doc_set, + user_performing_action=admin_user, + ) + + # Verify the document set has not been updated in the db + with pytest.raises(ValueError): + DocumentSetManager.verify( + document_set=valid_doc_set, + user_performing_action=admin_user, + ) + + # Add the private_cc_pair to the user group on our end for later comparison + user_group_1.cc_pair_ids.append(private_cc_pair.id) + + # Admin adds the cc_pair to the group the curator curates + UserGroupManager.edit( + user_group=user_group_1, + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + UserGroupManager.verify( + user_group=user_group_1, + user_performing_action=admin_user, + ) + + # Confirm the curator can now add the cc_pair to the doc set + DocumentSetManager.edit( + document_set=valid_doc_set, + user_performing_action=curator, + ) + DocumentSetManager.wait_for_sync( + document_sets_to_check=[valid_doc_set], user_performing_action=admin_user + ) + # Verify the updated document set + 
DocumentSetManager.verify( + document_set=valid_doc_set, + user_performing_action=admin_user, + ) diff --git a/backend/tests/integration/tests/permissions/test_user_role_permissions.py b/backend/tests/integration/tests/permissions/test_user_role_permissions.py new file mode 100644 index 00000000000..5be49d25c5e --- /dev/null +++ b/backend/tests/integration/tests/permissions/test_user_role_permissions.py @@ -0,0 +1,93 @@ +""" +This file tests the ability of different user types to set the role of other users. +""" +import pytest +from requests.exceptions import HTTPError + +from danswer.db.models import UserRole +from tests.integration.common_utils.managers.user import DATestUser +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.managers.user_group import UserGroupManager + + +def test_user_role_setting_permissions(reset: None) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + assert UserManager.verify_role(admin_user, UserRole.ADMIN) + + # Creating a basic user + basic_user: DATestUser = UserManager.create(name="basic_user") + assert UserManager.verify_role(basic_user, UserRole.BASIC) + + # Creating a curator + curator: DATestUser = UserManager.create(name="curator") + assert UserManager.verify_role(curator, UserRole.BASIC) + + # Creating a curator without adding to a group should not work + with pytest.raises(HTTPError): + UserManager.set_role( + user_to_set=curator, + target_role=UserRole.CURATOR, + user_to_perform_action=admin_user, + ) + + global_curator: DATestUser = UserManager.create(name="global_curator") + assert UserManager.verify_role(global_curator, UserRole.BASIC) + + # Setting the role of a global curator should not work for a basic user + with pytest.raises(HTTPError): + UserManager.set_role( + user_to_set=global_curator, + target_role=UserRole.GLOBAL_CURATOR, + user_to_perform_action=basic_user, + ) + + # Setting the role of a global curator should work for an admin user + UserManager.set_role( + user_to_set=global_curator, + target_role=UserRole.GLOBAL_CURATOR, + user_to_perform_action=admin_user, + ) + assert UserManager.verify_role(global_curator, UserRole.GLOBAL_CURATOR) + + # Setting the role of a global curator should not work for an invalid curator + with pytest.raises(HTTPError): + UserManager.set_role( + user_to_set=global_curator, + target_role=UserRole.BASIC, + user_to_perform_action=global_curator, + ) + assert UserManager.verify_role(global_curator, UserRole.GLOBAL_CURATOR) + + # Creating a user group + user_group_1 = UserGroupManager.create( + name="user_group_1", + user_ids=[], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + + # This should fail because the curator is not in the user group + with pytest.raises(HTTPError): + UserGroupManager.set_curator_status( + test_user_group=user_group_1, + user_to_set_as_curator=curator, + user_performing_action=admin_user, + ) + + # Adding the curator to the user group + user_group_1.user_ids = [curator.id] + UserGroupManager.edit(user_group=user_group_1, user_performing_action=admin_user) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + + # This should work because the curator is in the user group + UserGroupManager.set_curator_status( + test_user_group=user_group_1, + 
user_to_set_as_curator=curator, + user_performing_action=admin_user, + ) diff --git a/backend/tests/integration/tests/permissions/test_whole_curator_flow.py b/backend/tests/integration/tests/permissions/test_whole_curator_flow.py new file mode 100644 index 00000000000..1ce9052c108 --- /dev/null +++ b/backend/tests/integration/tests/permissions/test_whole_curator_flow.py @@ -0,0 +1,168 @@ +""" +This test tests the happy path for curator permissions +""" +from danswer.db.enums import AccessType +from danswer.db.models import UserRole +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.connector import ConnectorManager +from tests.integration.common_utils.managers.credential import CredentialManager +from tests.integration.common_utils.managers.user import DATestUser +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.managers.user_group import UserGroupManager + + +def test_whole_curator_flow(reset: None) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + assert UserManager.verify_role(admin_user, UserRole.ADMIN) + + # Creating a curator + curator: DATestUser = UserManager.create(name="curator") + + # Creating a user group + user_group_1 = UserGroupManager.create( + name="user_group_1", + user_ids=[curator.id], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + # Making curator a curator of user_group_1 + UserGroupManager.set_curator_status( + test_user_group=user_group_1, + user_to_set_as_curator=curator, + user_performing_action=admin_user, + ) + assert UserManager.verify_role(curator, UserRole.CURATOR) + + # Creating a credential as curator + test_credential = CredentialManager.create( + name="curator_test_credential", + source=DocumentSource.FILE, + curator_public=False, + groups=[user_group_1.id], + user_performing_action=curator, + ) + + # Creating a connector as curator + test_connector = ConnectorManager.create( + name="curator_test_connector", + source=DocumentSource.FILE, + is_public=False, + groups=[user_group_1.id], + user_performing_action=curator, + ) + + # Test editing the connector + test_connector.name = "updated_test_connector" + ConnectorManager.edit(connector=test_connector, user_performing_action=curator) + + # Creating a CC pair as curator + test_cc_pair = CCPairManager.create( + connector_id=test_connector.id, + credential_id=test_credential.id, + name="curator_test_cc_pair", + access_type=AccessType.PRIVATE, + groups=[user_group_1.id], + user_performing_action=curator, + ) + + CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=admin_user) + + # Verify that the curator can pause and unpause the CC pair + CCPairManager.pause_cc_pair(cc_pair=test_cc_pair, user_performing_action=curator) + + # Verify that the curator can delete the CC pair + CCPairManager.delete(cc_pair=test_cc_pair, user_performing_action=curator) + CCPairManager.wait_for_deletion_completion(user_performing_action=curator) + + # Verify that the CC pair has been deleted + CCPairManager.verify( + cc_pair=test_cc_pair, + verify_deleted=True, + user_performing_action=admin_user, + ) + + +def test_global_curator_flow(reset: None) -> None: + # Creating an admin user (first user created is 
automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + assert UserManager.verify_role(admin_user, UserRole.ADMIN) + + # Creating a user + global_curator: DATestUser = UserManager.create(name="global_curator") + assert UserManager.verify_role(global_curator, UserRole.BASIC) + + # Set the user to a global curator + UserManager.set_role( + user_to_set=global_curator, + target_role=UserRole.GLOBAL_CURATOR, + user_to_perform_action=admin_user, + ) + assert UserManager.verify_role(global_curator, UserRole.GLOBAL_CURATOR) + + # Creating a user group containing the global curator + user_group_1 = UserGroupManager.create( + name="user_group_1", + user_ids=[global_curator.id], + cc_pair_ids=[], + user_performing_action=admin_user, + ) + UserGroupManager.wait_for_sync( + user_groups_to_check=[user_group_1], user_performing_action=admin_user + ) + + # Creating a credential as global curator + test_credential = CredentialManager.create( + name="curator_test_credential", + source=DocumentSource.FILE, + curator_public=False, + groups=[user_group_1.id], + user_performing_action=global_curator, + ) + + # Creating a connector as global curator + test_connector = ConnectorManager.create( + name="curator_test_connector", + source=DocumentSource.FILE, + is_public=False, + groups=[user_group_1.id], + user_performing_action=global_curator, + ) + + # Test editing the connector + test_connector.name = "updated_test_connector" + ConnectorManager.edit( + connector=test_connector, user_performing_action=global_curator + ) + + # Creating a CC pair as global curator + test_cc_pair = CCPairManager.create( + connector_id=test_connector.id, + credential_id=test_credential.id, + name="curator_test_cc_pair", + access_type=AccessType.PRIVATE, + groups=[user_group_1.id], + user_performing_action=global_curator, + ) + + CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=admin_user) + + # Verify that the curator can pause and unpause the CC pair + CCPairManager.pause_cc_pair( + cc_pair=test_cc_pair, user_performing_action=global_curator + ) + + # Verify that the curator can delete the CC pair + CCPairManager.delete(cc_pair=test_cc_pair, user_performing_action=global_curator) + CCPairManager.wait_for_deletion_completion(user_performing_action=global_curator) + + # Verify that the CC pair has been deleted + CCPairManager.verify( + cc_pair=test_cc_pair, + verify_deleted=True, + user_performing_action=admin_user, + ) diff --git a/backend/tests/integration/tests/pruning/test_pruning.py b/backend/tests/integration/tests/pruning/test_pruning.py new file mode 100644 index 00000000000..084ad80b357 --- /dev/null +++ b/backend/tests/integration/tests/pruning/test_pruning.py @@ -0,0 +1,143 @@ +import http.server +import os +import shutil +import tempfile +import threading +from collections.abc import Generator +from contextlib import contextmanager +from datetime import datetime +from datetime import timezone +from time import sleep +from typing import Any + +from danswer.server.documents.models import DocumentSource +from danswer.utils.logger import setup_logger +from tests.integration.common_utils.managers.api_key import APIKeyManager +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.test_models import DATestUser +from tests.integration.common_utils.vespa import vespa_fixture + +logger = setup_logger() + + +@contextmanager +def http_server_context( + 
directory: str, port: int = 8000 +) -> Generator[http.server.HTTPServer, None, None]: + # Create a handler that serves files from the specified directory + def handler_class( + *args: Any, **kwargs: Any + ) -> http.server.SimpleHTTPRequestHandler: + return http.server.SimpleHTTPRequestHandler( + *args, directory=directory, **kwargs + ) + + # Create an HTTPServer instance + httpd = http.server.HTTPServer(("0.0.0.0", port), handler_class) + + # Define a thread that runs the server in the background + server_thread = threading.Thread(target=httpd.serve_forever) + server_thread.daemon = ( + True # Ensures the thread will exit when the main program exits + ) + + try: + # Start the server in the background + server_thread.start() + yield httpd + finally: + # Shutdown the server and wait for the thread to finish + httpd.shutdown() + httpd.server_close() + server_thread.join() + + +def test_web_pruning(reset: None, vespa_client: vespa_fixture) -> None: + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create(name="admin_user") + + # add api key to user + APIKeyManager.create( + user_performing_action=admin_user, + ) + + test_filename = os.path.realpath(__file__) + test_directory = os.path.dirname(test_filename) + with tempfile.TemporaryDirectory() as temp_dir: + port = 8888 + + website_src = os.path.join(test_directory, "website") + website_tgt = os.path.join(temp_dir, "website") + shutil.copytree(website_src, website_tgt) + with http_server_context(os.path.join(temp_dir, "website"), port): + sleep(1) # sleep a tiny bit before starting everything + + hostname = os.getenv("TEST_WEB_HOSTNAME", "localhost") + config = { + "base_url": f"http://{hostname}:{port}/", + "web_connector_type": "recursive", + } + + # store the time before we create the connector so that we know after + # when the indexing should have started + now = datetime.now(timezone.utc) + + # create connector + cc_pair_1 = CCPairManager.create_from_scratch( + source=DocumentSource.WEB, + connector_specific_config=config, + user_performing_action=admin_user, + ) + + CCPairManager.wait_for_indexing( + cc_pair_1, now, timeout=60, user_performing_action=admin_user + ) + + selected_cc_pair = CCPairManager.get_one( + cc_pair_1.id, user_performing_action=admin_user + ) + assert selected_cc_pair is not None, "cc_pair not found after indexing!" + assert selected_cc_pair.docs_indexed == 15 + + logger.info("Removing about.html.") + os.remove(os.path.join(website_tgt, "about.html")) + logger.info("Removing courses.html.") + os.remove(os.path.join(website_tgt, "courses.html")) + + # store the time again as a reference for the pruning timestamps + now = datetime.now(timezone.utc) + + CCPairManager.prune(cc_pair_1, user_performing_action=admin_user) + CCPairManager.wait_for_prune( + cc_pair_1, now, timeout=60, user_performing_action=admin_user + ) + + selected_cc_pair = CCPairManager.get_one( + cc_pair_1.id, user_performing_action=admin_user + ) + assert selected_cc_pair is not None, "cc_pair not found after pruning!" 
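(http_server_context above is a small context manager that serves a directory over HTTP from a daemon thread and tears the server down on exit. A usage sketch follows; the import path, port, and file contents are illustrative assumptions, not taken from the test.)

    # Usage sketch for the http_server_context helper defined above.
    # The import path, port, and file contents are assumptions for illustration.
    import os
    import tempfile

    import requests

    from tests.integration.tests.pruning.test_pruning import http_server_context

    with tempfile.TemporaryDirectory() as tmp:
        with open(os.path.join(tmp, "hello.html"), "w") as f:
            f.write("<h1>hello</h1>")
        # Serve the temp directory in the background and fetch the page back.
        with http_server_context(tmp, port=8999):
            resp = requests.get("http://localhost:8999/hello.html")
            assert resp.status_code == 200
            assert "hello" in resp.text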
+ assert selected_cc_pair.docs_indexed == 13 + + # check vespa + index_id = f"http://{hostname}:{port}/index.html" + about_id = f"http://{hostname}:{port}/about.html" + courses_id = f"http://{hostname}:{port}/courses.html" + + doc_ids = [index_id, about_id, courses_id] + retrieved_docs_dict = vespa_client.get_documents_by_id(doc_ids)["documents"] + retrieved_docs = { + doc["fields"]["document_id"]: doc["fields"] + for doc in retrieved_docs_dict + } + + # verify index.html exists in Vespa + retrieved_doc = retrieved_docs.get(index_id) + assert retrieved_doc + + # verify about and courses do not exist + retrieved_doc = retrieved_docs.get(about_id) + assert not retrieved_doc + + retrieved_doc = retrieved_docs.get(courses_id) + assert not retrieved_doc diff --git a/backend/tests/integration/tests/pruning/website/about.html b/backend/tests/integration/tests/pruning/website/about.html new file mode 100644 index 00000000000..ea7fee823cd --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/about.html @@ -0,0 +1,523 @@ + + + + + Above Multi-purpose Free Bootstrap Responsive Template + + + + + + + + + + + + + + +
+    [about.html: "Above Multi-purpose Free Bootstrap Responsive Template" about page — About Us
+    banner plus Why Choose Us?, Our Solution, Our Expertise, and Our Team sections with
+    lorem-ipsum filler; the HTML markup was lost in extraction and is not reproduced here.]
diff --git a/backend/tests/integration/tests/pruning/website/contact.html b/backend/tests/integration/tests/pruning/website/contact.html
new file mode 100644
index 00000000000..dbe3225456d
--- /dev/null
+++ b/backend/tests/integration/tests/pruning/website/contact.html
@@ -0,0 +1,357 @@
+    [contact.html: contact page from the same template — Contact Us banner, a contact form, and a
+    Contact info block (company address, telephone, fax, e-mail); markup likewise lost in
+    extraction and not reproduced here.]
diff --git a/backend/tests/integration/tests/pruning/website/courses.html b/backend/tests/integration/tests/pruning/website/courses.html
new file mode 100644
index 00000000000..a813720fd28
--- /dev/null
+++ b/backend/tests/integration/tests/pruning/website/courses.html
@@ -0,0 +1,218 @@
+    [courses.html: courses page from the same template — Courses banner, a "Courses We Offer"
+    intro, repeated "Heading Course" cards, and Web Development / Mobile Development /
+    Responsive Design blurbs; markup likewise lost in extraction and not reproduced here.]
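(These static pages are the crawl target for test_web_pruning above: the initial docs_indexed == 15 assertion presumably corresponds to the pages reachable from index.html, and deleting about.html and courses.html drops the count to 13. A quick local sanity check of the fixture's page count might look like the sketch below; the repository-root-relative path is an assumption.)

    # Rough sanity check of the pruning fixture used by test_web_pruning.
    # The path is assumed relative to the repository root; the connector's recursive
    # crawl count also depends on link reachability, so this only approximates the
    # docs_indexed values asserted in the test.
    from pathlib import Path

    fixture = Path("backend/tests/integration/tests/pruning/website")
    pages = sorted(p.name for p in fixture.glob("*.html"))
    print(f"{len(pages)} top-level pages: {pages}")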
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/backend/tests/integration/tests/pruning/website/css/animate.css b/backend/tests/integration/tests/pruning/website/css/animate.css new file mode 100644 index 00000000000..92a68838f4e --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/css/animate.css @@ -0,0 +1,3880 @@ +@charset "UTF-8"; +/* +Animate.css - http://daneden.me/animate +Licensed under the MIT license + +Copyright (c) 2013 Daniel Eden + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ +body { + /* Addresses a small issue in webkit: http://bit.ly/NEdoDq */ + -webkit-backface-visibility: hidden; +} +.animated { + -webkit-animation-duration: 1s; + -moz-animation-duration: 1s; + -o-animation-duration: 1s; + animation-duration: 1s; + -webkit-animation-fill-mode: both; + -moz-animation-fill-mode: both; + -o-animation-fill-mode: both; + animation-fill-mode: both; +} + +.animated.hinge { + -webkit-animation-duration: 2s; + -moz-animation-duration: 2s; + -o-animation-duration: 2s; + animation-duration: 2s; +} + +@-webkit-keyframes flash { + 0%, + 50%, + 100% { + opacity: 1; + } + 25%, + 75% { + opacity: 0; + } +} + +@-moz-keyframes flash { + 0%, + 50%, + 100% { + opacity: 1; + } + 25%, + 75% { + opacity: 0; + } +} + +@-o-keyframes flash { + 0%, + 50%, + 100% { + opacity: 1; + } + 25%, + 75% { + opacity: 0; + } +} + +@keyframes flash { + 0%, + 50%, + 100% { + opacity: 1; + } + 25%, + 75% { + opacity: 0; + } +} + +.flash { + -webkit-animation-name: flash; + -moz-animation-name: flash; + -o-animation-name: flash; + animation-name: flash; +} +@-webkit-keyframes shake { + 0%, + 100% { + -webkit-transform: translateX(0); + } + 10%, + 30%, + 50%, + 70%, + 90% { + -webkit-transform: translateX(-10px); + } + 20%, + 40%, + 60%, + 80% { + -webkit-transform: translateX(10px); + } +} + +@-moz-keyframes shake { + 0%, + 100% { + -moz-transform: translateX(0); + } + 10%, + 30%, + 50%, + 70%, + 90% { + -moz-transform: translateX(-10px); + } + 20%, + 40%, + 60%, + 80% { + -moz-transform: translateX(10px); + } +} + +@-o-keyframes shake { + 0%, + 100% { + -o-transform: translateX(0); + } + 10%, + 30%, + 50%, + 70%, + 90% { + -o-transform: translateX(-10px); + } + 20%, + 40%, + 60%, + 80% { + -o-transform: translateX(10px); + } +} + +@keyframes shake { + 0%, + 100% { + transform: translateX(0); + } + 10%, + 30%, + 50%, + 70%, + 90% { + transform: translateX(-10px); + } + 20%, + 40%, + 60%, + 80% { + transform: translateX(10px); + } +} + +.shake { + -webkit-animation-name: shake; + -moz-animation-name: shake; 
+ -o-animation-name: shake; + animation-name: shake; +} +@-webkit-keyframes bounce { + 0%, + 20%, + 50%, + 80%, + 100% { + -webkit-transform: translateY(0); + } + 40% { + -webkit-transform: translateY(-30px); + } + 60% { + -webkit-transform: translateY(-15px); + } +} + +@-moz-keyframes bounce { + 0%, + 20%, + 50%, + 80%, + 100% { + -moz-transform: translateY(0); + } + 40% { + -moz-transform: translateY(-30px); + } + 60% { + -moz-transform: translateY(-15px); + } +} + +@-o-keyframes bounce { + 0%, + 20%, + 50%, + 80%, + 100% { + -o-transform: translateY(0); + } + 40% { + -o-transform: translateY(-30px); + } + 60% { + -o-transform: translateY(-15px); + } +} +@keyframes bounce { + 0%, + 20%, + 50%, + 80%, + 100% { + transform: translateY(0); + } + 40% { + transform: translateY(-30px); + } + 60% { + transform: translateY(-15px); + } +} + +.bounce { + -webkit-animation-name: bounce; + -moz-animation-name: bounce; + -o-animation-name: bounce; + animation-name: bounce; +} +@-webkit-keyframes tada { + 0% { + -webkit-transform: scale(1); + } + 10%, + 20% { + -webkit-transform: scale(0.9) rotate(-3deg); + } + 30%, + 50%, + 70%, + 90% { + -webkit-transform: scale(1.1) rotate(3deg); + } + 40%, + 60%, + 80% { + -webkit-transform: scale(1.1) rotate(-3deg); + } + 100% { + -webkit-transform: scale(1) rotate(0); + } +} + +@-moz-keyframes tada { + 0% { + -moz-transform: scale(1); + } + 10%, + 20% { + -moz-transform: scale(0.9) rotate(-3deg); + } + 30%, + 50%, + 70%, + 90% { + -moz-transform: scale(1.1) rotate(3deg); + } + 40%, + 60%, + 80% { + -moz-transform: scale(1.1) rotate(-3deg); + } + 100% { + -moz-transform: scale(1) rotate(0); + } +} + +@-o-keyframes tada { + 0% { + -o-transform: scale(1); + } + 10%, + 20% { + -o-transform: scale(0.9) rotate(-3deg); + } + 30%, + 50%, + 70%, + 90% { + -o-transform: scale(1.1) rotate(3deg); + } + 40%, + 60%, + 80% { + -o-transform: scale(1.1) rotate(-3deg); + } + 100% { + -o-transform: scale(1) rotate(0); + } +} + +@keyframes tada { + 0% { + transform: scale(1); + } + 10%, + 20% { + transform: scale(0.9) rotate(-3deg); + } + 30%, + 50%, + 70%, + 90% { + transform: scale(1.1) rotate(3deg); + } + 40%, + 60%, + 80% { + transform: scale(1.1) rotate(-3deg); + } + 100% { + transform: scale(1) rotate(0); + } +} + +.tada { + -webkit-animation-name: tada; + -moz-animation-name: tada; + -o-animation-name: tada; + animation-name: tada; +} +@-webkit-keyframes swing { + 20%, + 40%, + 60%, + 80%, + 100% { + -webkit-transform-origin: top center; + } + 20% { + -webkit-transform: rotate(15deg); + } + 40% { + -webkit-transform: rotate(-10deg); + } + 60% { + -webkit-transform: rotate(5deg); + } + 80% { + -webkit-transform: rotate(-5deg); + } + 100% { + -webkit-transform: rotate(0deg); + } +} + +@-moz-keyframes swing { + 20% { + -moz-transform: rotate(15deg); + } + 40% { + -moz-transform: rotate(-10deg); + } + 60% { + -moz-transform: rotate(5deg); + } + 80% { + -moz-transform: rotate(-5deg); + } + 100% { + -moz-transform: rotate(0deg); + } +} + +@-o-keyframes swing { + 20% { + -o-transform: rotate(15deg); + } + 40% { + -o-transform: rotate(-10deg); + } + 60% { + -o-transform: rotate(5deg); + } + 80% { + -o-transform: rotate(-5deg); + } + 100% { + -o-transform: rotate(0deg); + } +} + +@keyframes swing { + 20% { + transform: rotate(15deg); + } + 40% { + transform: rotate(-10deg); + } + 60% { + transform: rotate(5deg); + } + 80% { + transform: rotate(-5deg); + } + 100% { + transform: rotate(0deg); + } +} + +.swing { + -webkit-transform-origin: top center; + -moz-transform-origin: top 
center; + -o-transform-origin: top center; + transform-origin: top center; + -webkit-animation-name: swing; + -moz-animation-name: swing; + -o-animation-name: swing; + animation-name: swing; +} +/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */ + +@-webkit-keyframes wobble { + 0% { + -webkit-transform: translateX(0%); + } + 15% { + -webkit-transform: translateX(-25%) rotate(-5deg); + } + 30% { + -webkit-transform: translateX(20%) rotate(3deg); + } + 45% { + -webkit-transform: translateX(-15%) rotate(-3deg); + } + 60% { + -webkit-transform: translateX(10%) rotate(2deg); + } + 75% { + -webkit-transform: translateX(-5%) rotate(-1deg); + } + 100% { + -webkit-transform: translateX(0%); + } +} + +@-moz-keyframes wobble { + 0% { + -moz-transform: translateX(0%); + } + 15% { + -moz-transform: translateX(-25%) rotate(-5deg); + } + 30% { + -moz-transform: translateX(20%) rotate(3deg); + } + 45% { + -moz-transform: translateX(-15%) rotate(-3deg); + } + 60% { + -moz-transform: translateX(10%) rotate(2deg); + } + 75% { + -moz-transform: translateX(-5%) rotate(-1deg); + } + 100% { + -moz-transform: translateX(0%); + } +} + +@-o-keyframes wobble { + 0% { + -o-transform: translateX(0%); + } + 15% { + -o-transform: translateX(-25%) rotate(-5deg); + } + 30% { + -o-transform: translateX(20%) rotate(3deg); + } + 45% { + -o-transform: translateX(-15%) rotate(-3deg); + } + 60% { + -o-transform: translateX(10%) rotate(2deg); + } + 75% { + -o-transform: translateX(-5%) rotate(-1deg); + } + 100% { + -o-transform: translateX(0%); + } +} + +@keyframes wobble { + 0% { + transform: translateX(0%); + } + 15% { + transform: translateX(-25%) rotate(-5deg); + } + 30% { + transform: translateX(20%) rotate(3deg); + } + 45% { + transform: translateX(-15%) rotate(-3deg); + } + 60% { + transform: translateX(10%) rotate(2deg); + } + 75% { + transform: translateX(-5%) rotate(-1deg); + } + 100% { + transform: translateX(0%); + } +} + +.wobble { + -webkit-animation-name: wobble; + -moz-animation-name: wobble; + -o-animation-name: wobble; + animation-name: wobble; +} +/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */ + +@-webkit-keyframes pulse { + 0% { + -webkit-transform: scale(1); + } + 50% { + -webkit-transform: scale(1.1); + } + 100% { + -webkit-transform: scale(1); + } +} +@-moz-keyframes pulse { + 0% { + -moz-transform: scale(1); + } + 50% { + -moz-transform: scale(1.1); + } + 100% { + -moz-transform: scale(1); + } +} +@-o-keyframes pulse { + 0% { + -o-transform: scale(1); + } + 50% { + -o-transform: scale(1.1); + } + 100% { + -o-transform: scale(1); + } +} +@keyframes pulse { + 0% { + transform: scale(1); + } + 50% { + transform: scale(1.1); + } + 100% { + transform: scale(1); + } +} + +.pulse { + -webkit-animation-name: pulse; + -moz-animation-name: pulse; + -o-animation-name: pulse; + animation-name: pulse; +} +@-webkit-keyframes flip { + 0% { + -webkit-transform: perspective(400px) rotateY(0); + -webkit-animation-timing-function: ease-out; + } + 40% { + -webkit-transform: perspective(400px) translateZ(150px) rotateY(170deg); + -webkit-animation-timing-function: ease-out; + } + 50% { + -webkit-transform: perspective(400px) translateZ(150px) rotateY(190deg) + scale(1); + -webkit-animation-timing-function: ease-in; + } + 80% { + -webkit-transform: perspective(400px) rotateY(360deg) scale(0.95); + -webkit-animation-timing-function: ease-in; + } + 100% { + -webkit-transform: perspective(400px) scale(1); + -webkit-animation-timing-function: ease-in; + } +} 
+@-moz-keyframes flip { + 0% { + -moz-transform: perspective(400px) rotateY(0); + -moz-animation-timing-function: ease-out; + } + 40% { + -moz-transform: perspective(400px) translateZ(150px) rotateY(170deg); + -moz-animation-timing-function: ease-out; + } + 50% { + -moz-transform: perspective(400px) translateZ(150px) rotateY(190deg) + scale(1); + -moz-animation-timing-function: ease-in; + } + 80% { + -moz-transform: perspective(400px) rotateY(360deg) scale(0.95); + -moz-animation-timing-function: ease-in; + } + 100% { + -moz-transform: perspective(400px) scale(1); + -moz-animation-timing-function: ease-in; + } +} +@-o-keyframes flip { + 0% { + -o-transform: perspective(400px) rotateY(0); + -o-animation-timing-function: ease-out; + } + 40% { + -o-transform: perspective(400px) translateZ(150px) rotateY(170deg); + -o-animation-timing-function: ease-out; + } + 50% { + -o-transform: perspective(400px) translateZ(150px) rotateY(190deg) scale(1); + -o-animation-timing-function: ease-in; + } + 80% { + -o-transform: perspective(400px) rotateY(360deg) scale(0.95); + -o-animation-timing-function: ease-in; + } + 100% { + -o-transform: perspective(400px) scale(1); + -o-animation-timing-function: ease-in; + } +} +@keyframes flip { + 0% { + transform: perspective(400px) rotateY(0); + animation-timing-function: ease-out; + } + 40% { + transform: perspective(400px) translateZ(150px) rotateY(170deg); + animation-timing-function: ease-out; + } + 50% { + transform: perspective(400px) translateZ(150px) rotateY(190deg) scale(1); + animation-timing-function: ease-in; + } + 80% { + transform: perspective(400px) rotateY(360deg) scale(0.95); + animation-timing-function: ease-in; + } + 100% { + transform: perspective(400px) scale(1); + animation-timing-function: ease-in; + } +} + +.flip { + -webkit-backface-visibility: visible !important; + -webkit-animation-name: flip; + -moz-backface-visibility: visible !important; + -moz-animation-name: flip; + -o-backface-visibility: visible !important; + -o-animation-name: flip; + backface-visibility: visible !important; + animation-name: flip; +} +@-webkit-keyframes flipInX { + 0% { + -webkit-transform: perspective(400px) rotateX(90deg); + opacity: 0; + } + + 40% { + -webkit-transform: perspective(400px) rotateX(-10deg); + } + + 70% { + -webkit-transform: perspective(400px) rotateX(10deg); + } + + 100% { + -webkit-transform: perspective(400px) rotateX(0deg); + opacity: 1; + } +} +@-moz-keyframes flipInX { + 0% { + -moz-transform: perspective(400px) rotateX(90deg); + opacity: 0; + } + + 40% { + -moz-transform: perspective(400px) rotateX(-10deg); + } + + 70% { + -moz-transform: perspective(400px) rotateX(10deg); + } + + 100% { + -moz-transform: perspective(400px) rotateX(0deg); + opacity: 1; + } +} +@-o-keyframes flipInX { + 0% { + -o-transform: perspective(400px) rotateX(90deg); + opacity: 0; + } + + 40% { + -o-transform: perspective(400px) rotateX(-10deg); + } + + 70% { + -o-transform: perspective(400px) rotateX(10deg); + } + + 100% { + -o-transform: perspective(400px) rotateX(0deg); + opacity: 1; + } +} +@keyframes flipInX { + 0% { + transform: perspective(400px) rotateX(90deg); + opacity: 0; + } + + 40% { + transform: perspective(400px) rotateX(-10deg); + } + + 70% { + transform: perspective(400px) rotateX(10deg); + } + + 100% { + transform: perspective(400px) rotateX(0deg); + opacity: 1; + } +} + +.flipInX { + -webkit-backface-visibility: visible !important; + -webkit-animation-name: flipInX; + -moz-backface-visibility: visible !important; + -moz-animation-name: flipInX; + 
-o-backface-visibility: visible !important; + -o-animation-name: flipInX; + backface-visibility: visible !important; + animation-name: flipInX; +} +@-webkit-keyframes flipOutX { + 0% { + -webkit-transform: perspective(400px) rotateX(0deg); + opacity: 1; + } + 100% { + -webkit-transform: perspective(400px) rotateX(90deg); + opacity: 0; + } +} + +@-moz-keyframes flipOutX { + 0% { + -moz-transform: perspective(400px) rotateX(0deg); + opacity: 1; + } + 100% { + -moz-transform: perspective(400px) rotateX(90deg); + opacity: 0; + } +} + +@-o-keyframes flipOutX { + 0% { + -o-transform: perspective(400px) rotateX(0deg); + opacity: 1; + } + 100% { + -o-transform: perspective(400px) rotateX(90deg); + opacity: 0; + } +} + +@keyframes flipOutX { + 0% { + transform: perspective(400px) rotateX(0deg); + opacity: 1; + } + 100% { + transform: perspective(400px) rotateX(90deg); + opacity: 0; + } +} + +.flipOutX { + -webkit-animation-name: flipOutX; + -webkit-backface-visibility: visible !important; + -moz-animation-name: flipOutX; + -moz-backface-visibility: visible !important; + -o-animation-name: flipOutX; + -o-backface-visibility: visible !important; + animation-name: flipOutX; + backface-visibility: visible !important; +} +@-webkit-keyframes flipInY { + 0% { + -webkit-transform: perspective(400px) rotateY(90deg); + opacity: 0; + } + + 40% { + -webkit-transform: perspective(400px) rotateY(-10deg); + } + + 70% { + -webkit-transform: perspective(400px) rotateY(10deg); + } + + 100% { + -webkit-transform: perspective(400px) rotateY(0deg); + opacity: 1; + } +} +@-moz-keyframes flipInY { + 0% { + -moz-transform: perspective(400px) rotateY(90deg); + opacity: 0; + } + + 40% { + -moz-transform: perspective(400px) rotateY(-10deg); + } + + 70% { + -moz-transform: perspective(400px) rotateY(10deg); + } + + 100% { + -moz-transform: perspective(400px) rotateY(0deg); + opacity: 1; + } +} +@-o-keyframes flipInY { + 0% { + -o-transform: perspective(400px) rotateY(90deg); + opacity: 0; + } + + 40% { + -o-transform: perspective(400px) rotateY(-10deg); + } + + 70% { + -o-transform: perspective(400px) rotateY(10deg); + } + + 100% { + -o-transform: perspective(400px) rotateY(0deg); + opacity: 1; + } +} +@keyframes flipInY { + 0% { + transform: perspective(400px) rotateY(90deg); + opacity: 0; + } + + 40% { + transform: perspective(400px) rotateY(-10deg); + } + + 70% { + transform: perspective(400px) rotateY(10deg); + } + + 100% { + transform: perspective(400px) rotateY(0deg); + opacity: 1; + } +} + +.flipInY { + -webkit-backface-visibility: visible !important; + -webkit-animation-name: flipInY; + -moz-backface-visibility: visible !important; + -moz-animation-name: flipInY; + -o-backface-visibility: visible !important; + -o-animation-name: flipInY; + backface-visibility: visible !important; + animation-name: flipInY; +} +@-webkit-keyframes flipOutY { + 0% { + -webkit-transform: perspective(400px) rotateY(0deg); + opacity: 1; + } + 100% { + -webkit-transform: perspective(400px) rotateY(90deg); + opacity: 0; + } +} +@-moz-keyframes flipOutY { + 0% { + -moz-transform: perspective(400px) rotateY(0deg); + opacity: 1; + } + 100% { + -moz-transform: perspective(400px) rotateY(90deg); + opacity: 0; + } +} +@-o-keyframes flipOutY { + 0% { + -o-transform: perspective(400px) rotateY(0deg); + opacity: 1; + } + 100% { + -o-transform: perspective(400px) rotateY(90deg); + opacity: 0; + } +} +@keyframes flipOutY { + 0% { + transform: perspective(400px) rotateY(0deg); + opacity: 1; + } + 100% { + transform: perspective(400px) rotateY(90deg); + 
opacity: 0; + } +} + +.flipOutY { + -webkit-backface-visibility: visible !important; + -webkit-animation-name: flipOutY; + -moz-backface-visibility: visible !important; + -moz-animation-name: flipOutY; + -o-backface-visibility: visible !important; + -o-animation-name: flipOutY; + backface-visibility: visible !important; + animation-name: flipOutY; +} +@-webkit-keyframes fadeIn { + 0% { + opacity: 0; + } + 100% { + opacity: 1; + } +} + +@-moz-keyframes fadeIn { + 0% { + opacity: 0; + } + 100% { + opacity: 1; + } +} + +@-o-keyframes fadeIn { + 0% { + opacity: 0; + } + 100% { + opacity: 1; + } +} + +@keyframes fadeIn { + 0% { + opacity: 0; + } + 100% { + opacity: 1; + } +} + +.fadeIn { + -webkit-animation-name: fadeIn; + -moz-animation-name: fadeIn; + -o-animation-name: fadeIn; + animation-name: fadeIn; +} +@-webkit-keyframes fadeInUp { + 0% { + opacity: 0; + -webkit-transform: translateY(20px); + } + + 100% { + opacity: 1; + -webkit-transform: translateY(0); + } +} + +@-moz-keyframes fadeInUp { + 0% { + opacity: 0; + -moz-transform: translateY(20px); + } + + 100% { + opacity: 1; + -moz-transform: translateY(0); + } +} + +@-o-keyframes fadeInUp { + 0% { + opacity: 0; + -o-transform: translateY(20px); + } + + 100% { + opacity: 1; + -o-transform: translateY(0); + } +} + +@keyframes fadeInUp { + 0% { + opacity: 0; + transform: translateY(20px); + } + + 100% { + opacity: 1; + transform: translateY(0); + } +} + +.fadeInUp { + -webkit-animation-name: fadeInUp; + -moz-animation-name: fadeInUp; + -o-animation-name: fadeInUp; + animation-name: fadeInUp; +} +@-webkit-keyframes fadeInDown { + 0% { + opacity: 0; + -webkit-transform: translateY(-20px); + } + + 100% { + opacity: 1; + -webkit-transform: translateY(0); + } +} + +@-moz-keyframes fadeInDown { + 0% { + opacity: 0; + -moz-transform: translateY(-20px); + } + + 100% { + opacity: 1; + -moz-transform: translateY(0); + } +} + +@-o-keyframes fadeInDown { + 0% { + opacity: 0; + -o-transform: translateY(-20px); + } + + 100% { + opacity: 1; + -o-transform: translateY(0); + } +} + +@keyframes fadeInDown { + 0% { + opacity: 0; + transform: translateY(-20px); + } + + 100% { + opacity: 1; + transform: translateY(0); + } +} + +.fadeInDown { + -webkit-animation-name: fadeInDown; + -moz-animation-name: fadeInDown; + -o-animation-name: fadeInDown; + animation-name: fadeInDown; +} +@-webkit-keyframes fadeInLeft { + 0% { + opacity: 0; + -webkit-transform: translateX(-20px); + } + + 100% { + opacity: 1; + -webkit-transform: translateX(0); + } +} + +@-moz-keyframes fadeInLeft { + 0% { + opacity: 0; + -moz-transform: translateX(-20px); + } + + 100% { + opacity: 1; + -moz-transform: translateX(0); + } +} + +@-o-keyframes fadeInLeft { + 0% { + opacity: 0; + -o-transform: translateX(-20px); + } + + 100% { + opacity: 1; + -o-transform: translateX(0); + } +} + +@keyframes fadeInLeft { + 0% { + opacity: 0; + transform: translateX(-20px); + } + + 100% { + opacity: 1; + transform: translateX(0); + } +} + +.fadeInLeft { + -webkit-animation-name: fadeInLeft; + -moz-animation-name: fadeInLeft; + -o-animation-name: fadeInLeft; + animation-name: fadeInLeft; +} +@-webkit-keyframes fadeInRight { + 0% { + opacity: 0; + -webkit-transform: translateX(20px); + } + + 100% { + opacity: 1; + -webkit-transform: translateX(0); + } +} + +@-moz-keyframes fadeInRight { + 0% { + opacity: 0; + -moz-transform: translateX(20px); + } + + 100% { + opacity: 1; + -moz-transform: translateX(0); + } +} + +@-o-keyframes fadeInRight { + 0% { + opacity: 0; + -o-transform: translateX(20px); + } + + 100% { + 
opacity: 1; + -o-transform: translateX(0); + } +} + +@keyframes fadeInRight { + 0% { + opacity: 0; + transform: translateX(20px); + } + + 100% { + opacity: 1; + transform: translateX(0); + } +} + +.fadeInRight { + -webkit-animation-name: fadeInRight; + -moz-animation-name: fadeInRight; + -o-animation-name: fadeInRight; + animation-name: fadeInRight; +} +@-webkit-keyframes fadeInUpBig { + 0% { + opacity: 0; + -webkit-transform: translateY(2000px); + } + + 100% { + opacity: 1; + -webkit-transform: translateY(0); + } +} + +@-moz-keyframes fadeInUpBig { + 0% { + opacity: 0; + -moz-transform: translateY(2000px); + } + + 100% { + opacity: 1; + -moz-transform: translateY(0); + } +} + +@-o-keyframes fadeInUpBig { + 0% { + opacity: 0; + -o-transform: translateY(2000px); + } + + 100% { + opacity: 1; + -o-transform: translateY(0); + } +} + +@keyframes fadeInUpBig { + 0% { + opacity: 0; + transform: translateY(2000px); + } + + 100% { + opacity: 1; + transform: translateY(0); + } +} + +.fadeInUpBig { + -webkit-animation-name: fadeInUpBig; + -moz-animation-name: fadeInUpBig; + -o-animation-name: fadeInUpBig; + animation-name: fadeInUpBig; +} +@-webkit-keyframes fadeInDownBig { + 0% { + opacity: 0; + -webkit-transform: translateY(-2000px); + } + + 100% { + opacity: 1; + -webkit-transform: translateY(0); + } +} + +@-moz-keyframes fadeInDownBig { + 0% { + opacity: 0; + -moz-transform: translateY(-2000px); + } + + 100% { + opacity: 1; + -moz-transform: translateY(0); + } +} + +@-o-keyframes fadeInDownBig { + 0% { + opacity: 0; + -o-transform: translateY(-2000px); + } + + 100% { + opacity: 1; + -o-transform: translateY(0); + } +} + +@keyframes fadeInDownBig { + 0% { + opacity: 0; + transform: translateY(-2000px); + } + + 100% { + opacity: 1; + transform: translateY(0); + } +} + +.fadeInDownBig { + -webkit-animation-name: fadeInDownBig; + -moz-animation-name: fadeInDownBig; + -o-animation-name: fadeInDownBig; + animation-name: fadeInDownBig; +} +@-webkit-keyframes fadeInLeftBig { + 0% { + opacity: 0; + -webkit-transform: translateX(-2000px); + } + + 100% { + opacity: 1; + -webkit-transform: translateX(0); + } +} +@-moz-keyframes fadeInLeftBig { + 0% { + opacity: 0; + -moz-transform: translateX(-2000px); + } + + 100% { + opacity: 1; + -moz-transform: translateX(0); + } +} +@-o-keyframes fadeInLeftBig { + 0% { + opacity: 0; + -o-transform: translateX(-2000px); + } + + 100% { + opacity: 1; + -o-transform: translateX(0); + } +} +@keyframes fadeInLeftBig { + 0% { + opacity: 0; + transform: translateX(-2000px); + } + + 100% { + opacity: 1; + transform: translateX(0); + } +} + +.fadeInLeftBig { + -webkit-animation-name: fadeInLeftBig; + -moz-animation-name: fadeInLeftBig; + -o-animation-name: fadeInLeftBig; + animation-name: fadeInLeftBig; +} +@-webkit-keyframes fadeInRightBig { + 0% { + opacity: 0; + -webkit-transform: translateX(2000px); + } + + 100% { + opacity: 1; + -webkit-transform: translateX(0); + } +} + +@-moz-keyframes fadeInRightBig { + 0% { + opacity: 0; + -moz-transform: translateX(2000px); + } + + 100% { + opacity: 1; + -moz-transform: translateX(0); + } +} + +@-o-keyframes fadeInRightBig { + 0% { + opacity: 0; + -o-transform: translateX(2000px); + } + + 100% { + opacity: 1; + -o-transform: translateX(0); + } +} + +@keyframes fadeInRightBig { + 0% { + opacity: 0; + transform: translateX(2000px); + } + + 100% { + opacity: 1; + transform: translateX(0); + } +} + +.fadeInRightBig { + -webkit-animation-name: fadeInRightBig; + -moz-animation-name: fadeInRightBig; + -o-animation-name: fadeInRightBig; + 
animation-name: fadeInRightBig; +} +@-webkit-keyframes fadeOut { + 0% { + opacity: 1; + } + 100% { + opacity: 0; + } +} + +@-moz-keyframes fadeOut { + 0% { + opacity: 1; + } + 100% { + opacity: 0; + } +} + +@-o-keyframes fadeOut { + 0% { + opacity: 1; + } + 100% { + opacity: 0; + } +} + +@keyframes fadeOut { + 0% { + opacity: 1; + } + 100% { + opacity: 0; + } +} + +.fadeOut { + -webkit-animation-name: fadeOut; + -moz-animation-name: fadeOut; + -o-animation-name: fadeOut; + animation-name: fadeOut; +} +@-webkit-keyframes fadeOutUp { + 0% { + opacity: 1; + -webkit-transform: translateY(0); + } + + 100% { + opacity: 0; + -webkit-transform: translateY(-20px); + } +} +@-moz-keyframes fadeOutUp { + 0% { + opacity: 1; + -moz-transform: translateY(0); + } + + 100% { + opacity: 0; + -moz-transform: translateY(-20px); + } +} +@-o-keyframes fadeOutUp { + 0% { + opacity: 1; + -o-transform: translateY(0); + } + + 100% { + opacity: 0; + -o-transform: translateY(-20px); + } +} +@keyframes fadeOutUp { + 0% { + opacity: 1; + transform: translateY(0); + } + + 100% { + opacity: 0; + transform: translateY(-20px); + } +} + +.fadeOutUp { + -webkit-animation-name: fadeOutUp; + -moz-animation-name: fadeOutUp; + -o-animation-name: fadeOutUp; + animation-name: fadeOutUp; +} +@-webkit-keyframes fadeOutDown { + 0% { + opacity: 1; + -webkit-transform: translateY(0); + } + + 100% { + opacity: 0; + -webkit-transform: translateY(20px); + } +} + +@-moz-keyframes fadeOutDown { + 0% { + opacity: 1; + -moz-transform: translateY(0); + } + + 100% { + opacity: 0; + -moz-transform: translateY(20px); + } +} + +@-o-keyframes fadeOutDown { + 0% { + opacity: 1; + -o-transform: translateY(0); + } + + 100% { + opacity: 0; + -o-transform: translateY(20px); + } +} + +@keyframes fadeOutDown { + 0% { + opacity: 1; + transform: translateY(0); + } + + 100% { + opacity: 0; + transform: translateY(20px); + } +} + +.fadeOutDown { + -webkit-animation-name: fadeOutDown; + -moz-animation-name: fadeOutDown; + -o-animation-name: fadeOutDown; + animation-name: fadeOutDown; +} +@-webkit-keyframes fadeOutLeft { + 0% { + opacity: 1; + -webkit-transform: translateX(0); + } + + 100% { + opacity: 0; + -webkit-transform: translateX(-20px); + } +} + +@-moz-keyframes fadeOutLeft { + 0% { + opacity: 1; + -moz-transform: translateX(0); + } + + 100% { + opacity: 0; + -moz-transform: translateX(-20px); + } +} + +@-o-keyframes fadeOutLeft { + 0% { + opacity: 1; + -o-transform: translateX(0); + } + + 100% { + opacity: 0; + -o-transform: translateX(-20px); + } +} + +@keyframes fadeOutLeft { + 0% { + opacity: 1; + transform: translateX(0); + } + + 100% { + opacity: 0; + transform: translateX(-20px); + } +} + +.fadeOutLeft { + -webkit-animation-name: fadeOutLeft; + -moz-animation-name: fadeOutLeft; + -o-animation-name: fadeOutLeft; + animation-name: fadeOutLeft; +} +@-webkit-keyframes fadeOutRight { + 0% { + opacity: 1; + -webkit-transform: translateX(0); + } + + 100% { + opacity: 0; + -webkit-transform: translateX(20px); + } +} + +@-moz-keyframes fadeOutRight { + 0% { + opacity: 1; + -moz-transform: translateX(0); + } + + 100% { + opacity: 0; + -moz-transform: translateX(20px); + } +} + +@-o-keyframes fadeOutRight { + 0% { + opacity: 1; + -o-transform: translateX(0); + } + + 100% { + opacity: 0; + -o-transform: translateX(20px); + } +} + +@keyframes fadeOutRight { + 0% { + opacity: 1; + transform: translateX(0); + } + + 100% { + opacity: 0; + transform: translateX(20px); + } +} + +.fadeOutRight { + -webkit-animation-name: fadeOutRight; + -moz-animation-name: 
fadeOutRight; + -o-animation-name: fadeOutRight; + animation-name: fadeOutRight; +} +@-webkit-keyframes fadeOutUpBig { + 0% { + opacity: 1; + -webkit-transform: translateY(0); + } + + 100% { + opacity: 0; + -webkit-transform: translateY(-2000px); + } +} + +@-moz-keyframes fadeOutUpBig { + 0% { + opacity: 1; + -moz-transform: translateY(0); + } + + 100% { + opacity: 0; + -moz-transform: translateY(-2000px); + } +} + +@-o-keyframes fadeOutUpBig { + 0% { + opacity: 1; + -o-transform: translateY(0); + } + + 100% { + opacity: 0; + -o-transform: translateY(-2000px); + } +} + +@keyframes fadeOutUpBig { + 0% { + opacity: 1; + transform: translateY(0); + } + + 100% { + opacity: 0; + transform: translateY(-2000px); + } +} + +.fadeOutUpBig { + -webkit-animation-name: fadeOutUpBig; + -moz-animation-name: fadeOutUpBig; + -o-animation-name: fadeOutUpBig; + animation-name: fadeOutUpBig; +} +@-webkit-keyframes fadeOutDownBig { + 0% { + opacity: 1; + -webkit-transform: translateY(0); + } + + 100% { + opacity: 0; + -webkit-transform: translateY(2000px); + } +} + +@-moz-keyframes fadeOutDownBig { + 0% { + opacity: 1; + -moz-transform: translateY(0); + } + + 100% { + opacity: 0; + -moz-transform: translateY(2000px); + } +} + +@-o-keyframes fadeOutDownBig { + 0% { + opacity: 1; + -o-transform: translateY(0); + } + + 100% { + opacity: 0; + -o-transform: translateY(2000px); + } +} + +@keyframes fadeOutDownBig { + 0% { + opacity: 1; + transform: translateY(0); + } + + 100% { + opacity: 0; + transform: translateY(2000px); + } +} + +.fadeOutDownBig { + -webkit-animation-name: fadeOutDownBig; + -moz-animation-name: fadeOutDownBig; + -o-animation-name: fadeOutDownBig; + animation-name: fadeOutDownBig; +} +@-webkit-keyframes fadeOutLeftBig { + 0% { + opacity: 1; + -webkit-transform: translateX(0); + } + + 100% { + opacity: 0; + -webkit-transform: translateX(-2000px); + } +} + +@-moz-keyframes fadeOutLeftBig { + 0% { + opacity: 1; + -moz-transform: translateX(0); + } + + 100% { + opacity: 0; + -moz-transform: translateX(-2000px); + } +} + +@-o-keyframes fadeOutLeftBig { + 0% { + opacity: 1; + -o-transform: translateX(0); + } + + 100% { + opacity: 0; + -o-transform: translateX(-2000px); + } +} + +@keyframes fadeOutLeftBig { + 0% { + opacity: 1; + transform: translateX(0); + } + + 100% { + opacity: 0; + transform: translateX(-2000px); + } +} + +.fadeOutLeftBig { + -webkit-animation-name: fadeOutLeftBig; + -moz-animation-name: fadeOutLeftBig; + -o-animation-name: fadeOutLeftBig; + animation-name: fadeOutLeftBig; +} +@-webkit-keyframes fadeOutRightBig { + 0% { + opacity: 1; + -webkit-transform: translateX(0); + } + + 100% { + opacity: 0; + -webkit-transform: translateX(2000px); + } +} +@-moz-keyframes fadeOutRightBig { + 0% { + opacity: 1; + -moz-transform: translateX(0); + } + + 100% { + opacity: 0; + -moz-transform: translateX(2000px); + } +} +@-o-keyframes fadeOutRightBig { + 0% { + opacity: 1; + -o-transform: translateX(0); + } + + 100% { + opacity: 0; + -o-transform: translateX(2000px); + } +} +@keyframes fadeOutRightBig { + 0% { + opacity: 1; + transform: translateX(0); + } + + 100% { + opacity: 0; + transform: translateX(2000px); + } +} + +.fadeOutRightBig { + -webkit-animation-name: fadeOutRightBig; + -moz-animation-name: fadeOutRightBig; + -o-animation-name: fadeOutRightBig; + animation-name: fadeOutRightBig; +} +@-webkit-keyframes bounceIn { + 0% { + opacity: 0; + -webkit-transform: scale(0.3); + } + + 50% { + opacity: 1; + -webkit-transform: scale(1.05); + } + + 70% { + -webkit-transform: scale(0.9); + } + + 
100% { + -webkit-transform: scale(1); + } +} + +@-moz-keyframes bounceIn { + 0% { + opacity: 0; + -moz-transform: scale(0.3); + } + + 50% { + opacity: 1; + -moz-transform: scale(1.05); + } + + 70% { + -moz-transform: scale(0.9); + } + + 100% { + -moz-transform: scale(1); + } +} + +@-o-keyframes bounceIn { + 0% { + opacity: 0; + -o-transform: scale(0.3); + } + + 50% { + opacity: 1; + -o-transform: scale(1.05); + } + + 70% { + -o-transform: scale(0.9); + } + + 100% { + -o-transform: scale(1); + } +} + +@keyframes bounceIn { + 0% { + opacity: 0; + transform: scale(0.3); + } + + 50% { + opacity: 1; + transform: scale(1.05); + } + + 70% { + transform: scale(0.9); + } + + 100% { + transform: scale(1); + } +} + +.bounceIn { + -webkit-animation-name: bounceIn; + -moz-animation-name: bounceIn; + -o-animation-name: bounceIn; + animation-name: bounceIn; +} +@-webkit-keyframes bounceInUp { + 0% { + opacity: 0; + -webkit-transform: translateY(2000px); + } + + 60% { + opacity: 1; + -webkit-transform: translateY(-30px); + } + + 80% { + -webkit-transform: translateY(10px); + } + + 100% { + -webkit-transform: translateY(0); + } +} +@-moz-keyframes bounceInUp { + 0% { + opacity: 0; + -moz-transform: translateY(2000px); + } + + 60% { + opacity: 1; + -moz-transform: translateY(-30px); + } + + 80% { + -moz-transform: translateY(10px); + } + + 100% { + -moz-transform: translateY(0); + } +} + +@-o-keyframes bounceInUp { + 0% { + opacity: 0; + -o-transform: translateY(2000px); + } + + 60% { + opacity: 1; + -o-transform: translateY(-30px); + } + + 80% { + -o-transform: translateY(10px); + } + + 100% { + -o-transform: translateY(0); + } +} + +@keyframes bounceInUp { + 0% { + opacity: 0; + transform: translateY(2000px); + } + + 60% { + opacity: 1; + transform: translateY(-30px); + } + + 80% { + transform: translateY(10px); + } + + 100% { + transform: translateY(0); + } +} + +.bounceInUp { + -webkit-animation-name: bounceInUp; + -moz-animation-name: bounceInUp; + -o-animation-name: bounceInUp; + animation-name: bounceInUp; +} +@-webkit-keyframes bounceInDown { + 0% { + opacity: 0; + -webkit-transform: translateY(-2000px); + } + + 60% { + opacity: 1; + -webkit-transform: translateY(30px); + } + + 80% { + -webkit-transform: translateY(-10px); + } + + 100% { + -webkit-transform: translateY(0); + } +} + +@-moz-keyframes bounceInDown { + 0% { + opacity: 0; + -moz-transform: translateY(-2000px); + } + + 60% { + opacity: 1; + -moz-transform: translateY(30px); + } + + 80% { + -moz-transform: translateY(-10px); + } + + 100% { + -moz-transform: translateY(0); + } +} + +@-o-keyframes bounceInDown { + 0% { + opacity: 0; + -o-transform: translateY(-2000px); + } + + 60% { + opacity: 1; + -o-transform: translateY(30px); + } + + 80% { + -o-transform: translateY(-10px); + } + + 100% { + -o-transform: translateY(0); + } +} + +@keyframes bounceInDown { + 0% { + opacity: 0; + transform: translateY(-2000px); + } + + 60% { + opacity: 1; + transform: translateY(30px); + } + + 80% { + transform: translateY(-10px); + } + + 100% { + transform: translateY(0); + } +} + +.bounceInDown { + -webkit-animation-name: bounceInDown; + -moz-animation-name: bounceInDown; + -o-animation-name: bounceInDown; + animation-name: bounceInDown; +} +@-webkit-keyframes bounceInLeft { + 0% { + opacity: 0; + -webkit-transform: translateX(-2000px); + } + + 60% { + opacity: 1; + -webkit-transform: translateX(30px); + } + + 80% { + -webkit-transform: translateX(-10px); + } + + 100% { + -webkit-transform: translateX(0); + } +} + +@-moz-keyframes bounceInLeft { + 0% { + 
opacity: 0; + -moz-transform: translateX(-2000px); + } + + 60% { + opacity: 1; + -moz-transform: translateX(30px); + } + + 80% { + -moz-transform: translateX(-10px); + } + + 100% { + -moz-transform: translateX(0); + } +} + +@-o-keyframes bounceInLeft { + 0% { + opacity: 0; + -o-transform: translateX(-2000px); + } + + 60% { + opacity: 1; + -o-transform: translateX(30px); + } + + 80% { + -o-transform: translateX(-10px); + } + + 100% { + -o-transform: translateX(0); + } +} + +@keyframes bounceInLeft { + 0% { + opacity: 0; + transform: translateX(-2000px); + } + + 60% { + opacity: 1; + transform: translateX(30px); + } + + 80% { + transform: translateX(-10px); + } + + 100% { + transform: translateX(0); + } +} + +.bounceInLeft { + -webkit-animation-name: bounceInLeft; + -moz-animation-name: bounceInLeft; + -o-animation-name: bounceInLeft; + animation-name: bounceInLeft; +} +@-webkit-keyframes bounceInRight { + 0% { + opacity: 0; + -webkit-transform: translateX(2000px); + } + + 60% { + opacity: 1; + -webkit-transform: translateX(-30px); + } + + 80% { + -webkit-transform: translateX(10px); + } + + 100% { + -webkit-transform: translateX(0); + } +} + +@-moz-keyframes bounceInRight { + 0% { + opacity: 0; + -moz-transform: translateX(2000px); + } + + 60% { + opacity: 1; + -moz-transform: translateX(-30px); + } + + 80% { + -moz-transform: translateX(10px); + } + + 100% { + -moz-transform: translateX(0); + } +} + +@-o-keyframes bounceInRight { + 0% { + opacity: 0; + -o-transform: translateX(2000px); + } + + 60% { + opacity: 1; + -o-transform: translateX(-30px); + } + + 80% { + -o-transform: translateX(10px); + } + + 100% { + -o-transform: translateX(0); + } +} + +@keyframes bounceInRight { + 0% { + opacity: 0; + transform: translateX(2000px); + } + + 60% { + opacity: 1; + transform: translateX(-30px); + } + + 80% { + transform: translateX(10px); + } + + 100% { + transform: translateX(0); + } +} + +.bounceInRight { + -webkit-animation-name: bounceInRight; + -moz-animation-name: bounceInRight; + -o-animation-name: bounceInRight; + animation-name: bounceInRight; +} +@-webkit-keyframes bounceOut { + 0% { + -webkit-transform: scale(1); + } + + 25% { + -webkit-transform: scale(0.95); + } + + 50% { + opacity: 1; + -webkit-transform: scale(1.1); + } + + 100% { + opacity: 0; + -webkit-transform: scale(0.3); + } +} + +@-moz-keyframes bounceOut { + 0% { + -moz-transform: scale(1); + } + + 25% { + -moz-transform: scale(0.95); + } + + 50% { + opacity: 1; + -moz-transform: scale(1.1); + } + + 100% { + opacity: 0; + -moz-transform: scale(0.3); + } +} + +@-o-keyframes bounceOut { + 0% { + -o-transform: scale(1); + } + + 25% { + -o-transform: scale(0.95); + } + + 50% { + opacity: 1; + -o-transform: scale(1.1); + } + + 100% { + opacity: 0; + -o-transform: scale(0.3); + } +} + +@keyframes bounceOut { + 0% { + transform: scale(1); + } + + 25% { + transform: scale(0.95); + } + + 50% { + opacity: 1; + transform: scale(1.1); + } + + 100% { + opacity: 0; + transform: scale(0.3); + } +} + +.bounceOut { + -webkit-animation-name: bounceOut; + -moz-animation-name: bounceOut; + -o-animation-name: bounceOut; + animation-name: bounceOut; +} +@-webkit-keyframes bounceOutUp { + 0% { + -webkit-transform: translateY(0); + } + + 20% { + opacity: 1; + -webkit-transform: translateY(20px); + } + + 100% { + opacity: 0; + -webkit-transform: translateY(-2000px); + } +} + +@-moz-keyframes bounceOutUp { + 0% { + -moz-transform: translateY(0); + } + + 20% { + opacity: 1; + -moz-transform: translateY(20px); + } + + 100% { + opacity: 0; + 
-moz-transform: translateY(-2000px); + } +} + +@-o-keyframes bounceOutUp { + 0% { + -o-transform: translateY(0); + } + + 20% { + opacity: 1; + -o-transform: translateY(20px); + } + + 100% { + opacity: 0; + -o-transform: translateY(-2000px); + } +} + +@keyframes bounceOutUp { + 0% { + transform: translateY(0); + } + + 20% { + opacity: 1; + transform: translateY(20px); + } + + 100% { + opacity: 0; + transform: translateY(-2000px); + } +} + +.bounceOutUp { + -webkit-animation-name: bounceOutUp; + -moz-animation-name: bounceOutUp; + -o-animation-name: bounceOutUp; + animation-name: bounceOutUp; +} +@-webkit-keyframes bounceOutDown { + 0% { + -webkit-transform: translateY(0); + } + + 20% { + opacity: 1; + -webkit-transform: translateY(-20px); + } + + 100% { + opacity: 0; + -webkit-transform: translateY(2000px); + } +} + +@-moz-keyframes bounceOutDown { + 0% { + -moz-transform: translateY(0); + } + + 20% { + opacity: 1; + -moz-transform: translateY(-20px); + } + + 100% { + opacity: 0; + -moz-transform: translateY(2000px); + } +} + +@-o-keyframes bounceOutDown { + 0% { + -o-transform: translateY(0); + } + + 20% { + opacity: 1; + -o-transform: translateY(-20px); + } + + 100% { + opacity: 0; + -o-transform: translateY(2000px); + } +} + +@keyframes bounceOutDown { + 0% { + transform: translateY(0); + } + + 20% { + opacity: 1; + transform: translateY(-20px); + } + + 100% { + opacity: 0; + transform: translateY(2000px); + } +} + +.bounceOutDown { + -webkit-animation-name: bounceOutDown; + -moz-animation-name: bounceOutDown; + -o-animation-name: bounceOutDown; + animation-name: bounceOutDown; +} +@-webkit-keyframes bounceOutLeft { + 0% { + -webkit-transform: translateX(0); + } + + 20% { + opacity: 1; + -webkit-transform: translateX(20px); + } + + 100% { + opacity: 0; + -webkit-transform: translateX(-2000px); + } +} + +@-moz-keyframes bounceOutLeft { + 0% { + -moz-transform: translateX(0); + } + + 20% { + opacity: 1; + -moz-transform: translateX(20px); + } + + 100% { + opacity: 0; + -moz-transform: translateX(-2000px); + } +} + +@-o-keyframes bounceOutLeft { + 0% { + -o-transform: translateX(0); + } + + 20% { + opacity: 1; + -o-transform: translateX(20px); + } + + 100% { + opacity: 0; + -o-transform: translateX(-2000px); + } +} + +@keyframes bounceOutLeft { + 0% { + transform: translateX(0); + } + + 20% { + opacity: 1; + transform: translateX(20px); + } + + 100% { + opacity: 0; + transform: translateX(-2000px); + } +} + +.bounceOutLeft { + -webkit-animation-name: bounceOutLeft; + -moz-animation-name: bounceOutLeft; + -o-animation-name: bounceOutLeft; + animation-name: bounceOutLeft; +} +@-webkit-keyframes bounceOutRight { + 0% { + -webkit-transform: translateX(0); + } + + 20% { + opacity: 1; + -webkit-transform: translateX(-20px); + } + + 100% { + opacity: 0; + -webkit-transform: translateX(2000px); + } +} + +@-moz-keyframes bounceOutRight { + 0% { + -moz-transform: translateX(0); + } + + 20% { + opacity: 1; + -moz-transform: translateX(-20px); + } + + 100% { + opacity: 0; + -moz-transform: translateX(2000px); + } +} + +@-o-keyframes bounceOutRight { + 0% { + -o-transform: translateX(0); + } + + 20% { + opacity: 1; + -o-transform: translateX(-20px); + } + + 100% { + opacity: 0; + -o-transform: translateX(2000px); + } +} + +@keyframes bounceOutRight { + 0% { + transform: translateX(0); + } + + 20% { + opacity: 1; + transform: translateX(-20px); + } + + 100% { + opacity: 0; + transform: translateX(2000px); + } +} + +.bounceOutRight { + -webkit-animation-name: bounceOutRight; + -moz-animation-name: 
bounceOutRight; + -o-animation-name: bounceOutRight; + animation-name: bounceOutRight; +} +@-webkit-keyframes rotateIn { + 0% { + -webkit-transform-origin: center center; + -webkit-transform: rotate(-200deg); + opacity: 0; + } + + 100% { + -webkit-transform-origin: center center; + -webkit-transform: rotate(0); + opacity: 1; + } +} +@-moz-keyframes rotateIn { + 0% { + -moz-transform-origin: center center; + -moz-transform: rotate(-200deg); + opacity: 0; + } + + 100% { + -moz-transform-origin: center center; + -moz-transform: rotate(0); + opacity: 1; + } +} +@-o-keyframes rotateIn { + 0% { + -o-transform-origin: center center; + -o-transform: rotate(-200deg); + opacity: 0; + } + + 100% { + -o-transform-origin: center center; + -o-transform: rotate(0); + opacity: 1; + } +} +@keyframes rotateIn { + 0% { + transform-origin: center center; + transform: rotate(-200deg); + opacity: 0; + } + + 100% { + transform-origin: center center; + transform: rotate(0); + opacity: 1; + } +} + +.rotateIn { + -webkit-animation-name: rotateIn; + -moz-animation-name: rotateIn; + -o-animation-name: rotateIn; + animation-name: rotateIn; +} +@-webkit-keyframes rotateInUpLeft { + 0% { + -webkit-transform-origin: left bottom; + -webkit-transform: rotate(90deg); + opacity: 0; + } + + 100% { + -webkit-transform-origin: left bottom; + -webkit-transform: rotate(0); + opacity: 1; + } +} + +@-moz-keyframes rotateInUpLeft { + 0% { + -moz-transform-origin: left bottom; + -moz-transform: rotate(90deg); + opacity: 0; + } + + 100% { + -moz-transform-origin: left bottom; + -moz-transform: rotate(0); + opacity: 1; + } +} + +@-o-keyframes rotateInUpLeft { + 0% { + -o-transform-origin: left bottom; + -o-transform: rotate(90deg); + opacity: 0; + } + + 100% { + -o-transform-origin: left bottom; + -o-transform: rotate(0); + opacity: 1; + } +} + +@keyframes rotateInUpLeft { + 0% { + transform-origin: left bottom; + transform: rotate(90deg); + opacity: 0; + } + + 100% { + transform-origin: left bottom; + transform: rotate(0); + opacity: 1; + } +} + +.rotateInUpLeft { + -webkit-animation-name: rotateInUpLeft; + -moz-animation-name: rotateInUpLeft; + -o-animation-name: rotateInUpLeft; + animation-name: rotateInUpLeft; +} +@-webkit-keyframes rotateInDownLeft { + 0% { + -webkit-transform-origin: left bottom; + -webkit-transform: rotate(-90deg); + opacity: 0; + } + + 100% { + -webkit-transform-origin: left bottom; + -webkit-transform: rotate(0); + opacity: 1; + } +} + +@-moz-keyframes rotateInDownLeft { + 0% { + -moz-transform-origin: left bottom; + -moz-transform: rotate(-90deg); + opacity: 0; + } + + 100% { + -moz-transform-origin: left bottom; + -moz-transform: rotate(0); + opacity: 1; + } +} + +@-o-keyframes rotateInDownLeft { + 0% { + -o-transform-origin: left bottom; + -o-transform: rotate(-90deg); + opacity: 0; + } + + 100% { + -o-transform-origin: left bottom; + -o-transform: rotate(0); + opacity: 1; + } +} + +@keyframes rotateInDownLeft { + 0% { + transform-origin: left bottom; + transform: rotate(-90deg); + opacity: 0; + } + + 100% { + transform-origin: left bottom; + transform: rotate(0); + opacity: 1; + } +} + +.rotateInDownLeft { + -webkit-animation-name: rotateInDownLeft; + -moz-animation-name: rotateInDownLeft; + -o-animation-name: rotateInDownLeft; + animation-name: rotateInDownLeft; +} +@-webkit-keyframes rotateInUpRight { + 0% { + -webkit-transform-origin: right bottom; + -webkit-transform: rotate(-90deg); + opacity: 0; + } + + 100% { + -webkit-transform-origin: right bottom; + -webkit-transform: rotate(0); + opacity: 1; + } 
+} + +@-moz-keyframes rotateInUpRight { + 0% { + -moz-transform-origin: right bottom; + -moz-transform: rotate(-90deg); + opacity: 0; + } + + 100% { + -moz-transform-origin: right bottom; + -moz-transform: rotate(0); + opacity: 1; + } +} + +@-o-keyframes rotateInUpRight { + 0% { + -o-transform-origin: right bottom; + -o-transform: rotate(-90deg); + opacity: 0; + } + + 100% { + -o-transform-origin: right bottom; + -o-transform: rotate(0); + opacity: 1; + } +} + +@keyframes rotateInUpRight { + 0% { + transform-origin: right bottom; + transform: rotate(-90deg); + opacity: 0; + } + + 100% { + transform-origin: right bottom; + transform: rotate(0); + opacity: 1; + } +} + +.rotateInUpRight { + -webkit-animation-name: rotateInUpRight; + -moz-animation-name: rotateInUpRight; + -o-animation-name: rotateInUpRight; + animation-name: rotateInUpRight; +} +@-webkit-keyframes rotateInDownRight { + 0% { + -webkit-transform-origin: right bottom; + -webkit-transform: rotate(90deg); + opacity: 0; + } + + 100% { + -webkit-transform-origin: right bottom; + -webkit-transform: rotate(0); + opacity: 1; + } +} + +@-moz-keyframes rotateInDownRight { + 0% { + -moz-transform-origin: right bottom; + -moz-transform: rotate(90deg); + opacity: 0; + } + + 100% { + -moz-transform-origin: right bottom; + -moz-transform: rotate(0); + opacity: 1; + } +} + +@-o-keyframes rotateInDownRight { + 0% { + -o-transform-origin: right bottom; + -o-transform: rotate(90deg); + opacity: 0; + } + + 100% { + -o-transform-origin: right bottom; + -o-transform: rotate(0); + opacity: 1; + } +} + +@keyframes rotateInDownRight { + 0% { + transform-origin: right bottom; + transform: rotate(90deg); + opacity: 0; + } + + 100% { + transform-origin: right bottom; + transform: rotate(0); + opacity: 1; + } +} + +.rotateInDownRight { + -webkit-animation-name: rotateInDownRight; + -moz-animation-name: rotateInDownRight; + -o-animation-name: rotateInDownRight; + animation-name: rotateInDownRight; +} +@-webkit-keyframes rotateOut { + 0% { + -webkit-transform-origin: center center; + -webkit-transform: rotate(0); + opacity: 1; + } + + 100% { + -webkit-transform-origin: center center; + -webkit-transform: rotate(200deg); + opacity: 0; + } +} + +@-moz-keyframes rotateOut { + 0% { + -moz-transform-origin: center center; + -moz-transform: rotate(0); + opacity: 1; + } + + 100% { + -moz-transform-origin: center center; + -moz-transform: rotate(200deg); + opacity: 0; + } +} + +@-o-keyframes rotateOut { + 0% { + -o-transform-origin: center center; + -o-transform: rotate(0); + opacity: 1; + } + + 100% { + -o-transform-origin: center center; + -o-transform: rotate(200deg); + opacity: 0; + } +} + +@keyframes rotateOut { + 0% { + transform-origin: center center; + transform: rotate(0); + opacity: 1; + } + + 100% { + transform-origin: center center; + transform: rotate(200deg); + opacity: 0; + } +} + +.rotateOut { + -webkit-animation-name: rotateOut; + -moz-animation-name: rotateOut; + -o-animation-name: rotateOut; + animation-name: rotateOut; +} +@-webkit-keyframes rotateOutUpLeft { + 0% { + -webkit-transform-origin: left bottom; + -webkit-transform: rotate(0); + opacity: 1; + } + + 100% { + -webkit-transform-origin: left bottom; + -webkit-transform: rotate(-90deg); + opacity: 0; + } +} + +@-moz-keyframes rotateOutUpLeft { + 0% { + -moz-transform-origin: left bottom; + -moz-transform: rotate(0); + opacity: 1; + } + + 100% { + -moz-transform-origin: left bottom; + -moz-transform: rotate(-90deg); + opacity: 0; + } +} + +@-o-keyframes rotateOutUpLeft { + 0% { + 
-o-transform-origin: left bottom; + -o-transform: rotate(0); + opacity: 1; + } + + 100% { + -o-transform-origin: left bottom; + -o-transform: rotate(-90deg); + opacity: 0; + } +} + +@keyframes rotateOutUpLeft { + 0% { + transform-origin: left bottom; + transform: rotate(0); + opacity: 1; + } + + 100% { + transform-origin: left bottom; + transform: rotate(-90deg); + opacity: 0; + } +} + +.rotateOutUpLeft { + -webkit-animation-name: rotateOutUpLeft; + -moz-animation-name: rotateOutUpLeft; + -o-animation-name: rotateOutUpLeft; + animation-name: rotateOutUpLeft; +} +@-webkit-keyframes rotateOutDownLeft { + 0% { + -webkit-transform-origin: left bottom; + -webkit-transform: rotate(0); + opacity: 1; + } + + 100% { + -webkit-transform-origin: left bottom; + -webkit-transform: rotate(90deg); + opacity: 0; + } +} + +@-moz-keyframes rotateOutDownLeft { + 0% { + -moz-transform-origin: left bottom; + -moz-transform: rotate(0); + opacity: 1; + } + + 100% { + -moz-transform-origin: left bottom; + -moz-transform: rotate(90deg); + opacity: 0; + } +} + +@-o-keyframes rotateOutDownLeft { + 0% { + -o-transform-origin: left bottom; + -o-transform: rotate(0); + opacity: 1; + } + + 100% { + -o-transform-origin: left bottom; + -o-transform: rotate(90deg); + opacity: 0; + } +} + +@keyframes rotateOutDownLeft { + 0% { + transform-origin: left bottom; + transform: rotate(0); + opacity: 1; + } + + 100% { + transform-origin: left bottom; + transform: rotate(90deg); + opacity: 0; + } +} + +.rotateOutDownLeft { + -webkit-animation-name: rotateOutDownLeft; + -moz-animation-name: rotateOutDownLeft; + -o-animation-name: rotateOutDownLeft; + animation-name: rotateOutDownLeft; +} +@-webkit-keyframes rotateOutUpRight { + 0% { + -webkit-transform-origin: right bottom; + -webkit-transform: rotate(0); + opacity: 1; + } + + 100% { + -webkit-transform-origin: right bottom; + -webkit-transform: rotate(90deg); + opacity: 0; + } +} + +@-moz-keyframes rotateOutUpRight { + 0% { + -moz-transform-origin: right bottom; + -moz-transform: rotate(0); + opacity: 1; + } + + 100% { + -moz-transform-origin: right bottom; + -moz-transform: rotate(90deg); + opacity: 0; + } +} + +@-o-keyframes rotateOutUpRight { + 0% { + -o-transform-origin: right bottom; + -o-transform: rotate(0); + opacity: 1; + } + + 100% { + -o-transform-origin: right bottom; + -o-transform: rotate(90deg); + opacity: 0; + } +} + +@keyframes rotateOutUpRight { + 0% { + transform-origin: right bottom; + transform: rotate(0); + opacity: 1; + } + + 100% { + transform-origin: right bottom; + transform: rotate(90deg); + opacity: 0; + } +} + +.rotateOutUpRight { + -webkit-animation-name: rotateOutUpRight; + -moz-animation-name: rotateOutUpRight; + -o-animation-name: rotateOutUpRight; + animation-name: rotateOutUpRight; +} +@-webkit-keyframes rotateOutDownRight { + 0% { + -webkit-transform-origin: right bottom; + -webkit-transform: rotate(0); + opacity: 1; + } + + 100% { + -webkit-transform-origin: right bottom; + -webkit-transform: rotate(-90deg); + opacity: 0; + } +} + +@-moz-keyframes rotateOutDownRight { + 0% { + -moz-transform-origin: right bottom; + -moz-transform: rotate(0); + opacity: 1; + } + + 100% { + -moz-transform-origin: right bottom; + -moz-transform: rotate(-90deg); + opacity: 0; + } +} + +@-o-keyframes rotateOutDownRight { + 0% { + -o-transform-origin: right bottom; + -o-transform: rotate(0); + opacity: 1; + } + + 100% { + -o-transform-origin: right bottom; + -o-transform: rotate(-90deg); + opacity: 0; + } +} + +@keyframes rotateOutDownRight { + 0% { + 
transform-origin: right bottom; + transform: rotate(0); + opacity: 1; + } + + 100% { + transform-origin: right bottom; + transform: rotate(-90deg); + opacity: 0; + } +} + +.rotateOutDownRight { + -webkit-animation-name: rotateOutDownRight; + -moz-animation-name: rotateOutDownRight; + -o-animation-name: rotateOutDownRight; + animation-name: rotateOutDownRight; +} +@-webkit-keyframes hinge { + 0% { + -webkit-transform: rotate(0); + -webkit-transform-origin: top left; + -webkit-animation-timing-function: ease-in-out; + } + 20%, + 60% { + -webkit-transform: rotate(80deg); + -webkit-transform-origin: top left; + -webkit-animation-timing-function: ease-in-out; + } + 40% { + -webkit-transform: rotate(60deg); + -webkit-transform-origin: top left; + -webkit-animation-timing-function: ease-in-out; + } + 80% { + -webkit-transform: rotate(60deg) translateY(0); + opacity: 1; + -webkit-transform-origin: top left; + -webkit-animation-timing-function: ease-in-out; + } + 100% { + -webkit-transform: translateY(700px); + opacity: 0; + } +} + +@-moz-keyframes hinge { + 0% { + -moz-transform: rotate(0); + -moz-transform-origin: top left; + -moz-animation-timing-function: ease-in-out; + } + 20%, + 60% { + -moz-transform: rotate(80deg); + -moz-transform-origin: top left; + -moz-animation-timing-function: ease-in-out; + } + 40% { + -moz-transform: rotate(60deg); + -moz-transform-origin: top left; + -moz-animation-timing-function: ease-in-out; + } + 80% { + -moz-transform: rotate(60deg) translateY(0); + opacity: 1; + -moz-transform-origin: top left; + -moz-animation-timing-function: ease-in-out; + } + 100% { + -moz-transform: translateY(700px); + opacity: 0; + } +} + +@-o-keyframes hinge { + 0% { + -o-transform: rotate(0); + -o-transform-origin: top left; + -o-animation-timing-function: ease-in-out; + } + 20%, + 60% { + -o-transform: rotate(80deg); + -o-transform-origin: top left; + -o-animation-timing-function: ease-in-out; + } + 40% { + -o-transform: rotate(60deg); + -o-transform-origin: top left; + -o-animation-timing-function: ease-in-out; + } + 80% { + -o-transform: rotate(60deg) translateY(0); + opacity: 1; + -o-transform-origin: top left; + -o-animation-timing-function: ease-in-out; + } + 100% { + -o-transform: translateY(700px); + opacity: 0; + } +} + +@keyframes hinge { + 0% { + transform: rotate(0); + transform-origin: top left; + animation-timing-function: ease-in-out; + } + 20%, + 60% { + transform: rotate(80deg); + transform-origin: top left; + animation-timing-function: ease-in-out; + } + 40% { + transform: rotate(60deg); + transform-origin: top left; + animation-timing-function: ease-in-out; + } + 80% { + transform: rotate(60deg) translateY(0); + opacity: 1; + transform-origin: top left; + animation-timing-function: ease-in-out; + } + 100% { + transform: translateY(700px); + opacity: 0; + } +} + +.hinge { + -webkit-animation-name: hinge; + -moz-animation-name: hinge; + -o-animation-name: hinge; + animation-name: hinge; +} +/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */ + +@-webkit-keyframes rollIn { + 0% { + opacity: 0; + -webkit-transform: translateX(-100%) rotate(-120deg); + } + 100% { + opacity: 1; + -webkit-transform: translateX(0px) rotate(0deg); + } +} + +@-moz-keyframes rollIn { + 0% { + opacity: 0; + -moz-transform: translateX(-100%) rotate(-120deg); + } + 100% { + opacity: 1; + -moz-transform: translateX(0px) rotate(0deg); + } +} + +@-o-keyframes rollIn { + 0% { + opacity: 0; + -o-transform: translateX(-100%) rotate(-120deg); + } + 100% { + opacity: 1; + 
-o-transform: translateX(0px) rotate(0deg); + } +} + +@keyframes rollIn { + 0% { + opacity: 0; + transform: translateX(-100%) rotate(-120deg); + } + 100% { + opacity: 1; + transform: translateX(0px) rotate(0deg); + } +} + +.rollIn { + -webkit-animation-name: rollIn; + -moz-animation-name: rollIn; + -o-animation-name: rollIn; + animation-name: rollIn; +} +/* originally authored by Nick Pettit - https://github.com/nickpettit/glide */ + +@-webkit-keyframes rollOut { + 0% { + opacity: 1; + -webkit-transform: translateX(0px) rotate(0deg); + } + + 100% { + opacity: 0; + -webkit-transform: translateX(100%) rotate(120deg); + } +} + +@-moz-keyframes rollOut { + 0% { + opacity: 1; + -moz-transform: translateX(0px) rotate(0deg); + } + + 100% { + opacity: 0; + -moz-transform: translateX(100%) rotate(120deg); + } +} + +@-o-keyframes rollOut { + 0% { + opacity: 1; + -o-transform: translateX(0px) rotate(0deg); + } + + 100% { + opacity: 0; + -o-transform: translateX(100%) rotate(120deg); + } +} + +@keyframes rollOut { + 0% { + opacity: 1; + transform: translateX(0px) rotate(0deg); + } + + 100% { + opacity: 0; + transform: translateX(100%) rotate(120deg); + } +} + +.rollOut { + -webkit-animation-name: rollOut; + -moz-animation-name: rollOut; + -o-animation-name: rollOut; + animation-name: rollOut; +} + +/* originally authored by Angelo Rohit - https://github.com/angelorohit */ + +@-webkit-keyframes lightSpeedIn { + 0% { + -webkit-transform: translateX(100%) skewX(-30deg); + opacity: 0; + } + 60% { + -webkit-transform: translateX(-20%) skewX(30deg); + opacity: 1; + } + 80% { + -webkit-transform: translateX(0%) skewX(-15deg); + opacity: 1; + } + 100% { + -webkit-transform: translateX(0%) skewX(0deg); + opacity: 1; + } +} + +@-moz-keyframes lightSpeedIn { + 0% { + -moz-transform: translateX(100%) skewX(-30deg); + opacity: 0; + } + 60% { + -moz-transform: translateX(-20%) skewX(30deg); + opacity: 1; + } + 80% { + -moz-transform: translateX(0%) skewX(-15deg); + opacity: 1; + } + 100% { + -moz-transform: translateX(0%) skewX(0deg); + opacity: 1; + } +} + +@-o-keyframes lightSpeedIn { + 0% { + -o-transform: translateX(100%) skewX(-30deg); + opacity: 0; + } + 60% { + -o-transform: translateX(-20%) skewX(30deg); + opacity: 1; + } + 80% { + -o-transform: translateX(0%) skewX(-15deg); + opacity: 1; + } + 100% { + -o-transform: translateX(0%) skewX(0deg); + opacity: 1; + } +} + +@keyframes lightSpeedIn { + 0% { + transform: translateX(100%) skewX(-30deg); + opacity: 0; + } + 60% { + transform: translateX(-20%) skewX(30deg); + opacity: 1; + } + 80% { + transform: translateX(0%) skewX(-15deg); + opacity: 1; + } + 100% { + transform: translateX(0%) skewX(0deg); + opacity: 1; + } +} + +.lightSpeedIn { + -webkit-animation-name: lightSpeedIn; + -moz-animation-name: lightSpeedIn; + -o-animation-name: lightSpeedIn; + animation-name: lightSpeedIn; + + -webkit-animation-timing-function: ease-out; + -moz-animation-timing-function: ease-out; + -o-animation-timing-function: ease-out; + animation-timing-function: ease-out; +} + +.animated.lightSpeedIn { + -webkit-animation-duration: 0.5s; + -moz-animation-duration: 0.5s; + -o-animation-duration: 0.5s; + animation-duration: 0.5s; +} + +/* originally authored by Angelo Rohit - https://github.com/angelorohit */ + +@-webkit-keyframes lightSpeedOut { + 0% { + -webkit-transform: translateX(0%) skewX(0deg); + opacity: 1; + } + 100% { + -webkit-transform: translateX(100%) skewX(-30deg); + opacity: 0; + } +} + +@-moz-keyframes lightSpeedOut { + 0% { + -moz-transform: translateX(0%) 
skewX(0deg); + opacity: 1; + } + 100% { + -moz-transform: translateX(100%) skewX(-30deg); + opacity: 0; + } +} + +@-o-keyframes lightSpeedOut { + 0% { + -o-transform: translateX(0%) skewX(0deg); + opacity: 1; + } + 100% { + -o-transform: translateX(100%) skewX(-30deg); + opacity: 0; + } +} + +@keyframes lightSpeedOut { + 0% { + transform: translateX(0%) skewX(0deg); + opacity: 1; + } + 100% { + transform: translateX(100%) skewX(-30deg); + opacity: 0; + } +} + +.lightSpeedOut { + -webkit-animation-name: lightSpeedOut; + -moz-animation-name: lightSpeedOut; + -o-animation-name: lightSpeedOut; + animation-name: lightSpeedOut; + + -webkit-animation-timing-function: ease-in; + -moz-animation-timing-function: ease-in; + -o-animation-timing-function: ease-in; + animation-timing-function: ease-in; +} + +.animated.lightSpeedOut { + -webkit-animation-duration: 0.25s; + -moz-animation-duration: 0.25s; + -o-animation-duration: 0.25s; + animation-duration: 0.25s; +} + +/* originally authored by Angelo Rohit - https://github.com/angelorohit */ + +@-webkit-keyframes wiggle { + 0% { + -webkit-transform: skewX(9deg); + } + 10% { + -webkit-transform: skewX(-8deg); + } + 20% { + -webkit-transform: skewX(7deg); + } + 30% { + -webkit-transform: skewX(-6deg); + } + 40% { + -webkit-transform: skewX(5deg); + } + 50% { + -webkit-transform: skewX(-4deg); + } + 60% { + -webkit-transform: skewX(3deg); + } + 70% { + -webkit-transform: skewX(-2deg); + } + 80% { + -webkit-transform: skewX(1deg); + } + 90% { + -webkit-transform: skewX(0deg); + } + 100% { + -webkit-transform: skewX(0deg); + } +} + +@-moz-keyframes wiggle { + 0% { + -moz-transform: skewX(9deg); + } + 10% { + -moz-transform: skewX(-8deg); + } + 20% { + -moz-transform: skewX(7deg); + } + 30% { + -moz-transform: skewX(-6deg); + } + 40% { + -moz-transform: skewX(5deg); + } + 50% { + -moz-transform: skewX(-4deg); + } + 60% { + -moz-transform: skewX(3deg); + } + 70% { + -moz-transform: skewX(-2deg); + } + 80% { + -moz-transform: skewX(1deg); + } + 90% { + -moz-transform: skewX(0deg); + } + 100% { + -moz-transform: skewX(0deg); + } +} + +@-o-keyframes wiggle { + 0% { + -o-transform: skewX(9deg); + } + 10% { + -o-transform: skewX(-8deg); + } + 20% { + -o-transform: skewX(7deg); + } + 30% { + -o-transform: skewX(-6deg); + } + 40% { + -o-transform: skewX(5deg); + } + 50% { + -o-transform: skewX(-4deg); + } + 60% { + -o-transform: skewX(3deg); + } + 70% { + -o-transform: skewX(-2deg); + } + 80% { + -o-transform: skewX(1deg); + } + 90% { + -o-transform: skewX(0deg); + } + 100% { + -o-transform: skewX(0deg); + } +} + +@keyframes wiggle { + 0% { + transform: skewX(9deg); + } + 10% { + transform: skewX(-8deg); + } + 20% { + transform: skewX(7deg); + } + 30% { + transform: skewX(-6deg); + } + 40% { + transform: skewX(5deg); + } + 50% { + transform: skewX(-4deg); + } + 60% { + transform: skewX(3deg); + } + 70% { + transform: skewX(-2deg); + } + 80% { + transform: skewX(1deg); + } + 90% { + transform: skewX(0deg); + } + 100% { + transform: skewX(0deg); + } +} + +.wiggle { + -webkit-animation-name: wiggle; + -moz-animation-name: wiggle; + -o-animation-name: wiggle; + animation-name: wiggle; + + -webkit-animation-timing-function: ease-in; + -moz-animation-timing-function: ease-in; + -o-animation-timing-function: ease-in; + animation-timing-function: ease-in; +} + +.animated.wiggle { + -webkit-animation-duration: 0.75s; + -moz-animation-duration: 0.75s; + -o-animation-duration: 0.75s; + animation-duration: 0.75s; +} diff --git 
a/backend/tests/integration/tests/pruning/website/css/bootstrap.min.css b/backend/tests/integration/tests/pruning/website/css/bootstrap.min.css new file mode 100644 index 00000000000..8a7496b6f6a --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/css/bootstrap.min.css @@ -0,0 +1,6136 @@ +/*! + * Bootstrap v3.1.0 (http://getbootstrap.com) + * Copyright 2011-2014 Twitter, Inc. + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) + */ + +/*! normalize.css v3.0.0 | MIT License | git.io/normalize */ +html { + font-family: sans-serif; + -ms-text-size-adjust: 100%; + -webkit-text-size-adjust: 100%; +} +body { + margin: 0; +} +article, +aside, +details, +figcaption, +figure, +footer, +header, +hgroup, +main, +nav, +section, +summary { + display: block; +} +audio, +canvas, +progress, +video { + display: inline-block; + vertical-align: baseline; +} +audio:not([controls]) { + display: none; + height: 0; +} +[hidden], +template { + display: none; +} +a { + background: 0 0; +} +a:active, +a:hover { + outline: 0; +} +abbr[title] { + border-bottom: 1px dotted; +} +b, +strong { + font-weight: 700; +} +dfn { + font-style: italic; +} +h1 { + font-size: 2em; + margin: 0.67em 0; +} +mark { + background: #ff0; + color: #000; +} +small { + font-size: 80%; +} +sub, +sup { + font-size: 75%; + line-height: 0; + position: relative; + vertical-align: baseline; +} +sup { + top: -0.5em; +} +sub { + bottom: -0.25em; +} +img { + border: 0; +} +svg:not(:root) { + overflow: hidden; +} +figure { + margin: 1em 40px; +} +hr { + -moz-box-sizing: content-box; + box-sizing: content-box; + height: 0; +} +pre { + overflow: auto; +} +code, +kbd, +pre, +samp { + font-family: monospace, monospace; + font-size: 1em; +} +button, +input, +optgroup, +select, +textarea { + color: inherit; + font: inherit; + margin: 0; +} +button { + overflow: visible; +} +button, +select { + text-transform: none; +} +button, +html input[type="button"], +input[type="reset"], +input[type="submit"] { + -webkit-appearance: button; + cursor: pointer; +} +button[disabled], +html input[disabled] { + cursor: default; +} +button::-moz-focus-inner, +input::-moz-focus-inner { + border: 0; + padding: 0; +} +input { + line-height: normal; +} +input[type="checkbox"], +input[type="radio"] { + box-sizing: border-box; + padding: 0; +} +input[type="number"]::-webkit-inner-spin-button, +input[type="number"]::-webkit-outer-spin-button { + height: auto; +} +input[type="search"] { + -webkit-appearance: textfield; + -moz-box-sizing: content-box; + -webkit-box-sizing: content-box; + box-sizing: content-box; +} +input[type="search"]::-webkit-search-cancel-button, +input[type="search"]::-webkit-search-decoration { + -webkit-appearance: none; +} +fieldset { + border: 1px solid silver; + margin: 0 2px; + padding: 0.35em 0.625em 0.75em; +} +legend { + border: 0; + padding: 0; +} +textarea { + overflow: auto; +} +optgroup { + font-weight: 700; +} +table { + border-collapse: collapse; + border-spacing: 0; +} +td, +th { + padding: 0; +} +@media print { + * { + text-shadow: none !important; + color: #000 !important; + background: transparent !important; + box-shadow: none !important; + } + a, + a:visited { + text-decoration: underline; + } + a[href]:after { + content: " (" attr(href) ")"; + } + abbr[title]:after { + content: " (" attr(title) ")"; + } + a[href^="javascript:"]:after, + a[href^="#"]:after { + content: ""; + } + pre, + blockquote { + border: 1px solid #999; + page-break-inside: avoid; + } + thead { + display: table-header-group; + 
} + tr, + img { + page-break-inside: avoid; + } + img { + max-width: 100% !important; + } + p, + h2, + h3 { + orphans: 3; + widows: 3; + } + h2, + h3 { + page-break-after: avoid; + } + select { + background: #fff !important; + } + .navbar { + display: none; + } + .table td, + .table th { + background-color: #fff !important; + } + .btn > .caret, + .dropup > .btn > .caret { + border-top-color: #000 !important; + } + .label { + border: 1px solid #000; + } + .table { + border-collapse: collapse !important; + } + .table-bordered th, + .table-bordered td { + border: 1px solid #ddd !important; + } +} +* { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +:before, +:after { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +html { + font-size: 62.5%; + -webkit-tap-highlight-color: rgba(0, 0, 0, 0); +} +body { + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-size: 14px; + line-height: 1.428571429; + color: #333; + background-color: #fff; +} +input, +button, +select, +textarea { + font-family: inherit; + font-size: inherit; + line-height: inherit; +} +a { + color: #428bca; + text-decoration: none; +} +a:hover, +a:focus { + color: #2a6496; + text-decoration: underline; +} +a:focus { + outline: thin dotted; + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px; +} +figure { + margin: 0; +} +img { + vertical-align: middle; +} +.img-responsive { + display: block; + max-width: 100%; + height: auto; +} +.img-rounded { + border-radius: 6px; +} +.img-thumbnail { + padding: 4px; + line-height: 1.428571429; + background-color: #fff; + border: 1px solid #ddd; + border-radius: 4px; + -webkit-transition: all 0.2s ease-in-out; + transition: all 0.2s ease-in-out; + display: inline-block; + max-width: 100%; + height: auto; +} +.img-circle { + border-radius: 50%; +} +hr { + margin-top: 20px; + margin-bottom: 20px; + border: 0; + border-top: 1px solid #eee; +} +.sr-only { + position: absolute; + width: 1px; + height: 1px; + margin: -1px; + padding: 0; + overflow: hidden; + clip: rect(0, 0, 0, 0); + border: 0; +} +h1, +h2, +h3, +h4, +h5, +h6, +.h1, +.h2, +.h3, +.h4, +.h5, +.h6 { + font-family: inherit; + font-weight: 500; + line-height: 1.1; + color: inherit; +} +h1 small, +h2 small, +h3 small, +h4 small, +h5 small, +h6 small, +.h1 small, +.h2 small, +.h3 small, +.h4 small, +.h5 small, +.h6 small, +h1 .small, +h2 .small, +h3 .small, +h4 .small, +h5 .small, +h6 .small, +.h1 .small, +.h2 .small, +.h3 .small, +.h4 .small, +.h5 .small, +.h6 .small { + font-weight: 400; + line-height: 1; + color: #999; +} +h1, +.h1, +h2, +.h2, +h3, +.h3 { + margin-top: 20px; + margin-bottom: 10px; +} +h1 small, +.h1 small, +h2 small, +.h2 small, +h3 small, +.h3 small, +h1 .small, +.h1 .small, +h2 .small, +.h2 .small, +h3 .small, +.h3 .small { + font-size: 65%; +} +h4, +.h4, +h5, +.h5, +h6, +.h6 { + margin-top: 10px; + margin-bottom: 10px; +} +h4 small, +.h4 small, +h5 small, +.h5 small, +h6 small, +.h6 small, +h4 .small, +.h4 .small, +h5 .small, +.h5 .small, +h6 .small, +.h6 .small { + font-size: 75%; +} +h1, +.h1 { + font-size: 36px; +} +h2, +.h2 { + font-size: 30px; +} +h3, +.h3 { + font-size: 24px; +} +h4, +.h4 { + font-size: 18px; +} +h5, +.h5 { + font-size: 14px; +} +h6, +.h6 { + font-size: 12px; +} +p { + margin: 0 0 10px; +} +.lead { + margin-bottom: 20px; + font-size: 16px; + font-weight: 200; + line-height: 1.4; +} +@media (min-width: 768px) { + .lead { + font-size: 21px; + } +} +small, +.small { + 
font-size: 85%; +} +cite { + font-style: normal; +} +.text-left { + text-align: left; +} +.text-right { + text-align: right; +} +.text-center { + text-align: center; +} +.text-justify { + text-align: justify; +} +.text-muted { + color: #999; +} +.text-primary { + color: #428bca; +} +a.text-primary:hover { + color: #3071a9; +} +.text-success { + color: #3c763d; +} +a.text-success:hover { + color: #2b542c; +} +.text-info { + color: #31708f; +} +a.text-info:hover { + color: #245269; +} +.text-warning { + color: #8a6d3b; +} +a.text-warning:hover { + color: #66512c; +} +.text-danger { + color: #a94442; +} +a.text-danger:hover { + color: #843534; +} +.bg-primary { + color: #fff; + background-color: #428bca; +} +a.bg-primary:hover { + background-color: #3071a9; +} +.bg-success { + background-color: #dff0d8; +} +a.bg-success:hover { + background-color: #c1e2b3; +} +.bg-info { + background-color: #d9edf7; +} +a.bg-info:hover { + background-color: #afd9ee; +} +.bg-warning { + background-color: #fcf8e3; +} +a.bg-warning:hover { + background-color: #f7ecb5; +} +.bg-danger { + background-color: #f2dede; +} +a.bg-danger:hover { + background-color: #e4b9b9; +} +.page-header { + padding-bottom: 9px; + margin: 40px 0 20px; + border-bottom: 1px solid #eee; +} +ul, +ol { + margin-top: 0; + margin-bottom: 10px; +} +ul ul, +ol ul, +ul ol, +ol ol { + margin-bottom: 0; +} +.list-unstyled { + padding-left: 0; + list-style: none; +} +.list-inline { + padding-left: 0; + list-style: none; +} +.list-inline > li { + display: inline-block; + padding-left: 5px; + padding-right: 5px; +} +.list-inline > li:first-child { + padding-left: 0; +} +dl { + margin-top: 0; + margin-bottom: 20px; +} +dt, +dd { + line-height: 1.428571429; +} +dt { + font-weight: 700; +} +dd { + margin-left: 0; +} +@media (min-width: 768px) { + .dl-horizontal dt { + float: left; + width: 160px; + clear: left; + text-align: right; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + .dl-horizontal dd { + margin-left: 180px; + } +} +abbr[title], +abbr[data-original-title] { + cursor: help; + border-bottom: 1px dotted #999; +} +.initialism { + font-size: 90%; + text-transform: uppercase; +} +blockquote { + padding: 10px 20px; + margin: 0 0 20px; + font-size: 17.5px; + border-left: 5px solid #eee; +} +blockquote p:last-child, +blockquote ul:last-child, +blockquote ol:last-child { + margin-bottom: 0; +} +blockquote footer, +blockquote small, +blockquote .small { + display: block; + font-size: 80%; + line-height: 1.428571429; + color: #999; +} +blockquote footer:before, +blockquote small:before, +blockquote .small:before { + content: "\2014 \00A0"; +} +.blockquote-reverse, +blockquote.pull-right { + padding-right: 15px; + padding-left: 0; + border-right: 5px solid #eee; + border-left: 0; + text-align: right; +} +.blockquote-reverse footer:before, +blockquote.pull-right footer:before, +.blockquote-reverse small:before, +blockquote.pull-right small:before, +.blockquote-reverse .small:before, +blockquote.pull-right .small:before { + content: ""; +} +.blockquote-reverse footer:after, +blockquote.pull-right footer:after, +.blockquote-reverse small:after, +blockquote.pull-right small:after, +.blockquote-reverse .small:after, +blockquote.pull-right .small:after { + content: "\00A0 \2014"; +} +blockquote:before, +blockquote:after { + content: ""; +} +address { + margin-bottom: 20px; + font-style: normal; + line-height: 1.428571429; +} +code, +kbd, +pre, +samp { + font-family: Menlo, Monaco, Consolas, "Courier New", monospace; +} +code { + 
padding: 2px 4px; + font-size: 90%; + color: #c7254e; + background-color: #f9f2f4; + white-space: nowrap; + border-radius: 4px; +} +kbd { + padding: 2px 4px; + font-size: 90%; + color: #fff; + background-color: #333; + border-radius: 3px; + box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.25); +} +pre { + display: block; + padding: 9.5px; + margin: 0 0 10px; + font-size: 13px; + line-height: 1.428571429; + word-break: break-all; + word-wrap: break-word; + color: #333; + background-color: #f5f5f5; + border: 1px solid #ccc; + border-radius: 4px; +} +pre code { + padding: 0; + font-size: inherit; + color: inherit; + white-space: pre-wrap; + background-color: transparent; + border-radius: 0; +} +.pre-scrollable { + max-height: 340px; + overflow-y: scroll; +} +.container { + margin-right: auto; + margin-left: auto; + padding-left: 15px; + padding-right: 15px; +} +@media (min-width: 768px) { + .container { + width: 750px; + } +} +@media (min-width: 992px) { + .container { + width: 970px; + } +} +@media (min-width: 1200px) { + .container { + width: 1170px; + } +} +.container-fluid { + margin-right: auto; + margin-left: auto; + padding-left: 15px; + padding-right: 15px; +} +.row { + margin-left: -15px; + margin-right: -15px; +} +.col-xs-1, +.col-sm-1, +.col-md-1, +.col-lg-1, +.col-xs-2, +.col-sm-2, +.col-md-2, +.col-lg-2, +.col-xs-3, +.col-sm-3, +.col-md-3, +.col-lg-3, +.col-xs-4, +.col-sm-4, +.col-md-4, +.col-lg-4, +.col-xs-5, +.col-sm-5, +.col-md-5, +.col-lg-5, +.col-xs-6, +.col-sm-6, +.col-md-6, +.col-lg-6, +.col-xs-7, +.col-sm-7, +.col-md-7, +.col-lg-7, +.col-xs-8, +.col-sm-8, +.col-md-8, +.col-lg-8, +.col-xs-9, +.col-sm-9, +.col-md-9, +.col-lg-9, +.col-xs-10, +.col-sm-10, +.col-md-10, +.col-lg-10, +.col-xs-11, +.col-sm-11, +.col-md-11, +.col-lg-11, +.col-xs-12, +.col-sm-12, +.col-md-12, +.col-lg-12 { + position: relative; + min-height: 1px; + padding-left: 15px; + padding-right: 15px; +} +.col-xs-1, +.col-xs-2, +.col-xs-3, +.col-xs-4, +.col-xs-5, +.col-xs-6, +.col-xs-7, +.col-xs-8, +.col-xs-9, +.col-xs-10, +.col-xs-11, +.col-xs-12 { + float: left; +} +.col-xs-12 { + width: 100%; +} +.col-xs-11 { + width: 91.66666666666666%; +} +.col-xs-10 { + width: 83.33333333333334%; +} +.col-xs-9 { + width: 75%; +} +.col-xs-8 { + width: 66.66666666666666%; +} +.col-xs-7 { + width: 58.333333333333336%; +} +.col-xs-6 { + width: 50%; +} +.col-xs-5 { + width: 41.66666666666667%; +} +.col-xs-4 { + width: 33.33333333333333%; +} +.col-xs-3 { + width: 25%; +} +.col-xs-2 { + width: 16.666666666666664%; +} +.col-xs-1 { + width: 8.333333333333332%; +} +.col-xs-pull-12 { + right: 100%; +} +.col-xs-pull-11 { + right: 91.66666666666666%; +} +.col-xs-pull-10 { + right: 83.33333333333334%; +} +.col-xs-pull-9 { + right: 75%; +} +.col-xs-pull-8 { + right: 66.66666666666666%; +} +.col-xs-pull-7 { + right: 58.333333333333336%; +} +.col-xs-pull-6 { + right: 50%; +} +.col-xs-pull-5 { + right: 41.66666666666667%; +} +.col-xs-pull-4 { + right: 33.33333333333333%; +} +.col-xs-pull-3 { + right: 25%; +} +.col-xs-pull-2 { + right: 16.666666666666664%; +} +.col-xs-pull-1 { + right: 8.333333333333332%; +} +.col-xs-pull-0 { + right: 0; +} +.col-xs-push-12 { + left: 100%; +} +.col-xs-push-11 { + left: 91.66666666666666%; +} +.col-xs-push-10 { + left: 83.33333333333334%; +} +.col-xs-push-9 { + left: 75%; +} +.col-xs-push-8 { + left: 66.66666666666666%; +} +.col-xs-push-7 { + left: 58.333333333333336%; +} +.col-xs-push-6 { + left: 50%; +} +.col-xs-push-5 { + left: 41.66666666666667%; +} +.col-xs-push-4 { + left: 33.33333333333333%; +} 
+.col-xs-push-3 { + left: 25%; +} +.col-xs-push-2 { + left: 16.666666666666664%; +} +.col-xs-push-1 { + left: 8.333333333333332%; +} +.col-xs-push-0 { + left: 0; +} +.col-xs-offset-12 { + margin-left: 100%; +} +.col-xs-offset-11 { + margin-left: 91.66666666666666%; +} +.col-xs-offset-10 { + margin-left: 83.33333333333334%; +} +.col-xs-offset-9 { + margin-left: 75%; +} +.col-xs-offset-8 { + margin-left: 66.66666666666666%; +} +.col-xs-offset-7 { + margin-left: 58.333333333333336%; +} +.col-xs-offset-6 { + margin-left: 50%; +} +.col-xs-offset-5 { + margin-left: 41.66666666666667%; +} +.col-xs-offset-4 { + margin-left: 33.33333333333333%; +} +.col-xs-offset-3 { + margin-left: 25%; +} +.col-xs-offset-2 { + margin-left: 16.666666666666664%; +} +.col-xs-offset-1 { + margin-left: 8.333333333333332%; +} +.col-xs-offset-0 { + margin-left: 0; +} +@media (min-width: 768px) { + .col-sm-1, + .col-sm-2, + .col-sm-3, + .col-sm-4, + .col-sm-5, + .col-sm-6, + .col-sm-7, + .col-sm-8, + .col-sm-9, + .col-sm-10, + .col-sm-11, + .col-sm-12 { + float: left; + } + .col-sm-12 { + width: 100%; + } + .col-sm-11 { + width: 91.66666666666666%; + } + .col-sm-10 { + width: 83.33333333333334%; + } + .col-sm-9 { + width: 75%; + } + .col-sm-8 { + width: 66.66666666666666%; + } + .col-sm-7 { + width: 58.333333333333336%; + } + .col-sm-6 { + width: 50%; + } + .col-sm-5 { + width: 41.66666666666667%; + } + .col-sm-4 { + width: 33.33333333333333%; + } + .col-sm-3 { + width: 25%; + } + .col-sm-2 { + width: 16.666666666666664%; + } + .col-sm-1 { + width: 8.333333333333332%; + } + .col-sm-pull-12 { + right: 100%; + } + .col-sm-pull-11 { + right: 91.66666666666666%; + } + .col-sm-pull-10 { + right: 83.33333333333334%; + } + .col-sm-pull-9 { + right: 75%; + } + .col-sm-pull-8 { + right: 66.66666666666666%; + } + .col-sm-pull-7 { + right: 58.333333333333336%; + } + .col-sm-pull-6 { + right: 50%; + } + .col-sm-pull-5 { + right: 41.66666666666667%; + } + .col-sm-pull-4 { + right: 33.33333333333333%; + } + .col-sm-pull-3 { + right: 25%; + } + .col-sm-pull-2 { + right: 16.666666666666664%; + } + .col-sm-pull-1 { + right: 8.333333333333332%; + } + .col-sm-pull-0 { + right: 0; + } + .col-sm-push-12 { + left: 100%; + } + .col-sm-push-11 { + left: 91.66666666666666%; + } + .col-sm-push-10 { + left: 83.33333333333334%; + } + .col-sm-push-9 { + left: 75%; + } + .col-sm-push-8 { + left: 66.66666666666666%; + } + .col-sm-push-7 { + left: 58.333333333333336%; + } + .col-sm-push-6 { + left: 50%; + } + .col-sm-push-5 { + left: 41.66666666666667%; + } + .col-sm-push-4 { + left: 33.33333333333333%; + } + .col-sm-push-3 { + left: 25%; + } + .col-sm-push-2 { + left: 16.666666666666664%; + } + .col-sm-push-1 { + left: 8.333333333333332%; + } + .col-sm-push-0 { + left: 0; + } + .col-sm-offset-12 { + margin-left: 100%; + } + .col-sm-offset-11 { + margin-left: 91.66666666666666%; + } + .col-sm-offset-10 { + margin-left: 83.33333333333334%; + } + .col-sm-offset-9 { + margin-left: 75%; + } + .col-sm-offset-8 { + margin-left: 66.66666666666666%; + } + .col-sm-offset-7 { + margin-left: 58.333333333333336%; + } + .col-sm-offset-6 { + margin-left: 50%; + } + .col-sm-offset-5 { + margin-left: 41.66666666666667%; + } + .col-sm-offset-4 { + margin-left: 33.33333333333333%; + } + .col-sm-offset-3 { + margin-left: 25%; + } + .col-sm-offset-2 { + margin-left: 16.666666666666664%; + } + .col-sm-offset-1 { + margin-left: 8.333333333333332%; + } + .col-sm-offset-0 { + margin-left: 0; + } +} +@media (min-width: 992px) { + .col-md-1, + .col-md-2, + .col-md-3, + 
.col-md-4, + .col-md-5, + .col-md-6, + .col-md-7, + .col-md-8, + .col-md-9, + .col-md-10, + .col-md-11, + .col-md-12 { + float: left; + } + .col-md-12 { + width: 100%; + } + .col-md-11 { + width: 91.66666666666666%; + } + .col-md-10 { + width: 83.33333333333334%; + } + .col-md-9 { + width: 75%; + } + .col-md-8 { + width: 66.66666666666666%; + } + .col-md-7 { + width: 58.333333333333336%; + } + .col-md-6 { + width: 50%; + } + .col-md-5 { + width: 41.66666666666667%; + } + .col-md-4 { + width: 33.33333333333333%; + } + .col-md-3 { + width: 25%; + } + .col-md-2 { + width: 16.666666666666664%; + } + .col-md-1 { + width: 8.333333333333332%; + } + .col-md-pull-12 { + right: 100%; + } + .col-md-pull-11 { + right: 91.66666666666666%; + } + .col-md-pull-10 { + right: 83.33333333333334%; + } + .col-md-pull-9 { + right: 75%; + } + .col-md-pull-8 { + right: 66.66666666666666%; + } + .col-md-pull-7 { + right: 58.333333333333336%; + } + .col-md-pull-6 { + right: 50%; + } + .col-md-pull-5 { + right: 41.66666666666667%; + } + .col-md-pull-4 { + right: 33.33333333333333%; + } + .col-md-pull-3 { + right: 25%; + } + .col-md-pull-2 { + right: 16.666666666666664%; + } + .col-md-pull-1 { + right: 8.333333333333332%; + } + .col-md-pull-0 { + right: 0; + } + .col-md-push-12 { + left: 100%; + } + .col-md-push-11 { + left: 91.66666666666666%; + } + .col-md-push-10 { + left: 83.33333333333334%; + } + .col-md-push-9 { + left: 75%; + } + .col-md-push-8 { + left: 66.66666666666666%; + } + .col-md-push-7 { + left: 58.333333333333336%; + } + .col-md-push-6 { + left: 50%; + } + .col-md-push-5 { + left: 41.66666666666667%; + } + .col-md-push-4 { + left: 33.33333333333333%; + } + .col-md-push-3 { + left: 25%; + } + .col-md-push-2 { + left: 16.666666666666664%; + } + .col-md-push-1 { + left: 8.333333333333332%; + } + .col-md-push-0 { + left: 0; + } + .col-md-offset-12 { + margin-left: 100%; + } + .col-md-offset-11 { + margin-left: 91.66666666666666%; + } + .col-md-offset-10 { + margin-left: 83.33333333333334%; + } + .col-md-offset-9 { + margin-left: 75%; + } + .col-md-offset-8 { + margin-left: 66.66666666666666%; + } + .col-md-offset-7 { + margin-left: 58.333333333333336%; + } + .col-md-offset-6 { + margin-left: 50%; + } + .col-md-offset-5 { + margin-left: 41.66666666666667%; + } + .col-md-offset-4 { + margin-left: 33.33333333333333%; + } + .col-md-offset-3 { + margin-left: 25%; + } + .col-md-offset-2 { + margin-left: 16.666666666666664%; + } + .col-md-offset-1 { + margin-left: 8.333333333333332%; + } + .col-md-offset-0 { + margin-left: 0; + } +} +@media (min-width: 1200px) { + .col-lg-1, + .col-lg-2, + .col-lg-3, + .col-lg-4, + .col-lg-5, + .col-lg-6, + .col-lg-7, + .col-lg-8, + .col-lg-9, + .col-lg-10, + .col-lg-11, + .col-lg-12 { + float: left; + } + .col-lg-12 { + width: 100%; + } + .col-lg-11 { + width: 91.66666666666666%; + } + .col-lg-10 { + width: 83.33333333333334%; + } + .col-lg-9 { + width: 75%; + } + .col-lg-8 { + width: 66.66666666666666%; + } + .col-lg-7 { + width: 58.333333333333336%; + } + .col-lg-6 { + width: 50%; + } + .col-lg-5 { + width: 41.66666666666667%; + } + .col-lg-4 { + width: 33.33333333333333%; + } + .col-lg-3 { + width: 25%; + } + .col-lg-2 { + width: 16.666666666666664%; + } + .col-lg-1 { + width: 8.333333333333332%; + } + .col-lg-pull-12 { + right: 100%; + } + .col-lg-pull-11 { + right: 91.66666666666666%; + } + .col-lg-pull-10 { + right: 83.33333333333334%; + } + .col-lg-pull-9 { + right: 75%; + } + .col-lg-pull-8 { + right: 66.66666666666666%; + } + .col-lg-pull-7 { + right: 
58.333333333333336%; + } + .col-lg-pull-6 { + right: 50%; + } + .col-lg-pull-5 { + right: 41.66666666666667%; + } + .col-lg-pull-4 { + right: 33.33333333333333%; + } + .col-lg-pull-3 { + right: 25%; + } + .col-lg-pull-2 { + right: 16.666666666666664%; + } + .col-lg-pull-1 { + right: 8.333333333333332%; + } + .col-lg-pull-0 { + right: 0; + } + .col-lg-push-12 { + left: 100%; + } + .col-lg-push-11 { + left: 91.66666666666666%; + } + .col-lg-push-10 { + left: 83.33333333333334%; + } + .col-lg-push-9 { + left: 75%; + } + .col-lg-push-8 { + left: 66.66666666666666%; + } + .col-lg-push-7 { + left: 58.333333333333336%; + } + .col-lg-push-6 { + left: 50%; + } + .col-lg-push-5 { + left: 41.66666666666667%; + } + .col-lg-push-4 { + left: 33.33333333333333%; + } + .col-lg-push-3 { + left: 25%; + } + .col-lg-push-2 { + left: 16.666666666666664%; + } + .col-lg-push-1 { + left: 8.333333333333332%; + } + .col-lg-push-0 { + left: 0; + } + .col-lg-offset-12 { + margin-left: 100%; + } + .col-lg-offset-11 { + margin-left: 91.66666666666666%; + } + .col-lg-offset-10 { + margin-left: 83.33333333333334%; + } + .col-lg-offset-9 { + margin-left: 75%; + } + .col-lg-offset-8 { + margin-left: 66.66666666666666%; + } + .col-lg-offset-7 { + margin-left: 58.333333333333336%; + } + .col-lg-offset-6 { + margin-left: 50%; + } + .col-lg-offset-5 { + margin-left: 41.66666666666667%; + } + .col-lg-offset-4 { + margin-left: 33.33333333333333%; + } + .col-lg-offset-3 { + margin-left: 25%; + } + .col-lg-offset-2 { + margin-left: 16.666666666666664%; + } + .col-lg-offset-1 { + margin-left: 8.333333333333332%; + } + .col-lg-offset-0 { + margin-left: 0; + } +} +table { + max-width: 100%; + background-color: transparent; +} +th { + text-align: left; +} +.table { + width: 100%; + margin-bottom: 20px; +} +.table > thead > tr > th, +.table > tbody > tr > th, +.table > tfoot > tr > th, +.table > thead > tr > td, +.table > tbody > tr > td, +.table > tfoot > tr > td { + padding: 8px; + line-height: 1.428571429; + vertical-align: top; + border-top: 1px solid #ddd; +} +.table > thead > tr > th { + vertical-align: bottom; + border-bottom: 2px solid #ddd; +} +.table > caption + thead > tr:first-child > th, +.table > colgroup + thead > tr:first-child > th, +.table > thead:first-child > tr:first-child > th, +.table > caption + thead > tr:first-child > td, +.table > colgroup + thead > tr:first-child > td, +.table > thead:first-child > tr:first-child > td { + border-top: 0; +} +.table > tbody + tbody { + border-top: 2px solid #ddd; +} +.table .table { + background-color: #fff; +} +.table-condensed > thead > tr > th, +.table-condensed > tbody > tr > th, +.table-condensed > tfoot > tr > th, +.table-condensed > thead > tr > td, +.table-condensed > tbody > tr > td, +.table-condensed > tfoot > tr > td { + padding: 5px; +} +.table-bordered { + border: 1px solid #ddd; +} +.table-bordered > thead > tr > th, +.table-bordered > tbody > tr > th, +.table-bordered > tfoot > tr > th, +.table-bordered > thead > tr > td, +.table-bordered > tbody > tr > td, +.table-bordered > tfoot > tr > td { + border: 1px solid #ddd; +} +.table-bordered > thead > tr > th, +.table-bordered > thead > tr > td { + border-bottom-width: 2px; +} +.table-striped > tbody > tr:nth-child(odd) > td, +.table-striped > tbody > tr:nth-child(odd) > th { + background-color: #f9f9f9; +} +.table-hover > tbody > tr:hover > td, +.table-hover > tbody > tr:hover > th { + background-color: #f5f5f5; +} +table col[class*="col-"] { + position: static; + float: none; + display: table-column; +} +table 
td[class*="col-"], +table th[class*="col-"] { + position: static; + float: none; + display: table-cell; +} +.table > thead > tr > td.active, +.table > tbody > tr > td.active, +.table > tfoot > tr > td.active, +.table > thead > tr > th.active, +.table > tbody > tr > th.active, +.table > tfoot > tr > th.active, +.table > thead > tr.active > td, +.table > tbody > tr.active > td, +.table > tfoot > tr.active > td, +.table > thead > tr.active > th, +.table > tbody > tr.active > th, +.table > tfoot > tr.active > th { + background-color: #f5f5f5; +} +.table-hover > tbody > tr > td.active:hover, +.table-hover > tbody > tr > th.active:hover, +.table-hover > tbody > tr.active:hover > td, +.table-hover > tbody > tr.active:hover > th { + background-color: #e8e8e8; +} +.table > thead > tr > td.success, +.table > tbody > tr > td.success, +.table > tfoot > tr > td.success, +.table > thead > tr > th.success, +.table > tbody > tr > th.success, +.table > tfoot > tr > th.success, +.table > thead > tr.success > td, +.table > tbody > tr.success > td, +.table > tfoot > tr.success > td, +.table > thead > tr.success > th, +.table > tbody > tr.success > th, +.table > tfoot > tr.success > th { + background-color: #dff0d8; +} +.table-hover > tbody > tr > td.success:hover, +.table-hover > tbody > tr > th.success:hover, +.table-hover > tbody > tr.success:hover > td, +.table-hover > tbody > tr.success:hover > th { + background-color: #d0e9c6; +} +.table > thead > tr > td.info, +.table > tbody > tr > td.info, +.table > tfoot > tr > td.info, +.table > thead > tr > th.info, +.table > tbody > tr > th.info, +.table > tfoot > tr > th.info, +.table > thead > tr.info > td, +.table > tbody > tr.info > td, +.table > tfoot > tr.info > td, +.table > thead > tr.info > th, +.table > tbody > tr.info > th, +.table > tfoot > tr.info > th { + background-color: #d9edf7; +} +.table-hover > tbody > tr > td.info:hover, +.table-hover > tbody > tr > th.info:hover, +.table-hover > tbody > tr.info:hover > td, +.table-hover > tbody > tr.info:hover > th { + background-color: #c4e3f3; +} +.table > thead > tr > td.warning, +.table > tbody > tr > td.warning, +.table > tfoot > tr > td.warning, +.table > thead > tr > th.warning, +.table > tbody > tr > th.warning, +.table > tfoot > tr > th.warning, +.table > thead > tr.warning > td, +.table > tbody > tr.warning > td, +.table > tfoot > tr.warning > td, +.table > thead > tr.warning > th, +.table > tbody > tr.warning > th, +.table > tfoot > tr.warning > th { + background-color: #fcf8e3; +} +.table-hover > tbody > tr > td.warning:hover, +.table-hover > tbody > tr > th.warning:hover, +.table-hover > tbody > tr.warning:hover > td, +.table-hover > tbody > tr.warning:hover > th { + background-color: #faf2cc; +} +.table > thead > tr > td.danger, +.table > tbody > tr > td.danger, +.table > tfoot > tr > td.danger, +.table > thead > tr > th.danger, +.table > tbody > tr > th.danger, +.table > tfoot > tr > th.danger, +.table > thead > tr.danger > td, +.table > tbody > tr.danger > td, +.table > tfoot > tr.danger > td, +.table > thead > tr.danger > th, +.table > tbody > tr.danger > th, +.table > tfoot > tr.danger > th { + background-color: #f2dede; +} +.table-hover > tbody > tr > td.danger:hover, +.table-hover > tbody > tr > th.danger:hover, +.table-hover > tbody > tr.danger:hover > td, +.table-hover > tbody > tr.danger:hover > th { + background-color: #ebcccc; +} +@media (max-width: 767px) { + .table-responsive { + width: 100%; + margin-bottom: 15px; + overflow-y: hidden; + overflow-x: scroll; + -ms-overflow-style: 
-ms-autohiding-scrollbar; + border: 1px solid #ddd; + -webkit-overflow-scrolling: touch; + } + .table-responsive > .table { + margin-bottom: 0; + } + .table-responsive > .table > thead > tr > th, + .table-responsive > .table > tbody > tr > th, + .table-responsive > .table > tfoot > tr > th, + .table-responsive > .table > thead > tr > td, + .table-responsive > .table > tbody > tr > td, + .table-responsive > .table > tfoot > tr > td { + white-space: nowrap; + } + .table-responsive > .table-bordered { + border: 0; + } + .table-responsive > .table-bordered > thead > tr > th:first-child, + .table-responsive > .table-bordered > tbody > tr > th:first-child, + .table-responsive > .table-bordered > tfoot > tr > th:first-child, + .table-responsive > .table-bordered > thead > tr > td:first-child, + .table-responsive > .table-bordered > tbody > tr > td:first-child, + .table-responsive > .table-bordered > tfoot > tr > td:first-child { + border-left: 0; + } + .table-responsive > .table-bordered > thead > tr > th:last-child, + .table-responsive > .table-bordered > tbody > tr > th:last-child, + .table-responsive > .table-bordered > tfoot > tr > th:last-child, + .table-responsive > .table-bordered > thead > tr > td:last-child, + .table-responsive > .table-bordered > tbody > tr > td:last-child, + .table-responsive > .table-bordered > tfoot > tr > td:last-child { + border-right: 0; + } + .table-responsive > .table-bordered > tbody > tr:last-child > th, + .table-responsive > .table-bordered > tfoot > tr:last-child > th, + .table-responsive > .table-bordered > tbody > tr:last-child > td, + .table-responsive > .table-bordered > tfoot > tr:last-child > td { + border-bottom: 0; + } +} +fieldset { + padding: 0; + margin: 0; + border: 0; + min-width: 0; +} +legend { + display: block; + width: 100%; + padding: 0; + margin-bottom: 20px; + font-size: 21px; + line-height: inherit; + color: #333; + border: 0; + border-bottom: 1px solid #e5e5e5; +} +label { + display: inline-block; + margin-bottom: 5px; + font-weight: 700; +} +input[type="search"] { + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +input[type="radio"], +input[type="checkbox"] { + margin: 4px 0 0; + margin-top: 1px \9; + line-height: normal; +} +input[type="file"] { + display: block; +} +input[type="range"] { + display: block; + width: 100%; +} +select[multiple], +select[size] { + height: auto; +} +input[type="file"]:focus, +input[type="radio"]:focus, +input[type="checkbox"]:focus { + outline: thin dotted; + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px; +} +output { + display: block; + padding-top: 7px; + font-size: 14px; + line-height: 1.428571429; + color: #555; +} +.form-control { + display: block; + width: 100%; + height: 34px; + padding: 6px 12px; + font-size: 14px; + line-height: 1.428571429; + color: #555; + background-color: #fff; + background-image: none; + border: 1px solid #ccc; + border-radius: 4px; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); + -webkit-transition: + border-color ease-in-out 0.15s, + box-shadow ease-in-out 0.15s; + transition: + border-color ease-in-out 0.15s, + box-shadow ease-in-out 0.15s; +} +.form-control:focus { + border-color: #66afe9; + outline: 0; + -webkit-box-shadow: + inset 0 1px 1px rgba(0, 0, 0, 0.075), + 0 0 8px rgba(102, 175, 233, 0.6); + box-shadow: + inset 0 1px 1px rgba(0, 0, 0, 0.075), + 0 0 8px rgba(102, 175, 233, 0.6); +} +.form-control:-moz-placeholder { + color: #999; +} 
+.form-control::-moz-placeholder { + color: #999; + opacity: 1; +} +.form-control:-ms-input-placeholder { + color: #999; +} +.form-control::-webkit-input-placeholder { + color: #999; +} +.form-control[disabled], +.form-control[readonly], +fieldset[disabled] .form-control { + cursor: not-allowed; + background-color: #eee; + opacity: 1; +} +textarea.form-control { + height: auto; +} +input[type="date"] { + line-height: 34px; +} +.form-group { + margin-bottom: 15px; +} +.radio, +.checkbox { + display: block; + min-height: 20px; + margin-top: 10px; + margin-bottom: 10px; + padding-left: 20px; +} +.radio label, +.checkbox label { + display: inline; + font-weight: 400; + cursor: pointer; +} +.radio input[type="radio"], +.radio-inline input[type="radio"], +.checkbox input[type="checkbox"], +.checkbox-inline input[type="checkbox"] { + float: left; + margin-left: -20px; +} +.radio + .radio, +.checkbox + .checkbox { + margin-top: -5px; +} +.radio-inline, +.checkbox-inline { + display: inline-block; + padding-left: 20px; + margin-bottom: 0; + vertical-align: middle; + font-weight: 400; + cursor: pointer; +} +.radio-inline + .radio-inline, +.checkbox-inline + .checkbox-inline { + margin-top: 0; + margin-left: 10px; +} +input[type="radio"][disabled], +input[type="checkbox"][disabled], +.radio[disabled], +.radio-inline[disabled], +.checkbox[disabled], +.checkbox-inline[disabled], +fieldset[disabled] input[type="radio"], +fieldset[disabled] input[type="checkbox"], +fieldset[disabled] .radio, +fieldset[disabled] .radio-inline, +fieldset[disabled] .checkbox, +fieldset[disabled] .checkbox-inline { + cursor: not-allowed; +} +.input-sm { + height: 30px; + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +select.input-sm { + height: 30px; + line-height: 30px; +} +textarea.input-sm, +select[multiple].input-sm { + height: auto; +} +.input-lg { + height: 46px; + padding: 10px 16px; + font-size: 18px; + line-height: 1.33; + border-radius: 6px; +} +select.input-lg { + height: 46px; + line-height: 46px; +} +textarea.input-lg, +select[multiple].input-lg { + height: auto; +} +.has-feedback { + position: relative; +} +.has-feedback .form-control { + padding-right: 42.5px; +} +.has-feedback .form-control-feedback { + position: absolute; + top: 25px; + right: 0; + display: block; + width: 34px; + height: 34px; + line-height: 34px; + text-align: center; +} +.has-success .help-block, +.has-success .control-label, +.has-success .radio, +.has-success .checkbox, +.has-success .radio-inline, +.has-success .checkbox-inline { + color: #3c763d; +} +.has-success .form-control { + border-color: #3c763d; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); +} +.has-success .form-control:focus { + border-color: #2b542c; + -webkit-box-shadow: + inset 0 1px 1px rgba(0, 0, 0, 0.075), + 0 0 6px #67b168; + box-shadow: + inset 0 1px 1px rgba(0, 0, 0, 0.075), + 0 0 6px #67b168; +} +.has-success .input-group-addon { + color: #3c763d; + border-color: #3c763d; + background-color: #dff0d8; +} +.has-success .form-control-feedback { + color: #3c763d; +} +.has-warning .help-block, +.has-warning .control-label, +.has-warning .radio, +.has-warning .checkbox, +.has-warning .radio-inline, +.has-warning .checkbox-inline { + color: #8a6d3b; +} +.has-warning .form-control { + border-color: #8a6d3b; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); +} +.has-warning .form-control:focus { + border-color: 
#66512c; + -webkit-box-shadow: + inset 0 1px 1px rgba(0, 0, 0, 0.075), + 0 0 6px #c0a16b; + box-shadow: + inset 0 1px 1px rgba(0, 0, 0, 0.075), + 0 0 6px #c0a16b; +} +.has-warning .input-group-addon { + color: #8a6d3b; + border-color: #8a6d3b; + background-color: #fcf8e3; +} +.has-warning .form-control-feedback { + color: #8a6d3b; +} +.has-error .help-block, +.has-error .control-label, +.has-error .radio, +.has-error .checkbox, +.has-error .radio-inline, +.has-error .checkbox-inline { + color: #a94442; +} +.has-error .form-control { + border-color: #a94442; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.075); +} +.has-error .form-control:focus { + border-color: #843534; + -webkit-box-shadow: + inset 0 1px 1px rgba(0, 0, 0, 0.075), + 0 0 6px #ce8483; + box-shadow: + inset 0 1px 1px rgba(0, 0, 0, 0.075), + 0 0 6px #ce8483; +} +.has-error .input-group-addon { + color: #a94442; + border-color: #a94442; + background-color: #f2dede; +} +.has-error .form-control-feedback { + color: #a94442; +} +.form-control-static { + margin-bottom: 0; +} +.help-block { + display: block; + margin-top: 5px; + margin-bottom: 10px; + color: #737373; +} +@media (min-width: 768px) { + .form-inline .form-group { + display: inline-block; + margin-bottom: 0; + vertical-align: middle; + } + .form-inline .form-control { + display: inline-block; + width: auto; + vertical-align: middle; + } + .form-inline .control-label { + margin-bottom: 0; + vertical-align: middle; + } + .form-inline .radio, + .form-inline .checkbox { + display: inline-block; + margin-top: 0; + margin-bottom: 0; + padding-left: 0; + vertical-align: middle; + } + .form-inline .radio input[type="radio"], + .form-inline .checkbox input[type="checkbox"] { + float: none; + margin-left: 0; + } + .form-inline .has-feedback .form-control-feedback { + top: 0; + } +} +.form-horizontal .control-label, +.form-horizontal .radio, +.form-horizontal .checkbox, +.form-horizontal .radio-inline, +.form-horizontal .checkbox-inline { + margin-top: 0; + margin-bottom: 0; + padding-top: 7px; +} +.form-horizontal .radio, +.form-horizontal .checkbox { + min-height: 27px; +} +.form-horizontal .form-group { + margin-left: -15px; + margin-right: -15px; +} +.form-horizontal .form-control-static { + padding-top: 7px; +} +@media (min-width: 768px) { + .form-horizontal .control-label { + text-align: right; + } +} +.form-horizontal .has-feedback .form-control-feedback { + top: 0; + right: 15px; +} +.btn { + display: inline-block; + margin-bottom: 0; + font-weight: 400; + text-align: center; + vertical-align: middle; + cursor: pointer; + background-image: none; + border: 1px solid transparent; + white-space: nowrap; + padding: 6px 12px; + font-size: 14px; + line-height: 1.428571429; + border-radius: 4px; + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + -o-user-select: none; + user-select: none; +} +.btn:focus { + outline: thin dotted; + outline: 5px auto -webkit-focus-ring-color; + outline-offset: -2px; +} +.btn:hover, +.btn:focus { + color: #333; + text-decoration: none; +} +.btn:active, +.btn.active { + outline: 0; + background-image: none; + -webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125); + box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125); +} +.btn.disabled, +.btn[disabled], +fieldset[disabled] .btn { + cursor: not-allowed; + pointer-events: none; + opacity: 0.65; + filter: alpha(opacity=65); + -webkit-box-shadow: none; + box-shadow: none; +} +.btn-default { + color: #333; + 
background-color: #fff; + border-color: #ccc; +} +.btn-default:hover, +.btn-default:focus, +.btn-default:active, +.btn-default.active, +.open .dropdown-toggle.btn-default { + color: #333; + background-color: #ebebeb; + border-color: #adadad; +} +.btn-default:active, +.btn-default.active, +.open .dropdown-toggle.btn-default { + background-image: none; +} +.btn-default.disabled, +.btn-default[disabled], +fieldset[disabled] .btn-default, +.btn-default.disabled:hover, +.btn-default[disabled]:hover, +fieldset[disabled] .btn-default:hover, +.btn-default.disabled:focus, +.btn-default[disabled]:focus, +fieldset[disabled] .btn-default:focus, +.btn-default.disabled:active, +.btn-default[disabled]:active, +fieldset[disabled] .btn-default:active, +.btn-default.disabled.active, +.btn-default[disabled].active, +fieldset[disabled] .btn-default.active { + background-color: #fff; + border-color: #ccc; +} +.btn-default .badge { + color: #fff; + background-color: #333; +} +.btn-primary { + color: #fff; + background-color: #428bca; + border-color: #357ebd; +} +.btn-primary:hover, +.btn-primary:focus, +.btn-primary:active, +.btn-primary.active, +.open .dropdown-toggle.btn-primary { + color: #fff; + background-color: #3276b1; + border-color: #285e8e; +} +.btn-primary:active, +.btn-primary.active, +.open .dropdown-toggle.btn-primary { + background-image: none; +} +.btn-primary.disabled, +.btn-primary[disabled], +fieldset[disabled] .btn-primary, +.btn-primary.disabled:hover, +.btn-primary[disabled]:hover, +fieldset[disabled] .btn-primary:hover, +.btn-primary.disabled:focus, +.btn-primary[disabled]:focus, +fieldset[disabled] .btn-primary:focus, +.btn-primary.disabled:active, +.btn-primary[disabled]:active, +fieldset[disabled] .btn-primary:active, +.btn-primary.disabled.active, +.btn-primary[disabled].active, +fieldset[disabled] .btn-primary.active { + background-color: #428bca; + border-color: #357ebd; +} +.btn-primary .badge { + color: #428bca; + background-color: #fff; +} +.btn-success { + color: #fff; + background-color: #5cb85c; + border-color: #4cae4c; +} +.btn-success:hover, +.btn-success:focus, +.btn-success:active, +.btn-success.active, +.open .dropdown-toggle.btn-success { + color: #fff; + background-color: #47a447; + border-color: #398439; +} +.btn-success:active, +.btn-success.active, +.open .dropdown-toggle.btn-success { + background-image: none; +} +.btn-success.disabled, +.btn-success[disabled], +fieldset[disabled] .btn-success, +.btn-success.disabled:hover, +.btn-success[disabled]:hover, +fieldset[disabled] .btn-success:hover, +.btn-success.disabled:focus, +.btn-success[disabled]:focus, +fieldset[disabled] .btn-success:focus, +.btn-success.disabled:active, +.btn-success[disabled]:active, +fieldset[disabled] .btn-success:active, +.btn-success.disabled.active, +.btn-success[disabled].active, +fieldset[disabled] .btn-success.active { + background-color: #5cb85c; + border-color: #4cae4c; +} +.btn-success .badge { + color: #5cb85c; + background-color: #fff; +} +.btn-info { + color: #fff; + background-color: #5bc0de; + border-color: #46b8da; +} +.btn-info:hover, +.btn-info:focus, +.btn-info:active, +.btn-info.active, +.open .dropdown-toggle.btn-info { + color: #fff; + background-color: #39b3d7; + border-color: #269abc; +} +.btn-info:active, +.btn-info.active, +.open .dropdown-toggle.btn-info { + background-image: none; +} +.btn-info.disabled, +.btn-info[disabled], +fieldset[disabled] .btn-info, +.btn-info.disabled:hover, +.btn-info[disabled]:hover, +fieldset[disabled] .btn-info:hover, 
+.btn-info.disabled:focus, +.btn-info[disabled]:focus, +fieldset[disabled] .btn-info:focus, +.btn-info.disabled:active, +.btn-info[disabled]:active, +fieldset[disabled] .btn-info:active, +.btn-info.disabled.active, +.btn-info[disabled].active, +fieldset[disabled] .btn-info.active { + background-color: #5bc0de; + border-color: #46b8da; +} +.btn-info .badge { + color: #5bc0de; + background-color: #fff; +} +.btn-warning { + color: #fff; + background-color: #f0ad4e; + border-color: #eea236; +} +.btn-warning:hover, +.btn-warning:focus, +.btn-warning:active, +.btn-warning.active, +.open .dropdown-toggle.btn-warning { + color: #fff; + background-color: #ed9c28; + border-color: #d58512; +} +.btn-warning:active, +.btn-warning.active, +.open .dropdown-toggle.btn-warning { + background-image: none; +} +.btn-warning.disabled, +.btn-warning[disabled], +fieldset[disabled] .btn-warning, +.btn-warning.disabled:hover, +.btn-warning[disabled]:hover, +fieldset[disabled] .btn-warning:hover, +.btn-warning.disabled:focus, +.btn-warning[disabled]:focus, +fieldset[disabled] .btn-warning:focus, +.btn-warning.disabled:active, +.btn-warning[disabled]:active, +fieldset[disabled] .btn-warning:active, +.btn-warning.disabled.active, +.btn-warning[disabled].active, +fieldset[disabled] .btn-warning.active { + background-color: #f0ad4e; + border-color: #eea236; +} +.btn-warning .badge { + color: #f0ad4e; + background-color: #fff; +} +.btn-danger { + color: #fff; + background-color: #d9534f; + border-color: #d43f3a; +} +.btn-danger:hover, +.btn-danger:focus, +.btn-danger:active, +.btn-danger.active, +.open .dropdown-toggle.btn-danger { + color: #fff; + background-color: #d2322d; + border-color: #ac2925; +} +.btn-danger:active, +.btn-danger.active, +.open .dropdown-toggle.btn-danger { + background-image: none; +} +.btn-danger.disabled, +.btn-danger[disabled], +fieldset[disabled] .btn-danger, +.btn-danger.disabled:hover, +.btn-danger[disabled]:hover, +fieldset[disabled] .btn-danger:hover, +.btn-danger.disabled:focus, +.btn-danger[disabled]:focus, +fieldset[disabled] .btn-danger:focus, +.btn-danger.disabled:active, +.btn-danger[disabled]:active, +fieldset[disabled] .btn-danger:active, +.btn-danger.disabled.active, +.btn-danger[disabled].active, +fieldset[disabled] .btn-danger.active { + background-color: #d9534f; + border-color: #d43f3a; +} +.btn-danger .badge { + color: #d9534f; + background-color: #fff; +} +.btn-link { + color: #428bca; + font-weight: 400; + cursor: pointer; + border-radius: 0; +} +.btn-link, +.btn-link:active, +.btn-link[disabled], +fieldset[disabled] .btn-link { + background-color: transparent; + -webkit-box-shadow: none; + box-shadow: none; +} +.btn-link, +.btn-link:hover, +.btn-link:focus, +.btn-link:active { + border-color: transparent; +} +.btn-link:hover, +.btn-link:focus { + color: #2a6496; + text-decoration: underline; + background-color: transparent; +} +.btn-link[disabled]:hover, +fieldset[disabled] .btn-link:hover, +.btn-link[disabled]:focus, +fieldset[disabled] .btn-link:focus { + color: #999; + text-decoration: none; +} +.btn-lg { + padding: 10px 16px; + font-size: 18px; + line-height: 1.33; + border-radius: 6px; +} +.btn-sm { + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +.btn-xs { + padding: 1px 5px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +.btn-block { + display: block; + width: 100%; + padding-left: 0; + padding-right: 0; +} +.btn-block + .btn-block { + margin-top: 5px; +} +input[type="submit"].btn-block, 
+input[type="reset"].btn-block, +input[type="button"].btn-block { + width: 100%; +} +.fade { + opacity: 0; + -webkit-transition: opacity 0.15s linear; + transition: opacity 0.15s linear; +} +.fade.in { + opacity: 1; +} +.collapse { + display: none; +} +.collapse.in { + display: block; +} +.collapsing { + position: relative; + height: 0; + overflow: hidden; + -webkit-transition: height 0.35s ease; + transition: height 0.35s ease; +} +@font-face { + font-family: "Glyphicons Halflings"; + src: url(../fonts/glyphicons-halflings-regular.eot); + src: + url(../fonts/glyphicons-halflings-regular.eot?#iefix) + format("embedded-opentype"), + url(../fonts/glyphicons-halflings-regular.woff) format("woff"), + url(../fonts/glyphicons-halflings-regular.ttf) format("truetype"), + url(../fonts/glyphicons-halflings-regular.svg#glyphicons_halflingsregular) + format("svg"); +} +.glyphicon { + position: relative; + top: 1px; + display: inline-block; + font-family: "Glyphicons Halflings"; + font-style: normal; + font-weight: 400; + line-height: 1; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} +.glyphicon-asterisk:before { + content: "\2a"; +} +.glyphicon-plus:before { + content: "\2b"; +} +.glyphicon-euro:before { + content: "\20ac"; +} +.glyphicon-minus:before { + content: "\2212"; +} +.glyphicon-cloud:before { + content: "\2601"; +} +.glyphicon-envelope:before { + content: "\2709"; +} +.glyphicon-pencil:before { + content: "\270f"; +} +.glyphicon-glass:before { + content: "\e001"; +} +.glyphicon-music:before { + content: "\e002"; +} +.glyphicon-search:before { + content: "\e003"; +} +.glyphicon-heart:before { + content: "\e005"; +} +.glyphicon-star:before { + content: "\e006"; +} +.glyphicon-star-empty:before { + content: "\e007"; +} +.glyphicon-user:before { + content: "\e008"; +} +.glyphicon-film:before { + content: "\e009"; +} +.glyphicon-th-large:before { + content: "\e010"; +} +.glyphicon-th:before { + content: "\e011"; +} +.glyphicon-th-list:before { + content: "\e012"; +} +.glyphicon-ok:before { + content: "\e013"; +} +.glyphicon-remove:before { + content: "\e014"; +} +.glyphicon-zoom-in:before { + content: "\e015"; +} +.glyphicon-zoom-out:before { + content: "\e016"; +} +.glyphicon-off:before { + content: "\e017"; +} +.glyphicon-signal:before { + content: "\e018"; +} +.glyphicon-cog:before { + content: "\e019"; +} +.glyphicon-trash:before { + content: "\e020"; +} +.glyphicon-home:before { + content: "\e021"; +} +.glyphicon-file:before { + content: "\e022"; +} +.glyphicon-time:before { + content: "\e023"; +} +.glyphicon-road:before { + content: "\e024"; +} +.glyphicon-download-alt:before { + content: "\e025"; +} +.glyphicon-download:before { + content: "\e026"; +} +.glyphicon-upload:before { + content: "\e027"; +} +.glyphicon-inbox:before { + content: "\e028"; +} +.glyphicon-play-circle:before { + content: "\e029"; +} +.glyphicon-repeat:before { + content: "\e030"; +} +.glyphicon-refresh:before { + content: "\e031"; +} +.glyphicon-list-alt:before { + content: "\e032"; +} +.glyphicon-lock:before { + content: "\e033"; +} +.glyphicon-flag:before { + content: "\e034"; +} +.glyphicon-headphones:before { + content: "\e035"; +} +.glyphicon-volume-off:before { + content: "\e036"; +} +.glyphicon-volume-down:before { + content: "\e037"; +} +.glyphicon-volume-up:before { + content: "\e038"; +} +.glyphicon-qrcode:before { + content: "\e039"; +} +.glyphicon-barcode:before { + content: "\e040"; +} +.glyphicon-tag:before { + content: "\e041"; +} +.glyphicon-tags:before { + content: 
"\e042"; +} +.glyphicon-book:before { + content: "\e043"; +} +.glyphicon-bookmark:before { + content: "\e044"; +} +.glyphicon-print:before { + content: "\e045"; +} +.glyphicon-camera:before { + content: "\e046"; +} +.glyphicon-font:before { + content: "\e047"; +} +.glyphicon-bold:before { + content: "\e048"; +} +.glyphicon-italic:before { + content: "\e049"; +} +.glyphicon-text-height:before { + content: "\e050"; +} +.glyphicon-text-width:before { + content: "\e051"; +} +.glyphicon-align-left:before { + content: "\e052"; +} +.glyphicon-align-center:before { + content: "\e053"; +} +.glyphicon-align-right:before { + content: "\e054"; +} +.glyphicon-align-justify:before { + content: "\e055"; +} +.glyphicon-list:before { + content: "\e056"; +} +.glyphicon-indent-left:before { + content: "\e057"; +} +.glyphicon-indent-right:before { + content: "\e058"; +} +.glyphicon-facetime-video:before { + content: "\e059"; +} +.glyphicon-picture:before { + content: "\e060"; +} +.glyphicon-map-marker:before { + content: "\e062"; +} +.glyphicon-adjust:before { + content: "\e063"; +} +.glyphicon-tint:before { + content: "\e064"; +} +.glyphicon-edit:before { + content: "\e065"; +} +.glyphicon-share:before { + content: "\e066"; +} +.glyphicon-check:before { + content: "\e067"; +} +.glyphicon-move:before { + content: "\e068"; +} +.glyphicon-step-backward:before { + content: "\e069"; +} +.glyphicon-fast-backward:before { + content: "\e070"; +} +.glyphicon-backward:before { + content: "\e071"; +} +.glyphicon-play:before { + content: "\e072"; +} +.glyphicon-pause:before { + content: "\e073"; +} +.glyphicon-stop:before { + content: "\e074"; +} +.glyphicon-forward:before { + content: "\e075"; +} +.glyphicon-fast-forward:before { + content: "\e076"; +} +.glyphicon-step-forward:before { + content: "\e077"; +} +.glyphicon-eject:before { + content: "\e078"; +} +.glyphicon-chevron-left:before { + content: "\e079"; +} +.glyphicon-chevron-right:before { + content: "\e080"; +} +.glyphicon-plus-sign:before { + content: "\e081"; +} +.glyphicon-minus-sign:before { + content: "\e082"; +} +.glyphicon-remove-sign:before { + content: "\e083"; +} +.glyphicon-ok-sign:before { + content: "\e084"; +} +.glyphicon-question-sign:before { + content: "\e085"; +} +.glyphicon-info-sign:before { + content: "\e086"; +} +.glyphicon-screenshot:before { + content: "\e087"; +} +.glyphicon-remove-circle:before { + content: "\e088"; +} +.glyphicon-ok-circle:before { + content: "\e089"; +} +.glyphicon-ban-circle:before { + content: "\e090"; +} +.glyphicon-arrow-left:before { + content: "\e091"; +} +.glyphicon-arrow-right:before { + content: "\e092"; +} +.glyphicon-arrow-up:before { + content: "\e093"; +} +.glyphicon-arrow-down:before { + content: "\e094"; +} +.glyphicon-share-alt:before { + content: "\e095"; +} +.glyphicon-resize-full:before { + content: "\e096"; +} +.glyphicon-resize-small:before { + content: "\e097"; +} +.glyphicon-exclamation-sign:before { + content: "\e101"; +} +.glyphicon-gift:before { + content: "\e102"; +} +.glyphicon-leaf:before { + content: "\e103"; +} +.glyphicon-fire:before { + content: "\e104"; +} +.glyphicon-eye-open:before { + content: "\e105"; +} +.glyphicon-eye-close:before { + content: "\e106"; +} +.glyphicon-warning-sign:before { + content: "\e107"; +} +.glyphicon-plane:before { + content: "\e108"; +} +.glyphicon-calendar:before { + content: "\e109"; +} +.glyphicon-random:before { + content: "\e110"; +} +.glyphicon-comment:before { + content: "\e111"; +} +.glyphicon-magnet:before { + content: "\e112"; +} 
+.glyphicon-chevron-up:before { + content: "\e113"; +} +.glyphicon-chevron-down:before { + content: "\e114"; +} +.glyphicon-retweet:before { + content: "\e115"; +} +.glyphicon-shopping-cart:before { + content: "\e116"; +} +.glyphicon-folder-close:before { + content: "\e117"; +} +.glyphicon-folder-open:before { + content: "\e118"; +} +.glyphicon-resize-vertical:before { + content: "\e119"; +} +.glyphicon-resize-horizontal:before { + content: "\e120"; +} +.glyphicon-hdd:before { + content: "\e121"; +} +.glyphicon-bullhorn:before { + content: "\e122"; +} +.glyphicon-bell:before { + content: "\e123"; +} +.glyphicon-certificate:before { + content: "\e124"; +} +.glyphicon-thumbs-up:before { + content: "\e125"; +} +.glyphicon-thumbs-down:before { + content: "\e126"; +} +.glyphicon-hand-right:before { + content: "\e127"; +} +.glyphicon-hand-left:before { + content: "\e128"; +} +.glyphicon-hand-up:before { + content: "\e129"; +} +.glyphicon-hand-down:before { + content: "\e130"; +} +.glyphicon-circle-arrow-right:before { + content: "\e131"; +} +.glyphicon-circle-arrow-left:before { + content: "\e132"; +} +.glyphicon-circle-arrow-up:before { + content: "\e133"; +} +.glyphicon-circle-arrow-down:before { + content: "\e134"; +} +.glyphicon-globe:before { + content: "\e135"; +} +.glyphicon-wrench:before { + content: "\e136"; +} +.glyphicon-tasks:before { + content: "\e137"; +} +.glyphicon-filter:before { + content: "\e138"; +} +.glyphicon-briefcase:before { + content: "\e139"; +} +.glyphicon-fullscreen:before { + content: "\e140"; +} +.glyphicon-dashboard:before { + content: "\e141"; +} +.glyphicon-paperclip:before { + content: "\e142"; +} +.glyphicon-heart-empty:before { + content: "\e143"; +} +.glyphicon-link:before { + content: "\e144"; +} +.glyphicon-phone:before { + content: "\e145"; +} +.glyphicon-pushpin:before { + content: "\e146"; +} +.glyphicon-usd:before { + content: "\e148"; +} +.glyphicon-gbp:before { + content: "\e149"; +} +.glyphicon-sort:before { + content: "\e150"; +} +.glyphicon-sort-by-alphabet:before { + content: "\e151"; +} +.glyphicon-sort-by-alphabet-alt:before { + content: "\e152"; +} +.glyphicon-sort-by-order:before { + content: "\e153"; +} +.glyphicon-sort-by-order-alt:before { + content: "\e154"; +} +.glyphicon-sort-by-attributes:before { + content: "\e155"; +} +.glyphicon-sort-by-attributes-alt:before { + content: "\e156"; +} +.glyphicon-unchecked:before { + content: "\e157"; +} +.glyphicon-expand:before { + content: "\e158"; +} +.glyphicon-collapse-down:before { + content: "\e159"; +} +.glyphicon-collapse-up:before { + content: "\e160"; +} +.glyphicon-log-in:before { + content: "\e161"; +} +.glyphicon-flash:before { + content: "\e162"; +} +.glyphicon-log-out:before { + content: "\e163"; +} +.glyphicon-new-window:before { + content: "\e164"; +} +.glyphicon-record:before { + content: "\e165"; +} +.glyphicon-save:before { + content: "\e166"; +} +.glyphicon-open:before { + content: "\e167"; +} +.glyphicon-saved:before { + content: "\e168"; +} +.glyphicon-import:before { + content: "\e169"; +} +.glyphicon-export:before { + content: "\e170"; +} +.glyphicon-send:before { + content: "\e171"; +} +.glyphicon-floppy-disk:before { + content: "\e172"; +} +.glyphicon-floppy-saved:before { + content: "\e173"; +} +.glyphicon-floppy-remove:before { + content: "\e174"; +} +.glyphicon-floppy-save:before { + content: "\e175"; +} +.glyphicon-floppy-open:before { + content: "\e176"; +} +.glyphicon-credit-card:before { + content: "\e177"; +} +.glyphicon-transfer:before { + content: "\e178"; +} 
+.glyphicon-cutlery:before { + content: "\e179"; +} +.glyphicon-header:before { + content: "\e180"; +} +.glyphicon-compressed:before { + content: "\e181"; +} +.glyphicon-earphone:before { + content: "\e182"; +} +.glyphicon-phone-alt:before { + content: "\e183"; +} +.glyphicon-tower:before { + content: "\e184"; +} +.glyphicon-stats:before { + content: "\e185"; +} +.glyphicon-sd-video:before { + content: "\e186"; +} +.glyphicon-hd-video:before { + content: "\e187"; +} +.glyphicon-subtitles:before { + content: "\e188"; +} +.glyphicon-sound-stereo:before { + content: "\e189"; +} +.glyphicon-sound-dolby:before { + content: "\e190"; +} +.glyphicon-sound-5-1:before { + content: "\e191"; +} +.glyphicon-sound-6-1:before { + content: "\e192"; +} +.glyphicon-sound-7-1:before { + content: "\e193"; +} +.glyphicon-copyright-mark:before { + content: "\e194"; +} +.glyphicon-registration-mark:before { + content: "\e195"; +} +.glyphicon-cloud-download:before { + content: "\e197"; +} +.glyphicon-cloud-upload:before { + content: "\e198"; +} +.glyphicon-tree-conifer:before { + content: "\e199"; +} +.glyphicon-tree-deciduous:before { + content: "\e200"; +} +.caret { + display: inline-block; + width: 0; + height: 0; + margin-left: 2px; + vertical-align: middle; + border-top: 4px solid; + border-right: 4px solid transparent; + border-left: 4px solid transparent; +} +.dropdown { + position: relative; +} +.dropdown-toggle:focus { + outline: 0; +} +.dropdown-menu { + position: absolute; + top: 100%; + left: 0; + z-index: 1000; + display: none; + float: left; + min-width: 160px; + padding: 5px 0; + margin: 2px 0 0; + list-style: none; + font-size: 14px; + background-color: #fff; + border: 1px solid #ccc; + border: 1px solid rgba(0, 0, 0, 0.15); + border-radius: 4px; + -webkit-box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175); + box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175); + background-clip: padding-box; +} +.dropdown-menu.pull-right { + right: 0; + left: auto; +} +.dropdown-menu .divider { + height: 1px; + margin: 9px 0; + overflow: hidden; + background-color: #e5e5e5; +} +.dropdown-menu > li > a { + display: block; + padding: 3px 20px; + clear: both; + font-weight: 400; + line-height: 1.428571429; + color: #333; + white-space: nowrap; +} +.dropdown-menu > li > a:hover, +.dropdown-menu > li > a:focus { + text-decoration: none; + color: #262626; + background-color: #f5f5f5; +} +.dropdown-menu > .active > a, +.dropdown-menu > .active > a:hover, +.dropdown-menu > .active > a:focus { + color: #fff; + text-decoration: none; + outline: 0; + background-color: #428bca; +} +.dropdown-menu > .disabled > a, +.dropdown-menu > .disabled > a:hover, +.dropdown-menu > .disabled > a:focus { + color: #999; +} +.dropdown-menu > .disabled > a:hover, +.dropdown-menu > .disabled > a:focus { + text-decoration: none; + background-color: transparent; + background-image: none; + filter: progid:DXImageTransform.Microsoft.gradient(enabled=false); + cursor: not-allowed; +} +.open > .dropdown-menu { + display: block; +} +.open > a { + outline: 0; +} +.dropdown-menu-right { + left: auto; + right: 0; +} +.dropdown-menu-left { + left: 0; + right: auto; +} +.dropdown-header { + display: block; + padding: 3px 20px; + font-size: 12px; + line-height: 1.428571429; + color: #999; +} +.dropdown-backdrop { + position: fixed; + left: 0; + right: 0; + bottom: 0; + top: 0; + z-index: 990; +} +.pull-right > .dropdown-menu { + right: 0; + left: auto; +} +.dropup .caret, +.navbar-fixed-bottom .dropdown .caret { + border-top: 0; + border-bottom: 4px solid; + content: 
""; +} +.dropup .dropdown-menu, +.navbar-fixed-bottom .dropdown .dropdown-menu { + top: auto; + bottom: 100%; + margin-bottom: 1px; +} +@media (min-width: 768px) { + .navbar-right .dropdown-menu { + left: auto; + right: 0; + } + .navbar-right .dropdown-menu-left { + left: 0; + right: auto; + } +} +.btn-group, +.btn-group-vertical { + position: relative; + display: inline-block; + vertical-align: middle; +} +.btn-group > .btn, +.btn-group-vertical > .btn { + position: relative; + float: left; +} +.btn-group > .btn:hover, +.btn-group-vertical > .btn:hover, +.btn-group > .btn:focus, +.btn-group-vertical > .btn:focus, +.btn-group > .btn:active, +.btn-group-vertical > .btn:active, +.btn-group > .btn.active, +.btn-group-vertical > .btn.active { + z-index: 2; +} +.btn-group > .btn:focus, +.btn-group-vertical > .btn:focus { + outline: 0; +} +.btn-group .btn + .btn, +.btn-group .btn + .btn-group, +.btn-group .btn-group + .btn, +.btn-group .btn-group + .btn-group { + margin-left: -1px; +} +.btn-toolbar { + margin-left: -5px; +} +.btn-toolbar .btn-group, +.btn-toolbar .input-group { + float: left; +} +.btn-toolbar > .btn, +.btn-toolbar > .btn-group, +.btn-toolbar > .input-group { + margin-left: 5px; +} +.btn-group > .btn:not(:first-child):not(:last-child):not(.dropdown-toggle) { + border-radius: 0; +} +.btn-group > .btn:first-child { + margin-left: 0; +} +.btn-group > .btn:first-child:not(:last-child):not(.dropdown-toggle) { + border-bottom-right-radius: 0; + border-top-right-radius: 0; +} +.btn-group > .btn:last-child:not(:first-child), +.btn-group > .dropdown-toggle:not(:first-child) { + border-bottom-left-radius: 0; + border-top-left-radius: 0; +} +.btn-group > .btn-group { + float: left; +} +.btn-group > .btn-group:not(:first-child):not(:last-child) > .btn { + border-radius: 0; +} +.btn-group > .btn-group:first-child > .btn:last-child, +.btn-group > .btn-group:first-child > .dropdown-toggle { + border-bottom-right-radius: 0; + border-top-right-radius: 0; +} +.btn-group > .btn-group:last-child > .btn:first-child { + border-bottom-left-radius: 0; + border-top-left-radius: 0; +} +.btn-group .dropdown-toggle:active, +.btn-group.open .dropdown-toggle { + outline: 0; +} +.btn-group-xs > .btn { + padding: 1px 5px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +.btn-group-sm > .btn { + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +.btn-group-lg > .btn { + padding: 10px 16px; + font-size: 18px; + line-height: 1.33; + border-radius: 6px; +} +.btn-group > .btn + .dropdown-toggle { + padding-left: 8px; + padding-right: 8px; +} +.btn-group > .btn-lg + .dropdown-toggle { + padding-left: 12px; + padding-right: 12px; +} +.btn-group.open .dropdown-toggle { + -webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125); + box-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125); +} +.btn-group.open .dropdown-toggle.btn-link { + -webkit-box-shadow: none; + box-shadow: none; +} +.btn .caret { + margin-left: 0; +} +.btn-lg .caret { + border-width: 5px 5px 0; + border-bottom-width: 0; +} +.dropup .btn-lg .caret { + border-width: 0 5px 5px; +} +.btn-group-vertical > .btn, +.btn-group-vertical > .btn-group, +.btn-group-vertical > .btn-group > .btn { + display: block; + float: none; + width: 100%; + max-width: 100%; +} +.btn-group-vertical > .btn-group > .btn { + float: none; +} +.btn-group-vertical > .btn + .btn, +.btn-group-vertical > .btn + .btn-group, +.btn-group-vertical > .btn-group + .btn, +.btn-group-vertical > .btn-group + .btn-group { + margin-top: -1px; + 
margin-left: 0; +} +.btn-group-vertical > .btn:not(:first-child):not(:last-child) { + border-radius: 0; +} +.btn-group-vertical > .btn:first-child:not(:last-child) { + border-top-right-radius: 4px; + border-bottom-right-radius: 0; + border-bottom-left-radius: 0; +} +.btn-group-vertical > .btn:last-child:not(:first-child) { + border-bottom-left-radius: 4px; + border-top-right-radius: 0; + border-top-left-radius: 0; +} +.btn-group-vertical > .btn-group:not(:first-child):not(:last-child) > .btn { + border-radius: 0; +} +.btn-group-vertical > .btn-group:first-child:not(:last-child) > .btn:last-child, +.btn-group-vertical + > .btn-group:first-child:not(:last-child) + > .dropdown-toggle { + border-bottom-right-radius: 0; + border-bottom-left-radius: 0; +} +.btn-group-vertical + > .btn-group:last-child:not(:first-child) + > .btn:first-child { + border-top-right-radius: 0; + border-top-left-radius: 0; +} +.btn-group-justified { + display: table; + width: 100%; + table-layout: fixed; + border-collapse: separate; +} +.btn-group-justified > .btn, +.btn-group-justified > .btn-group { + float: none; + display: table-cell; + width: 1%; +} +.btn-group-justified > .btn-group .btn { + width: 100%; +} +[data-toggle="buttons"] > .btn > input[type="radio"], +[data-toggle="buttons"] > .btn > input[type="checkbox"] { + display: none; +} +.input-group { + position: relative; + display: table; + border-collapse: separate; +} +.input-group[class*="col-"] { + float: none; + padding-left: 0; + padding-right: 0; +} +.input-group .form-control { + float: left; + width: 100%; + margin-bottom: 0; +} +.input-group-lg > .form-control, +.input-group-lg > .input-group-addon, +.input-group-lg > .input-group-btn > .btn { + height: 46px; + padding: 10px 16px; + font-size: 18px; + line-height: 1.33; + border-radius: 6px; +} +select.input-group-lg > .form-control, +select.input-group-lg > .input-group-addon, +select.input-group-lg > .input-group-btn > .btn { + height: 46px; + line-height: 46px; +} +textarea.input-group-lg > .form-control, +textarea.input-group-lg > .input-group-addon, +textarea.input-group-lg > .input-group-btn > .btn, +select[multiple].input-group-lg > .form-control, +select[multiple].input-group-lg > .input-group-addon, +select[multiple].input-group-lg > .input-group-btn > .btn { + height: auto; +} +.input-group-sm > .form-control, +.input-group-sm > .input-group-addon, +.input-group-sm > .input-group-btn > .btn { + height: 30px; + padding: 5px 10px; + font-size: 12px; + line-height: 1.5; + border-radius: 3px; +} +select.input-group-sm > .form-control, +select.input-group-sm > .input-group-addon, +select.input-group-sm > .input-group-btn > .btn { + height: 30px; + line-height: 30px; +} +textarea.input-group-sm > .form-control, +textarea.input-group-sm > .input-group-addon, +textarea.input-group-sm > .input-group-btn > .btn, +select[multiple].input-group-sm > .form-control, +select[multiple].input-group-sm > .input-group-addon, +select[multiple].input-group-sm > .input-group-btn > .btn { + height: auto; +} +.input-group-addon, +.input-group-btn, +.input-group .form-control { + display: table-cell; +} +.input-group-addon:not(:first-child):not(:last-child), +.input-group-btn:not(:first-child):not(:last-child), +.input-group .form-control:not(:first-child):not(:last-child) { + border-radius: 0; +} +.input-group-addon, +.input-group-btn { + width: 1%; + white-space: nowrap; + vertical-align: middle; +} +.input-group-addon { + padding: 6px 12px; + font-size: 14px; + font-weight: 400; + line-height: 1; + color: #555; 
+ text-align: center; + background-color: #eee; + border: 1px solid #ccc; + border-radius: 4px; +} +.input-group-addon.input-sm { + padding: 5px 10px; + font-size: 12px; + border-radius: 3px; +} +.input-group-addon.input-lg { + padding: 10px 16px; + font-size: 18px; + border-radius: 6px; +} +.input-group-addon input[type="radio"], +.input-group-addon input[type="checkbox"] { + margin-top: 0; +} +.input-group .form-control:first-child, +.input-group-addon:first-child, +.input-group-btn:first-child > .btn, +.input-group-btn:first-child > .btn-group > .btn, +.input-group-btn:first-child > .dropdown-toggle, +.input-group-btn:last-child > .btn:not(:last-child):not(.dropdown-toggle), +.input-group-btn:last-child > .btn-group:not(:last-child) > .btn { + border-bottom-right-radius: 0; + border-top-right-radius: 0; +} +.input-group-addon:first-child { + border-right: 0; +} +.input-group .form-control:last-child, +.input-group-addon:last-child, +.input-group-btn:last-child > .btn, +.input-group-btn:last-child > .btn-group > .btn, +.input-group-btn:last-child > .dropdown-toggle, +.input-group-btn:first-child > .btn:not(:first-child), +.input-group-btn:first-child > .btn-group:not(:first-child) > .btn { + border-bottom-left-radius: 0; + border-top-left-radius: 0; +} +.input-group-addon:last-child { + border-left: 0; +} +.input-group-btn { + position: relative; + font-size: 0; + white-space: nowrap; +} +.input-group-btn > .btn { + position: relative; +} +.input-group-btn > .btn + .btn { + margin-left: -1px; +} +.input-group-btn > .btn:hover, +.input-group-btn > .btn:focus, +.input-group-btn > .btn:active { + z-index: 2; +} +.input-group-btn:first-child > .btn, +.input-group-btn:first-child > .btn-group { + margin-right: -1px; +} +.input-group-btn:last-child > .btn, +.input-group-btn:last-child > .btn-group { + margin-left: -1px; +} +.nav { + margin-bottom: 0; + padding-left: 0; + list-style: none; +} +.nav > li { + position: relative; + display: block; +} +.nav > li > a { + position: relative; + display: block; + padding: 10px 15px; +} +.nav > li > a:hover, +.nav > li > a:focus { + text-decoration: none; + background-color: #eee; +} +.nav > li.disabled > a { + color: #999; +} +.nav > li.disabled > a:hover, +.nav > li.disabled > a:focus { + color: #999; + text-decoration: none; + background-color: transparent; + cursor: not-allowed; +} +.nav .open > a, +.nav .open > a:hover, +.nav .open > a:focus { + background-color: #eee; + border-color: #428bca; +} +.nav .nav-divider { + height: 1px; + margin: 9px 0; + overflow: hidden; + background-color: #e5e5e5; +} +.nav > li > a > img { + max-width: none; +} +.nav-tabs { + border-bottom: 1px solid #ddd; +} +.nav-tabs > li { + float: left; + margin-bottom: -1px; +} +.nav-tabs > li > a { + margin-right: 2px; + line-height: 1.428571429; + border: 1px solid transparent; + border-radius: 4px 4px 0 0; +} +.nav-tabs > li > a:hover { + border-color: #eee #eee #ddd; +} +.nav-tabs > li.active > a, +.nav-tabs > li.active > a:hover, +.nav-tabs > li.active > a:focus { + color: #555; + background-color: #fff; + border: 1px solid #ddd; + border-bottom-color: transparent; + cursor: default; +} +.nav-tabs.nav-justified { + width: 100%; + border-bottom: 0; +} +.nav-tabs.nav-justified > li { + float: none; +} +.nav-tabs.nav-justified > li > a { + text-align: center; + margin-bottom: 5px; +} +.nav-tabs.nav-justified > .dropdown .dropdown-menu { + top: auto; + left: auto; +} +@media (min-width: 768px) { + .nav-tabs.nav-justified > li { + display: table-cell; + width: 1%; + } + 
.nav-tabs.nav-justified > li > a { + margin-bottom: 0; + } +} +.nav-tabs.nav-justified > li > a { + margin-right: 0; + border-radius: 4px; +} +.nav-tabs.nav-justified > .active > a, +.nav-tabs.nav-justified > .active > a:hover, +.nav-tabs.nav-justified > .active > a:focus { + border: 1px solid #ddd; +} +@media (min-width: 768px) { + .nav-tabs.nav-justified > li > a { + border-bottom: 1px solid #ddd; + border-radius: 4px 4px 0 0; + } + .nav-tabs.nav-justified > .active > a, + .nav-tabs.nav-justified > .active > a:hover, + .nav-tabs.nav-justified > .active > a:focus { + border-bottom-color: #fff; + } +} +.nav-pills > li { + float: left; +} +.nav-pills > li > a { + border-radius: 4px; +} +.nav-pills > li + li { + margin-left: 2px; +} +.nav-pills > li.active > a, +.nav-pills > li.active > a:hover, +.nav-pills > li.active > a:focus { + color: #fff; + background-color: #428bca; +} +.nav-stacked > li { + float: none; +} +.nav-stacked > li + li { + margin-top: 2px; + margin-left: 0; +} +.nav-justified { + width: 100%; +} +.nav-justified > li { + float: none; +} +.nav-justified > li > a { + text-align: center; + margin-bottom: 5px; +} +.nav-justified > .dropdown .dropdown-menu { + top: auto; + left: auto; +} +@media (min-width: 768px) { + .nav-justified > li { + display: table-cell; + width: 1%; + } + .nav-justified > li > a { + margin-bottom: 0; + } +} +.nav-tabs-justified { + border-bottom: 0; +} +.nav-tabs-justified > li > a { + margin-right: 0; + border-radius: 4px; +} +.nav-tabs-justified > .active > a, +.nav-tabs-justified > .active > a:hover, +.nav-tabs-justified > .active > a:focus { + border: 1px solid #ddd; +} +@media (min-width: 768px) { + .nav-tabs-justified > li > a { + border-bottom: 1px solid #ddd; + border-radius: 4px 4px 0 0; + } + .nav-tabs-justified > .active > a, + .nav-tabs-justified > .active > a:hover, + .nav-tabs-justified > .active > a:focus { + border-bottom-color: #fff; + } +} +.tab-content > .tab-pane { + display: none; +} +.tab-content > .active { + display: block; +} +.nav-tabs .dropdown-menu { + margin-top: -1px; + border-top-right-radius: 0; + border-top-left-radius: 0; +} +.navbar { + position: relative; + min-height: 50px; + margin-bottom: 20px; + border: 1px solid transparent; +} +@media (min-width: 768px) { + .navbar { + border-radius: 4px; + } +} +@media (min-width: 768px) { + .navbar-header { + float: left; + } +} +.navbar-collapse { + max-height: 340px; + overflow-x: visible; + padding-right: 15px; + padding-left: 15px; + border-top: 1px solid transparent; + box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.1); + -webkit-overflow-scrolling: touch; +} +.navbar-collapse.in { + overflow-y: auto; +} +@media (min-width: 768px) { + .navbar-collapse { + width: auto; + border-top: 0; + box-shadow: none; + } + .navbar-collapse.collapse { + display: block !important; + height: auto !important; + padding-bottom: 0; + overflow: visible !important; + } + .navbar-collapse.in { + overflow-y: visible; + } + .navbar-fixed-top .navbar-collapse, + .navbar-static-top .navbar-collapse, + .navbar-fixed-bottom .navbar-collapse { + padding-left: 0; + padding-right: 0; + } +} +.container > .navbar-header, +.container-fluid > .navbar-header, +.container > .navbar-collapse, +.container-fluid > .navbar-collapse { + margin-right: -15px; + margin-left: -15px; +} +@media (min-width: 768px) { + .container > .navbar-header, + .container-fluid > .navbar-header, + .container > .navbar-collapse, + .container-fluid > .navbar-collapse { + margin-right: 0; + margin-left: 0; + } +} +.navbar-static-top 
{ + z-index: 1000; + border-width: 0 0 1px; +} +@media (min-width: 768px) { + .navbar-static-top { + border-radius: 0; + } +} +.navbar-fixed-top, +.navbar-fixed-bottom { + position: fixed; + right: 0; + left: 0; + z-index: 1030; +} +@media (min-width: 768px) { + .navbar-fixed-top, + .navbar-fixed-bottom { + border-radius: 0; + } +} +.navbar-fixed-top { + top: 0; + border-width: 0 0 1px; +} +.navbar-fixed-bottom { + bottom: 0; + margin-bottom: 0; + border-width: 1px 0 0; +} +.navbar-brand { + float: left; + padding: 15px; + font-size: 18px; + line-height: 20px; + height: 20px; +} +.navbar-brand:hover, +.navbar-brand:focus { + text-decoration: none; +} +@media (min-width: 768px) { + .navbar > .container .navbar-brand, + .navbar > .container-fluid .navbar-brand { + margin-left: -15px; + } +} +.navbar-toggle { + position: relative; + float: right; + margin-right: 15px; + padding: 9px 10px; + margin-top: 8px; + margin-bottom: 8px; + background-color: transparent; + background-image: none; + border: 1px solid transparent; + border-radius: 4px; +} +.navbar-toggle:focus { + outline: 0; +} +.navbar-toggle .icon-bar { + display: block; + width: 22px; + height: 2px; + border-radius: 1px; +} +.navbar-toggle .icon-bar + .icon-bar { + margin-top: 4px; +} +@media (min-width: 768px) { + .navbar-toggle { + display: none; + } +} +.navbar-nav { + margin: 7.5px -15px; +} +.navbar-nav > li > a { + padding-top: 10px; + padding-bottom: 10px; + line-height: 20px; +} +@media (max-width: 767px) { + .navbar-nav .open .dropdown-menu { + position: static; + float: none; + width: auto; + margin-top: 0; + background-color: transparent; + border: 0; + box-shadow: none; + } + .navbar-nav .open .dropdown-menu > li > a, + .navbar-nav .open .dropdown-menu .dropdown-header { + padding: 5px 15px 5px 25px; + } + .navbar-nav .open .dropdown-menu > li > a { + line-height: 20px; + } + .navbar-nav .open .dropdown-menu > li > a:hover, + .navbar-nav .open .dropdown-menu > li > a:focus { + background-image: none; + } +} +@media (min-width: 768px) { + .navbar-nav { + float: left; + margin: 0; + } + .navbar-nav > li { + float: left; + } + .navbar-nav > li > a { + padding-top: 15px; + padding-bottom: 15px; + } + .navbar-nav.navbar-right:last-child { + margin-right: -15px; + } +} +@media (min-width: 768px) { + .navbar-left { + float: left !important; + } + .navbar-right { + float: right !important; + } +} +.navbar-form { + margin-left: -15px; + margin-right: -15px; + padding: 10px 15px; + border-top: 1px solid transparent; + border-bottom: 1px solid transparent; + -webkit-box-shadow: + inset 0 1px 0 rgba(255, 255, 255, 0.1), + 0 1px 0 rgba(255, 255, 255, 0.1); + box-shadow: + inset 0 1px 0 rgba(255, 255, 255, 0.1), + 0 1px 0 rgba(255, 255, 255, 0.1); + margin-top: 8px; + margin-bottom: 8px; +} +@media (min-width: 768px) { + .navbar-form .form-group { + display: inline-block; + margin-bottom: 0; + vertical-align: middle; + } + .navbar-form .form-control { + display: inline-block; + width: auto; + vertical-align: middle; + } + .navbar-form .control-label { + margin-bottom: 0; + vertical-align: middle; + } + .navbar-form .radio, + .navbar-form .checkbox { + display: inline-block; + margin-top: 0; + margin-bottom: 0; + padding-left: 0; + vertical-align: middle; + } + .navbar-form .radio input[type="radio"], + .navbar-form .checkbox input[type="checkbox"] { + float: none; + margin-left: 0; + } + .navbar-form .has-feedback .form-control-feedback { + top: 0; + } +} +@media (max-width: 767px) { + .navbar-form .form-group { + margin-bottom: 5px; + 
} +} +@media (min-width: 768px) { + .navbar-form { + width: auto; + border: 0; + margin-left: 0; + margin-right: 0; + padding-top: 0; + padding-bottom: 0; + -webkit-box-shadow: none; + box-shadow: none; + } + .navbar-form.navbar-right:last-child { + margin-right: -15px; + } +} +.navbar-nav > li > .dropdown-menu { + margin-top: 0; + border-top-right-radius: 0; + border-top-left-radius: 0; +} +.navbar-fixed-bottom .navbar-nav > li > .dropdown-menu { + border-bottom-right-radius: 0; + border-bottom-left-radius: 0; +} +.navbar-btn { + margin-top: 8px; + margin-bottom: 8px; +} +.navbar-btn.btn-sm { + margin-top: 10px; + margin-bottom: 10px; +} +.navbar-btn.btn-xs { + margin-top: 14px; + margin-bottom: 14px; +} +.navbar-text { + margin-top: 15px; + margin-bottom: 15px; +} +@media (min-width: 768px) { + .navbar-text { + float: left; + margin-left: 15px; + margin-right: 15px; + } + .navbar-text.navbar-right:last-child { + margin-right: 0; + } +} +.navbar-default { + background-color: #f8f8f8; + border-color: #e7e7e7; +} +.navbar-default .navbar-brand { + color: #777; +} +.navbar-default .navbar-brand:hover, +.navbar-default .navbar-brand:focus { + color: #5e5e5e; + background-color: transparent; +} +.navbar-default .navbar-text { + color: #777; +} +.navbar-default .navbar-nav > li > a { + color: #777; +} +.navbar-default .navbar-nav > li > a:hover, +.navbar-default .navbar-nav > li > a:focus { + color: #333; + background-color: transparent; +} +.navbar-default .navbar-nav > .active > a, +.navbar-default .navbar-nav > .active > a:hover, +.navbar-default .navbar-nav > .active > a:focus { + color: #555; + background-color: #e7e7e7; +} +.navbar-default .navbar-nav > .disabled > a, +.navbar-default .navbar-nav > .disabled > a:hover, +.navbar-default .navbar-nav > .disabled > a:focus { + color: #ccc; + background-color: transparent; +} +.navbar-default .navbar-toggle { + border-color: #ddd; +} +.navbar-default .navbar-toggle:hover, +.navbar-default .navbar-toggle:focus { + background-color: #ddd; +} +.navbar-default .navbar-toggle .icon-bar { + background-color: #888; +} +.navbar-default .navbar-collapse, +.navbar-default .navbar-form { + border-color: #e7e7e7; +} +.navbar-default .navbar-nav > .open > a, +.navbar-default .navbar-nav > .open > a:hover, +.navbar-default .navbar-nav > .open > a:focus { + background-color: #e7e7e7; + color: #555; +} +@media (max-width: 767px) { + .navbar-default .navbar-nav .open .dropdown-menu > li > a { + color: #777; + } + .navbar-default .navbar-nav .open .dropdown-menu > li > a:hover, + .navbar-default .navbar-nav .open .dropdown-menu > li > a:focus { + color: #333; + background-color: transparent; + } + .navbar-default .navbar-nav .open .dropdown-menu > .active > a, + .navbar-default .navbar-nav .open .dropdown-menu > .active > a:hover, + .navbar-default .navbar-nav .open .dropdown-menu > .active > a:focus { + color: #555; + background-color: #e7e7e7; + } + .navbar-default .navbar-nav .open .dropdown-menu > .disabled > a, + .navbar-default .navbar-nav .open .dropdown-menu > .disabled > a:hover, + .navbar-default .navbar-nav .open .dropdown-menu > .disabled > a:focus { + color: #ccc; + background-color: transparent; + } +} +.navbar-default .navbar-link { + color: #777; +} +.navbar-default .navbar-link:hover { + color: #333; +} +.navbar-inverse { + background-color: #222; + border-color: #080808; +} +.navbar-inverse .navbar-brand { + color: #999; +} +.navbar-inverse .navbar-brand:hover, +.navbar-inverse .navbar-brand:focus { + color: #fff; + background-color: 
transparent; +} +.navbar-inverse .navbar-text { + color: #999; +} +.navbar-inverse .navbar-nav > li > a { + color: #999; +} +.navbar-inverse .navbar-nav > li > a:hover, +.navbar-inverse .navbar-nav > li > a:focus { + color: #fff; + background-color: transparent; +} +.navbar-inverse .navbar-nav > .active > a, +.navbar-inverse .navbar-nav > .active > a:hover, +.navbar-inverse .navbar-nav > .active > a:focus { + color: #fff; + background-color: #080808; +} +.navbar-inverse .navbar-nav > .disabled > a, +.navbar-inverse .navbar-nav > .disabled > a:hover, +.navbar-inverse .navbar-nav > .disabled > a:focus { + color: #444; + background-color: transparent; +} +.navbar-inverse .navbar-toggle { + border-color: #333; +} +.navbar-inverse .navbar-toggle:hover, +.navbar-inverse .navbar-toggle:focus { + background-color: #333; +} +.navbar-inverse .navbar-toggle .icon-bar { + background-color: #fff; +} +.navbar-inverse .navbar-collapse, +.navbar-inverse .navbar-form { + border-color: #101010; +} +.navbar-inverse .navbar-nav > .open > a, +.navbar-inverse .navbar-nav > .open > a:hover, +.navbar-inverse .navbar-nav > .open > a:focus { + background-color: #080808; + color: #fff; +} +@media (max-width: 767px) { + .navbar-inverse .navbar-nav .open .dropdown-menu > .dropdown-header { + border-color: #080808; + } + .navbar-inverse .navbar-nav .open .dropdown-menu .divider { + background-color: #080808; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > li > a { + color: #999; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > li > a:hover, + .navbar-inverse .navbar-nav .open .dropdown-menu > li > a:focus { + color: #fff; + background-color: transparent; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > .active > a, + .navbar-inverse .navbar-nav .open .dropdown-menu > .active > a:hover, + .navbar-inverse .navbar-nav .open .dropdown-menu > .active > a:focus { + color: #fff; + background-color: #080808; + } + .navbar-inverse .navbar-nav .open .dropdown-menu > .disabled > a, + .navbar-inverse .navbar-nav .open .dropdown-menu > .disabled > a:hover, + .navbar-inverse .navbar-nav .open .dropdown-menu > .disabled > a:focus { + color: #444; + background-color: transparent; + } +} +.navbar-inverse .navbar-link { + color: #999; +} +.navbar-inverse .navbar-link:hover { + color: #fff; +} +.breadcrumb { + padding: 8px 15px; + margin-bottom: 20px; + list-style: none; + background-color: #f5f5f5; + border-radius: 4px; +} +.breadcrumb > li { + display: inline-block; +} +.breadcrumb > li + li:before { + content: "/\00a0"; + padding: 0 5px; + color: #ccc; +} +.breadcrumb > .active { + color: #999; +} +.pagination { + display: inline-block; + padding-left: 0; + margin: 20px 0; + border-radius: 4px; +} +.pagination > li { + display: inline; +} +.pagination > li > a, +.pagination > li > span { + position: relative; + float: left; + padding: 6px 12px; + line-height: 1.428571429; + text-decoration: none; + color: #428bca; + background-color: #fff; + border: 1px solid #ddd; + margin-left: -1px; +} +.pagination > li:first-child > a, +.pagination > li:first-child > span { + margin-left: 0; + border-bottom-left-radius: 4px; + border-top-left-radius: 4px; +} +.pagination > li:last-child > a, +.pagination > li:last-child > span { + border-bottom-right-radius: 4px; + border-top-right-radius: 4px; +} +.pagination > li > a:hover, +.pagination > li > span:hover, +.pagination > li > a:focus, +.pagination > li > span:focus { + color: #2a6496; + background-color: #eee; + border-color: #ddd; +} +.pagination > .active > a, 
+.pagination > .active > span, +.pagination > .active > a:hover, +.pagination > .active > span:hover, +.pagination > .active > a:focus, +.pagination > .active > span:focus { + z-index: 2; + color: #fff; + background-color: #428bca; + border-color: #428bca; + cursor: default; +} +.pagination > .disabled > span, +.pagination > .disabled > span:hover, +.pagination > .disabled > span:focus, +.pagination > .disabled > a, +.pagination > .disabled > a:hover, +.pagination > .disabled > a:focus { + color: #999; + background-color: #fff; + border-color: #ddd; + cursor: not-allowed; +} +.pagination-lg > li > a, +.pagination-lg > li > span { + padding: 10px 16px; + font-size: 18px; +} +.pagination-lg > li:first-child > a, +.pagination-lg > li:first-child > span { + border-bottom-left-radius: 6px; + border-top-left-radius: 6px; +} +.pagination-lg > li:last-child > a, +.pagination-lg > li:last-child > span { + border-bottom-right-radius: 6px; + border-top-right-radius: 6px; +} +.pagination-sm > li > a, +.pagination-sm > li > span { + padding: 5px 10px; + font-size: 12px; +} +.pagination-sm > li:first-child > a, +.pagination-sm > li:first-child > span { + border-bottom-left-radius: 3px; + border-top-left-radius: 3px; +} +.pagination-sm > li:last-child > a, +.pagination-sm > li:last-child > span { + border-bottom-right-radius: 3px; + border-top-right-radius: 3px; +} +.pager { + padding-left: 0; + margin: 20px 0; + list-style: none; + text-align: center; +} +.pager li { + display: inline; +} +.pager li > a, +.pager li > span { + display: inline-block; + padding: 5px 14px; + background-color: #fff; + border: 1px solid #ddd; + border-radius: 15px; +} +.pager li > a:hover, +.pager li > a:focus { + text-decoration: none; + background-color: #eee; +} +.pager .next > a, +.pager .next > span { + float: right; +} +.pager .previous > a, +.pager .previous > span { + float: left; +} +.pager .disabled > a, +.pager .disabled > a:hover, +.pager .disabled > a:focus, +.pager .disabled > span { + color: #999; + background-color: #fff; + cursor: not-allowed; +} +.label { + display: inline; + padding: 0.2em 0.6em 0.3em; + font-size: 75%; + font-weight: 700; + line-height: 1; + color: #fff; + text-align: center; + white-space: nowrap; + vertical-align: baseline; + border-radius: 0.25em; +} +.label[href]:hover, +.label[href]:focus { + color: #fff; + text-decoration: none; + cursor: pointer; +} +.label:empty { + display: none; +} +.btn .label { + position: relative; + top: -1px; +} +.label-default { + background-color: #999; +} +.label-default[href]:hover, +.label-default[href]:focus { + background-color: gray; +} +.label-primary { + background-color: #428bca; +} +.label-primary[href]:hover, +.label-primary[href]:focus { + background-color: #3071a9; +} +.label-success { + background-color: #5cb85c; +} +.label-success[href]:hover, +.label-success[href]:focus { + background-color: #449d44; +} +.label-info { + background-color: #5bc0de; +} +.label-info[href]:hover, +.label-info[href]:focus { + background-color: #31b0d5; +} +.label-warning { + background-color: #f0ad4e; +} +.label-warning[href]:hover, +.label-warning[href]:focus { + background-color: #ec971f; +} +.label-danger { + background-color: #d9534f; +} +.label-danger[href]:hover, +.label-danger[href]:focus { + background-color: #c9302c; +} +.badge { + display: inline-block; + min-width: 10px; + padding: 3px 7px; + font-size: 12px; + font-weight: 700; + color: #fff; + line-height: 1; + vertical-align: baseline; + white-space: nowrap; + text-align: center; + background-color: 
#999; + border-radius: 10px; +} +.badge:empty { + display: none; +} +.btn .badge { + position: relative; + top: -1px; +} +.btn-xs .badge { + top: 0; + padding: 1px 5px; +} +a.badge:hover, +a.badge:focus { + color: #fff; + text-decoration: none; + cursor: pointer; +} +a.list-group-item.active > .badge, +.nav-pills > .active > a > .badge { + color: #428bca; + background-color: #fff; +} +.nav-pills > li > a > .badge { + margin-left: 3px; +} +.jumbotron { + padding: 30px; + margin-bottom: 30px; + color: inherit; + background-color: #eee; +} +.jumbotron h1, +.jumbotron .h1 { + color: inherit; +} +.jumbotron p { + margin-bottom: 15px; + font-size: 21px; + font-weight: 200; +} +.container .jumbotron { + border-radius: 6px; +} +.jumbotron .container { + max-width: 100%; +} +@media screen and (min-width: 768px) { + .jumbotron { + padding-top: 48px; + padding-bottom: 48px; + } + .container .jumbotron { + padding-left: 60px; + padding-right: 60px; + } + .jumbotron h1, + .jumbotron .h1 { + font-size: 63px; + } +} +.thumbnail { + display: block; + padding: 4px; + margin-bottom: 20px; + line-height: 1.428571429; + background-color: #fff; + border: 1px solid #ddd; + border-radius: 4px; + -webkit-transition: all 0.2s ease-in-out; + transition: all 0.2s ease-in-out; +} +.thumbnail > img, +.thumbnail a > img { + display: block; + max-width: 100%; + height: auto; + margin-left: auto; + margin-right: auto; +} +a.thumbnail:hover, +a.thumbnail:focus, +a.thumbnail.active { + border-color: #428bca; +} +.thumbnail .caption { + padding: 9px; + color: #333; +} +.alert { + padding: 15px; + margin-bottom: 20px; + border: 1px solid transparent; + border-radius: 4px; +} +.alert h4 { + margin-top: 0; + color: inherit; +} +.alert .alert-link { + font-weight: 700; +} +.alert > p, +.alert > ul { + margin-bottom: 0; +} +.alert > p + p { + margin-top: 5px; +} +.alert-dismissable { + padding-right: 35px; +} +.alert-dismissable .close { + position: relative; + top: -2px; + right: -21px; + color: inherit; +} +.alert-success { + background-color: #dff0d8; + border-color: #d6e9c6; + color: #3c763d; +} +.alert-success hr { + border-top-color: #c9e2b3; +} +.alert-success .alert-link { + color: #2b542c; +} +.alert-info { + background-color: #d9edf7; + border-color: #bce8f1; + color: #31708f; +} +.alert-info hr { + border-top-color: #a6e1ec; +} +.alert-info .alert-link { + color: #245269; +} +.alert-warning { + background-color: #fcf8e3; + border-color: #faebcc; + color: #8a6d3b; +} +.alert-warning hr { + border-top-color: #f7e1b5; +} +.alert-warning .alert-link { + color: #66512c; +} +.alert-danger { + background-color: #f2dede; + border-color: #ebccd1; + color: #a94442; +} +.alert-danger hr { + border-top-color: #e4b9c0; +} +.alert-danger .alert-link { + color: #843534; +} +@-webkit-keyframes progress-bar-stripes { + from { + background-position: 40px 0; + } + to { + background-position: 0 0; + } +} +@keyframes progress-bar-stripes { + from { + background-position: 40px 0; + } + to { + background-position: 0 0; + } +} +.progress { + overflow: hidden; + height: 20px; + margin-bottom: 20px; + background-color: #f5f5f5; + border-radius: 4px; + -webkit-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1); + box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1); +} +.progress-bar { + float: left; + width: 0; + height: 100%; + font-size: 12px; + line-height: 20px; + color: #fff; + text-align: center; + background-color: #428bca; + -webkit-box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.15); + box-shadow: inset 0 -1px 0 rgba(0, 0, 0, 0.15); + 
-webkit-transition: width 0.6s ease; + transition: width 0.6s ease; +} +.progress-striped .progress-bar { + background-image: -webkit-linear-gradient( + 45deg, + rgba(255, 255, 255, 0.15) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.15) 50%, + rgba(255, 255, 255, 0.15) 75%, + transparent 75%, + transparent + ); + background-image: linear-gradient( + 45deg, + rgba(255, 255, 255, 0.15) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.15) 50%, + rgba(255, 255, 255, 0.15) 75%, + transparent 75%, + transparent + ); + background-size: 40px 40px; +} +.progress.active .progress-bar { + -webkit-animation: progress-bar-stripes 2s linear infinite; + animation: progress-bar-stripes 2s linear infinite; +} +.progress-bar-success { + background-color: #5cb85c; +} +.progress-striped .progress-bar-success { + background-image: -webkit-linear-gradient( + 45deg, + rgba(255, 255, 255, 0.15) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.15) 50%, + rgba(255, 255, 255, 0.15) 75%, + transparent 75%, + transparent + ); + background-image: linear-gradient( + 45deg, + rgba(255, 255, 255, 0.15) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.15) 50%, + rgba(255, 255, 255, 0.15) 75%, + transparent 75%, + transparent + ); +} +.progress-bar-info { + background-color: #5bc0de; +} +.progress-striped .progress-bar-info { + background-image: -webkit-linear-gradient( + 45deg, + rgba(255, 255, 255, 0.15) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.15) 50%, + rgba(255, 255, 255, 0.15) 75%, + transparent 75%, + transparent + ); + background-image: linear-gradient( + 45deg, + rgba(255, 255, 255, 0.15) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.15) 50%, + rgba(255, 255, 255, 0.15) 75%, + transparent 75%, + transparent + ); +} +.progress-bar-warning { + background-color: #f0ad4e; +} +.progress-striped .progress-bar-warning { + background-image: -webkit-linear-gradient( + 45deg, + rgba(255, 255, 255, 0.15) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.15) 50%, + rgba(255, 255, 255, 0.15) 75%, + transparent 75%, + transparent + ); + background-image: linear-gradient( + 45deg, + rgba(255, 255, 255, 0.15) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.15) 50%, + rgba(255, 255, 255, 0.15) 75%, + transparent 75%, + transparent + ); +} +.progress-bar-danger { + background-color: #d9534f; +} +.progress-striped .progress-bar-danger { + background-image: -webkit-linear-gradient( + 45deg, + rgba(255, 255, 255, 0.15) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.15) 50%, + rgba(255, 255, 255, 0.15) 75%, + transparent 75%, + transparent + ); + background-image: linear-gradient( + 45deg, + rgba(255, 255, 255, 0.15) 25%, + transparent 25%, + transparent 50%, + rgba(255, 255, 255, 0.15) 50%, + rgba(255, 255, 255, 0.15) 75%, + transparent 75%, + transparent + ); +} +.media, +.media-body { + overflow: hidden; + zoom: 1; +} +.media, +.media .media { + margin-top: 15px; +} +.media:first-child { + margin-top: 0; +} +.media-object { + display: block; +} +.media-heading { + margin: 0 0 5px; +} +.media > .pull-left { + margin-right: 10px; +} +.media > .pull-right { + margin-left: 10px; +} +.media-list { + padding-left: 0; + list-style: none; +} +.list-group { + margin-bottom: 20px; + padding-left: 0; +} +.list-group-item { + position: relative; + display: block; + padding: 10px 15px; + margin-bottom: -1px; + background-color: #fff; + border: 1px solid #ddd; +} 
+.list-group-item:first-child { + border-top-right-radius: 4px; + border-top-left-radius: 4px; +} +.list-group-item:last-child { + margin-bottom: 0; + border-bottom-right-radius: 4px; + border-bottom-left-radius: 4px; +} +.list-group-item > .badge { + float: right; +} +.list-group-item > .badge + .badge { + margin-right: 5px; +} +a.list-group-item { + color: #555; +} +a.list-group-item .list-group-item-heading { + color: #333; +} +a.list-group-item:hover, +a.list-group-item:focus { + text-decoration: none; + background-color: #f5f5f5; +} +a.list-group-item.active, +a.list-group-item.active:hover, +a.list-group-item.active:focus { + z-index: 2; + color: #fff; + background-color: #428bca; + border-color: #428bca; +} +a.list-group-item.active .list-group-item-heading, +a.list-group-item.active:hover .list-group-item-heading, +a.list-group-item.active:focus .list-group-item-heading { + color: inherit; +} +a.list-group-item.active .list-group-item-text, +a.list-group-item.active:hover .list-group-item-text, +a.list-group-item.active:focus .list-group-item-text { + color: #e1edf7; +} +.list-group-item-success { + color: #3c763d; + background-color: #dff0d8; +} +a.list-group-item-success { + color: #3c763d; +} +a.list-group-item-success .list-group-item-heading { + color: inherit; +} +a.list-group-item-success:hover, +a.list-group-item-success:focus { + color: #3c763d; + background-color: #d0e9c6; +} +a.list-group-item-success.active, +a.list-group-item-success.active:hover, +a.list-group-item-success.active:focus { + color: #fff; + background-color: #3c763d; + border-color: #3c763d; +} +.list-group-item-info { + color: #31708f; + background-color: #d9edf7; +} +a.list-group-item-info { + color: #31708f; +} +a.list-group-item-info .list-group-item-heading { + color: inherit; +} +a.list-group-item-info:hover, +a.list-group-item-info:focus { + color: #31708f; + background-color: #c4e3f3; +} +a.list-group-item-info.active, +a.list-group-item-info.active:hover, +a.list-group-item-info.active:focus { + color: #fff; + background-color: #31708f; + border-color: #31708f; +} +.list-group-item-warning { + color: #8a6d3b; + background-color: #fcf8e3; +} +a.list-group-item-warning { + color: #8a6d3b; +} +a.list-group-item-warning .list-group-item-heading { + color: inherit; +} +a.list-group-item-warning:hover, +a.list-group-item-warning:focus { + color: #8a6d3b; + background-color: #faf2cc; +} +a.list-group-item-warning.active, +a.list-group-item-warning.active:hover, +a.list-group-item-warning.active:focus { + color: #fff; + background-color: #8a6d3b; + border-color: #8a6d3b; +} +.list-group-item-danger { + color: #a94442; + background-color: #f2dede; +} +a.list-group-item-danger { + color: #a94442; +} +a.list-group-item-danger .list-group-item-heading { + color: inherit; +} +a.list-group-item-danger:hover, +a.list-group-item-danger:focus { + color: #a94442; + background-color: #ebcccc; +} +a.list-group-item-danger.active, +a.list-group-item-danger.active:hover, +a.list-group-item-danger.active:focus { + color: #fff; + background-color: #a94442; + border-color: #a94442; +} +.list-group-item-heading { + margin-top: 0; + margin-bottom: 5px; +} +.list-group-item-text { + margin-bottom: 0; + line-height: 1.3; +} +.panel { + margin-bottom: 20px; + background-color: #fff; + border: 1px solid transparent; + border-radius: 4px; + -webkit-box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05); + box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05); +} +.panel-body { + padding: 15px; +} +.panel > .list-group { + margin-bottom: 0; +} +.panel 
> .list-group .list-group-item { + border-width: 1px 0; + border-radius: 0; +} +.panel > .list-group .list-group-item:first-child { + border-top: 0; +} +.panel > .list-group .list-group-item:last-child { + border-bottom: 0; +} +.panel > .list-group:first-child .list-group-item:first-child { + border-top-right-radius: 3px; + border-top-left-radius: 3px; +} +.panel > .list-group:last-child .list-group-item:last-child { + border-bottom-right-radius: 3px; + border-bottom-left-radius: 3px; +} +.panel-heading + .list-group .list-group-item:first-child { + border-top-width: 0; +} +.panel > .table, +.panel > .table-responsive > .table { + margin-bottom: 0; +} +.panel > .table:first-child > thead:first-child > tr:first-child td:first-child, +.panel + > .table-responsive:first-child + > .table:first-child + > thead:first-child + > tr:first-child + td:first-child, +.panel > .table:first-child > tbody:first-child > tr:first-child td:first-child, +.panel + > .table-responsive:first-child + > .table:first-child + > tbody:first-child + > tr:first-child + td:first-child, +.panel > .table:first-child > thead:first-child > tr:first-child th:first-child, +.panel + > .table-responsive:first-child + > .table:first-child + > thead:first-child + > tr:first-child + th:first-child, +.panel > .table:first-child > tbody:first-child > tr:first-child th:first-child, +.panel + > .table-responsive:first-child + > .table:first-child + > tbody:first-child + > tr:first-child + th:first-child { + border-top-left-radius: 3px; +} +.panel > .table:first-child > thead:first-child > tr:first-child td:last-child, +.panel + > .table-responsive:first-child + > .table:first-child + > thead:first-child + > tr:first-child + td:last-child, +.panel > .table:first-child > tbody:first-child > tr:first-child td:last-child, +.panel + > .table-responsive:first-child + > .table:first-child + > tbody:first-child + > tr:first-child + td:last-child, +.panel > .table:first-child > thead:first-child > tr:first-child th:last-child, +.panel + > .table-responsive:first-child + > .table:first-child + > thead:first-child + > tr:first-child + th:last-child, +.panel > .table:first-child > tbody:first-child > tr:first-child th:last-child, +.panel + > .table-responsive:first-child + > .table:first-child + > tbody:first-child + > tr:first-child + th:last-child { + border-top-right-radius: 3px; +} +.panel > .table:last-child > tbody:last-child > tr:last-child td:first-child, +.panel + > .table-responsive:last-child + > .table:last-child + > tbody:last-child + > tr:last-child + td:first-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child td:first-child, +.panel + > .table-responsive:last-child + > .table:last-child + > tfoot:last-child + > tr:last-child + td:first-child, +.panel > .table:last-child > tbody:last-child > tr:last-child th:first-child, +.panel + > .table-responsive:last-child + > .table:last-child + > tbody:last-child + > tr:last-child + th:first-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child th:first-child, +.panel + > .table-responsive:last-child + > .table:last-child + > tfoot:last-child + > tr:last-child + th:first-child { + border-bottom-left-radius: 3px; +} +.panel > .table:last-child > tbody:last-child > tr:last-child td:last-child, +.panel + > .table-responsive:last-child + > .table:last-child + > tbody:last-child + > tr:last-child + td:last-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child td:last-child, +.panel + > .table-responsive:last-child + > .table:last-child + > 
tfoot:last-child + > tr:last-child + td:last-child, +.panel > .table:last-child > tbody:last-child > tr:last-child th:last-child, +.panel + > .table-responsive:last-child + > .table:last-child + > tbody:last-child + > tr:last-child + th:last-child, +.panel > .table:last-child > tfoot:last-child > tr:last-child th:last-child, +.panel + > .table-responsive:last-child + > .table:last-child + > tfoot:last-child + > tr:last-child + th:last-child { + border-bottom-right-radius: 3px; +} +.panel > .panel-body + .table, +.panel > .panel-body + .table-responsive { + border-top: 1px solid #ddd; +} +.panel > .table > tbody:first-child > tr:first-child th, +.panel > .table > tbody:first-child > tr:first-child td { + border-top: 0; +} +.panel > .table-bordered, +.panel > .table-responsive > .table-bordered { + border: 0; +} +.panel > .table-bordered > thead > tr > th:first-child, +.panel > .table-responsive > .table-bordered > thead > tr > th:first-child, +.panel > .table-bordered > tbody > tr > th:first-child, +.panel > .table-responsive > .table-bordered > tbody > tr > th:first-child, +.panel > .table-bordered > tfoot > tr > th:first-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > th:first-child, +.panel > .table-bordered > thead > tr > td:first-child, +.panel > .table-responsive > .table-bordered > thead > tr > td:first-child, +.panel > .table-bordered > tbody > tr > td:first-child, +.panel > .table-responsive > .table-bordered > tbody > tr > td:first-child, +.panel > .table-bordered > tfoot > tr > td:first-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > td:first-child { + border-left: 0; +} +.panel > .table-bordered > thead > tr > th:last-child, +.panel > .table-responsive > .table-bordered > thead > tr > th:last-child, +.panel > .table-bordered > tbody > tr > th:last-child, +.panel > .table-responsive > .table-bordered > tbody > tr > th:last-child, +.panel > .table-bordered > tfoot > tr > th:last-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > th:last-child, +.panel > .table-bordered > thead > tr > td:last-child, +.panel > .table-responsive > .table-bordered > thead > tr > td:last-child, +.panel > .table-bordered > tbody > tr > td:last-child, +.panel > .table-responsive > .table-bordered > tbody > tr > td:last-child, +.panel > .table-bordered > tfoot > tr > td:last-child, +.panel > .table-responsive > .table-bordered > tfoot > tr > td:last-child { + border-right: 0; +} +.panel > .table-bordered > thead > tr:first-child > th, +.panel > .table-responsive > .table-bordered > thead > tr:first-child > th, +.panel > .table-bordered > tbody > tr:first-child > th, +.panel > .table-responsive > .table-bordered > tbody > tr:first-child > th, +.panel > .table-bordered > tfoot > tr:first-child > th, +.panel > .table-responsive > .table-bordered > tfoot > tr:first-child > th, +.panel > .table-bordered > thead > tr:first-child > td, +.panel > .table-responsive > .table-bordered > thead > tr:first-child > td, +.panel > .table-bordered > tbody > tr:first-child > td, +.panel > .table-responsive > .table-bordered > tbody > tr:first-child > td, +.panel > .table-bordered > tfoot > tr:first-child > td, +.panel > .table-responsive > .table-bordered > tfoot > tr:first-child > td { + border-top: 0; +} +.panel > .table-bordered > thead > tr:last-child > th, +.panel > .table-responsive > .table-bordered > thead > tr:last-child > th, +.panel > .table-bordered > tbody > tr:last-child > th, +.panel > .table-responsive > .table-bordered > tbody > tr:last-child > th, 
+.panel > .table-bordered > tfoot > tr:last-child > th, +.panel > .table-responsive > .table-bordered > tfoot > tr:last-child > th, +.panel > .table-bordered > thead > tr:last-child > td, +.panel > .table-responsive > .table-bordered > thead > tr:last-child > td, +.panel > .table-bordered > tbody > tr:last-child > td, +.panel > .table-responsive > .table-bordered > tbody > tr:last-child > td, +.panel > .table-bordered > tfoot > tr:last-child > td, +.panel > .table-responsive > .table-bordered > tfoot > tr:last-child > td { + border-bottom: 0; +} +.panel > .table-responsive { + border: 0; + margin-bottom: 0; +} +.panel-heading { + padding: 10px 15px; + border-bottom: 1px solid transparent; + border-top-right-radius: 3px; + border-top-left-radius: 3px; +} +.panel-heading > .dropdown .dropdown-toggle { + color: inherit; +} +.panel-title { + margin-top: 0; + margin-bottom: 0; + font-size: 16px; + color: inherit; +} +.panel-title > a { + color: inherit; +} +.panel-footer { + padding: 10px 15px; + background-color: #f5f5f5; + border-top: 1px solid #ddd; + border-bottom-right-radius: 3px; + border-bottom-left-radius: 3px; +} +.panel-group { + margin-bottom: 20px; +} +.panel-group .panel { + margin-bottom: 0; + border-radius: 4px; + overflow: hidden; +} +.panel-group .panel + .panel { + margin-top: 5px; +} +.panel-group .panel-heading { + border-bottom: 0; +} +.panel-group .panel-heading + .panel-collapse .panel-body { + border-top: 1px solid #ddd; +} +.panel-group .panel-footer { + border-top: 0; +} +.panel-group .panel-footer + .panel-collapse .panel-body { + border-bottom: 1px solid #ddd; +} +.panel-default { + border-color: #ddd; +} +.panel-default > .panel-heading { + color: #333; + background-color: #f5f5f5; + border-color: #ddd; +} +.panel-default > .panel-heading + .panel-collapse .panel-body { + border-top-color: #ddd; +} +.panel-default > .panel-footer + .panel-collapse .panel-body { + border-bottom-color: #ddd; +} +.panel-primary { + border-color: #428bca; +} +.panel-primary > .panel-heading { + color: #fff; + background-color: #428bca; + border-color: #428bca; +} +.panel-primary > .panel-heading + .panel-collapse .panel-body { + border-top-color: #428bca; +} +.panel-primary > .panel-footer + .panel-collapse .panel-body { + border-bottom-color: #428bca; +} +.panel-success { + border-color: #d6e9c6; +} +.panel-success > .panel-heading { + color: #3c763d; + background-color: #dff0d8; + border-color: #d6e9c6; +} +.panel-success > .panel-heading + .panel-collapse .panel-body { + border-top-color: #d6e9c6; +} +.panel-success > .panel-footer + .panel-collapse .panel-body { + border-bottom-color: #d6e9c6; +} +.panel-info { + border-color: #bce8f1; +} +.panel-info > .panel-heading { + color: #31708f; + background-color: #d9edf7; + border-color: #bce8f1; +} +.panel-info > .panel-heading + .panel-collapse .panel-body { + border-top-color: #bce8f1; +} +.panel-info > .panel-footer + .panel-collapse .panel-body { + border-bottom-color: #bce8f1; +} +.panel-warning { + border-color: #faebcc; +} +.panel-warning > .panel-heading { + color: #8a6d3b; + background-color: #fcf8e3; + border-color: #faebcc; +} +.panel-warning > .panel-heading + .panel-collapse .panel-body { + border-top-color: #faebcc; +} +.panel-warning > .panel-footer + .panel-collapse .panel-body { + border-bottom-color: #faebcc; +} +.panel-danger { + border-color: #ebccd1; +} +.panel-danger > .panel-heading { + color: #a94442; + background-color: #f2dede; + border-color: #ebccd1; +} +.panel-danger > .panel-heading + .panel-collapse 
.panel-body { + border-top-color: #ebccd1; +} +.panel-danger > .panel-footer + .panel-collapse .panel-body { + border-bottom-color: #ebccd1; +} +.well { + min-height: 20px; + padding: 19px; + margin-bottom: 20px; + background-color: #f5f5f5; + border: 1px solid #e3e3e3; + border-radius: 4px; + -webkit-box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.05); + box-shadow: inset 0 1px 1px rgba(0, 0, 0, 0.05); +} +.well blockquote { + border-color: #ddd; + border-color: rgba(0, 0, 0, 0.15); +} +.well-lg { + padding: 24px; + border-radius: 6px; +} +.well-sm { + padding: 9px; + border-radius: 3px; +} +.close { + float: right; + font-size: 21px; + font-weight: 700; + line-height: 1; + color: #000; + text-shadow: 0 1px 0 #fff; + opacity: 0.2; + filter: alpha(opacity=20); +} +.close:hover, +.close:focus { + color: #000; + text-decoration: none; + cursor: pointer; + opacity: 0.5; + filter: alpha(opacity=50); +} +button.close { + padding: 0; + cursor: pointer; + background: 0 0; + border: 0; + -webkit-appearance: none; +} +.modal-open { + overflow: hidden; +} +.modal { + display: none; + overflow: auto; + overflow-y: scroll; + position: fixed; + top: 0; + right: 0; + bottom: 0; + left: 0; + z-index: 1050; + -webkit-overflow-scrolling: touch; + outline: 0; +} +.modal.fade .modal-dialog { + -webkit-transform: translate(0, -25%); + -ms-transform: translate(0, -25%); + transform: translate(0, -25%); + -webkit-transition: -webkit-transform 0.3s ease-out; + -moz-transition: -moz-transform 0.3s ease-out; + -o-transition: -o-transform 0.3s ease-out; + transition: transform 0.3s ease-out; +} +.modal.in .modal-dialog { + -webkit-transform: translate(0, 0); + -ms-transform: translate(0, 0); + transform: translate(0, 0); +} +.modal-dialog { + position: relative; + width: auto; + margin: 10px; +} +.modal-content { + position: relative; + background-color: #fff; + border: 1px solid #999; + border: 1px solid rgba(0, 0, 0, 0.2); + border-radius: 6px; + -webkit-box-shadow: 0 3px 9px rgba(0, 0, 0, 0.5); + box-shadow: 0 3px 9px rgba(0, 0, 0, 0.5); + background-clip: padding-box; + outline: 0; +} +.modal-backdrop { + position: fixed; + top: 0; + right: 0; + bottom: 0; + left: 0; + z-index: 1040; + background-color: #000; +} +.modal-backdrop.fade { + opacity: 0; + filter: alpha(opacity=0); +} +.modal-backdrop.in { + opacity: 0.5; + filter: alpha(opacity=50); +} +.modal-header { + padding: 15px; + border-bottom: 1px solid #e5e5e5; + min-height: 16.428571429px; +} +.modal-header .close { + margin-top: -2px; +} +.modal-title { + margin: 0; + line-height: 1.428571429; +} +.modal-body { + position: relative; + padding: 20px; +} +.modal-footer { + margin-top: 15px; + padding: 19px 20px 20px; + text-align: right; + border-top: 1px solid #e5e5e5; +} +.modal-footer .btn + .btn { + margin-left: 5px; + margin-bottom: 0; +} +.modal-footer .btn-group .btn + .btn { + margin-left: -1px; +} +.modal-footer .btn-block + .btn-block { + margin-left: 0; +} +@media (min-width: 768px) { + .modal-dialog { + width: 600px; + margin: 30px auto; + } + .modal-content { + -webkit-box-shadow: 0 5px 15px rgba(0, 0, 0, 0.5); + box-shadow: 0 5px 15px rgba(0, 0, 0, 0.5); + } + .modal-sm { + width: 300px; + } + .modal-lg { + width: 900px; + } +} +.tooltip { + position: absolute; + z-index: 1030; + display: block; + visibility: visible; + font-size: 12px; + line-height: 1.4; + opacity: 0; + filter: alpha(opacity=0); +} +.tooltip.in { + opacity: 0.9; + filter: alpha(opacity=90); +} +.tooltip.top { + margin-top: -3px; + padding: 5px 0; +} +.tooltip.right { + 
margin-left: 3px; + padding: 0 5px; +} +.tooltip.bottom { + margin-top: 3px; + padding: 5px 0; +} +.tooltip.left { + margin-left: -3px; + padding: 0 5px; +} +.tooltip-inner { + max-width: 200px; + padding: 3px 8px; + color: #fff; + text-align: center; + text-decoration: none; + background-color: #000; + border-radius: 4px; +} +.tooltip-arrow { + position: absolute; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; +} +.tooltip.top .tooltip-arrow { + bottom: 0; + left: 50%; + margin-left: -5px; + border-width: 5px 5px 0; + border-top-color: #000; +} +.tooltip.top-left .tooltip-arrow { + bottom: 0; + left: 5px; + border-width: 5px 5px 0; + border-top-color: #000; +} +.tooltip.top-right .tooltip-arrow { + bottom: 0; + right: 5px; + border-width: 5px 5px 0; + border-top-color: #000; +} +.tooltip.right .tooltip-arrow { + top: 50%; + left: 0; + margin-top: -5px; + border-width: 5px 5px 5px 0; + border-right-color: #000; +} +.tooltip.left .tooltip-arrow { + top: 50%; + right: 0; + margin-top: -5px; + border-width: 5px 0 5px 5px; + border-left-color: #000; +} +.tooltip.bottom .tooltip-arrow { + top: 0; + left: 50%; + margin-left: -5px; + border-width: 0 5px 5px; + border-bottom-color: #000; +} +.tooltip.bottom-left .tooltip-arrow { + top: 0; + left: 5px; + border-width: 0 5px 5px; + border-bottom-color: #000; +} +.tooltip.bottom-right .tooltip-arrow { + top: 0; + right: 5px; + border-width: 0 5px 5px; + border-bottom-color: #000; +} +.popover { + position: absolute; + top: 0; + left: 0; + z-index: 1010; + display: none; + max-width: 276px; + padding: 1px; + text-align: left; + background-color: #fff; + background-clip: padding-box; + border: 1px solid #ccc; + border: 1px solid rgba(0, 0, 0, 0.2); + border-radius: 6px; + -webkit-box-shadow: 0 5px 10px rgba(0, 0, 0, 0.2); + box-shadow: 0 5px 10px rgba(0, 0, 0, 0.2); + white-space: normal; +} +.popover.top { + margin-top: -10px; +} +.popover.right { + margin-left: 10px; +} +.popover.bottom { + margin-top: 10px; +} +.popover.left { + margin-left: -10px; +} +.popover-title { + margin: 0; + padding: 8px 14px; + font-size: 14px; + font-weight: 400; + line-height: 18px; + background-color: #f7f7f7; + border-bottom: 1px solid #ebebeb; + border-radius: 5px 5px 0 0; +} +.popover-content { + padding: 9px 14px; +} +.popover .arrow, +.popover .arrow:after { + position: absolute; + display: block; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; +} +.popover .arrow { + border-width: 11px; +} +.popover .arrow:after { + border-width: 10px; + content: ""; +} +.popover.top .arrow { + left: 50%; + margin-left: -11px; + border-bottom-width: 0; + border-top-color: #999; + border-top-color: rgba(0, 0, 0, 0.25); + bottom: -11px; +} +.popover.top .arrow:after { + content: " "; + bottom: 1px; + margin-left: -10px; + border-bottom-width: 0; + border-top-color: #fff; +} +.popover.right .arrow { + top: 50%; + left: -11px; + margin-top: -11px; + border-left-width: 0; + border-right-color: #999; + border-right-color: rgba(0, 0, 0, 0.25); +} +.popover.right .arrow:after { + content: " "; + left: 1px; + bottom: -10px; + border-left-width: 0; + border-right-color: #fff; +} +.popover.bottom .arrow { + left: 50%; + margin-left: -11px; + border-top-width: 0; + border-bottom-color: #999; + border-bottom-color: rgba(0, 0, 0, 0.25); + top: -11px; +} +.popover.bottom .arrow:after { + content: " "; + top: 1px; + margin-left: -10px; + border-top-width: 0; + border-bottom-color: #fff; +} +.popover.left .arrow { + top: 50%; + right: 
-11px; + margin-top: -11px; + border-right-width: 0; + border-left-color: #999; + border-left-color: rgba(0, 0, 0, 0.25); +} +.popover.left .arrow:after { + content: " "; + right: 1px; + border-right-width: 0; + border-left-color: #fff; + bottom: -10px; +} +.carousel { + position: relative; +} +.carousel-inner { + position: relative; + overflow: hidden; + width: 100%; +} +.carousel-inner > .item { + display: none; + position: relative; + -webkit-transition: 0.6s ease-in-out left; + transition: 0.6s ease-in-out left; +} +.carousel-inner > .item > img, +.carousel-inner > .item > a > img { + display: block; + max-width: 100%; + height: auto; + line-height: 1; +} +.carousel-inner > .active, +.carousel-inner > .next, +.carousel-inner > .prev { + display: block; +} +.carousel-inner > .active { + left: 0; +} +.carousel-inner > .next, +.carousel-inner > .prev { + position: absolute; + top: 0; + width: 100%; +} +.carousel-inner > .next { + left: 100%; +} +.carousel-inner > .prev { + left: -100%; +} +.carousel-inner > .next.left, +.carousel-inner > .prev.right { + left: 0; +} +.carousel-inner > .active.left { + left: -100%; +} +.carousel-inner > .active.right { + left: 100%; +} +.carousel-control { + position: absolute; + top: 0; + left: 0; + bottom: 0; + width: 15%; + opacity: 0.5; + filter: alpha(opacity=50); + font-size: 20px; + color: #fff; + text-align: center; + text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6); +} +.carousel-control.left { + background-image: -webkit-linear-gradient( + left, + color-stop(rgba(0, 0, 0, 0.5) 0), + color-stop(rgba(0, 0, 0, 0.0001) 100%) + ); + background-image: linear-gradient( + to right, + rgba(0, 0, 0, 0.5) 0, + rgba(0, 0, 0, 0.0001) 100% + ); + background-repeat: repeat-x; + filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#80000000', endColorstr='#00000000', GradientType=1); +} +.carousel-control.right { + left: auto; + right: 0; + background-image: -webkit-linear-gradient( + left, + color-stop(rgba(0, 0, 0, 0.0001) 0), + color-stop(rgba(0, 0, 0, 0.5) 100%) + ); + background-image: linear-gradient( + to right, + rgba(0, 0, 0, 0.0001) 0, + rgba(0, 0, 0, 0.5) 100% + ); + background-repeat: repeat-x; + filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#00000000', endColorstr='#80000000', GradientType=1); +} +.carousel-control:hover, +.carousel-control:focus { + outline: 0; + color: #fff; + text-decoration: none; + opacity: 0.9; + filter: alpha(opacity=90); +} +.carousel-control .icon-prev, +.carousel-control .icon-next, +.carousel-control .glyphicon-chevron-left, +.carousel-control .glyphicon-chevron-right { + position: absolute; + top: 50%; + z-index: 5; + display: inline-block; +} +.carousel-control .icon-prev, +.carousel-control .glyphicon-chevron-left { + left: 50%; +} +.carousel-control .icon-next, +.carousel-control .glyphicon-chevron-right { + right: 50%; +} +.carousel-control .icon-prev, +.carousel-control .icon-next { + width: 20px; + height: 20px; + margin-top: -10px; + margin-left: -10px; + font-family: serif; +} +.carousel-control .icon-prev:before { + content: "\2039"; +} +.carousel-control .icon-next:before { + content: "\203a"; +} +.carousel-indicators { + position: absolute; + bottom: 10px; + left: 50%; + z-index: 15; + width: 60%; + margin-left: -30%; + padding-left: 0; + list-style: none; + text-align: center; +} +.carousel-indicators li { + display: inline-block; + width: 10px; + height: 10px; + margin: 1px; + text-indent: -999px; + border: 1px solid #fff; + border-radius: 10px; + cursor: pointer; + background-color: 
#000 \9; + background-color: rgba(0, 0, 0, 0); +} +.carousel-indicators .active { + margin: 0; + width: 12px; + height: 12px; + background-color: #fff; +} +.carousel-caption { + position: absolute; + left: 15%; + right: 15%; + bottom: 20px; + z-index: 10; + padding-top: 20px; + padding-bottom: 20px; + color: #fff; + text-align: center; + text-shadow: 0 1px 2px rgba(0, 0, 0, 0.6); +} +.carousel-caption .btn { + text-shadow: none; +} +@media screen and (min-width: 768px) { + .carousel-control .glyphicons-chevron-left, + .carousel-control .glyphicons-chevron-right, + .carousel-control .icon-prev, + .carousel-control .icon-next { + width: 30px; + height: 30px; + margin-top: -15px; + margin-left: -15px; + font-size: 30px; + } + .carousel-caption { + left: 20%; + right: 20%; + padding-bottom: 30px; + } + .carousel-indicators { + bottom: 20px; + } +} +.clearfix:before, +.clearfix:after, +.container:before, +.container:after, +.container-fluid:before, +.container-fluid:after, +.row:before, +.row:after, +.form-horizontal .form-group:before, +.form-horizontal .form-group:after, +.btn-toolbar:before, +.btn-toolbar:after, +.btn-group-vertical > .btn-group:before, +.btn-group-vertical > .btn-group:after, +.nav:before, +.nav:after, +.navbar:before, +.navbar:after, +.navbar-header:before, +.navbar-header:after, +.navbar-collapse:before, +.navbar-collapse:after, +.pager:before, +.pager:after, +.panel-body:before, +.panel-body:after, +.modal-footer:before, +.modal-footer:after { + content: " "; + display: table; +} +.clearfix:after, +.container:after, +.container-fluid:after, +.row:after, +.form-horizontal .form-group:after, +.btn-toolbar:after, +.btn-group-vertical > .btn-group:after, +.nav:after, +.navbar:after, +.navbar-header:after, +.navbar-collapse:after, +.pager:after, +.panel-body:after, +.modal-footer:after { + clear: both; +} +.center-block { + display: block; + margin-left: auto; + margin-right: auto; +} +.pull-right { + float: right !important; +} +.pull-left { + float: left !important; +} +.hide { + display: none !important; +} +.show { + display: block !important; +} +.invisible { + visibility: hidden; +} +.text-hide { + font: 0/0 a; + color: transparent; + text-shadow: none; + background-color: transparent; + border: 0; +} +.hidden { + display: none !important; + visibility: hidden !important; +} +.affix { + position: fixed; +} +@-ms-viewport { + width: device-width; +} +.visible-xs, +tr.visible-xs, +th.visible-xs, +td.visible-xs { + display: none !important; +} +@media (max-width: 767px) { + .visible-xs { + display: block !important; + } + table.visible-xs { + display: table; + } + tr.visible-xs { + display: table-row !important; + } + th.visible-xs, + td.visible-xs { + display: table-cell !important; + } +} +.visible-sm, +tr.visible-sm, +th.visible-sm, +td.visible-sm { + display: none !important; +} +@media (min-width: 768px) and (max-width: 991px) { + .visible-sm { + display: block !important; + } + table.visible-sm { + display: table; + } + tr.visible-sm { + display: table-row !important; + } + th.visible-sm, + td.visible-sm { + display: table-cell !important; + } +} +.visible-md, +tr.visible-md, +th.visible-md, +td.visible-md { + display: none !important; +} +@media (min-width: 992px) and (max-width: 1199px) { + .visible-md { + display: block !important; + } + table.visible-md { + display: table; + } + tr.visible-md { + display: table-row !important; + } + th.visible-md, + td.visible-md { + display: table-cell !important; + } +} +.visible-lg, +tr.visible-lg, +th.visible-lg, 
+td.visible-lg { + display: none !important; +} +@media (min-width: 1200px) { + .visible-lg { + display: block !important; + } + table.visible-lg { + display: table; + } + tr.visible-lg { + display: table-row !important; + } + th.visible-lg, + td.visible-lg { + display: table-cell !important; + } +} +@media (max-width: 767px) { + .hidden-xs, + tr.hidden-xs, + th.hidden-xs, + td.hidden-xs { + display: none !important; + } +} +@media (min-width: 768px) and (max-width: 991px) { + .hidden-sm, + tr.hidden-sm, + th.hidden-sm, + td.hidden-sm { + display: none !important; + } +} +@media (min-width: 992px) and (max-width: 1199px) { + .hidden-md, + tr.hidden-md, + th.hidden-md, + td.hidden-md { + display: none !important; + } +} +@media (min-width: 1200px) { + .hidden-lg, + tr.hidden-lg, + th.hidden-lg, + td.hidden-lg { + display: none !important; + } +} +.visible-print, +tr.visible-print, +th.visible-print, +td.visible-print { + display: none !important; +} +@media print { + .visible-print { + display: block !important; + } + table.visible-print { + display: table; + } + tr.visible-print { + display: table-row !important; + } + th.visible-print, + td.visible-print { + display: table-cell !important; + } +} +@media print { + .hidden-print, + tr.hidden-print, + th.hidden-print, + td.hidden-print { + display: none !important; + } +} diff --git a/backend/tests/integration/tests/pruning/website/css/custom-fonts.css b/backend/tests/integration/tests/pruning/website/css/custom-fonts.css new file mode 100644 index 00000000000..f3b62deb077 --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/css/custom-fonts.css @@ -0,0 +1,1028 @@ +/* ================================================== +Font-Face Icons +================================================== */ + +@font-face { + font-family: "Icons"; + src: url("../fonts/customicon/Icons.eot"); + src: + url("../fonts/customicon/Icons.eot?#iefix") format("embedded-opentype"), + url("../fonts/customicon/Icons.woff") format("woff"), + url("../fonts/customicon/Icons.ttf") format("truetype"), + url("../fonts/customicon/Icons.svg#Icons") format("svg"); + font-weight: normal; + font-style: normal; +} + +/* Use the following CSS code if you want to use data attributes for inserting your icons */ +[data-icon]:before { + font-family: "Icons"; + content: attr(data-icon); + speak: none; + font-weight: normal; + font-variant: normal; + text-transform: none; + line-height: 1; + -webkit-font-smoothing: antialiased; +} + +[class^="font-"]:before, +[class*=" font-"]:before { + font-family: "Icons"; + speak: none; + font-style: normal; + font-weight: normal; + font-variant: normal; + text-transform: none; + -webkit-font-smoothing: antialiased; +} + +[class^="font-"], +[class*=" font-"] { + display: inline-block; + line-height: 1em; +} + +/* Use the following CSS code if you want to have a class per icon */ +/* +Instead of a list of all class selectors, +you can use the generic selector below, but it's slower: +[class*="font-icon-"] { +*/ +.font-icon-zoom-out, +.font-icon-zoom-in, +.font-icon-wrench, +.font-icon-waves, +.font-icon-warning, +.font-icon-volume-up, +.font-icon-volume-off, +.font-icon-volume-down, +.font-icon-viewport, +.font-icon-user, +.font-icon-user-border, +.font-icon-upload, +.font-icon-upload-2, +.font-icon-unlock, +.font-icon-underline, +.font-icon-tint, +.font-icon-time, +.font-icon-text, +.font-icon-text-width, +.font-icon-text-height, +.font-icon-tags, +.font-icon-tag, +.font-icon-table, +.font-icon-strikethrough, +.font-icon-stop, 
+.font-icon-step-forward, +.font-icon-step-backward, +.font-icon-stars, +.font-icon-star, +.font-icon-star-line, +.font-icon-star-half, +.font-icon-sort, +.font-icon-sort-up, +.font-icon-sort-down, +.font-icon-social-zerply, +.font-icon-social-youtube, +.font-icon-social-yelp, +.font-icon-social-yahoo, +.font-icon-social-wordpress, +.font-icon-social-virb, +.font-icon-social-vimeo, +.font-icon-social-viddler, +.font-icon-social-twitter, +.font-icon-social-tumblr, +.font-icon-social-stumbleupon, +.font-icon-social-soundcloud, +.font-icon-social-skype, +.font-icon-social-share-this, +.font-icon-social-quora, +.font-icon-social-pinterest, +.font-icon-social-photobucket, +.font-icon-social-paypal, +.font-icon-social-myspace, +.font-icon-social-linkedin, +.font-icon-social-last-fm, +.font-icon-social-grooveshark, +.font-icon-social-google-plus, +.font-icon-social-github, +.font-icon-social-forrst, +.font-icon-social-flickr, +.font-icon-social-facebook, +.font-icon-social-evernote, +.font-icon-social-envato, +.font-icon-social-email, +.font-icon-social-dribbble, +.font-icon-social-digg, +.font-icon-social-deviant-art, +.font-icon-social-blogger, +.font-icon-social-behance, +.font-icon-social-bebo, +.font-icon-social-addthis, +.font-icon-social-500px, +.font-icon-sitemap, +.font-icon-signout, +.font-icon-signin, +.font-icon-signal, +.font-icon-shopping-cart, +.font-icon-search, +.font-icon-rss, +.font-icon-road, +.font-icon-retweet, +.font-icon-resize-vertical, +.font-icon-resize-vertical-2, +.font-icon-resize-small, +.font-icon-resize-horizontal, +.font-icon-resize-horizontal-2, +.font-icon-resize-fullscreen, +.font-icon-resize-full, +.font-icon-repeat, +.font-icon-reorder, +.font-icon-remove, +.font-icon-remove-sign, +.font-icon-remove-circle, +.font-icon-read-more, +.font-icon-random, +.font-icon-question-sign, +.font-icon-pushpin, +.font-icon-pushpin-2, +.font-icon-print, +.font-icon-plus, +.font-icon-plus-sign, +.font-icon-play, +.font-icon-picture, +.font-icon-phone, +.font-icon-phone-sign, +.font-icon-phone-boxed, +.font-icon-pause, +.font-icon-paste, +.font-icon-paper-clip, +.font-icon-ok, +.font-icon-ok-sign, +.font-icon-ok-circle, +.font-icon-music, +.font-icon-move, +.font-icon-money, +.font-icon-minus, +.font-icon-minus-sign, +.font-icon-map, +.font-icon-map-marker, +.font-icon-map-marker-2, +.font-icon-magnet, +.font-icon-magic, +.font-icon-lock, +.font-icon-list, +.font-icon-list-3, +.font-icon-list-2, +.font-icon-link, +.font-icon-layer, +.font-icon-key, +.font-icon-italic, +.font-icon-info, +.font-icon-indent-right, +.font-icon-indent-left, +.font-icon-inbox, +.font-icon-inbox-empty, +.font-icon-home, +.font-icon-heart, +.font-icon-heart-line, +.font-icon-headphones, +.font-icon-headphones-line, +.font-icon-headphones-line-2, +.font-icon-headphones-2, +.font-icon-hdd, +.font-icon-group, +.font-icon-grid, +.font-icon-grid-large, +.font-icon-globe_line, +.font-icon-glass, +.font-icon-glass_2, +.font-icon-gift, +.font-icon-forward, +.font-icon-font, +.font-icon-folder-open, +.font-icon-folder-close, +.font-icon-flag, +.font-icon-fire, +.font-icon-film, +.font-icon-file, +.font-icon-file-empty, +.font-icon-fast-forward, +.font-icon-fast-backward, +.font-icon-facetime, +.font-icon-eye, +.font-icon-eye_disable, +.font-icon-expand-view, +.font-icon-expand-view-3, +.font-icon-expand-view-2, +.font-icon-expand-vertical, +.font-icon-expand-horizontal, +.font-icon-exclamation, +.font-icon-email, +.font-icon-email_2, +.font-icon-eject, +.font-icon-edit, +.font-icon-edit-check, 
+.font-icon-download, +.font-icon-download_2, +.font-icon-dashboard, +.font-icon-credit-card, +.font-icon-copy, +.font-icon-comments, +.font-icon-comments-line, +.font-icon-comment, +.font-icon-comment-line, +.font-icon-columns, +.font-icon-columns-2, +.font-icon-cogs, +.font-icon-cog, +.font-icon-cloud, +.font-icon-check, +.font-icon-check-empty, +.font-icon-certificate, +.font-icon-camera, +.font-icon-calendar, +.font-icon-bullhorn, +.font-icon-briefcase, +.font-icon-bookmark, +.font-icon-book, +.font-icon-bolt, +.font-icon-bold, +.font-icon-blockquote, +.font-icon-bell, +.font-icon-beaker, +.font-icon-barcode, +.font-icon-ban-circle, +.font-icon-ban-chart, +.font-icon-ban-chart-2, +.font-icon-backward, +.font-icon-asterisk, +.font-icon-arrow-simple-up, +.font-icon-arrow-simple-up-circle, +.font-icon-arrow-simple-right, +.font-icon-arrow-simple-right-circle, +.font-icon-arrow-simple-left, +.font-icon-arrow-simple-left-circle, +.font-icon-arrow-simple-down, +.font-icon-arrow-simple-down-circle, +.font-icon-arrow-round-up, +.font-icon-arrow-round-up-circle, +.font-icon-arrow-round-right, +.font-icon-arrow-round-right-circle, +.font-icon-arrow-round-left, +.font-icon-arrow-round-left-circle, +.font-icon-arrow-round-down, +.font-icon-arrow-round-down-circle, +.font-icon-arrow-light-up, +.font-icon-arrow-light-round-up, +.font-icon-arrow-light-round-up-circle, +.font-icon-arrow-light-round-right, +.font-icon-arrow-light-round-right-circle, +.font-icon-arrow-light-round-left, +.font-icon-arrow-light-round-left-circle, +.font-icon-arrow-light-round-down, +.font-icon-arrow-light-round-down-circle, +.font-icon-arrow-light-right, +.font-icon-arrow-light-left, +.font-icon-arrow-light-down, +.font-icon-align-right, +.font-icon-align-left, +.font-icon-align-justify, +.font-icon-align-center, +.font-icon-adjust { + font-family: "Icons"; + speak: none; + font-style: normal; + font-weight: normal; + font-variant: normal; + text-transform: none; + line-height: 1; + -webkit-font-smoothing: antialiased; +} +.font-icon-zoom-out:before { + content: "\e000"; +} +.font-icon-zoom-in:before { + content: "\e001"; +} +.font-icon-wrench:before { + content: "\e002"; +} +.font-icon-waves:before { + content: "\e003"; +} +.font-icon-warning:before { + content: "\e004"; +} +.font-icon-volume-up:before { + content: "\e005"; +} +.font-icon-volume-off:before { + content: "\e006"; +} +.font-icon-volume-down:before { + content: "\e007"; +} +.font-icon-viewport:before { + content: "\e008"; +} +.font-icon-user:before { + content: "\e009"; +} +.font-icon-user-border:before { + content: "\e00a"; +} +.font-icon-upload:before { + content: "\e00b"; +} +.font-icon-upload-2:before { + content: "\e00c"; +} +.font-icon-unlock:before { + content: "\e00d"; +} +.font-icon-underline:before { + content: "\e00e"; +} +.font-icon-tint:before { + content: "\e00f"; +} +.font-icon-time:before { + content: "\e010"; +} +.font-icon-text:before { + content: "\e011"; +} +.font-icon-text-width:before { + content: "\e012"; +} +.font-icon-text-height:before { + content: "\e013"; +} +.font-icon-tags:before { + content: "\e014"; +} +.font-icon-tag:before { + content: "\e015"; +} +.font-icon-table:before { + content: "\e016"; +} +.font-icon-strikethrough:before { + content: "\e017"; +} +.font-icon-stop:before { + content: "\e018"; +} +.font-icon-step-forward:before { + content: "\e019"; +} +.font-icon-step-backward:before { + content: "\e01a"; +} +.font-icon-stars:before { + content: "\e01b"; +} +.font-icon-star:before { + content: "\e01c"; +} 
+.font-icon-star-line:before { + content: "\e01d"; +} +.font-icon-star-half:before { + content: "\e01e"; +} +.font-icon-sort:before { + content: "\e01f"; +} +.font-icon-sort-up:before { + content: "\e020"; +} +.font-icon-sort-down:before { + content: "\e021"; +} +.font-icon-social-zerply:before { + content: "\e022"; +} +.font-icon-social-youtube:before { + content: "\e023"; +} +.font-icon-social-yelp:before { + content: "\e024"; +} +.font-icon-social-yahoo:before { + content: "\e025"; +} +.font-icon-social-wordpress:before { + content: "\e026"; +} +.font-icon-social-virb:before { + content: "\e027"; +} +.font-icon-social-vimeo:before { + content: "\e028"; +} +.font-icon-social-viddler:before { + content: "\e029"; +} +.font-icon-social-twitter:before { + content: "\e02a"; +} +.font-icon-social-tumblr:before { + content: "\e02b"; +} +.font-icon-social-stumbleupon:before { + content: "\e02c"; +} +.font-icon-social-soundcloud:before { + content: "\e02d"; +} +.font-icon-social-skype:before { + content: "\e02e"; +} +.font-icon-social-share-this:before { + content: "\e02f"; +} +.font-icon-social-quora:before { + content: "\e030"; +} +.font-icon-social-pinterest:before { + content: "\e031"; +} +.font-icon-social-photobucket:before { + content: "\e032"; +} +.font-icon-social-paypal:before { + content: "\e033"; +} +.font-icon-social-myspace:before { + content: "\e034"; +} +.font-icon-social-linkedin:before { + content: "\e035"; +} +.font-icon-social-last-fm:before { + content: "\e036"; +} +.font-icon-social-grooveshark:before { + content: "\e037"; +} +.font-icon-social-google-plus:before { + content: "\e038"; +} +.font-icon-social-github:before { + content: "\e039"; +} +.font-icon-social-forrst:before { + content: "\e03a"; +} +.font-icon-social-flickr:before { + content: "\e03b"; +} +.font-icon-social-facebook:before { + content: "\e03c"; +} +.font-icon-social-evernote:before { + content: "\e03d"; +} +.font-icon-social-envato:before { + content: "\e03e"; +} +.font-icon-social-email:before { + content: "\e03f"; +} +.font-icon-social-dribbble:before { + content: "\e040"; +} +.font-icon-social-digg:before { + content: "\e041"; +} +.font-icon-social-deviant-art:before { + content: "\e042"; +} +.font-icon-social-blogger:before { + content: "\e043"; +} +.font-icon-social-behance:before { + content: "\e044"; +} +.font-icon-social-bebo:before { + content: "\e045"; +} +.font-icon-social-addthis:before { + content: "\e046"; +} +.font-icon-social-500px:before { + content: "\e047"; +} +.font-icon-sitemap:before { + content: "\e048"; +} +.font-icon-signout:before { + content: "\e049"; +} +.font-icon-signin:before { + content: "\e04a"; +} +.font-icon-signal:before { + content: "\e04b"; +} +.font-icon-shopping-cart:before { + content: "\e04c"; +} +.font-icon-search:before { + content: "\e04d"; +} +.font-icon-rss:before { + content: "\e04e"; +} +.font-icon-road:before { + content: "\e04f"; +} +.font-icon-retweet:before { + content: "\e050"; +} +.font-icon-resize-vertical:before { + content: "\e051"; +} +.font-icon-resize-vertical-2:before { + content: "\e052"; +} +.font-icon-resize-small:before { + content: "\e053"; +} +.font-icon-resize-horizontal:before { + content: "\e054"; +} +.font-icon-resize-horizontal-2:before { + content: "\e055"; +} +.font-icon-resize-fullscreen:before { + content: "\e056"; +} +.font-icon-resize-full:before { + content: "\e057"; +} +.font-icon-repeat:before { + content: "\e058"; +} +.font-icon-reorder:before { + content: "\e059"; +} +.font-icon-remove:before { + content: "\e05a"; +} 
+.font-icon-remove-sign:before { + content: "\e05b"; +} +.font-icon-remove-circle:before { + content: "\e05c"; +} +.font-icon-read-more:before { + content: "\e05d"; +} +.font-icon-random:before { + content: "\e05e"; +} +.font-icon-question-sign:before { + content: "\e05f"; +} +.font-icon-pushpin:before { + content: "\e060"; +} +.font-icon-pushpin-2:before { + content: "\e061"; +} +.font-icon-print:before { + content: "\e062"; +} +.font-icon-plus:before { + content: "\e063"; +} +.font-icon-plus-sign:before { + content: "\e064"; +} +.font-icon-play:before { + content: "\e065"; +} +.font-icon-picture:before { + content: "\e066"; +} +.font-icon-phone:before { + content: "\e067"; +} +.font-icon-phone-sign:before { + content: "\e068"; +} +.font-icon-phone-boxed:before { + content: "\e069"; +} +.font-icon-pause:before { + content: "\e06a"; +} +.font-icon-paste:before { + content: "\e06b"; +} +.font-icon-paper-clip:before { + content: "\e06c"; +} +.font-icon-ok:before { + content: "\e06d"; +} +.font-icon-ok-sign:before { + content: "\e06e"; +} +.font-icon-ok-circle:before { + content: "\e06f"; +} +.font-icon-music:before { + content: "\e070"; +} +.font-icon-move:before { + content: "\e071"; +} +.font-icon-money:before { + content: "\e072"; +} +.font-icon-minus:before { + content: "\e073"; +} +.font-icon-minus-sign:before { + content: "\e074"; +} +.font-icon-map:before { + content: "\e075"; +} +.font-icon-map-marker:before { + content: "\e076"; +} +.font-icon-map-marker-2:before { + content: "\e077"; +} +.font-icon-magnet:before { + content: "\e078"; +} +.font-icon-magic:before { + content: "\e079"; +} +.font-icon-lock:before { + content: "\e07a"; +} +.font-icon-list:before { + content: "\e07b"; +} +.font-icon-list-3:before { + content: "\e07c"; +} +.font-icon-list-2:before { + content: "\e07d"; +} +.font-icon-link:before { + content: "\e07e"; +} +.font-icon-layer:before { + content: "\e07f"; +} +.font-icon-key:before { + content: "\e080"; +} +.font-icon-italic:before { + content: "\e081"; +} +.font-icon-info:before { + content: "\e082"; +} +.font-icon-indent-right:before { + content: "\e083"; +} +.font-icon-indent-left:before { + content: "\e084"; +} +.font-icon-inbox:before { + content: "\e085"; +} +.font-icon-inbox-empty:before { + content: "\e086"; +} +.font-icon-home:before { + content: "\e087"; +} +.font-icon-heart:before { + content: "\e088"; +} +.font-icon-heart-line:before { + content: "\e089"; +} +.font-icon-headphones:before { + content: "\e08a"; +} +.font-icon-headphones-line:before { + content: "\e08b"; +} +.font-icon-headphones-line-2:before { + content: "\e08c"; +} +.font-icon-headphones-2:before { + content: "\e08d"; +} +.font-icon-hdd:before { + content: "\e08e"; +} +.font-icon-group:before { + content: "\e08f"; +} +.font-icon-grid:before { + content: "\e090"; +} +.font-icon-grid-large:before { + content: "\e091"; +} +.font-icon-globe_line:before { + content: "\e092"; +} +.font-icon-glass:before { + content: "\e093"; +} +.font-icon-glass_2:before { + content: "\e094"; +} +.font-icon-gift:before { + content: "\e095"; +} +.font-icon-forward:before { + content: "\e096"; +} +.font-icon-font:before { + content: "\e097"; +} +.font-icon-folder-open:before { + content: "\e098"; +} +.font-icon-folder-close:before { + content: "\e099"; +} +.font-icon-flag:before { + content: "\e09a"; +} +.font-icon-fire:before { + content: "\e09b"; +} +.font-icon-film:before { + content: "\e09c"; +} +.font-icon-file:before { + content: "\e09d"; +} +.font-icon-file-empty:before { + content: "\e09e"; +} 
+.font-icon-fast-forward:before { + content: "\e09f"; +} +.font-icon-fast-backward:before { + content: "\e0a0"; +} +.font-icon-facetime:before { + content: "\e0a1"; +} +.font-icon-eye:before { + content: "\e0a2"; +} +.font-icon-eye_disable:before { + content: "\e0a3"; +} +.font-icon-expand-view:before { + content: "\e0a4"; +} +.font-icon-expand-view-3:before { + content: "\e0a5"; +} +.font-icon-expand-view-2:before { + content: "\e0a6"; +} +.font-icon-expand-vertical:before { + content: "\e0a7"; +} +.font-icon-expand-horizontal:before { + content: "\e0a8"; +} +.font-icon-exclamation:before { + content: "\e0a9"; +} +.font-icon-email:before { + content: "\e0aa"; +} +.font-icon-email_2:before { + content: "\e0ab"; +} +.font-icon-eject:before { + content: "\e0ac"; +} +.font-icon-edit:before { + content: "\e0ad"; +} +.font-icon-edit-check:before { + content: "\e0ae"; +} +.font-icon-download:before { + content: "\e0af"; +} +.font-icon-download_2:before { + content: "\e0b0"; +} +.font-icon-dashboard:before { + content: "\e0b1"; +} +.font-icon-credit-card:before { + content: "\e0b2"; +} +.font-icon-copy:before { + content: "\e0b3"; +} +.font-icon-comments:before { + content: "\e0b4"; +} +.font-icon-comments-line:before { + content: "\e0b5"; +} +.font-icon-comment:before { + content: "\e0b6"; +} +.font-icon-comment-line:before { + content: "\e0b7"; +} +.font-icon-columns:before { + content: "\e0b8"; +} +.font-icon-columns-2:before { + content: "\e0b9"; +} +.font-icon-cogs:before { + content: "\e0ba"; +} +.font-icon-cog:before { + content: "\e0bb"; +} +.font-icon-cloud:before { + content: "\e0bc"; +} +.font-icon-check:before { + content: "\e0bd"; +} +.font-icon-check-empty:before { + content: "\e0be"; +} +.font-icon-certificate:before { + content: "\e0bf"; +} +.font-icon-camera:before { + content: "\e0c0"; +} +.font-icon-calendar:before { + content: "\e0c1"; +} +.font-icon-bullhorn:before { + content: "\e0c2"; +} +.font-icon-briefcase:before { + content: "\e0c3"; +} +.font-icon-bookmark:before { + content: "\e0c4"; +} +.font-icon-book:before { + content: "\e0c5"; +} +.font-icon-bolt:before { + content: "\e0c6"; +} +.font-icon-bold:before { + content: "\e0c7"; +} +.font-icon-blockquote:before { + content: "\e0c8"; +} +.font-icon-bell:before { + content: "\e0c9"; +} +.font-icon-beaker:before { + content: "\e0ca"; +} +.font-icon-barcode:before { + content: "\e0cb"; +} +.font-icon-ban-circle:before { + content: "\e0cc"; +} +.font-icon-ban-chart:before { + content: "\e0cd"; +} +.font-icon-ban-chart-2:before { + content: "\e0ce"; +} +.font-icon-backward:before { + content: "\e0cf"; +} +.font-icon-asterisk:before { + content: "\e0d0"; +} +.font-icon-arrow-simple-up:before { + content: "\e0d1"; +} +.font-icon-arrow-simple-up-circle:before { + content: "\e0d2"; +} +.font-icon-arrow-simple-right:before { + content: "\e0d3"; +} +.font-icon-arrow-simple-right-circle:before { + content: "\e0d4"; +} +.font-icon-arrow-simple-left:before { + content: "\e0d5"; +} +.font-icon-arrow-simple-left-circle:before { + content: "\e0d6"; +} +.font-icon-arrow-simple-down:before { + content: "\e0d7"; +} +.font-icon-arrow-simple-down-circle:before { + content: "\e0d8"; +} +.font-icon-arrow-round-up:before { + content: "\e0d9"; +} +.font-icon-arrow-round-up-circle:before { + content: "\e0da"; +} +.font-icon-arrow-round-right:before { + content: "\e0db"; +} +.font-icon-arrow-round-right-circle:before { + content: "\e0dc"; +} +.font-icon-arrow-round-left:before { + content: "\e0dd"; +} +.font-icon-arrow-round-left-circle:before { + 
content: "\e0de"; +} +.font-icon-arrow-round-down:before { + content: "\e0df"; +} +.font-icon-arrow-round-down-circle:before { + content: "\e0e0"; +} +.font-icon-arrow-light-up:before { + content: "\e0e1"; +} +.font-icon-arrow-light-round-up:before { + content: "\e0e2"; +} +.font-icon-arrow-light-round-up-circle:before { + content: "\e0e3"; +} +.font-icon-arrow-light-round-right:before { + content: "\e0e4"; +} +.font-icon-arrow-light-round-right-circle:before { + content: "\e0e5"; +} +.font-icon-arrow-light-round-left:before { + content: "\e0e6"; +} +.font-icon-arrow-light-round-left-circle:before { + content: "\e0e7"; +} +.font-icon-arrow-light-round-down:before { + content: "\e0e8"; +} +.font-icon-arrow-light-round-down-circle:before { + content: "\e0e9"; +} +.font-icon-arrow-light-right:before { + content: "\e0ea"; +} +.font-icon-arrow-light-left:before { + content: "\e0eb"; +} +.font-icon-arrow-light-down:before { + content: "\e0ec"; +} +.font-icon-align-right:before { + content: "\e0ed"; +} +.font-icon-align-left:before { + content: "\e0ee"; +} +.font-icon-align-justify:before { + content: "\e0ef"; +} +.font-icon-align-center:before { + content: "\e0f0"; +} +.font-icon-adjust:before { + content: "\e0f1"; +} diff --git a/backend/tests/integration/tests/pruning/website/css/fancybox/blank.gif b/backend/tests/integration/tests/pruning/website/css/fancybox/blank.gif new file mode 100644 index 00000000000..35d42e808f0 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/css/fancybox/blank.gif differ diff --git a/backend/tests/integration/tests/pruning/website/css/fancybox/fancybox_loading.gif b/backend/tests/integration/tests/pruning/website/css/fancybox/fancybox_loading.gif new file mode 100644 index 00000000000..01586176d79 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/css/fancybox/fancybox_loading.gif differ diff --git a/backend/tests/integration/tests/pruning/website/css/fancybox/fancybox_overlay.png b/backend/tests/integration/tests/pruning/website/css/fancybox/fancybox_overlay.png new file mode 100644 index 00000000000..a4391396a9d Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/css/fancybox/fancybox_overlay.png differ diff --git a/backend/tests/integration/tests/pruning/website/css/fancybox/fancybox_sprite.png b/backend/tests/integration/tests/pruning/website/css/fancybox/fancybox_sprite.png new file mode 100644 index 00000000000..fd8d5ca566d Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/css/fancybox/fancybox_sprite.png differ diff --git a/backend/tests/integration/tests/pruning/website/css/fancybox/jquery.fancybox.css b/backend/tests/integration/tests/pruning/website/css/fancybox/jquery.fancybox.css new file mode 100644 index 00000000000..a20015ff683 --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/css/fancybox/jquery.fancybox.css @@ -0,0 +1,349 @@ +/*! 
fancyBox v2.1.4 fancyapps.com | fancyapps.com/fancybox/#license */ +.fancybox-wrap, +.fancybox-skin, +.fancybox-outer, +.fancybox-inner, +.fancybox-image, +.fancybox-wrap iframe, +.fancybox-wrap object, +.fancybox-nav, +.fancybox-nav span, +.fancybox-tmp { + padding: 0; + margin: 0; + border: 0; + outline: none; + vertical-align: top; +} + +.fancybox-wrap { + position: absolute; + top: 0; + left: 0; + z-index: 8020; +} + +.fancybox-skin { + position: relative; + background: #2f3238; + color: #565656; + text-shadow: none; + -webkit-border-radius: 0; + -moz-border-radius: 0; + border-radius: 0; +} + +.fancybox-opened { + z-index: 8030; +} + +.fancybox-opened .fancybox-skin { + -webkit-box-shadow: none; + -moz-box-shadow: none; + box-shadow: none; +} + +.fancybox-outer, +.fancybox-inner { + position: relative; +} + +.fancybox-inner { + overflow: hidden; +} + +.fancybox-type-iframe .fancybox-inner { + -webkit-overflow-scrolling: touch; +} + +.fancybox-error { + color: #444; + font-size: 14px; + line-height: 20px; + margin: 0; + padding: 15px; + white-space: nowrap; +} + +.fancybox-image, +.fancybox-iframe { + display: block; + width: 100%; + height: 100%; +} + +.fancybox-image { + max-width: 100%; + max-height: 100%; +} + +#fancybox-loading, +.fancybox-close, +.fancybox-prev span, +.fancybox-next span { + background-image: url("fancybox_sprite.png") !important; +} + +#fancybox-loading { + position: fixed; + top: 50%; + left: 50%; + margin-top: -22px; + margin-left: -22px; + background-position: 0 -108px; + opacity: 0.8; + cursor: pointer; + z-index: 8060; +} + +#fancybox-loading div { + width: 44px; + height: 44px; + background: url("fancybox_loading.gif") center center no-repeat; +} + +.fancybox-close { + position: absolute; + right: 0; + top: 0; + width: 40px; + height: 38px; + cursor: pointer; + z-index: 9000; + background-image: none; + + opacity: 0.5; + + -webkit-transition: + background 0.1s linear 0s, + opacity 0.1s linear 0s; + -moz-transition: + background 0.1s linear 0s, + opacity 0.1s linear 0s; + -o-transition: + background 0.1s linear 0s, + opacity 0.1s linear 0s; + transition: + background 0.1s linear 0s, + opacity 0.1s linear 0s; +} + +.fancybox-close i { + left: 50%; + top: 50%; + margin: -11px 0 0 -11px; + font-size: 22px; + line-height: 1em; + position: absolute; + color: #ffffff; +} + +.fancybox-close:hover { + opacity: 1; +} + +.fancybox-nav { + position: absolute; + top: 0; + height: 100%; + cursor: pointer; + text-decoration: none; + background: transparent url("blank.gif"); /* helps IE */ + -webkit-tap-highlight-color: rgba(0, 0, 0, 0); + z-index: 8040; +} + +.fancybox-prev, +.fancybox-prev span { + left: 0; +} + +.fancybox-next, +.fancybox-next span { + right: 0; +} + +.fancybox-nav span { + position: absolute; + top: 50%; + width: 44px; + height: 32px; + margin-top: -25px; + cursor: pointer; + z-index: 8040; + background-image: none; + background-color: #26292e; + background-position-y: -38px; + opacity: 0.5; + + -webkit-transition: + background 0.1s linear 0s, + opacity 0.1s linear 0s; + -moz-transition: + background 0.1s linear 0s, + opacity 0.1s linear 0s; + -o-transition: + background 0.1s linear 0s, + opacity 0.1s linear 0s; + transition: + background 0.1s linear 0s, + opacity 0.1s linear 0s; +} +.fancybox-next span { + background-position-y: -72px; +} +.fancybox-prev span i { + left: 50%; + top: 50%; + margin: -15px 0 0 -17px; + font-size: 30px; + line-height: 1em; + position: absolute; + color: #ffffff; +} + +.fancybox-next span i { + left: 50%; + top: 50%; + 
margin: -15px 0 0 -15px; + font-size: 30px; + line-height: 1em; + position: absolute; + color: #ffffff; +} + +.fancybox-nav:hover span { + opacity: 1; +} + +.fancybox-tmp { + position: absolute; + top: -99999px; + left: -99999px; + visibility: hidden; + max-width: 99999px; + max-height: 99999px; + overflow: visible !important; +} + +/* Overlay helper */ + +.fancybox-lock { + margin: 0 !important; +} + +.fancybox-overlay { + position: absolute; + top: 0; + left: 0; + overflow: hidden !important; + display: none; + z-index: 8010; + background: url("fancybox_overlay.png"); +} + +.fancybox-overlay-fixed { + position: fixed; + bottom: 0; + right: 0; +} + +.fancybox-lock .fancybox-overlay { + overflow: auto; + overflow-y: scroll; +} + +/* Title helper */ + +.fancybox-title { + visibility: hidden; + position: relative; + text-shadow: none; + z-index: 8050; +} + +.fancybox-opened .fancybox-title { + visibility: visible; +} + +.fancybox-opened .fancybox-title h4 { + font-size: 24px; + color: #fff; + font-weight: 300; + margin-bottom: 10px; +} + +.fancybox-opened .fancybox-title p { + font-size: 16px; + font-weight: 300; + color: #bbb; + line-height: 1.6em; + margin-bottom: 0; +} + +.fancybox-title-float-wrap { + position: absolute; + bottom: 0; + right: 50%; + margin-bottom: -35px; + z-index: 8050; + text-align: center; +} + +.fancybox-title-float-wrap .child { + display: inline-block; + margin-right: -100%; + padding: 2px 20px; + background: transparent; /* Fallback for web browsers that doesn't support RGBa */ + background: rgba(0, 0, 0, 0.8); + -webkit-border-radius: 15px; + -moz-border-radius: 15px; + border-radius: 15px; + text-shadow: 0 1px 2px #222; + color: #fff; + font-weight: bold; + line-height: 24px; + white-space: nowrap; +} + +.fancybox-title-outside-wrap { + position: relative; + margin-top: 10px; + color: #fff; +} + +.fancybox-title-inside-wrap { + padding: 3px 30px 6px; + background: #61b331; +} + +.fancybox-title-over-wrap { + position: absolute; + bottom: 0; + left: 0; + color: #fff; + padding: 10px; + background: #000; + background: rgba(0, 0, 0, 0.8); +} + +@media (max-width: 480px) { + .fancybox-nav span, + .fancybox-nav:hover span, + .fancybox-close, + .fancybox-close:hover { + background: transparent; + } + + .fancybox-close i { + left: 70px; + top: 10px; + } +} + +@media (max-width: 320px) { + .fancybox-close i { + left: 30px; + top: 20px; + } +} diff --git a/backend/tests/integration/tests/pruning/website/css/flexslider.css b/backend/tests/integration/tests/pruning/website/css/flexslider.css new file mode 100644 index 00000000000..6088235631c --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/css/flexslider.css @@ -0,0 +1,226 @@ +/* + * jQuery FlexSlider v2.0 + * http://www.woothemes.com/flexslider/ + * + * Copyright 2012 WooThemes + * Free to use under the GPLv2 license. + * http://www.gnu.org/licenses/gpl-2.0.html + * + * Contributing author: Tyler Smith (@mbmufffin) + */ + +/* Browser Resets */ +.flex-container a:active, +.flexslider a:active, +.flex-container a:focus, +.flexslider a:focus { + outline: none; +} +.slides, +.flex-control-nav, +.flex-direction-nav { + margin: 0; + padding: 0; + list-style: none; +} + +/* FlexSlider Necessary Styles +*********************************/ +.flexslider { + margin: 0; + padding: 0; +} +.flexslider .slides > li { + display: none; + -webkit-backface-visibility: hidden; +} /* Hide the slides before the JS is loaded. 
Avoids image jumping */ +.flexslider .slides img { + width: 100%; + display: block; +} +.flex-pauseplay span { + text-transform: capitalize; +} + +/* Clearfix for the .slides element */ +.slides:after { + content: "."; + display: block; + clear: both; + visibility: hidden; + line-height: 0; + height: 0; +} +html[xmlns] .slides { + display: block; +} +* html .slides { + height: 1%; +} + +/* No JavaScript Fallback */ +/* If you are not using another script, such as Modernizr, make sure you + * include js that eliminates this class on page load */ +.no-js .slides > li:first-child { + display: block; +} + +/* FlexSlider Default Theme +*********************************/ +.flexslider { + background: none; + position: relative; + zoom: 1; +} +.flex-viewport { + max-height: 2000px; + -webkit-transition: all 1s ease; + -moz-transition: all 1s ease; + transition: all 1s ease; +} +.loading .flex-viewport { + max-height: 300px; +} +.flexslider .slides { + zoom: 1; +} + +.carousel li { + margin-right: 5px; +} + +/* Caption style */ + +.flex-caption { + background: rgba(0, 0, 0, 0.8); + margin-left: 5px; + bottom: 5px; + position: absolute; + padding: 20px; + z-index: 99; +} +.flex-caption p { + font-size: 14px !important; + line-height: 22px; + font-weight: 300; + color: #fff; +} +.flex-caption h2, +.flex-caption h4 { + color: #fff; +} + +/* Direction Nav */ +.flex-direction-nav { + *height: 0; +} +.flex-direction-nav a { + width: 30px; + height: 40px; + margin: 0; + display: block; + background: url(../img/bg_direction_nav.png) no-repeat 0 0; + position: absolute; + top: 45%; + z-index: 10; + cursor: pointer; + text-indent: -9999px; + opacity: 0; + -webkit-transition: all 0.3s ease; +} +.flex-direction-nav .flex-next { + background-position: 100% 0; + right: -36px; +} +.flex-direction-nav .flex-prev { + left: -36px; +} +.flexslider:hover .flex-next { + opacity: 0.8; + right: 5px; +} +.flexslider:hover .flex-prev { + opacity: 0.8; + left: 5px; +} +.flexslider:hover .flex-next:hover, +.flexslider:hover .flex-prev:hover { + opacity: 1; +} +.flex-direction-nav .flex-disabled { + opacity: 0.3 !important; + filter: alpha(opacity=30); + cursor: default; +} + +/* Control Nav */ +.flex-control-nav { + width: 100%; + position: absolute; + bottom: 0; + text-align: center; +} +.flex-control-nav li { + margin: 0 6px; + display: inline-block; + zoom: 1; + *display: inline; +} +.flex-control-paging li a { + width: 11px; + height: 11px; + display: block; + background: #666; + background: rgba(0, 0, 0, 0.5); + cursor: pointer; + text-indent: -9999px; + -webkit-border-radius: 20px; + -moz-border-radius: 20px; + -o-border-radius: 20px; + border-radius: 20px; + box-shadow: inset 0 0 3px rgba(0, 0, 0, 0.3); +} +.flex-control-paging li a:hover { + background: #333; + background: rgba(0, 0, 0, 0.7); +} +.flex-control-paging li a.flex-active { + background: #000; + background: rgba(0, 0, 0, 0.9); + cursor: default; +} + +.flex-control-thumbs { + margin: 5px 0 0; + position: static; + overflow: hidden; +} +.flex-control-thumbs li { + width: 25%; + float: left; + margin: 0; +} +.flex-control-thumbs img { + width: 100%; + display: block; + opacity: 0.7; + cursor: pointer; +} +.flex-control-thumbs img:hover { + opacity: 1; +} +.flex-control-thumbs .flex-active { + opacity: 1; + cursor: default; +} + +@media screen and (max-width: 860px) { + .flex-direction-nav .flex-prev { + opacity: 1; + left: 0; + } + .flex-direction-nav .flex-next { + opacity: 1; + right: 0; + } +} diff --git 
a/backend/tests/integration/tests/pruning/website/css/font-awesome.css b/backend/tests/integration/tests/pruning/website/css/font-awesome.css new file mode 100644 index 00000000000..49a13c9a58d --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/css/font-awesome.css @@ -0,0 +1,1344 @@ +/*! + * Font Awesome 4.0.3 by @davegandy - http://fontawesome.io - @fontawesome + * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) + */ +/* FONT PATH + * -------------------------- */ +@font-face { + font-family: "FontAwesome"; + src: url("../fonts/fontawesome-webfont.eot?v=4.0.3"); + src: + url("../fonts/fontawesome-webfont.eot?#iefix&v=4.0.3") + format("embedded-opentype"), + url("../fonts/fontawesome-webfont.woff?v=4.0.3") format("woff"), + url("../fonts/fontawesome-webfont.ttf?v=4.0.3") format("truetype"), + url("../fonts/fontawesome-webfont.svg?v=4.0.3#fontawesomeregular") + format("svg"); + font-weight: normal; + font-style: normal; +} +.fa { + display: inline-block; + font-family: FontAwesome; + font-style: normal; + font-weight: normal; + line-height: 1; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} +/* makes the font 33% larger relative to the icon container */ +.fa-lg { + font-size: 1.3333333333333333em; + line-height: 0.75em; + vertical-align: -15%; +} +.fa-2x { + font-size: 2em; +} +.fa-3x { + font-size: 3em; +} +.fa-4x { + font-size: 4em; +} +.fa-5x { + font-size: 5em; +} +.fa-fw { + width: 1.2857142857142858em; + text-align: center; +} +.fa-ul { + padding-left: 0; + margin-left: 2.142857142857143em; + list-style-type: none; +} +.fa-ul > li { + position: relative; +} +.fa-li { + position: absolute; + left: -2.142857142857143em; + width: 2.142857142857143em; + top: 0.14285714285714285em; + text-align: center; +} +.fa-li.fa-lg { + left: -1.8571428571428572em; +} +.fa-border { + padding: 0.2em 0.25em 0.15em; + border: solid 0.08em #eeeeee; + border-radius: 0.1em; +} +.pull-right { + float: right; +} +.pull-left { + float: left; +} +.fa.pull-left { + margin-right: 0.3em; +} +.fa.pull-right { + margin-left: 0.3em; +} +.fa-spin { + -webkit-animation: spin 2s infinite linear; + -moz-animation: spin 2s infinite linear; + -o-animation: spin 2s infinite linear; + animation: spin 2s infinite linear; +} +@-moz-keyframes spin { + 0% { + -moz-transform: rotate(0deg); + } + 100% { + -moz-transform: rotate(359deg); + } +} +@-webkit-keyframes spin { + 0% { + -webkit-transform: rotate(0deg); + } + 100% { + -webkit-transform: rotate(359deg); + } +} +@-o-keyframes spin { + 0% { + -o-transform: rotate(0deg); + } + 100% { + -o-transform: rotate(359deg); + } +} +@-ms-keyframes spin { + 0% { + -ms-transform: rotate(0deg); + } + 100% { + -ms-transform: rotate(359deg); + } +} +@keyframes spin { + 0% { + transform: rotate(0deg); + } + 100% { + transform: rotate(359deg); + } +} +.fa-rotate-90 { + filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=1); + -webkit-transform: rotate(90deg); + -moz-transform: rotate(90deg); + -ms-transform: rotate(90deg); + -o-transform: rotate(90deg); + transform: rotate(90deg); +} +.fa-rotate-180 { + filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=2); + -webkit-transform: rotate(180deg); + -moz-transform: rotate(180deg); + -ms-transform: rotate(180deg); + -o-transform: rotate(180deg); + transform: rotate(180deg); +} +.fa-rotate-270 { + filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=3); + -webkit-transform: rotate(270deg); + -moz-transform: rotate(270deg); + -ms-transform: 
rotate(270deg); + -o-transform: rotate(270deg); + transform: rotate(270deg); +} +.fa-flip-horizontal { + filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1); + -webkit-transform: scale(-1, 1); + -moz-transform: scale(-1, 1); + -ms-transform: scale(-1, 1); + -o-transform: scale(-1, 1); + transform: scale(-1, 1); +} +.fa-flip-vertical { + filter: progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1); + -webkit-transform: scale(1, -1); + -moz-transform: scale(1, -1); + -ms-transform: scale(1, -1); + -o-transform: scale(1, -1); + transform: scale(1, -1); +} +.fa-stack { + position: relative; + display: inline-block; + width: 2em; + height: 2em; + line-height: 2em; + vertical-align: middle; +} +.fa-stack-1x, +.fa-stack-2x { + position: absolute; + left: 0; + width: 100%; + text-align: center; +} +.fa-stack-1x { + line-height: inherit; +} +.fa-stack-2x { + font-size: 2em; +} +.fa-inverse { + color: #ffffff; +} +/* Font Awesome uses the Unicode Private Use Area (PUA) to ensure screen + readers do not read off random characters that represent icons */ +.fa-glass:before { + content: "\f000"; +} +.fa-music:before { + content: "\f001"; +} +.fa-search:before { + content: "\f002"; +} +.fa-envelope-o:before { + content: "\f003"; +} +.fa-heart:before { + content: "\f004"; +} +.fa-star:before { + content: "\f005"; +} +.fa-star-o:before { + content: "\f006"; +} +.fa-user:before { + content: "\f007"; +} +.fa-film:before { + content: "\f008"; +} +.fa-th-large:before { + content: "\f009"; +} +.fa-th:before { + content: "\f00a"; +} +.fa-th-list:before { + content: "\f00b"; +} +.fa-check:before { + content: "\f00c"; +} +.fa-times:before { + content: "\f00d"; +} +.fa-search-plus:before { + content: "\f00e"; +} +.fa-search-minus:before { + content: "\f010"; +} +.fa-power-off:before { + content: "\f011"; +} +.fa-signal:before { + content: "\f012"; +} +.fa-gear:before, +.fa-cog:before { + content: "\f013"; +} +.fa-trash-o:before { + content: "\f014"; +} +.fa-home:before { + content: "\f015"; +} +.fa-file-o:before { + content: "\f016"; +} +.fa-clock-o:before { + content: "\f017"; +} +.fa-road:before { + content: "\f018"; +} +.fa-download:before { + content: "\f019"; +} +.fa-arrow-circle-o-down:before { + content: "\f01a"; +} +.fa-arrow-circle-o-up:before { + content: "\f01b"; +} +.fa-inbox:before { + content: "\f01c"; +} +.fa-play-circle-o:before { + content: "\f01d"; +} +.fa-rotate-right:before, +.fa-repeat:before { + content: "\f01e"; +} +.fa-refresh:before { + content: "\f021"; +} +.fa-list-alt:before { + content: "\f022"; +} +.fa-lock:before { + content: "\f023"; +} +.fa-flag:before { + content: "\f024"; +} +.fa-headphones:before { + content: "\f025"; +} +.fa-volume-off:before { + content: "\f026"; +} +.fa-volume-down:before { + content: "\f027"; +} +.fa-volume-up:before { + content: "\f028"; +} +.fa-qrcode:before { + content: "\f029"; +} +.fa-barcode:before { + content: "\f02a"; +} +.fa-tag:before { + content: "\f02b"; +} +.fa-tags:before { + content: "\f02c"; +} +.fa-book:before { + content: "\f02d"; +} +.fa-bookmark:before { + content: "\f02e"; +} +.fa-print:before { + content: "\f02f"; +} +.fa-camera:before { + content: "\f030"; +} +.fa-font:before { + content: "\f031"; +} +.fa-bold:before { + content: "\f032"; +} +.fa-italic:before { + content: "\f033"; +} +.fa-text-height:before { + content: "\f034"; +} +.fa-text-width:before { + content: "\f035"; +} +.fa-align-left:before { + content: "\f036"; +} +.fa-align-center:before { + content: "\f037"; +} +.fa-align-right:before 
{ + content: "\f038"; +} +.fa-align-justify:before { + content: "\f039"; +} +.fa-list:before { + content: "\f03a"; +} +.fa-dedent:before, +.fa-outdent:before { + content: "\f03b"; +} +.fa-indent:before { + content: "\f03c"; +} +.fa-video-camera:before { + content: "\f03d"; +} +.fa-picture-o:before { + content: "\f03e"; +} +.fa-pencil:before { + content: "\f040"; +} +.fa-map-marker:before { + content: "\f041"; +} +.fa-adjust:before { + content: "\f042"; +} +.fa-tint:before { + content: "\f043"; +} +.fa-edit:before, +.fa-pencil-square-o:before { + content: "\f044"; +} +.fa-share-square-o:before { + content: "\f045"; +} +.fa-check-square-o:before { + content: "\f046"; +} +.fa-arrows:before { + content: "\f047"; +} +.fa-step-backward:before { + content: "\f048"; +} +.fa-fast-backward:before { + content: "\f049"; +} +.fa-backward:before { + content: "\f04a"; +} +.fa-play:before { + content: "\f04b"; +} +.fa-pause:before { + content: "\f04c"; +} +.fa-stop:before { + content: "\f04d"; +} +.fa-forward:before { + content: "\f04e"; +} +.fa-fast-forward:before { + content: "\f050"; +} +.fa-step-forward:before { + content: "\f051"; +} +.fa-eject:before { + content: "\f052"; +} +.fa-chevron-left:before { + content: "\f053"; +} +.fa-chevron-right:before { + content: "\f054"; +} +.fa-plus-circle:before { + content: "\f055"; +} +.fa-minus-circle:before { + content: "\f056"; +} +.fa-times-circle:before { + content: "\f057"; +} +.fa-check-circle:before { + content: "\f058"; +} +.fa-question-circle:before { + content: "\f059"; +} +.fa-info-circle:before { + content: "\f05a"; +} +.fa-crosshairs:before { + content: "\f05b"; +} +.fa-times-circle-o:before { + content: "\f05c"; +} +.fa-check-circle-o:before { + content: "\f05d"; +} +.fa-ban:before { + content: "\f05e"; +} +.fa-arrow-left:before { + content: "\f060"; +} +.fa-arrow-right:before { + content: "\f061"; +} +.fa-arrow-up:before { + content: "\f062"; +} +.fa-arrow-down:before { + content: "\f063"; +} +.fa-mail-forward:before, +.fa-share:before { + content: "\f064"; +} +.fa-expand:before { + content: "\f065"; +} +.fa-compress:before { + content: "\f066"; +} +.fa-plus:before { + content: "\f067"; +} +.fa-minus:before { + content: "\f068"; +} +.fa-asterisk:before { + content: "\f069"; +} +.fa-exclamation-circle:before { + content: "\f06a"; +} +.fa-gift:before { + content: "\f06b"; +} +.fa-leaf:before { + content: "\f06c"; +} +.fa-fire:before { + content: "\f06d"; +} +.fa-eye:before { + content: "\f06e"; +} +.fa-eye-slash:before { + content: "\f070"; +} +.fa-warning:before, +.fa-exclamation-triangle:before { + content: "\f071"; +} +.fa-plane:before { + content: "\f072"; +} +.fa-calendar:before { + content: "\f073"; +} +.fa-random:before { + content: "\f074"; +} +.fa-comment:before { + content: "\f075"; +} +.fa-magnet:before { + content: "\f076"; +} +.fa-chevron-up:before { + content: "\f077"; +} +.fa-chevron-down:before { + content: "\f078"; +} +.fa-retweet:before { + content: "\f079"; +} +.fa-shopping-cart:before { + content: "\f07a"; +} +.fa-folder:before { + content: "\f07b"; +} +.fa-folder-open:before { + content: "\f07c"; +} +.fa-arrows-v:before { + content: "\f07d"; +} +.fa-arrows-h:before { + content: "\f07e"; +} +.fa-bar-chart-o:before { + content: "\f080"; +} +.fa-twitter-square:before { + content: "\f081"; +} +.fa-facebook-square:before { + content: "\f082"; +} +.fa-camera-retro:before { + content: "\f083"; +} +.fa-key:before { + content: "\f084"; +} +.fa-gears:before, +.fa-cogs:before { + content: "\f085"; +} +.fa-comments:before { + content: 
"\f086"; +} +.fa-thumbs-o-up:before { + content: "\f087"; +} +.fa-thumbs-o-down:before { + content: "\f088"; +} +.fa-star-half:before { + content: "\f089"; +} +.fa-heart-o:before { + content: "\f08a"; +} +.fa-sign-out:before { + content: "\f08b"; +} +.fa-linkedin-square:before { + content: "\f08c"; +} +.fa-thumb-tack:before { + content: "\f08d"; +} +.fa-external-link:before { + content: "\f08e"; +} +.fa-sign-in:before { + content: "\f090"; +} +.fa-trophy:before { + content: "\f091"; +} +.fa-github-square:before { + content: "\f092"; +} +.fa-upload:before { + content: "\f093"; +} +.fa-lemon-o:before { + content: "\f094"; +} +.fa-phone:before { + content: "\f095"; +} +.fa-square-o:before { + content: "\f096"; +} +.fa-bookmark-o:before { + content: "\f097"; +} +.fa-phone-square:before { + content: "\f098"; +} +.fa-twitter:before { + content: "\f099"; +} +.fa-facebook:before { + content: "\f09a"; +} +.fa-github:before { + content: "\f09b"; +} +.fa-unlock:before { + content: "\f09c"; +} +.fa-credit-card:before { + content: "\f09d"; +} +.fa-rss:before { + content: "\f09e"; +} +.fa-hdd-o:before { + content: "\f0a0"; +} +.fa-bullhorn:before { + content: "\f0a1"; +} +.fa-bell:before { + content: "\f0f3"; +} +.fa-certificate:before { + content: "\f0a3"; +} +.fa-hand-o-right:before { + content: "\f0a4"; +} +.fa-hand-o-left:before { + content: "\f0a5"; +} +.fa-hand-o-up:before { + content: "\f0a6"; +} +.fa-hand-o-down:before { + content: "\f0a7"; +} +.fa-arrow-circle-left:before { + content: "\f0a8"; +} +.fa-arrow-circle-right:before { + content: "\f0a9"; +} +.fa-arrow-circle-up:before { + content: "\f0aa"; +} +.fa-arrow-circle-down:before { + content: "\f0ab"; +} +.fa-globe:before { + content: "\f0ac"; +} +.fa-wrench:before { + content: "\f0ad"; +} +.fa-tasks:before { + content: "\f0ae"; +} +.fa-filter:before { + content: "\f0b0"; +} +.fa-briefcase:before { + content: "\f0b1"; +} +.fa-arrows-alt:before { + content: "\f0b2"; +} +.fa-group:before, +.fa-users:before { + content: "\f0c0"; +} +.fa-chain:before, +.fa-link:before { + content: "\f0c1"; +} +.fa-cloud:before { + content: "\f0c2"; +} +.fa-flask:before { + content: "\f0c3"; +} +.fa-cut:before, +.fa-scissors:before { + content: "\f0c4"; +} +.fa-copy:before, +.fa-files-o:before { + content: "\f0c5"; +} +.fa-paperclip:before { + content: "\f0c6"; +} +.fa-save:before, +.fa-floppy-o:before { + content: "\f0c7"; +} +.fa-square:before { + content: "\f0c8"; +} +.fa-bars:before { + content: "\f0c9"; +} +.fa-list-ul:before { + content: "\f0ca"; +} +.fa-list-ol:before { + content: "\f0cb"; +} +.fa-strikethrough:before { + content: "\f0cc"; +} +.fa-underline:before { + content: "\f0cd"; +} +.fa-table:before { + content: "\f0ce"; +} +.fa-magic:before { + content: "\f0d0"; +} +.fa-truck:before { + content: "\f0d1"; +} +.fa-pinterest:before { + content: "\f0d2"; +} +.fa-pinterest-square:before { + content: "\f0d3"; +} +.fa-google-plus-square:before { + content: "\f0d4"; +} +.fa-google-plus:before { + content: "\f0d5"; +} +.fa-money:before { + content: "\f0d6"; +} +.fa-caret-down:before { + content: "\f0d7"; +} +.fa-caret-up:before { + content: "\f0d8"; +} +.fa-caret-left:before { + content: "\f0d9"; +} +.fa-caret-right:before { + content: "\f0da"; +} +.fa-columns:before { + content: "\f0db"; +} +.fa-unsorted:before, +.fa-sort:before { + content: "\f0dc"; +} +.fa-sort-down:before, +.fa-sort-asc:before { + content: "\f0dd"; +} +.fa-sort-up:before, +.fa-sort-desc:before { + content: "\f0de"; +} +.fa-envelope:before { + content: "\f0e0"; +} +.fa-linkedin:before { 
+ content: "\f0e1"; +} +.fa-rotate-left:before, +.fa-undo:before { + content: "\f0e2"; +} +.fa-legal:before, +.fa-gavel:before { + content: "\f0e3"; +} +.fa-dashboard:before, +.fa-tachometer:before { + content: "\f0e4"; +} +.fa-comment-o:before { + content: "\f0e5"; +} +.fa-comments-o:before { + content: "\f0e6"; +} +.fa-flash:before, +.fa-bolt:before { + content: "\f0e7"; +} +.fa-sitemap:before { + content: "\f0e8"; +} +.fa-umbrella:before { + content: "\f0e9"; +} +.fa-paste:before, +.fa-clipboard:before { + content: "\f0ea"; +} +.fa-lightbulb-o:before { + content: "\f0eb"; +} +.fa-exchange:before { + content: "\f0ec"; +} +.fa-cloud-download:before { + content: "\f0ed"; +} +.fa-cloud-upload:before { + content: "\f0ee"; +} +.fa-user-md:before { + content: "\f0f0"; +} +.fa-stethoscope:before { + content: "\f0f1"; +} +.fa-suitcase:before { + content: "\f0f2"; +} +.fa-bell-o:before { + content: "\f0a2"; +} +.fa-coffee:before { + content: "\f0f4"; +} +.fa-cutlery:before { + content: "\f0f5"; +} +.fa-file-text-o:before { + content: "\f0f6"; +} +.fa-building-o:before { + content: "\f0f7"; +} +.fa-hospital-o:before { + content: "\f0f8"; +} +.fa-ambulance:before { + content: "\f0f9"; +} +.fa-medkit:before { + content: "\f0fa"; +} +.fa-fighter-jet:before { + content: "\f0fb"; +} +.fa-beer:before { + content: "\f0fc"; +} +.fa-h-square:before { + content: "\f0fd"; +} +.fa-plus-square:before { + content: "\f0fe"; +} +.fa-angle-double-left:before { + content: "\f100"; +} +.fa-angle-double-right:before { + content: "\f101"; +} +.fa-angle-double-up:before { + content: "\f102"; +} +.fa-angle-double-down:before { + content: "\f103"; +} +.fa-angle-left:before { + content: "\f104"; +} +.fa-angle-right:before { + content: "\f105"; +} +.fa-angle-up:before { + content: "\f106"; +} +.fa-angle-down:before { + content: "\f107"; +} +.fa-desktop:before { + content: "\f108"; +} +.fa-laptop:before { + content: "\f109"; +} +.fa-tablet:before { + content: "\f10a"; +} +.fa-mobile-phone:before, +.fa-mobile:before { + content: "\f10b"; +} +.fa-circle-o:before { + content: "\f10c"; +} +.fa-quote-left:before { + content: "\f10d"; +} +.fa-quote-right:before { + content: "\f10e"; +} +.fa-spinner:before { + content: "\f110"; +} +.fa-circle:before { + content: "\f111"; +} +.fa-mail-reply:before, +.fa-reply:before { + content: "\f112"; +} +.fa-github-alt:before { + content: "\f113"; +} +.fa-folder-o:before { + content: "\f114"; +} +.fa-folder-open-o:before { + content: "\f115"; +} +.fa-smile-o:before { + content: "\f118"; +} +.fa-frown-o:before { + content: "\f119"; +} +.fa-meh-o:before { + content: "\f11a"; +} +.fa-gamepad:before { + content: "\f11b"; +} +.fa-keyboard-o:before { + content: "\f11c"; +} +.fa-flag-o:before { + content: "\f11d"; +} +.fa-flag-checkered:before { + content: "\f11e"; +} +.fa-terminal:before { + content: "\f120"; +} +.fa-code:before { + content: "\f121"; +} +.fa-reply-all:before { + content: "\f122"; +} +.fa-mail-reply-all:before { + content: "\f122"; +} +.fa-star-half-empty:before, +.fa-star-half-full:before, +.fa-star-half-o:before { + content: "\f123"; +} +.fa-location-arrow:before { + content: "\f124"; +} +.fa-crop:before { + content: "\f125"; +} +.fa-code-fork:before { + content: "\f126"; +} +.fa-unlink:before, +.fa-chain-broken:before { + content: "\f127"; +} +.fa-question:before { + content: "\f128"; +} +.fa-info:before { + content: "\f129"; +} +.fa-exclamation:before { + content: "\f12a"; +} +.fa-superscript:before { + content: "\f12b"; +} +.fa-subscript:before { + content: "\f12c"; +} 
+.fa-eraser:before { + content: "\f12d"; +} +.fa-puzzle-piece:before { + content: "\f12e"; +} +.fa-microphone:before { + content: "\f130"; +} +.fa-microphone-slash:before { + content: "\f131"; +} +.fa-shield:before { + content: "\f132"; +} +.fa-calendar-o:before { + content: "\f133"; +} +.fa-fire-extinguisher:before { + content: "\f134"; +} +.fa-rocket:before { + content: "\f135"; +} +.fa-maxcdn:before { + content: "\f136"; +} +.fa-chevron-circle-left:before { + content: "\f137"; +} +.fa-chevron-circle-right:before { + content: "\f138"; +} +.fa-chevron-circle-up:before { + content: "\f139"; +} +.fa-chevron-circle-down:before { + content: "\f13a"; +} +.fa-html5:before { + content: "\f13b"; +} +.fa-css3:before { + content: "\f13c"; +} +.fa-anchor:before { + content: "\f13d"; +} +.fa-unlock-alt:before { + content: "\f13e"; +} +.fa-bullseye:before { + content: "\f140"; +} +.fa-ellipsis-h:before { + content: "\f141"; +} +.fa-ellipsis-v:before { + content: "\f142"; +} +.fa-rss-square:before { + content: "\f143"; +} +.fa-play-circle:before { + content: "\f144"; +} +.fa-ticket:before { + content: "\f145"; +} +.fa-minus-square:before { + content: "\f146"; +} +.fa-minus-square-o:before { + content: "\f147"; +} +.fa-level-up:before { + content: "\f148"; +} +.fa-level-down:before { + content: "\f149"; +} +.fa-check-square:before { + content: "\f14a"; +} +.fa-pencil-square:before { + content: "\f14b"; +} +.fa-external-link-square:before { + content: "\f14c"; +} +.fa-share-square:before { + content: "\f14d"; +} +.fa-compass:before { + content: "\f14e"; +} +.fa-toggle-down:before, +.fa-caret-square-o-down:before { + content: "\f150"; +} +.fa-toggle-up:before, +.fa-caret-square-o-up:before { + content: "\f151"; +} +.fa-toggle-right:before, +.fa-caret-square-o-right:before { + content: "\f152"; +} +.fa-euro:before, +.fa-eur:before { + content: "\f153"; +} +.fa-gbp:before { + content: "\f154"; +} +.fa-dollar:before, +.fa-usd:before { + content: "\f155"; +} +.fa-rupee:before, +.fa-inr:before { + content: "\f156"; +} +.fa-cny:before, +.fa-rmb:before, +.fa-yen:before, +.fa-jpy:before { + content: "\f157"; +} +.fa-ruble:before, +.fa-rouble:before, +.fa-rub:before { + content: "\f158"; +} +.fa-won:before, +.fa-krw:before { + content: "\f159"; +} +.fa-bitcoin:before, +.fa-btc:before { + content: "\f15a"; +} +.fa-file:before { + content: "\f15b"; +} +.fa-file-text:before { + content: "\f15c"; +} +.fa-sort-alpha-asc:before { + content: "\f15d"; +} +.fa-sort-alpha-desc:before { + content: "\f15e"; +} +.fa-sort-amount-asc:before { + content: "\f160"; +} +.fa-sort-amount-desc:before { + content: "\f161"; +} +.fa-sort-numeric-asc:before { + content: "\f162"; +} +.fa-sort-numeric-desc:before { + content: "\f163"; +} +.fa-thumbs-up:before { + content: "\f164"; +} +.fa-thumbs-down:before { + content: "\f165"; +} +.fa-youtube-square:before { + content: "\f166"; +} +.fa-youtube:before { + content: "\f167"; +} +.fa-xing:before { + content: "\f168"; +} +.fa-xing-square:before { + content: "\f169"; +} +.fa-youtube-play:before { + content: "\f16a"; +} +.fa-dropbox:before { + content: "\f16b"; +} +.fa-stack-overflow:before { + content: "\f16c"; +} +.fa-instagram:before { + content: "\f16d"; +} +.fa-flickr:before { + content: "\f16e"; +} +.fa-adn:before { + content: "\f170"; +} +.fa-bitbucket:before { + content: "\f171"; +} +.fa-bitbucket-square:before { + content: "\f172"; +} +.fa-tumblr:before { + content: "\f173"; +} +.fa-tumblr-square:before { + content: "\f174"; +} +.fa-long-arrow-down:before { + content: "\f175"; +} 
+.fa-long-arrow-up:before { + content: "\f176"; +} +.fa-long-arrow-left:before { + content: "\f177"; +} +.fa-long-arrow-right:before { + content: "\f178"; +} +.fa-apple:before { + content: "\f179"; +} +.fa-windows:before { + content: "\f17a"; +} +.fa-android:before { + content: "\f17b"; +} +.fa-linux:before { + content: "\f17c"; +} +.fa-dribbble:before { + content: "\f17d"; +} +.fa-skype:before { + content: "\f17e"; +} +.fa-foursquare:before { + content: "\f180"; +} +.fa-trello:before { + content: "\f181"; +} +.fa-female:before { + content: "\f182"; +} +.fa-male:before { + content: "\f183"; +} +.fa-gittip:before { + content: "\f184"; +} +.fa-sun-o:before { + content: "\f185"; +} +.fa-moon-o:before { + content: "\f186"; +} +.fa-archive:before { + content: "\f187"; +} +.fa-bug:before { + content: "\f188"; +} +.fa-vk:before { + content: "\f189"; +} +.fa-weibo:before { + content: "\f18a"; +} +.fa-renren:before { + content: "\f18b"; +} +.fa-pagelines:before { + content: "\f18c"; +} +.fa-stack-exchange:before { + content: "\f18d"; +} +.fa-arrow-circle-o-right:before { + content: "\f18e"; +} +.fa-arrow-circle-o-left:before { + content: "\f190"; +} +.fa-toggle-left:before, +.fa-caret-square-o-left:before { + content: "\f191"; +} +.fa-dot-circle-o:before { + content: "\f192"; +} +.fa-wheelchair:before { + content: "\f193"; +} +.fa-vimeo-square:before { + content: "\f194"; +} +.fa-turkish-lira:before, +.fa-try:before { + content: "\f195"; +} +.fa-plus-square-o:before { + content: "\f196"; +} diff --git a/backend/tests/integration/tests/pruning/website/css/style.css b/backend/tests/integration/tests/pruning/website/css/style.css new file mode 100644 index 00000000000..970a9e89e90 --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/css/style.css @@ -0,0 +1,1779 @@ +/* +Author URI: http://webthemez.com/ +Note: +Licence under Creative Commons Attribution 3.0 +Do not remove the back-link in this web template +-------------------------------------------------------*/ + +@import url("http://fonts.googleapis.com/css?family=Noto+Serif:400,400italic,700|Open+Sans:400,600,700"); +@import url("font-awesome.css"); +@import url("animate.css"); + +body { + font-family: "Open Sans", Arial, sans-serif; + font-size: 14px; + font-weight: 300; + line-height: 1.6em; + color: #656565; +} + +a:active { + outline: 0; +} + +.clear { + clear: both; +} + +h1, +h2, +h3, +h4, +h5, +h6 { + font-family: "Open Sans", Arial, sans-serif; + font-weight: 700; + line-height: 1.1em; + color: #333; + margin-bottom: 20px; +} + +.container { + padding: 0 20px 0 20px; + position: relative; +} + +#wrapper { + width: 100%; + margin: 0; + padding: 0; +} + +.row, +.row-fluid { + margin-bottom: 30px; +} + +.row .row, +.row-fluid .row-fluid { + margin-bottom: 30px; +} + +.row.nomargin, +.row-fluid.nomargin { + margin-bottom: 0; +} + +img.img-polaroid { + margin: 0 0 20px 0; +} +.img-box { + max-width: 100%; +} +/* Header +==================================== */ + +header .navbar { + margin-bottom: 0; +} + +.navbar-default { + border: none; +} + +.navbar-brand { + color: #222; + text-transform: uppercase; + font-size: 24px; + font-weight: 700; + line-height: 1em; + letter-spacing: -1px; + margin-top: 13px; + padding: 0 0 0 15px; +} +.navbar-default .navbar-brand { + color: #61b331; +} + +header .navbar-collapse ul.navbar-nav { + float: right; + margin-right: 0; +} + +header .navbar-default { + background-color: #ffffff; +} + +header .nav li a:hover, +header .nav li a:focus, +header .nav li.active a, +header .nav li.active a:hover, 
+header .nav li a.dropdown-toggle:hover, +header .nav li a.dropdown-toggle:focus, +header .nav li.active ul.dropdown-menu li a:hover, +header .nav li.active ul.dropdown-menu li.active a { + -webkit-transition: all 0.3s ease; + -moz-transition: all 0.3s ease; + -ms-transition: all 0.3s ease; + -o-transition: all 0.3s ease; + transition: all 0.3s ease; +} + +header .navbar-default .navbar-nav > .open > a, +header .navbar-default .navbar-nav > .open > a:hover, +header .navbar-default .navbar-nav > .open > a:focus { + -webkit-transition: all 0.3s ease; + -moz-transition: all 0.3s ease; + -ms-transition: all 0.3s ease; + -o-transition: all 0.3s ease; + transition: all 0.3s ease; +} + +header .navbar { + min-height: 70px; + padding: 18px 0; +} + +header .navbar-nav > li { + padding-bottom: 12px; + padding-top: 12px; +} + +header .navbar-nav > li > a { + padding-bottom: 6px; + padding-top: 5px; + margin-left: 2px; + line-height: 30px; + font-weight: 700; + -webkit-transition: all 0.3s ease; + -moz-transition: all 0.3s ease; + -ms-transition: all 0.3s ease; + -o-transition: all 0.3s ease; + transition: all 0.3s ease; +} + +.dropdown-menu li a:hover { + color: #fff !important; +} + +header .nav .caret { + border-bottom-color: #f5f5f5; + border-top-color: #f5f5f5; +} +.navbar-default .navbar-nav > .active > a, +.navbar-default .navbar-nav > .active > a:hover, +.navbar-default .navbar-nav > .active > a:focus { + background-color: #fff; +} +.navbar-default .navbar-nav > .open > a, +.navbar-default .navbar-nav > .open > a:hover, +.navbar-default .navbar-nav > .open > a:focus { + background-color: #fff; +} + +.dropdown-menu { + box-shadow: none; + border-radius: 0; + border: none; +} + +.dropdown-menu li:last-child { + padding-bottom: 0 !important; + margin-bottom: 0; +} + +header .nav li .dropdown-menu { + padding: 0; +} + +header .nav li .dropdown-menu li a { + line-height: 28px; + padding: 3px 12px; +} +.item-thumbs img { + margin-bottom: 15px; +} +.flex-control-paging li a.flex-active { + background: #000; + background: rgb(255, 255, 255); + cursor: default; +} +.flex-control-paging li a { + width: 30px; + height: 11px; + display: block; + background: #666; + background: rgba(0, 0, 0, 0.5); + cursor: pointer; + text-indent: -9999px; + -webkit-border-radius: 20px; + -moz-border-radius: 20px; + -o-border-radius: 20px; + border-radius: 20px; + box-shadow: inset 0 0 3px rgba(0, 0, 0, 0.3); +} +.panel-title > a { + color: inherit; + color: #fff; +} +.panel-group .panel-heading + .panel-collapse .panel-body { + border-top: 1px solid #ddd; + color: #fff; + background-color: #9c9c9c; +} +/* --- menu --- */ + +header .navigation { + float: right; +} + +header ul.nav li { + border: none; + margin: 0; +} + +header ul.nav li a { + font-size: 12px; + border: none; + font-weight: 700; + text-transform: uppercase; +} + +header ul.nav li ul li a { + font-size: 12px; + border: none; + font-weight: 300; + text-transform: uppercase; +} + +.navbar .nav > li > a { + color: #848484; + text-shadow: none; + border: 1px solid rgba(255, 255, 255, 0) !important; +} + +.navbar .nav a:hover { + background: none; + color: #14a085 !important; +} + +.navbar .nav > .active > a, +.navbar .nav > .active > a:hover { + background: none; + font-weight: 700; +} + +.navbar .nav > .active > a:active, +.navbar .nav > .active > a:focus { + background: none; + outline: 0; + font-weight: 700; +} + +.navbar .nav li .dropdown-menu { + z-index: 2000; +} + +header ul.nav li ul { + margin-top: 1px; +} +header ul.nav li ul li ul { + margin: 1px 0 0 
1px; +} +.dropdown-menu .dropdown i { + position: absolute; + right: 0; + margin-top: 3px; + padding-left: 20px; +} + +.navbar .nav > li > .dropdown-menu:before { + display: inline-block; + border-right: none; + border-bottom: none; + border-left: none; + border-bottom-color: none; + content: none; +} +.navbar-default .navbar-nav > .active > a, +.navbar-default .navbar-nav > .active > a:hover, +.navbar-default .navbar-nav > .active > a:focus { + color: #14a085; +} + +ul.nav li.dropdown a { + z-index: 1000; + display: block; +} + +select.selectmenu { + display: none; +} +.pageTitle { + color: #fff; + margin: 30px 0 3px; + display: inline-block; +} + +#featured { + width: 100%; + background: #000; + position: relative; + margin: 0; + padding: 0; +} + +/* Sliders +==================================== */ +/* --- flexslider --- */ + +#featured .flexslider { + padding: 0; + background: #fff; + position: relative; + zoom: 1; +} +.flex-direction-nav .flex-prev { + left: 0px; +} +.flex-direction-nav .flex-next { + right: 0px; +} +.flex-caption { + zoom: 0; + color: #1c1d21; + margin: 0 auto; + padding: 1px; + position: absolute; + vertical-align: bottom; + text-align: center; + background-color: rgba(255, 255, 255, 0.26); + bottom: 5%; + display: block; + left: 0; + right: 0; +} +.flex-caption h3 { + color: #fff; + letter-spacing: 1px; + margin-bottom: 8px; + text-transform: uppercase; +} +.flex-caption p { + margin: 0 0 15px; +} +.skill-home { + margin-bottom: 50px; +} +.c1 { + border: #ed5441 1px solid; + background: #ed5441; +} +.c2 { + border: #d867b2 1px solid; + background: #d867b2; +} +.c3 { + border: #61b331 1px solid; + background: #4bc567; +} +.c4 { + border: #609cec 1px solid; + background: #26aff0; +} +.skill-home .icons { + padding: 33px 0 0 0; + width: 100%; + height: 178px; + color: rgb(255, 255, 255); + font-size: 42px; + font-size: 76px; + text-align: center; + -ms-border-radius: 50%; + -moz-border-radius: 50%; + -webkit-border-radius: 50%; + border-radius: 0; + display: inline-table; +} +.skill-home h2 { + padding-top: 20px; + font-size: 36px; + font-weight: 700; +} +.testimonial-solid { + padding: 50px 0 60px 0; + margin: 0 0 0 0; + background: #efefef; + text-align: center; +} +.testi-icon-area { + text-align: center; + position: absolute; + top: -84px; + margin: 0 auto; + width: 100%; + color: #000; +} +.testi-icon-area .quote { + padding: 15px 0 0 0; + margin: 0 0 0 0; + background: #ffffff; + text-align: center; + color: #26aff0; + display: inline-table; + width: 70px; + height: 70px; + -ms-border-radius: 50%; + -moz-border-radius: 50%; + -webkit-border-radius: 50%; + border-radius: 0; + font-size: 42px; + border: 1px solid #26aff0; + display: none; +} + +.testi-icon-area .carousel-inner { + margin: 20px 0; +} +.carousel-indicators { + bottom: -30px; +} +.team-member { + text-align: center; + background-color: #f9f9f9; + padding-bottom: 15px; +} +.fancybox-title-inside-wrap { + padding: 3px 30px 6px; + background: #292929; +} + +.item_introtext { + background-color: rgba(254, 254, 255, 0.66); + margin: 0 auto; + display: inline-block; + padding: 25px; +} +.item_introtext span { + font-size: 20px; + display: block; + font-weight: bold; +} +.item_introtext strong { + font-size: 50px; + display: block; + padding: 14px 0 30px; +} +.item_introtext p { + font-size: 20px !important; + color: #1c1d21; + font-weight: bold; +} + +.form-control { + border-radius: 0; +} + +/* Testimonial +----------------------------------*/ +.testimonial-area { + padding: 0 0 0 0; + margin: 0; + 
background: url(../img/low-poly01.jpg) fixed center center; + background-size: cover; + -webkit-background-size: cover; + -moz-background-size: cover; + -ms-background-size: cover; + color: red; +} +.testimonial-solid p { + color: #1f1f1f; + font-size: 16px; + line-height: 30px; + font-style: italic; +} +section.callaction { + background: #fff; + padding: 50px 0 0 0; +} + +/* Content +==================================== */ + +#content { + position: relative; + background: #fff; + padding: 50px 0 0px 0; +} + +#content img { + max-width: 100%; + height: auto; +} + +.cta-text { + text-align: center; + margin-top: 10px; +} + +.big-cta .cta { + margin-top: 10px; +} + +.box { + width: 100%; +} +.box-gray { + background: #f8f8f8; + padding: 20px 20px 30px; +} +.box-gray h4, +.box-gray i { + margin-bottom: 20px; +} +.box-bottom { + padding: 20px 0; + text-align: center; +} +.box-bottom a { + color: #fff; + font-weight: 700; +} +.box-bottom a:hover { + color: #eee; + text-decoration: none; +} + +/* Bottom +==================================== */ + +#bottom { + background: #fcfcfc; + padding: 50px 0 0; +} +/* twitter */ +#twitter-wrapper { + text-align: center; + width: 70%; + margin: 0 auto; +} +#twitter em { + font-style: normal; + font-size: 13px; +} + +#twitter em.twitterTime a { + font-weight: 600; +} + +#twitter ul { + padding: 0; + list-style: none; +} +#twitter ul li { + font-size: 20px; + line-height: 1.6em; + font-weight: 300; + margin-bottom: 20px; + position: relative; + word-break: break-word; +} + +/* page headline +==================================== */ + +#inner-headline { + background: #14a085; + position: relative; + margin: 0; + padding: 0; + color: #fefefe; + /* margin: 15px; */ + border-top: 10px solid #11967c; +} + +#inner-headline .inner-heading h2 { + color: #fff; + margin: 20px 0 0 0; +} + +/* --- breadcrumbs --- */ +#inner-headline ul.breadcrumb { + margin: 30px 0 0; + float: left; +} + +#inner-headline ul.breadcrumb li { + margin-bottom: 0; + padding-bottom: 0; +} +#inner-headline ul.breadcrumb li { + font-size: 13px; + color: #fff; +} + +#inner-headline ul.breadcrumb li i { + color: #dedede; +} + +#inner-headline ul.breadcrumb li a { + color: #fff; +} + +ul.breadcrumb li a:hover { + text-decoration: none; +} + +/* Forms +============================= */ + +/* --- contact form ---- */ +form#contactform input[type="text"] { + width: 100%; + border: 1px solid #f5f5f5; + min-height: 40px; + padding-left: 20px; + font-size: 13px; + padding-right: 20px; + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} + +form#contactform textarea { + border: 1px solid #f5f5f5; + width: 100%; + padding-left: 20px; + padding-top: 10px; + font-size: 13px; + padding-right: 20px; + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} + +form#contactform .validation { + font-size: 11px; +} + +#sendmessage { + border: 1px solid #e6e6e6; + background: #f6f6f6; + display: none; + text-align: center; + padding: 15px 12px 15px 65px; + margin: 10px 0; + font-weight: 600; + margin-bottom: 30px; +} + +#sendmessage.show, +.show { + display: block; +} + +form#commentform input[type="text"] { + width: 100%; + min-height: 40px; + padding-left: 20px; + font-size: 13px; + padding-right: 20px; + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; + -webkit-border-radius: 2px 2px 2px 2px; + -moz-border-radius: 2px 2px 2px 2px; + border-radius: 2px 2px 2px 2px; +} + +form#commentform textarea { + 
width: 100%; + padding-left: 20px; + padding-top: 10px; + font-size: 13px; + padding-right: 20px; + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; + -webkit-border-radius: 2px 2px 2px 2px; + -moz-border-radius: 2px 2px 2px 2px; + border-radius: 2px 2px 2px 2px; +} + +/* --- search form --- */ +.search { + float: right; + margin: 35px 0 0; + padding-bottom: 0; +} + +#inner-headline form.input-append { + margin: 0; + padding: 0; +} + +/* Portfolio +================================ */ + +.work-nav #filters { + margin: 0; + padding: 0; + list-style: none; +} + +.work-nav #filters li { + margin: 0 10px 30px 0; + padding: 0; + float: left; +} + +.work-nav #filters li a { + color: #7f8289; + font-size: 16px; + display: block; +} + +.work-nav #filters li a:hover { +} + +.work-nav #filters li a.selected { + color: #de5e60; +} + +#thumbs { + margin: 0; + padding: 0; +} + +#thumbs li { + list-style-type: none; +} + +.item-thumbs { + position: relative; + overflow: hidden; + margin-bottom: 30px; + cursor: pointer; +} + +.item-thumbs a + img { + width: 100%; +} + +.item-thumbs .hover-wrap { + position: absolute; + display: block; + width: 100%; + height: 100%; + + opacity: 0; + filter: alpha(opacity=0); + + -webkit-transition: all 450ms ease-out 0s; + -moz-transition: all 450ms ease-out 0s; + -o-transition: all 450ms ease-out 0s; + transition: all 450ms ease-out 0s; + + -webkit-transform: rotateY(180deg) scale(0.5, 0.5); + -moz-transform: rotateY(180deg) scale(0.5, 0.5); + -ms-transform: rotateY(180deg) scale(0.5, 0.5); + -o-transform: rotateY(180deg) scale(0.5, 0.5); + transform: rotateY(180deg) scale(0.5, 0.5); +} + +.item-thumbs:hover .hover-wrap, +.item-thumbs.active .hover-wrap { + opacity: 1; + filter: alpha(opacity=100); + + -webkit-transform: rotateY(0deg) scale(1, 1); + -moz-transform: rotateY(0deg) scale(1, 1); + -ms-transform: rotateY(0deg) scale(1, 1); + -o-transform: rotateY(0deg) scale(1, 1); + transform: rotateY(0deg) scale(1, 1); +} + +.item-thumbs .hover-wrap .overlay-img { + position: absolute; + width: 90%; + height: 91%; + opacity: 0.5; + filter: alpha(opacity=80); + background: #14a085; +} + +.item-thumbs .hover-wrap .overlay-img-thumb { + position: absolute; + border-radius: 60px; + top: 50%; + left: 45%; + margin: -16px 0 0 -16px; + color: #fff; + font-size: 32px; + line-height: 1em; + opacity: 1; + filter: alpha(opacity=100); +} + +ul.portfolio-categ { + margin: 10px 0 30px 0; + padding: 0; + float: left; + list-style: none; +} + +ul.portfolio-categ li { + margin: 0; + float: left; + list-style: none; + font-size: 13px; + font-weight: 600; + border: 1px solid #d5d5d5; + margin-right: 15px; +} + +ul.portfolio-categ li a { + display: block; + padding: 8px 20px; + color: #14a085; +} +ul.portfolio-categ li.active { + border: 1px solid #d7d8d6; + + background-color: #eaeaea; +} +ul.portfolio-categ li.active a:hover, +ul.portfolio-categ li a:hover, +ul.portfolio-categ li a:focus, +ul.portfolio-categ li a:active { + text-decoration: none; + outline: 0; +} +#accordion-alt3 .panel-heading h4 { + font-size: 13px; + line-height: 28px; + color: #6b6b6b; +} +.panel .panel-heading h4 { + font-weight: 400; +} +.panel-title { + margin-top: 0; + margin-bottom: 0; + font-size: 15px; + color: inherit; +} +.panel-group .panel { + margin-bottom: 0; + border-radius: 2px; +} +.panel { + margin-bottom: 18px; + background-color: #b9b9b9; + border: 1px solid transparent; + border-radius: 2px; + -webkit-box-shadow: 0 1px 1px rgba(0, 0, 0, 0.05); + box-shadow: 0 
1px 1px rgba(0, 0, 0, 0.05); +} +#accordion-alt3 .panel-heading h4 a i { + font-size: 13px; + line-height: 18px; + width: 18px; + height: 18px; + margin-right: 5px; + color: #fff; + text-align: center; + border-radius: 50%; + margin-left: 6px; +} +.progress.pb-sm { + height: 6px !important; +} +.progress { + box-shadow: inset 0 0 2px rgba(0, 0, 0, 0.1); +} +.progress { + overflow: hidden; + height: 18px; + margin-bottom: 18px; + background-color: #f5f5f5; + border-radius: 2px; + -webkit-box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1); + box-shadow: inset 0 1px 2px rgba(0, 0, 0, 0.1); +} +.progress .progress-bar.progress-bar-red { + background: #ed5441; +} +.progress .progress-bar.progress-bar-green { + background: #51d466; +} +.progress .progress-bar.progress-bar-lblue { + background: #32c8de; +} +/* --- portfolio detail --- */ +.top-wrapper { + margin-bottom: 20px; +} +.info-blocks { + margin-bottom: 15px; +} +.info-blocks i.icon-info-blocks { + float: left; + color: #318fcf; + font-size: 30px; + min-width: 50px; + margin-top: 6px; + text-align: center; + background-color: #efefef; + padding: 15px; +} +.info-blocks .info-blocks-in { + padding: 0 10px; + overflow: hidden; +} +.info-blocks .info-blocks-in h3 { + color: #555; + font-size: 20px; + line-height: 28px; + margin: 0px; +} +.info-blocks .info-blocks-in p { + font-size: 12px; +} + +blockquote { + font-size: 16px; + font-weight: 400; + font-family: "Noto Serif", serif; + font-style: italic; + padding-left: 0; + color: #a2a2a2; + line-height: 1.6em; + border: none; +} + +blockquote cite { + display: block; + font-size: 12px; + color: #666; + margin-top: 10px; +} +blockquote cite:before { + content: "\2014 \0020"; +} +blockquote cite a, +blockquote cite a:visited, +blockquote cite a:visited { + color: #555; +} + +/* --- pullquotes --- */ + +.pullquote-left { + display: block; + color: #a2a2a2; + font-family: "Noto Serif", serif; + font-size: 14px; + line-height: 1.6em; + padding-left: 20px; +} + +.pullquote-right { + display: block; + color: #a2a2a2; + font-family: "Noto Serif", serif; + font-size: 14px; + line-height: 1.6em; + padding-right: 20px; +} + +/* --- button --- */ +.btn { + text-align: center; + background: #318cca; + color: #fff; + border-radius: 0; + padding: 10px 30px; +} +.btn-theme { + color: #fff; +} +.btn-theme:hover { + color: #eee; +} + +/* --- list style --- */ + +ul.general { + list-style: none; + margin-left: 0; +} + +ul.link-list { + margin: 0; + padding: 0; + list-style: none; +} + +ul.link-list li { + margin: 0; + padding: 2px 0 2px 0; + list-style: none; +} +footer { + background: #14a085; +} +footer ul.link-list li a { + color: #ffffff; +} +footer ul.link-list li a:hover { + color: #e2e2e2; +} +/* --- Heading style --- */ + +h4.heading { + font-weight: 700; +} + +.heading { + margin-bottom: 30px; +} + +.heading { + position: relative; +} + +.widgetheading { + width: 100%; + + padding: 0; +} + +#bottom .widgetheading { + position: relative; + border-bottom: #e6e6e6 1px solid; + padding-bottom: 9px; +} + +aside .widgetheading { + position: relative; + border-bottom: #e9e9e9 1px solid; + padding-bottom: 9px; +} + +footer .widgetheading { + position: relative; +} + +footer .widget .social-network { + position: relative; +} + +#bottom .widget .widgetheading span, +aside .widget .widgetheading span, +footer .widget .widgetheading span { + position: absolute; + width: 60px; + height: 1px; + bottom: -1px; + right: 0; +} +.box-area { + border: 1px solid #f3f3f3; + padding: 0 15px 12px; + padding-top: 41px; + 
margin-top: -42px; + text-align: left; + background-color: #f9f9f9; + position: relative; +} +/* --- Map --- */ +.map { + position: relative; + margin-top: -50px; + margin-bottom: 40px; +} + +.map iframe { + width: 100%; + height: 450px; + border: none; +} + +.map-grid iframe { + width: 100%; + height: 350px; + border: none; + margin: 0 0 -5px 0; + padding: 0; +} + +ul.team-detail { + margin: -10px 0 0 0; + padding: 0; + list-style: none; +} + +ul.team-detail li { + border-bottom: 1px dotted #e9e9e9; + margin: 0 0 15px 0; + padding: 0 0 15px 0; + list-style: none; +} + +ul.team-detail li label { + font-size: 13px; +} + +ul.team-detail li h4, +ul.team-detail li label { + margin-bottom: 0; +} + +ul.team-detail li ul.social-network { + border: none; + margin: 0; + padding: 0; +} + +ul.team-detail li ul.social-network li { + border: none; + margin: 0; +} +ul.team-detail li ul.social-network li i { + margin: 0; +} + +.pricing-title { + background: #fff; + text-align: center; + padding: 10px 0 10px 0; +} + +.pricing-title h3 { + font-weight: 600; + margin-bottom: 0; +} + +.pricing-offer { + background: #fcfcfc; + text-align: center; + padding: 40px 0 40px 0; + font-size: 18px; + border-top: 1px solid #e6e6e6; + border-bottom: 1px solid #e6e6e6; +} + +.pricing-box.activeItem .pricing-offer { + color: #fff; +} + +.pricing-offer strong { + font-size: 78px; + line-height: 89px; +} + +.pricing-offer sup { + font-size: 28px; +} + +.pricing-container { + background: #fff; + text-align: center; + font-size: 14px; +} + +.pricing-container strong { + color: #353535; +} + +.pricing-container ul { + list-style: none; + padding: 0; + margin: 0; +} + +.pricing-container ul li { + border-bottom: 1px solid #f5f5f5; + list-style: none; + padding: 15px 0 15px 0; + margin: 0 0 0 0; + color: #222; +} + +.pricing-action { + margin: 0; + background: #fcfcfc; + text-align: center; + padding: 20px 0 30px 0; +} + +.pricing-wrapp { + margin: 0 auto; + width: 100%; + background: #fd0000; +} +.pricing-box-item { + border: 1px solid #f5f5f5; + + background: #f9f9f9; + position: relative; + margin: 0 0 20px 0; + padding: 0; + -webkit-box-shadow: 0 2px 0 rgba(0, 0, 0, 0.03); + -moz-box-shadow: 0 2px 0 rgba(0, 0, 0, 0.03); + box-shadow: 0 2px 0 rgba(0, 0, 0, 0.03); + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} + +.pricing-box-item .pricing-heading { + text-align: center; + padding: 0px 0 0px 0; + display: block; +} +.pricing-box-item.activeItem .pricing-heading { + text-align: center; + padding: 0px 0 1px 0; + border-bottom: none; + display: block; + color: #fff; +} +.pricing-box-item.activeItem .pricing-heading h3 { +} + +.pricing-box-item .pricing-heading h3 strong { + font-size: 20px; + font-weight: 700; + letter-spacing: -1px; +} +.pricing-box-item .pricing-heading h3 { + font-size: 35px; + font-weight: 300; + letter-spacing: -1px; +} + +.pricing-box-item .pricing-terms { + text-align: center; + display: block; + overflow: hidden; + padding: 11px 0 5px; +} + +.pricing-box-item .pricing-terms h6 { + font-style: italic; + margin-top: 10px; + color: #14a085; + font-size: 22px; + font-family: "Noto Serif", serif; +} + +.pricing-box-item .icon .price-circled { + margin: 10px 10px 10px 0; + display: inline-block !important; + text-align: center !important; + color: #fff; + width: 68px; + height: 68px; + padding: 12px; + font-size: 16px; + font-weight: 700; + line-height: 68px; + text-shadow: none; + cursor: pointer; + background-color: #888; + border-radius: 64px; + 
-moz-border-radius: 64px; + -webkit-border-radius: 64px; +} + +.pricing-box-item .pricing-action { + margin: 0; + text-align: center; + padding: 30px 0 30px 0; +} + +/* ===== Widgets ===== */ + +/* --- flickr --- */ +.widget .flickr_badge { + width: 100%; +} +.widget .flickr_badge img { + margin: 0 9px 20px 0; +} + +footer .widget .flickr_badge { + width: 100%; +} +footer .widget .flickr_badge img { + margin: 0 9px 20px 0; +} + +.flickr_badge img { + width: 50px; + height: 50px; + float: left; + margin: 0 9px 20px 0; +} + +/* --- Recent post widget --- */ + +.recent-post { + margin: 20px 0 0 0; + padding: 0; + line-height: 18px; +} + +.recent-post h5 a:hover { + text-decoration: none; +} + +.recent-post .text h5 a { + color: #353535; +} + +footer { + padding: 50px 0 0 0; + color: #f8f8f8; +} + +footer a { + color: #fff; +} + +footer a:hover { + color: #eee; +} + +footer h1, +footer h2, +footer h3, +footer h4, +footer h5, +footer h6 { + color: #fff; +} + +footer address { + line-height: 1.6em; + color: #ffffff; +} + +footer h5 a:hover, +footer a:hover { + text-decoration: none; +} + +ul.social-network { + list-style: none; + margin: 0; +} + +ul.social-network li { + display: inline; + margin: 0 5px; +} + +#sub-footer { + text-shadow: none; + color: #f5f5f5; + padding: 0; + padding-top: 30px; + margin: 20px 0 0 0; + background: #14a085; +} + +#sub-footer p { + margin: 0; + padding: 0; +} + +#sub-footer span { + color: #f5f5f5; +} + +.copyright { + text-align: left; + font-size: 12px; +} + +#sub-footer ul.social-network { + float: right; +} + +/* scroll to top */ +.scrollup { + position: fixed; + width: 32px; + height: 32px; + bottom: 0px; + right: 20px; + background: #222; +} + +a.scrollup { + outline: 0; + text-align: center; +} + +a.scrollup:hover, +a.scrollup:active, +a.scrollup:focus { + opacity: 1; + text-decoration: none; +} +a.scrollup i { + margin-top: 10px; + color: #fff; +} +a.scrollup i:hover { + text-decoration: none; +} + +.absolute { + position: absolute; +} + +.relative { + position: relative; +} + +.aligncenter { + text-align: center; +} + +.aligncenter span { + margin-left: 0; +} + +.floatright { + float: right; +} + +.floatleft { + float: left; +} + +.floatnone { + float: none; +} + +.aligncenter { + text-align: center; +} + +img.pull-left, +.align-left { + float: left; + margin: 0 15px 15px 0; +} + +.widget img.pull-left { + float: left; + margin: 0 15px 15px 0; +} + +img.pull-right, +.align-right { + float: right; + margin: 0 0 15px 15px; +} + +article img.pull-left, +article .align-left { + float: left; + margin: 5px 15px 15px 0; +} + +article img.pull-right, +article .align-right { + float: right; + margin: 5px 0 15px 15px; +} +============================= */ .clear-marginbot { + margin-bottom: 0; +} + +.marginbot10 { + margin-bottom: 10px; +} +.marginbot20 { + margin-bottom: 20px; +} +.marginbot30 { + margin-bottom: 30px; +} +.marginbot40 { + margin-bottom: 40px; +} + +.clear-margintop { + margin-top: 0; +} + +.margintop10 { + margin-top: 10px; +} + +.margintop20 { + margin-top: 20px; +} + +.margintop30 { + margin-top: 30px; +} + +.margintop40 { + margin-top: 40px; +} + +/* Media queries +============================= */ + +@media (min-width: 768px) and (max-width: 979px) { + a.detail { + background: none; + width: 100%; + } + + footer .widget form input#appendedInputButton { + display: block; + width: 91%; + -webkit-border-radius: 4px 4px 4px 4px; + -moz-border-radius: 4px 4px 4px 4px; + border-radius: 4px 4px 4px 4px; + } + + footer .widget form .input-append .btn { 
+ display: block; + width: 100%; + padding-right: 0; + padding-left: 0; + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; + margin-top: 10px; + } + + ul.related-folio li { + width: 156px; + margin: 0 20px 0 0; + } +} + +@media (max-width: 767px) { + body { + padding-right: 0; + padding-left: 0; + } + .navbar-brand { + margin-top: 10px; + border-bottom: none; + } + .navbar-header { + margin-top: 20px; + border-bottom: none; + } + + .navbar-nav { + border-top: none; + float: none; + width: 100%; + } + .navbar .nav > .active > a, + .navbar .nav > .active > a:hover { + background: none; + font-weight: 700; + color: #26aff0; + } + header .navbar-nav > li { + padding-bottom: 0px; + padding-top: 2px; + } + header .nav li .dropdown-menu { + margin-top: 0; + } + + .dropdown-menu { + position: absolute; + top: 0; + left: 40px; + z-index: 1000; + display: none; + float: left; + min-width: 160px; + padding: 5px 0; + margin: 2px 0 0; + font-size: 13px; + list-style: none; + background-color: #fff; + background-clip: padding-box; + border: 1px solid #f5f5f5; + border: 1px solid rgba(0, 0, 0, 0.15); + border-radius: 0; + -webkit-box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175); + box-shadow: 0 6px 12px rgba(0, 0, 0, 0.175); + } + + .navbar-collapse.collapse { + border: none; + overflow: hidden; + } + + .box { + border-bottom: 1px solid #e9e9e9; + padding-bottom: 20px; + } + + #featured .flexslider .slide-caption { + width: 90%; + padding: 2%; + position: absolute; + left: 0; + bottom: -40px; + } + + #inner-headline .breadcrumb { + float: left; + clear: both; + width: 100%; + } + + .breadcrumb > li { + font-size: 13px; + } + + ul.portfolio li article a i.icon-48 { + width: 20px; + height: 20px; + font-size: 16px; + line-height: 20px; + } + + .left-sidebar { + border-right: none; + padding: 0 0 0 0; + border-bottom: 1px dotted #e6e6e6; + padding-bottom: 10px; + margin-bottom: 40px; + } + + .right-sidebar { + margin-top: 30px; + border-left: none; + padding: 0 0 0 0; + } + + footer .col-lg-1, + footer .col-lg-2, + footer .col-lg-3, + footer .col-lg-4, + footer .col-lg-5, + footer .col-lg-6, + footer .col-lg-7, + footer .col-lg-8, + footer .col-lg-9, + footer .col-lg-10, + footer .col-lg-11, + footer .col-lg-12 { + margin-bottom: 20px; + } + + #sub-footer ul.social-network { + float: left; + } + + [class*="span"] { + margin-bottom: 20px; + } +} + +@media (max-width: 480px) { + .bottom-article a.pull-right { + float: left; + margin-top: 20px; + } + + .search { + float: left; + } + + .flexslider .flex-caption { + display: none; + } + + .cta-text { + margin: 0 auto; + text-align: center; + } + + ul.portfolio li article a i { + width: 20px; + height: 20px; + font-size: 14px; + } +} + +.box-area:before { + position: absolute; + width: 100%; + height: 100%; + z-index: 0; + background-color: red; + content: ""; + position: absolute; + top: 7px; + left: -1px; + width: 100%; + height: 23px; + background: #f9f9f9; + -moz-transform: skewY(-3deg); + -o-transform: skewY(-3deg); + -ms-transform: skewY(-3deg); + -webkit-transform: skewY(-3deg); + transform: skewY(11deg); + background-size: cover; +} +.box-area:after { + position: absolute; + width: 100%; + height: 100%; + z-index: 0; + background-color: red; + content: ""; + position: absolute; + top: 7px; + left: 1px; + width: 100%; + height: 22px; + background: #f9f9f9; + -moz-transform: skewY(-3deg); + -o-transform: skewY(-3deg); + -ms-transform: skewY(-3deg); + -webkit-transform: skewY(-3deg); + transform: skewY(-11deg); + 
background-size: cover; +} +.box-area h3 { + margin-top: -16px; + z-index: 12; + position: relative; +} +.courses { + padding: 50px 0; +} +.carousel-indicators li { + display: inline-block; + border: 1px solid #929292; +} +.textbox { + background-color: #efefef; + padding: 4px 25px; +} +.textbox h3 { + margin: 0; + padding: 22px 0 14px; + font-size: 18px; +} diff --git a/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.eot b/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.eot new file mode 100644 index 00000000000..7ac16dbc9ea Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.eot differ diff --git a/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.svg b/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.svg new file mode 100644 index 00000000000..5c2071ee504 --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.svg @@ -0,0 +1,1186 @@ + + + + +This is a custom SVG font generated by IcoMoon. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.ttf b/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.ttf new file mode 100644 index 00000000000..0a2ac6fa70b Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.ttf differ diff --git a/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.woff b/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.woff new file mode 100644 index 00000000000..f9391cb4fa6 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/fonts/customicon/icons.woff differ diff --git a/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.eot b/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.eot new file mode 100644 index 00000000000..7c79c6a6bc9 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.eot differ diff --git a/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.svg b/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.svg new file mode 100644 index 00000000000..45fdf338301 --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.svg @@ -0,0 +1,414 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.ttf b/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.ttf new file mode 100644 index 00000000000..e89738de5ea Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.ttf differ diff --git a/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.woff b/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.woff new file mode 100644 index 00000000000..8c1748aab7a Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/fonts/fontawesome-webfont.woff differ diff --git a/backend/tests/integration/tests/pruning/website/fonts/fontawesome.otf b/backend/tests/integration/tests/pruning/website/fonts/fontawesome.otf new file mode 100644 index 00000000000..8b0f54e47e1 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/fonts/fontawesome.otf differ diff --git a/backend/tests/integration/tests/pruning/website/img/avatar.png b/backend/tests/integration/tests/pruning/website/img/avatar.png new file mode 100644 index 00000000000..f11955333e0 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/avatar.png differ diff --git a/backend/tests/integration/tests/pruning/website/img/bg_direction_nav.png b/backend/tests/integration/tests/pruning/website/img/bg_direction_nav.png new file mode 100644 index 00000000000..59b2e718c83 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/bg_direction_nav.png differ diff --git a/backend/tests/integration/tests/pruning/website/img/glyphicons-halflings-white.png b/backend/tests/integration/tests/pruning/website/img/glyphicons-halflings-white.png new file mode 100644 index 00000000000..3bf6484a29d Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/glyphicons-halflings-white.png differ diff --git a/backend/tests/integration/tests/pruning/website/img/glyphicons-halflings.png b/backend/tests/integration/tests/pruning/website/img/glyphicons-halflings.png new file mode 100644 index 00000000000..a9969993201 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/glyphicons-halflings.png differ diff --git a/backend/tests/integration/tests/pruning/website/img/logo.png b/backend/tests/integration/tests/pruning/website/img/logo.png new file mode 100644 index 00000000000..04fb2a41478 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/logo.png differ diff --git a/backend/tests/integration/tests/pruning/website/img/nivo-bullets.png b/backend/tests/integration/tests/pruning/website/img/nivo-bullets.png new file mode 100644 index 00000000000..a84c9c0bdcc Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/nivo-bullets.png differ diff --git a/backend/tests/integration/tests/pruning/website/img/section-image-1.png b/backend/tests/integration/tests/pruning/website/img/section-image-1.png new file mode 100644 index 00000000000..9c0fab01c00 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/section-image-1.png differ diff --git a/backend/tests/integration/tests/pruning/website/img/service1.jpg 
b/backend/tests/integration/tests/pruning/website/img/service1.jpg new file mode 100644 index 00000000000..ed8c9c35579 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/service1.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/service2.jpg b/backend/tests/integration/tests/pruning/website/img/service2.jpg new file mode 100644 index 00000000000..1e42801fab2 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/service2.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/service3.jpg b/backend/tests/integration/tests/pruning/website/img/service3.jpg new file mode 100644 index 00000000000..0332b3e3dcd Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/service3.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/slides/1.jpg b/backend/tests/integration/tests/pruning/website/img/slides/1.jpg new file mode 100644 index 00000000000..872131c2dc6 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/slides/1.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/slides/2.jpg b/backend/tests/integration/tests/pruning/website/img/slides/2.jpg new file mode 100644 index 00000000000..0e7fc381d43 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/slides/2.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/slides/3.jpg b/backend/tests/integration/tests/pruning/website/img/slides/3.jpg new file mode 100644 index 00000000000..67eb62b93fe Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/slides/3.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/team1.jpg b/backend/tests/integration/tests/pruning/website/img/team1.jpg new file mode 100644 index 00000000000..0e0c282cad0 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/team1.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/team2.jpg b/backend/tests/integration/tests/pruning/website/img/team2.jpg new file mode 100644 index 00000000000..242d6c79d94 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/team2.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/team3.jpg b/backend/tests/integration/tests/pruning/website/img/team3.jpg new file mode 100644 index 00000000000..fcbb2908d4b Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/team3.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/team4.jpg b/backend/tests/integration/tests/pruning/website/img/team4.jpg new file mode 100644 index 00000000000..88039d54e8c Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/team4.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/works/1.jpg b/backend/tests/integration/tests/pruning/website/img/works/1.jpg new file mode 100644 index 00000000000..c6fce1776df Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/works/1.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/works/2.jpg b/backend/tests/integration/tests/pruning/website/img/works/2.jpg new file mode 100644 index 00000000000..4b6e0d1a713 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/works/2.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/works/3.jpg 
b/backend/tests/integration/tests/pruning/website/img/works/3.jpg new file mode 100644 index 00000000000..fd8b3b6729e Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/works/3.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/works/4.jpg b/backend/tests/integration/tests/pruning/website/img/works/4.jpg new file mode 100644 index 00000000000..a55d6eafbeb Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/works/4.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/works/5.jpg b/backend/tests/integration/tests/pruning/website/img/works/5.jpg new file mode 100644 index 00000000000..e5907a77938 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/works/5.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/works/6.jpg b/backend/tests/integration/tests/pruning/website/img/works/6.jpg new file mode 100644 index 00000000000..9758bd59378 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/works/6.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/works/7.jpg b/backend/tests/integration/tests/pruning/website/img/works/7.jpg new file mode 100644 index 00000000000..78c73c643c2 Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/works/7.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/img/works/8.jpg b/backend/tests/integration/tests/pruning/website/img/works/8.jpg new file mode 100644 index 00000000000..4570ff38ebb Binary files /dev/null and b/backend/tests/integration/tests/pruning/website/img/works/8.jpg differ diff --git a/backend/tests/integration/tests/pruning/website/index.html b/backend/tests/integration/tests/pruning/website/index.html new file mode 100644 index 00000000000..39e5fa6ff02 --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/index.html @@ -0,0 +1,309 @@ + + + + +Above Multi-purpose Free Bootstrap Responsive Template + + + + + + + + + + + + + + + + +
+ +
+ +
+ + +
+
+
+
+

Our Featured Courses

Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus, vero mollitia velit ad consectetur. Alias, laborum excepturi nihil autem nemo numquam, ipsa architecto non, magni consequuntur quam.
+
+
+
+
+
+ + +
+
+
+
+
+

Web Development

Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident

+
+
+
+

UI Design

Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident

+
+
+
+

Interaction

Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident

+
+
+
+

User Experience

Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident

+
+
+
+ + +
+
+
+
+
+
+
+ +
+
+ +
+
+
+
+
+ +
+
+

Courses We Offer

Lorem ipsum dolor sit amet, consectetur adipisicing elit. Dolores quae porro consequatur aliquam, incidunt eius magni provident, doloribus omnis minus temporibus perferendis nesciunt quam repellendus nulla nemo ipsum odit corrupti consequuntur possimus, vero mollitia velit ad consectetur. Alias, laborum excepturi nihil autem nemo numquam, ipsa architecto non, magni consequuntur quam.
+
+
+
+
+
+

Heading Course

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vestibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.

+
+
+
+

Heading Course

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vestibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.

+
+
+
+

Heading Course

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vestibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.

+
+
+
+
+
+

Heading Course

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vestibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.

+
+
+
+

Heading Course

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vestibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.

+
+
+
+

Heading Course

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Praesent vestibulum molestie lacus. Aenean nonummy hendrerit mauris. Phasellus porta.

+
+
+
+
+ +
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/backend/tests/integration/tests/pruning/website/js/animate.js b/backend/tests/integration/tests/pruning/website/js/animate.js new file mode 100644 index 00000000000..98875e1b657 --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/js/animate.js @@ -0,0 +1,477 @@ +jQuery(document).ready(function ($) { + //animate effect + $(".e_flash").hover( + function () { + $(this).addClass("animated flash"); + }, + function () { + $(this).removeClass("animated flash"); + }, + ); + $(".e_bounce").hover( + function () { + $(this).addClass("animated bounce"); + }, + function () { + $(this).removeClass("animated bounce"); + }, + ); + + $(".e_shake").hover( + function () { + $(this).addClass("animated shake"); + }, + function () { + $(this).removeClass("animated shake"); + }, + ); + $(".e_tada").hover( + function () { + $(this).addClass("animated tada"); + }, + function () { + $(this).removeClass("animated tada"); + }, + ); + $(".e_swing").hover( + function () { + $(this).addClass("animated swing"); + }, + function () { + $(this).removeClass("animated swing"); + }, + ); + $(".e_wobble").hover( + function () { + $(this).addClass("animated wobble"); + }, + function () { + $(this).removeClass("animated wobble"); + }, + ); + $(".e_wiggle").hover( + function () { + $(this).addClass("animated wiggle"); + }, + function () { + $(this).removeClass("animated wiggle"); + }, + ); + $(".e_pulse").hover( + function () { + $(this).addClass("animated pulse"); + }, + function () { + $(this).removeClass("animated pulse"); + }, + ); + + $(".e_flip").hover( + function () { + $(this).addClass("animated flip"); + }, + function () { + $(this).removeClass("animated flip"); + }, + ); + $(".e_flipInX").hover( + function () { + $(this).addClass("animated flipInX"); + }, + function () { + $(this).removeClass("animated flipInX"); + }, + ); + $(".e_flipOutX").hover( + function () { + $(this).addClass("animated flipOutX"); + }, + function () { + $(this).removeClass("animated flipOutX"); + }, + ); + $(".e_flipInY").hover( + function () { + $(this).addClass("animated flipInY"); + }, + function () { + $(this).removeClass("animated flipInY"); + }, + ); + $(".e_flipOutY").hover( + function () { + $(this).addClass("animated flipOutY"); + }, + function () { + $(this).removeClass("animated flipOutY"); + }, + ); + + //Fading entrances + $(".e_fadeIn").hover( + function () { + $(this).addClass("animated fadeIn"); + }, + function () { + $(this).removeClass("animated fadeIn"); + }, + ); + $(".e_fadeInUp").hover( + function () { + $(this).addClass("animated fadeInUp"); + }, + function () { + $(this).removeClass("animated fadeInUp"); + }, + ); + $(".e_fadeInDown").hover( + function () { + $(this).addClass("animated fadeInDown"); + }, + function () { + $(this).removeClass("animated fadeInDown"); + }, + ); + $(".e_fadeInLeft").hover( + function () { + $(this).addClass("animated fadeInLeft"); + }, + function () { + $(this).removeClass("animated fadeInLeft"); + }, + ); + $(".e_fadeInRight").hover( + function () { + $(this).addClass("animated fadeInRight"); + }, + function () { + $(this).removeClass("animated fadeInRight"); + }, + ); + $(".e_fadeInUpBig").hover( + function () { + $(this).addClass("animated fadeInUpBig"); + }, + function () { + $(this).removeClass("animated fadeInUpBig"); + }, + ); + $(".e_fadeInUpBig").hover( + function () { + $(this).addClass("animated fadeInUpBig"); + }, + function () { + $(this).removeClass("animated fadeInUpBig"); + }, + ); + 
$(".e_fadeInDownBig").hover( + function () { + $(this).addClass("animated fadeInDownBig"); + }, + function () { + $(this).removeClass("animated fadeInDownBig"); + }, + ); + $(".e_fadeInLeftBig").hover( + function () { + $(this).addClass("animated fadeInLeftBig"); + }, + function () { + $(this).removeClass("animated fadeInLeftBig"); + }, + ); + $(".e_fadeInRightBig").hover( + function () { + $(this).addClass("animated fadeInRightBig"); + }, + function () { + $(this).removeClass("animated fadeInRightBig"); + }, + ); + + //Fading exits + $(".e_fadeOut").hover( + function () { + $(this).addClass("animated fadeOut"); + }, + function () { + $(this).removeClass("animated fadeOut"); + }, + ); + $(".e_fadeOutUp").hover( + function () { + $(this).addClass("animated fadeOutUp"); + }, + function () { + $(this).removeClass("animated fadeOutUp"); + }, + ); + $(".e_fadeOutDown").hover( + function () { + $(this).addClass("animated fadeOutDown"); + }, + function () { + $(this).removeClass("animated fadeOutDown"); + }, + ); + $(".e_fadeOutLeft").hover( + function () { + $(this).addClass("animated fadeOutLeft"); + }, + function () { + $(this).removeClass("animated fadeOutLeft"); + }, + ); + $(".e_fadeOutRight").hover( + function () { + $(this).addClass("animated fadeOutRight"); + }, + function () { + $(this).removeClass("animated fadeOutRight"); + }, + ); + $(".e_fadeOutUpBig").hover( + function () { + $(this).addClass("animated fadeOutUpBig"); + }, + function () { + $(this).removeClass("animated fadeOutUpBig"); + }, + ); + $(".e_fadeOutDownBig").hover( + function () { + $(this).addClass("animated fadeOutDownBig"); + }, + function () { + $(this).removeClass("animated fadeOutDownBig"); + }, + ); + $(".e_fadeOutLeftBig").hover( + function () { + $(this).addClass("animated fadeOutLeftBig"); + }, + function () { + $(this).removeClass("animated fadeOutLeftBig"); + }, + ); + $(".e_fadeOutRightBig").hover( + function () { + $(this).addClass("animated fadeOutRightBig"); + }, + function () { + $(this).removeClass("animated fadeOutRightBig"); + }, + ); + + //Bouncing entrances + $(".e_bounceIn").hover( + function () { + $(this).addClass("animated bounceIn"); + }, + function () { + $(this).removeClass("animated bounceIn"); + }, + ); + $(".e_bounceInDown").hover( + function () { + $(this).addClass("animated bounceInDown"); + }, + function () { + $(this).removeClass("animated bounceInDown"); + }, + ); + $(".e_bounceInUp").hover( + function () { + $(this).addClass("animated bounceInUp"); + }, + function () { + $(this).removeClass("animated bounceInUp"); + }, + ); + $(".e_bounceInLeft").hover( + function () { + $(this).addClass("animated bounceInLeft"); + }, + function () { + $(this).removeClass("animated bounceInLeft"); + }, + ); + $(".e_bounceInRight").hover( + function () { + $(this).addClass("animated bounceInRight"); + }, + function () { + $(this).removeClass("animated bounceInRight"); + }, + ); + + //Bouncing exits + $(".e_bounceOut").hover( + function () { + $(this).addClass("animated bounceOut"); + }, + function () { + $(this).removeClass("animated bounceOut"); + }, + ); + $(".e_bounceOutDown").hover( + function () { + $(this).addClass("animated bounceOutDown"); + }, + function () { + $(this).removeClass("animated bounceOutDown"); + }, + ); + $(".e_bounceOutUp").hover( + function () { + $(this).addClass("animated bounceOutUp"); + }, + function () { + $(this).removeClass("animated bounceOutUp"); + }, + ); + $(".e_bounceOutLeft").hover( + function () { + $(this).addClass("animated bounceOutLeft"); + }, + function () 
{ + $(this).removeClass("animated bounceOutLeft"); + }, + ); + $(".e_bounceOutRight").hover( + function () { + $(this).addClass("animated bounceOutRight"); + }, + function () { + $(this).removeClass("animated bounceOutRight"); + }, + ); + + //Rotating entrances + $(".e_rotateIn").hover( + function () { + $(this).addClass("animated rotateIn"); + }, + function () { + $(this).removeClass("animated rotateIn"); + }, + ); + $(".e_rotateInDownLeft").hover( + function () { + $(this).addClass("animated rotateInDownLeft"); + }, + function () { + $(this).removeClass("animated rotateInDownLeft"); + }, + ); + $(".e_rotateInDownRight").hover( + function () { + $(this).addClass("animated rotateInDownRight"); + }, + function () { + $(this).removeClass("animated rotateInDownRight"); + }, + ); + $(".e_rotateInUpRight").hover( + function () { + $(this).addClass("animated rotateInUpRight"); + }, + function () { + $(this).removeClass("animated rotateInUpRight"); + }, + ); + $(".e_rotateInUpLeft").hover( + function () { + $(this).addClass("animated rotateInUpLeft"); + }, + function () { + $(this).removeClass("animated rotateInUpLeft"); + }, + ); + + //Rotating exits + $(".e_rotateOut").hover( + function () { + $(this).addClass("animated rotateOut"); + }, + function () { + $(this).removeClass("animated rotateOut"); + }, + ); + $(".e_rotateOutDownLeft").hover( + function () { + $(this).addClass("animated rotateOutDownLeft"); + }, + function () { + $(this).removeClass("animated rotateOutDownLeft"); + }, + ); + $(".e_rotateOutDownRight").hover( + function () { + $(this).addClass("animated rotateOutDownRight"); + }, + function () { + $(this).removeClass("animated rotateOutDownRight"); + }, + ); + $(".e_rotateOutUpLeft").hover( + function () { + $(this).addClass("animated rotateOutUpLeft"); + }, + function () { + $(this).removeClass("animated rotateOutUpLeft"); + }, + ); + $(".e_rotateOutUpRight").hover( + function () { + $(this).addClass("animated rotateOutUpRight"); + }, + function () { + $(this).removeClass("animated rotateOutUpRight"); + }, + ); + + //Lightspeed + $(".e_lightSpeedIn").hover( + function () { + $(this).addClass("animated lightSpeedIn"); + }, + function () { + $(this).removeClass("animated lightSpeedIn"); + }, + ); + $(".e_lightSpeedOut").hover( + function () { + $(this).addClass("animated lightSpeedOut"); + }, + function () { + $(this).removeClass("animated lightSpeedOut"); + }, + ); + + //specials + $(".e_hinge").hover( + function () { + $(this).addClass("animated hinge"); + }, + function () { + $(this).removeClass("animated hinge"); + }, + ); + $(".e_rollIn").hover( + function () { + $(this).addClass("animated rollIn"); + }, + function () { + $(this).removeClass("animated rollIn"); + }, + ); + $(".e_rollOut").hover( + function () { + $(this).addClass("animated rollOut"); + }, + function () { + $(this).removeClass("animated rollOut"); + }, + ); +}); diff --git a/backend/tests/integration/tests/pruning/website/js/bootstrap.min.js b/backend/tests/integration/tests/pruning/website/js/bootstrap.min.js new file mode 100644 index 00000000000..d6c0c9a8f99 --- /dev/null +++ b/backend/tests/integration/tests/pruning/website/js/bootstrap.min.js @@ -0,0 +1,1352 @@ +/*! + * Bootstrap v3.1.0 (http://getbootstrap.com) + * Copyright 2011-2014 Twitter, Inc. 
+ * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
+ */
+if ("undefined" == typeof jQuery) throw new Error("Bootstrap requires jQuery");
++(function (a) {
+ "use strict";
+ function b() {
+ var a = document.createElement("bootstrap"),
+ b = {
+ WebkitTransition: "webkitTransitionEnd",
+ MozTransition: "transitionend",
+ OTransition: "oTransitionEnd otransitionend",
+ transition: "transitionend",
+ };
+ for (var c in b) if (void 0 !== a.style[c]) return { end: b[c] };
+ return !1;
+ }
+ (a.fn.emulateTransitionEnd = function (b) {
+ var c = !1,
+ d = this;
+ a(this).one(a.support.transition.end, function () {
+ c = !0;
+ });
+ var e = function () {
+ c || a(d).trigger(a.support.transition.end);
+ };
+ return setTimeout(e, b), this;
+ }),
+ a(function () {
+ a.support.transition = b();
+ });
+})(jQuery),
+
+(function (a) {
+ "use strict";
+ var b = '[data-dismiss="alert"]',
+ c = function (c) {
+ a(c).on("click", b, this.close);
+ };
+ c.prototype.close = function (b) {
+ function c() {
+ f.trigger("closed.bs.alert").remove();
+ }
+ var d = a(this),
+ e = d.attr("data-target");
+ e || ((e = d.attr("href")), (e = e && e.replace(/.*(?=#[^\s]*$)/, "")));
+ var f = a(e);
+ b && b.preventDefault(),
+ f.length || (f = d.hasClass("alert") ? d : d.parent()),
+ f.trigger((b = a.Event("close.bs.alert"))),
+ b.isDefaultPrevented() ||
+ (f.removeClass("in"),
+ a.support.transition && f.hasClass("fade")
+ ? f.one(a.support.transition.end, c).emulateTransitionEnd(150)
+ : c());
+ };
+ var d = a.fn.alert;
+ (a.fn.alert = function (b) {
+ return this.each(function () {
+ var d = a(this),
+ e = d.data("bs.alert");
+ e || d.data("bs.alert", (e = new c(this))),
+ "string" == typeof b && e[b].call(d);
+ });
+ }),
+ (a.fn.alert.Constructor = c),
+ (a.fn.alert.noConflict = function () {
+ return (a.fn.alert = d), this;
+ }),
+ a(document).on("click.bs.alert.data-api", b, c.prototype.close);
+ })(jQuery),
+
+(function (a) {
+ "use strict";
+ var b = function (c, d) {
+ (this.$element = a(c)),
+ (this.options = a.extend({}, b.DEFAULTS, d)),
+ (this.isLoading = !1);
+ };
+ (b.DEFAULTS = { loadingText: "loading..." }),
+ (b.prototype.setState = function (b) {
+ var c = "disabled",
+ d = this.$element,
+ e = d.is("input") ? "val" : "html",
+ f = d.data();
+ (b += "Text"),
+ f.resetText || d.data("resetText", d[e]()),
+ d[e](f[b] || this.options[b]),
+ setTimeout(
+ a.proxy(function () {
+ "loadingText" == b
+ ? ((this.isLoading = !0), d.addClass(c).attr(c, c))
+ : this.isLoading &&
+ ((this.isLoading = !1), d.removeClass(c).removeAttr(c));
+ }, this),
+ 0,
+ );
+ }),
+ (b.prototype.toggle = function () {
+ var a = !0,
+ b = this.$element.closest('[data-toggle="buttons"]');
+ if (b.length) {
+ var c = this.$element.find("input");
+ "radio" == c.prop("type") &&
+ (c.prop("checked") && this.$element.hasClass("active")
+ ? (a = !1)
+ : b.find(".active").removeClass("active")),
+ a &&
+ c
+ .prop("checked", !this.$element.hasClass("active"))
+ .trigger("change");
+ }
+ a && this.$element.toggleClass("active");
+ });
+ var c = a.fn.button;
+ (a.fn.button = function (c) {
+ return this.each(function () {
+ var d = a(this),
+ e = d.data("bs.button"),
+ f = "object" == typeof c && c;
+ e || d.data("bs.button", (e = new b(this, f))),
+ "toggle" == c ? e.toggle() : c && e.setState(c);
+ });
+ }),
+ (a.fn.button.Constructor = b),
+ (a.fn.button.noConflict = function () {
+ return (a.fn.button = c), this;
+ }),
+ a(document).on(
+ "click.bs.button.data-api",
+ "[data-toggle^=button]",
+ function (b) {
+ var c = a(b.target);
+ c.hasClass("btn") || (c = c.closest(".btn")),
+ c.button("toggle"),
+ b.preventDefault();
+ },
+ );
+ })(jQuery),
+
+(function (a) {
+ "use strict";
+ var b = function (b, c) {
+ (this.$element = a(b)),
+ (this.$indicators = this.$element.find(".carousel-indicators")),
+ (this.options = c),
+ (this.paused =
+ this.sliding =
+ this.interval =
+ this.$active =
+ this.$items =
+ null),
+ "hover" == this.options.pause &&
+ this.$element
+ .on("mouseenter", a.proxy(this.pause, this))
+ .on("mouseleave", a.proxy(this.cycle, this));
+ };
+ (b.DEFAULTS = { interval: 5e3, pause: "hover", wrap: !0 }),
+ (b.prototype.cycle = function (b) {
+ return (
+ b || (this.paused = !1),
+ this.interval && clearInterval(this.interval),
+ this.options.interval &&
+ !this.paused &&
+ (this.interval = setInterval(
+ a.proxy(this.next, this),
+ this.options.interval,
+ )),
+ this
+ );
+ }),
+ (b.prototype.getActiveIndex = function () {
+ return (
+ (this.$active = this.$element.find(".item.active")),
+ (this.$items = this.$active.parent().children()),
+ this.$items.index(this.$active)
+ );
+ }),
+ (b.prototype.to = function (b) {
+ var c = this,
+ d = this.getActiveIndex();
+ return b > this.$items.length - 1 || 0 > b
+ ? void 0
+ : this.sliding
+ ? this.$element.one("slid.bs.carousel", function () {
+ c.to(b);
+ })
+ : d == b
+ ? this.pause().cycle()
+ : this.slide(b > d ? "next" : "prev", a(this.$items[b]));
+ }),
+ (b.prototype.pause = function (b) {
+ return (
+ b || (this.paused = !0),
+ this.$element.find(".next, .prev").length &&
+ a.support.transition &&
+ (this.$element.trigger(a.support.transition.end), this.cycle(!0)),
+ (this.interval = clearInterval(this.interval)),
+ this
+ );
+ }),
+ (b.prototype.next = function () {
+ return this.sliding ? void 0 : this.slide("next");
+ }),
+ (b.prototype.prev = function () {
+ return this.sliding ? void 0 : this.slide("prev");
+ }),
+ (b.prototype.slide = function (b, c) {
+ var d = this.$element.find(".item.active"),
+ e = c || d[b](),
+ f = this.interval,
+ g = "next" == b ? "left" : "right",
+ h = "next" == b ? "first" : "last",
+ i = this;
+ if (!e.length) {
+ if (!this.options.wrap) return;
+ e = this.$element.find(".item")[h]();
+ }
+ if (e.hasClass("active")) return (this.sliding = !1);
+ var j = a.Event("slide.bs.carousel", {
+ relatedTarget: e[0],
+ direction: g,
+ });
+ return (
+ this.$element.trigger(j),
+ j.isDefaultPrevented()
+ ? void 0
+ : ((this.sliding = !0),
+ f && this.pause(),
+ this.$indicators.length &&
+ (this.$indicators.find(".active").removeClass("active"),
+ this.$element.one("slid.bs.carousel", function () {
+ var b = a(i.$indicators.children()[i.getActiveIndex()]);
+ b && b.addClass("active");
+ })),
+ a.support.transition && this.$element.hasClass("slide")
+ ? (e.addClass(b),
+ e[0].offsetWidth,
+ d.addClass(g),
+ e.addClass(g),
+ d
+ .one(a.support.transition.end, function () {
+ e.removeClass([b, g].join(" ")).addClass("active"),
+ d.removeClass(["active", g].join(" ")),
+ (i.sliding = !1),
+ setTimeout(function () {
+ i.$element.trigger("slid.bs.carousel");
+ }, 0);
+ })
+ .emulateTransitionEnd(
+ 1e3 * d.css("transition-duration").slice(0, -1),
+ ))
+ : (d.removeClass("active"),
+ e.addClass("active"),
+ (this.sliding = !1),
+ this.$element.trigger("slid.bs.carousel")),
+ f && this.cycle(),
+ this)
+ );
+ });
+ var c = a.fn.carousel;
+ (a.fn.carousel = function (c) {
+ return this.each(function () {
+ var d = a(this),
+ e = d.data("bs.carousel"),
+ f = a.extend({}, b.DEFAULTS, d.data(), "object" == typeof c && c),
+ g = "string" == typeof c ? c : f.slide;
+ e || d.data("bs.carousel", (e = new b(this, f))),
+ "number" == typeof c
+ ? e.to(c)
+ : g
+ ? e[g]()
+ : f.interval && e.pause().cycle();
+ });
+ }),
+ (a.fn.carousel.Constructor = b),
+ (a.fn.carousel.noConflict = function () {
+ return (a.fn.carousel = c), this;
+ }),
+ a(document).on(
+ "click.bs.carousel.data-api",
+ "[data-slide], [data-slide-to]",
+ function (b) {
+ var c,
+ d = a(this),
+ e = a(
+ d.attr("data-target") ||
+ ((c = d.attr("href")) && c.replace(/.*(?=#[^\s]+$)/, "")),
+ ),
+ f = a.extend({}, e.data(), d.data()),
+ g = d.attr("data-slide-to");
+ g && (f.interval = !1),
+ e.carousel(f),
+ (g = d.attr("data-slide-to")) && e.data("bs.carousel").to(g),
+ b.preventDefault();
+ },
+ ),
+ a(window).on("load", function () {
+ a('[data-ride="carousel"]').each(function () {
+ var b = a(this);
+ b.carousel(b.data());
+ });
+ });
+ })(jQuery),
+
+(function (a) {
+ "use strict";
+ var b = function (c, d) {
+ (this.$element = a(c)),
+ (this.options = a.extend({}, b.DEFAULTS, d)),
+ (this.transitioning = null),
+ this.options.parent && (this.$parent = a(this.options.parent)),
+ this.options.toggle && this.toggle();
+ };
+ (b.DEFAULTS = { toggle: !0 }),
+ (b.prototype.dimension = function () {
+ var a = this.$element.hasClass("width");
+ return a ? "width" : "height";
+ }),
+ (b.prototype.show = function () {
+ if (!this.transitioning && !this.$element.hasClass("in")) {
+ var b = a.Event("show.bs.collapse");
+ if ((this.$element.trigger(b), !b.isDefaultPrevented())) {
+ var c = this.$parent && this.$parent.find("> .panel > .in");
+ if (c && c.length) {
+ var d = c.data("bs.collapse");
+ if (d && d.transitioning) return;
+ c.collapse("hide"), d || c.data("bs.collapse", null);
+ }
+ var e = this.dimension();
+ this.$element.removeClass("collapse").addClass("collapsing")[e](0),
+ (this.transitioning = 1);
+ var f = function () {
+ this.$element
+ .removeClass("collapsing")
+ .addClass("collapse in")
+ [e]("auto"),
+ (this.transitioning = 0),
+ this.$element.trigger("shown.bs.collapse");
+ };
+ if (!a.support.transition) return f.call(this);
+ var g = a.camelCase(["scroll", e].join("-"));
+ this.$element
+ .one(a.support.transition.end, a.proxy(f, this))
+ .emulateTransitionEnd(350)
+ [e](this.$element[0][g]);
+ }
+ }
+ }),
+ (b.prototype.hide = function () {
+ if (!this.transitioning && this.$element.hasClass("in")) {
+ var b = a.Event("hide.bs.collapse");
+ if ((this.$element.trigger(b), !b.isDefaultPrevented())) {
+ var c = this.dimension();
+ this.$element[c](this.$element[c]())[0].offsetHeight,
+ this.$element
+ .addClass("collapsing")
+ .removeClass("collapse")
+ .removeClass("in"),
+ (this.transitioning = 1);
+ var d = function () {
+ (this.transitioning = 0),
+ this.$element
+ .trigger("hidden.bs.collapse")
+ .removeClass("collapsing")
+ .addClass("collapse");
+ };
+ return a.support.transition
+ ? void this.$element[c](0)
+ .one(a.support.transition.end, a.proxy(d, this))
+ .emulateTransitionEnd(350)
+ : d.call(this);
+ }
+ }
+ }),
+ (b.prototype.toggle = function () {
+ this[this.$element.hasClass("in") ? "hide" : "show"]();
+ });
+ var c = a.fn.collapse;
+ (a.fn.collapse = function (c) {
+ return this.each(function () {
+ var d = a(this),
+ e = d.data("bs.collapse"),
+ f = a.extend({}, b.DEFAULTS, d.data(), "object" == typeof c && c);
+ !e && f.toggle && "show" == c && (c = !c),
+ e || d.data("bs.collapse", (e = new b(this, f))),
+ "string" == typeof c && e[c]();
+ });
+ }),
+ (a.fn.collapse.Constructor = b),
+ (a.fn.collapse.noConflict = function () {
+ return (a.fn.collapse = c), this;
+ }),
+ a(document).on(
+ "click.bs.collapse.data-api",
+ "[data-toggle=collapse]",
+ function (b) {
+ var c,
+ d = a(this),
+ e =
+ d.attr("data-target") ||
+ b.preventDefault() ||
+ ((c = d.attr("href")) && c.replace(/.*(?=#[^\s]+$)/, "")),
+ f = a(e),
+ g = f.data("bs.collapse"),
+ h = g ? "toggle" : d.data(),
+ i = d.attr("data-parent"),
+ j = i && a(i);
+ (g && g.transitioning) ||
+ (j &&
+ j
+ .find('[data-toggle=collapse][data-parent="' + i + '"]')
+ .not(d)
+ .addClass("collapsed"),
+ d[f.hasClass("in") ? "addClass" : "removeClass"]("collapsed")),
+ f.collapse(h);
+ },
+ );
+ })(jQuery),
+
+(function (a) {
+ "use strict";
+ function b(b) {
+ a(d).remove(),
+ a(e).each(function () {
+ var d = c(a(this)),
+ e = { relatedTarget: this };
+ d.hasClass("open") &&
+ (d.trigger((b = a.Event("hide.bs.dropdown", e))),
+ b.isDefaultPrevented() ||
+ d.removeClass("open").trigger("hidden.bs.dropdown", e));
+ });
+ }
+ function c(b) {
+ var c = b.attr("data-target");
+ c ||
+ ((c = b.attr("href")),
+ (c = c && /#[A-Za-z]/.test(c) && c.replace(/.*(?=#[^\s]*$)/, "")));
+ var d = c && a(c);
+ return d && d.length ? d : b.parent();
+ }
+ var d = ".dropdown-backdrop",
+ e = "[data-toggle=dropdown]",
+ f = function (b) {
+ a(b).on("click.bs.dropdown", this.toggle);
+ };
+ (f.prototype.toggle = function (d) {
+ var e = a(this);
+ if (!e.is(".disabled, :disabled")) {
+ var f = c(e),
+ g = f.hasClass("open");
+ if ((b(), !g)) {
+ "ontouchstart" in document.documentElement &&
+ !f.closest(".navbar-nav").length &&
+ a('