From 14d6db605b26b62638852b8a2dc0d6e7a3c0104c Mon Sep 17 00:00:00 2001
From: Remi Dettai
Date: Wed, 17 Jan 2024 14:40:33 +0000
Subject: [PATCH] Apply new versioning and skip hdfs decompression

---
 .github/workflows/publish_lambda_packages.yml | 12 ++----------
 distribution/lambda/Makefile                  |  9 ++++++++-
 distribution/lambda/cdk/cli.py                | 10 ++++------
 3 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/publish_lambda_packages.yml b/.github/workflows/publish_lambda_packages.yml
index e7411ebddab..6b511183dac 100644
--- a/.github/workflows/publish_lambda_packages.yml
+++ b/.github/workflows/publish_lambda_packages.yml
@@ -3,12 +3,7 @@ name: Build and publish AWS Lambda packages
 on:
   push:
     tags:
-      - "v*"
-      - "lambda-v*"
-  workflow_dispatch:
-
-env:
-  PRERELEASE_VERSION_NAME: beta
+      - "lambda-beta-*"
 
 jobs:
   build-lambdas:
@@ -28,9 +23,6 @@ jobs:
       - name: Extract asset version of release
         run: echo "QW_LAMBDA_VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV
         if: ${{ github.event_name == 'push' }}
-      - name: Setting version as prerelease
-        run: echo "QW_LAMBDA_VERSION=${{ env.PRERELEASE_VERSION_NAME }}" >> $GITHUB_ENV
-        if: ${{ github.event_name != 'push' }}
       - name: Retrieve and export commit date, hash, and tags
         run: |
           echo "QW_COMMIT_DATE=$(TZ=UTC0 git log -1 --format=%cd --date=format-local:%Y-%m-%dT%H:%M:%SZ)" >> $GITHUB_ENV
@@ -57,4 +49,4 @@ jobs:
           file: ${{ env.SEARCHER_PACKAGE_LOCATION }};${{ env.INDEXER_PACKAGE_LOCATION }}
           overwrite: true
           draft: true
-          tag_name: aws-lambda-${{ env.QW_LAMBDA_VERSION }}
+          tag_name: aws-${{ env.QW_LAMBDA_VERSION }}
diff --git a/distribution/lambda/Makefile b/distribution/lambda/Makefile
index 701e9ba94b1..4dfd1d1b076 100644
--- a/distribution/lambda/Makefile
+++ b/distribution/lambda/Makefile
@@ -3,7 +3,8 @@ SHELL := bash
 .SHELLFLAGS := -eu -o pipefail -c
 
-QW_LAMBDA_VERSION?=beta
+# Update this when cutting a new release
+QW_LAMBDA_VERSION?=beta-01
 PACKAGE_BASE_URL=https://github.com/quickwit-oss/quickwit/releases/download/aws-lambda-$(QW_LAMBDA_VERSION)/
 SEARCHER_PACKAGE_FILE=quickwit-lambda-searcher-$(QW_LAMBDA_VERSION)-x86_64.zip
 INDEXER_PACKAGE_FILE=quickwit-lambda-indexer-$(QW_LAMBDA_VERSION)-x86_64.zip
 
@@ -36,10 +37,16 @@ package:
 		cp -u ../../quickwit/target/lambda/indexer/bootstrap.zip $(INDEXER_PACKAGE_PATH)
 	else
 		if ! [ -f $(SEARCHER_PACKAGE_PATH) ]; then
+			echo "Downloading package $(PACKAGE_BASE_URL)$(SEARCHER_PACKAGE_FILE)"
 			curl -C - -Ls -o $(SEARCHER_PACKAGE_PATH) $(PACKAGE_BASE_URL)$(SEARCHER_PACKAGE_FILE)
+		else
+			echo "Using cached package $(SEARCHER_PACKAGE_PATH)"
 		fi
 		if ! [ -f $(INDEXER_PACKAGE_PATH) ]; then
+			echo "Downloading package $(PACKAGE_BASE_URL)$(INDEXER_PACKAGE_FILE)"
 			curl -C - -Ls -o $(INDEXER_PACKAGE_PATH) $(PACKAGE_BASE_URL)$(INDEXER_PACKAGE_FILE)
+		else
+			echo "Using cached package $(INDEXER_PACKAGE_PATH)"
 		fi
 	fi
 
diff --git a/distribution/lambda/cdk/cli.py b/distribution/lambda/cdk/cli.py
index 4bb9f5b3de6..ce2fe4cf75d 100644
--- a/distribution/lambda/cdk/cli.py
+++ b/distribution/lambda/cdk/cli.py
@@ -24,8 +24,7 @@
 region = os.environ["CDK_REGION"]
 
 example_host = "quickwit-datasets-public.s3.amazonaws.com"
-# the publicly hosted file is compressed and suffixed with ".gz"
-example_hdfs_file = "hdfs-logs-multitenants.json"
+example_hdfs_file = "hdfs-logs-multitenants.json.gz"
 INDEXING_BOTO_CONFIG = botocore.config.Config(
     retries={"max_attempts": 0}, read_timeout=60 * 15
 )
@@ -139,17 +138,16 @@ def upload_hdfs_src_file():
     except botocore.exceptions.ClientError as e:
         if e.response["Error"]["Code"] != "404":
             raise e
-    print(f"download dataset https://{example_host}/{example_hdfs_file}.gz")
+    print(f"download dataset https://{example_host}/{example_hdfs_file}")
     conn = http.client.HTTPSConnection(example_host)
-    conn.request("GET", f"/{example_hdfs_file}.gz")
+    conn.request("GET", f"/{example_hdfs_file}")
     response = conn.getresponse()
     if response.status != 200:
         print(f"Failed to fetch dataset")
         exit(1)
     with tempfile.NamedTemporaryFile() as tmp:
-        unzipped_resp = gzip.GzipFile(mode="rb", fileobj=response)
         while True:
-            chunk = unzipped_resp.read(1024 * 1024)
+            chunk = response.read(1024 * 1024)
             if len(chunk) == 0:
                 break
             tmp.write(chunk)