Merge pull request duckdb#9770 from Mytherin/parquetbigdecimal

Support reading large decimals into doubles in the Parquet reader

Mytherin authored and k-anshul committed Nov 28, 2023
1 parent 3c695d7 commit 16d28d0

Showing 15 changed files with 1,333 additions and 10 deletions.
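For context: DuckDB's native DECIMAL type caps out at a width of 38 digits, so Parquet DECIMAL columns wider than that previously could not be read at all. A minimal sketch of the new behavior — the file name and column are hypothetical, and the DOUBLE mapping is inferred from the commit title:

    # 'big_decimals.parquet' with a DECIMAL(45,10) column 'amount' is a made-up example;
    # such a column is too wide for a native DECIMAL, so the reader should now
    # return it as a DOUBLE (losing precision) instead of raising an error.
    duckdb -c "SELECT typeof(amount), amount FROM read_parquet('big_decimals.parquet') LIMIT 5;"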
2 changes: 1 addition & 1 deletion .github/actions/build_extensions/action.yml
@@ -93,7 +93,7 @@ runs:
      if: inputs.vcpkg_build == 1
      uses: lukka/run-vcpkg@v11.1
      with:
-       vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
+       vcpkgGitCommitId: a42af01b72c28a8e1d7b48107b33e4f286a55ef6

    - name: Set vcpkg env variables
      if: inputs.vcpkg_build == 1
2 changes: 1 addition & 1 deletion .github/actions/manylinux_2014_setup/action.yml
@@ -96,7 +96,7 @@ runs:
      if: ${{ inputs.vcpkg == 1 }}
      uses: lukka/run-vcpkg@v11.1
      with:
-       vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
+       vcpkgGitCommitId: a42af01b72c28a8e1d7b48107b33e4f286a55ef6

    - name: Install OpenSSL
      if: ${{ inputs.openssl == 1 }}
2 changes: 1 addition & 1 deletion .github/actions/ubuntu_18_setup/action.yml
@@ -84,7 +84,7 @@ runs:
      if: ${{ inputs.vcpkg == 1 }}
      uses: lukka/run-vcpkg@v11.1
      with:
-       vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
+       vcpkgGitCommitId: a42af01b72c28a8e1d7b48107b33e4f286a55ef6

    - name: Setup Ccache
      if: ${{ inputs.ccache == 1 }}
4 changes: 2 additions & 2 deletions .github/workflows/OSX.yml
@@ -209,8 +209,8 @@ jobs:
        with:
          python-version: '3.7'

-     - name: Install Ninja
-       run: brew install ninja
+     - name: Install Ninja and Pkg-config
+       run: brew install pkg-config ninja

      - name: Setup Ccache
        uses: hendrikmuhs/ccache-action@main
2 changes: 1 addition & 1 deletion .github/workflows/Wasm.yml
@@ -38,7 +38,7 @@ jobs:
      - name: Setup vcpkg
        uses: lukka/run-vcpkg@v11.1
        with:
-         vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
+         vcpkgGitCommitId: a42af01b72c28a8e1d7b48107b33e4f286a55ef6

      - name: Setup Ccache
        uses: hendrikmuhs/ccache-action@main
8 changes: 7 additions & 1 deletion .github/workflows/_extension_distribution.yml
@@ -34,7 +34,7 @@ on:
      vcpkg_commit:
        required: false
        type: string
-       default: "501db0f17ef6df184fcdbfbe0f87cde2313b6ab1"
+       default: "a42af01b72c28a8e1d7b48107b33e4f286a55ef6"
      # Override the default script producing the matrices. Allows specifying custom matrices.
      matrix_parse_script:
        required: false

@@ -247,6 +247,12 @@ jobs:
        BUILD_SHELL: ${{ inputs.build_duckdb_shell && '1' || '0' }}

    steps:
+     - name: Keep \n line endings
+       shell: bash
+       run: |
+         git config --global core.autocrlf false
+         git config --global core.eol lf
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
2 changes: 2 additions & 0 deletions extension/httpfs/include/s3fs.hpp
@@ -111,6 +111,8 @@ class S3FileHandle : public HTTPFileHandle {
			throw NotImplementedException("Cannot open an HTTP file for appending");
		}
	}
+	~S3FileHandle() override;

	S3AuthParams auth_params;
	const S3ConfigParams config_params;
4 changes: 4 additions & 0 deletions extension/httpfs/s3fs.cpp
@@ -223,6 +223,10 @@ S3AuthParams S3AuthParams::ReadFrom(FileOpener *opener, FileOpenerInfo &info) {
	                  endpoint, url_style, use_ssl, s3_url_compatibility_mode};
}

+S3FileHandle::~S3FileHandle() {
+	Close();
+}
+
S3ConfigParams S3ConfigParams::ReadFrom(FileOpener *opener) {
	uint64_t uploader_max_filesize;
	uint64_t max_parts_per_file;
2 changes: 1 addition & 1 deletion extension/icu/icu_extension.cpp
@@ -238,7 +238,7 @@ void IcuExtension::Load(DuckDB &ddb) {
	}
	collation = StringUtil::Lower(collation);

-	CreateCollationInfo info(collation, GetICUFunction(collation), false, true);
+	CreateCollationInfo info(collation, GetICUFunction(collation), false, false);
	ExtensionUtil::RegisterCollation(db, info);
}
ScalarFunction sort_key("icu_sort_key", {LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::VARCHAR,
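The flipped final constructor argument (in DuckDB sources this parameter is named not_required_for_equality) registers ICU collations as required for equality comparisons as well as ordering. A hedged sketch of the user-visible effect, assuming a build with the icu extension loaded:

    # chr(8491) is U+212B (Angstrom sign) and chr(197) is U+00C5 (Å); the Danish
    # collation treats them as equal, which now also applies to the = operator.
    duckdb -c "SELECT chr(8491) COLLATE da = chr(197) COLLATE da;"   # expected: true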
78 changes: 78 additions & 0 deletions scripts/extension-upload-from-nightly.sh
@@ -0,0 +1,78 @@
#!/bin/bash

# This script promotes extension binaries from the nightly bucket to the main bucket

# WARNING: don't use this script if you don't know exactly what you're doing. To deploy a binary:
# - Run the script as ./extension-upload-from-nightly.sh <extension_name> <duckdb_version> (<nightly_commit>)
#   (see the example invocation after the script)
# - CHECK the output of the dry run thoroughly
# - If it looks correct, set the I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL env variable to the correct value
# - Run the script again, now deploying for real
# - Check the output
# - Unset the I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL env variable

if [ -z "$1" ] || [ -z "$2" ]; then
echo "Usage: ./extension-upload-from-nightly.sh <extension_name> <duckdb_version> (<nightly_commit>)"
exit 1
fi

if [ -z "$3" ]; then
BASE_NIGHTLY_DIR="$2"
else
BASE_NIGHTLY_DIR="$1/$3/$2"
fi

# CONFIG
FROM_BUCKET=duckdb-extensions-nightly
TO_BUCKET=duckdb-extensions
CLOUDFRONT_DISTRIBUTION_ID=E2Z28NDMI4PVXP

### COPY THE FILES
REAL_RUN="aws s3 cp s3://$FROM_BUCKET/$BASE_NIGHTLY_DIR s3://$TO_BUCKET/$2 --recursive --exclude '*' --include '*/$1.duckdb_extension.gz' --acl public-read"
DRY_RUN="$REAL_RUN --dryrun"

if [ "$I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL" == "yessir" ]; then
echo "DEPLOYING"
echo "> FROM: $FROM_BUCKET"
echo "> TO : $TO_BUCKET"
echo "> AWS CLI deploy: "
eval "$REAL_RUN"
else
echo "DEPLOYING (DRY RUN)"
echo "> FROM: $FROM_BUCKET"
echo "> TO : $TO_BUCKET"
echo "> AWS CLI Dry run: "
eval "$DRY_RUN"
fi

echo ""

### INVALIDATE THE CLOUDFRONT CACHE
# For double checking we are invalidating the correct domain
CLOUDFRONT_ORIGINS=`aws cloudfront get-distribution --id $CLOUDFRONT_DISTRIBUTION_ID --query 'Distribution.DistributionConfig.Origins.Items[*].DomainName' --output text`

# Parse the dry run output
output=$(eval "$DRY_RUN")
s3_paths=()
while IFS= read -r line; do
    if [[ $line == *"copy:"* ]]; then
        s3_path=$(echo $line | grep -o 's3://[^ ]*' | awk 'NR%2==0' | awk -F "s3://$TO_BUCKET" '{print $2}' | cut -d' ' -f1)
        s3_paths+=("$s3_path")
    fi
done <<< "$output"

if [ "$I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL" == "yessir" ]; then
echo "INVALIDATION"
echo "> Total files: ${#s3_paths[@]}"
echo "> Domain: $CLOUDFRONT_ORIGINS"
for path in "${s3_paths[@]}"; do
aws cloudfront create-invalidation --distribution-id "$CLOUDFRONT_DISTRIBUTION_ID" --paths "$path"
done
else
echo "INVALIDATION (DRY RUN)"
echo "> Total files: ${#s3_paths[@]}"
echo "> Domain: $CLOUDFRONT_ORIGINS"
echo "> Paths:"
for path in "${s3_paths[@]}"; do
echo " $path"
done
fi
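An example invocation of the script above; the extension name, DuckDB version, and nightly commit are placeholders:

    # dry run first; CHECK the output thoroughly
    ./extension-upload-from-nightly.sh httpfs v0.9.2 0123456789abcdef01234567

    # only after verifying the dry run: deploy for real, then unset the guard
    export I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL=yessir
    ./extension-upload-from-nightly.sh httpfs v0.9.2 0123456789abcdef01234567
    unset I_KNOW_WHAT_IM_DOING_DEPLOY_FOR_REAL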
2 changes: 1 addition & 1 deletion scripts/merge_vcpkg_deps.py
@@ -55,7 +55,7 @@ def prefix_overlay_port(overlay_port):

data = {
    "description": f"Auto-generated vcpkg.json for combined DuckDB extension build",
-    "builtin-baseline": "501db0f17ef6df184fcdbfbe0f87cde2313b6ab1",
+    "builtin-baseline": "a42af01b72c28a8e1d7b48107b33e4f286a55ef6",
    "dependencies": final_deduplicated_deps,
    "overrides": [{"name": "openssl", "version": "3.0.8"}],
}
2 changes: 1 addition & 1 deletion scripts/setup_manylinux2014.sh
@@ -40,7 +40,7 @@ install_deps() {
    (
        cd $VCPKG_TARGET_DIR ;
        git clone https://github.com/Microsoft/vcpkg.git ;
-       git checkout 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1 ;
+       git checkout a42af01b72c28a8e1d7b48107b33e4f286a55ef6 ;
        cd vcpkg ;
        ./bootstrap-vcpkg.sh
    )
38 changes: 38 additions & 0 deletions test/sql/collate/test_icu_collate.test
@@ -82,3 +82,41 @@ statement error
SELECT icu_sort_key('goose', 'DUCK_DUCK_ENUM');
----
Invalid Input Error


# issue duckdb/duckdb#9692
query I
select chr(2*16*256+1*256+2*16+11) collate da = chr(12*16+5) collate da;
----
true


query I
select icu_sort_key(chr(2*16*256+1*256+2*16+11),'da')=icu_sort_key(chr(12*16+5),'da');
----
true

query I
select chr(2*16*256+1*256+2*16+11) collate da > chr(12*16+5) collate da;
----
false

query I
select count(*) from (select chr(2*16*256+1*256+2*16+11) union select chr(12*16+5)) as t(s) group by s collate da;
----
2

query I
select nfc_normalize(chr(2*16*256+1*256+2*16+11))=nfc_normalize(chr(12*16+5));
----
true

query I
select count(*) from (select chr(2*16*256+1*256+2*16+11) union select chr(12*16+5)) as t(s) group by s collate nfc;
----
2
