Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into siyuan/cdc-backfill-f…
Browse files Browse the repository at this point in the history
…ix-uuid-new
  • Loading branch information
StrikeW committed Apr 29, 2024
2 parents 6467cf5 + 8265424 commit 70134ff
Show file tree
Hide file tree
Showing 558 changed files with 18,395 additions and 5,313 deletions.
2,330 changes: 1,988 additions & 342 deletions Cargo.lock

Large diffs are not rendered by default.

14 changes: 12 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ exclude = ["e2e_test/udf/wasm", "lints"]
resolver = "2"

[workspace.package]
version = "1.7.0-alpha"
version = "1.9.0-alpha"
edition = "2021"
homepage = "https://github.com/risingwavelabs/risingwave"
keywords = ["sql", "database", "streaming"]
Expand Down Expand Up @@ -140,6 +140,7 @@ arrow-select = "50"
arrow-ord = "50"
arrow-row = "50"
arrow-udf-js = "0.1"
arrow-udf-js-deno = { git = "https://github.com/risingwavelabs/arrow-udf.git", rev = "23fe0dd" }
arrow-udf-wasm = { version = "0.2.1", features = ["build"] }
arrow-udf-python = { git = "https://github.com/risingwavelabs/arrow-udf.git", rev = "6c32f71" }
arrow-array-deltalake = { package = "arrow-array", version = "48.0.1" }
Expand All @@ -156,7 +157,7 @@ deltalake = { git = "https://github.com/risingwavelabs/delta-rs", rev = "5c2dccd
itertools = "0.12.0"
lru = { git = "https://github.com/risingwavelabs/lru-rs.git", rev = "2682b85" }
parquet = "50"
thiserror-ext = "0.0.11"
thiserror-ext = "0.1.2"
tikv-jemalloc-ctl = { git = "https://github.com/risingwavelabs/jemallocator.git", rev = "64a2d9" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [
"profiling",
Expand Down Expand Up @@ -315,6 +316,15 @@ futures-timer = { git = "https://github.com/madsim-rs/futures-timer.git", rev =
etcd-client = { git = "https://github.com/risingwavelabs/etcd-client.git", rev = "4e84d40" }
# todo(wcy-fdu): remove this patch fork after opendal releases a new version that applies the azure workload identity change.
reqsign = { git = "https://github.com/wcy-fdu/reqsign.git", rev = "002ee2a" }
# patch to remove preserve_order from serde_json
deno_core = { git = "https://github.com/bakjos/deno_core", rev = "9b241c6" }
# patch to use reqwest 0.12.2
deno_fetch = { git = "https://github.com/bakjos/deno", rev = "787a232" }
deno_http = { git = "https://github.com/bakjos/deno", rev = "787a232" }
deno_net = { git = "https://github.com/bakjos/deno", rev = "787a232" }
deno_tls = { git = "https://github.com/bakjos/deno", rev = "787a232" }
deno_web = { git = "https://github.com/bakjos/deno", rev = "787a232" }
deno_websocket = { git = "https://github.com/bakjos/deno", rev = "787a232" }

[workspace.metadata.dylint]
libraries = [{ path = "./lints" }]
77 changes: 64 additions & 13 deletions Makefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ extend = [
{ path = "src/storage/backup/integration_tests/Makefile.toml" },
{ path = "src/java_binding/make-java-binding.toml" },
{ path = "src/stream/tests/integration_tests/integration_test.toml" },
{ path = "e2e_test/source_inline/commands.toml" },
]

env_files = ["./risedev-components.user.env"]
Expand All @@ -34,6 +35,7 @@ is_release = get_env ENABLE_RELEASE_PROFILE
is_not_release = not ${is_release}
is_dynamic_linking = get_env ENABLE_DYNAMIC_LINKING
is_hummock_trace = get_env ENABLE_HUMMOCK_TRACE
is_deno_udf_enabled = get_env ENABLE_DENO_UDF
is_python_udf_enabled = get_env ENABLE_PYTHON_UDF
if ${is_sanitizer_enabled}
Expand All @@ -57,6 +59,11 @@ else
set_env RISINGWAVE_FEATURE_FLAGS "--features rw-static-link"
end
if ${is_deno_udf_enabled}
flags = get_env RISINGWAVE_FEATURE_FLAGS
set_env RISINGWAVE_FEATURE_FLAGS "${flags} --features embedded-deno-udf"
end
if ${is_python_udf_enabled}
flags = get_env RISINGWAVE_FEATURE_FLAGS
set_env RISINGWAVE_FEATURE_FLAGS "${flags} --features embedded-python-udf"
Expand All @@ -83,9 +90,9 @@ if ${is_not_ci}
no_rust_log = not ${rust_log}
if ${no_rust_log}
set_env RUST_LOG "pgwire_query_log=info,hyper::client::connect::http=info"
set_env RUST_LOG "pgwire_query_log=info"
else
set_env RUST_LOG "pgwire_query_log=info,hyper::client::connect::http=info,${rust_log}"
set_env RUST_LOG "pgwire_query_log=info,${rust_log}"
end
end
Expand Down Expand Up @@ -121,6 +128,16 @@ rm -rf "${PREFIX_CONFIG}"
rm -rf "${PREFIX_PROFILING}"
'''

# Reset the default `dev` database of a running RisingWave instance.
#
# The script hops through a scratch database (`risedev_tmp`): create it while
# connected to `dev`, reconnect to the scratch database to drop and recreate
# `dev`, then reconnect to `dev` and drop the scratch database.
# NOTE(review): presumably the detour exists because the database a session is
# connected to cannot be dropped — confirm.
#
# The connection variables are expected to come from the env file loaded by the
# `check-and-load-risedev-env-file` dependency.
[tasks.reset-rw]
category = "RiseDev - Start/Stop"
description = "Clean all data in the default database dev of the running RisingWave"
dependencies = ["check-and-load-risedev-env-file"]
script = '''
#!/usr/bin/env bash
# Abort on the first failing step (or unset variable) so that a half-finished
# reset is reported as a task failure instead of being silently ignored.
set -euo pipefail

# run_sql <database> <sql>: run one psql command against the RiseDev frontend.
run_sql() {
    psql -h "$RISEDEV_RW_FRONTEND_LISTEN_ADDRESS" -p "$RISEDEV_RW_FRONTEND_PORT" -U root -d "$1" -c "$2"
}

# IF NOT EXISTS keeps the task re-runnable when an earlier run left the scratch
# database behind.
run_sql dev "CREATE DATABASE IF NOT EXISTS risedev_tmp;"
run_sql risedev_tmp "DROP DATABASE dev; CREATE DATABASE dev;"
run_sql dev "DROP DATABASE risedev_tmp;"
'''

[tasks.l]
alias = "logs"
Expand Down Expand Up @@ -550,11 +567,16 @@ if [ ! -f "${RC_ENV_FILE}" ]; then
fi
'''

# Internal helper task: depends on `check-risedev-env-file` and loads
# `${PREFIX_CONFIG}/risedev-env` as an env file. Other tasks list this task as a
# dependency instead of repeating both settings themselves.
[tasks.check-and-load-risedev-env-file]
category = "RiseDev - Prepare"
private = true
env_files = ["${PREFIX_CONFIG}/risedev-env"]
dependencies = ["check-risedev-env-file"]

[tasks.psql-env]
category = "RiseDev - Start/Stop"
description = "Dump env configuration for psql"
dependencies = ["check-risedev-env-file"]
env_files = ["${PREFIX_CONFIG}/risedev-env"]
dependencies = ["check-and-load-risedev-env-file"]
script = '''
#!/usr/bin/env bash
cat <<EOF > "${PREFIX_CONFIG}/psql-env"
Expand All @@ -572,8 +594,7 @@ echo " $(tput setaf 4)source ${PREFIX_CONFIG}/psql-env$(tput sgr0)"
[tasks.psql]
category = "RiseDev - Start/Stop"
description = "Run local psql client with default connection parameters. You can pass extra arguments to psql."
dependencies = ["check-risedev-env-file"]
env_files = ["${PREFIX_CONFIG}/risedev-env"]
dependencies = ["check-and-load-risedev-env-file"]
script = '''
#!/usr/bin/env bash
psql -h $RISEDEV_RW_FRONTEND_LISTEN_ADDRESS -p $RISEDEV_RW_FRONTEND_PORT -U root -d dev "$@"
Expand All @@ -582,8 +603,7 @@ psql -h $RISEDEV_RW_FRONTEND_LISTEN_ADDRESS -p $RISEDEV_RW_FRONTEND_PORT -U root
[tasks.ctl]
category = "RiseDev - Start/Stop"
description = "Start RiseCtl"
dependencies = ["check-risedev-env-file"]
env_files = ["${PREFIX_CONFIG}/risedev-env"]
dependencies = ["check-and-load-risedev-env-file"]
script = '''
#!/usr/bin/env bash
cargo run -p risingwave_cmd_all --profile "${RISINGWAVE_BUILD_PROFILE}" -- ctl "$@"
Expand Down Expand Up @@ -658,7 +678,7 @@ if [[ $ENABLE_COREDUMP == "true" ]]; then
fi
set -x
target/${BUILD_MODE_DIR}/risedev-dev ${@}
target/debug/risedev-dev ${@}
'''

[tasks.kill-risedev]
Expand Down Expand Up @@ -707,6 +727,14 @@ ${TMUX} list-windows -F "#{window_name} #{pane_id}" \
| awk '{ print $2 }' \
| xargs -I {} ${TMUX} send-keys -t {} C-c C-d
# Stop docker components
containers=$(docker ps -a -q -f name=risedev- 2>/dev/null) || true
if [[ -n ${containers} ]]; then
echo "Stopping docker components..."
docker stop ${containers}
fi
# Kill kafka and zookeeper
if [[ -n $(${TMUX} list-windows | grep kafka) ]];
then
echo "kill kafka"
Expand Down Expand Up @@ -1290,17 +1318,28 @@ echo "All processes has exited."
"""

[tasks.slt]
env = { SLT_HOST = "${RISEDEV_RW_FRONTEND_LISTEN_ADDRESS}", SLT_PORT = "${RISEDEV_RW_FRONTEND_PORT}", SLT_DB = "dev" }
category = "RiseDev - Test - SQLLogicTest"
install_crate = { version = "0.20.0", crate_name = "sqllogictest-bin", binary = "sqllogictest", test_arg = [
install_crate = { version = "0.20.1", crate_name = "sqllogictest-bin", binary = "sqllogictest", test_arg = [
"--help",
], install_command = "binstall" }
dependencies = ["check-risedev-env-file"]
env_files = ["${PREFIX_CONFIG}/risedev-env"]
dependencies = ["check-and-load-risedev-env-file"]
command = "sqllogictest"
args = ["${@}"]
description = "🌟 Run SQLLogicTest"

# Environment for the `slt` task: host and port are taken from the RiseDev
# frontend variables, the database is fixed to `dev`, and the repository's
# `e2e_test/commands` directory is prepended to PATH.
[tasks.slt.env]
SLT_HOST = "${RISEDEV_RW_FRONTEND_LISTEN_ADDRESS}"
SLT_PORT = "${RISEDEV_RW_FRONTEND_PORT}"
SLT_DB = "dev"
PATH = "${PWD}/e2e_test/commands:${PATH}"

# Variant of `slt` that depends on `clean-kafka` and `reset-rw` before handing
# over to the `slt` task.
[tasks.slt-clean]
description = "Run SQLLogicTest with a clean environment"
category = "RiseDev - Test - SQLLogicTest"
dependencies = [
    "clean-kafka",
    "reset-rw",
]
run_task = "slt"
args = ["${@}"]

[tasks.slt-streaming]
category = "RiseDev - Test - SQLLogicTest"
extend = "slt"
Expand Down Expand Up @@ -1415,3 +1454,15 @@ script = """
vars = dump_variables
echo ${vars}
"""

# Print the contents of the risedev-env file, followed by a hint showing how to
# load those variables into the current shell.
[tasks.show-risedev-env]
description = "Show risedev-env environment variables"
dependencies = ["check-risedev-env-file"]
script = '''
#!/usr/bin/env bash
set -euo pipefail
# Quote the path so a PREFIX_CONFIG containing spaces is not word-split.
cat "${PREFIX_CONFIG}/risedev-env"
echo "Hint: To load the environment variables into the shell, you may run:"
# `set -a` exports every variable assigned while sourcing; `set +a` turns that
# off again. The tput calls only colour the printed command.
echo "$(tput setaf 4)set -a; source ${PREFIX_CONFIG}/risedev-env; set +a$(tput sgr0)"
'''
23 changes: 21 additions & 2 deletions ci/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,22 @@ ENV LANG en_US.utf8
# Use AWS ubuntu mirror
RUN sed -i 's|http://archive.ubuntu.com/ubuntu|http://us-east-2.ec2.archive.ubuntu.com/ubuntu/|g' /etc/apt/sources.list
RUN apt-get update -yy && \
DEBIAN_FRONTEND=noninteractive apt-get -y install make build-essential cmake protobuf-compiler curl parallel python3 python3-pip python3-venv software-properties-common \
DEBIAN_FRONTEND=noninteractive apt-get -y install sudo make build-essential cmake protobuf-compiler curl parallel python3 python3-pip python3-venv software-properties-common \
openssl libssl-dev libsasl2-dev libcurl4-openssl-dev pkg-config bash openjdk-11-jdk wget unzip git tmux lld postgresql-client kcat netcat-openbsd mysql-client \
maven zstd libzstd-dev locales \
python3.12 python3.12-dev \
&& rm -rf /var/lib/{apt,dpkg,cache,log}/
ENV PYO3_PYTHON=python3.12

# Install nvm and zx
# Use the `ENV KEY=value` form; the whitespace-separated `ENV KEY value` form is
# a legacy syntax.
ENV NVM_DIR=/root/.nvm
ENV NODE_VERSION=20.11.1
# Run the nvm installer, then source nvm.sh in the same shell so the `nvm`
# function is available for installing the requested Node version.
RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash \
    && . "$NVM_DIR/nvm.sh" \
    && nvm install "$NODE_VERSION"
# Put the installed Node binaries on PATH for later instructions.
ENV PATH=$NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH
RUN npm install -g zx

SHELL ["/bin/bash", "-c"]

RUN mkdir -p /risingwave
Expand All @@ -43,6 +52,16 @@ RUN pip3 install --break-system-packages pyarrow pytest

# Install poetry
RUN curl -sSL https://install.python-poetry.org | python3 -
# Install rpk
# Pick the release archive for the build machine: amd64/x86_64 hosts get the
# amd64 archive, every other architecture gets the arm64 archive. The archive
# is unpacked into ~/.local/bin and removed afterwards.
RUN case "$(uname -m)" in \
        amd64|x86_64) rpk_arch=amd64 ;; \
        *) rpk_arch=arm64 ;; \
    esac && \
    curl -LO "https://github.com/redpanda-data/redpanda/releases/latest/download/rpk-linux-${rpk_arch}.zip" && \
    unzip "rpk-linux-${rpk_arch}.zip" -d ~/.local/bin/ && \
    rm "rpk-linux-${rpk_arch}.zip"
ENV PATH /root/.local/bin:$PATH

ENV CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
Expand All @@ -51,7 +70,7 @@ ENV CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
RUN cargo binstall -y --no-symlinks cargo-llvm-cov cargo-nextest cargo-hakari cargo-sort cargo-cache cargo-audit \
cargo-make@0.37.9 \
sqllogictest-bin@0.19.1 \
sqllogictest-bin@0.20.1 \
sccache@0.7.4 \
&& cargo cache -a \
&& rm -rf "/root/.cargo/registry/index" \
Expand Down
2 changes: 1 addition & 1 deletion ci/build-ci-image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ cat ../rust-toolchain
# shellcheck disable=SC2155

# REMEMBER TO ALSO UPDATE ci/docker-compose.yml
export BUILD_ENV_VERSION=v20240413
export BUILD_ENV_VERSION=v20240424_x

export BUILD_TAG="public.ecr.aws/w1p7b4n3/rw-build-env:${BUILD_ENV_VERSION}"

Expand Down
10 changes: 5 additions & 5 deletions ci/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ services:
retries: 5

source-test-env:
image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240413
image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240424_x
depends_on:
- mysql
- db
Expand All @@ -84,7 +84,7 @@ services:
- ..:/risingwave

sink-test-env:
image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240413
image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240424_x
depends_on:
- mysql
- db
Expand All @@ -103,12 +103,12 @@ services:


rw-build-env:
image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240413
image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240424_x
volumes:
- ..:/risingwave

ci-flamegraph-env:
image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240413
image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240424_x
# NOTE(kwannoel): This is used in order to permit
# syscalls for `nperf` (perf_event_open),
# so it can do CPU profiling.
Expand All @@ -119,7 +119,7 @@ services:
- ..:/risingwave

regress-test-env:
image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240413
image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240424_x
depends_on:
db:
condition: service_healthy
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ cargo build \
-p risingwave_compaction_test \
-p risingwave_e2e_extended_mode_test \
"${RISINGWAVE_FEATURE_FLAGS[@]}" \
--features embedded-deno-udf \
--features embedded-python-udf \
--profile "$profile"

Expand Down
2 changes: 0 additions & 2 deletions ci/scripts/connector-node-integration-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ shift $((OPTIND -1))
RISINGWAVE_ROOT=${PWD}

echo "--- install java"
apt install sudo -y && apt-get update

if [ "$VERSION" = "11" ]; then
echo "The test imgae default java version is 11, no need to install"
else
Expand Down
8 changes: 8 additions & 0 deletions ci/scripts/docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@ echo "$GHCR_TOKEN" | docker login ghcr.io -u "$GHCR_USERNAME" --password-stdin
echo "--- dockerhub login"
echo "$DOCKER_TOKEN" | docker login -u "risingwavelabs" --password-stdin

# Retag-only mode: taken when both ORIGINAL_IMAGE_TAG and NEW_IMAGE_TAG are set
# (`${VAR+x}` expands to a non-empty string whenever VAR is set, even if it is
# set to an empty value). The existing image is pulled, tagged as
# `${NEW_IMAGE_TAG}-${arch}`, pushed, and the script exits before reaching the
# build steps that follow. `ghcraddr` and `arch` are defined outside this excerpt.
if [[ -n "${ORIGINAL_IMAGE_TAG+x}" ]] && [[ -n "${NEW_IMAGE_TAG+x}" ]]; then
echo "--- retag docker image"
docker pull ${ghcraddr}:${ORIGINAL_IMAGE_TAG}
docker tag ${ghcraddr}:${ORIGINAL_IMAGE_TAG} ${ghcraddr}:${NEW_IMAGE_TAG}-${arch}
docker push ${ghcraddr}:${NEW_IMAGE_TAG}-${arch}
exit 0
fi

# Build RisingWave docker image ${BUILDKITE_COMMIT}-${arch}
echo "--- docker build and tag"
docker buildx create \
Expand Down
6 changes: 3 additions & 3 deletions ci/scripts/e2e-deltalake-sink-rust-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ spark-3.3.1-bin-hadoop3/bin/spark-sql --packages $DEPENDENCIES \
--conf 'spark.hadoop.fs.s3a.secret.key=hummockadmin' \
--conf 'spark.hadoop.fs.s3a.endpoint=http://127.0.0.1:9301' \
--conf 'spark.hadoop.fs.s3a.path.style.access=true' \
--S --e 'create table delta.`s3a://deltalake/deltalake-test`(v1 int, v2 short, v3 long, v4 float, v5 double, v6 string, v7 date, v8 Timestamp, v9 boolean, v10 decimal) using delta;'
--S --e 'create table delta.`s3a://deltalake/deltalake-test`(v1 int, v2 short, v3 long, v4 float, v5 double, v6 string, v7 date, v8 Timestamp, v9 boolean, v10 decimal, v11 ARRAY<decimal>) using delta;'


echo "--- testing sinks"
Expand All @@ -63,11 +63,11 @@ spark-3.3.1-bin-hadoop3/bin/spark-sql --packages $DEPENDENCIES \
--conf 'spark.hadoop.fs.s3a.secret.key=hummockadmin' \
--conf 'spark.hadoop.fs.s3a.endpoint=http://localhost:9301' \
--conf 'spark.hadoop.fs.s3a.path.style.access=true' \
--S --e 'INSERT OVERWRITE DIRECTORY "./spark-output" USING CSV SELECT * FROM delta.`s3a://deltalake/deltalake-test`;'
--S --e 'INSERT OVERWRITE DIRECTORY "./spark-output" USING CSV SELECT v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,CAST(v11 as varchar(12)) FROM delta.`s3a://deltalake/deltalake-test`;'

# check sink destination using shell
if cat ./spark-output/*.csv | sort | awk -F "," '{
exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01T01:01:01.000Z" && $9 == "false" && $10 == 1); }'; then
exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01T01:01:01.000Z" && $9 == "false" && $10 == 1 && $11 == "[1]"); }'; then
echo "DeltaLake sink check passed"
else
cat ./spark-output/*.csv
Expand Down

0 comments on commit 70134ff

Please sign in to comment.