23 changes: 21 additions & 2 deletions CHANGELOG.md
@@ -1,13 +1,32 @@
# Release History

## 1.0.9
## 1.0.10

### Behavior Changes

- Model Development: log_loss metric calculation is now distributed.
- Model Development: precision_score, recall_score, f1_score, fbeta_score, precision_recall_fscore_support,
mean_absolute_error, mean_squared_error, and mean_absolute_percentage_error metric calculations are now distributed.
- Model Registry: `deploy` now returns a `Deployment` object containing deployment information.
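
A minimal, hedged sketch of calling one of the now-distributed metrics — an existing Snowpark `session`, the `PREDICTIONS` table, and the column names are illustrative assumptions:

```python
# Compute a distributed metric over a Snowpark DataFrame.
# `session` and the table/column names are assumptions for illustration.
from snowflake.ml.modeling.metrics import mean_squared_error

df = session.table("PREDICTIONS")
mse = mean_squared_error(
    df=df,
    y_true_col_names="LABEL",
    y_pred_col_names="PREDICTION",
)
print(mse)
```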

### New Features

- Model Registry: When the model signature is auto-inferred, it is printed to the log for reference.
- Model Registry: For SPCS deployments, `Deployment` details now contain `image_name`, `service_spec` and `service_function_sql`.
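
A hedged sketch of reading those `Deployment` details after an SPCS deployment — the `registry` handle, the `deploy` arguments, and dictionary-style access are assumptions; only the field names come from the entry above:

```python
# Inspect the Deployment returned by an SPCS deploy.
# `registry` and the deploy() arguments are illustrative assumptions.
deployment = registry.deploy(
    name="MY_MODEL",
    deployment_name="my_service",
)
details = deployment.details  # field names per the changelog entry above
print(details["image_name"])
print(details["service_spec"])
print(details["service_function_sql"])
```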

### Bug Fixes

- Model Development: Fix an issue that led to UTF-8 decoding errors when using modeling modules on Windows.
- Model Development: Fix an issue where alias definitions caused `SnowparkSQLUnexpectedAliasException` during inference.
- Model Registry: Fix an issue where signature inference could be incorrect when using a Snowpark DataFrame as sample input.
- Model Registry: Relax overly strict data type validation when predicting. For example, if the signature declares an INT8
  feature and an INT64 dataframe is provided, prediction no longer fails as long as all values fit within INT8 range.
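
A small sketch of the relaxed validation — the `model` reference and column name are hypothetical:

```python
# int64 input against an INT8 signature now passes when every value fits in int8.
import pandas as pd

input_df = pd.DataFrame({"FEATURE": pd.array([1, 2, 3], dtype="int64")})
predictions = model.predict(input_df)  # previously raised a type-validation error
```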

## 1.0.9 (2023-09-28)

### Behavior Changes

- Model Development: log_loss metric calculation is now distributed.

### Bug Fixes

- Model Registry: Fix an issue where image building fails with specific Docker setups.
1 change: 0 additions & 1 deletion bazel/environments/conda-env-build.yml
@@ -13,7 +13,6 @@ dependencies:
- lightgbm==3.3.5
- numpy==1.24.3
- packaging==23.0
- pytimeparse==1.1.8
- ruamel.yaml==0.17.21
- scikit-learn==1.3.0
- sphinx==5.0.2
5 changes: 3 additions & 2 deletions bazel/environments/conda-env-snowflake.yml
@@ -30,7 +30,7 @@ dependencies:
- packaging==23.0
- pandas==1.5.3
- protobuf==3.20.3
- pytest==7.1.2
- pytest==7.4.0
- pytimeparse==1.1.8
- pytorch==2.0.1
- pyyaml==6.0
@@ -46,8 +46,9 @@ dependencies:
- sphinx==5.0.2
- sqlparse==0.4.4
- tensorflow==2.10.0
- tokenizers==0.13.2
- torchdata==0.6.1
- transformers==4.29.2
- transformers==4.32.1
- types-protobuf==4.23.0.1
- types-requests==2.30.0.0
- typing-extensions==4.5.0
7 changes: 4 additions & 3 deletions bazel/environments/conda-env.yml
@@ -13,7 +13,7 @@ dependencies:
- cachetools==4.2.2
- cloudpickle==2.0.0
- conda-forge::accelerate==0.22.0
- conda-forge::mypy==1.4.1
- conda-forge::mypy==1.5.1
- conda-forge::starlette==0.27.0
- conda-forge::types-PyYAML==6.0.12
- conda-forge::types-cachetools==4.2.2
@@ -35,7 +35,7 @@ dependencies:
- packaging==23.0
- pandas==1.5.3
- protobuf==3.20.3
- pytest==7.1.2
- pytest==7.4.0
- pytimeparse==1.1.8
- pytorch==2.0.1
- pyyaml==6.0
@@ -51,8 +51,9 @@ dependencies:
- sphinx==5.0.2
- sqlparse==0.4.4
- tensorflow==2.10.0
- tokenizers==0.13.2
- torchdata==0.6.1
- transformers==4.29.2
- transformers==4.32.1
- types-protobuf==4.23.0.1
- types-requests==2.30.0.0
- typing-extensions==4.5.0
3 changes: 3 additions & 0 deletions bazel/requirements/parse_and_generate_requirements.py
@@ -361,6 +361,8 @@ def generate_requirements(
)
)
sys.stdout.writelines(results)
elif (mode, format) == ("dev_version", "python"):
sys.stdout.writelines(f"REQUIREMENTS = {repr(snowflake_only_env)}\n")
elif (mode, format) == ("version_requirements", "bzl"):
extras_requirements = list(filter(lambda req_info: filter_by_extras(req_info, True, False), requirements))
extras_results: MutableMapping[str, Sequence[str]] = {}
@@ -479,6 +481,7 @@ def main() -> None:
VALID_SETTINGS = [
("validate", None, False), # Validate the environment
("dev_version", "text", False), # requirements.txt
("dev_version", "python", True), # sproc test dependencies list
("version_requirements", "bzl", False), # wheel rule requirements
("version_requirements", "python", False), # model deployment core dependencies list
("dev_version", "conda_env", False), # dev conda-env.yml file
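
For reference, a hypothetical rendering of what the new `("dev_version", "python")` mode emits — the snowflake-only dev requirements written out as a Python list for the sproc test dependencies (the pins shown are illustrative, not the real list):

```python
# Hypothetical output of the ("dev_version", "python") mode.
REQUIREMENTS = ['absl-py==1.3.0', 'numpy==1.24.3', 'pyyaml==6.0']
```
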
111 changes: 85 additions & 26 deletions ci/build_and_run_tests.sh
@@ -38,6 +38,7 @@ WITH_SNOWPARK=false
MODE="continuous_run"
SNOWML_DIR="snowml"
SNOWPARK_DIR="snowpark-python"
IS_NT=false

while (($#)); do
case $1 in
@@ -74,26 +75,70 @@ while (($#)); do
shift
done

EXT=""
BAZEL_ADDITIONAL_BUILD_FLAGS=()
BAZEL_ADDITIONAL_STARTUP_FLAGS=()

# Computing artifact location
# Detect the platform and update platform-specific bazel settings
case "$(uname)" in
Linux)
PLATFORM="linux" ;;
Darwin)
PLATFORM="darwin" ;;
*NT*)
PLATFORM="windows"
IS_NT=true ;;
esac

# Detect the architecture
ARCH="$(uname -m)"
case "$ARCH" in
aarch64|ppc64le|arm64)
ARCH="arm64" ;;
*)
ARCH="amd64" ;;
esac

# Compute the platform-arch string used to download yq.
case "${PLATFORM}_${ARCH}" in
linux_arm64|linux_amd64|darwin_arm64|darwin_amd64|windows_amd64)
;; # pass
*)
echo "Platform / Architecture is not supported by yq." >&2
exit 1
;;
esac

# Check that Python 3.8 exists
# TODO(SNOW-845592): ideally we should download Python 3.8 from conda if it does not exist. Currently we just fail.
set +eu
source /opt/rh/rh-python38/enable
PYTHON38_EXIST=$?
if [ $PYTHON38_EXIST -ne 0 ]; then
echo "Failed to execute tests: Python3.8 is not installed."
rm -rf "${TEMP_TEST_DIR}"
exit ${PYTHON38_EXIST}
if [ "${ENV}" = "pip" ]; then
set +eu
source /opt/rh/rh-python38/enable
PYTHON38_EXIST=$?
if [ $PYTHON38_EXIST -ne 0 ]; then
echo "Failed to execute tests: Python3.8 is not installed."
rm -rf "${TEMP_TEST_DIR}"
exit ${PYTHON38_EXIST}
fi
set -eu
fi

if [ ${IS_NT} = true ]; then
EXT=".exe"
BAZEL_ADDITIONAL_BUILD_FLAGS+=(--nobuild_python_zip)
BAZEL_ADDITIONAL_BUILD_FLAGS+=(--enable_runfiles)
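# Keep Bazel's output root short to avoid long-path issues on Windows.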
BAZEL_ADDITIONAL_STARTUP_FLAGS+=(--output_user_root=D:/broot)
fi
set -eu

cd "${WORKSPACE}"

# Check for yq and download it if not present.
_YQ_BIN="yq"
_YQ_BIN="yq${EXT}"
if ! command -v "${_YQ_BIN}" &>/dev/null; then
TEMP_BIN=$(mktemp -d "${WORKSPACE}/tmp_bin_XXXXX")
curl -Ls https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -o "${TEMP_BIN}/yq" && chmod +x "${TEMP_BIN}/yq"
_YQ_BIN="${TEMP_BIN}/yq"
curl -Lsv https://github.com/mikefarah/yq/releases/latest/download/yq_${PLATFORM}_${ARCH}${EXT} -o "${TEMP_BIN}/yq${EXT}" && chmod +x "${TEMP_BIN}/yq${EXT}"
_YQ_BIN="${TEMP_BIN}/yq${EXT}"
fi

# Create temp release folder
@@ -109,23 +154,39 @@ echo "Extracted Package Version from code: ${VERSION}"
OPTIONAL_REQUIREMENTS=()
while IFS='' read -r line; do OPTIONAL_REQUIREMENTS+=("$line"); done < <("${_YQ_BIN}" '.requirements.run_constrained.[] | ... style=""' ci/conda_recipe/meta.yaml)

# Generate and copy auto-gen tests.
if [[ ${MODE} = "release" ]]; then
"${BAZEL}" build //tests/... --build_tag_filters=autogen_build
cp -r "$("${BAZEL}" info bazel-bin)/tests" "${TEMP_TEST_DIR}"
fi

# Compare test required dependencies with wheel pkg dependencies and exclude tests if necessary
EXCLUDE_TESTS=$(mktemp "${TEMP_TEST_DIR}/exclude_tests_XXXXX")
if [[ ${MODE} = "continuous_run" || ${MODE} = "release" ]]; then
./ci/get_excluded_tests.sh -f "${EXCLUDE_TESTS}" -m unused -b "${BAZEL}"
elif [[ ${MODE} = "merge_gate" ]]; then
./ci/get_excluded_tests.sh -f "${EXCLUDE_TESTS}" -m all -b "${BAZEL}"
fi

# Generate and copy auto-gen tests.
if [[ ${MODE} = "release" ]]; then
# For a release, build all autogen tests
"${BAZEL}" "${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]+"${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]}"}" build "${BAZEL_ADDITIONAL_BUILD_FLAGS[@]+"${BAZEL_ADDITIONAL_BUILD_FLAGS[@]}"}" //tests/integ/...
else
# In other cases, build only the required utilities.
"${BAZEL}" "${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]+"${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]}"}" build --build_tag_filters=-autogen_build,-autogen "${BAZEL_ADDITIONAL_BUILD_FLAGS[@]+"${BAZEL_ADDITIONAL_BUILD_FLAGS[@]}"}" //tests/integ/...
fi

# Rsync does not work well with paths that contain a drive letter on Windows;
# thus, these two rsync calls must use relative paths instead of absolute ones.

rsync -av --exclude '*.runfiles_manifest' --exclude '*.runfiles/**' "bazel-bin/tests" .

# Copy tests into temp directory
pushd "${TEMP_TEST_DIR}"
rsync -av --exclude-from "${EXCLUDE_TESTS}" "${WORKSPACE}/${SNOWML_DIR}/tests" .
rsync -av --exclude-from "${EXCLUDE_TESTS}" "../${SNOWML_DIR}/tests" .
popd

# Bazel on Windows consumes a lot of memory; clean it up before proceeding to avoid OOM.
if [ ${IS_NT} = true ]; then
"${BAZEL}" "${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]+"${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]}"}" clean --expunge
"${BAZEL}" "${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]+"${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]}"}" shutdown
fi

popd

# Build snowml package
@@ -149,12 +210,10 @@ if [ "${ENV}" = "pip" ]; then

# Build SnowML
pushd ${SNOWML_DIR}
"${BAZEL}" build //snowflake/ml:wheel
"${BAZEL}" "${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]+"${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]}"}" build "${BAZEL_ADDITIONAL_BUILD_FLAGS[@]+"${BAZEL_ADDITIONAL_BUILD_FLAGS[@]}"}" //snowflake/ml:wheel
cp "$(${BAZEL} info bazel-bin)/snowflake/ml/snowflake_ml_python-${VERSION}-py3-none-any.whl" "${WORKSPACE}"
popd
else
which conda

# Clean conda cache
conda clean --all --force-pkgs-dirs -y

@@ -183,7 +242,7 @@ pushd "${TEMP_TEST_DIR}"
COMMON_PYTEST_FLAG=()
COMMON_PYTEST_FLAG+=(--strict-markers) # Enforce strict pytest markers to avoid typos in markers
COMMON_PYTEST_FLAG+=(--import-mode=append)
COMMON_PYTEST_FLAG+=(-n 10)
COMMON_PYTEST_FLAG+=(-n logical)
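# "-n logical" = one pytest-xdist worker per logical CPU; requires psutil (installed below).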

if [ "${ENV}" = "pip" ]; then
# Copy wheel package
@@ -196,10 +255,10 @@ if [ "${ENV}" = "pip" ]; then
# otherwise it will fail in dependency resolution.
python3.8 -m pip install --upgrade pip
python3.8 -m pip list
python3.8 -m pip install "snowflake_ml_python-${VERSION}-py3-none-any.whl[all]" pytest-xdist inflection --no-cache-dir --force-reinstall
python3.8 -m pip install "snowflake_ml_python-${VERSION}-py3-none-any.whl[all]" pytest-xdist[psutil] -r "${WORKSPACE}/${SNOWML_DIR}/requirements.txt" --no-cache-dir --force-reinstall
if [ "${WITH_SNOWPARK}" = true ]; then
cp "$(find "${WORKSPACE}" -maxdepth 1 -iname 'snowflake_snowpark_python-*.whl')" "${TEMP_TEST_DIR}"
python3.8 -m pip install "$(find . -maxdepth 1 -iname 'snowflake_snowpark_python-*.whl')" --force-reinstall
python3.8 -m pip install "$(find . -maxdepth 1 -iname 'snowflake_snowpark_python-*.whl')" --no-deps --force-reinstall
fi
python3.8 -m pip list

@@ -216,12 +275,12 @@ else
conda clean --all --force-pkgs-dirs -y

# Create testing env
conda create -y -p testenv -c "file://${WORKSPACE}/conda-bld" -c "https://repo.anaconda.com/pkgs/snowflake/" --override-channels "python=3.8" snowflake-ml-python pytest-xdist inflection "${OPTIONAL_REQUIREMENTS[@]}"
conda create -y -p testenv -c "${WORKSPACE}/conda-bld" -c "https://repo.anaconda.com/pkgs/snowflake/" --override-channels "python=3.8" snowflake-ml-python pytest-xdist psutil inflection "${OPTIONAL_REQUIREMENTS[@]}"
conda list -p testenv

# Run integration tests
set +e
TEST_SRCDIR="${TEMP_TEST_DIR}" conda run -p testenv --no-capture-output python3.8 -m pytest "${COMMON_PYTEST_FLAG[@]}" tests/integ/
TEST_SRCDIR="${TEMP_TEST_DIR}" conda run -p testenv --no-capture-output python -m pytest "${COMMON_PYTEST_FLAG[@]}" tests/integ/
TEST_RETCODE=$?
set -e

6 changes: 4 additions & 2 deletions ci/conda_recipe/meta.yaml
@@ -17,7 +17,7 @@ build:
noarch: python
package:
name: snowflake-ml-python
version: 1.0.9
version: 1.0.10
requirements:
build:
- python
@@ -27,11 +27,12 @@ requirements:
- aiohttp!=4.0.0a0, !=4.0.0a1
- anyio>=3.5.0,<4
- cachetools>=3.1.1,<5
- cloudpickle
- cloudpickle>=2.0.0
- fsspec>=2022.11,<2024
- numpy>=1.23,<2
- packaging>=20.9,<24
- pandas>=1.0.0,<2
- pytimeparse>=1.1.8,<2
- pyyaml>=6.0,<7
- requests
- s3fs>=2022.11,<2024
@@ -49,6 +50,7 @@ requirements:
- sentencepiece>=0.1.95,<0.2
- shap==0.42.1
- tensorflow>=2.9,<3
- tokenizers>=0.10,<1
- torchdata>=0.4,<1
- transformers>=4.29.2,<5
source:
2 changes: 1 addition & 1 deletion ci/get_excluded_tests.sh
@@ -87,7 +87,7 @@ if [[ $mode = "unaffected" || $mode = "all" ]]; then
# -- Begin of Query Rules Heredoc --
cat >"${unaffected_test_rule_file}" <<EndOfMessage
let unaffected_targets = //tests/... - rdeps(//tests/..., set($(<"${affected_targets_file}"))) in
kind('source file', labels(srcs, set($(<ci/skip_merge_gate_targets)) + kind('py_test rule', \$unaffected_targets)))
kind('source file', labels(srcs, set($(<ci/skip_merge_gate_targets)) + kind('py_test rule', \$unaffected_targets)) - labels(srcs, rdeps(//tests/..., set($(<"${affected_targets_file}")))))
EndOfMessage
# -- End of Query Rules Heredoc --
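# The new subtraction keeps source files that also feed affected targets, so shared test sources are not excluded.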

1 change: 1 addition & 0 deletions codegen/codegen_rules.bzl
@@ -91,6 +91,7 @@ def autogen_estimators(module, estimator_info_list):
"//snowflake/ml/_internal/utils:identifier",
"//snowflake/ml/model:model_signature",
"//snowflake/ml/model/_signatures:utils",
"//snowflake/ml/modeling/_internal:estimator_utils",
],
)

8 changes: 4 additions & 4 deletions codegen/sklearn_wrapper_autogen.py
@@ -113,7 +113,7 @@ def _generate_src_files(
List of generated files.
"""

template = open(self.template_path).read()
template = open(self.template_path, encoding="utf-8").read()

generated_files_list = []
for generator in generators:
@@ -130,7 +130,7 @@
# Create the output src dir if it doesn't exist already.
os.makedirs("/".join(output_file_name.split("/")[:-1]), exist_ok=True)

open(output_file_name, "w").write(wrapped_transform_string)
open(output_file_name, "w", encoding="utf-8").write(wrapped_transform_string)
logging.info("Wrote file %s", output_file_name)

return generated_files_list
@@ -149,7 +149,7 @@ def _generate_test_files(
Returns:
List of generated files.
"""
test_template = open(self.template_path).read()
test_template = open(self.template_path, encoding="utf-8").read()

generated_files_list = []
for generator in generators:
@@ -166,7 +166,7 @@
# Create the output test dir if it doesn't exist already.
os.makedirs("/".join(test_output_file_name.split("/")[:-1]), exist_ok=True)

open(test_output_file_name, "w").write(wrapped_transform_string)
open(test_output_file_name, "w", encoding="utf-8").write(wrapped_transform_string)
logging.info("Wrote file %s", test_output_file_name)

return generated_files_list
4 changes: 2 additions & 2 deletions codegen/sklearn_wrapper_generator.py
@@ -722,9 +722,9 @@ def _populate_function_names_and_signatures(self) -> None:
for arg_to_transform in args_to_transform:
if arg_to_transform in self.original_init_signature.parameters.keys():
arg_transform_calls.append(
f"{arg_to_transform} = _transform_snowml_obj_to_sklearn_obj({arg_to_transform})"
f"{arg_to_transform} = transform_snowml_obj_to_sklearn_obj({arg_to_transform})"
)
deps_gathering_calls.append(f"deps = deps | _gather_dependencies({arg_to_transform})")
deps_gathering_calls.append(f"deps = deps | gather_dependencies({arg_to_transform})")

self.estimator_init_signature = ",\n ".join(signature_lines) + ","
self.sklearn_init_arguments = ",\n ".join(sklearn_init_lines) + ","