23 changes: 23 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,28 @@
# Release History

## 1.0.2 (2023-06-22)

### Behavior Changes
- Model Registry: Prohibit non-snowflake-native models from being logged.
- Model Registry: The `_use_local_snowml` parameter has been removed from the options of `deploy()`.
- Model Registry: An `embed_local_ml_library` parameter (default `False`) has been added to the options of `log_model()`. When set to `False` (the default), the version of the local snowflake-ml-python library is recorded and used when deploying the model. When set to `True`, the local snowflake-ml-python library is embedded into the logged model and used when you load or deploy the model.
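
A minimal, hedged sketch of the new option (the registry construction, session setup, and surrounding argument values are illustrative assumptions; only `embed_local_ml_library` and the `options` dictionary come from this release):

```python
# Illustrative sketch: embed the local snowflake-ml-python library into the logged model.
from snowflake.ml.registry import model_registry
from snowflake.ml.utils.connection_params import SnowflakeLoginOptions
from snowflake.snowpark import Session

session = Session.builder.configs(SnowflakeLoginOptions()).create()
registry = model_registry.ModelRegistry(session=session, database_name="ML_DB", schema_name="REGISTRY")
registry.log_model(
    model_name="my_model",
    model_version="1.0.0",
    model=trained_model,  # a fitted Snowflake-native model, assumed to exist
    options={"embed_local_ml_library": True},  # default False: record the library version instead of embedding it
)
```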

### New Features
- Model Registry: A new optional `code_paths` argument has been added to `log_model()` so users can specify additional code paths to be imported when loading and deploying the model.
- Model Registry: A new optional `options` argument has been added to `log_model()` to specify any additional options when saving the model.
- Model Development: Added metrics:
  - d2_absolute_error_score
  - d2_pinball_score
  - explained_variance_score
  - mean_absolute_error
  - mean_absolute_percentage_error
  - mean_squared_error
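
A minimal sketch of two of the new metrics on a Snowpark DataFrame (the module path and keyword-argument names are assumptions based on the library's metrics API; `df`, `"LABEL"`, and `"PREDICTION"` are placeholders):

```python
# Illustrative only: compute two of the newly added regression metrics.
from snowflake.ml.modeling.metrics import d2_absolute_error_score, mean_absolute_error

# df is assumed to be a Snowpark DataFrame containing ground-truth and prediction columns.
mae = mean_absolute_error(df=df, y_true_col_names="LABEL", y_pred_col_names="PREDICTION")
d2 = d2_absolute_error_score(df=df, y_true_col_names="LABEL", y_pred_col_names="PREDICTION")
print(mae, d2)
```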

### Bug Fixes

- Model Development: `accuracy_score()` now works when the given label column names are lists containing a single value.


## 1.0.1 (2023-06-16)
### Behavior Changes

203 changes: 203 additions & 0 deletions ci/build_and_run_tests.sh
@@ -0,0 +1,203 @@
#!/bin/bash

# Usage
# build_and_run_tests.sh <workspace> [--env pip|conda] [--with-snowpark]
#
# Args
# workspace: path to the workspace; the SnowML code should be in the snowml directory.
#
# Optional Args
# env: set the test environment; choose from pip or conda.
# with-snowpark: build and test with Snowpark from the snowpark-python directory in the workspace.
#
# Action
# - Copy the integration tests from the workspace folder and execute them in the testing Python env using pytest.
# - This mimics the behavior of using the snowml wheel package in user land.
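#
# Example (illustrative workspace path):
#   ./ci/build_and_run_tests.sh /path/to/workspace --env conda --with-snowpark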

set -o pipefail
set -eu

PROG=$0

help()
{
    exit_code=$1
    echo "Invalid usage, must provide argument for workspace"
    echo "Usage: ${PROG} <workspace> [--env pip|conda] [--with-snowpark]"
    exit ${exit_code}
}

WORKSPACE=$1 && shift || help 1
ENV="pip"
WITH_SNOWPARK=false
SNOWML_DIR="snowml"
SNOWPARK_DIR="snowpark-python"

while (($#)); do
    case $1 in
    -e|--env)
        shift
        if [[ $1 = "pip" || $1 = "conda" ]]; then
            ENV=$1
        else
            help 1
        fi
        ;;
    --with-snowpark)
        WITH_SNOWPARK=true
        ;;
    -h|--help)
        help 0
        ;;
    *)
        help 1
        ;;
    esac
    shift
done

# Check that Python 3.8 exists
# TODO(SNOW-845592): ideally we should download py3.8 from conda if it does not exist. Currently we just fail.
set +eu
source /opt/rh/rh-python38/enable
PYTHON38_EXIST=$?
if [ $PYTHON38_EXIST -ne 0 ]; then
    echo "Failed to execute tests: Python3.8 is not installed."
    rm -rf "${TEMP_TEST_DIR}"
    exit ${PYTHON38_EXIST}
fi
set -eu

cd "${WORKSPACE}"

# Create temp directory for running the tests
TEMP_TEST_DIR=$(mktemp -d "${WORKSPACE}/tmp_XXXXX")

pushd ${SNOWML_DIR}
# Get the version from snowflake/ml/version.bzl
VERSION=$(grep -oE "VERSION = \"[0-9]+\\.[0-9]+\\.[0-9]+.*\"" snowflake/ml/version.bzl | cut -d'"' -f2)
echo "Extracted Package Version from code: ${VERSION}"

# Get optional requirements from snowflake/ml/requirements.bzl
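# The .bzl file is valid Python for this purpose: exec it and print the 'all' extras as space-separated, double-quoted requirement strings.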
OPTIONAL_REQUIREMENTS=$(cat snowflake/ml/requirements.bzl | python3 -c "import sys; exec(sys.stdin.read()); print(' '.join(map(lambda x: '\"'+x+'\"', EXTRA_REQUIREMENTS['all'])))")

# Compare test-required dependencies with the wheel package dependencies and exclude tests if necessary
EXCLUDE_TESTS=$(mktemp "${TEMP_TEST_DIR}/exclude_tests_XXXXX")
./ci/get_excluded_tests.sh -f "${EXCLUDE_TESTS}"
# Copy tests into temp directory
pushd "${TEMP_TEST_DIR}"
rsync -av --exclude-from "${EXCLUDE_TESTS}" "${WORKSPACE}/${SNOWML_DIR}/tests" .
ls tests/integ/snowflake/ml
popd
popd

# Build snowml package
if [ ${ENV} = "pip" ]; then
    # Clean build workspace
    rm -f ${WORKSPACE}/*.whl

    # Build Snowpark
    if [ "${WITH_SNOWPARK}" = true ]; then
        pushd ${SNOWPARK_DIR}
        rm -rf venv
        python3.8 -m venv venv
        source venv/bin/activate
        python3.8 -m pip install -U pip setuptools wheel
        echo "Building snowpark wheel from main:$(git rev-parse HEAD)."
        pip wheel . --no-deps
        cp snowflake_snowpark_python-*.whl ${WORKSPACE}
        deactivate
        popd
    fi

    # Build SnowML
    pushd ${SNOWML_DIR}
    bazel build //snowflake/ml:wheel
    cp bazel-bin/snowflake/ml/snowflake_ml_python-*.whl ${WORKSPACE}
    popd
else
    which conda

    # Clean conda build workspace
    rm -rf ${WORKSPACE}/conda-bld

    # Build Snowpark
    if [ "${WITH_SNOWPARK}" = true ]; then
        pushd ${SNOWPARK_DIR}
        conda build recipe/ --python=3.8 --numpy=1.16 --croot "${WORKSPACE}/conda-bld"
        popd
    fi

    # Build SnowML
    pushd ${SNOWML_DIR}
    # Build conda package
    conda build --channel=conda-forge --prefix-length 50 --croot "${WORKSPACE}/conda-bld" ci/conda_recipe
    conda build purge
    popd
fi

# Start testing
pushd "${TEMP_TEST_DIR}"

# Set up common pytest flags
COMMON_PYTEST_FLAG=()
COMMON_PYTEST_FLAG+=(--strict-markers) # Enforce strict pytest markers to catch typos in marker names
COMMON_PYTEST_FLAG+=(--import-mode=append)
COMMON_PYTEST_FLAG+=(-n 10)
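# Note: --import-mode=append keeps the installed package (wheel or conda) ahead of the copied test
# sources on sys.path, and -n 10 runs the tests on 10 parallel workers via pytest-xdist.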


if [ ${ENV} = "pip" ]; then
    # Copy wheel package
    cp "${WORKSPACE}/snowflake_ml_python-${VERSION}-py3-none-any.whl" "${TEMP_TEST_DIR}"

    # Create testing env
    python3.8 -m venv testenv
    source testenv/bin/activate
    # Install all of the packages in a single command;
    # otherwise dependency resolution will fail.
    python3.8 -m pip install --upgrade pip
    python3.8 -m pip list
    python3.8 -m pip install "snowflake_ml_python-${VERSION}-py3-none-any.whl[all]" pytest-xdist inflection --no-cache-dir --force-reinstall
    if [ "${WITH_SNOWPARK}" = true ]; then
        cp ${WORKSPACE}/snowflake_snowpark_python-*.whl "${TEMP_TEST_DIR}"
        python3.8 -m pip install $(find . -maxdepth 1 -iname 'snowflake_snowpark_python-*.whl') --force-reinstall
    fi
    python3.8 -m pip list

    # Set up pip-specific pytest flags
    PIP_PYTEST_FLAG=()
    PIP_PYTEST_FLAG+=(-m "not pip_incompatible") # Filter out pip-incompatible tests.

    # Run the tests
    set +e
    TEST_SRCDIR="${TEMP_TEST_DIR}" python3.8 -m pytest "${COMMON_PYTEST_FLAG[@]}" "${PIP_PYTEST_FLAG[@]}" tests/
    TEST_RETCODE=$?
    set -e
else
    # Create local conda channel
    conda index ${WORKSPACE}/conda-bld

    # Clean conda cache
    conda clean --all --force-pkgs-dirs -y

    # Create testing env
    conda create -y -p testenv -c "file://${WORKSPACE}/conda-bld" -c "https://repo.anaconda.com/pkgs/snowflake/" --override-channels "python=3.8" snowflake-ml-python pytest-xdist inflection ${OPTIONAL_REQUIREMENTS}
    conda list -p testenv

    # Run the tests
    set +e
    TEST_SRCDIR="${TEMP_TEST_DIR}" conda run -p testenv --no-capture-output python3.8 -m pytest "${COMMON_PYTEST_FLAG[@]}" tests/
    TEST_RETCODE=$?
    set -e

    # Clean the conda environment
    conda env remove -p testenv
fi

popd

# clean up temp dir
rm -rf "${TEMP_TEST_DIR}"

echo "Done running ${PROG}"
exit ${TEST_RETCODE}
2 changes: 1 addition & 1 deletion ci/conda_recipe/meta.yaml
@@ -17,7 +17,7 @@ build:
  noarch: python
package:
  name: snowflake-ml-python
  version: 1.0.1
  version: 1.0.2
requirements:
  build:
    - python
85 changes: 0 additions & 85 deletions ci/copy_and_run_tests.sh

This file was deleted.

45 changes: 28 additions & 17 deletions codegen/sklearn_wrapper_template.py_template
@@ -548,26 +548,37 @@ class {transform.original_class_name}(BaseTransformer):
# input cols need to match unquoted / quoted
input_cols = self.input_cols
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)

estimator = self._sklearn_object

input_df = dataset[input_cols] # Select input columns with quoted column names.
if hasattr(estimator, "feature_names_in_"):
    missing_features = []
    for i, f in enumerate(getattr(estimator, "feature_names_in_")):
        if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
            missing_features.append(f)

    if len(missing_features) > 0:
        raise ValueError(
            "The feature names should match with those that were passed during fit.\n"
            f"Features seen during fit call but not present in the input: {{missing_features}}\n"
            f"Features in the input dataframe : {{input_cols}}\n"
        )
    input_df.columns = getattr(estimator, "feature_names_in_")
else:
    # Just rename the column names to unquoted identifiers.
    input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
missing_features = []
features_in_dataset = set(dataset.columns)
columns_to_select = []
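# For each feature the estimator expects, accept whichever spelling of the column (as given, unquoted, or quoted) exists in the dataset; anything that cannot be matched is reported as missing.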
for i, f in enumerate(features_required_by_estimator):
    if (
        i >= len(input_cols)
        or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
        or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
            and quoted_input_cols[i] not in features_in_dataset)
    ):
        missing_features.append(f)
    elif input_cols[i] in features_in_dataset:
        columns_to_select.append(input_cols[i])
    elif unquoted_input_cols[i] in features_in_dataset:
        columns_to_select.append(unquoted_input_cols[i])
    else:
        columns_to_select.append(quoted_input_cols[i])

if len(missing_features) > 0:
    raise ValueError(
        "The feature names should match with those that were passed during fit.\n"
        f"Features seen during fit call but not present in the input: {{missing_features}}\n"
        f"Features in the input dataframe : {{input_cols}}\n"
    )
input_df = dataset[columns_to_select]
input_df.columns = features_required_by_estimator

transformed_numpy_array = getattr(estimator, inference_method)(
    input_df
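The column-selection change above exists because Snowflake resolves unquoted identifiers to upper case while quoted identifiers keep their exact case, so an estimator's `feature_names_in_` may match the original, unquoted, or quoted spelling of an input column. A simplified, standalone sketch of that selection rule (plain Python; the helper name and sample columns are hypothetical):

```python
from typing import List, Sequence


def select_matching_columns(
    required: Sequence[str],
    candidates_per_feature: Sequence[Sequence[str]],
    dataset_columns: Sequence[str],
) -> List[str]:
    """For each required feature, pick the first candidate spelling present in the dataset."""
    present = set(dataset_columns)
    selected, missing = [], []
    for feature, candidates in zip(required, candidates_per_feature):
        chosen = next((c for c in candidates if c in present), None)
        if chosen is None:
            missing.append(feature)
        else:
            selected.append(chosen)
    if missing:
        raise ValueError(f"Features seen during fit but not present in the input: {missing}")
    return selected


# An unquoted Snowflake identifier like col1 is stored as COL1; a quoted "col1" keeps its case.
print(select_matching_columns(
    required=["col1", "col2"],
    candidates_per_feature=[["COL1", "col1", '"col1"'], ["COL2", "col2", '"col2"']],
    dataset_columns=["col1", "COL2"],
))  # -> ['col1', 'COL2']
```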
3 changes: 3 additions & 0 deletions codegen/transformer_autogen_test_template.py_template
@@ -7,13 +7,16 @@ import numpy as np
import pandas as pd
import json
import random
import pytest

from typing import Optional, Any
from absl.testing.absltest import TestCase, main
{transform.test_estimator_imports}
from snowflake.ml.utils.connection_params import SnowflakeLoginOptions
from snowflake.snowpark import Session, DataFrame


@pytest.mark.pip_incompatible
class {transform.test_class_name}(TestCase):
    def setUp(self):
        """Creates Snowpark and Snowflake environments for testing."""