Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sklearnserver: ignore converting instances into np.array (#1972)
* sklearnserver: ignore converting instances into np.array
  Signed-off-by: Suresh Nakkeran <suresh.n@ideas2it.com>
* sklearnserver: added a test for multi datatype
  Signed-off-by: Suresh Nakkeran <suresh.n@ideas2it.com>
* pylint and pybuild issue fix
  Signed-off-by: Suresh Nakkeran <suresh.n@ideas2it.com>
* move sklearn multi datatype model build scripts to docs
  Signed-off-by: Suresh Nakkeran <suresh.n@ideas2it.com>
* 1. added isvc example for sklearn mixedtype model
  2. multi-datatype renamed to mixedtype
  Signed-off-by: Suresh Nakkeran <suresh.n@ideas2it.com>
- Loading branch information
1 parent
343bae6
commit c666680
Showing
12 changed files
with
1,839 additions
and
9 deletions.
There are no files selected for viewing
Empty file.
11 changes: 11 additions & 0 deletions
11
docs/samples/v1beta1/sklearn/v1/sklearn-mixedtype-model/custom_transformer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import pandas as pd | ||
from sklearn.base import TransformerMixin | ||
|
||
|
||
class DictToDFTransformer(TransformerMixin):
    """Stateless transformer that wraps its input in a pandas DataFrame.

    Nothing is learned from the data: ``fit`` is a no-op and ``transform``
    simply hands ``X`` to the ``pd.DataFrame`` constructor (for example, a
    list of per-row dicts becomes one row per dict).
    """

    def fit(self, X, y=None):
        """Return this transformer unchanged; ``X`` and ``y`` are ignored."""
        return self

    def transform(self, X, y=None):
        """Build and return ``pd.DataFrame(X)``; ``y`` is ignored."""
        frame = pd.DataFrame(X)
        return frame
5 changes: 5 additions & 0 deletions
5
docs/samples/v1beta1/sklearn/v1/sklearn-mixedtype-model/mixedtype-input.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"instances": [ | ||
{"MSZoning": "RL", "LotArea": 8450, "LotShape": "Reg", "Utilities": "AllPub", "YrSold": 2008, "Neighborhood": "CollgCr", "OverallQual": 7, "YearBuilt": 2003, "SaleType": "WD", "GarageArea": 548} | ||
] | ||
} |
299 changes: 299 additions & 0 deletions
299
docs/samples/v1beta1/sklearn/v1/sklearn-mixedtype-model/sklearn-mixedtype-model.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,299 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "58b1517b", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"from sklearn.impute import SimpleImputer\n", | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"from sklearn.pipeline import Pipeline, make_pipeline\n", | ||
"from sklearn.compose import ColumnTransformer\n", | ||
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", | ||
"from sklearn.linear_model import SGDRegressor\n", | ||
"from custom_transformer import DictToDFTransformer\n", | ||
"import joblib" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "137b49fd", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"train_data = pd.read_csv(\"train.csv\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "7f091668", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"features = [\"MSZoning\",\n", | ||
" \"LotArea\",\n", | ||
" \"LotShape\",\n", | ||
" \"Utilities\",\n", | ||
" \"YrSold\",\n", | ||
" \"Neighborhood\",\n", | ||
" \"OverallQual\",\n", | ||
" \"YearBuilt\",\n", | ||
" \"SaleType\",\n", | ||
" \"GarageArea\"]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "66be31ab", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df = train_data[features]\n", | ||
"y = np.log1p(train_data[\"SalePrice\"])\n", | ||
"categorical_features = df.select_dtypes(object)\n", | ||
"numerical_features = df.select_dtypes(exclude=object)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "23eae04b", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"p = Pipeline(\n", | ||
" [\n", | ||
" (\"dicttodf\", DictToDFTransformer()),\n", | ||
" (\n", | ||
" \"preprocess\",\n", | ||
" ColumnTransformer(\n", | ||
" [\n", | ||
" (\n", | ||
" \"numerical\",\n", | ||
" make_pipeline(\n", | ||
" SimpleImputer(strategy=\"mean\"),\n", | ||
" StandardScaler(),\n", | ||
" ),\n", | ||
" sorted(numerical_features.columns),\n", | ||
" ),\n", | ||
" (\n", | ||
" \"categorical\",\n", | ||
" make_pipeline(\n", | ||
" SimpleImputer(strategy=\"most_frequent\"),\n", | ||
" OneHotEncoder(handle_unknown=\"ignore\", sparse=False),\n", | ||
" ),\n", | ||
" sorted(categorical_features.columns),\n", | ||
" ),\n", | ||
" ]\n", | ||
" ),\n", | ||
" ),\n", | ||
" (\"regressor\", SGDRegressor(random_state=666, learning_rate=\"adaptive\")),\n", | ||
" ]\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "3f6a55b5", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"model = p.fit(df, y)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"id": "c198c543", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"Pipeline(steps=[('dicttodf',\n", | ||
" <custom_transformer.DictToDFTransformer object at 0x7fa5120c8e20>),\n", | ||
" ('preprocess',\n", | ||
" ColumnTransformer(transformers=[('numerical',\n", | ||
" Pipeline(steps=[('simpleimputer',\n", | ||
" SimpleImputer()),\n", | ||
" ('standardscaler',\n", | ||
" StandardScaler())]),\n", | ||
" ['GarageArea', 'LotArea',\n", | ||
" 'OverallQual', 'YearBuilt',\n", | ||
" 'YrSold']),\n", | ||
" ('categorical',\n", | ||
" Pipeline(steps=[('simpleimputer',\n", | ||
" SimpleImputer(strategy='most_frequent')),\n", | ||
" ('onehotencoder',\n", | ||
" OneHotEncoder(handle_unknown='ignore',\n", | ||
" sparse=False))]),\n", | ||
" ['LotShape', 'MSZoning',\n", | ||
" 'Neighborhood', 'SaleType',\n", | ||
" 'Utilities'])])),\n", | ||
" ('regressor',\n", | ||
" SGDRegressor(learning_rate='adaptive', random_state=666))])" | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"model" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"id": "5a96ee5e", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"['model.joblib']" | ||
] | ||
}, | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"joblib.dump(model, \"model.joblib\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"id": "f3cdeb3f", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"['GarageArea',\n", | ||
" 'LotArea',\n", | ||
" 'OverallQual',\n", | ||
" 'YearBuilt',\n", | ||
" 'YrSold',\n", | ||
" 'LotShape',\n", | ||
" 'MSZoning',\n", | ||
" 'Neighborhood',\n", | ||
" 'SaleType',\n", | ||
" 'Utilities']" | ||
] | ||
}, | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"cols = sorted(numerical_features.columns) + sorted(categorical_features.columns)\n", | ||
"cols" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"id": "c2093efa", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[{'GarageArea': 548,\n", | ||
" 'LotArea': 8450,\n", | ||
" 'OverallQual': 7,\n", | ||
" 'YearBuilt': 2003,\n", | ||
" 'YrSold': 2008,\n", | ||
" 'LotShape': 'Reg',\n", | ||
" 'MSZoning': 'RL',\n", | ||
" 'Neighborhood': 'CollgCr',\n", | ||
" 'SaleType': 'WD',\n", | ||
" 'Utilities': 'AllPub'}]" | ||
] | ||
}, | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"sample_request = df[cols].head(1).to_dict('records')\n", | ||
"sample_request" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"id": "b3d18862", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"request = [{'MSZoning': 'RL', 'LotArea': 8450, 'LotShape': 'Reg', 'Utilities': 'AllPub', 'YrSold': 2008, 'Neighborhood': 'CollgCr', 'OverallQual': 7, 'YearBuilt': 2003, 'SaleType': 'WD', 'GarageArea': 548}]\n", | ||
"response = model.predict(sample_request)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"id": "3462c4e7", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"array([12.20283282])" | ||
] | ||
}, | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"response" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "d0d82a16", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.10" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
8 changes: 8 additions & 0 deletions
8
docs/samples/v1beta1/sklearn/v1/sklearn-mixedtype-model/sklearn-mixedtype.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
apiVersion: "serving.kserve.io/v1beta1" | ||
kind: "InferenceService" | ||
metadata: | ||
name: "sklearn-mixedtype" | ||
spec: | ||
predictor: | ||
sklearn: | ||
storageUri: "gs://kfserving-examples/models/sklearn/1.0/mixedtype" |
Oops, something went wrong.