Skip to content

Commit

Permalink
sklearnserver: ignore converting instances into np.array (#1972)
Browse files Browse the repository at this point in the history
* sklearnserver: ignore converting instances into np.array

Signed-off-by: Suresh Nakkeran <suresh.n@ideas2it.com>

* sklearnserver: added a test for multi datatype

Signed-off-by: Suresh Nakkeran <suresh.n@ideas2it.com>

* pylint and pybuild issue fix

Signed-off-by: Suresh Nakkeran <suresh.n@ideas2it.com>

* move sklearn multi datatype model build scripts to docs

Signed-off-by: Suresh Nakkeran <suresh.n@ideas2it.com>

* 1. added isvc example for sklearn mixedtype model
2. multi-datatype renamed to mixedtype

Signed-off-by: Suresh Nakkeran <suresh.n@ideas2it.com>
  • Loading branch information
Suresh-Nakkeran committed Jan 13, 2022
1 parent 343bae6 commit c666680
Show file tree
Hide file tree
Showing 12 changed files with 1,839 additions and 9 deletions.
Empty file.
@@ -0,0 +1,11 @@
import pandas as pd
from sklearn.base import TransformerMixin


class DictToDFTransformer(TransformerMixin):

def transform(self, X, y=None):
return pd.DataFrame(X)

def fit(self, X, y=None):
return self
@@ -0,0 +1,5 @@
{
"instances": [
{"MSZoning": "RL", "LotArea": 8450, "LotShape": "Reg", "Utilities": "AllPub", "YrSold": 2008, "Neighborhood": "CollgCr", "OverallQual": 7, "YearBuilt": 2003, "SaleType": "WD", "GarageArea": 548}
]
}
@@ -0,0 +1,299 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "58b1517b",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.impute import SimpleImputer\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.pipeline import Pipeline, make_pipeline\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.linear_model import SGDRegressor\n",
"from custom_transformer import DictToDFTransformer\n",
"import joblib"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "137b49fd",
"metadata": {},
"outputs": [],
"source": [
"train_data = pd.read_csv(\"train.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7f091668",
"metadata": {},
"outputs": [],
"source": [
"features = [\"MSZoning\",\n",
" \"LotArea\",\n",
" \"LotShape\",\n",
" \"Utilities\",\n",
" \"YrSold\",\n",
" \"Neighborhood\",\n",
" \"OverallQual\",\n",
" \"YearBuilt\",\n",
" \"SaleType\",\n",
" \"GarageArea\"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "66be31ab",
"metadata": {},
"outputs": [],
"source": [
"df = train_data[features]\n",
"y = np.log1p(train_data[\"SalePrice\"])\n",
"categorical_features = df.select_dtypes(object)\n",
"numerical_features = df.select_dtypes(exclude=object)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "23eae04b",
"metadata": {},
"outputs": [],
"source": [
"p = Pipeline(\n",
" [\n",
" (\"dicttodf\", DictToDFTransformer()),\n",
" (\n",
" \"preprocess\",\n",
" ColumnTransformer(\n",
" [\n",
" (\n",
" \"numerical\",\n",
" make_pipeline(\n",
" SimpleImputer(strategy=\"mean\"),\n",
" StandardScaler(),\n",
" ),\n",
" sorted(numerical_features.columns),\n",
" ),\n",
" (\n",
" \"categorical\",\n",
" make_pipeline(\n",
" SimpleImputer(strategy=\"most_frequent\"),\n",
" OneHotEncoder(handle_unknown=\"ignore\", sparse=False),\n",
" ),\n",
" sorted(categorical_features.columns),\n",
" ),\n",
" ]\n",
" ),\n",
" ),\n",
" (\"regressor\", SGDRegressor(random_state=666, learning_rate=\"adaptive\")),\n",
" ]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3f6a55b5",
"metadata": {},
"outputs": [],
"source": [
"model = p.fit(df, y)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c198c543",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Pipeline(steps=[('dicttodf',\n",
" <custom_transformer.DictToDFTransformer object at 0x7fa5120c8e20>),\n",
" ('preprocess',\n",
" ColumnTransformer(transformers=[('numerical',\n",
" Pipeline(steps=[('simpleimputer',\n",
" SimpleImputer()),\n",
" ('standardscaler',\n",
" StandardScaler())]),\n",
" ['GarageArea', 'LotArea',\n",
" 'OverallQual', 'YearBuilt',\n",
" 'YrSold']),\n",
" ('categorical',\n",
" Pipeline(steps=[('simpleimputer',\n",
" SimpleImputer(strategy='most_frequent')),\n",
" ('onehotencoder',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse=False))]),\n",
" ['LotShape', 'MSZoning',\n",
" 'Neighborhood', 'SaleType',\n",
" 'Utilities'])])),\n",
" ('regressor',\n",
" SGDRegressor(learning_rate='adaptive', random_state=666))])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5a96ee5e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['model.joblib']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joblib.dump(model, \"model.joblib\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f3cdeb3f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['GarageArea',\n",
" 'LotArea',\n",
" 'OverallQual',\n",
" 'YearBuilt',\n",
" 'YrSold',\n",
" 'LotShape',\n",
" 'MSZoning',\n",
" 'Neighborhood',\n",
" 'SaleType',\n",
" 'Utilities']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cols = sorted(numerical_features.columns) + sorted(categorical_features.columns)\n",
"cols"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c2093efa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'GarageArea': 548,\n",
" 'LotArea': 8450,\n",
" 'OverallQual': 7,\n",
" 'YearBuilt': 2003,\n",
" 'YrSold': 2008,\n",
" 'LotShape': 'Reg',\n",
" 'MSZoning': 'RL',\n",
" 'Neighborhood': 'CollgCr',\n",
" 'SaleType': 'WD',\n",
" 'Utilities': 'AllPub'}]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_request = df[cols].head(1).to_dict('records')\n",
"sample_request"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "b3d18862",
"metadata": {},
"outputs": [],
"source": [
"request = [{'MSZoning': 'RL', 'LotArea': 8450, 'LotShape': 'Reg', 'Utilities': 'AllPub', 'YrSold': 2008, 'Neighborhood': 'CollgCr', 'OverallQual': 7, 'YearBuilt': 2003, 'SaleType': 'WD', 'GarageArea': 548}]\n",
"response = model.predict(sample_request)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "3462c4e7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([12.20283282])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0d82a16",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -0,0 +1,8 @@
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
name: "sklearn-mixedtype"
spec:
predictor:
sklearn:
storageUri: "gs://kfserving-examples/models/sklearn/1.0/mixedtype"

0 comments on commit c666680

Please sign in to comment.