Permalink
Browse files

Added example file for LinearSVC with TfidfVectorizer

  • Loading branch information...
abhi621 committed Dec 31, 2018
1 parent 510f6d1 commit ceef6f5b1b65205fb6abbe94b22869ece6aa5291
Showing with 144 additions and 0 deletions.
  1. +144 −0 examples/skl/LinearSVC_with_TfidfVectorizer.ipynb
@@ -0,0 +1,144 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# scikit-learn → PMML"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exporter: LinearSVC"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Data set used: auto-mpg"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Steps: "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### - Build a Pipeline that combines pre-processing (feature scaling and a TF-IDF vectorizer) with a scikit-learn LinearSVC model\n",
"##### - Export the fitted Pipeline to PMML using the Nyoka exporter"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model building (using a Pipeline) for the auto-mpg dataset"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Pipeline(memory=None,\n",
" steps=[('mapper', DataFrameMapper(default=False, df_out=False,\n",
" features=[(['mpg', 'displacement', 'horsepower'], [MinMaxScaler(copy=True, feature_range=(0, 1))]), (['weight', 'acceleration'], [StandardScaler(copy=True, with_mean=True, with_std=True)]), ('car name', TfidfVectorizer(analyzer='...ax_iter=1000,\n",
" multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
" verbose=0))])"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn_pandas import DataFrameMapper\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.preprocessing import MinMaxScaler,StandardScaler\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.svm import LinearSVC\n",
"\n",
"\n",
"# Load the auto-mpg data; the CSV is read from a path relative to the working directory.\n",
"df = pd.read_csv('auto-mpg.csv')\n",
"\n",
"# 'cylinders' is the prediction target; 'model year' and 'origin' are also left out of the features.\n",
"target_name = \"cylinders\"\n",
"excluded_columns = [target_name, 'model year', 'origin']\n",
"X = df.drop(excluded_columns, axis=1)\n",
"y = df[target_name]\n",
"feature_names = X.columns\n",
"\n",
"# Column-wise pre-processing: min-max scaling, standardisation, and TF-IDF on the text column.\n",
"mapper = DataFrameMapper([\n",
"    (['mpg', 'displacement', 'horsepower'], [MinMaxScaler()]),\n",
"    (['weight', 'acceleration'], [StandardScaler()]),\n",
"    ('car name', TfidfVectorizer()),\n",
"])\n",
"\n",
"# Chain the mapper with the classifier, then fit; the fitted Pipeline is displayed as the cell output.\n",
"pipeline_obj = Pipeline([\n",
"    ('mapper', mapper),\n",
"    ('model', LinearSVC()),\n",
"])\n",
"pipeline_obj.fit(X, y)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Export the Pipeline object into PMML using the Nyoka package"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from nyoka import skl_to_pmml\n",
"\n",
"# Export the fitted pipeline to a PMML file (path is relative to the working directory).\n",
"pmml_file = \"lsvc_tfidf_pmml.pmml\"\n",
"skl_to_pmml(pipeline_obj, feature_names, target_name, pmml_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit ceef6f5

Please sign in to comment.