In [1]:
import os
import pickle

from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# Data

In [2]:
# Relative path of the on-disk pickle used as a local cache of the iris data.
data_file_path = "../data/iris.pkl"

In [3]:
# Load the iris dataset from the local pickle cache when the file exists;
# otherwise fetch it through scikit-learn and write the cache for later runs.
if os.path.isfile(data_file_path):
    # NOTE: pickle.load can execute arbitrary code -- only read cache files
    # that were produced by this notebook.
    with open(data_file_path, 'rb') as p:
        iris = pickle.load(p)
else:
    iris = datasets.load_iris()
    # Create the cache directory first: open(..., 'wb') raises
    # FileNotFoundError when the parent directory is missing.
    os.makedirs(os.path.dirname(data_file_path) or '.', exist_ok=True)
    with open(data_file_path, 'wb') as p:
        pickle.dump(iris, p, pickle.HIGHEST_PROTOCOL)

In [4]:
# Pull the two arrays used below out of the loaded dataset object.
X = iris.data
y = iris.target

In [5]:
# Display the contents of X as the cell output.
# NOTE(review): this prints the whole array; a slice such as X[:5] would keep
# the rendered output short.
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [7]:
# Number of entries in the target array.
len(y)

150

In [8]:
# Hold out 30% of the rows for evaluation. The seed is fixed so that the split,
# and therefore the score reported below, is the same on every re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Model training: logistic regression

In [9]:
# Scaling step, constructed with default settings; used as the first pipeline step.
scaler = StandardScaler()

In [10]:
# Logistic-regression estimator with default hyperparameters; used as the
# final pipeline step.
logistic = LogisticRegression()

In [11]:
# Chain the scaler and the classifier so both are fitted and applied together.
clf = Pipeline(
    steps=[
        ("scaler", scaler),
        ("logistic", logistic),
    ]
)

In [12]:
# Fit the whole pipeline on the training split.
clf.fit(X_train, y_train)

In [13]:
# Score the fitted pipeline on the held-out split (mean accuracy for a classifier).
clf.score(X_test, y_test)

1.0

# Inference

In [14]:
# Two hand-written samples, four values each, used to try out the fitted model.
x = [
    [6, 3, 4, 1],
    [5, 2.5, 4.1, 2],
]

In [15]:
# Predict a class label for each of the samples in `x`.
clf.predict(x)

array([1, 2])

In [16]:
# Display the pipeline object as the cell output.
clf

# Save model

In [17]:
# Serialize the fitted pipeline to disk with pickle.
# Create the target directory first: open(..., 'wb') raises FileNotFoundError
# when '../models' does not exist yet.
os.makedirs('../models', exist_ok=True)
with open('../models/model.pkl', 'wb') as p:
    pickle.dump(clf, p, pickle.HIGHEST_PROTOCOL)

In [18]:
# Display the pipeline object again after the save step.
# NOTE(review): repeats the display cell that precedes the save; consider
# removing one of the two.
clf

In [19]:
# Plain-string representation of the pipeline.
str(clf)

"Pipeline(steps=[('scaler', StandardScaler()),\n                ('logistic', LogisticRegression())])"

In [20]:
# Dictionary of the pipeline's parameters, including those of each step
# (step parameters appear under `<step>__<param>` keys).
clf.get_params()

{'memory': None,
 'steps': [('scaler', StandardScaler()), ('logistic', LogisticRegression())],
 'verbose': False,
 'scaler': StandardScaler(),
 'logistic': LogisticRegression(),
 'scaler__copy': True,
 'scaler__with_mean': True,
 'scaler__with_std': True,
 'logistic__C': 1.0,
 'logistic__class_weight': None,
 'logistic__dual': False,
 'logistic__fit_intercept': True,
 'logistic__intercept_scaling': 1,
 'logistic__l1_ratio': None,
 'logistic__max_iter': 100,
 'logistic__multi_class': 'auto',
 'logistic__n_jobs': None,
 'logistic__penalty': 'l2',
 'logistic__random_state': None,
 'logistic__solver': 'lbfgs',
 'logistic__tol': 0.0001,
 'logistic__verbose': 0,
 'logistic__warm_start': False}