# Pickle Digits PCA + Logistic Regression

## Modeling


In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import linear_model, decomposition, datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import joblib

import warnings
# Silence every warning for the rest of the session (no category or module filter).
# NOTE(review): this presumably also hides solver convergence warnings raised by
# the fits below -- consider narrowing the filter; confirm intent.
warnings.filterwarnings("ignore")

### Setup PCA to Logistic Regression Pipeline and Fit

#### PCA

In [2]:
# Load the digits dataset and separate the feature matrix from the labels.
digits = datasets.load_digits()
X_digits, y_digits = digits.data, digits.target

# Chain PCA into logistic regression. The step names ('pca', 'logistic') are
# the prefixes used for the '<step>__<param>' keys in the grid search.
pca = decomposition.PCA()
logistic = linear_model.LogisticRegression()
pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])

# Fit the PCA object directly on the full feature matrix; the fitted
# estimator is the cell's displayed output.
pca.fit(X_digits)

PCA()

#### Grid Search over PCA Components and Logistic Regression C

In [3]:
# Candidate values to search: number of PCA components to keep, and the
# logistic regression parameter C (three log-spaced values from 1e-4 to 1e4).
n_components = [20, 40, 64]
Cs = np.logspace(-4, 4, 3)

# Pipeline parameters are addressed with '<step>__<parameter>' names.
estimator = GridSearchCV(
    estimator=pipe,
    param_grid={
        'pca__n_components': n_components,
        'logistic__C': Cs,
    },
)
estimator.fit(X_digits, y_digits)

GridSearchCV(estimator=Pipeline(steps=[('pca', PCA()),
                                       ('logistic', LogisticRegression())]),
             param_grid={'logistic__C': array([1.e-04, 1.e+00, 1.e+04]),
                         'pca__n_components': [20, 40, 64]})

### Predict

In [4]:
# Predict with the fitted grid search on the same X_digits it was fitted on,
# so this is an in-sample sanity check rather than a held-out evaluation.
estimator.predict(X_digits)

array([0, 1, 2, ..., 8, 9, 8])

### Pickle the Model to Disk

In [6]:
# Serialize the whole fitted GridSearchCV object to 'digits_prediction.joblib'
# (a path relative to the working directory); the cell output is the value
# returned by joblib.dump.
joblib.dump(estimator, 'digits_prediction.joblib')

['digits_prediction.joblib']