## Hello TPOT
Useful reading: [Using TPOT](http://epistasislab.github.io/tpot/using/) — the official guide to TPOT's parameters and workflow.

In [1]:
from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, roc_auc_score, recall_score, confusion_matrix



### Getting the dataset

In [2]:
# Load the bundled digits dataset and hold out 25% of it for final evaluation.
digits = load_digits()
# random_state pins the shuffle so the split (and every score computed from it)
# is the same on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target,
    train_size=0.75, test_size=0.25, random_state=42)

### Perform AutoML with TPOTClassifier

In [3]:
# Evolutionary pipeline search: 5 generations of 20 candidate pipelines, each
# scored with 5-fold cross-validation on 4 worker processes, with a 15-minute
# time budget (max_time_mins).
# random_state seeds the search so the run can be repeated.
# NOTE(review): if the time budget cuts the search short, results may still
# differ between machines even with a fixed seed — confirm against TPOT docs.
pipeline_optimizer = TPOTClassifier(
    generations=5, population_size=20, cv=5, verbosity=2, n_jobs=4,
    max_time_mins=15, random_state=42)
pipeline_optimizer.fit(X_train, y_train)

Optimization Progress:   0%|          | 0/20 [00:00<?, ?pipeline/s]




Generation 1 - Current best internal CV score: 0.9725402726146222

Generation 2 - Current best internal CV score: 0.9784717059066501

Generation 3 - Current best internal CV score: 0.9806911744458212

Generation 4 - Current best internal CV score: 0.9806911744458212

Generation 5 - Current best internal CV score: 0.9859066501445684

Best pipeline: LogisticRegression(PolynomialFeatures(input_matrix, degree=2, include_bias=False, interaction_only=False), C=25.0, dual=False, penalty=l2)


### Inspect and evaluate the pipeline returned by TPOTClassifier

In [4]:
# Score the fitted optimizer on the held-out test split and print the result.
test_score = pipeline_optimizer.score(X_test, y_test)
print(test_score)

0.9711111111111111


In [5]:
def metrics(y_true, y_pred, average='micro'):
    """Print a confusion matrix, accuracy and precision for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels (hard labels, not probabilities).
    average : str, default 'micro'
        Averaging strategy forwarded to ``precision_score``. With the default
        ``'micro'`` on a single-label multiclass problem the precision equals
        the accuracy; pass ``'macro'`` or ``'weighted'`` for a per-class view.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    print('Confusion Matrix:')
    print(confusion_matrix(y_true, y_pred))

    # Scale to a percentage first and round last: rounding before the
    # multiplication lets float error leak into the output
    # (e.g. 100 * 0.57 -> 56.99999999999999).
    accuracy = accuracy_score(y_true, y_pred)
    print(f'accuracy: {round(100 * accuracy, 2)}%')

    precision = precision_score(y_true, y_pred, average=average)
    print(f'Precision score {round(100 * precision, 2)}%')

    # ROC AUC is intentionally not reported here: for multiclass targets
    # roc_auc_score expects per-class probability scores, while this helper
    # only receives hard label predictions.
    

In [6]:
# Predict labels for the held-out split, then print the evaluation summary.
y_pred = pipeline_optimizer.predict(X_test)
metrics(y_true=y_test, y_pred=y_pred)

Confusion Matrix:
[[43  0  0  0  0  0  0  0  0  0]
 [ 0 46  0  0  0  0  0  0  0  0]
 [ 0  1 51  0  0  0  0  0  0  0]
 [ 0  0  0 45  0  1  0  0  1  0]
 [ 0  0  0  0 47  0  0  0  0  0]
 [ 1  1  0  0  0 44  0  0  0  0]
 [ 0  0  0  0  0  1 39  0  1  0]
 [ 0  0  0  0  0  0  0 43  0  0]
 [ 0  3  1  0  0  0  0  0 34  0]
 [ 0  0  0  1  0  0  0  0  1 45]]
accuracy: 97.11%
Precision score 97.11%


In [7]:
# Every candidate pipeline TPOT evaluated during the search; iterating the
# collection yields one entry per pipeline, printed one per line.
all_pipelines_trained = pipeline_optimizer.evaluated_individuals_
for pipeline_description in all_pipelines_trained:
    print(pipeline_description)

GaussianNB(ZeroCount(input_matrix))
SGDClassifier(input_matrix, SGDClassifier__alpha=0.01, SGDClassifier__eta0=0.01, SGDClassifier__fit_intercept=True, SGDClassifier__l1_ratio=0.5, SGDClassifier__learning_rate=invscaling, SGDClassifier__loss=log, SGDClassifier__penalty=elasticnet, SGDClassifier__power_t=50.0)
SGDClassifier(input_matrix, SGDClassifier__alpha=0.0, SGDClassifier__eta0=0.01, SGDClassifier__fit_intercept=True, SGDClassifier__l1_ratio=0.5, SGDClassifier__learning_rate=invscaling, SGDClassifier__loss=modified_huber, SGDClassifier__penalty=elasticnet, SGDClassifier__power_t=1.0)
MLPClassifier(input_matrix, MLPClassifier__alpha=0.01, MLPClassifier__learning_rate_init=0.001)
GaussianNB(input_matrix)
DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=entropy, DecisionTreeClassifier__max_depth=2, DecisionTreeClassifier__min_samples_leaf=10, DecisionTreeClassifier__min_samples_split=8)
MultinomialNB(SelectPercentile(input_matrix, SelectPercentile__percentile=73)

#### Export the best pipeline as Python source code

In [8]:
# Write the best pipeline found by the search to a Python script.
exported_pipeline_path = 'hello_tpot_classification_exported_pipeline.py'
pipeline_optimizer.export(exported_pipeline_path)