In [1]:
import tensorflow as tf
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier

In [3]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [4]:
print("The shape of X_train: {}".format(X_train.shape))
print("The shape of X_test: {}".format(X_test.shape))

The shape of X_train: (55000, 784)
The shape of X_test: (10000, 784)


In [6]:
tree = DecisionTreeClassifier(max_depth=12)
tree.fit(X_train,y_train)
print("The train score: %s"%(tree.score(X_train,y_train)))
print("The train score: %s"%(tree.score(X_test,y_test)))

The train score: 0.9514
The train score: 0.8748


In [8]:
params = {"max_depth":list(range(1,20))}
grid_search_cv = GridSearchCV(DecisionTreeClassifier(random_state=1),param_grid=params, n_jobs=-1, verbose=1, cv=5)
grid_search_cv.fit(X_train,y_train)

Fitting 5 folds for each of 19 candidates, totalling 95 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   45.8s
[Parallel(n_jobs=-1)]: Done  95 out of  95 | elapsed:  3.1min finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best'),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=1)

In [9]:
grid_search_cv.best_estimator_

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=15,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best')

In [10]:
grid_search_cv.best_estimator_.fit(X_train,y_train)
print("The train score: %s"%(grid_search_cv.best_estimator_.score(X_train,y_train)))
print("The train score: %s"%(grid_search_cv.best_estimator_.score(X_test,y_test)))

The train score: 0.9859272727272728
The train score: 0.8784


In [11]:
bag_clf = BaggingClassifier(grid_search_cv.best_estimator_, n_estimators = 500, max_samples=100, bootstrap=True, n_jobs=-1, oob_score=True)
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=15,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=100, n_estimators=500, n_jobs=-1, oob_score=True,
         random_state=None, verbose=0, warm_start=False)

In [12]:
print("The train score: %s"%(bag_clf.score(X_train,y_train)))
print("The train score: %s"%(bag_clf.score(X_test,y_test)))

The train score: 0.8474727272727273
The train score: 0.854


In [13]:
rnd_clf = RandomForestClassifier(n_estimators= 500, n_jobs= -1)
rnd_clf.fit(X_train,y_train)
print("The train score: %s"%(rnd_clf.score(X_train,y_train)))
print("The train score: %s"%(rnd_clf.score(X_test,y_test)))

The train score: 1.0
The train score: 0.9711


In [24]:
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]
dnn_clf = tf.estimator.DNNClassifier(hidden_units=[300,100], n_classes=10,
                                     feature_columns=feature_cols)

input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_train}, y=y_train, num_epochs=40, batch_size=50, shuffle=True)
dnn_clf.train(input_fn=input_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\lyc18\\AppData\\Local\\Temp\\tmpi_ifws3e', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000021620C162B0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create Checkp

INFO:tensorflow:global_step/sec: 458.549
INFO:tensorflow:loss = 3.407541, step = 7001 (0.218 sec)
INFO:tensorflow:global_step/sec: 452.306
INFO:tensorflow:loss = 3.1757133, step = 7101 (0.221 sec)
INFO:tensorflow:global_step/sec: 453.314
INFO:tensorflow:loss = 1.6232489, step = 7201 (0.221 sec)
INFO:tensorflow:global_step/sec: 460.686
INFO:tensorflow:loss = 0.7236688, step = 7301 (0.217 sec)
INFO:tensorflow:global_step/sec: 454.335
INFO:tensorflow:loss = 0.2980907, step = 7401 (0.220 sec)
INFO:tensorflow:global_step/sec: 451.284
INFO:tensorflow:loss = 0.27004355, step = 7501 (0.222 sec)
INFO:tensorflow:global_step/sec: 446.232
INFO:tensorflow:loss = 0.20256095, step = 7601 (0.225 sec)
INFO:tensorflow:global_step/sec: 449.218
INFO:tensorflow:loss = 0.3880268, step = 7701 (0.222 sec)
INFO:tensorflow:global_step/sec: 456.446
INFO:tensorflow:loss = 1.0174757, step = 7801 (0.220 sec)
INFO:tensorflow:global_step/sec: 444.21
INFO:tensorflow:loss = 1.2888614, step = 7901 (0.225 sec)
INFO:tenso

INFO:tensorflow:global_step/sec: 466.015
INFO:tensorflow:loss = 0.14117277, step = 15201 (0.215 sec)
INFO:tensorflow:global_step/sec: 481.81
INFO:tensorflow:loss = 0.0342658, step = 15301 (0.208 sec)
INFO:tensorflow:global_step/sec: 474.927
INFO:tensorflow:loss = 0.118258044, step = 15401 (0.210 sec)
INFO:tensorflow:global_step/sec: 457.495
INFO:tensorflow:loss = 0.08617991, step = 15501 (0.219 sec)
INFO:tensorflow:global_step/sec: 478.338
INFO:tensorflow:loss = 0.13981059, step = 15601 (0.209 sec)
INFO:tensorflow:global_step/sec: 468.239
INFO:tensorflow:loss = 0.045553606, step = 15701 (0.214 sec)
INFO:tensorflow:global_step/sec: 478.334
INFO:tensorflow:loss = 0.09160877, step = 15801 (0.209 sec)
INFO:tensorflow:global_step/sec: 467.141
INFO:tensorflow:loss = 0.158041, step = 15901 (0.215 sec)
INFO:tensorflow:global_step/sec: 455.406
INFO:tensorflow:loss = 0.11042879, step = 16001 (0.220 sec)
INFO:tensorflow:global_step/sec: 488.893
INFO:tensorflow:loss = 0.11456136, step = 16101 (0.2

INFO:tensorflow:global_step/sec: 436.472
INFO:tensorflow:loss = 0.027294379, step = 23301 (0.229 sec)
INFO:tensorflow:global_step/sec: 410.426
INFO:tensorflow:loss = 0.17371422, step = 23401 (0.245 sec)
INFO:tensorflow:global_step/sec: 382.859
INFO:tensorflow:loss = 0.045985848, step = 23501 (0.260 sec)
INFO:tensorflow:global_step/sec: 376.355
INFO:tensorflow:loss = 0.16018939, step = 23601 (0.266 sec)
INFO:tensorflow:global_step/sec: 427.999
INFO:tensorflow:loss = 0.070854835, step = 23701 (0.234 sec)
INFO:tensorflow:global_step/sec: 375.643
INFO:tensorflow:loss = 0.0075708376, step = 23801 (0.267 sec)
INFO:tensorflow:global_step/sec: 470.363
INFO:tensorflow:loss = 0.020747118, step = 23901 (0.212 sec)
INFO:tensorflow:global_step/sec: 447.235
INFO:tensorflow:loss = 0.021287546, step = 24001 (0.224 sec)
INFO:tensorflow:global_step/sec: 402.969
INFO:tensorflow:loss = 0.06392183, step = 24101 (0.248 sec)
INFO:tensorflow:global_step/sec: 423.499
INFO:tensorflow:loss = 0.09354059, step = 2

INFO:tensorflow:global_step/sec: 437.458
INFO:tensorflow:loss = 0.033144943, step = 31401 (0.229 sec)
INFO:tensorflow:global_step/sec: 372.838
INFO:tensorflow:loss = 0.0065507703, step = 31501 (0.268 sec)
INFO:tensorflow:global_step/sec: 348.111
INFO:tensorflow:loss = 0.039169203, step = 31601 (0.286 sec)
INFO:tensorflow:global_step/sec: 477.198
INFO:tensorflow:loss = 0.011425322, step = 31701 (0.210 sec)
INFO:tensorflow:global_step/sec: 478.339
INFO:tensorflow:loss = 0.0057893344, step = 31801 (0.209 sec)
INFO:tensorflow:global_step/sec: 478.343
INFO:tensorflow:loss = 0.024511429, step = 31901 (0.209 sec)
INFO:tensorflow:global_step/sec: 463.875
INFO:tensorflow:loss = 0.011808304, step = 32001 (0.216 sec)
INFO:tensorflow:global_step/sec: 378.498
INFO:tensorflow:loss = 0.026529875, step = 32101 (0.264 sec)
INFO:tensorflow:global_step/sec: 394.204
INFO:tensorflow:loss = 0.029641943, step = 32201 (0.254 sec)
INFO:tensorflow:global_step/sec: 442.281
INFO:tensorflow:loss = 0.02216002, step

INFO:tensorflow:global_step/sec: 399.737
INFO:tensorflow:loss = 0.0389787, step = 39501 (0.250 sec)
INFO:tensorflow:global_step/sec: 398.829
INFO:tensorflow:loss = 0.025809145, step = 39601 (0.251 sec)
INFO:tensorflow:global_step/sec: 402.014
INFO:tensorflow:loss = 0.024401926, step = 39701 (0.249 sec)
INFO:tensorflow:global_step/sec: 400.54
INFO:tensorflow:loss = 0.02931216, step = 39801 (0.250 sec)
INFO:tensorflow:global_step/sec: 400.539
INFO:tensorflow:loss = 0.024568183, step = 39901 (0.250 sec)
INFO:tensorflow:global_step/sec: 385.01
INFO:tensorflow:loss = 0.026408546, step = 40001 (0.261 sec)
INFO:tensorflow:global_step/sec: 393.428
INFO:tensorflow:loss = 0.0045203418, step = 40101 (0.253 sec)
INFO:tensorflow:global_step/sec: 396.527
INFO:tensorflow:loss = 0.020990333, step = 40201 (0.252 sec)
INFO:tensorflow:global_step/sec: 397.98
INFO:tensorflow:loss = 0.008698747, step = 40301 (0.251 sec)
INFO:tensorflow:global_step/sec: 398.948
INFO:tensorflow:loss = 0.015917348, step = 404

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x21620659ef0>

In [57]:
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_test}, y=y_test, shuffle=False)
eval_results = dnn_clf.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-06-03T12:25:31Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\lyc18\AppData\Local\Temp\tmpi_ifws3e\model.ckpt-44000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-06-03-12:25:31
INFO:tensorflow:Saving dict for global step 44000: accuracy = 0.9797, average_loss = 0.09997098, global_step = 44000, loss = 12.654554
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 44000: C:\Users\lyc18\AppData\Local\Temp\tmpi_ifws3e\model.ckpt-44000


In [58]:
eval_results

{'accuracy': 0.9797,
 'average_loss': 0.09997098,
 'loss': 12.654554,
 'global_step': 44000}