<a href="https://colab.research.google.com/github/t2wain/colab/blob/master/Learn_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.utils import to_categorical, plot_model
from keras.datasets import mnist
from keras import optimizers
from keras import metrics
from keras import losses
from keras import callbacks
import keras.backend as K

# **Example**

### Configure Example

In [0]:
def build_dnn_keras(num_features, num_labels, hiddens=None, dropout_rate=0.45, name="digits"):
  """Assemble a fully connected softmax classifier as a Keras ``Sequential`` model.

  Every entry of ``hiddens`` contributes a Dense -> ReLU -> Dropout group. The
  stack always ends with a ``num_labels``-wide Dense layer followed by a
  softmax activation.

  Args:
    num_features: width of one input vector; passed as ``input_dim`` to the
      first Dense layer.
    num_labels: number of output units (classes).
    hiddens: iterable of hidden-layer widths. ``None`` or an empty iterable
      builds a single Dense + softmax model.
    dropout_rate: rate given to every Dropout layer.
    name: prefix used for the model name and for every layer name.

  Returns:
    The uncompiled ``Sequential`` model.
  """
  # ``None`` replaces the former mutable ``[]`` default; both mean "no hidden layers".
  hiddens = list(hiddens) if hiddens is not None else []
  model = Sequential(name="%s_model" % name)

  if not hiddens:
    # Without hidden layers the only Dense layer is named "<name>_lout"
    # (not "<name>_out"); kept so existing layer lookups keep working.
    model.add(Dense(num_labels, input_dim=num_features, name="%s_lout" % name))
  else:
    for layer_num, hidden_node_num in enumerate(hiddens, start=1):
      # Only the first layer declares the input width.
      dense_kwargs = {"input_dim": num_features} if layer_num == 1 else {}
      model.add(Dense(hidden_node_num, name="%s_l%i" % (name, layer_num), **dense_kwargs))
      model.add(Activation('relu', name="%s_a%i" % (name, layer_num)))
      model.add(Dropout(rate=dropout_rate, name="%s_d%i" % (name, layer_num)))
    model.add(Dense(num_labels, name="%s_out" % name))

  model.add(Activation('softmax', name="%s_aout" % name))
  return model

In [0]:
def get_data_keras():
  """Load MNIST via ``keras.datasets.mnist`` and prepare it for a dense network.

  Returns:
    Tuple ``(x_train, x_test, y_train, y_test, y_data)``:
      * ``x_train`` / ``x_test``: images flattened to one row each, cast to
        float32 and divided by 255.
      * ``y_train`` / ``y_test``: one-hot encoded labels, both with the same
        number of columns.
      * ``y_data``: the original (integer) test labels, untouched.
  """
  # load mnist dataset
  (x_train, y_train), (x_test, y_test) = mnist.load_data()
  # keep the raw test labels for later comparison against predicted classes
  y_data = y_test

  # Width of the one-hot encoding. It is derived from both splits and passed
  # explicitly so train and test encodings always have the same number of
  # columns, even when the highest label is missing from one split.
  # int() first: the labels may be a narrow unsigned dtype where +1 could wrap.
  num_labels = int(max(y_train.max(), y_test.max())) + 1

  # convert to one-hot vector
  y_train = to_categorical(y_train, num_classes=num_labels)
  y_test = to_categorical(y_test, num_classes=num_labels)

  # number of values per image, whatever its height/width (no square assumption)
  input_size = int(np.prod(x_train.shape[1:]))

  # flatten and normalize
  x_train = np.reshape(x_train, [-1, input_size]).astype('float32') / 255
  x_test = np.reshape(x_test, [-1, input_size]).astype('float32') / 255

  return (x_train, x_test, y_train, y_test, y_data)

In [0]:
def ex():
  """Build the digit classifier once and return closures that operate on it.

  Loads the data with ``get_data_keras``, builds the network with
  ``build_dnn_keras``, prints the model summary and compiles the model.

  Returns:
    Tuple ``(train, evaluate, predict)``:
      * ``train(epochs=1, loss_delta=0.1)`` fits the model with early stopping
        and returns what ``model.fit`` returns. Every call after the first
        starts again from the initial weights with a fresh optimizer.
      * ``evaluate()`` prints loss/accuracy on the training and test data.
      * ``predict()`` returns a DataFrame of per-class probabilities for the
        test data plus ``ypred``, ``ydata`` and ``prob`` columns.
  """
  is_trained = False

  (x_train, x_test, y_train, y_test, y_data) = get_data_keras()
  input_size = x_train.shape[1]
  num_labels = y_train.shape[1]

  # network parameters
  batch_size = 128
  hidden_layers = [256, 256]
  dropout = 0.45
  learning_rate = 0.01

  # MLP with two hidden layers (ReLU + dropout after each) and a softmax output
  model = build_dnn_keras(input_size, num_labels, hidden_layers, dropout)
  model.summary()

  # Snapshot of the freshly initialised weights; restoring it is how a
  # re-train starts from scratch.
  initial_weights = model.get_weights()


  def _compile():
    # loss function for one-hot vector
    # use of adam optimizer
    # accuracy is a good metric for classification tasks
    op = optimizers.Adam(lr=learning_rate)
    model.compile(
      loss=losses.categorical_crossentropy,
      optimizer=op,
      metrics=[metrics.categorical_accuracy])


  _compile()


  def _reset_model():
    # The previous implementation rebuilt the backend session through `tf`,
    # which this notebook never imports, so a second train() call raised
    # NameError. Restoring the snapshot and recompiling (new optimizer
    # instance) needs nothing beyond the model itself.
    model.set_weights(initial_weights)
    _compile()


  def train(epochs=1, loss_delta=0.1):
    nonlocal is_trained
    if is_trained:
      _reset_model()

    # Stop once val_loss has failed to improve by at least `loss_delta` for
    # 2 consecutive epochs, then roll back to the best weights seen.
    # NOTE(review): the test split doubles as validation data here, so early
    # stopping is tuned on the same data evaluate() reports on.
    early_stopping_monitor = callbacks.EarlyStopping(
      monitor='val_loss',
      min_delta=loss_delta,
      patience=2,
      verbose=1,
      restore_best_weights=True)

    history = model.fit(x_train, y_train,
      epochs=epochs,
      batch_size=batch_size,
      validation_data=(x_test, y_test),
      callbacks=[early_stopping_monitor])

    is_trained = True
    return history


  def evaluate():
    # compare training and test metrics to judge generalization
    print("Training metrics:")
    loss, accuracy = model.evaluate(x_train, y_train, batch_size=batch_size, verbose=0)
    print({"loss": loss, "accuracy": accuracy})
    print("Test metrics:")
    loss, accuracy = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
    print({"loss": loss, "accuracy": accuracy})


  def predict():
    pred_prob = model.predict(x_test)
    pred_class = np.argmax(pred_prob, axis=1)
    # one column per class, sized from the data instead of a hard-coded 10
    df = pd.DataFrame(pred_prob.round(3), columns=range(num_labels))
    df["ypred"] = pred_class
    df["ydata"] = y_data
    # highest (rounded) class probability per row, computed without a
    # row-by-row Python apply
    df["prob"] = df.iloc[:, 0:num_labels].max(axis=1)
    return df


  return (train, evaluate, predict)

### Training

In [27]:
# Build and compile the model once (prints the summary) and unpack the three closures that share it.
train, evaluate, predict = ex()

Model: "digits_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
digits_l1 (Dense)            (None, 256)               200960    
_________________________________________________________________
digits_a1 (Activation)       (None, 256)               0         
_________________________________________________________________
digits_d1 (Dropout)          (None, 256)               0         
_________________________________________________________________
digits_l2 (Dense)            (None, 256)               65792     
_________________________________________________________________
digits_a2 (Activation)       (None, 256)               0         
_________________________________________________________________
digits_d2 (Dropout)          (None, 256)               0         
_________________________________________________________________
digits_out (Dense)           (None, 10)               

In [19]:
# Train for at most 20 epochs; 0.001 is the minimum val_loss improvement counted by early stopping.
hist = train(20, 0.001)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Restoring model weights from the end of the best epoch
Epoch 00004: early stopping


In [20]:
# Print loss and accuracy on the training data and on the test data.
evaluate()

Training metrics:
{'loss': 0.1848208576242129, 'accuracy': 0.9516666666984558}
Test metrics:
{'loss': 0.18952698001861573, 'accuracy': 0.9492}


### Analyze Prediction

In [0]:
# One row per test image: rounded class probabilities, predicted class, true label, top probability.
df = predict()

In [22]:
# Rows whose predicted class equals the true label, least confident first.
print("Correct prediction:")
df_correct = df[df["ypred"].eq(df["ydata"])].sort_values(by="prob")
df_correct

Correct prediction:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,ypred,ydata,prob
4874,0.149,0.053,0.067,0.064,0.145,0.072,0.079,0.061,0.138,0.172,9,9,0.172
1828,0.018,0.176,0.119,0.180,0.065,0.115,0.014,0.083,0.091,0.139,3,3,0.180
18,0.056,0.074,0.121,0.201,0.058,0.178,0.082,0.025,0.114,0.092,3,3,0.201
1289,0.067,0.044,0.057,0.077,0.086,0.201,0.134,0.022,0.154,0.157,5,5,0.201
2770,0.018,0.095,0.127,0.202,0.057,0.138,0.020,0.058,0.126,0.158,3,3,0.202
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4537,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,3,3,1.000
4535,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,3,3,1.000
4533,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1,1,1.000
4529,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,5,5,1.000


In [23]:
print("Correct prediction probability spread:")
# Bucket the top probability into 20 equal-width bins and count rows per bin,
# listing the most confident bin first.
grp_correct = df_correct.groupby(
    pd.cut(df_correct["prob"], bins=np.linspace(0, 1, 21)))
grp_correct.count()["ypred"].sort_index(ascending=False)

Correct prediction probability spread:


prob
(0.95, 1.0]    7925
(0.9, 0.95]     410
(0.85, 0.9]     260
(0.8, 0.85]     171
(0.75, 0.8]     124
(0.7, 0.75]     102
(0.65, 0.7]     107
(0.6, 0.65]      78
(0.55, 0.6]      62
(0.5, 0.55]      51
(0.45, 0.5]      54
(0.4, 0.45]      38
(0.35, 0.4]      37
(0.3, 0.35]      35
(0.25, 0.3]      21
(0.2, 0.25]      15
(0.15, 0.2]       2
(0.1, 0.15]       0
(0.05, 0.1]       0
(0.0, 0.05]       0
Name: ypred, dtype: int64

In [24]:
# Rows whose predicted class differs from the true label, most confident first.
print("Wrong prediction:")
df_err = df[df["ypred"].ne(df["ydata"])].sort_values(by="prob", ascending=False)
df_err

Wrong prediction:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,ypred,ydata,prob
2098,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0,2,1.000
2387,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1,9,1.000
2135,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1,6,1.000
2927,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2,3,1.000
4615,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,4,2,1.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7233,0.027,0.038,0.146,0.155,0.030,0.128,0.032,0.178,0.157,0.108,7,3,0.178
2970,0.081,0.063,0.088,0.070,0.080,0.130,0.084,0.094,0.174,0.133,8,5,0.174
7216,0.127,0.036,0.091,0.071,0.085,0.106,0.159,0.037,0.172,0.115,8,0,0.172
3157,0.027,0.124,0.102,0.105,0.110,0.056,0.025,0.159,0.128,0.165,9,5,0.165


In [25]:
print("Wrong prediction probability spread:")
# Same 20-bin confidence histogram as for the correct predictions, applied to
# the misclassified rows.
grp_err = df_err.groupby(
    pd.cut(df_err["prob"], bins=np.linspace(0, 1, 21)))
grp_err.count()["ypred"].sort_index(ascending=False)

Wrong prediction probability spread:


prob
(0.95, 1.0]    68
(0.9, 0.95]    30
(0.85, 0.9]    14
(0.8, 0.85]    13
(0.75, 0.8]    25
(0.7, 0.75]    23
(0.65, 0.7]    21
(0.6, 0.65]    31
(0.55, 0.6]    34
(0.5, 0.55]    39
(0.45, 0.5]    47
(0.4, 0.45]    34
(0.35, 0.4]    31
(0.3, 0.35]    32
(0.25, 0.3]    29
(0.2, 0.25]    26
(0.15, 0.2]    11
(0.1, 0.15]     0
(0.05, 0.1]     0
(0.0, 0.05]     0
Name: ypred, dtype: int64