<a href="https://colab.research.google.com/github/t2wain/colab/blob/master/Learn_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Input
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.models import Model
from keras.utils import to_categorical, plot_model
from keras.datasets import mnist
from keras import optimizers
from keras import metrics
from keras import losses
from keras import callbacks
import keras.backend as K

# **Example**

### Configure Example

In [0]:
def build_dnn_keras(num_features, num_labels, hiddens=None, dropout_rate=0.45, name="digits"):
  """Build a fully-connected softmax classifier as a Keras ``Sequential`` model.

  Args:
    num_features: width of one input sample (passed as ``input_dim`` to the
      first Dense layer).
    num_labels: number of output classes.
    hiddens: sizes of the hidden Dense layers, in order. ``None`` or an empty
      sequence builds a single Dense layer straight from input to output.
    dropout_rate: rate handed to the Dropout layer that follows every hidden
      layer.
    name: prefix used for the model name and for every layer name.

  Returns:
    The assembled, not yet compiled, ``Sequential`` model named ``<name>_model``.
  """
  # None replaces the former mutable-list default; copy so the caller's
  # sequence is never shared with this function.
  hidden_sizes = list(hiddens) if hiddens is not None else []
  model = Sequential(name="%s_model" % name)

  if not hidden_sizes:
    # No hidden layers: one Dense layer maps the input directly to the classes.
    # The layer name "<name>_lout" is kept exactly as before.
    model.add(Dense(num_labels, input_dim=num_features, name="%s_lout" % name))
  else:
    for idx, units in enumerate(hidden_sizes, start=1):
      # Only the first layer declares the input width.
      first_layer_args = {"input_dim": num_features} if idx == 1 else {}
      model.add(Dense(units, name="%s_l%d" % (name, idx), **first_layer_args))
      model.add(Activation('relu', name="%s_a%d" % (name, idx)))
      model.add(Dropout(rate=dropout_rate, name="%s_d%d" % (name, idx)))
    model.add(Dense(num_labels, name="%s_out" % name))

  model.add(Activation('softmax', name="%s_aout" % name))
  return model

In [0]:
def get_data_keras():
  """Load MNIST and prepare it for a fully-connected network.

  Returns:
    (x_train, x_test, y_train, y_test, y_data) where
      x_train / x_test: float32 arrays, one flattened image per row, pixel
        values divided by 255.
      y_train / y_test: one-hot label matrices with the same number of columns.
      y_data: the test labels as loaded, before one-hot encoding.
  """
  # load mnist dataset
  (x_train, y_train), (x_test, y_test) = mnist.load_data()

  # keep the raw test labels so predictions can be compared against them later
  y_data = y_test

  # One class count for both splits, so the two one-hot matrices always have
  # the same width even if one split lacks the highest label.
  num_labels = int(max(y_train.max(), y_test.max())) + 1
  y_train = to_categorical(y_train, num_classes=num_labels)
  y_test = to_categorical(y_test, num_classes=num_labels)

  # Flatten every image to one row (no square-image assumption) and scale the
  # pixel values by 1/255.
  x_train = x_train.reshape(len(x_train), -1).astype('float32') / 255
  x_test = x_test.reshape(len(x_test), -1).astype('float32') / 255

  return (x_train, x_test, y_train, y_test, y_data)

In [0]:
def ex():
  """Set up the dense (MLP) MNIST example and return its three actions.

  Loads the data with get_data_keras(), builds the network with
  build_dnn_keras(), prints the model summary and compiles the model.

  Returns:
    (train, evaluate, predict): closures that share the one compiled model.
      train(epochs=1, loss_delta=0.1): fits the model, returns what
        model.fit returns.
      evaluate(): prints loss and accuracy for the training and test sets.
      predict(): returns a DataFrame with one row per test sample.
  """
  (x_train, x_test, y_train, y_test, y_data) = get_data_keras()
  input_size = x_train.shape[1]
  num_labels = y_train.shape[1]

  # network parameters
  batch_size = 128
  hidden_layers = [256, 256]
  dropout = 0.45
  learning_rate = 0.01

  # one hidden layer per entry of hidden_layers, then the softmax output layer
  model = build_dnn_keras(input_size, num_labels, hidden_layers, dropout)
  model.summary()

  # categorical cross-entropy matches the one-hot labels; categorical accuracy
  # is tracked alongside the loss
  op = optimizers.Adam(lr=learning_rate)
  model.compile(
    loss=losses.categorical_crossentropy,
    optimizer=op,
    metrics=[metrics.categorical_accuracy])


  def train(epochs=1, loss_delta=0.1):
    """Fit the shared model with early stopping on the validation loss.

    Every call fits the same model object; nothing here re-initialises it
    between calls.

    Args:
      epochs: maximum number of epochs.
      loss_delta: ``min_delta`` given to EarlyStopping.
    """
    # NOTE(review): the test split doubles as the validation set that drives
    # early stopping, so the test metrics printed by evaluate() do not come
    # from data the training loop never saw.
    early_stopping_monitor = callbacks.EarlyStopping(
      monitor='val_loss',
      min_delta=loss_delta,
      patience=2,
      verbose=1,
      restore_best_weights=True)

    return model.fit(x_train, y_train,
      epochs=epochs,
      batch_size=batch_size,
      validation_data=(x_test, y_test),
      callbacks=[early_stopping_monitor])


  def evaluate():
    """Print loss and accuracy on the training set, then on the test set."""
    splits = (
      ("Training metrics:", x_train, y_train),
      ("Test metrics:", x_test, y_test),
    )
    for title, x_split, y_split in splits:
      print(title)
      loss, accuracy = model.evaluate(x_split, y_split, batch_size=batch_size, verbose=0)
      print({"loss": loss, "accuracy": accuracy})


  def predict():
    """Predict the test set and tabulate the result.

    Returns:
      DataFrame with one probability column per class (rounded to 3 decimals),
      plus ``ypred`` (arg-max class), ``ydata`` (true label) and ``prob``
      (largest rounded probability of the row).
    """
    pred_prob = model.predict(x_test)
    rounded = pred_prob.round(3)
    # column count follows the model output instead of a fixed 10
    df = pd.DataFrame(rounded, columns=range(rounded.shape[1]))
    df["ypred"] = np.argmax(pred_prob, axis=1)
    df["ydata"] = y_data
    # vectorised row maximum over the rounded probabilities
    df["prob"] = rounded.max(axis=1)
    return df


  return (train, evaluate, predict)

### Training

In [0]:
# Build and compile the dense model; ex() hands back its train / evaluate / predict closures.
train, evaluate, predict = ex()

Model: "digits_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
digits_l1 (Dense)            (None, 256)               200960    
_________________________________________________________________
digits_a1 (Activation)       (None, 256)               0         
_________________________________________________________________
digits_d1 (Dropout)          (None, 256)               0         
_________________________________________________________________
digits_l2 (Dense)            (None, 256)               65792     
_________________________________________________________________
digits_a2 (Activation)       (None, 256)               0         
_________________________________________________________________
digits_d2 (Dropout)          (None, 256)               0         
_________________________________________________________________
digits_out (Dense)           (None, 10)               

In [0]:
# up to 20 epochs; early stopping uses a minimum val_loss improvement of 0.001
hist = train(epochs=20, loss_delta=0.001)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Restoring model weights from the end of the best epoch
Epoch 00006: early stopping


In [0]:
# Print loss and accuracy on the training set and on the test set.
evaluate()

Training metrics:
{'loss': 0.1508090390731891, 'accuracy': 0.9580833333015442}
Test metrics:
{'loss': 0.1692750359252095, 'accuracy': 0.9553}


### Analyze Prediction

In [0]:
# One row per test image: class probabilities, predicted label, true label, top probability.
df = predict()

In [0]:
print("Correct prediction:")
# rows where the predicted label equals the true label, least confident first
is_correct = df["ypred"] == df["ydata"]
df_correct = df[is_correct].sort_values(by="prob")
df_correct

Correct prediction:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,ypred,ydata,prob
6598,0.068,0.022,0.125,0.097,0.066,0.174,0.131,0.044,0.150,0.124,5,5,0.174
7921,0.098,0.031,0.057,0.081,0.119,0.081,0.127,0.056,0.177,0.173,8,8,0.177
1751,0.016,0.012,0.175,0.092,0.182,0.080,0.059,0.164,0.101,0.119,4,4,0.182
7049,0.194,0.015,0.094,0.105,0.098,0.100,0.170,0.025,0.153,0.046,0,0,0.194
2189,0.025,0.119,0.065,0.062,0.124,0.067,0.060,0.090,0.190,0.198,9,9,0.198
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4127,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2,2,1.000
4125,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,6,6,1.000
4122,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,9,9,1.000
4275,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2,2,1.000


In [0]:
print("Correct prediction probability spread:")
# 20 equal-width confidence bins covering (0, 1]
prob_bins = pd.cut(df_correct.prob, np.linspace(0, 1, 21))
# observed=False keeps the empty bins in the table. It is spelled out because
# pandas 2.1+ warns when a categorical groupby relies on the default, and a
# default of observed=True would silently drop the zero-count rows.
grp_correct = df_correct.groupby(prob_bins, observed=False)
grp_correct.count().ypred.sort_index(ascending=False)

Correct prediction probability spread:


prob
(0.95, 1.0]    8533
(0.9, 0.95]     223
(0.85, 0.9]     147
(0.8, 0.85]      90
(0.75, 0.8]      83
(0.7, 0.75]      64
(0.65, 0.7]      59
(0.6, 0.65]      42
(0.55, 0.6]      41
(0.5, 0.55]      53
(0.45, 0.5]      47
(0.4, 0.45]      38
(0.35, 0.4]      40
(0.3, 0.35]      30
(0.25, 0.3]      38
(0.2, 0.25]      19
(0.15, 0.2]       6
(0.1, 0.15]       0
(0.05, 0.1]       0
(0.0, 0.05]       0
Name: ypred, dtype: int64

In [0]:
print("Wrong prediction:")
# rows where the predicted label differs from the true label, most confident first
is_wrong = df["ypred"] != df["ydata"]
df_err = df[is_wrong].sort_values(by="prob", ascending=False)
df_err

Wrong prediction:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,ypred,ydata,prob
2135,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1,6,1.000
4615,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,4,2,1.000
4601,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,4,8,1.000
9009,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2,7,1.000
4956,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,4,8,1.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2607,0.083,0.047,0.130,0.093,0.133,0.096,0.063,0.092,0.166,0.098,8,7,0.166
4890,0.050,0.034,0.062,0.087,0.093,0.165,0.117,0.071,0.161,0.160,5,8,0.165
9679,0.066,0.052,0.079,0.080,0.108,0.123,0.077,0.095,0.164,0.157,8,6,0.164
4814,0.155,0.033,0.086,0.050,0.138,0.104,0.152,0.077,0.130,0.075,0,6,0.155


In [0]:
print("Wrong prediction probability spread:")
# 20 equal-width confidence bins covering (0, 1]
prob_bins = pd.cut(df_err.prob, np.linspace(0, 1, 21))
# observed=False keeps the empty bins in the table. It is spelled out because
# pandas 2.1+ warns when a categorical groupby relies on the default, and a
# default of observed=True would silently drop the zero-count rows.
grp_err = df_err.groupby(prob_bins, observed=False)
grp_err.count().ypred.sort_index(ascending=False)

Wrong prediction probability spread:


prob
(0.95, 1.0]    56
(0.9, 0.95]    13
(0.85, 0.9]    15
(0.8, 0.85]    17
(0.75, 0.8]    23
(0.7, 0.75]    19
(0.65, 0.7]    26
(0.6, 0.65]    20
(0.55, 0.6]    28
(0.5, 0.55]    23
(0.45, 0.5]    31
(0.4, 0.45]    20
(0.35, 0.4]    34
(0.3, 0.35]    35
(0.25, 0.3]    34
(0.2, 0.25]    28
(0.15, 0.2]    24
(0.1, 0.15]     1
(0.05, 0.1]     0
(0.0, 0.05]     0
Name: ypred, dtype: int64

# **Example 2**

### Configure Example

In [0]:
def get_data_cnn():
  """Load MNIST and prepare it for a convolutional network.

  Returns:
    (x_train, x_test, y_train, y_test, y_data) where
      x_train / x_test: float32 image arrays with a trailing channel axis of
        size 1, pixel values divided by 255.
      y_train / y_test: one-hot label matrices with the same number of columns.
      y_data: the test labels as loaded, before one-hot encoding.
  """
  # load mnist dataset
  (x_train, y_train), (x_test, y_test) = mnist.load_data()

  # keep the raw test labels so predictions can be compared against them later
  y_data = y_test

  # One class count for both splits, so the two one-hot matrices always have
  # the same width even if one split lacks the highest label.
  num_labels = int(max(y_train.max(), y_test.max())) + 1
  y_train = to_categorical(y_train, num_classes=num_labels)
  y_test = to_categorical(y_test, num_classes=num_labels)

  # Append the channel axis (no square-image assumption) and scale the pixel
  # values by 1/255.
  x_train = np.expand_dims(x_train, axis=-1).astype('float32') / 255
  x_test = np.expand_dims(x_test, axis=-1).astype('float32') / 255

  return (x_train, x_test, y_train, y_test, y_data)

In [0]:
def build_cnn_keras(image_size, num_labels, dropout_rate=0.45, name="digits"):
  """Build a small CNN classifier with the Keras functional API.

  The network is three 3x3 convolutions with 64 filters and ReLU, with max
  pooling after the first two, then flatten, dropout and a softmax Dense
  output layer.

  Args:
    image_size: side length of the square, single-channel input image.
    num_labels: number of output classes.
    dropout_rate: rate of the Dropout layer placed before the output layer.
    name: prefix used for the model name and for every layer name.

  Returns:
    The assembled, not yet compiled, ``Model`` named ``<name>_model``.
  """
  conv_args = dict(filters=64, kernel_size=3, activation='relu')

  # functional API: start from a single-channel square image
  inputs = Input(shape=(image_size, image_size, 1), name="%s_i1" % name)

  features = inputs
  for stage in (1, 2, 3):
    features = Conv2D(name="%s_cv%d" % (name, stage), **conv_args)(features)
    # the last convolution feeds the flatten step directly, without pooling
    if stage != 3:
      features = MaxPooling2D(name="%s_mp%d" % (name, stage))(features)

  # image to vector before connecting to the dense layer
  flat = Flatten(name="%s_fl1" % name)(features)

  # dropout regularization ahead of the classifier
  dropped = Dropout(rate=dropout_rate, name="%s_d1" % name)(flat)
  outputs = Dense(num_labels, activation='softmax', name="%s_out" % name)(dropped)

  # build the model by supplying inputs/outputs
  return Model(inputs=inputs, outputs=outputs, name="%s_model" % name)

In [0]:
def ex2():
  """Set up the CNN MNIST example and return its three actions.

  Loads the data with get_data_cnn(), builds the network with
  build_cnn_keras(), prints the model summary and compiles the model.

  Returns:
    (train, evaluate, predict): closures that share the one compiled model.
      train(epochs=1, loss_delta=0.1): fits the model, returns what
        model.fit returns.
      evaluate(): prints loss and accuracy for the training and test sets.
      predict(): returns a DataFrame with one row per test sample.
  """
  # network parameters
  batch_size = 128
  dropout = 0.3
  learning_rate = 0.01

  (x_train, x_test, y_train, y_test, y_data) = get_data_cnn()
  image_size = x_train.shape[1]
  num_labels = y_train.shape[1]

  model = build_cnn_keras(image_size, num_labels, dropout)
  model.summary()

  # categorical cross-entropy matches the one-hot labels; categorical accuracy
  # is tracked alongside the loss
  op = optimizers.Adam(lr=learning_rate)
  model.compile(
    loss=losses.categorical_crossentropy,
    optimizer=op,
    metrics=[metrics.categorical_accuracy])


  def train(epochs=1, loss_delta=0.1):
    """Fit the shared model with early stopping on the validation loss.

    Every call fits the same model object; nothing here re-initialises it
    between calls.

    Args:
      epochs: maximum number of epochs.
      loss_delta: ``min_delta`` given to EarlyStopping.
    """
    # NOTE(review): the test split doubles as the validation set that drives
    # early stopping, so the test metrics printed by evaluate() do not come
    # from data the training loop never saw.
    early_stopping_monitor = callbacks.EarlyStopping(
      monitor='val_loss',
      min_delta=loss_delta,
      patience=2,
      verbose=1,
      restore_best_weights=True)

    return model.fit(x_train, y_train,
      epochs=epochs,
      batch_size=batch_size,
      validation_data=(x_test, y_test),
      callbacks=[early_stopping_monitor])


  def evaluate():
    """Print loss and accuracy on the training set, then on the test set."""
    splits = (
      ("Training metrics:", x_train, y_train),
      ("Test metrics:", x_test, y_test),
    )
    for title, x_split, y_split in splits:
      print(title)
      loss, accuracy = model.evaluate(x_split, y_split, batch_size=batch_size, verbose=0)
      print({"loss": loss, "accuracy": accuracy})


  def predict():
    """Predict the test set and tabulate the result.

    Returns:
      DataFrame with one probability column per class (rounded to 3 decimals),
      plus ``ypred`` (arg-max class), ``ydata`` (true label) and ``prob``
      (largest rounded probability of the row).
    """
    pred_prob = model.predict(x_test)
    rounded = pred_prob.round(3)
    # column count follows the model output instead of a fixed 10
    df = pd.DataFrame(rounded, columns=range(rounded.shape[1]))
    df["ypred"] = np.argmax(pred_prob, axis=1)
    df["ydata"] = y_data
    # vectorised row maximum over the rounded probabilities
    df["prob"] = rounded.max(axis=1)
    return df


  return (train, evaluate, predict)

### Training

In [13]:
# Build and compile the CNN. This rebinds train / evaluate / predict, so the
# closures returned earlier by ex() are no longer reachable under these names.
train, evaluate, predict = ex2()

Model: "digits_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
digits_i1 (InputLayer)       (None, 28, 28, 1)         0         
_________________________________________________________________
digits_cv1 (Conv2D)          (None, 26, 26, 64)        640       
_________________________________________________________________
digits_mp1 (MaxPooling2D)    (None, 13, 13, 64)        0         
_________________________________________________________________
digits_cv2 (Conv2D)          (None, 11, 11, 64)        36928     
_________________________________________________________________
digits_mp2 (MaxPooling2D)    (None, 5, 5, 64)          0         
_________________________________________________________________
digits_cv3 (Conv2D)          (None, 3, 3, 64)          36928     
_________________________________________________________________
digits_fl1 (Flatten)         (None, 576)              

In [14]:
# up to 20 epochs; early stopping uses a minimum val_loss improvement of 0.001
hist = train(epochs=20, loss_delta=0.001)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Restoring model weights from the end of the best epoch
Epoch 00004: early stopping


In [15]:
# Print loss and accuracy on the training set and on the test set.
evaluate()

Training metrics:
{'loss': 0.03623288207153479, 'accuracy': 0.9885666666348776}
Test metrics:
{'loss': 0.037921339557319876, 'accuracy': 0.9877}


### Analyze Prediction

In [0]:
# One row per test image: class probabilities, predicted label, true label, top probability.
df = predict()

In [17]:
print("Correct prediction:")
# rows where the predicted label equals the true label, least confident first
is_correct = df["ypred"] == df["ydata"]
df_correct = df[is_correct].sort_values(by="prob")
df_correct

Correct prediction:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,ypred,ydata,prob
844,0.000,0.015,0.279,0.079,0.000,0.224,0.000,0.085,0.302,0.015,8,8,0.302
4761,0.000,0.005,0.000,0.001,0.242,0.004,0.000,0.064,0.313,0.372,9,9,0.372
1039,0.000,0.309,0.015,0.241,0.020,0.017,0.000,0.382,0.009,0.007,7,7,0.382
4500,0.000,0.345,0.002,0.001,0.200,0.001,0.000,0.012,0.040,0.399,9,9,0.399
1414,0.033,0.005,0.001,0.052,0.083,0.028,0.001,0.305,0.072,0.421,9,9,0.421
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4164,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,3,3,1.000
4162,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,3,3,1.000
4161,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2,2,1.000
4208,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0,0,1.000


In [18]:
print("Correct prediction probability spread:")
# 20 equal-width confidence bins covering (0, 1]
prob_bins = pd.cut(df_correct.prob, np.linspace(0, 1, 21))
# observed=False keeps the empty bins in the table. It is spelled out because
# pandas 2.1+ warns when a categorical groupby relies on the default, and a
# default of observed=True would silently drop the zero-count rows.
grp_correct = df_correct.groupby(prob_bins, observed=False)
grp_correct.count().ypred.sort_index(ascending=False)

Correct prediction probability spread:


prob
(0.95, 1.0]    9443
(0.9, 0.95]     169
(0.85, 0.9]      74
(0.8, 0.85]      63
(0.75, 0.8]      32
(0.7, 0.75]      23
(0.65, 0.7]      18
(0.6, 0.65]      19
(0.55, 0.6]      15
(0.5, 0.55]       9
(0.45, 0.5]       6
(0.4, 0.45]       2
(0.35, 0.4]       3
(0.3, 0.35]       1
(0.25, 0.3]       0
(0.2, 0.25]       0
(0.15, 0.2]       0
(0.1, 0.15]       0
(0.05, 0.1]       0
(0.0, 0.05]       0
Name: ypred, dtype: int64

In [19]:
print("Wrong prediction:")
# rows where the predicted label differs from the true label, most confident first
is_wrong = df["ypred"] != df["ydata"]
df_err = df[is_wrong].sort_values(by="prob", ascending=False)
df_err

Wrong prediction:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,ypred,ydata,prob
1299,0.000,0.000,0.000,0.000,0.000,0.001,0.000,0.998,0.000,0.000,7,5,0.998
9729,0.002,0.000,0.000,0.000,0.000,0.002,0.995,0.000,0.001,0.000,6,5,0.995
3422,0.985,0.000,0.000,0.000,0.000,0.000,0.015,0.000,0.000,0.000,0,6,0.985
2044,0.001,0.000,0.010,0.002,0.000,0.000,0.000,0.984,0.003,0.000,7,2,0.984
1260,0.000,0.982,0.004,0.000,0.000,0.000,0.000,0.012,0.001,0.001,1,7,0.982
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2607,0.000,0.002,0.378,0.006,0.115,0.000,0.000,0.289,0.210,0.000,2,7,0.378
4380,0.000,0.000,0.000,0.001,0.000,0.376,0.362,0.000,0.261,0.000,5,8,0.376
1901,0.002,0.007,0.001,0.000,0.320,0.015,0.101,0.020,0.345,0.188,8,9,0.345
247,0.001,0.008,0.344,0.004,0.306,0.001,0.103,0.001,0.232,0.000,2,4,0.344


In [20]:
print("Wrong prediction probability spread:")
# 20 equal-width confidence bins covering (0, 1]
prob_bins = pd.cut(df_err.prob, np.linspace(0, 1, 21))
# observed=False keeps the empty bins in the table. It is spelled out because
# pandas 2.1+ warns when a categorical groupby relies on the default, and a
# default of observed=True would silently drop the zero-count rows.
grp_err = df_err.groupby(prob_bins, observed=False)
grp_err.count().ypred.sort_index(ascending=False)

Wrong prediction probability spread:


prob
(0.95, 1.0]    18
(0.9, 0.95]    13
(0.85, 0.9]    13
(0.8, 0.85]    10
(0.75, 0.8]     6
(0.7, 0.75]     9
(0.65, 0.7]     9
(0.6, 0.65]     8
(0.55, 0.6]    11
(0.5, 0.55]    15
(0.45, 0.5]     3
(0.4, 0.45]     2
(0.35, 0.4]     3
(0.3, 0.35]     3
(0.25, 0.3]     0
(0.2, 0.25]     0
(0.15, 0.2]     0
(0.1, 0.15]     0
(0.05, 0.1]     0
(0.0, 0.05]     0
Name: ypred, dtype: int64