<a href="https://colab.research.google.com/github/t2wain/colab/blob/master/Learn_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Input
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.models import Model
from keras.utils import to_categorical, plot_model
from keras.datasets import mnist
from keras import optimizers
from keras import metrics
from keras import losses
from keras import callbacks
import keras.backend as K

# **Example 1: Dense network (MLP) on MNIST**

### Configure Example

In [0]:
def build_dnn_keras(num_features, num_labels, hiddens=(), dropout_rate=0.45, name="digits"):
  """Build a fully connected softmax classifier as a Keras ``Sequential`` model.

  Each hidden layer is a Dense -> ReLU -> Dropout stack; the model always ends
  with a Dense layer of ``num_labels`` units followed by a softmax activation.

  Args:
    num_features: Size of the input vector (passed as ``input_dim`` to the
      first Dense layer).
    num_labels: Number of output units.
    hiddens: Sequence with the unit count of every hidden layer. ``None`` or
      an empty sequence yields a single Dense layer feeding the softmax.
    dropout_rate: Rate passed to every Dropout layer.
    name: Prefix used for the model name and every layer name.

  Returns:
    The (uncompiled) Sequential model.
  """
  # Copy into a list so any sequence (or None) is accepted and the caller's
  # object is never shared with the function's default.
  hidden_sizes = [] if hiddens is None else list(hiddens)
  model = Sequential(name="%s_model" % name)

  if not hidden_sizes:
    # Without hidden layers the only Dense layer is named "<name>_lout"
    # (not "<name>_out"); kept as-is so existing layer lookups keep working.
    model.add(Dense(num_labels, input_dim=num_features, name="%s_lout" % name))
  else:
    for layer_num, hidden_node_num in enumerate(hidden_sizes, start=1):
      # Only the first layer declares the input size.
      first_layer_kwargs = {"input_dim": num_features} if layer_num == 1 else {}
      dense_name = "%s_l%i" % (name, layer_num)
      model.add(Dense(hidden_node_num, name=dense_name, **first_layer_kwargs))
      model.add(Activation('relu', name="%s_a%i" % (name, layer_num)))
      model.add(Dropout(rate=dropout_rate, name="%s_d%i" % (name, layer_num)))
    model.add(Dense(num_labels, name="%s_out" % name))

  model.add(Activation('softmax', name="%s_aout" % name))
  return model

In [0]:
def get_data_keras():
  """Load MNIST and prepare it for a fully connected network.

  Returns:
    Tuple ``(x_train, x_test, y_train, y_test, y_data)``:
    ``x_train`` / ``x_test`` are float32 arrays with one flattened image per
    row, divided by 255; ``y_train`` / ``y_test`` are one-hot encoded labels;
    ``y_data`` is the original (integer) test label array.
  """
  # load mnist dataset
  (x_train, y_train), (x_test, y_test) = mnist.load_data()
  y_data = y_test  # integer labels, kept for the prediction analysis

  # Use one explicit width for both splits: to_categorical otherwise infers it
  # from each array's own maximum, so the two encodings could disagree.
  num_labels = int(max(y_train.max(), y_test.max())) + 1
  y_train = to_categorical(y_train, num_classes=num_labels)
  y_test = to_categorical(y_test, num_classes=num_labels)

  # flatten every image to a vector (works for non-square images too)
  # and scale the pixel values by 1/255
  x_train = x_train.reshape(x_train.shape[0], -1).astype('float32') / 255
  x_test = x_test.reshape(x_test.shape[0], -1).astype('float32') / 255

  return (x_train, x_test, y_train, y_test, y_data)

In [0]:
def ex():
  """Set up the dense-network MNIST example and return its driver functions.

  Loads the data with ``get_data_keras``, builds the model with
  ``build_dnn_keras``, prints the model summary and compiles the model with
  the Adam optimizer and categorical cross-entropy.

  Returns:
    Tuple ``(train, evaluate, predict)`` of closures that share the compiled
    model and the loaded data.
  """
  (x_train, x_test, y_train, y_test, y_data) = get_data_keras()
  input_size = x_train.shape[1]
  num_labels = y_train.shape[1]

  # network parameters
  batch_size = 128
  hidden_layers = [256, 256]
  dropout = 0.45
  learning_rate = 0.01

  # model is a 3-layer MLP with ReLU and dropout after each hidden layer
  model = build_dnn_keras(input_size, num_labels, hidden_layers, dropout)
  model.summary()

  # loss function for one-hot vector
  # use of adam optimizer
  # accuracy is a good metric for classification tasks
  # (`learning_rate` replaces the deprecated `lr` keyword)
  op = optimizers.Adam(learning_rate=learning_rate)
  model.compile(
    loss=losses.categorical_crossentropy,
    optimizer=op,
    metrics=[metrics.categorical_accuracy])


  def train(epochs=1, loss_delta=0.1):
    """Fit the model with early stopping on ``val_loss``.

    Calling this again continues training from the current weights; the
    model is not re-initialised between calls.

    Args:
      epochs: Maximum number of epochs.
      loss_delta: ``min_delta`` for the early-stopping monitor.

    Returns:
      The Keras ``History`` object returned by ``model.fit``.
    """
    early_stopping_monitor = callbacks.EarlyStopping(
      monitor='val_loss',
      min_delta=loss_delta,
      patience=2,
      verbose=1,
      restore_best_weights=True)

    # NOTE(review): the test split doubles as validation data, so early
    # stopping is tuned on the same data evaluate() reports as "Test metrics".
    # Consider holding out part of the training set instead.
    history = model.fit(x_train, y_train,
      epochs=epochs,
      batch_size=batch_size,
      validation_data=(x_test, y_test),
      callbacks=[early_stopping_monitor])

    return history


  def evaluate():
    """Print loss and accuracy on the training set and on the test set."""
    print("Training metrics:")
    loss, accuracy = model.evaluate(x_train, y_train, batch_size=batch_size, verbose=0)
    print({"loss": loss, "accuracy": accuracy})
    print("Test metrics:")
    loss, accuracy = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
    print({"loss": loss, "accuracy": accuracy})


  def predict():
    """Predict the test set and return the results as a DataFrame.

    Columns ``0 .. num_labels-1`` hold the class probabilities rounded to 3
    decimals, ``ypred`` the arg-max class, ``ydata`` the true label and
    ``prob`` the largest (rounded) class probability of the row.
    """
    pred_prob = model.predict(x_test)
    pred_class = np.argmax(pred_prob, axis=1)
    df = pd.DataFrame(pred_prob.round(3), columns=range(num_labels))
    # take the row maximum before the non-probability columns are appended
    top_prob = df.max(axis=1)
    df["ypred"] = pred_class
    df["ydata"] = y_data
    df["prob"] = top_prob
    return df


  return (train, evaluate, predict)

### Training

In [0]:
# Build and compile the dense model; ex() prints the summary and returns the
# closures used by the cells below.
# NOTE: the Example 2 section rebinds these same three names to the CNN
# closures, so the cells of this section must be run before that one.
train, evaluate, predict = ex()

Model: "digits_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
digits_l1 (Dense)            (None, 256)               200960    
_________________________________________________________________
digits_a1 (Activation)       (None, 256)               0         
_________________________________________________________________
digits_d1 (Dropout)          (None, 256)               0         
_________________________________________________________________
digits_l2 (Dense)            (None, 256)               65792     
_________________________________________________________________
digits_a2 (Activation)       (None, 256)               0         
_________________________________________________________________
digits_d2 (Dropout)          (None, 256)               0         
_________________________________________________________________
digits_out (Dense)           (None, 10)               

In [0]:
# Train for at most 20 epochs; early stopping (patience 2) treats a val_loss
# improvement smaller than 0.001 as no improvement.
hist = train(20, 0.001)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Restoring model weights from the end of the best epoch
Epoch 00006: early stopping


In [0]:
# Print loss/accuracy on the training and test sets for the trained model.
evaluate()

Training metrics:
{'loss': 0.1508090390731891, 'accuracy': 0.9580833333015442}
Test metrics:
{'loss': 0.1692750359252095, 'accuracy': 0.9553}


### Analyze Prediction

In [0]:
# One row per test image: class probabilities, predicted class (ypred),
# true label (ydata) and the top probability (prob).
df = predict()

In [0]:
print("Correct prediction:")
# rows where the predicted class matches the label, least confident first
is_correct = df["ypred"] == df["ydata"]
df_correct = df[is_correct].sort_values(by="prob")
df_correct

Correct prediction:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,ypred,ydata,prob
6598,0.068,0.022,0.125,0.097,0.066,0.174,0.131,0.044,0.150,0.124,5,5,0.174
7921,0.098,0.031,0.057,0.081,0.119,0.081,0.127,0.056,0.177,0.173,8,8,0.177
1751,0.016,0.012,0.175,0.092,0.182,0.080,0.059,0.164,0.101,0.119,4,4,0.182
7049,0.194,0.015,0.094,0.105,0.098,0.100,0.170,0.025,0.153,0.046,0,0,0.194
2189,0.025,0.119,0.065,0.062,0.124,0.067,0.060,0.090,0.190,0.198,9,9,0.198
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4127,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2,2,1.000
4125,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,6,6,1.000
4122,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,9,9,1.000
4275,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2,2,1.000


In [0]:
print("Correct prediction probability spread:")
# 20 equal-width confidence bins over (0, 1]
prob_bins = pd.cut(df_correct.prob, np.linspace(0, 1, 21))
# observed=False is spelled out so empty bins stay in the table and newer
# pandas versions do not warn about the changing default.
grp_correct = df_correct.groupby(prob_bins, observed=False)
grp_correct["ypred"].count().sort_index(ascending=False)

Correct prediction probability spread:


prob
(0.95, 1.0]    8533
(0.9, 0.95]     223
(0.85, 0.9]     147
(0.8, 0.85]      90
(0.75, 0.8]      83
(0.7, 0.75]      64
(0.65, 0.7]      59
(0.6, 0.65]      42
(0.55, 0.6]      41
(0.5, 0.55]      53
(0.45, 0.5]      47
(0.4, 0.45]      38
(0.35, 0.4]      40
(0.3, 0.35]      30
(0.25, 0.3]      38
(0.2, 0.25]      19
(0.15, 0.2]       6
(0.1, 0.15]       0
(0.05, 0.1]       0
(0.0, 0.05]       0
Name: ypred, dtype: int64

In [0]:
print("Wrong prediction:")
# rows where the predicted class differs from the label, most confident first
is_wrong = df["ypred"] != df["ydata"]
df_err = df[is_wrong].sort_values(by="prob", ascending=False)
df_err

Wrong prediction:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,ypred,ydata,prob
2135,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1,6,1.000
4615,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,4,2,1.000
4601,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,4,8,1.000
9009,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2,7,1.000
4956,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,4,8,1.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2607,0.083,0.047,0.130,0.093,0.133,0.096,0.063,0.092,0.166,0.098,8,7,0.166
4890,0.050,0.034,0.062,0.087,0.093,0.165,0.117,0.071,0.161,0.160,5,8,0.165
9679,0.066,0.052,0.079,0.080,0.108,0.123,0.077,0.095,0.164,0.157,8,6,0.164
4814,0.155,0.033,0.086,0.050,0.138,0.104,0.152,0.077,0.130,0.075,0,6,0.155


In [0]:
print("Wrong prediction probability spread:")
# 20 equal-width confidence bins over (0, 1]
prob_bins = pd.cut(df_err.prob, np.linspace(0, 1, 21))
# observed=False is spelled out so empty bins stay in the table and newer
# pandas versions do not warn about the changing default.
grp_err = df_err.groupby(prob_bins, observed=False)
grp_err["ypred"].count().sort_index(ascending=False)

Wrong prediction probability spread:


prob
(0.95, 1.0]    56
(0.9, 0.95]    13
(0.85, 0.9]    15
(0.8, 0.85]    17
(0.75, 0.8]    23
(0.7, 0.75]    19
(0.65, 0.7]    26
(0.6, 0.65]    20
(0.55, 0.6]    28
(0.5, 0.55]    23
(0.45, 0.5]    31
(0.4, 0.45]    20
(0.35, 0.4]    34
(0.3, 0.35]    35
(0.25, 0.3]    34
(0.2, 0.25]    28
(0.15, 0.2]    24
(0.1, 0.15]     1
(0.05, 0.1]     0
(0.0, 0.05]     0
Name: ypred, dtype: int64

# **Example 2: Convolutional network (CNN) on MNIST**

### Configure Example

In [0]:
def get_data_cnn():
  """Load MNIST and prepare it for a convolutional network.

  Returns:
    Tuple ``(x_train, x_test, y_train, y_test, y_data)``:
    ``x_train`` / ``x_test`` are float32 arrays with a trailing channel axis
    of size 1, divided by 255; ``y_train`` / ``y_test`` are one-hot encoded
    labels; ``y_data`` is the original (integer) test label array.
  """
  # load mnist dataset
  (x_train, y_train), (x_test, y_test) = mnist.load_data()
  y_data = y_test  # integer labels, kept for the prediction analysis

  # Use one explicit width for both splits: to_categorical otherwise infers it
  # from each array's own maximum, so the two encodings could disagree.
  num_labels = int(max(y_train.max(), y_test.max())) + 1
  y_train = to_categorical(y_train, num_classes=num_labels)
  y_test = to_categorical(y_test, num_classes=num_labels)

  # append the single grey-scale channel axis (height and width are taken
  # from the data, so images need not be square) and scale by 1/255
  x_train = x_train.reshape(x_train.shape[:3] + (1,)).astype('float32') / 255
  x_test = x_test.reshape(x_test.shape[:3] + (1,)).astype('float32') / 255

  return (x_train, x_test, y_train, y_test, y_data)

In [0]:
def build_cnn_keras(image_size, num_labels, dropout_rate=0.45, name="digits"):
  """Assemble the CNN classifier with the Keras functional API.

  Architecture: Conv2D -> MaxPooling2D -> Conv2D -> MaxPooling2D -> Conv2D
  -> Flatten -> Dropout -> Dense(softmax). Every convolution uses 64 filters,
  a kernel size of 3 and ReLU activation. Layer names are ``name`` plus a
  suffix (``_i1``, ``_cv1``, ``_mp1``, ... ``_out``).

  Args:
    image_size: Height and width of the single-channel input images.
    num_labels: Number of units of the softmax output layer.
    dropout_rate: Rate of the Dropout layer in front of the output layer.
    name: Prefix for the model name and all layer names.

  Returns:
    The (uncompiled) Keras ``Model``.
  """
  conv_count = 3
  conv_options = dict(filters=64, kernel_size=3, activation='relu')

  inputs = Input(shape=(image_size, image_size, 1), name="%s_i1" % name)

  # convolution stack: a pooling layer follows every convolution but the last
  features = inputs
  for idx in range(1, conv_count + 1):
    features = Conv2D(name="%s_cv%i" % (name, idx), **conv_options)(features)
    if idx < conv_count:
      features = MaxPooling2D(name="%s_mp%i" % (name, idx))(features)

  # image to vector, dropout regularization, then the softmax classifier
  flat = Flatten(name="%s_fl1" % name)(features)
  regularized = Dropout(rate=dropout_rate, name="%s_d1" % name)(flat)
  outputs = Dense(num_labels, activation='softmax', name="%s_out" % name)(regularized)

  # build the model by supplying inputs/outputs
  return Model(inputs=inputs, outputs=outputs, name="%s_model" % name)

In [0]:
def ex2():
  """Set up the CNN MNIST example and return its driver functions.

  Loads the data with ``get_data_cnn``, builds the model with
  ``build_cnn_keras``, prints the model summary and compiles the model with
  the Adam optimizer and categorical cross-entropy.

  Returns:
    Tuple ``(train, evaluate, predict)`` of closures that share the compiled
    model and the loaded data.
  """
  # network parameters
  batch_size = 128
  dropout = 0.3
  learning_rate = 0.01

  (x_train, x_test, y_train, y_test, y_data) = get_data_cnn()
  image_size = x_train.shape[1]
  num_labels = y_train.shape[1]

  model = build_cnn_keras(image_size, num_labels, dropout)
  model.summary()

  # `learning_rate` replaces the deprecated `lr` keyword
  op = optimizers.Adam(learning_rate=learning_rate)
  model.compile(
    loss=losses.categorical_crossentropy,
    optimizer=op,
    metrics=[metrics.categorical_accuracy])


  def train(epochs=1, loss_delta=0.1):
    """Fit the model with early stopping on ``val_loss``.

    Calling this again continues training from the current weights.

    Args:
      epochs: Maximum number of epochs.
      loss_delta: ``min_delta`` for the early-stopping monitor.

    Returns:
      The Keras ``History`` object returned by ``model.fit``.
    """
    early_stopping_monitor = callbacks.EarlyStopping(
      monitor='val_loss',
      min_delta=loss_delta,
      patience=2,
      verbose=1,
      restore_best_weights=True)

    # NOTE(review): the test split doubles as validation data, so early
    # stopping is tuned on the same data evaluate() reports as "Test metrics".
    # Consider holding out part of the training set instead.
    history = model.fit(x_train, y_train,
      epochs=epochs,
      batch_size=batch_size,
      validation_data=(x_test, y_test),
      callbacks=[early_stopping_monitor])

    return history


  def evaluate():
    """Print loss and accuracy on the training set and on the test set."""
    print("Training metrics:")
    loss, accuracy = model.evaluate(x_train, y_train, batch_size=batch_size, verbose=0)
    print({"loss": loss, "accuracy": accuracy})
    print("Test metrics:")
    loss, accuracy = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
    print({"loss": loss, "accuracy": accuracy})


  def predict():
    """Predict the test set and return the results as a DataFrame.

    Columns ``0 .. num_labels-1`` hold the class probabilities rounded to 3
    decimals, ``ypred`` the arg-max class, ``ydata`` the true label and
    ``prob`` the largest (rounded) class probability of the row.
    """
    pred_prob = model.predict(x_test)
    pred_class = np.argmax(pred_prob, axis=1)
    df = pd.DataFrame(pred_prob.round(3), columns=range(num_labels))
    # take the row maximum before the non-probability columns are appended
    top_prob = df.max(axis=1)
    df["ypred"] = pred_class
    df["ydata"] = y_data
    df["prob"] = top_prob
    return df


  return (train, evaluate, predict)

### Training

In [47]:
# Build and compile the CNN; ex2() prints the summary and returns the closures
# used by the cells below.
# NOTE: this rebinds train/evaluate/predict, which the Example 1 section also
# assigns; from here on they refer to the CNN model.
train, evaluate, predict = ex2()

Model: "digits_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
digits_i1 (InputLayer)       (None, 28, 28, 1)         0         
_________________________________________________________________
digits_cv1 (Conv2D)          (None, 26, 26, 64)        640       
_________________________________________________________________
digits_mp1 (MaxPooling2D)    (None, 13, 13, 64)        0         
_________________________________________________________________
digits_cv2 (Conv2D)          (None, 11, 11, 64)        36928     
_________________________________________________________________
digits_mp2 (MaxPooling2D)    (None, 5, 5, 64)          0         
_________________________________________________________________
digits_cv3 (Conv2D)          (None, 3, 3, 64)          36928     
_________________________________________________________________
digits_fl1 (Flatten)         (None, 576)              

In [48]:
# Train for at most 20 epochs; early stopping (patience 2) treats a val_loss
# improvement smaller than 0.001 as no improvement.
hist = train(20, 0.001)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Restoring model weights from the end of the best epoch
Epoch 00005: early stopping


In [49]:
# Print loss/accuracy on the training and test sets for the trained CNN.
evaluate()

Training metrics:
{'loss': 0.03226308802859858, 'accuracy': 0.9896666666348776}
Test metrics:
{'loss': 0.03637641177512705, 'accuracy': 0.9882}


### Analyze Prediction

In [0]:
# One row per test image: class probabilities, predicted class (ypred),
# true label (ydata) and the top probability (prob).
df = predict()

In [51]:
print('Confusion matrix: x-axis = predict, y-axis = actual')
# the first 10 columns of df are the class labels
class_labels = df.columns[0:10]
# `labels` must be passed by keyword: current scikit-learn rejects it as a
# positional argument
cm = confusion_matrix(df['ydata'], df['ypred'], labels=list(class_labels))
dm = pd.DataFrame(cm, index=class_labels, columns=class_labels)

# rows are actual classes, columns are predicted classes
true_pos = np.diag(cm)
actual_totals = cm.sum(axis=1)
predicted_totals = cm.sum(axis=0)

dm['TP'] = true_pos
dm['FN'] = actual_totals - true_pos      # actual class i predicted as something else
dm['FP'] = predicted_totals - true_pos   # other classes predicted as class i
dm['support'] = actual_totals
dm['precision'] = (dm['TP'] / (dm['TP'] + dm['FP'])).round(2)
dm['recall'] = (dm['TP'] / dm['support']).round(2)
dm

Confusion matrix: x-axis = predict, y-axis = actual


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,TP,FN,FP,support,precision,recall
0,975,1,0,0,0,0,3,1,0,0,975,5,12,980,0.99,0.99
1,0,1124,2,0,0,0,4,4,1,0,1124,11,10,1135,0.99,0.99
2,1,0,1025,1,0,0,0,4,1,0,1025,7,10,1032,0.99,0.99
3,1,1,1,1001,0,4,0,1,1,0,1001,9,13,1010,0.99,0.99
4,0,0,0,0,961,0,5,0,3,13,961,21,3,982,1.0,0.98
5,1,0,0,10,0,874,2,1,3,1,874,18,7,892,0.99,0.98
6,3,2,0,0,1,1,950,0,1,0,950,8,16,958,0.98,0.99
7,0,4,6,0,1,0,0,1014,1,2,1014,14,16,1028,0.98,0.99
8,3,0,1,0,0,0,1,1,968,0,968,6,15,974,0.98,0.99
9,3,2,0,2,1,2,1,4,4,990,990,19,16,1009,0.98,0.98


In [52]:
# class names for the report: the first 10 column labels of df, as strings
labels = [str(col) for col in df.columns[0:10]]
report = classification_report(df['ydata'], df['ypred'], target_names=labels)
print(report)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.99      0.99      0.99      1032
           3       0.99      0.99      0.99      1010
           4       1.00      0.98      0.99       982
           5       0.99      0.98      0.99       892
           6       0.98      0.99      0.99       958
           7       0.98      0.99      0.99      1028
           8       0.98      0.99      0.99       974
           9       0.98      0.98      0.98      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000



In [53]:
print("Correct prediction:")
# rows where the predicted class matches the label, least confident first
is_correct = df["ypred"] == df["ydata"]
df_correct = df[is_correct].sort_values(by="prob")
df_correct

Correct prediction:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,ypred,ydata,prob
8376,0.000,0.296,0.000,0.001,0.009,0.075,0.159,0.000,0.246,0.214,1,1,0.296
8061,0.000,0.000,0.004,0.000,0.385,0.000,0.000,0.035,0.263,0.313,4,4,0.385
2462,0.296,0.000,0.445,0.000,0.000,0.000,0.153,0.000,0.105,0.000,2,2,0.445
7899,0.056,0.456,0.002,0.000,0.000,0.002,0.004,0.001,0.449,0.030,1,1,0.456
1178,0.018,0.000,0.062,0.000,0.475,0.000,0.432,0.000,0.009,0.003,4,4,0.475
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3825,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,3,3,1.000
3824,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,3,3,1.000
3823,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,2,2,1.000
3841,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,4,4,1.000


In [54]:
print("Correct prediction probability spread:")
# 20 equal-width confidence bins over (0, 1]
prob_bins = pd.cut(df_correct.prob, np.linspace(0, 1, 21))
# observed=False is spelled out so empty bins stay in the table and newer
# pandas versions do not warn about the changing default.
grp_correct = df_correct.groupby(prob_bins, observed=False)
grp_correct["ypred"].count().sort_index(ascending=False)

Correct prediction probability spread:


prob
(0.95, 1.0]    9559
(0.9, 0.95]     119
(0.85, 0.9]      60
(0.8, 0.85]      41
(0.75, 0.8]      20
(0.7, 0.75]      22
(0.65, 0.7]      18
(0.6, 0.65]      17
(0.55, 0.6]      10
(0.5, 0.55]       8
(0.45, 0.5]       5
(0.4, 0.45]       1
(0.35, 0.4]       1
(0.3, 0.35]       0
(0.25, 0.3]       1
(0.2, 0.25]       0
(0.15, 0.2]       0
(0.1, 0.15]       0
(0.05, 0.1]       0
(0.0, 0.05]       0
Name: ypred, dtype: int64

In [55]:
print("Wrong prediction:")
# rows where the predicted class differs from the label, most confident first
is_wrong = df["ypred"] != df["ydata"]
df_err = df[is_wrong].sort_values(by="prob", ascending=False)
df_err

Wrong prediction:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,ypred,ydata,prob
3780,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,6,4,1.000
9729,0.000,0.000,0.000,0.000,0.000,0.000,0.999,0.000,0.000,0.000,6,5,0.999
2597,0.000,0.000,0.000,0.998,0.000,0.002,0.000,0.000,0.000,0.000,3,5,0.998
3503,0.000,0.994,0.001,0.000,0.001,0.000,0.000,0.000,0.001,0.002,1,9,0.994
3941,0.000,0.000,0.006,0.000,0.000,0.000,0.993,0.000,0.000,0.000,6,4,0.993
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5634,0.001,0.000,0.469,0.496,0.000,0.000,0.000,0.000,0.033,0.000,3,2,0.496
3060,0.007,0.101,0.004,0.007,0.063,0.001,0.000,0.467,0.074,0.274,7,9,0.467
495,0.467,0.000,0.053,0.003,0.000,0.000,0.087,0.000,0.389,0.000,0,8,0.467
6598,0.230,0.000,0.041,0.447,0.000,0.054,0.219,0.000,0.008,0.001,3,5,0.447


In [56]:
print("Wrong prediction probability spread:")
# 20 equal-width confidence bins over (0, 1]
prob_bins = pd.cut(df_err.prob, np.linspace(0, 1, 21))
# observed=False is spelled out so empty bins stay in the table and newer
# pandas versions do not warn about the changing default.
grp_err = df_err.groupby(prob_bins, observed=False)
grp_err["ypred"].count().sort_index(ascending=False)

Wrong prediction probability spread:


prob
(0.95, 1.0]    16
(0.9, 0.95]    13
(0.85, 0.9]    10
(0.8, 0.85]    10
(0.75, 0.8]    11
(0.7, 0.75]     8
(0.65, 0.7]    13
(0.6, 0.65]    12
(0.55, 0.6]     7
(0.5, 0.55]    13
(0.45, 0.5]     3
(0.4, 0.45]     1
(0.35, 0.4]     1
(0.3, 0.35]     0
(0.25, 0.3]     0
(0.2, 0.25]     0
(0.15, 0.2]     0
(0.1, 0.15]     0
(0.05, 0.1]     0
(0.0, 0.05]     0
Name: ypred, dtype: int64