In [1]:
%matplotlib inline
import numpy as np
from random import randint
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy

from sklearn.metrics import confusion_matrix, plot_confusion_matrix
import itertools
import matplotlib.pyplot as plt

In [2]:
def my_plot_confusion_matrix(cm, classes, normalize=False, title='Confusion Matrix', cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heatmap.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Rows are plotted as the true labels and columns
        as the predicted labels.
    classes : sequence
        Tick labels, one per row/column of `cm`.
    normalize : bool, default False
        When True every row is divided by its sum, so each cell shows the
        fraction of that true class. Rows that sum to zero stay all-zero.
    title : str
        Title drawn above the plot.
    cmap : matplotlib colormap
        Colormap used for the heatmap.

    Returns
    -------
    None. The matrix is printed and drawn on the current pyplot figure.
    """
    cm = np.asarray(cm)

    # Normalize BEFORE drawing so that the heatmap colors, the colorbar and the
    # text annotations all describe the same values.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no true samples has a zero row total; leave that row at
        # zero instead of producing NaN from a 0/0 division.
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        print("Normalized confusion matrix")
    else:
        print('Confusion Matrix without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than the midpoint get white text so the number stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Fractions are shortened to two decimals; raw counts are shown as-is.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text, horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [3]:
# Two parallel lists: train_samples[k] is an age and train_labels[k] its label.
train_labels, train_samples = [], []

In [4]:
# Synthetic data for an imaginary clinical trial.
# Outlier group (~5% of the training data): participants who go against the trend.
for i in range(50):
    # Draw the younger age (13-64) first, then the older age (65-100).
    random_younger, random_older = randint(13, 64), randint(65, 100)
    train_samples += [random_younger, random_older]
    # Younger participant -> 1 (side effects); older participant -> 0 (no side effects).
    train_labels += [1, 0]

In [5]:
# Majority group (~95% of the training data): participants who follow the trend.
for i in range(1000):
    # Draw the younger age (13-64) first, then the older age (65-100).
    random_younger, random_older = randint(13, 64), randint(65, 100)
    train_samples += [random_younger, random_older]
    # Younger participant -> 0 (no side effects); older participant -> 1 (side effects).
    train_labels += [0, 1]

In [6]:
## ADDITIONAL CODE to validation_set.py
# Two parallel lists for the held-out test set: ages and their labels.
test_labels, test_samples = [], []

In [7]:
# Synthetic test data for the same imaginary clinical trial.
# Outlier group (~5% of the test data): participants who go against the trend.
for i in range(10):
    # Draw the younger age (13-64) first, then the older age (65-100).
    random_younger, random_older = randint(13, 64), randint(65, 100)
    test_samples += [random_younger, random_older]
    # Younger participant -> 1 (side effects); older participant -> 0 (no side effects).
    test_labels += [1, 0]

In [8]:
# Majority group (~95% of the test data): participants who follow the trend.
for i in range(200):
    # Draw the younger age (13-64) first, then the older age (65-100).
    random_younger, random_older = randint(13, 64), randint(65, 100)
    test_samples += [random_younger, random_older]
    # Younger participant -> 0 (no side effects); older participant -> 1 (side effects).
    test_labels += [0, 1]

In [9]:
# Prepare the training data for Keras:
# convert both lists to numpy arrays and shuffle them together, so every
# age stays paired with its own label while the generation order is removed.
train_labels, train_samples = shuffle(np.array(train_labels), np.array(train_samples))

In [10]:
# Same preparation for the test data: numpy arrays, shuffled together so
# every age stays paired with its own label.
test_labels, test_samples = shuffle(np.array(test_labels), np.array(test_samples))

In [11]:
# Normalize the ages into the 0-1 range (the raw ages span 13-100).
scaler = MinMaxScaler(feature_range=(0,1))
# Learn the min/max from the TRAINING data only and scale it.
# reshape(-1, 1) turns the 1-D age array into the single-column 2-D array the scaler expects.
scaled_train_samples = scaler.fit_transform(train_samples.reshape(-1, 1))
# Reuse the training min/max for the test set. Re-fitting on the test data
# (fit_transform) would map the same age to a different value than the one
# the model saw during training whenever the two sets have different extremes.
scaled_test_samples = scaler.transform(test_samples.reshape(-1, 1))

In [12]:
# Small fully connected classifier: one scaled age in, two class probabilities out.
model = Sequential(layers=[
    Dense(16, activation='relu', input_shape=(1,)),
    Dense(32, activation='relu'),
    # Two output units, one per class (did / did not experience side effects).
    Dense(2, activation='softmax'),
])

In [14]:
# Print the layer-by-layer summary of the model (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 16)                32        
_________________________________________________________________
dense_1 (Dense)              (None, 32)                544       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 66        
Total params: 642
Trainable params: 642
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Configure training. The labels are plain integers (0/1), hence the
# sparse variant of categorical cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

In [16]:
# Train the model, holding out 10% of the data as a validation set.
# Even though shuffle is True, the validation set is separated before the
# per-epoch shuffle, so it may not contain random data unless the inputs
# were shuffled beforehand.
# With validation_split set, each epoch also reports val_loss and val_accuracy.
# NOTE(review): the recorded output below shows 57 epochs while epochs=39 here;
# the output looks stale and should be regenerated.
model.fit(
    scaled_train_samples,
    train_labels,
    batch_size=10,
    epochs=39,
    validation_split=0.1,
    shuffle=True,
    verbose=2,
)

Epoch 1/57
189/189 - 0s - loss: 0.7172 - accuracy: 0.4921 - val_loss: 0.7039 - val_accuracy: 0.4952
Epoch 2/57
189/189 - 0s - loss: 0.6886 - accuracy: 0.7127 - val_loss: 0.6753 - val_accuracy: 0.8333
Epoch 3/57
189/189 - 0s - loss: 0.6649 - accuracy: 0.8280 - val_loss: 0.6517 - val_accuracy: 0.8333
Epoch 4/57
189/189 - 0s - loss: 0.6438 - accuracy: 0.8212 - val_loss: 0.6305 - val_accuracy: 0.8524
Epoch 5/57
189/189 - 0s - loss: 0.6227 - accuracy: 0.8296 - val_loss: 0.6074 - val_accuracy: 0.8333
Epoch 6/57
189/189 - 0s - loss: 0.5999 - accuracy: 0.8302 - val_loss: 0.5827 - val_accuracy: 0.8524
Epoch 7/57
189/189 - 0s - loss: 0.5752 - accuracy: 0.8423 - val_loss: 0.5561 - val_accuracy: 0.8524
Epoch 8/57
189/189 - 0s - loss: 0.5486 - accuracy: 0.8429 - val_loss: 0.5271 - val_accuracy: 0.8714
Epoch 9/57
189/189 - 0s - loss: 0.5205 - accuracy: 0.8545 - val_loss: 0.4975 - val_accuracy: 0.8810
Epoch 10/57
189/189 - 0s - loss: 0.4922 - accuracy: 0.8635 - val_loss: 0.4676 - val_accuracy: 0.8905

<tensorflow.python.keras.callbacks.History at 0x7f2639dbadd8>

In [None]:
# Run the trained network on the scaled test ages.
predictions = model.predict(scaled_test_samples, batch_size=10, verbose=0)
# Take the index of the highest-scoring output unit as the predicted label.
rounded_predictions = predictions.argmax(axis=-1)

In [None]:
# Compare the true test labels (first argument) with the predicted labels (second argument).
cm = confusion_matrix(test_labels, rounded_predictions)

In [None]:
# Tick labels for the confusion matrix, in label order:
# index 0 -> label 0 (no side effects), index 1 -> label 1 (side effects).
cm_plot_labels = ['No Side Effects', 'Side Effects']

In [None]:
# Plot the raw-count confusion matrix (normalize is left at its default, False).
my_plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')

## Save and Load a Model

In [None]:
# Show the summary of the in-memory model before it is saved.
model.summary()

### 1. model.save()

In [None]:
# Save the model only when no file exists at the target path yet,
# so an earlier saved model is never overwritten.
import os.path
filename = 'models/medical_trial_model.h5'
if not os.path.isfile(filename):
    model.save(filename)

 This saves:
 
    * The architecture of the model, which allows the model to be re-created
    * The weights of the model
    * The training configuration (loss, optimizer)
    * The state of the optimizer, which allows training to resume exactly where it left off

In [None]:
# Rebuild a model object from the file at `filename`.
from tensorflow.keras.models import load_model
new_model = load_model(filename)

In [None]:
# Print the summary of the reloaded model, for comparison with the original model's summary.
new_model.summary()

In [None]:
# Display the weights of the reloaded model.
new_model.get_weights()

In [None]:
# Display the optimizer attached to the reloaded model.
new_model.optimizer