

---



# HDAT9900: Dissertation (Part 2)


#### Google Colab

In [2]:
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive/


In [3]:
# Change directory and link to google drive
cd drive

/content/drive


In [4]:
cd My Drive

/content/drive/My Drive


In [5]:
cd 5_Code

/content/drive/My Drive/5_Code


In [7]:
ls

[0m[01;34mdata[0m/  model.png  [01;34mplan-violation[0m/  [01;34m__pycache__[0m/  [01;34mstandardisation[0m/


#### Dependencies

In [8]:
import numpy as np
import pandas as pd

#cnn
from keras.optimizers import Adam
from keras.layers import Conv3D, MaxPool3D, Flatten, Dense
from keras.layers import Dropout, BatchNormalization

# rnn
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from keras.callbacks import EarlyStopping

# combined model
from keras.utils import plot_model

# evaluation
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Set seed
seeds = 0
np.random.seed(seeds)

Using TensorFlow backend.


## 2. Build Models

Let's load the data that we prepared in part 1. We will also reshape the voxel data from a flattened 7698x4096 to a 3D voxel structure of height x width x depth x channels (16x16x16x1).

### 2.1. Load data

In [9]:
# 2.1. load data -------------------------------------------------------------------
filenames_y = pd.read_csv('data/processed/dataset2labelsorgansclean.csv')
X_cnn = np.load('data/processed/dataset2voxels16.npy')
X_rnn = filenames_y['organs']
#y_orig = filenames_y['class7']
#k=8
y_orig = filenames_y['class13'] # actually 18 different organs
k=18

# 2.1.2. prepare cnn data ------------------------------------------------------------
size = 16
h, w, d = size, size, size
c = 1  # Channels 1 = grey scale, 3 = colour

#taking random indices to split the dataset into train and test
test_ids = np.random.permutation(X_cnn.shape[0])

#splitting data and labels into train and test
#keeping last 10 entries for testing, rest for training

X_train_cnn = X_cnn[test_ids[:-int(np.ceil(X_cnn.shape[0]*0.25))]]
X_test_cnn = X_cnn[test_ids[-int(np.ceil(X_cnn.shape[0]*0.25)):]]

X_train_cnn = X_train_cnn.reshape(X_train_cnn.shape[0], h, w, d, c)
X_test_cnn = X_test_cnn.reshape(X_test_cnn.shape[0], h, w, d, c)

# 2.1.3. prepare rnn data ---------------------------------------------------------

MAX_NB_CHARS = 26
MAX_SEQUENCE_LENGTH = 5
EMBEDDING_DIM = 20

# 2.1.3.1 Tokenize the data
tokenizer = Tokenizer(num_words=MAX_NB_CHARS,
                      filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~', lower=True,
                      char_level=True)
tokenizer.fit_on_texts(y_orig)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

X_rnn = tokenizer.texts_to_sequences(y_orig)
X_rnn = pad_sequences(X_rnn, maxlen=MAX_SEQUENCE_LENGTH)
X_train_rnn = X_rnn[test_ids[:-int(np.ceil(X_cnn.shape[0]*0.25))]]
X_test_rnn = X_rnn[test_ids[-int(np.ceil(X_cnn.shape[0]*0.25)):]]

# 2.1.4. prepare the labels ------------------------------------------------------
y = pd.get_dummies(y_orig).values
# last 75/25 split
y_train = y[test_ids[:-int(np.ceil(X_cnn.shape[0]*0.25))]]
y_test = y[test_ids[-int(np.ceil(X_cnn.shape[0]*0.25)):]]

# tags
my_tags = sorted([i for i in set(y_orig)])

# 2.1.5. checks -----------------------------------------------------------------
print("CNN train shape: \t", X_train_cnn.shape)
print("RNN train shape: \t", X_train_rnn.shape)
print("Label train shape: \t", y_train.shape)
print("\n")
print("CNN test shape: \t", X_test_cnn.shape)
print("RNN test shape: \t", X_test_rnn.shape)
print("Label test shape: \t", y_test.shape)

Found 24 unique tokens.
CNN train shape: 	 (5773, 16, 16, 16, 1)
RNN train shape: 	 (5773, 5)
Label train shape: 	 (5773, 18)


CNN test shape: 	 (1925, 16, 16, 16, 1)
RNN test shape: 	 (1925, 5)
Label test shape: 	 (1925, 18)


---
### 2. 2. 1. 3D Convolutional Neural Network (CNN)

Input and Output layers:

* One Input layer with dimentions 16, 16, 16, 3
* Output layer with dimensions 2

Convolutions :
* Apply 4 Convolutional layer with increasing order of filter size (standard size : 8, 16, 32, 64) and fixed kernel size = (3, 3, 3)
* Apply 2 Max Pooling layers, one after 2nd convolutional layer and one after fourth convolutional layer.

MLP architecture:
* Batch normalization on convolutional architecture
* Dense layers with 2 layers followed by dropout to avoid overfitting

In [0]:
# ==============================================================================
# 2.2.1. Convolutional Neural Network
# ==============================================================================

def CNN(X_train, X_test, y_train, y_test, k, my_tags):
    
    # Hyper parameters ---------------------------------------------------------
    max_epochs = 25
    batch_size = 128
    dropout_rate=0.5

    # Optimizers
    # from keras.optimizers import SGD
    # opt = Adadelta(lr=0.001)
    opt = Adam(lr=0.01, decay=0.7)
    # opt = SGD(lr=0.001, momentum=0.9)

    # Model Architecture -------------------------------------------------------
    
    model = Sequential()
    # Convolution layers
    model.add(Conv3D(filters=8, kernel_size=(3, 3, 3), activation='relu',\
                     input_shape=(h, w, d, c)))
    model.add(Conv3D(filters=16, kernel_size=(3, 3, 3), activation='relu'))
    # Add max pooling to obtain the most informative features
    model.add(MaxPool3D(pool_size=(2, 2, 2)))

    # Convolution layers
    model.add(Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu'))
    model.add(Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu'))
    # Add max pooling to obtain the most informative features
    model.add(MaxPool3D(pool_size=(2, 2, 2)))

    # perform batch normalization on the convolution outputs before
    # feeding it to MLP architecture
    model.add(BatchNormalization())
    model.add(Flatten())

    # create an MLP architecture with dense layers : 4096 -> 512 -> 10
    # add dropouts to avoid over-fitting / perform regularization
    model.add(Dense(units=(h*w*d), activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())
    model.add(Dense(units=512, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())
    model.add(Dense(units=k, activation='softmax'))

    # Compile
    model.compile(loss='categorical_crossentropy', optimizer=opt, \
                  metrics=['accuracy'])
    model.summary()

    print("\n####################### Training Model #############################")
    print("Training...")
    history = model.fit(x=X_train, y=y_train,
                        batch_size=batch_size,
                        epochs=max_epochs,
                        validation_split=0.1,
                        verbose=1)

    # ==========================================================================
    # 3. Results
    # ==========================================================================
    print("###################################################################")
    print("\nResults:\n")
    accr = model.evaluate(X_test, y_test)
    print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f}'.format(accr[0],
                                                                  accr[1]))
    print("-------------------------------------------------------------------")
    plt.title('Loss')
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    plt.show();

    plt.title('Accuracy')
    plt.plot(history.history['acc'], label='train')
    plt.plot(history.history['val_acc'], label='test')
    plt.legend()
    plt.show();
    print("###################################################################")
    
    # Make predictions 
    y_pred = model.predict(X_test)

    # evaluate the model
    _, train_acc = model.evaluate(X_train, y_train, verbose=0)
    _, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print('\nTrain: %.3f, Test: %.3f' % (train_acc, test_acc))
    print("\n####################################################################")

    # Classification report
    report = classification_report(y_test, y_pred.round(), target_names=my_tags)
    #report = classification_report(y_test, y_pred.round())
    print("\nClassfication Report for test:\n", report)
    print("\n####################################################################")
    
    return(model, report)



---
### Document classification (Recurrent Neural Network)

In order to improve the performance of our model, we will also utilise the name of which the clinician named the 3D file (organ). This name will then be fed into the NN to give further information.

Help: https://towardsdatascience.com/machine-learning-nlp-text-classification-using-scikit-learn-python-and-nltk-c52b92a7c73a


"Recurrent Neural Networks (RNNs) adopt the same principle, albeit in an extremely simplified version: they process sequences by iterating through the sequence elements and maintaining a "state" containing information relative to what they have seen so far. In effect, RNNs are a type of neural network that has an internal loop (Figure 6.8). The state of the RNN is reset in-between processing two different, independent sequences (e.g. two different IMDB reviews), so we still consider one sequence as a single datapoint, a single input to the network—what changes is that this datapoint is no longer processed in a single step, rather, the network internally loops over sequence elements. " 

### LSTM for name classification

Based on the filename of the organ named by the clinician, we will create a Recurrent Neural Network (RNN) using the Long Short Term Memory (LSTM) architecture to predict what the organ is. 

* Vectorize file names by turning each word into either a sequence of integers or into a vector.
* Limit the data set to the top 1000 words.
* Set the max number of words in each file to 20.

In [0]:
def rnn(X_train, X_test, Y_train, Y_test, k, my_tags):
    print("Building Document Classifier... \n")
    # 0. Hyperparameters -------------------------------------------------------
    # The maximum number of words to be used
    MAX_NB_WORDS = 26

    # Max number of words in each file name
    MAX_SEQUENCE_LENGTH = 5

    # This is fixed.
    EMBEDDING_DIM = 20

    epochs = 25
    batch_size = 32

    '''
    # 1. Tokenize the data -----------------------------------------------------
    tokenizer = Tokenizer(num_words=MAX_NB_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~', lower=True, char_level=True)
    tokenizer.fit_on_texts(name_df['synthetic'].values)
    word_index = tokenizer.word_index
    print('Found %s unique tokens.' % len(word_index))

    X = tokenizer.texts_to_sequences(name_df['synthetic'].values)
    X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
    print('Shape of data tensor:', X.shape)

    Y = pd.get_dummies(name_df['tags']).values
    print('Shape of label tensor:', Y.shape)

    # 2. Split the data --------------------------------------------------------
    X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.10, random_state = 42)
    print(X_train.shape,Y_train.shape)
    print(X_test.shape,Y_test.shape)

    '''
    # 3. Build the model -------------------------------------------------------
    model = Sequential()
    model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X_train.shape[1]))
    model.add(SpatialDropout1D(0.2))
    model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
    # add dense layer to add to cnn
    model.add(Dense(k, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())
    history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size,
                        validation_split=0.1,
                        callbacks=[EarlyStopping(monitor='val_loss',
                                                 patience=3, min_delta=0.0001)])

    # 4. Results ---------------------------------------------------------------
    print("###################################################################")
    print("\nResults:\n")
    accr = model.evaluate(X_test, Y_test)
    print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f}'.format(accr[0],
                                                                  accr[1]))
    print("-------------------------------------------------------------------")
    plt.title('Loss')
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    plt.show();

    plt.title('Accuracy')
    plt.plot(history.history['acc'], label='train')
    plt.plot(history.history['val_acc'], label='test')
    plt.legend()
    plt.show();
    
    # Make predictions 
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred.round(), target_names=my_tags)
    print("\nClassfication Report for test:\n", report)
    print("###################################################################")
    
    return(model)

---
### Combined (CNN + RNN)

We want to create a model that takes the organ structure from the ply for the CNN and add information from the predicted RNN to help the model become more accurate. 

This model will take the 3D model of the organ and its filename and learn about the two concurrently. 

In [0]:
from keras.layers import Conv3D, MaxPooling3D, Flatten
from keras.layers import Input, LSTM, Embedding, Dense
from keras.models import Model, Sequential
import keras
import matplotlib.pyplot as plt

def standardisation_model(X_train_cnn, X_train_rnn, X_test_cnn, X_test_rnn, y_train, y_test, k, my_tags):
    
    # Hyperparameters ----------------------------------------------------------
    dropout_rate=0.5
    epochs=25
    batch_size=128
    
    # First, let's define a vision model using a Sequential model.
    # This model will encode an image into a vector.
    vision_model = Sequential()
    vision_model.add(Conv3D(filters=8, kernel_size=(3, 3, 3), activation='relu', input_shape=(h, w, d, c)))
    vision_model.add(Conv3D(filters=16, kernel_size=(3, 3, 3), activation='relu'))
    vision_model.add(MaxPool3D(pool_size=(2, 2, 2)))
    vision_model.add(Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu'))
    vision_model.add(Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu'))
    vision_model.add(MaxPool3D(pool_size=(2, 2, 2)))
    vision_model.add(BatchNormalization())
    vision_model.add(Flatten())
    vision_model.add(Dense(units=(h*w*d), activation='relu'))
    vision_model.add(Dropout(dropout_rate))
    vision_model.add(BatchNormalization())
    vision_model.add(Dense(units=512, activation='relu'))
    vision_model.add(Dropout(dropout_rate))
    vision_model.add(BatchNormalization())

    # Now let's get a tensor with the output of our vision model:
    cnn_inputs = Input(shape=(h, w, d, c))
    encoded_image = vision_model(cnn_inputs)

    # Next, let's define a language model to encode the filename into a vector.
    # Each filename will be at most 20 characters long,
    # and we will index words as integers from 1 to 99.
    filename_inputs = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded_filename = Embedding(input_dim=MAX_NB_CHARS, output_dim=MAX_SEQUENCE_LENGTH, input_length=MAX_SEQUENCE_LENGTH)(filename_inputs)
    embedded_filename = SpatialDropout1D(0.2)(embedded_filename)
    encoded_filename = LSTM(100)(embedded_filename)

    # Let's concatenate the filename vector and the image vector:
    merged = keras.layers.concatenate([encoded_filename, encoded_image])

    # And let's train a logistic regression over 100 words on top:
    output = Dense(k, activation='softmax')(merged)

    # This is our final model:
    vqa_model = Model(inputs=[cnn_inputs, filename_inputs], outputs=output)

    vqa_model.compile(loss='categorical_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
    vqa_model.summary()
    
    history = vqa_model.fit([X_train_cnn, X_train_rnn], y_train,
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_split=0.1,
                        callbacks=[EarlyStopping(monitor='val_loss',
                                                 patience=3, 
                                                 min_delta=0.0001)])
    plot_model(vqa_model, to_file='model.png')

    print("###################################################################")
    print("\nResults:\n")
    accr = vqa_model.evaluate([X_test_cnn, X_test_rnn], y_test)
    print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f}'.format(accr[0],
                                                                  accr[1]))
    # Plot training & validation loss values
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

    # Plot training & validation accuracy values
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()
    
    # Make predictions 
    y_pred = vqa_model.predict([X_test_cnn, X_test_rnn])
    report = classification_report(y_test, y_pred.round(), target_names=my_tags)
    print("\nClassfication Report for test:\n", report)
    print("###################################################################")
    return(vqa_model)

---
### Build Reference Model (KNN)

The reference model is a k-nearest-neighbours model. 

Description

In [0]:
# ==============================================================================
# KNN Reference model
# ==============================================================================

def knn(X_train, X_test, y_train ,y_test, my_tags):
    '''
    This is a reference model for the dissertation-standardisation task
    The dimensionality for the feature set currently is (1716, 4096)
    Where each row represents an organ of which has been reshaped into a 1d V
    Performance will be compared to a 3D convolutional neural network
    '''
    print("\n####################################################################")
    print("Building KNN Reference Model")
    
    X_train = X_train.reshape(X_train.shape[0], 4096)
    X_test = X_test.reshape(X_test.shape[0], 4096)
    # Dependencies ------------------------------------------------------------
    import numpy as np
    from sklearn.metrics import classification_report

    # Build model -------------------------------------------------------------
    print("Building model...")
    from sklearn.neighbors import KNeighborsClassifier
    knn = KNeighborsClassifier(n_neighbors = 18)
    print("Training model...")
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    
    # Evaluation --------------------------------------------------------------
    print("Evaluation -------------------------------------------------------")
    print("Test set score: {:.2f}".format(knn.score(X_test, y_test)))
    #Classification report ----------------------------------------------------
    report = classification_report(y_test, y_pred.round(),\
                                   target_names=my_tags)
    print("\nClassfication Report for test:\n", report)
    print("\n####################################################################")
    return(knn, report)


#### PointNet Model

In [0]:
import random
import numpy as np
import tensorflow as tf
from numpy.random import seed
import matplotlib.pyplot as plt
from keras.models import Sequential
from tensorflow import set_random_seed
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve, classification_report
from keras.layers import Dense, MaxPooling1D, Convolution1D, Dropout, Flatten, BatchNormalization, Reshape, Lambda
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping

def mat_mul(A, B):
    return tf.matmul(A, B)

    # Rotate and jitter points
def rotate_point_cloud(batch_data):
    """ Randomly rotate the point clouds to augument the dataset
        rotation is per shape based along up direction
        Input:
          BxNx3 array, original batch of point clouds
        Return:
          BxNx3 array, rotated batch of point clouds
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in range(batch_data.shape[0]):
        rotation_angle = np.random.uniform() * 2 * np.pi
        cosval = np.cos(rotation_angle)
        sinval = np.sin(rotation_angle)
        rotation_matrix = np.array([[cosval, 0, sinval],
                                    [0, 1, 0],
                                    [-sinval, 0, cosval]])
        shape_pc = batch_data[k, ...]
        rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
    return rotated_data


def jitter_point_cloud(batch_data, sigma=0.01, clip=0.05):
    """ Randomly jitter points. jittering is per point.
        Input:
          BxNx3 array, original batch of point clouds
        Return:
          BxNx3 array, jittered batch of point clouds
    """
    B, N, C = batch_data.shape
    assert(clip > 0)
    jittered_data = np.clip(sigma * np.random.randn(B, N, C), -1 * clip, clip)
    jittered_data += batch_data
    return jittered_data

    # ==========================================================================
    # PointNet Full Model
    # ==========================================================================	
def pointnet_full(y, my_tags, test_ids, num_classes=18):
    # -------------------------------------------------------------------------
    # Load data
    desired_points = 1024
    X = np.load('data/processed/pointnetdata.npy', allow_pickle=True)
    
    X_train = X[test_ids[:-int(np.ceil(X.shape[0]*0.25))]]
    X_test = X[test_ids[-int(np.ceil(X.shape[0]*0.25)):]]
    y_train = y[test_ids[:-int(np.ceil(y.shape[0]*0.25))]]
    y_test = y[test_ids[-int(np.ceil(X.shape[0]*0.25)):]]

    # Training set
    train_points_r = X_train
    train_labels_r = y_train

    # Test set
    test_points_r = X_test
    test_labels_r = y_test

    # label to categorical
    from keras.utils import to_categorical
    #y_test = to_categorical(y_test)
    #y_train = to_categorical(y_train)
    # Let's examine the data. 

    print("Training shape: ", train_points_r.shape)
    print("Test shape: \t", test_points_r.shape)

    # ------------------------------------------------------------------------------
    # hyperparameter
    # number of points in each sample
    num_points = desired_points

    # number of categories
    k = 18

    # define optimizer
    opt = Adam(lr=0.001, decay=0.7)

    max_epochs=25
    batch_size=32
    dropout_rate = 0.7

    # ------------------------------------------------------------------------------
    ### POINTNET ARCHITECTURE

    input_points = Input(shape=(num_points, 3))
    x = Convolution1D(64, 1, activation='relu', input_shape=(num_points, 3))(input_points)
    x = BatchNormalization()(x)
    x = Convolution1D(128, 1, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Convolution1D(1024, 1, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=num_points)(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dense(9, weights=[np.zeros([256, 9]), np.array([1, 0, 0, 0, 1, 0, 0, 0, 1]).astype(np.float32)])(x)
    input_T = Reshape((3, 3))(x)

    # For affine transformation need to matrix multiply
    # forward net
    g = Lambda(mat_mul, arguments={'B': input_T})(input_points)
    g = Convolution1D(64, 1, input_shape=(num_points, 3), activation='relu')(g)
    g = BatchNormalization()(g)
    g = Convolution1D(64, 1, input_shape=(num_points, 3), activation='relu')(g)
    g = BatchNormalization()(g)

    # feature transform net
    f = Convolution1D(64, 1, activation='relu')(g)
    f = BatchNormalization()(f)
    f = Convolution1D(128, 1, activation='relu')(f)
    f = BatchNormalization()(f)
    f = Convolution1D(1024, 1, activation='relu')(f)
    f = BatchNormalization()(f)
    f = MaxPooling1D(pool_size=num_points)(f)
    f = Dense(512, activation='relu')(f)
    f = BatchNormalization()(f)
    f = Dense(256, activation='relu')(f)
    f = BatchNormalization()(f)
    f = Dense(64 * 64, weights=[np.zeros([256, 64 * 64]), np.eye(64).flatten().astype(np.float32)])(f)
    feature_T = Reshape((64, 64))(f)


    # forward net
    g = Lambda(mat_mul, arguments={'B': feature_T})(g)
    g = Convolution1D(64, 1, activation='relu')(g)
    g = BatchNormalization()(g)
    g = Convolution1D(128, 1, activation='relu')(g)
    g = BatchNormalization()(g)
    g = Convolution1D(1024, 1, activation='relu')(g)
    g = BatchNormalization()(g)


    # global_feature
    global_feature = MaxPooling1D(pool_size=num_points)(g)


    # point_net_cls
    c = Dense(512, activation='relu')(global_feature)
    c = BatchNormalization()(c)
    c = Dropout(rate=dropout_rate)(c)
    c = Dense(256, activation='relu')(c)
    c = BatchNormalization()(c)
    c = Dropout(rate=dropout_rate)(c)
    c = Dense(num_classes, activation='sigmoid')(c)
    prediction = Flatten()(c)


    # print the model summary
    model = Model(inputs=input_points, outputs=prediction)
    print(model.summary())


    # compile classification model
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # ------------------------------------------------------------------------------
    # Fit model on training data
    for i in range(1,max_epochs+1):
        # model.fit(train_points_r, Y_train, batch_size=32, epochs=1, shuffle=True, verbose=1)
        # rotate and jitter the points
        train_points_rotate = rotate_point_cloud(train_points_r)
        train_points_jitter = jitter_point_cloud(train_points_rotate)
        history = model.fit(X_train, y_train, batch_size=batch_size, epochs=1,\
                        shuffle=True, verbose=0, validation_split=0.1)
        s = "Current epoch is:" + str(i)
        print(s)
        if i % 5 == 0:
            score = model.evaluate(test_points_r, y_test, verbose=1)
            print('Test loss: ', score[0])
            print('Test accuracy: ', score[1])


    # ## 10. Evaluate the Model
    # score the model
    score = model.evaluate(test_points_r, y_test, verbose=1)
    print('Test loss: ', score[0])
    print('Test accuracy: ', score[1])

    print("###################################################################")
    print("\nResults:\n")
    # ------------------------------------------------------------------------------
    # Classification Report

    # make predictions on the test set
    y_pred = model.predict(X_test)

    ################################################################################
    from sklearn.metrics import accuracy_score, confusion_matrix
    print("\n###################### Model Performance ############################")
    # evaluate the model
    _, train_acc = model.evaluate(X_train, y_train, verbose=0)
    _, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print('\nTrain: %.3f, Test: %.3f' % (train_acc, test_acc))
    ################################################################################
    print("\n#####################################################################")

    #Classification report
    report = classification_report(y_test, y_pred.round(), target_names=my_tags)
    print("\nClassfication Report for test:\n", report)
    print("\n#####################################################################")

    return(model, report)

### Run Models

In [15]:
knn, knn_report = knn(X_train_cnn, X_test_cnn, y_train, y_test, my_tags)


####################################################################
Building KNN Reference Model
Building model...
Training model...
Evaluation -------------------------------------------------------
Test set score: 0.46

Classfication Report for test:
                     precision    recall  f1-score   support

              Apex       0.00      0.00      0.00         2
           Bladder       0.64      0.23      0.34        78
     Bladder noisy       0.00      0.00      0.00        74
              Body       1.00      0.89      0.94        65
               CTV       0.24      0.11      0.15       271
             Crura       0.00      0.00      0.00        12
      Femoral head       1.00      0.83      0.91        58
 Femoral head left       0.97      0.81      0.88        75
Femoral head right       0.98      0.86      0.92        76
               GTV       0.00      0.00      0.00        32
          Hydrogel       0.00      0.00      0.00         6
             Other     

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


#### PointNet Model

PointNet takes a non-Euclidean approach to 3D data classification and takes cartesian coordinates (x,y,z) as input.

In [0]:
pointnet, pointnet_report = pointnet_full(y, my_tags, test_ids)

Training shape:  (5773, 1024, 3)
Test shape: 	 (1925, 1024, 3)


W0728 09:12:10.893286 139756220352384 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0728 09:12:10.894582 139756220352384 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0728 09:12:10.902050 139756220352384 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0728 09:12:11.018502 139756220352384 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0728 09:12:11.221660 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 1024, 3)           0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 1024, 3)           0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 1024, 64)          256       
_________________________________________________________________
batch_normalization_6 (Batch (None, 1024, 64)          256       
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 1024, 64)          4160      
_________________________________________________________________
batch_normalization_7 (Batch (None, 1024, 64)          256       
_________________________________________________________________
lambda_2 (Lambda)            (None, 1024, 64)          0         
__________

#### Convolutional Neural Network

In [0]:
cnn, cnn_report = CNN(X_train_cnn, X_test_cnn, y_train, y_test, k, my_tags = my_tags)

#### Recurrent Neural Network

In [0]:
rnn = rnn(X_train_rnn, X_test_rnn, y_train, y_test, k, my_tags=my_tags)

#### Our model (3DCNN + RNN)

In [0]:
final_model = standardisation_model(X_train_cnn, X_train_rnn, X_test_cnn, X_test_rnn, y_train, y_test, k, my_tags=my_tags)

# The End