Necessary imports for the code

In [3]:

import os
import glob
import numpy as np
import tensorflow as tf
import pylab as plt
import sklearn as skl
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics import f1_score
from random import sample
from sklearn.model_selection import train_test_split



In [4]:
# Variables for the model


N = 90 # Number of points per event
num_classes = 5 # Number of classes we're classifying into
BATCH_SIZE = 32

Loads data from the .npy files and and returns numpy arrays for further analysis. 
This data contains, the position, time and charge indentities in each events. 

In [5]:

    
    deuteron = np.load('./3DsmallDataParticleChargeArrays/deuteron_array3D.npy', allow_pickle=True)
    he3 = np.load('./3DsmallDataParticleChargeArrays/he3_array3D.npy', allow_pickle=True)
    he4 = np.load('./3DsmallDataParticleChargeArrays/he4_array3D.npy', allow_pickle=True)
    proton = np.load('./3DsmallDataParticleChargeArrays/proton_array3D.npy', allow_pickle=True)
    triton = np.load('./3DsmallDataParticleChargeArrays/triton_array3D.npy', allow_pickle=True)
    




This function takes in numpy arrays containing the data for position, time, charge and converts it to 
3D tensorflow datatypes consisting of the position data in 3D

Variables: 

N - the threshold for number of points considered per event.
num_classes = The number of classifications there will be in the data
BATCH_SIZE = the size of the sub arrays in the training/test dataset that will contain the events

In [11]:

    
    # For each event for loops to append the z-dimensional data which is the time.
    time_deuteron = []
    for event in range(len(deuteron)):
        time_deuteron.append(deuteron[event][0])
   
    time_he3 = []
    for event in range(len(he3)):
        time_he3.append(he3[event][0])
  
    time_he4 = []
    for event in range(len(he4)):
        time_he4.append(he4[event][0])
  
    time_proton = []
    for event in range(len(proton)):
        time_proton.append(proton[event][0])

    time_triton = []
    for event in range(len(triton)):
        time_triton.append(triton[event][0])
        
   
    #only data where there was a particle detection will be stored in these arrays as PointNet model only needs the points where 
    #a particle will be present
    only_detected_events_deuteron = [] 
    only_detected_events_he3 = []
    only_detected_events_he4 = []
    only_detected_events_proton = []
    only_detected_events_triton = []
    
    num_events = len(triton) #should be 1500
    num_rows = len(triton[0][0]) #should be 108
    num_cols = len(triton[0][0][0]) #should be 112
    
    # For each events only takes the points where the particle was present
    for event in range(num_events):

        single_event_only_detected_events_deuteron = []
        single_event_only_detected_events_he3 = []
        single_event_only_detected_events_he4 = []
        single_event_only_detected_events_proton = []
        single_event_only_detected_events_triton = []
        
        #we floor divide time by 3, because a 108 by 112 by 291 system would be disproportionate. floor divisor subject to change later
        #290.84//3 = 96 <-- we use 97 as the upper bound of the z-coordinate
        
        for row in range(num_rows):
            for col in range(num_cols):
                if time_deuteron[event][row][col] != []:
                    single_event_only_detected_events_deuteron.append([row, col, int(time_deuteron[event][row][col][0]//3)])
                    
                if time_he3[event][row][col] != []:
                    single_event_only_detected_events_he3.append([row, col, int(time_he3[event][row][col][0]//3) ])
                    
                if time_he4[event][row][col] != []:
                    single_event_only_detected_events_he4.append([row, col, int(time_he4[event][row][col][0]//3)])
                    
                if time_proton[event][row][col] != []:
                    single_event_only_detected_events_proton.append([row, col, int(time_proton[event][row][col][0]//3)])
                    
                if time_triton[event][row][col] != []:
                    single_event_only_detected_events_triton.append([row, col, int(time_triton[event][row][col][0]//3)])
              

        # There must only be a constant number of points in each of the events for the PointNet model to work successfully. Here, we randomly take N points from the events which have N or more than N points. 
        if len(single_event_only_detected_events_deuteron)>=N:
            only_detected_events_deuteron.append(sample(single_event_only_detected_events_deuteron,N))
        else:
            X = N-len(single_event_only_detected_events_deuteron)
            for i in np.arange(X):
                single_event_only_detected_events_deuteron.append((0,0,0))
            only_detected_events_deuteron.append(sample(single_event_only_detected_events_deuteron , N))
        if len(single_event_only_detected_events_he3)>=N:
            only_detected_events_he3.append(sample(single_event_only_detected_events_he3,N))
        else:
            X = N-len(single_event_only_detected_events_he3)
            for i in np.arange(X):
                single_event_only_detected_events_he3.append((0,0,0))
            only_detected_events_he3.append(sample(single_event_only_detected_events_he3 , N))
        if len(single_event_only_detected_events_he4)>=N:
            only_detected_events_he4.append(sample(single_event_only_detected_events_he4,N))
        else:
            X = N-len(single_event_only_detected_events_he4)
            for i in np.arange(X):
                single_event_only_detected_events_he4.append((0,0,0))
            only_detected_events_he4.append(sample(single_event_only_detected_events_he4 , N))
        if len(single_event_only_detected_events_proton)>=N:
            only_detected_events_proton.append(sample(single_event_only_detected_events_proton,N))
        else:
            X = N-len(single_event_only_detected_events_proton)
            for i in np.arange(X):
                single_event_only_detected_events_proton.append((0,0,0))
            only_detected_events_proton.append(sample(single_event_only_detected_events_proton , N))
        if len(single_event_only_detected_events_triton)>=N:
            only_detected_events_triton.append(sample(single_event_only_detected_events_triton,N))
        else:
            X = N-len(single_event_only_detected_events_triton)
            for i in np.arange(X):
                single_event_only_detected_events_triton.append((0,0,0))
            only_detected_events_triton.append(sample(single_event_only_detected_events_triton , N))

    # Creating the train arrays and test arrays
    train_labels = []
    train_points = []

    # For each event we're inputting a different label number 
    for i in range(len(only_detected_events_deuteron)):
        train_points.append(only_detected_events_deuteron[i])
        train_labels.append(0)
    for i in range(len(only_detected_events_he3)):
        train_points.append(only_detected_events_he3[i])
        train_labels.append(1)
    for i in range(len(only_detected_events_he4)):
        train_points.append(only_detected_events_he4[i])
        train_labels.append(2)
    for i in range(len(only_detected_events_proton)):
        train_points.append(only_detected_events_proton[i])
        train_labels.append(3)
    for i in range(len(only_detected_events_triton)):
        train_points.append(only_detected_events_triton[i])
        train_labels.append(4)

    # Splitting the data
    train_events_final, test_events, train_labels_final, test_labels = train_test_split(train_points, train_labels, test_size=0.2)
    
    # For use in the PN model it needs to be converted to tensor_slices
    train_dataset = tf.data.Dataset.from_tensor_slices((train_events_final, train_labels_final))
    test_dataset = tf.data.Dataset.from_tensor_slices((test_events, test_labels))
    
  



Used to shuffle points within each events

In [50]:
print(len(only_detected_events_deuteron))

869


In [30]:

def augment(points, label):
    
    # shuffle points
    points = tf.random.shuffle(points)
    return points, label

# This shuffles the points within each events. 
    train_dataset = train_dataset.shuffle(len(train_events_final)).map(augment).batch(BATCH_SIZE)
    test_dataset = test_dataset.shuffle(len(test_events)).map(augment).batch(BATCH_SIZE)

In [42]:
data = test_dataset.take(len(test_dataset))
points, labels = list(data)[len(test_dataset)-2]
points = points
labels = labels.numpy()
print(labels)

1


The functions below including 

1. conv_bn(x, filters)
2. dense_bn(x, filters)
3. OrthogonalRegularizer(keras.regularizers.Regularizer)
4. tnet(inputs, num_features)
5. build_point_cloud_model(NUM_POINTS, NUM_CLASSES)

are used to build our PointNet Neural Network. To learn more or understand this model better please visit either or both 
of these resourses.
https://keras.io/examples/vision/pointnet/
https://www.youtube.com/watch?v=GGxpqfTvE8c&t=878s

In [32]:

def conv_bn(x, filters):
    x = layers.Conv1D(filters, kernel_size=1, padding="valid")(x)
    x = layers.BatchNormalization(momentum=0.0)(x)
    return layers.Activation("relu")(x)


def dense_bn(x, filters):
    x = layers.Dense(filters)(x)
    x = layers.BatchNormalization(momentum=0.0)(x)
    return layers.Activation("relu")(x)

class OrthogonalRegularizer(keras.regularizers.Regularizer):
    def __init__(self, num_features, l2reg=0.001):
        self.num_features = num_features
        self.l2reg = l2reg
        self.eye = tf.eye(num_features)

    def __call__(self, x):
        x = tf.reshape(x, (-1, self.num_features, self.num_features))
        xxt = tf.tensordot(x, x, axes=(2, 2))
        xxt = tf.reshape(xxt, (-1, self.num_features, self.num_features))
        return tf.reduce_sum(self.l2reg * tf.square(xxt - self.eye))
    
def tnet(inputs, num_features):

    # Initalise bias as the indentity matrix
    bias = keras.initializers.Constant(np.eye(num_features).flatten())
    reg = OrthogonalRegularizer(num_features)

    x = conv_bn(inputs, 32)
    x = conv_bn(x, 64)
    x = conv_bn(x, 512)
    x = layers.GlobalMaxPooling1D()(x)
    x = dense_bn(x, 256)
    x = dense_bn(x, 128)
    x = layers.Dense(
        num_features * num_features,
        kernel_initializer="zeros",
        bias_initializer=bias,
        activity_regularizer=reg,
    )(x)
    feat_T = layers.Reshape((num_features, num_features))(x)
    # Apply affine transformation to input features
    return layers.Dot(axes=(2, 1))([inputs, feat_T])

def build_point_cloud_model(NUM_POINTS, NUM_CLASSES):
    
    inputs = keras.Input(shape=(NUM_POINTS, 3))

    x = tnet(inputs, 3)
    x = conv_bn(x, 32)
    x = conv_bn(x, 32)
    x = tnet(x, 32)
    x = conv_bn(x, 32)
    x = conv_bn(x, 64)
    x = conv_bn(x, 512)
    x = layers.GlobalMaxPooling1D()(x)
    x = dense_bn(x, 256)
    x = layers.Dropout(0.3)(x)
    x = dense_bn(x, 128)
    x = layers.Dropout(0.3)(x)

    outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)

    return keras.Model(inputs=inputs, outputs=outputs, name="pointnet")



This function utilizes the train_dataset to train the model and 
uses test_dataset to plot the learning curve and confusion matrix

In [26]:

# Plots the Learning curve and saves to the give directory

def plot_learning_curve(history):
    LEARNING_CURVE_DIR = './Learning-Curve-PointNet-Model'
    PRED_FIGS_DIR = './Prediction-label-images'

    if not os.path.exists(LEARNING_CURVE_DIR):
        os.makedirs(LEARNING_CURVE_DIR)

    #want to save figure with the data and time.
    #change to log_scale or power_transform accordingly:
    today_time = str(datetime.today())
    file_name = "log_scale_LC{}{}".format(today_time,'.png')
    #file_name = "power_transform_LC{}".format(today_time,'.png')

    save_fig_path = os.path.join(LEARNING_CURVE_DIR, file_name)
    plt.plot(history["loss"], label="training loss")
    plt.plot(history["val_loss"], label="validation loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.yscale('log')
    plt.legend()
    plt.show()
    

In [27]:

# Plots the confusion matrix and saves to the given directory

def plot_confusion_matrix(label, preds, classes, cmap=plt.cm.Blues):
    
    
    CONFUSION_MATRIX_DIR = './confusion-matrix-directory'
    
    if not os.path.exists(CONFUSION_MATRIX_DIR):
        os.makedirs(CONFUSION_MATRIX_DIR)
    
    today_time = str(datetime.today())
    file_name = "confusion_matrix{}{}".format(today_time,'.png')
    
    save_fig_path = os.path.join(CONFUSION_MATRIX_DIR, file_name)    
    
    cm = confusion_matrix(label, preds)

    fig, ax = plt.subplots(figsize=(4, 4), dpi=100)
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title='Confusion Matrix',
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
    
    # Loop over data dimensions and create text annotations.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], 'd'),
                    ha='center', va='center',
                    color='white' if cm[i, j] > thresh else 'black')
    fig.tight_layout()

    
    plt.show()



In [28]:

    model = build_point_cloud_model(N, num_classes)
    
    classes = ['Deuteron',  'Helium-3',  'Helium-4', 'Proton',  'Triton']

    # Compiles the model with the given learning rate. 
    model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=10e-6),
    metrics=["sparse_categorical_accuracy"],
    )

    result = model.fit(train_dataset, epochs=1, validation_data=test_dataset) 
    plot_learning_curve(result.history)
    
    #Converts the test dataset to numpy arrays for use in confusion matrix function
    all_labels = np.array([])
    all_preds = np.array([])
    
    for i in range(len(test_dataset)):
        data = test_dataset.take(i+1)
        points, labels = list(data)[0]
        points = points
        labels = labels.numpy()

        # run test data through model
        preds = model.predict(points)
        preds = tf.math.argmax(preds, -1)
        preds = preds.numpy()
        all_labels = np.array(all_labels.tolist()+labels.tolist())
        all_preds = np.array(all_preds.tolist()+ preds.tolist())
    
    plot_confusion_matrix(all_labels, all_preds, classes, cmap=plt.cm.Blues)
    

ValueError: in user code:

    File "/home/DAVIDSON/taawal/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/home/DAVIDSON/taawal/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/DAVIDSON/taawal/.local/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/home/DAVIDSON/taawal/.local/lib/python3.8/site-packages/keras/engine/training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "/home/DAVIDSON/taawal/.local/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/home/DAVIDSON/taawal/.local/lib/python3.8/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "pointnet" is incompatible with the layer: expected shape=(None, 90, 3), found shape=(90, 3)


In [None]:
for i in range(len(test_dataset)):
        data = test_dataset.take(i+1)
        points, labels = list(data)[0]
        points = points
        labels = labels.numpy()

        # run test data through model
        preds = model.predict(points)
        preds = tf.math.argmax(preds, -1)
        preds = preds.numpy()
        all_labels = np.array(all_labels.tolist()+labels.tolist())
        all_preds = np.array(all_preds.tolist()+ preds.tolist())

In [None]:
for i in range(len(test_dataset)):
        data = test_dataset.take(i+1)

In [18]:
data.numpy()

AttributeError: 'TakeDataset' object has no attribute 'numpy'