In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling3D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import BatchNormalization,Flatten,\
Add,Input,Dense, Dropout, Activation, InputLayer
from tensorflow.keras.optimizers import Adam



from augmentation_code import data_augmentation
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import os 

In [2]:
def set_globals():
    """Publish the notebook-wide configuration values as module-level globals.

    Sets four globals:
      * open3D_directory -- path string ``C:\\open3dtools``
      * dataset_name_num -- dataset folder name, read by ``import_data``
      * MIF_nodes        -- list of three ints
      * xyz_offset       -- list of three ints

    NOTE(review): the meaning of MIF_nodes and xyz_offset is not visible in
    this notebook -- presumably grid node counts and a coordinate offset per
    axis; confirm against the code that consumes them.
    """
    global open3D_directory
    global dataset_name_num
    global MIF_nodes
    global xyz_offset
    # Raw string: "\o" is not a valid escape sequence, so the plain literal
    # only worked by accident and triggers a warning on newer Python versions.
    open3D_directory = r"C:\open3dtools"
    dataset_name_num = "06.DHFR"
    MIF_nodes = [23, 27, 21]
    xyz_offset = [-11, -10, -9]
set_globals()

In [3]:
def import_data(data_root=r"C:\Users\Linden\GitHub\3DQSAR\data\LabelledData",
                dataset=None):
    """Load the labels and the two stacked field arrays of one dataset.

    Parameters
    ----------
    data_root : str
        Directory that contains one sub-folder per dataset. Defaults to the
        original hard-coded location.
    dataset : str or None
        Name of the dataset sub-folder. When None, the global
        ``dataset_name_num`` is used.

    Returns
    -------
    X_1, X_2 : numpy.ndarray
        Arrays stacked along a new leading axis. Taking the ``.npy`` files in
        sorted name order, X_1 holds files 0, 2, 4, ... and X_2 holds files
        1, 3, 5, ...
    Y : numpy.ndarray
        The "Y" column of ``pIC50.CSV``.

    Raises
    ------
    ValueError
        If the folder holds no ``.npy`` files or an odd number of them.

    NOTE(review): assumes that sorting by file name puts the two fields of
    each sample next to each other and in the same order as the CSV rows --
    confirm against the naming scheme of the exported files.
    """
    if dataset is None:
        dataset = dataset_name_num
    path = os.path.join(data_root, dataset)
    labels = pd.read_csv(os.path.join(path, "pIC50.CSV"))
    Y = labels["Y"].to_numpy()

    # os.scandir yields entries in arbitrary, filesystem-dependent order.
    # The even/odd pairing below depends on the order, so make it explicit.
    with os.scandir(path) as entries:
        npy_paths = sorted(entry.path for entry in entries
                           if entry.path.endswith(".npy"))

    if not npy_paths:
        raise ValueError(f"No .npy files found in {path}")
    if len(npy_paths) % 2:
        raise ValueError(
            f"Expected an even number of .npy files in {path} "
            f"(two fields per sample), found {len(npy_paths)}")

    data = [np.load(npy_path) for npy_path in npy_paths]
    field_1 = data[0::2]
    field_2 = data[1::2]
    X_1 = np.stack(field_1, 0)
    X_2 = np.stack(field_2, 0)

    return X_1, X_2, Y

In [16]:
# Load the two stacked field arrays (X_1, X_2) and the label vector (Y)
# for the dataset selected in set_globals().
X_1, X_2, Y = import_data()

In [17]:
#### Augment and extend data
# NOTE(review): this cell overwrites its own inputs, so running it twice
# augments data that is already augmented -- reload the data before re-running.
# NOTE(review): augmentation happens before the train/test split further down,
# so augmented variants of one sample may end up in both the training and the
# test set -- confirm whether that is intended.
X_1, X_2, Y = data_augmentation(X_1,X_2,Y,augment_repetitions=5)

In [18]:
### TODO: pickle the data (not implemented yet)

In [19]:
### TODO: normalise the image data (not implemented yet)
### Open question: normalise by the np.amax value?

In [20]:
# Inspect the array shape after augmentation, before the trailing axis is added.
X_1.shape

(1836, 25, 24, 26)

In [21]:
### Reshape: append a trailing axis of length 1 to both field arrays
# (build_CNN reads five dimensions from X_1_train, so the arrays must be 5-D).
X_1 = X_1.reshape(X_1.shape[:4] + (1,))
X_2 = X_2.reshape(X_2.shape[:4] + (1,))

In [22]:
### Shuffle: reorder all three arrays with one shared, fixed-seed permutation
# so that samples in X_1, X_2 and Y stay aligned.
permutation = np.random.RandomState(seed=42).permutation(len(X_1))

X_1, X_2, Y = X_1[permutation], X_2[permutation], Y[permutation]

In [23]:
#### Train / test split
# The first 90 % of the samples (rounded) go to training, the remainder to test.
train_upper = int(np.around(0.9 * X_1.shape[0]))

X_1_train, X_1_test = X_1[:train_upper], X_1[train_upper:]
X_2_train, X_2_test = X_2[:train_upper], X_2[train_upper:]

Y_train, Y_test = Y[:train_upper], Y[train_upper:]

In [24]:
### Train / validation split
# The first 80 % (rounded) of the non-test samples stay in training and the
# rest become the validation set.
#
# The bounds are derived from X_1 and X_1_test, which this cell never
# overwrites. Slicing X_1_train from itself (as before) shrank the training
# set a little further every time the cell was re-run.
train_val_upper = X_1.shape[0] - X_1_test.shape[0]
train_upper = int(np.around(train_val_upper*0.8,0))
X_1_val = X_1[train_upper:train_val_upper]
X_2_val = X_2[train_upper:train_val_upper]
X_1_train = X_1[:train_upper]
X_2_train = X_2[:train_upper]

Y_val = Y[train_upper:train_val_upper]
Y_train = Y[:train_upper]


In [25]:
def build_CNN(input_shape=None):
    """Build one convolutional feature-extractor branch.

    The branch alternates MaxPooling3D (pool size 2x2x2) with Conv2D layers
    (32, 64 and 128 filters, 2x2 kernels, stride 1, ELU activation) and ends
    with a Flatten layer.

    Parameters
    ----------
    input_shape : tuple or None
        Shape of one input sample, without the batch axis. When None it is
        taken from the global ``X_1_train`` (all axes after the first).

    Returns
    -------
    tensorflow.keras.Model
        Functional model mapping the input tensor to the flattened features.

    NOTE(review): Conv2D is applied to a 5-D tensor here while the pooling is
    3-D. Conv3D may have been intended, and stricter Keras versions may reject
    a 5-D input to Conv2D -- confirm before changing, since it alters the
    architecture.
    """
    if input_shape is None:
        input_shape = X_1_train.shape[1:]

    # elu = Exponential Linear Unit, similar to leaky ReLU.
    # TODO (from the original notes): perhaps normalise in the first layer.
    inputs = Input(shape=input_shape)

    # Convolution layers
    features = MaxPooling3D(pool_size=(2, 2, 2))(inputs)
    features = Conv2D(32, (2, 2), strides=(1, 1), activation='elu')(features)
    features = MaxPooling3D(pool_size=(2, 2, 2))(features)
    features = Conv2D(64, (2, 2), strides=(1, 1), activation='elu')(features)
    features = MaxPooling3D(pool_size=(2, 2, 2))(features)
    features = Conv2D(128, (2, 2), strides=(1, 1), activation='elu')(features)

    output = Flatten()(features)
    model = Model(inputs=inputs, outputs=output)

    return model


In [26]:
def two_headed_network():
    """Build and compile the two-input regression network.

    Two independent ``build_CNN`` branches (one per field array) are summed
    element-wise, passed through a 10-unit ELU Dense layer and reduced to a
    single linear output.

    Returns
    -------
    tensorflow.keras.Model
        Model taking ``[field_1_input, field_2_input]``, compiled with Adam
        (learning rate 1e-3), MSE loss and mean absolute error as the
        reported metric.
    """
    X_1_CNN = build_CNN()
    X_2_CNN = build_CNN()
    network = Add()([X_1_CNN.output, X_2_CNN.output])
    # Both branch outputs are already flattened by build_CNN; this Flatten is
    # kept so the layer stack stays the same as before.
    network = Flatten()(network)
    network = Dense(10, activation='elu')(network)
    network = Dense(1, activation='linear')(network)
    model = Model([X_1_CNN.input, X_2_CNN.input], network)
    optimizer = Adam(learning_rate=1e-3)
    # The target is continuous (linear output, MSE loss), so classification
    # accuracy carries no information; report mean absolute error instead.
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])

    return model
CNN = two_headed_network()

In [None]:
## Train
## Early stopping watches the validation loss (val_loss) and restores the
## best weights once no improvement is seen for 15 epochs.
# TODO: consider setting min_delta to 1e-3 (it is currently 0)
monitor = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=15,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)

CNN.fit(
    x=[X_1_train, X_2_train],
    y=Y_train,
    batch_size=64,
    epochs=200,
    validation_data=([X_1_val, X_2_val], Y_val),
    callbacks=[monitor],
)


Epoch 1/200
Epoch 2/200

In [None]:
## Evaluate the trained model on the held-out test split
# NOTE(review): the model is compiled with an extra metric, so evaluate()
# presumably returns [loss, metric] rather than a bare loss -- confirm.
model_loss = CNN.evaluate(x=[X_1_test, X_2_test], y=Y_test)
print(f"Model loss:{model_loss}")
