In [19]:
def loguniform(low=0, high=1):
    """Draw one random float in ``[low, high]`` via an exponential warp.

    A uniform sample ``u`` from ``[0, 1)`` is passed through ``exp`` and the
    result (which lies in ``[1, e)``) is rescaled linearly so that ``1`` maps
    to ``low`` and ``e`` maps to ``high``.

    Note: because the rescaling is linear in ``exp(u)`` rather than the
    sample being ``exp`` of a uniform draw over ``[log(low), log(high)]``,
    this is not the classic log-uniform (reciprocal) distribution.

    Args:
        low: Value returned when the uniform draw is 0.
        high: Upper end of the output range.

    Returns:
        A NumPy float between ``low`` and ``high``.
    """
    exp_min, exp_max = np.exp(0), np.exp(1)
    warped = np.exp(np.random.uniform(0, 1, None))
    span = high - low
    return ((warped - exp_min) * span) / (exp_max - exp_min) + low

def loguniform_int(low=0, high=1):
    """Integer variant of the exponentially warped sampler.

    Draws ``u`` uniformly from ``[0, 1)``, maps it through ``exp``, rescales
    the result linearly from ``[1, e)`` onto ``[low, high)`` and truncates
    towards zero with ``int``.

    Args:
        low: Value produced when the uniform draw is 0.
        high: Upper end of the (pre-truncation) output range.

    Returns:
        A Python ``int``.
    """
    exp_min, exp_max = np.exp(0), np.exp(1)
    warped = np.exp(np.random.uniform(0, 1, None))
    span = high - low
    rescaled = ((warped - exp_min) * span) / (exp_max - exp_min) + low
    return int(rescaled)

def uniform(low=0, high=1):
    """Return a single sample drawn uniformly from ``[low, high)``.

    Thin wrapper around ``np.random.uniform`` that always requests a scalar.
    """
    return np.random.uniform(low, high, size=None)

In [20]:
def generate_3d_data(data, max_len):
    """Turn a long-format event table into padded per-case sequence tensors.

    Rows are ordered by ``complete_timestamp`` and grouped by
    ``unique_id_ros``; only the first ``max_len`` rows of every group are
    kept. Columns from position 8 onwards are used as features (the first
    8 columns are skipped). The class index of a group is read from the
    ``Releasetreue`` value of its first row.

    Args:
        data: pandas DataFrame containing at least the columns
            ``complete_timestamp``, ``unique_id_ros`` and ``Releasetreue``.
        max_len: Number of time steps per sequence; longer groups are
            truncated to their first ``max_len`` rows, shorter ones are
            padded by ``pad_sequences`` (with its default settings).

    Returns:
        Tuple ``(X, y)`` where ``X`` is a float32 array of shape
        ``(n_groups, max_len, data.shape[1] - 8)`` and ``y`` is a float32
        one-hot array of shape ``(n_groups, 2)``. Rows follow the iteration
        order of the ``unique_id_ros`` groupby.
    """
    # A single stable sort is sufficient: ``head`` and ``groupby`` both keep
    # the relative row order inside every group.
    ordered = data.sort_values('complete_timestamp', ascending=True, kind="mergesort")
    ordered = ordered.groupby('unique_id_ros').head(max_len)
    grouped = ordered.groupby('unique_id_ros')

    data_dim = ordered.shape[1] - 8
    # Size the outputs by the groups that are actually iterated below, so the
    # arrays can neither overflow nor keep unused all-zero rows.
    n_cases = grouped.ngroups

    X = np.zeros((n_cases, max_len, data_dim), dtype=np.float32)
    y = np.zeros((n_cases, 2), dtype=np.float32)

    # each group (prefix) becomes a separate instance
    for idx, (_, group) in enumerate(grouped):
        # int() so that a float- or bool-typed label is still a valid index
        label = int(group['Releasetreue'].iloc[0])
        events = group.to_numpy()
        # Slice with max_len (not a hard-coded 30) so the function honours
        # its own argument.
        X[idx] = pad_sequences(events[np.newaxis, :max_len, 8:], maxlen=max_len, dtype=np.float32)
        y[idx, label] = 1
    return (X, y)

In [21]:
class AUCHistory(keras.callbacks.Callback):
    """Keras callback that records validation AUC scores after every epoch.

    After each epoch the model predicts on the stored validation inputs and
    the following values are appended:

    * ``aucs``      - ROC AUC computed on output column 0.
    * ``aucs_pr_0`` - area under the precision-recall curve for column 0.
    * ``aucs_pr_1`` - area under the precision-recall curve for column 1.

    ``aucs_pr`` is created and reset alongside the other lists but is never
    appended to by this class.

    Args:
        X_val_static: Validation input for the static branch.
        X_val_dynamic: Validation input for the dynamic branch.
        y_val: Validation targets; columns 0 and 1 are read.
    """

    def __init__(self, X_val_static, X_val_dynamic, y_val):
        # Let the base class set up its own bookkeeping attributes.
        super().__init__()
        self.X_val_dynamic = X_val_dynamic
        self.X_val_static = X_val_static
        self.y_val = y_val
        self._reset_history()

    def _reset_history(self):
        """Start every score list from empty."""
        self.aucs = []
        self.aucs_pr = []
        self.aucs_pr_0 = []
        self.aucs_pr_1 = []

    def on_train_begin(self, logs=None):
        """Clear scores so a reused callback does not mix training runs."""
        self._reset_history()

    def on_epoch_end(self, epoch, logs=None):
        """Score the current model on the validation data."""
        y_pred = self.model.predict([self.X_val_static, self.X_val_dynamic])
        self.aucs.append(roc_auc_score(self.y_val[:, 0], y_pred[:, 0]))
        precision, recall, _ = precision_recall_curve(self.y_val[:, 0], y_pred[:, 0])
        self.aucs_pr_0.append(auc(recall, precision))
        precision, recall, _ = precision_recall_curve(self.y_val[:, 1], y_pred[:, 1])
        self.aucs_pr_1.append(auc(recall, precision))

    # The remaining hooks (train end, epoch begin, batch begin/end) were
    # no-ops; the inherited base-class implementations are used instead.

In [23]:
def DynamicSubnetwork(max_len, data_dim_dynamic, n_layers, lstmsize, dropout):
    """Build the LSTM branch that encodes the dynamic (sequence) input.

    The branch stacks ``n_layers`` LSTM layers of ``lstmsize`` units. Every
    layer except the last returns the full sequence and is followed by
    ``BatchNormalization(axis=1)``; the last layer returns only its final
    state and is followed by a default ``BatchNormalization``. A dropout
    layer named ``Dropout_Layer_Dynamic`` closes the branch.

    Args:
        max_len: Number of time steps of the input sequences.
        data_dim_dynamic: Number of features per time step.
        n_layers: Number of stacked LSTM layers (>= 1).
        lstmsize: Units per LSTM layer.
        dropout: Dropout rate used inside the LSTMs and in the final
            dropout layer.

    Returns:
        A Keras ``Model`` mapping ``dynamic_input`` to the branch output.

    Raises:
        ValueError: If ``n_layers`` is smaller than 1.
    """
    if n_layers < 1:
        # Previously any value outside {1, 2, 3} failed later with an
        # unbound-variable error.
        raise ValueError('n_layers must be >= 1, got ' + str(n_layers))

    dynamic_input = Input(shape=(max_len, data_dim_dynamic), name='dynamic_input')

    x = dynamic_input
    for layer_idx in range(n_layers):
        is_last = layer_idx == n_layers - 1
        lstm_kwargs = dict(
            implementation=2,
            kernel_initializer='glorot_uniform',
            return_sequences=not is_last,
            dropout=dropout,
        )
        if layer_idx == 0:
            # Only the first layer carried input_shape in the original code.
            lstm_kwargs['input_shape'] = (max_len, data_dim_dynamic)
        x = LSTM(lstmsize, **lstm_kwargs)(x)
        # Intermediate (sequence) outputs are normalised along axis 1, the
        # final state along the default axis, as in the original branches.
        x = BatchNormalization()(x) if is_last else BatchNormalization(axis=1)(x)

    dynamic_output = Dropout(dropout, name='Dropout_Layer_Dynamic')(x)

    model = Model(inputs=dynamic_input, outputs=dynamic_output)
    return model


def StaticSubnetwork(data_dim_static, n_layers, dropout):
    """Build the dense branch that encodes the static (per-case) input.

    The branch stacks ``n_layers`` ReLU ``Dense`` layers whose widths halve
    down to 8 units (``[8]``, ``[16, 8]``, ``[32, 16, 8]``, ...). Every
    hidden layer is followed by ``BatchNormalization(axis=1)`` and the last
    one by a default ``BatchNormalization``. A dropout layer named
    ``Dropout_Layer_Static`` closes the branch.

    Args:
        data_dim_static: Number of static features.
        n_layers: Number of dense layers (>= 1).
        dropout: Rate of the final dropout layer.

    Returns:
        A Keras ``Model`` mapping ``static_input`` to the branch output.

    Raises:
        ValueError: If ``n_layers`` is smaller than 1.
    """
    if n_layers < 1:
        # Previously any value outside {1, 2, 3} failed later with an
        # unbound-variable error.
        raise ValueError('n_layers must be >= 1, got ' + str(n_layers))

    # ``(data_dim_static)`` is just an int; the shape must be a 1-tuple.
    static_input = Input(shape=(data_dim_static,), name='static_input')

    x = static_input
    for layer_idx in range(n_layers):
        is_last = layer_idx == n_layers - 1
        # Width doubles for every layer further away from the output.
        units = 8 * 2 ** (n_layers - 1 - layer_idx)
        x = Dense(units, activation='relu', kernel_initializer='glorot_uniform')(x)
        x = BatchNormalization()(x) if is_last else BatchNormalization(axis=1)(x)

    static_output = Dropout(dropout, name='Dropout_Layer_Static')(x)

    model = Model(inputs=static_input, outputs=static_output)
    return model

def DeepMultimodalModel(max_len, data_dim_dynamic, data_dim_static, n_layers, lstmsize, dropout, activation):
    """Assemble the two-branch model that fuses static and dynamic inputs.

    The static branch (``StaticSubnetwork``) and the dynamic branch
    (``DynamicSubnetwork``) are built with the same ``n_layers`` and
    ``dropout``; their outputs are concatenated, passed through an 8-unit
    ReLU layer (``final_dense_layer_``) and a 2-unit output layer
    (``outcome_output``) using ``activation``.

    Returns:
        A Keras ``Model`` whose inputs are ``[static_input, dynamic_input]``
        (in that order) and whose output is ``outcome_output``.
    """
    static_branch = StaticSubnetwork(data_dim_static, n_layers, dropout)
    dynamic_branch = DynamicSubnetwork(max_len, data_dim_dynamic, n_layers, lstmsize, dropout)

    fused = concatenate([static_branch.output, dynamic_branch.output])
    hidden = Dense(8, activation='relu', name='final_dense_layer_')(fused)
    outcome_output = Dense(
        2,
        activation=activation,
        kernel_initializer='glorot_uniform',
        name='outcome_output',
    )(hidden)

    return Model(
        inputs=[static_branch.input, dynamic_branch.input],
        outputs=outcome_output,
    )
        
        


In [None]:
# Hyperparameter tuning: random search over the multimodal model settings.
# Relies on names defined in other cells: train_static_ready,
# train_dynamic_ready, val_ratio and the helper functions/classes above.
best_auc = 0
best_param = {}
all_auc = []
all_param = []
best_aucpr = 0
all_aucpr = []
best_parampr = {}
time = []
confusion = []

activation = "sigmoid"
nb_epoch = 50
start = datetime.now()
max_len = 30
n_layers_values = [1, 2, 3]
batch_size_values = [8, 16, 32, 64]
optimizer_values = ["rmsprop", "nadam"]

for i in range(5, 6):
    for k in range(1):

        print('run ' + str(i) + '.' + str(k) + ' started at ' + str(datetime.now()))
        # The seed fixes both the train/validation split and the sampled
        # hyperparameters of this run.
        np.random.seed(i)
        train_split = train_static_ready.reindex(np.random.permutation(train_static_ready.index))
        # Hold out the last val_ratio share of the shuffled batches.
        batch_ids = list(train_split['EC batch'].unique())
        val_ids = batch_ids[-int(val_ratio * len(batch_ids)):]
        val_dynamic_pre = train_dynamic_ready[train_dynamic_ready['EC batch'].isin(val_ids)]
        train_dynamic_pre = train_dynamic_ready[~train_dynamic_ready['EC batch'].isin(val_ids)]
        val_static_pre = train_static_ready[train_static_ready['EC batch'].isin(val_ids)]
        train_static_pre = train_static_ready[~train_static_ready['EC batch'].isin(val_ids)]

        del train_split

        X_train_dynamic, y_train_dynamic = generate_3d_data(train_dynamic_pre, max_len)
        X_val_dynamic, y_val_dynamic = generate_3d_data(val_dynamic_pre, max_len)

        # NOTE(review): labels come from the dynamic table; this assumes the
        # static rows are in the same case order as the dynamic groups -
        # confirm upstream.
        X_train_static = train_static_pre.drop(['Adherence', 'EC batch', 'id'], axis=1).to_numpy()
        X_val_static = val_static_pre.drop(['Adherence', 'EC batch', 'id'], axis=1).to_numpy()

        # Take the input widths from the arrays that are actually fed to the
        # model; the former hard-coded offsets (shape[1] - 4 / shape[1] - 7)
        # did not match the columns dropped above / in generate_3d_data.
        data_dim_static = X_train_static.shape[1]
        data_dim_dynamic = X_train_dynamic.shape[2]

        lstmsize = loguniform_int(10, 150)
        dropout = uniform(0, 0.3)
        n_layers = n_layers_values[np.random.randint(0, len(n_layers_values))]
        batch_size = batch_size_values[np.random.randint(0, len(batch_size_values))]
        optimizer = optimizer_values[np.random.randint(0, len(optimizer_values))]
        learning_rate = loguniform(low=0.000001, high=0.0001)

        if optimizer == "nadam":
            opt = Nadam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004, clipvalue=3)
        elif optimizer == "rmsprop":
            opt = RMSprop(learning_rate=learning_rate, rho=0.9, epsilon=1e-08, decay=0.0)

        model = DeepMultimodalModel(max_len, data_dim_dynamic, data_dim_static, n_layers, lstmsize, dropout, activation)

        model.compile(loss={'outcome_output': 'binary_crossentropy'}, optimizer=opt, metrics=[tf.keras.metrics.AUC(curve='PR')])

        auc_cb = AUCHistory(X_val_static, X_val_dynamic, y_val_dynamic)
        history = model.fit(x=[X_train_static, X_train_dynamic], y=y_train_dynamic,
                            validation_data=([X_val_static, X_val_dynamic], y_val_dynamic),
                            verbose=2, callbacks=[auc_cb], batch_size=batch_size, epochs=nb_epoch)

        # Scores of the last epoch: PR AUC of class 1 and ROC AUC as
        # recorded by the callback.
        pr_auc = auc_cb.aucs_pr_1[-1]
        lr_auc = auc_cb.aucs[-1]

        # Keep the configuration as a real mapping; the former set literal
        # lost the ordering and merged equal values.
        run_params = {
            'run': str(i) + '.' + str(k),
            'lstmsize': lstmsize,
            'dropout': dropout,
            'n_layers': n_layers,
            'batch_size': batch_size,
            'optimizer': optimizer,
            'learning_rate': learning_rate,
        }

        if lr_auc > best_auc:
            best_auc = lr_auc
            best_param = dict(run_params)

        if pr_auc > best_aucpr:
            best_aucpr = pr_auc
            best_parampr = dict(run_params)

        all_auc.append(lr_auc)
        all_aucpr.append(pr_auc)
        all_param.append(['lstmsize= ' + str(lstmsize), 'dropout= ' + str(dropout), 'n_layers= ' + str(n_layers), 'batch_size= ' + str(batch_size),
                          'optimizer= ' + str(optimizer), 'learning_rate= ' + str(learning_rate)])

        print('run ' + str(i) + ' ended at ' + str(datetime.now()))

print('cv ended at ' + str(datetime.now()))

end = datetime.now()
time.append(end - start)




 

run 5.0 started at 2022-11-17 14:04:39.239731


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Epoch 1/50
2806/2806 - 97s - loss: 0.5212 - auc: 0.8191 - val_loss: 0.5411 - val_auc: 0.8242
Epoch 2/50
2806/2806 - 91s - loss: 0.4517 - auc: 0.8654 - val_loss: 0.5080 - val_auc: 0.8365
Epoch 3/50
2806/2806 - 91s - loss: 0.4295 - auc: 0.8805 - val_loss: 0.5265 - val_auc: 0.8376
Epoch 4/50
2806/2806 - 91s - loss: 0.4130 - auc: 0.8909 - val_loss: 0.5078 - val_auc: 0.8445
Epoch 5/50
2806/2806 - 91s - loss: 0.4036 - auc: 0.8962 - val_loss: 0.5092 - val_auc: 0.8455
Epoch 6/50
2806/2806 - 91s - loss: 0.3965 - auc: 0.8995 - val_loss: 0.4968 - val_auc: 0.8503
Epoch 7/50
2806/2806 - 91s - loss: 0.3858 - auc: 0.9063 - val_loss: 0.4914 - val_auc: 0.8520
Epoch 8/50
2806/2806 - 91s - loss: 0.3813 - auc: 0.9087 - val_loss: 0.5025 - val_auc: 0.8534
Epoch 9/50
2806/2806 - 91s - loss: 0.3735 - auc: 0.9124 - val_loss: 0.4995 - val_auc: 0.8533
Epoch 10/50
2806/2806 - 91s - loss: 0.3692 - auc: 0.9150 - val_loss: 0.5020 - val_auc: 0.8573
Epoch 11/50
2806/2806 - 91s - loss: 0.3666 - auc: 0.9157 - val_loss: 

In [None]:
# Final testing: retrain the model 20 times with the hyperparameters replayed
# from seed i and evaluate every run on the held-out test set.
# Relies on names defined in other cells: train_static_ready,
# train_dynamic_ready, test_static_ready, test_dynamic_ready, val_ratio,
# n_layers_values, batch_size_values, optimizer_values and the helpers above.

best_auc = 0
best_param = {}
all_auc = []
all_param = []
best_aucpr = 0
all_aucpr = []
best_parampr = {}
time = []
confusion = []

precision_list = []
recall_list = []
thres_pr_list = []
fpr_list = []
tpr_list = []
thres_roc_list = []

activation = "sigmoid"
nb_epoch = 50
start = datetime.now()
max_len = 30

X_test_static = test_static_ready.drop(['Adherence', 'id', 'EC batch'], axis=1).to_numpy()
X_test_dynamic, y_test_dynamic = generate_3d_data(test_dynamic_ready, max_len)


for i in range(1, 2):
    # Everything up to the second seeding depends only on i, so it is done
    # once per i instead of once per repetition k. The random stream after
    # np.random.seed(i) is consumed in the same order as before, so the split
    # and the replayed hyperparameters are unchanged.
    np.random.seed(i)
    train_split = train_static_ready.reindex(np.random.permutation(train_static_ready.index))
    batch_ids = list(train_split['EC batch'].unique())
    val_ids = batch_ids[-int(val_ratio * len(batch_ids)):]
    val_dynamic_pre = train_dynamic_ready[train_dynamic_ready['EC batch'].isin(val_ids)]
    train_dynamic_pre = train_dynamic_ready[~train_dynamic_ready['EC batch'].isin(val_ids)]
    val_static_pre = train_static_ready[train_static_ready['EC batch'].isin(val_ids)]
    train_static_pre = train_static_ready[~train_static_ready['EC batch'].isin(val_ids)]

    del train_split

    X_train_dynamic, y_train_dynamic = generate_3d_data(train_dynamic_pre, max_len)
    X_val_dynamic, y_val_dynamic = generate_3d_data(val_dynamic_pre, max_len)

    X_train_static = train_static_pre.drop(['Adherence', 'EC batch', 'id'], axis=1).to_numpy()
    X_val_static = val_static_pre.drop(['Adherence', 'EC batch', 'id'], axis=1).to_numpy()
    del val_dynamic_pre
    del train_dynamic_pre, train_static_pre

    # Take the input widths from the arrays that are actually fed to the
    # model; the former hard-coded offsets (shape[1] - 4 / shape[1] - 7) did
    # not match the columns dropped above / in generate_3d_data.
    data_dim_static = X_train_static.shape[1]
    data_dim_dynamic = X_train_dynamic.shape[2]

    lstmsize = loguniform_int(10, 150)
    dropout = uniform(0, 0.3)
    n_layers = n_layers_values[np.random.randint(0, len(n_layers_values))]
    batch_size = batch_size_values[np.random.randint(0, len(batch_size_values))]
    optimizer = optimizer_values[np.random.randint(0, len(optimizer_values))]
    learning_rate = loguniform(low=0.000001, high=0.0001)

    for k in range(20):

        print('run ' + str(i) + '.' + str(k) + ' started at ' + str(datetime.now()))

        # Different seed per repetition so the 20 trainings are not identical.
        np.random.seed(i + k)

        # A fresh optimizer instance is required for every new model.
        if optimizer == "nadam":
            opt = Nadam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004, clipvalue=3)
        elif optimizer == "rmsprop":
            opt = RMSprop(learning_rate=learning_rate, rho=0.9, epsilon=1e-08, decay=0.0)

        model = DeepMultimodalModel(max_len, data_dim_dynamic, data_dim_static, n_layers, lstmsize, dropout, activation)

        model.compile(loss={'outcome_output': 'binary_crossentropy'}, optimizer=opt, metrics=[tf.keras.metrics.AUC(curve='PR')])

        auc_cb = AUCHistory(X_val_static, X_val_dynamic, y_val_dynamic)
        history = model.fit(x=[X_train_static, X_train_dynamic], y=y_train_dynamic,
                            validation_data=([X_val_static, X_val_dynamic], y_val_dynamic),
                            verbose=2, callbacks=[auc_cb], batch_size=batch_size, epochs=nb_epoch)

        predictions = model.predict([X_test_static, X_test_dynamic])

        # Data to plot precision - recall curve
        precision, recall, thres_pr = precision_recall_curve(y_test_dynamic[:, 1], predictions[:, 1])
        precision_list.append(precision)
        recall_list.append(recall)
        thres_pr_list.append(thres_pr)

        # Data to plot roc curve
        fpr, tpr, thres_roc = roc_curve(y_test_dynamic[:, 1], predictions[:, 1])
        fpr_list.append(fpr)
        tpr_list.append(tpr)
        thres_roc_list.append(thres_roc)

        # Use AUC function to calculate the area under the curve of precision recall curve
        all_aucpr.append(auc(recall, precision))

        # calculate AUC ROC value
        all_auc.append(roc_auc_score(y_test_dynamic[:, 1], predictions[:, 1]))

        print('run ' + str(i) + ' ended at ' + str(datetime.now()))

print('cv ended at ' + str(datetime.now()))

# Arrays are written to CSV via their printed form; raise the threshold so
# long curves are not abbreviated with "...".
np.set_printoptions(threshold=100000)
curve_set = [None] * 8

curve_set[0] = tpr_list
curve_set[1] = fpr_list
curve_set[2] = thres_roc_list
curve_set[3] = all_auc
curve_set[4] = precision_list
curve_set[5] = recall_list
curve_set[6] = thres_pr_list
curve_set[7] = all_aucpr

fileexport_curve = 'Data/curve_mm.csv'
pd.DataFrame(curve_set).to_csv(fileexport_curve, index=False, header=False)

end = datetime.now()
time.append(end - start)

fileexport_time = 'Data/time_mm.csv'
pd.DataFrame(time).to_csv(fileexport_time, index=False, header=False)