<a href="https://colab.research.google.com/github/karimadadda/Deep_learning_project/blob/main/AMI_NET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importation des bibliothéques

In [1]:
import pandas as pd
import numpy as np
import sys
import tensorflow as tf
from sklearn.model_selection import KFold
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import warnings
warnings.filterwarnings('ignore')

## load data and preprocess


In [39]:
def data_preparation(path, type='excel'):

    if type is 'csv':
        df = pd.read_csv(path)
    if type is 'excel':
        df = pd.read_excel(path)

    # x and y
    y = np.array(df['y'])
    x = df.drop(['y'], axis=1)

    # for each patient, convert their containing symptoms to a list of words
    bin_feats = []

    for i in x.columns:
        if len(x[i].unique()) == 2:
            bin_feats.append(i)

    x_bin = x[bin_feats]
    x_bin = x_bin.replace(0, np.nan)
    x_bin_features = bin_gen(x_bin)

    # pad to the maximum length and convert to matrix
    x_bin_features, feats, tokens, feat_max = bin_pad_convert(x_bin_features, bin_feats)

    return x_bin_features, feats, tokens, feat_max, y
  



In [40]:
# convert to a list of words for binary features
def bin_gen(x):

    x_features = []

    for i in range(x.shape[0]):
        index = x.columns[x.iloc[i, :].notnull()]
        feats = np.array(index)
        x_features.append(feats)

    return np.array(x_features)

In [41]:
# convert to matrix and generate descriptions (binary features)
def bin_pad_convert(txt_features, bin_feat_list):

    bin_feat_max = max([len(feat) for feat in txt_features])
    bin_feats = ['pad'] + bin_feat_list
    tokens = len(bin_feats)

    x_features = np.zeros((len(txt_features), bin_feat_max), dtype='int32')
    feat_index = dict([(char, i) for i, char in enumerate(bin_feats)])

    for i, input_text in enumerate(txt_features):
        for t, char in enumerate(input_text):
            x_features[i, t] = feat_index[char]

    return x_features, bin_feats, tokens, bin_feat_max

In [42]:
data_preparation('/content/sample_data.xlsx')



(array([[ 6,  8, 10, ...,  0,  0,  0],
        [ 3,  5,  8, ...,  0,  0,  0],
        [ 4,  8, 10, ...,  0,  0,  0],
        ...,
        [ 6, 10, 27, ...,  0,  0,  0],
        [ 4, 10, 28, ...,  0,  0,  0],
        [ 4,  7,  8, ...,  0,  0,  0]], dtype=int32),
 ['pad',
  'income_0',
  'income_1',
  'marriage_0',
  'income_2',
  'income_3',
  'income_4',
  'marriage_1',
  'job_0',
  'marriage_2',
  'marriage_3',
  'HDL_0',
  'HDL_1',
  'prolactin_0',
  'prolactin_1',
  'prolactin_2',
  'glucose_0',
  'glucose_1',
  'glucose_2',
  'triglyceride_0',
  'triglyceride_1',
  'triglyceride_2',
  'hemameba_0',
  'hemameba_1',
  'ACTH_0',
  'ACTH_1',
  'ACTH_2',
  'length_0',
  'length_1',
  'length_2',
  'LOS_0',
  'LOS_1',
  'LOS_2',
  'MECT_0',
  'MECT_1',
  'MECT_2',
  'olan_0',
  'olan_1',
  'olan_2',
  'CDT_0',
  'CDT_1',
  'CDT_2',
  'AO_0',
  'AO_1',
  'AO_2',
  'Risperidone_0',
  'Risperidone_1',
  'Risperidone_2',
  'gap_0',
  'gap_1',
  'gap_2',
  'total_length_0',
  'total_length_1'

In [43]:
## gated attention based multi-instance pooling layer
class MIL_gated_attention(tf.keras.layers.Layer):

    def __init__(self, d_model):

        super(MIL_gated_attention, self).__init__()

        self.w1 = tf.keras.layers.Dense(d_model)
        self.w2 = tf.keras.layers.Dense(d_model)
        self.w3 = tf.keras.layers.Dense(d_model)


    def call(self, x):

        # linear projection
        alpha = tf.tanh(self.w1(x))

        # gate mechanism
        gate = tf.nn.sigmoid(self.w2(x))
        alpha = self.w3(tf.multiply(alpha, gate))

        # attention weights
        attention_weights = tf.nn.softmax(alpha)

        # output
        output = tf.multiply(x, attention_weights)
        output = tf.reduce_mean(output, axis=-1)

        return output, attention_weights

In [44]:

# multi-head attention layer
class MultiHeadAttention(tf.keras.layers.Layer):

    def __init__(self, d_model, num_heads):

        super(MultiHeadAttention, self).__init__()

        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % self.num_heads == 0

        self.depth = d_model // self.num_heads
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):

        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))

        return tf.transpose(x, perm=[0, 2, 1, 3])

    # scaled dot product attention
    def scaled_dot_product_attention(self, q, k, v):

        matmul_qk = tf.matmul(q, k, transpose_b=True)

        # scale matmul_qk
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_attention_logits = matmul_qk / tf.sqrt(dk)

        attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
        output = tf.matmul(attention_weights, v)

        return output, attention_weights

    def call(self, v, k, q):

        batch_size = tf.shape(q)[0]

        q = self.wq(q)  # (batch_size, seq_len, d_model)
        k = self.wk(k)  # (batch_size, seq_len, d_model)
        v = self.wv(v)  # (batch_size, seq_len, d_model)

        q = self.split_heads(q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
        k = self.split_heads(k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
        v = self.split_heads(v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)

        scaled_attention, attention_weights = self.scaled_dot_product_attention(q, k, v)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))
        output = self.dense(concat_attention)

        return output, attention_weights

In [46]:
# AMI-NET
class Graph(tf.keras.Model):

    def __init__(self, tokens, d_model, feat_max, num_heads, rate):

        super(Graph, self).__init__()

        self.embedding = tf.keras.layers.Embedding(tokens, d_model)
        self.multihead_att = MultiHeadAttention(d_model, num_heads)
        self.pooling = MIL_gated_attention(feat_max)
        self.ln = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.w1 = tf.keras.layers.Dense(d_model/2, activation='relu')
        self.w2 = tf.keras.layers.Dense(d_model/4)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)


    def call(self, x_bin):

        # word embedding
        x = self.embedding(x_bin)

        # multi-head attention
        mha_out, mha_att_matrix = self.multihead_att(x, x, x)
        mha_out = self.dropout1(mha_out)
        out = self.ln(x + mha_out)

        # fully connected layers
        x_dense1 = self.w1(out)
        x_dense2 = self.w2(x_dense1)
        x_dense2_drop = self.dropout2(x_dense2)

        # Instance-level Pooling
        rep = tf.reduce_sum(x_dense2_drop, axis=-1)

        # Bag-level Pooling
        mil_out, mil_att_matrix = self.pooling(rep)
        pred = tf.nn.sigmoid(mil_out)

        return pred, mha_att_matrix, mil_att_matrix



In [56]:
class config(object):

    # training
    epochs = 500
    batch_size = 64

    # adam
    learning_rate = 0.00001
    beta_1 = 0.9
    beta_2 = 0.98
    epsilon = 1e-8

    # embedding
    embedding = 128

    # multi-head attention
    num_heads = 4

    # dropout
    dropout_rate = 0.3

    # early stopping tolerance epochs
    tolerance = 100

In [57]:


if __name__ == '__main__':

    np.random.seed(2019)
    tf.random.set_seed(2019)
    config = config()


    data_file = '/content/sample_data.xlsx'

    if len(sys.argv) > 2:
        data_file = sys.argv[1]

    x_bin_features, feats, tokens, feat_max, y = data_preparation('/content/sample_data.xlsx')

    kf = KFold(n_splits=5, random_state=2019, shuffle=True)
    fold = 1

    accuracy = []
    f1 = []
    auc = []

    for train_index, test_index in kf.split(x_bin_features):

        x_bf_train, x_bf_test = x_bin_features[train_index], x_bin_features[test_index]
        y_train, y_test = y[train_index], y[test_index]


        def compute_loss(label, pred):

            return criterion(label, pred)


        def train_step(x_bin, t):

            with tf.GradientTape() as tape:
                pred, _, _ = model(x_bin)
                loss = compute_loss(t, pred)

            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            train_loss(loss)

            return pred


        def test_step(x_bin, t):

            pred, _, _ = model(x_bin)
            loss = compute_loss(t, pred)
            test_loss(loss)

            return pred


        model = Graph(tokens, config.embedding, feat_max, config.num_heads, config.dropout_rate)
        optimizer = tf.keras.optimizers.Adam(learning_rate=config.learning_rate, beta_1=config.beta_1,
                                             beta_2=config.beta_2, epsilon=config.epsilon)

        epochs = config.epochs
        batch_size = config.batch_size
        n_batches = x_bin_features.shape[0] // batch_size

        criterion = tf.losses.BinaryCrossentropy()

        train_loss = tf.keras.metrics.Mean()
        test_loss = tf.keras.metrics.Mean()

        es = []
        preds_temp = []
        stop = False

        for epoch in range(epochs):

            # early stopping
            if stop == False:
                _x_bf_train, _y_train = shuffle(x_bf_train, y_train, random_state=2019)

                for batch in range(n_batches):
                    start = batch * batch_size
                    end = start + batch_size
                    trainpreds = train_step(_x_bf_train[start:end], _y_train[start:end])

                testpreds = test_step(x_bf_test, y_test)
                score = roc_auc_score(y_test, testpreds)
                es.append(score)

                print(' epoch:', epoch, ' auc:', score)
                preds_temp.append(testpreds)

                if len(es) - np.argmax(es) > config.tolerance:
                    stop = True

            else:
                break

        num = np.argmax(es)
        print('fold:', fold, ' epoch:', num)

        pred_temp_thres = np.int32(preds_temp[num] > 0.5)

        acc_temp = accuracy_score(y_test, pred_temp_thres)
        accuracy.append(acc_temp)
        print('fold:', fold, ' accuracy:', acc_temp)

        f1_temp = f1_score(y_test, pred_temp_thres)
        f1.append(f1_temp)
        print('fold:', fold, ' f1_score:', f1_temp)

        auc_temp = roc_auc_score(y_test, preds_temp[num])
        auc.append(auc_temp)
        print('fold:', fold, ' auc:', auc_temp)

        fold += 1

    print('###################################################')
    print('auc:', np.mean(auc))
    print('f1 score:', np.mean(f1))
    print('accuracy:', np.mean(accuracy))
    print('\n')

 epoch: 0  auc: 0.40541252965468766
 epoch: 1  auc: 0.41477023108689914
 epoch: 2  auc: 0.41696687461558735
 epoch: 3  auc: 0.42263421491960285
 epoch: 4  auc: 0.42636850891837275
 epoch: 5  auc: 0.4330463052455848
 epoch: 6  auc: 0.43950443721992793
 epoch: 7  auc: 0.44367805992443543
 epoch: 8  auc: 0.4464897636411563
 epoch: 9  auc: 0.4500043932870574
 epoch: 10  auc: 0.45184957385115543
 epoch: 11  auc: 0.45523240488533523
 epoch: 12  auc: 0.4622177313065635
 epoch: 13  auc: 0.4718390299622177
 epoch: 14  auc: 0.48110886565328176
 epoch: 15  auc: 0.4939372638608207
 epoch: 16  auc: 0.5080836481855724
 epoch: 17  auc: 0.5228450926983569
 epoch: 18  auc: 0.5406379052807312
 epoch: 19  auc: 0.5587382479571216
 epoch: 20  auc: 0.5773218522098235
 epoch: 21  auc: 0.5977506370266233
 epoch: 22  auc: 0.6188384149020296
 epoch: 23  auc: 0.6395307969422722
 epoch: 24  auc: 0.6610579035234162
 epoch: 25  auc: 0.6806519637993147
 epoch: 26  auc: 0.6978297161936561
 epoch: 27  auc: 0.713689482