# Mechanisms of Action (MoA) Prediction

Prédire, pour chaque échantillon (sig_id), plusieurs cibles correspondant aux réponses de mécanisme d'action (MoA), à partir de diverses entrées telles que les données d'expression génique et les données de viabilité cellulaire.


**Certains des termes importants utilisés dans les en-têtes des tableaux sont présentés ici :**


- g- : données d'expression génique
- c- : données de viabilité cellulaire
- cp_type : indique si l'échantillon a été traité avec un composé (trt_cp) ou avec une perturbation de contrôle (ctl_vehicle)
  REMARQUE : les échantillons avec une perturbation de contrôle n'ont pas de MoA
- cp_time : durée du traitement (24, 48 ou 72 heures)
- cp_dose : dosage, haut ou bas (codé D1 ou D2)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file.
input_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


# Lire les fichiers CSV

In [None]:
# Training features: sig_id is only a sample identifier, so it is dropped
# right after loading.
train_features = pd.read_csv("/kaggle/input/lish-moa/train_features.csv").drop(columns=['sig_id'])

# Scored targets: keep the full table (with sig_id) plus an id-free version.
train_target_full = pd.read_csv("/kaggle/input/lish-moa/train_targets_scored.csv")
train_target = train_target_full.drop(columns=['sig_id'])

# Test features: keep the full table (sig_id is needed for the submission)
# plus an id-free version.
test_features_full = pd.read_csv("/kaggle/input/lish-moa/test_features.csv")
test_features = test_features_full.drop(columns=['sig_id'])

In [None]:
# Display the training features (sig_id already dropped) for a quick visual check.
train_features

In [None]:
# Encode the two categorical columns as integer codes (not the strings
# '0'/'1') so the columns are numeric rather than object dtype; string
# codes break numeric reductions such as DataFrame.mean on recent pandas.
# Any value missing from a mapping becomes NaN.
train_features['cp_dose'] = train_features['cp_dose'].map({'D1': 0, 'D2': 1})
train_features['cp_type'] = train_features['cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})

In [None]:
# Display the training features again to inspect the re-coded cp_dose / cp_type columns.
train_features

In [None]:
# Apply the same cp_dose / cp_type encoding to the id-free test features.
# Integer codes (not the strings '0'/'1') keep the columns numeric; any
# value missing from a mapping becomes NaN.
test_features['cp_dose'] = test_features['cp_dose'].map({'D1': 0, 'D2': 1})
test_features['cp_type'] = test_features['cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})

In [None]:
# Ordered list of feature column names; used later to select model inputs
# from both the training and the test tables.
train = list(train_features.columns)

len(train)

In [None]:
# Target column names: every column of the scored-targets table except the
# sample identifier.
target = [column for column in train_target_full.columns if column != 'sig_id']
len(target)

In [None]:
# Impute missing values with the mean of their own column.
# fillna() with a Series matches the Series index against the frame's column
# labels, so the frame must NOT be transposed here: after a transpose the
# columns are row labels and nothing would be filled.
# numeric_only=True skips any non-numeric column instead of raising on it.
train_features = train_features.fillna(train_features.mean(axis=0, numeric_only=True))

# Model inputs and labels as float32 arrays; the columns follow the order of
# the `train` and `target` name lists.
X_train = np.asarray(train_features[train].values, dtype='float32')
y_train = np.asarray(train_target_full[target].values, dtype='float32')

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Activation,Dropout
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam,RMSprop


# modèle de réseau neuronal simple

In [None]:
def model_network(n_features=875, n_targets=206):
    """Build the (uncompiled) dense network with three hidden layers.

    Architecture: BatchNorm -> Dense(2048) + LeakyReLU -> BatchNorm ->
    Dense(1024) + LeakyReLU -> Dense(512) + tanh -> Dropout(0.2) ->
    Dense(n_targets) with sigmoid activation.

    Args:
        n_features: Width of the input vector. Defaults to 875, the value
            that was previously hard-coded (presumably the number of feature
            columns -- confirm against ``len(train)``).
        n_targets: Number of sigmoid outputs. Defaults to 206, the value
            that was previously hard-coded (presumably the number of scored
            targets -- confirm against ``len(target)``).

    Returns:
        A ``tf.keras`` ``Model`` mapping the input tensor to the sigmoid
        outputs. The caller is responsible for compiling it.
    """
    X_input = Input(shape=(n_features,))
    X = BatchNormalization()(X_input)

    X = Dense(units=2048, kernel_initializer='he_uniform')(X)
    X = tf.keras.layers.LeakyReLU(alpha=0.2)(X)
    X = BatchNormalization()(X)

    X = Dense(units=1024, kernel_initializer='he_uniform')(X)
    X = tf.keras.layers.LeakyReLU(alpha=0.2)(X)

    X = Dense(units=512, kernel_initializer='glorot_uniform')(X)
    X = Activation('tanh')(X)
    X = Dropout(0.2)(X)

    # One independent sigmoid per target (multi-label output).
    X_output = Dense(n_targets, activation='sigmoid')(X)
    return Model(inputs=X_input, outputs=X_output)
from functools import partial

# Dense layer factory with ReLU activation, He-normal initialisation and
# L2 weight regularisation (factor 1e-4).
RegularizedDense = partial(
    tf.keras.layers.Dense,
    activation='relu',
    kernel_initializer='he_normal',
    kernel_regularizer=tf.keras.regularizers.l2(0.0001),
)


def model2_network(n_features=875, n_targets=206):
    """Build the (uncompiled) L2-regularised network with two hidden layers.

    Architecture: BatchNorm -> RegularizedDense(1024) ->
    RegularizedDense(1024) -> Dropout(0.2) -> Dense(n_targets) with
    sigmoid activation.

    Args:
        n_features: Width of the input vector. Defaults to 875, the value
            that was previously hard-coded (presumably the number of feature
            columns -- confirm against ``len(train)``).
        n_targets: Number of sigmoid outputs. Defaults to 206, the value
            that was previously hard-coded (presumably the number of scored
            targets -- confirm against ``len(target)``).

    Returns:
        A ``tf.keras`` ``Model`` mapping the input tensor to the sigmoid
        outputs. The caller is responsible for compiling it.
    """
    X_input = Input(shape=(n_features,))
    X = BatchNormalization()(X_input)
    X = RegularizedDense(1024)(X)
    X = RegularizedDense(1024)(X)
    X = Dropout(0.2)(X)
    # One independent sigmoid per target (multi-label output).
    X_output = Dense(n_targets, activation='sigmoid')(X)
    return Model(inputs=X_input, outputs=X_output)

In [None]:
# Instantiate the first network and compile it with binary cross-entropy
# and an RMSprop optimizer at learning rate 1e-4.
model = model_network()
model.compile(
    loss=BinaryCrossentropy(),
    optimizer=RMSprop(learning_rate=0.0001),
)

In [None]:
# Encode cp_dose / cp_type on the full test table (the one that still holds
# sig_id), which is the table the prediction inputs are taken from.
# Integer codes (not the strings '0'/'1') keep the columns numeric; any
# value missing from a mapping becomes NaN.
test_features_full['cp_dose'] = test_features_full['cp_dose'].map({'D1': 0, 'D2': 1})
test_features_full['cp_type'] = test_features_full['cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})

In [None]:
# Build and compile the second (L2-regularised) network with Adam and
# binary cross-entropy.
# NOTE(review): model_final is never trained or used below -- fit() and
# predict() are both called on `model` (the network returned by
# model_network()). Confirm whether model_final was meant to be trained
# here, or whether these two lines are leftovers that can be removed.
model_final = model2_network()
model_final.compile(optimizer = Adam(), loss = BinaryCrossentropy())
# Train `model` for 15 epochs with mini-batches of 256 samples.
model.fit(X_train,y_train,batch_size =256, epochs=15)
# Select the test inputs using the same feature-name list (`train`) that was
# used to build X_train, so the column order matches.
X_test = np.asarray(test_features_full[train].values, dtype = 'float32')
predictions = model.predict(X_test)

In [None]:
# Assemble the submission table: the first column of the scored-targets
# header holds the sample ids, the remaining columns hold one predicted
# probability per target. The result is written to submission.csv.
submission_header = train_target_full.columns
data = pd.DataFrame.from_records(predictions)
data.columns = submission_header[1:]
data.insert(0, submission_header[0], test_features_full['sig_id'])
data.to_csv('submission.csv', index=False)