In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA
from sklearn.metrics import log_loss

from catboost import CatBoostRegressor, Pool
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
import tensorflow.keras.callbacks as C
import sklearn
import matplotlib.pyplot as plt


In [None]:
# Record the installed scikit-learn version in the cell output.
print(sklearn.__version__)

# Presumably pasted output from an earlier tuning run, kept for reference:
# epochs: 100  batch size: 1024  nb_hiddens: 256  dropout: 0.1  nb_blocks: 5
# NEW BEST SCORE :  3.0524902204895814
# score :  3.0524902204895814


In [None]:
# Number of principal components kept by the PCA step.
N_COMPONENTS = 50
# Previous input width (presumably the raw feature count before PCA was introduced).
#INPUT_SHAPE = 878
# Width of the network input layer; tied to the PCA output size.
INPUT_SHAPE = N_COMPONENTS
# Width of the sigmoid output layer used by every model builder.
OUTPUT_SHAPE = 206
# Batch size and epoch count passed to the fold-training call.
BATCH_SIZE=2048
EPOCHS=200
# Number of folds iterated by the training loops (fold ids 0..NFOLD-1).
NFOLD=5
# Dropout rate and hidden-layer width read by the parameterised model builders.
DROPOUT=0.1
NB_HIDDENS=4096
# Presumably the settings of an earlier run, kept for reference:
# epochs: 100  batch size: 128  nb_hiddens: 4096  dropout: 0.2  nb_blocks: 1

In [None]:
# Load the competition inputs. The scored targets are read from a pre-folded
# copy that carries an extra 'fold' column next to the target columns.
df_train_features = pd.read_csv('/kaggle/input/lish-moa/train_features.csv')
#df_train_targets_scored = pd.read_csv('/kaggle/input/lish-moa/train_targets_scored.csv')
df_train_targets_scored = pd.read_csv('/kaggle/input/train-with-fold/train_with_fold_y.csv')
df_train_targets_nonscored = pd.read_csv('/kaggle/input/lish-moa/train_targets_nonscored.csv')
df_test = pd.read_csv('/kaggle/input/lish-moa/test_features.csv')
df_sub = pd.read_csv('/kaggle/input/lish-moa/sample_submission.csv')

# Index labels of the rows that are not vehicle controls.
# NOTE(review): these are applied positionally to the target matrix below, which
# assumes train_features.csv and train_with_fold_y.csv share row order - confirm.
non_ctl_idx = df_train_features.loc[df_train_features['cp_type']!='ctl_vehicle'].index.to_list()
labels_train = df_train_targets_scored.drop(['sig_id', 'fold'],axis=1).values
print(labels_train.shape)
labels_train = labels_train[non_ctl_idx]

print(labels_train.shape)

# Output-bias initializer for a sigmoid head: we want sigmoid(bias) == label
# prior, i.e. bias = log(p / (1 - p)).  The previous -log(p) had the wrong sign
# (it starts every rare label near probability 1) and is infinite when p == 0,
# so the prior is clipped away from 0 and 1 first.
label_prior = np.clip(labels_train.mean(axis=0), 1e-7, 1 - 1e-7)
bias = tf.keras.initializers.Constant(np.log(label_prior / (1 - label_prior)))


In [None]:
# Bounds applied to predictions before taking logarithms in `logloss`.
p_min = 0.001
p_max = 0.999

def logloss(y_true, y_pred):
    """Mean binary cross-entropy over all elements, computed on clipped predictions.

    Predictions are clipped to [p_min, p_max]; no label smoothing is applied.
    """
    clipped = tf.clip_by_value(y_pred, p_min, p_max)
    positive_term = y_true * K.log(clipped)
    negative_term = (1 - y_true) * K.log(1 - clipped)
    return -K.mean(positive_term + negative_term)

def make_model():
    """Build and compile a 2048-1024-512 dense network with a sigmoid head.

    Each hidden stage is Dense(relu) -> Dropout(0.25) -> BatchNormalization.
    Compiled with binary cross-entropy loss, Adam, and a BinaryCrossentropy metric.
    """
    inputs = L.Input(shape=(INPUT_SHAPE,), name="Id")

    hidden = inputs
    for layer_name, width in (("d1", 2048), ("d2", 1024), ("d3", 512)):
        hidden = L.Dense(width, activation="relu", name=layer_name)(hidden)
        hidden = L.Dropout(0.25)(hidden)
        hidden = L.BatchNormalization()(hidden)

    outputs = L.Dense(OUTPUT_SHAPE, activation="sigmoid", name="p1")(hidden)

    model = M.Model(inputs, outputs, name="MOA")
    model.compile(
        loss='binary_crossentropy',
        optimizer="adam",
        metrics=[tf.keras.metrics.BinaryCrossentropy()],
    )
    return model

def make_model_ddb1():
    """Build and compile a single-hidden-layer network (4096 relu units).

    The hidden stage is Dense -> BatchNormalization -> Dropout(DROPOUT), followed
    by the sigmoid output layer. Compiled with binary cross-entropy and Adam.
    """
    inputs = L.Input(shape=(INPUT_SHAPE,), name="Id")

    hidden = L.Dense(4096, activation="relu", name="d1")(inputs)
    hidden = L.BatchNormalization()(hidden)
    hidden = L.Dropout(DROPOUT)(hidden)

    outputs = L.Dense(OUTPUT_SHAPE, activation="sigmoid", name="p1")(hidden)

    model = M.Model(inputs, outputs, name="MOA")
    model.compile(
        loss='binary_crossentropy',
        optimizer="adam",
        metrics=[tf.keras.metrics.BinaryCrossentropy()],
    )
    return model

def make_model_ddb2():
    """Build and compile a three-hidden-layer network of NB_HIDDENS relu units each.

    Each hidden stage is Dense -> BatchNormalization -> Dropout(DROPOUT). The
    sigmoid output layer uses the default bias initializer. Compiled with a
    BinaryCrossentropy loss object, Adam, and a BinaryCrossentropy metric.
    """
    inputs = L.Input(shape=(INPUT_SHAPE,), name="Id")

    hidden = inputs
    for layer_name in ("d1", "d2", "d3"):
        hidden = L.Dense(NB_HIDDENS, activation="relu", name=layer_name)(hidden)
        hidden = L.BatchNormalization()(hidden)
        hidden = L.Dropout(DROPOUT)(hidden)

    outputs = L.Dense(OUTPUT_SHAPE, activation="sigmoid", name="p1")(hidden)

    model = M.Model(inputs, outputs, name="MOA")
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer="adam",
        metrics=[tf.keras.metrics.BinaryCrossentropy()],
    )
    return model

def make_model_best():
    """Build and compile a five-hidden-layer network of NB_HIDDENS relu units each.

    Each hidden stage (named d1..d5) is Dense -> BatchNormalization ->
    Dropout(DROPOUT), followed by the sigmoid output layer. Compiled with binary
    cross-entropy, Adam, and a BinaryCrossentropy metric.
    """
    inputs = L.Input(shape=(INPUT_SHAPE,), name="Id")

    hidden = inputs
    for stage in range(1, 6):
        hidden = L.Dense(NB_HIDDENS, activation="relu", name="d" + str(stage))(hidden)
        hidden = L.BatchNormalization()(hidden)
        hidden = L.Dropout(DROPOUT)(hidden)

    outputs = L.Dense(OUTPUT_SHAPE, activation="sigmoid", name="p1")(hidden)

    model = M.Model(inputs, outputs, name="MOA")
    model.compile(
        loss='binary_crossentropy',
        optimizer="adam",
        metrics=[tf.keras.metrics.BinaryCrossentropy()],
    )
    return model

def make_model_original():
    """Build and compile a plain 100-100-100 relu network with a sigmoid head.

    No dropout or batch normalization. Compiled with binary cross-entropy,
    Adam, and a BinaryCrossentropy metric.
    """
    inputs = L.Input(shape=(INPUT_SHAPE,), name="Id")

    hidden = inputs
    for layer_name in ("d1", "d2", "d3"):
        hidden = L.Dense(100, activation="relu", name=layer_name)(hidden)

    outputs = L.Dense(OUTPUT_SHAPE, activation="sigmoid", name="p1")(hidden)

    model = M.Model(inputs, outputs, name="MOA")
    model.compile(
        loss='binary_crossentropy',
        optimizer="adam",
        metrics=[tf.keras.metrics.BinaryCrossentropy()],
    )
    return model

def preprocess_df(df, str_cols, scaler=None):
    """Scale the numeric columns and one-hot encode the treatment descriptors.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must contain 'cp_type', 'cp_dose' and 'cp_time'.
    str_cols : list
        Columns excluded from scaling and copied through unchanged.
    scaler : object with ``transform`` (and ``fit``), optional
        Already-fitted scaler to reuse. When None a MinMaxScaler is fitted on
        ``df``.

    Returns
    -------
    (pandas.DataFrame, scaler)
        A new frame with a fresh 0..n-1 index holding the scaled columns, the
        pass-through columns, 'cp_type' recoded to 1 ('trt_cp') / 0, and the
        indicator columns cp_dose_D1/D2 and cp_time_24/48/72; 'cp_dose' and
        'cp_time' are dropped. The second element is the scaler that was used.
    """
    cols_to_scale = [c for c in df.columns if c not in str_cols]
    if scaler is None:
        scaler = MinMaxScaler()
        scaler.fit(df[cols_to_scale])
    np_scaled = np.asarray(scaler.transform(df[cols_to_scale]))
    df_scaled = pd.DataFrame(data=np_scaled, columns=cols_to_scale)

    # Copy by position, not by index label: df_scaled has a fresh RangeIndex, so
    # label alignment would produce NaN whenever df carries another index.
    for c in str_cols:
        df_scaled[c] = df[c].to_numpy()

    # Indicators are derived from the *original* columns. 'cp_time' is numeric
    # and therefore already rescaled inside df_scaled, so comparing the scaled
    # values with 24/48/72 would never match.
    df_scaled['cp_type'] = df['cp_type'].eq('trt_cp').to_numpy().astype(int)
    df_scaled['cp_dose_D1'] = df['cp_dose'].eq('D1').to_numpy().astype(int)
    df_scaled['cp_dose_D2'] = df['cp_dose'].eq('D2').to_numpy().astype(int)
    df_scaled['cp_time_24'] = df['cp_time'].eq(24).to_numpy().astype(int)
    df_scaled['cp_time_48'] = df['cp_time'].eq(48).to_numpy().astype(int)
    df_scaled['cp_time_72'] = df['cp_time'].eq(72).to_numpy().astype(int)

    df_scaled = df_scaled.drop(columns=['cp_dose', 'cp_time'])
    return (df_scaled, scaler)

def display_training_curves(training, validation, title, subplot):
    """Plot a training and a validation curve in one panel of a subplot grid.

    Parameters
    ----------
    training, validation : sequence of float
        Per-epoch values to draw.
    title : str
        Used for the panel title ('model <title>') and the y-axis label.
    subplot : int
        Three-digit matplotlib subplot code (e.g. 211). A code ending in 1
        starts a new figure; other codes draw into the current figure.
    """
    if subplot % 10 == 1:
        # First panel: open an empty figure. plt.subplots() would also create a
        # full-size axes that the grid panels are then drawn on top of.
        plt.figure(figsize=(10, 10), facecolor='#F0F0F0')
    ax = plt.subplot(subplot)
    ax.set_facecolor('#F8F8F8')
    ax.plot(training)
    ax.plot(validation)
    ax.set_title('model '+ title)
    ax.set_ylabel(title)
    #ax.set_ylim(0.28,1.05)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'valid.'])
    # Fit the layout once the panel has content; calling it on the empty figure
    # had nothing to arrange.
    plt.tight_layout()
    
def get_prediction_multifold(X, Y, X_predict, epochs, batch_size):
    """Train one dense network per fold; return the CV score and predictions.

    Parameters
    ----------
    X : pandas.DataFrame
        Training features, row-aligned with ``Y``.
    Y : pandas.DataFrame
        Target columns plus a 'fold' column holding fold ids 0..NFOLD-1.
    X_predict : pandas.DataFrame
        Features to predict once training is done.
    epochs, batch_size : int
        Passed to ``fit``; ``batch_size`` is also used for every ``predict``.

    Returns
    -------
    (score, pred, pe)
        ``score``: mean binary log loss of the out-of-fold predictions over all
        rows and target columns; ``pred``: out-of-fold predictions, shape
        (len(X), OUTPUT_SHAPE); ``pe``: predictions for ``X_predict`` averaged
        over the NFOLD models.

    Notes
    -----
    Fixes over the previous version: every fold is trained (a leftover ``break``
    stopped after the first one while the test predictions were still divided by
    NFOLD); ``pred`` is actually filled; rows are selected by position; and the
    score is the element-wise binary log loss instead of sklearn's multiclass
    ``log_loss``, which renormalises each row of predictions to sum to one.
    """
    cols = [c for c in Y if c != 'fold']
    fold_ids = Y['fold'].to_numpy()

    pred = np.zeros((X.shape[0], OUTPUT_SHAPE))
    pe = None

    for i in range(NFOLD):
        cnt = i + 1
        print(f"FOLD {cnt}")

        net = make_model_ddb2()

        # Positional row numbers, valid for .iloc and for indexing `pred`.
        test_index = np.flatnonzero(fold_ids == i)
        train_index = np.flatnonzero(fold_ids != i)

        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = Y.iloc[train_index][cols], Y.iloc[test_index][cols]

        weights_path = 'best_nn_'+str(cnt)+'.h5'
        callback_lr = get_callback_ReduceLROnPlateau()
        checkpoint = C.ModelCheckpoint(
            filepath=weights_path,
            save_best_only=True,
            monitor='val_loss',
            mode='min')

        history = net.fit(
            X_train,
            y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(X_test, y_test),
            callbacks=[callback_lr, checkpoint],
            verbose=0
        )

        # Continue with the best checkpoint rather than the last epoch.
        net.load_weights(weights_path)

        pred[test_index] = net.predict(X_test, batch_size=batch_size, verbose=0)

        display_training_curves(history.history['loss'], history.history['val_loss'], 'loss', 211)
        display_training_curves(history.history['binary_crossentropy'], history.history['val_binary_crossentropy'], 'SCORE', 212)

        # Each fold contributes 1/NFOLD of the averaged test prediction.
        preds_test = net.predict(X_predict, batch_size=batch_size, verbose=0) / NFOLD
        pe = preds_test if pe is None else np.add(pe, preds_test)

    y_true = Y[cols].to_numpy(dtype=float)
    clipped = np.clip(pred, 1e-15, 1 - 1e-15)
    score = float(-np.mean(y_true * np.log(clipped) + (1 - y_true) * np.log(1 - clipped)))
    print("Score : ", score)

    return (score, pred, pe)

def get_prediction_multifold_Conv(X, Y, X_predict, epochs, batch_size):
    """Train one Conv1D network per fold; return the CV score and predictions.

    Inputs are reshaped to (rows, 1, features) and targets to (rows, 1, targets),
    so the model must accept ``(1, INPUT_SHAPE)`` samples. ``make_model_Conv1D``
    does; the previously used ``make_model_Conv1D_2`` declares
    ``(INPUT_SHAPE, 1)`` and cannot consume these arrays.
    NOTE(review): confirm that make_model_Conv1D is the intended architecture.

    Parameters
    ----------
    X : pandas.DataFrame
        Training features, row-aligned with ``Y``.
    Y : pandas.DataFrame
        Target columns plus a 'fold' column holding fold ids 0..NFOLD-1.
    X_predict : pandas.DataFrame
        Features to predict once training is done.
    epochs, batch_size : int
        Passed to ``fit``; ``batch_size`` is also used for every ``predict``.

    Returns
    -------
    (score, pred, pe)
        ``score``: mean binary log loss of the out-of-fold predictions over all
        rows and target columns (replaces sklearn's multiclass ``log_loss``,
        which renormalises each row of predictions); ``pred``: out-of-fold
        predictions, shape (len(X), OUTPUT_SHAPE); ``pe``: predictions for
        ``X_predict`` averaged over the NFOLD models.
    """
    cols = [c for c in Y if c != 'fold']
    fold_ids = Y['fold'].to_numpy()

    pred = np.zeros((X.shape[0], OUTPUT_SHAPE))
    X_predict = X_predict.values.reshape((X_predict.shape[0], 1, X_predict.shape[1]))
    pe = None

    for i in range(NFOLD):
        cnt = i + 1
        print(f"FOLD {cnt}")

        net = make_model_Conv1D()

        # Positional row numbers, valid for .iloc and for indexing `pred`.
        test_index = np.flatnonzero(fold_ids == i)
        train_index = np.flatnonzero(fold_ids != i)

        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = Y.iloc[train_index][cols], Y.iloc[test_index][cols]

        # Insert a length-1 sequence axis for the convolutional model.
        X_train = X_train.values.reshape((X_train.shape[0], 1, X_train.shape[1]))
        X_test = X_test.values.reshape((X_test.shape[0], 1, X_test.shape[1]))
        y_train = y_train.values.reshape((y_train.shape[0], 1, y_train.shape[1]))
        y_test = y_test.values.reshape((y_test.shape[0], 1, y_test.shape[1]))

        weights_path = 'best_nn_'+str(cnt)+'.h5'
        callback_lr = get_callback_ReduceLROnPlateau()
        checkpoint = C.ModelCheckpoint(
            filepath=weights_path,
            save_best_only=True,
            monitor='val_loss',
            mode='min')

        history = net.fit(
            X_train,
            y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(X_test, y_test),
            callbacks=[callback_lr, checkpoint],
            verbose=0
        )

        # Continue with the best checkpoint rather than the last epoch.
        net.load_weights(weights_path)

        preds_val = net.predict(X_test, batch_size=batch_size, verbose=0)
        pred[test_index] = preds_val.reshape(preds_val.shape[0], preds_val.shape[2])

        display_training_curves(history.history['loss'], history.history['val_loss'], 'loss', 211)
        display_training_curves(history.history['binary_crossentropy'], history.history['val_binary_crossentropy'], 'SCORE', 212)

        # Each fold contributes 1/NFOLD of the averaged test prediction.
        preds_test = net.predict(X_predict, batch_size=batch_size, verbose=0) / NFOLD
        pe_to_add = preds_test.reshape(preds_test.shape[0], preds_test.shape[2])
        pe = pe_to_add if pe is None else np.add(pe, pe_to_add)

    y_true = Y[cols].to_numpy(dtype=float)
    clipped = np.clip(pred, 1e-15, 1 - 1e-15)
    score = float(-np.mean(y_true * np.log(clipped) + (1 - y_true) * np.log(1 - clipped)))
    print("Score : ", score)

    return (score, pred, pe)

def get_callback_ReduceLROnPlateau():
    """Return a ReduceLROnPlateau callback watching 'val_loss'.

    The learning rate is multiplied by 0.1 after 3 epochs without an improvement
    of at least 1e-5, with a 1-epoch cooldown and no lower bound (min_lr=0).
    """
    plateau_settings = dict(
        monitor='val_loss',
        factor=0.1,
        patience=3,
        verbose=0,
        mode='min',
        min_delta=0.00001,
        cooldown=1,
        min_lr=0,
    )
    return C.ReduceLROnPlateau(**plateau_settings)


def make_model_Conv1D():
    """Build and compile a one-layer Conv1D model over (1, INPUT_SHAPE) samples.

    A 25-filter, width-100, 'same'-padded relu convolution feeds a sigmoid Dense
    layer of OUTPUT_SHAPE units. Compiled with label-smoothed (0.01) binary
    cross-entropy, Adam, and a BinaryCrossentropy metric.
    """
    sample_shape = (1, INPUT_SHAPE)
    inputs = L.Input(shape=sample_shape, name="Id")

    conv_layer = L.Conv1D(25, 100, activation='relu', padding="same", input_shape=sample_shape)
    features = conv_layer(inputs)
    outputs = L.Dense(OUTPUT_SHAPE, activation="sigmoid", name="p1")(features)

    model = M.Model(inputs, outputs, name="MOA")
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.01),
        optimizer="adam",
        metrics=[tf.keras.metrics.BinaryCrossentropy()],
    )

    return model

def make_model_Conv1D_2():
    """Build and compile a three-layer Conv1D model over (INPUT_SHAPE, 1) samples.

    Three 100-filter relu convolutions with 'valid' padding and kernel widths
    400, 400 and 80 feed a sigmoid Dense layer of OUTPUT_SHAPE units. Compiled
    with label-smoothed (0.001) binary cross-entropy, Adam, and a
    BinaryCrossentropy metric.

    NOTE(review): 'valid' padding shortens the sequence by 399 + 399 + 79 = 877
    steps, so the input needs at least 878 steps; INPUT_SHAPE is currently set
    to N_COMPONENTS - confirm this builder is still meant to be usable.
    """
    sample_shape = (INPUT_SHAPE, 1)
    inputs = L.Input(shape=sample_shape, name="Id")

    features = inputs
    for kernel_width in (400, 400, 80):
        features = L.Conv1D(100, kernel_width, activation='relu', padding="valid", input_shape=sample_shape)(features)

    outputs = L.Dense(OUTPUT_SHAPE, activation="sigmoid", name="p1")(features)

    model = M.Model(inputs, outputs, name="MOA")
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.001),
        optimizer="adam",
        metrics=[tf.keras.metrics.BinaryCrossentropy()],
    )

    return model

In [None]:
# Disabled exploration cell: the code is wrapped in a string literal so nothing
# inside it runs. It prints the shapes of the loaded frames and plots the sorted
# per-column sums of the non-scored targets.
'''
print('df_train_features ', df_train_features.shape)
print('df_train_targets_scored ', df_train_targets_scored.shape)
print('df_train_targets_nonscored ', df_train_targets_nonscored.shape)

xx = df_train_targets_nonscored.drop('sig_id', axis=1)
sum_cols = xx.sum(axis=0).sort_values()
tranche = sum_cols.loc[sum_cols==6]
print(sum_cols[200:250])

x = range(sum_cols.shape[0])
plt.plot(x, sum_cols)
plt.show()
'''

In [None]:
# Scale train and test with one scaler fitted on train, reduce the stacked
# features with PCA, then split back into the matrices used for training.
# NOTE: this cell rebinds df_train_features and df_test, so it is not
# idempotent - re-run the loading cell before running it again.
scaler = None
str_cols = [c for c in df_train_features.columns if df_train_features[c].dtype=='object']
#str_cols = [c for c in df_test.columns if df_test[c].dtype=='object']
str_cols_train = str_cols.copy()

(df_train_features, scaler) = preprocess_df(df_train_features, str_cols, scaler)
(df_test, sc) = preprocess_df(df_test, str_cols, scaler)

# Tag each row with its origin so the stacked frame can be split again later.
df_train_temp = df_train_features.copy()
df_train_temp['where']='train'

df_test_temp = df_test.copy()
df_test_temp['where']='test'
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_all = pd.concat([df_train_temp, df_test_temp])

# PCA is fitted on train and test together.
pca = PCA(n_components=N_COMPONENTS, whiten=True)
cols_pca = [c for c in df_all if c not in ['sig_id', 'where']]
df_pca = df_all[cols_pca]
df_short = pca.fit_transform(df_pca)

df_all_short = pd.DataFrame(data=df_short).reset_index(drop=True)

df_all_sliced = df_all.loc[:, ['sig_id', 'where']].reset_index(drop=True)

print("Shape all short", df_all_short.shape)

print("Shape df all", df_all_sliced.shape)

# ignore_index renumbers the columns: 0 = sig_id, 1 = origin tag, 2.. = components.
df_all = pd.concat([df_all_sliced, df_all_short], axis=1, ignore_index=True)
print("Shape all", df_all.shape)

# Build new frames instead of mutating .loc slices in place, which raised
# SettingWithCopyWarning.
df_train = df_all.loc[df_all[1]=='train'].drop(columns=1).rename(columns={0:'sig_id'})
df_test = df_all.loc[df_all[1]=='test'].drop(columns=1).rename(columns={0:'sig_id'})

#cat_features = [c for c in str_cols if c != 'sig_id']
cat_features = []
# 'sig_id' is the only column the two frames share; name it explicitly.
df_train_all = df_train.merge(df_train_targets_scored, on='sig_id').reset_index(drop=True)

sub_sig_id = df_sub['sig_id']
Ys = [c for c in df_sub.columns if c != 'sig_id']
Yss = [c for c in df_train_targets_scored.columns if c != 'sig_id']

X = df_train_all.drop(df_sub.columns, axis=1)
X = X.drop('fold', axis=1)
X_test = df_test.drop('sig_id', axis=1)
Y = df_train_all.loc[:,Yss]
sc = df_sub.head(1).copy()

# Invariants the model builders and the training loop rely on.
assert X.shape[1] == INPUT_SHAPE, "feature width must match the network input"
assert X_test.shape[1] == INPUT_SHAPE, "test feature width must match the network input"
assert len(X) == len(Y), "features and targets must be row-aligned"

print(sc.shape)

In [None]:
# Shapes of the target matrix, the training features and the test features.
for matrix in (Y, X, X_test):
    print(matrix.shape)

In [None]:
# Run the fold training and unpack the score, the training-set prediction
# matrix (pred) and the test-set prediction matrix (pe).
(score, pred, pe) = get_prediction_multifold(X, Y, X_test, EPOCHS, BATCH_SIZE)

In [None]:
#display_training_curves(history.history['loss'], history.history['val_loss'], 'loss', 211)
#display_training_curves(history.history['mse'], history.history['val_mse'], 'SCORE', 212)

def ff(x):
    """Return 0.0 for values below 1e-4 and the value itself otherwise."""
    if float(x) < 0.0001:
        return 0.0
    else:
        return x

# otypes pins the result dtype to float. Without it np.vectorize infers the
# dtype from the first element's result, so an array starting with a small
# value (mapped to integer 0) was truncated to integers throughout.
vfunc = np.vectorize(ff, otypes=[float])
# Optional post-processing of the test predictions, currently disabled:
#pe = vfunc(pe)

In [None]:
# Sanity check: shape of the test prediction matrix before it is written out.
print(pe.shape)

In [None]:
# Assemble the submission file: 'sig_id' first, then one column per name in Ys
# holding the test predictions.
ID = df_test['sig_id'].reset_index(drop=True)
df = pd.DataFrame(data=pe, columns=Ys)
df.insert(0, 'sig_id', ID)

df.to_csv('submission.csv', index=False)

In [None]:
# Write the training-set predictions next to their ids.
# The ids are taken from df_train_all, the merged frame from which X and Y (and
# therefore the rows of `pred`) were built; taking them from the raw targets
# file would mislabel rows whenever that file's order or length differs.
ID_train = df_train_all['sig_id'].reset_index(drop=True)
dft = pd.DataFrame(data=pred, columns=Ys)
dft.insert(loc=0, column='sig_id', value=ID_train)

dft.to_csv('submission_train.csv', index=False)