### Importing libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
import tensorflow.keras.backend as K

from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.losses import BinaryCrossentropy
import tensorflow_addons as tfa

from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss
from sklearn import preprocessing


from tqdm.notebook import tqdm

import math
from sklearn.preprocessing import StandardScaler
pd.options.display.max_columns = None
import tensorflow_addons as tfa

### Loading train and test data

train_features.csv - Features for the training set. Features g- signify gene expression data, and c- signify cell viability data. cp_type indicates whether a sample was treated with a compound (trt_cp) or with a control perturbation (ctl_vehicle); control perturbations have no MoAs. cp_time and cp_dose indicate treatment duration (24, 48, 72 hours) and dose (high or low).

test_features.csv - Features for the test data. You must predict the probability of each scored MoA for each row in the test data.

In [None]:
# Read the raw feature tables and tag every row with the split it came from.
train_features = pd.read_csv('/kaggle/input/lish-moa/train_features.csv').assign(dataset='train')
test_features = pd.read_csv('/kaggle/input/lish-moa/test_features.csv').assign(dataset='test')

# Stacked view (train rows first, original row labels kept) used by the
# train/test comparison charts.
df = pd.concat((train_features, test_features))

In [None]:
train_features.head()

In [None]:
test_features.head()

In [None]:
# Report the size of both splits. The "- 1" leaves out the helper 'dataset'
# column that was added to each frame right after loading.
print('Number of rows in training set:', train_features.shape[0])
print('Number of columns in training set:', train_features.shape[1] - 1)

print('Number of rows in test set:', test_features.shape[0])
print('Number of columns in test set:', test_features.shape[1] - 1)


In [None]:
df.info()

We can see that we have 872 float features, 1 integer feature (cp_time) and 3 categorical features (sig_id, cp_type and cp_dose), plus the helper `dataset` column that was added above.

In [None]:
sample_submission = pd.read_csv('/kaggle/input/lish-moa/sample_submission.csv')
sample_submission

### Categories Visualization

Here we are going to check categorical features:

- Features g- signify gene expression data.

- Features c- signify cell viability data.

- cp_type indicates whether a sample was treated with a compound (trt_cp) or with a control perturbation (ctl_vehicle); control perturbations have no MoAs.

- cp_time and cp_dose indicate treatment duration (24, 48, 72 hours) and dose (high or low).

In [None]:
# Shared figure dimensions, passed as width/height to the plotly charts below.
cp_width = 500      # width of the cp_type / cp_time / cp_dose bar charts
cp_height = 400     # height of the cp_type / cp_time / cp_dose bar charts
scatter_size = 600  # used as both width and height of the 2D/3D scatter plots

In [None]:
# Number of samples for every (cp_type, split) pair.
ds = (
    df.groupby(['cp_type', 'dataset'])['sig_id']
    .count()
    .reset_index(name='count')
)

# Grouped bars: one bar per split for every cp_type value.
bar_options = dict(
    x='cp_type',
    y="count",
    color='dataset',
    barmode='group',
    orientation='v',
    title='cp_type train/test counts',
    width=cp_width,
    height=cp_height,
)
fig = px.bar(ds, **bar_options)

fig.show()


In [None]:
# Number of samples for every (cp_time, split) pair.
ds = (
    df.groupby(['cp_time', 'dataset'])['sig_id']
    .count()
    .reset_index(name='count')
)

# Grouped bars: one bar per split for every treatment duration.
bar_options = dict(
    x='cp_time',
    y="count",
    color='dataset',
    barmode='group',
    orientation='v',
    title='cp_time train/test counts',
    width=cp_width,
    height=cp_height,
)
fig = px.bar(ds, **bar_options)

fig.show()


In [None]:
# Number of samples for every (cp_dose, split) pair.
ds = (
    df.groupby(['cp_dose', 'dataset'])['sig_id']
    .count()
    .reset_index(name='count')
)

# Grouped bars: one bar per split for every dose level.
bar_options = dict(
    x='cp_dose',
    y="count",
    color='dataset',
    barmode='group',
    orientation='v',
    title='cp_dose train/test counts',
    width=cp_width,
    height=cp_height,
)
fig = px.bar(ds, **bar_options)

fig.show()

In [None]:
# The bundled 'seaborn' style sheet was renamed to 'seaborn-v0_8' in newer
# matplotlib releases; pick whichever name this installation provides so the
# cell does not fail on a missing style.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
sns.set_style('whitegrid')
fig = plt.figure(figsize=(15,5))

# 1 row x 2 columns; every plot is bound to its axes explicitly instead of
# relying on matplotlib's "current axes" state.
# Left panel: control vs. treated samples.
ax1 = plt.subplot2grid((1,2),(0,0))
sns.countplot(x='cp_type', data=train_features, palette='rainbow', alpha=0.75, ax=ax1)
ax1.set_title('Train: Control and treated samples', fontsize=15, weight='bold')

# Right panel: low vs. high dose.
ax2 = plt.subplot2grid((1,2),(0,1))
sns.countplot(x='cp_dose', data=train_features, palette='Purples', alpha=0.75, ax=ax2)
ax2.set_title('Train: Treatment Doses: Low and High',weight='bold', fontsize=18)
plt.show()

In [None]:
# Distribution of the treatment duration in the training set, drawn on an
# explicitly created axes.
duration_fig, duration_ax = plt.subplots(figsize=(15,5))
sns.distplot(train_features['cp_time'], color='red', bins=5, ax=duration_ax)
duration_ax.set_title("Train: Treatment duration ", fontsize=15, weight='bold')
plt.show()

Conclusion:

- Few control samples.
- The low and high doses were applied equally.
- 3 treatment durations: 24h, 48h and 72h.

### Gene and cell features distribution

Some distribution of randomly selected columns.

In [None]:
# Split the feature names by prefix: 'g-' (gene expression) and 'c-' (cell viability).
train_columns = list(train_features.columns)
g_list = [name for name in train_columns if name[:2] == 'g-']
c_list = [name for name in train_columns if name[:2] == 'c-']

In [None]:
def plot_set_histograms(plot_list, title, data=None, n_cols=2):
    """Draw one 100-bin histogram per column in a grid of subplots.

    Parameters
    ----------
    plot_list : list of str
        Column names to plot, filled row by row into the grid.
    title : str
        Figure title.
    data : pandas.DataFrame, optional
        Frame to read the columns from. Defaults to the notebook-level
        ``train_features``.
    n_cols : int, default 2
        Number of subplot columns.

    The grid has at least 6 rows (the original fixed layout) and grows when
    ``plot_list`` holds more columns than 6 * ``n_cols``, so longer lists no
    longer overflow the grid.
    """
    frame = train_features if data is None else data
    n_rows = max(6, math.ceil(len(plot_list) / n_cols))
    fig = make_subplots(rows=n_rows, cols=n_cols)

    # add_trace(row=, col=) replaces the deprecated append_trace.
    for position, col in enumerate(plot_list):
        fig.add_trace(
            go.Histogram(x=frame[col], nbinsx=100, name=col),
            row=position // n_cols + 1,
            col=position % n_cols + 1,
        )

    fig.update_layout(
        title_text=title,
        height=1000,
        width=800
    )
    fig.show()

In [None]:
# Pick 12 distinct gene-expression columns uniformly at random.
# Sampling without replacement guarantees distinct names and, unlike
# randint(0, len - 1) whose upper bound is exclusive, can also pick the last column.
plot_list = np.random.choice(g_list, size=min(12, len(g_list)), replace=False).tolist()
plot_set_histograms(plot_list, 'Randomly selected gene expression features distributions')


In [None]:
# Pick 12 distinct cell-viability columns uniformly at random.
# Sampling without replacement guarantees distinct names and, unlike
# randint(0, len - 1) whose upper bound is exclusive, can also pick the last column.
plot_list = np.random.choice(c_list, size=min(12, len(c_list)), replace=False).tolist()
plot_set_histograms(plot_list, 'Randomly selected cell expression features distributions')

### Training features correlation

Let's see some correlation between randomly selected variables.

In [None]:
# All numeric feature names: gene expression first, then cell viability.
columns = g_list + c_list

# 40 distinct features drawn uniformly at random. Sampling without replacement
# avoids the exclusive upper bound of randint(0, len - 1), which could never
# select the last column.
for_correlation = np.random.choice(columns, size=min(40, len(columns)), replace=False).tolist()
data = df[for_correlation]

# Correlation heat map with the feature names on both axes.
f = plt.figure(figsize=(18, 18))
plt.matshow(data.corr(), fignum=f.number)
plt.xticks(range(data.shape[1]), data.columns, fontsize=14, rotation=50)
plt.yticks(range(data.shape[1]), data.columns, fontsize=14)
cb = plt.colorbar()
cb.ax.tick_params(labelsize=13)

Time taken to find pairs of features with high correlation.

In [None]:
%%time

cols = ['cp_time'] + columns

# Compute the whole correlation matrix in one vectorised call instead of one
# Python-level Series.corr call per pair of columns.
high_corr = train_features[cols].corr().abs().to_numpy() > 0.9

# k=1 keeps only pairs with i < j (no self-pairs, no mirrored duplicates).
# np.nonzero walks them in row-major order, i.e. the order a nested
# "for i / for j > i" loop would visit them.
first_idx, second_idx = np.nonzero(np.triu(high_corr, k=1))

# Flat list of every column that takes part in at least one such pair
# (duplicates are expected at this point).
all_columns = []
for i, j in zip(first_idx, second_idx):
    all_columns += [cols[i], cols[j]]

In [None]:
# Remove duplicates. Sorting makes the column order (and therefore the plots
# built from it) identical across kernel restarts, which a bare set does not guarantee.
all_columns = sorted(set(all_columns))
print('Number of columns:', len(all_columns))

In total, 35 columns have an absolute correlation higher than 0.9 with at least one other column. Let's visualize them.

In [None]:
# Heat map of the pairwise correlations between the highly correlated columns.
data = df[all_columns]
tick_positions = range(data.shape[1])
tick_labels = data.columns

f = plt.figure(figsize=(18, 18))
plt.matshow(data.corr(), fignum=f.number)
plt.xticks(tick_positions, tick_labels, fontsize=14, rotation=50)
plt.yticks(tick_positions, tick_labels, fontsize=14)

# Colour scale legend.
cb = plt.colorbar()
cb.ax.tick_params(labelsize=14)


In [None]:
# Three histograms per row. Keep the original 12-row layout as a minimum and
# add rows when there are more than 36 columns, so the grid cannot overflow.
grid_cols = 3
grid_rows = max(12, math.ceil(len(all_columns) / grid_cols))
fig = make_subplots(rows=grid_rows, cols=grid_cols)

traces = [
    go.Histogram(x=train_features[col], nbinsx=100, name=col) for col in all_columns
]

# add_trace(row=, col=) replaces the deprecated append_trace.
for i, trace in enumerate(traces):
    fig.add_trace(
        trace,
        row=i // grid_cols + 1,
        col=i % grid_cols + 1,
    )

fig.update_layout(
    title_text='Highly correlated features',
    height=1200
)

fig.show()

### Targets analysis

In [None]:
# Load the scored targets (analysed and trained on below) and the non-scored
# targets (loaded here but not used again in this notebook).
train_targets = pd.read_csv("../input/lish-moa/train_targets_scored.csv")
train_targets_nonscored = pd.read_csv('../input/lish-moa/train_targets_nonscored.csv')
print('Number of rows: ', train_targets.shape[0])
print('Number of cols: ', train_targets.shape[1])

# Last expression of the cell: preview of the scored targets.
train_targets.head()

In [None]:
# Positive-sample count per target, sorted ascending; keep the 50 largest.
x = (
    train_targets.drop(columns=['sig_id'])
    .sum(axis=0)
    .sort_values()
    .rename_axis('column')
    .reset_index(name='nonzero_records')
    .tail(50)
)

chart_options = dict(
    x='nonzero_records',
    y='column',
    orientation='h',
    title='Columns with the higher number of positive samples (top 50)',
    width=800,
    height=1000,
)
fig = px.bar(x, **chart_options)

fig.show()

In [None]:
# Positive-sample count per target, sorted descending; keep the 50 smallest.
x = (
    train_targets.drop(columns=['sig_id'])
    .sum(axis=0)
    .sort_values(ascending=False)
    .rename_axis('column')
    .reset_index(name='nonzero_records')
    .tail(50)
)

chart_options = dict(
    x='nonzero_records',
    y='column',
    orientation='h',
    title='Columns with the lowest number of positive samples (top 50)',
    width=800,
    height=1000,
)
fig = px.bar(x, **chart_options)

fig.show()

We can see that at least 50 target columns have fewer than 20 positive samples (about 0.1% of the rows).

In [None]:
# Share of positive rows (in percent) for every target, largest first.
positives = train_targets.drop(columns=['sig_id']).sum(axis=0).sort_values(ascending=False)
x = positives.rename_axis('column').reset_index(name='count')
x['count'] = x['count'] * 100 / len(train_targets)

chart_options = dict(
    x='column',
    y='count',
    orientation='v',
    title='Percent of positive records for every column in target',
    width=1200,
    height=800,
)
fig = px.bar(x, **chart_options)

fig.show()


The largest share of positive samples for a single target column is about 3.5%, so we are dealing with highly imbalanced data.

In [None]:
# Number of active targets per sample ...
active_per_sample = train_targets.drop(columns=['sig_id']).astype(bool).sum(axis=1)
data = active_per_sample.rename('count').rename_axis('row').reset_index()
# ... and how many samples share each activation count.
data = data.groupby(['count'])['row'].count().reset_index()

fig = px.bar(
    data,
    x="count",
    y=data['row'],
    title='Number of activations in targets for every sample',
    width=800,
    height=500
)

fig.show()

In [None]:
# Number of active targets per sample ...
active_per_sample = train_targets.drop(columns=['sig_id']).astype(bool).sum(axis=1)
data = active_per_sample.rename('count').rename_axis('row').reset_index()
# ... and how many samples share each activation count.
data = data.groupby(['count'])['row'].count().reset_index()

# Same distribution as a pie chart, expressed in percent of all samples.
share_percent = 100 * data['row'] / len(train_targets)
fig = px.pie(
    data,
    values=share_percent,
    names="count",
    title='Number of activations in targets for every sample (Percent)',
    width=800,
    height=500
)

fig.show()

We can see here that about 40% of the samples have zeros in all target columns and more than 50% have exactly one active target column.

In [None]:
train_targets.describe()

### Train and Targets correlations

Time taken to find the most correlated features for every target column.

In [None]:
%%time

# Pearson correlation of every numeric feature (rows, in `columns` order)
# with every target (columns). The whole table is computed with one matrix
# product instead of one Series.corr call per (feature, target) pair, and the
# frame is built in a single step rather than column by column.
# NOTE(review): the fast path pairs rows by position, so it assumes
# train_features and train_targets list the samples in the same order.
target_names = [t_col for t_col in train_targets.columns if t_col != 'sig_id']
feature_values = train_features[columns].to_numpy(dtype=float)
target_values = train_targets[target_names].to_numpy(dtype=float)

if np.isnan(feature_values).any() or np.isnan(target_values).any():
    # Missing values need pairwise-complete handling: fall back to pandas.
    correlation_matrix = pd.DataFrame({
        t_col: [train_features[col].corr(train_targets[t_col]) for col in columns]
        for t_col in target_names
    })
else:
    feature_dev = feature_values - feature_values.mean(axis=0)
    target_dev = target_values - target_values.mean(axis=0)
    norms = np.outer(
        np.sqrt((feature_dev ** 2).sum(axis=0)),
        np.sqrt((target_dev ** 2).sum(axis=0)),
    )
    # Constant columns have zero norm; 0/0 yields NaN, matching Series.corr.
    with np.errstate(divide='ignore', invalid='ignore'):
        correlation_matrix = pd.DataFrame(
            feature_dev.T @ target_dev / norms,
            columns=target_names,
        )

In [None]:
# Label every row with the feature it belongs to and use that label as the index.
correlation_matrix = (
    correlation_matrix
    .assign(train_features=columns)
    .set_index('train_features')
)

correlation_matrix

Let's look at the highest absolute correlation between each target column and any column of the training set. Every bar in the chart is the strongest correlation (sign preserved) of that target column over all training columns.

In [None]:
def maxCol(x):
    """Return the entry of ``x`` with the largest absolute value, sign preserved.

    Only the minimum and the maximum can be that entry; on a tie in absolute
    value the minimum is returned.
    """
    smallest = x.min()
    largest = x.max()
    return max(smallest, largest, key=abs)

# Strongest (signed) correlation of every target with any training feature.
high_scores = correlation_matrix.apply(maxCol, axis=0).reset_index()
high_scores.columns = ['column', 'best_correlation']

chart_options = dict(
    x='column',
    y="best_correlation",
    orientation='v',
    title='Best correlation with train columns for every target column',
    width=1200,
    height=800,
)
fig = px.bar(high_scores, **chart_options)

fig.show()

Now let's see which training columns are most often the best-correlated feature for a target column. Every bar in the chart shows how many target columns have that training column as their most correlated feature.

In [None]:
# For every target, the training feature with the largest absolute correlation.
tar_cols = list(correlation_matrix.columns)
tr_cols = [
    correlation_matrix[col].abs().sort_values(ascending=False).index[0]
    for col in tar_cols
]

col_df = pd.DataFrame({'column': tar_cols, 'train_best_column': tr_cols})

# Attach the best feature name to the best correlation value (joined on 'column').
total_scores = pd.merge(high_scores, col_df)

total_scores

In [None]:
# How many targets have each training column as their best-correlated feature.
# The column names produced by value_counts().reset_index() differ between
# pandas 1.x and 2.x, so sorting by 'train_best_column' orders by count on one
# version and by feature name on the other. Sorting the counts before
# resetting the index and naming both columns explicitly works on both.
count_features = (
    total_scores['train_best_column']
    .value_counts()
    .sort_values()
    .rename_axis('column')
    .reset_index(name='count')
    .tail(33)
)

fig = px.bar(
    count_features,
    x='count',
    y="column",
    orientation='h',
    title='Columns from training set with number of high correlations with target columns',
    width=800,
    height=700
)

fig.show()

Target columns and pairs of highly correlated features.

Let's select some random columns and see how they deal with pairs of the highly correlated features.

In [None]:
# Every target name. Filtering instead of .remove('sig_id') keeps the cell
# working when 'sig_id' has already been dropped from train_targets.
target_columns = [name for name in train_targets.columns.tolist() if name != 'sig_id']

# Five distinct targets. Drawing with replacement could repeat a target, which
# makes current_corr[col] a DataFrame and breaks the ranking that follows, and
# randint(0, len - 1) could never pick the last target.
for_analysis = np.random.choice(
    target_columns, size=min(5, len(target_columns)), replace=False
).tolist()
current_corr = correlation_matrix[for_analysis]

In [None]:
# For every selected target, the two training features with the largest
# absolute correlation (ranking computed once per target).
tar_cols = list(current_corr.columns)
tr_first_cols = list()
tr_second_cols = list()

for col in tar_cols:
    ranked = current_corr[col].abs().sort_values(ascending=False).index
    tr_first_cols.append(ranked[0])
    tr_second_cols.append(ranked[1])

col_df = pd.DataFrame({
    'column': tar_cols,
    'train_1_column': tr_first_cols,
    'train_2_column': tr_second_cols,
})

col_df


In [None]:
def plot_scatter(col_df, index):
    """Scatter the two best-correlated features of one target.

    Row ``index`` of ``col_df`` names the target ('column') and the two
    features ('train_1_column', 'train_2_column'). Points are coloured by the
    target value; rows where the target equals 1 get marker size 12, all
    others size 1. Reads the notebook-level ``train_targets``,
    ``train_features`` and ``scatter_size``.
    """
    row = col_df.iloc[index]
    target_name = row['column']
    x_name = row['train_1_column']
    y_name = row['train_2_column']

    analysis = pd.DataFrame()
    analysis['color'] = train_targets[target_name]
    analysis[x_name] = train_features[x_name]
    analysis[y_name] = train_features[y_name]
    # Enlarge positive samples so they stand out.
    analysis['size'] = np.where(analysis['color'] == 1, 12, 1)

    fig = px.scatter(
        analysis,
        x=x_name,
        y=y_name,
        color="color",
        size='size',
        width=scatter_size,
        height=scatter_size,
        title='Scatter plot for ' + target_name
    )
    fig.show()

In [None]:
plot_scatter(col_df, 0)

In [None]:
plot_scatter(col_df, 1)

In [None]:
plot_scatter(col_df, 2)

Let's do the same but for 3d plots.

In [None]:
# Five distinct targets. Drawing with replacement could repeat a target, which
# makes current_corr[col] a DataFrame and breaks the ranking below, and
# randint(0, len - 1) could never pick the last target.
for_analysis = np.random.choice(
    target_columns, size=min(5, len(target_columns)), replace=False
).tolist()
current_corr = correlation_matrix[for_analysis]

# For every selected target, the three training features with the largest
# absolute correlation (ranking computed once per target).
tar_cols = list(current_corr.columns)
tr_first_cols = list()
tr_second_cols = list()
tr_third_cols = list()

for col in tar_cols:
    ranked = current_corr[col].abs().sort_values(ascending=False).index
    tr_first_cols.append(ranked[0])
    tr_second_cols.append(ranked[1])
    tr_third_cols.append(ranked[2])

col_df = pd.DataFrame()
col_df['column'] = tar_cols
col_df['train_1_column'] = tr_first_cols
col_df['train_2_column'] = tr_second_cols
col_df['train_3_column'] = tr_third_cols

col_df


In [None]:
def plot_3dscatter(col_df, index):
    """3D scatter of the three best-correlated features of one target.

    Row ``index`` of ``col_df`` names the target ('column') and the three
    features ('train_1_column' .. 'train_3_column'). Points are coloured by the
    target value; rows where the target equals 1 get marker size 20, all
    others size 1. Reads the notebook-level ``train_targets``,
    ``train_features`` and ``scatter_size``.
    """
    row = col_df.iloc[index]
    target_name = row['column']
    x_name = row['train_1_column']
    y_name = row['train_2_column']
    z_name = row['train_3_column']

    analysis = pd.DataFrame()
    analysis['color'] = train_targets[target_name]
    for feature_name in (x_name, y_name, z_name):
        analysis[feature_name] = train_features[feature_name]
    # Enlarge positive samples so they stand out.
    analysis['size'] = np.where(analysis['color'] == 1, 20, 1)

    fig = px.scatter_3d(
        analysis,
        x=x_name,
        y=y_name,
        z=z_name,
        color="color",
        size='size',
        height=scatter_size,
        width=scatter_size,
        title='Scatter plot for ' + target_name
    )
    fig.show()

In [None]:
plot_3dscatter(col_df, 0)

In [None]:
plot_3dscatter(col_df, 1)

In [None]:
plot_3dscatter(col_df, 2)

We can extract several group names from the target column names: the last term of a column name appears to define its group. Let's extract them and visualize the groups that contain more than one column.

In [None]:
# Count target columns per group, where the group is the text after the last
# underscore of the column name. dict.get replaces the bare try/except, which
# would also have hidden unrelated errors.
last_term = dict()

for item in target_columns:
    group = item.split('_')[-1]
    last_term[group] = last_term.get(group, 0) + 1

last_term = pd.DataFrame(list(last_term.items()), columns=['group', 'count'])
last_term = last_term.sort_values('count')
# .copy() so the assignment below modifies an independent frame, not a slice.
last_term = last_term[last_term['count']>1].copy()
# Percent of all target columns (derived from the data instead of a hard-coded 206).
last_term['count'] = last_term['count'] * 100 / len(target_columns)

fig = px.bar(
    last_term,
    x='count',
    y="group",
    orientation='h',
    title='Groups in target columns (Percent from all target columns)',
    width=800,
    height=500
)

fig.show()

Number of activation for 1 sample in every group

In [None]:
# For every group: the largest number of that group's targets that are active
# in a single sample.
answer = list()

for group in last_term.group.tolist():
    agent_list = [item for item in target_columns if item.rsplit('_', 1)[-1] == group]
    active_in_group = train_targets[agent_list].astype(bool).sum(axis=1)
    answer.append(active_in_group.max())

In [None]:
# One row per group with its maximum number of simultaneously active targets.
ds = pd.DataFrame({
    'group': last_term.group.tolist(),
    'max_value': answer,
})

chart_options = dict(
    x='max_value',
    y="group",
    orientation='h',
    title='Maximum number of active columns in 1 sample for every group',
    width=800,
    height=500,
)
fig = px.bar(ds, **chart_options)

fig.show()

We can see that for the groups activator, agent and blocker the maximum number of active columns in a single sample is 1.

### Targets & Train features dependencies

Let's check target columns with categorical columns from training set.

In [None]:
# Experimental-condition columns side by side with every target column.
categories = train_features[['cp_type', 'cp_time', 'cp_dose']]
tar = train_targets.drop(columns=['sig_id'])
analysis = pd.concat((categories, tar), axis=1)

In [None]:
# For every dose level, list the targets that take a single value within it.
condition_cols = ['cp_type', 'cp_time', 'cp_dose']

for category in analysis['cp_dose'].unique().tolist():
    subset = analysis[analysis['cp_dose'] == category]
    cols = [
        col for col in analysis.columns
        if col not in condition_cols and subset[col].nunique() == 1
    ]
    number = len(cols)

    print(category, '. Number of columns with 1 unique value: ', number, '. Columns: ', cols)

Let's check the problematic columns for cp_dose = D2.

In [None]:
analysis[analysis['cp_dose'] == 'D2']['atp-sensitive_potassium_channel_antagonist'].value_counts()

In [None]:
analysis[analysis['cp_dose']=='D2']['erbb2_inhibitor'].value_counts()

In [None]:
# For every treatment duration, list the targets that take a single value within it.
condition_cols = ['cp_type', 'cp_time', 'cp_dose']

for category in analysis['cp_time'].unique().tolist():
    subset = analysis[analysis['cp_time'] == category]
    cols = [
        col for col in analysis.columns
        if col not in condition_cols and subset[col].nunique() == 1
    ]
    number = len(cols)

    print(category, '. Number of columns with 1 unique value: ', number, '. Columns: ', cols)

Let's check problematic columns for cp_time = 24 and 72.

In [None]:
analysis[analysis['cp_time'] == 24]['erbb2_inhibitor'].value_counts()

In [None]:
analysis[analysis['cp_time'] == 72]['erbb2_inhibitor'].value_counts()

In [None]:
analysis[analysis['cp_time'] == 24]['atp-sensitive_potassium_channel_antagonist'].value_counts()

In [None]:
analysis[analysis['cp_time'] == 72]['atp-sensitive_potassium_channel_antagonist'].value_counts()

In [None]:
# For every cp_type value, list the targets that take a single value within it.
condition_cols = ['cp_type', 'cp_time', 'cp_dose']

for category in analysis['cp_type'].unique().tolist():
    subset = analysis[analysis['cp_type'] == category]
    cols = [
        col for col in analysis.columns
        if col not in condition_cols and subset[col].nunique() == 1
    ]
    number = len(cols)

    print(category, '. Number of columns with 1 unique value: ', number, '. Columns: ', cols)

In [None]:
analysis[analysis['cp_type']=='ctl_vehicle']['igf-1_inhibitor'].value_counts()

We can see that for all records where cp_type is ctl_vehicle, every target is 0. The same holds for cp_time == 72 and cp_time == 24, but only for 2 target columns, and for cp_dose == D2, also for 2 target columns.

### Training

In [None]:
import sys
sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames the same way.
data = pd.concat([train_features, test_features])

In [None]:
def preprocess(df):
    """Turn a raw feature frame into a purely numeric model input.

    Drops the identifier and bookkeeping columns ('sig_id', 'cp_type' and,
    when present, the helper 'dataset' column) and one-hot encodes 'cp_time'
    (24/48/72 -> 0/1/2) and 'cp_dose' (D1/D2 -> 0/1).

    The encoded columns are always 'cp_time_0', 'cp_time_1', 'cp_time_2',
    'cp_dose_0', 'cp_dose_1', appended after the remaining features, even when
    a category does not occur in ``df``. Train and test frames therefore always
    get the same column layout. A value outside the known categories produces
    an all-zero row in its group.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw features; must contain 'sig_id', 'cp_type', 'cp_time', 'cp_dose'.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` itself is not modified.
    """
    # drop() returns a new frame, so the caller's frame is left untouched.
    out = df.drop(columns=['cp_type', 'sig_id'])
    out = out.drop(columns=['dataset'], errors='ignore')

    # Fixed category lists pin the set of dummy columns regardless of which
    # values happen to be present in this particular frame.
    out['cp_dose'] = pd.Categorical(out['cp_dose'].map({'D1': 0, 'D2': 1}), categories=[0, 1])
    out['cp_time'] = pd.Categorical(out['cp_time'].map({24: 0, 48: 1, 72: 2}), categories=[0, 1, 2])

    return pd.get_dummies(out, columns=['cp_time', 'cp_dose'])

# Model inputs for both splits.
train = preprocess(train_features)
test = preprocess(test_features)

# Keep only the label columns. Unlike `del`, this does not raise when the
# cell is executed a second time and 'sig_id' is already gone.
train_targets = train_targets.drop(columns=['sig_id'], errors='ignore')

In [None]:
train.head()

In [None]:
# Fit scaler to joint train and test data
# (DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames the same way.)
scaler = preprocessing.MinMaxScaler()
scaler.fit(pd.concat([train, test]))

train_trans = scaler.transform(train)
test_trans = scaler.transform(test)

# transform() returns plain arrays; wrap them to get the column names back.
train = pd.DataFrame(train_trans, columns=train.columns)
test = pd.DataFrame(test_trans, columns=test.columns)

In [None]:
# Clipping bounds for predicted probabilities; the same epsilon is also read
# by create_model as its label-smoothing value.
somthing_rate = 1e-15
P_MIN = somthing_rate
P_MAX = 1 - P_MIN

def loss_fn(yt, yp):
    """Binary log loss of predictions ``yp`` against labels ``yt``.

    Predictions are first clipped to [P_MIN, P_MAX]. ``labels=[0,1]`` is passed
    to ``log_loss`` so both classes are declared even if ``yt`` contains one.
    """
    clipped = np.clip(yp, P_MIN, P_MAX)
    return log_loss(yt, clipped, labels=[0,1])


In [None]:
def create_model(num_columns, actv='relu', num_targets=206):
    """Build and compile the dense multi-label classifier.

    Layout: BatchNorm -> Dropout(0.2) -> Dense(1024, actv), a second hidden
    block that depends on ``actv``, then BatchNorm -> Dense(num_targets,
    sigmoid). All dense layers are wrapped in weight normalization.

    Parameters
    ----------
    num_columns : int
        Number of input features.
    actv : str, default 'relu'
        Activation of the first hidden layer. With 'elu' the second block is
        AlphaDropout + Dense(512, selu, lecun_normal); otherwise it is
        Dropout + Dense(1024, actv).
    num_targets : int, default 206
        Number of sigmoid outputs (previously hard-coded).

    Returns
    -------
    tf.keras.Sequential
        Model compiled with AdamW and binary cross-entropy; the label
        smoothing value is the notebook-level ``somthing_rate``.
    """
    # Pass the shape as a tuple (the documented form) rather than a bare int.
    model = tf.keras.Sequential([tf.keras.layers.Input(shape=(num_columns,))])

    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(0.2))
    model.add(tfa.layers.WeightNormalization(tf.keras.layers.Dense(1024, activation=actv)))

    if actv == 'elu':
        # NOTE(review): this branch switches to a 512-unit SELU layer although
        # the first layer uses ELU - confirm this mix is intentional.
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.AlphaDropout(0.2))
        model.add(tfa.layers.WeightNormalization(tf.keras.layers.Dense(512, kernel_initializer='lecun_normal', activation='selu')))
    else:
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Dropout(0.2))
        model.add(tfa.layers.WeightNormalization(tf.keras.layers.Dense(1024, activation=actv)))

    #============ Final Layer =================
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tfa.layers.WeightNormalization(tf.keras.layers.Dense(num_targets, activation="sigmoid")))

    # `learning_rate` replaces the deprecated `lr` alias.
    model.compile(optimizer=tfa.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-5, clipvalue=756),
                  loss=BinaryCrossentropy(label_smoothing=somthing_rate),
                  )
    return model

In [None]:
# Use All feats as top feats: positional indices of every column of `train`.
top_feats = list(range(train.shape[1]))
print("Top feats length:",len(top_feats))

In [None]:
mod = create_model(len(top_feats))
mod.summary()

In [None]:
def metric(y_true, y_pred):
    """Mean of the per-target clipped log losses.

    For every column of the notebook-level ``train_targets``, the matching
    columns of ``y_true`` and ``y_pred`` are scored with ``loss_fn``; the
    unweighted mean over all targets is returned.
    """
    per_target = [
        loss_fn(y_true.loc[:, name], y_pred.loc[:, name].astype(float))
        for name in train_targets.columns
    ]
    return np.mean(per_target)

In [None]:
N_STARTS = 14                    # total number of repeated CV runs (seeds)
S_STARTS = int(N_STARTS/2)       # the first S_STARTS seeds train the RELU model
N_ELU_STARTS = N_STARTS - S_STARTS  # the remaining seeds train the ELU model
N_SPLITS = 7                     # folds per seed

# Out-of-fold prediction accumulators. They start as float frames because
# float predictions are added to them; accumulating into the integer label
# columns of a train_targets copy forces a dtype change mid-loop.
res_relu = pd.DataFrame(0.0, index=train_targets.index, columns=train_targets.columns)
res_elu = res_relu.copy()

# Test prediction accumulators in submission layout.
ss_relu = sample_submission.copy()
ss_elu = sample_submission.copy()
ss_relu.loc[:, train_targets.columns] = 0.0
ss_elu.loc[:, train_targets.columns] = 0.0

ss_dict = {}

historys = dict()

tf.random.set_seed(42)
for seed in range(N_STARTS):
    use_relu = seed < S_STARTS
    splitter = MultilabelStratifiedKFold(n_splits=N_SPLITS, random_state=seed, shuffle=True)
    for n, (tr, te) in enumerate(splitter.split(train_targets, train_targets)):
        print(f"======{train_targets.values[tr].shape}========{train_targets.values[te].shape}=====")

        if use_relu: # every actv. will train for 7 times seed.
            print(f'Seed: {seed} => Fold: {n} ==> (RELU MODEL)')
            model = create_model(len(top_feats), actv='relu')
        else:
            print(f'Seed: {seed} => Fold: {n} ==> (ELU MODEL)')
            model = create_model(len(top_feats), actv='elu')

        checkpoint_path = f'repeat:{seed}_Fold:{n}.hdf5'
        reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.2, min_lr=1e-6, patience=4, verbose=1, mode='auto')
        cb_checkpt = ModelCheckpoint(checkpoint_path, monitor = 'val_loss', verbose = 1, save_best_only = True,
                                     save_weights_only = True, mode = 'auto')
        early = EarlyStopping(monitor="val_loss", mode="min", restore_best_weights=True, patience= 10, verbose = 1)

        history = model.fit(train.values[tr][:, top_feats],
                  train_targets.values[tr],
                  validation_data=(train.values[te][:, top_feats], train_targets.values[te]),
                  epochs=60, batch_size=128,
                  callbacks=[reduce_lr_loss, cb_checkpt, early], verbose=2
                 )

        # Key by seed AND fold; keying by seed alone kept only the last fold of every seed.
        historys[f'history_{seed+1}_fold_{n+1}'] = history
        print("Model History Saved.")

        # Predict with the best checkpoint of this fold.
        model.load_weights(checkpoint_path)

        test_predict = model.predict(test.values[:, top_feats])
        val_predict = model.predict(train.values[te][:, top_feats])

        if use_relu:
            ss_relu.loc[:, train_targets.columns] += test_predict
            res_relu.loc[te, train_targets.columns] += val_predict
        else:
            ss_elu.loc[:, train_targets.columns] += test_predict
            res_elu.loc[te, train_targets.columns] += val_predict

        print(f'OOF Metric For SEED {seed} => FOLD {n} : {metric(train_targets.loc[te, train_targets.columns], pd.DataFrame(val_predict, columns=train_targets.columns))}')
        print('+-' * 10)

# Average the accumulated predictions. Test rows are predicted by every fold
# of every seed; each training row is predicted out-of-fold once per seed.
# The divisors use the explicit constants instead of the leaked loop variable
# `n`, and each activation uses its own number of seeds.
ss_relu.loc[:, train_targets.columns] /= (N_SPLITS * S_STARTS)
res_relu.loc[:, train_targets.columns] /= S_STARTS

ss_elu.loc[:, train_targets.columns] /= (N_SPLITS * N_ELU_STARTS)
res_elu.loc[:, train_targets.columns] /= N_ELU_STARTS

In [None]:
# Show Model loss in plots

# Collect the first 40 epochs of every stored run. The lists are created once,
# before the loop; re-creating them on every iteration meant only the last
# run was plotted.
loss = []
val_loss = []
for k, v in historys.items():
    loss.append(v.history['loss'][:40])
    val_loss.append(v.history['val_loss'][:40])

# Early stopping makes runs end after different numbers of epochs; average
# only over the epochs that every run reached.
n_epochs = min((len(curve) for curve in loss), default=0)
mean_loss = np.mean([curve[:n_epochs] for curve in loss], axis=0) if n_epochs else []
mean_val_loss = np.mean([curve[:n_epochs] for curve in val_loss], axis=0) if n_epochs else []

plt.figure(figsize = (15, 6))
plt.plot(mean_loss)
plt.plot(mean_val_loss)
plt.yscale('log')
plt.yticks(ticks=[1,1e-1,1e-2])
plt.xlabel('Epochs')
plt.ylabel('Average Logloss')
plt.legend(['Training','Validation'])

In [None]:
print(f'OOF Metric (relu): {metric(train_targets, res_relu)}')
print(f'OOF Metric (elu): {metric(train_targets, res_elu)}')

In [None]:
ss_elu.to_csv('submission_elu.csv', index=False)

In [None]:
ss_relu.to_csv('submission.csv', index=False)