# Keras Multiclass Neural Network with Tanh Activation & Blended Shuffled Stratified Runs

**This model performs relatively well on log loss. The downside is that it tends to predict a single class. Feel free to copy and play around with it.**

Special thanks to https://www.kaggle.com/simakov/keras-multilabel-neural-network-v1-2

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import keras
import matplotlib.pyplot as plt
from keras.layers import Dense, Activation, Dropout, Flatten, BatchNormalization, Input
from keras.models import Sequential, save_model
from keras.utils import np_utils
import tensorflow as tf
from keras.callbacks import EarlyStopping
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.decomposition import PCA
from sklearn.utils import class_weight
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from sklearn.metrics import log_loss
import tensorflow_addons as tfa
from sklearn.model_selection import StratifiedKFold
from imblearn.combine import SMOTEENN
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

# Types of Over and Undersampling Using imblearn

1) **Random Under Sampling:** Use `RandomUnderSampler(sampling_strategy='not minority')` (or `'majority'`) to adjust the relationship between the minority class and the majority class. `sampling_strategy` also accepts a dict whose keys are the classes and whose values are the desired number of samples for each class. By default sampling is done without replacement; this can be changed with `replacement=True`. Note: for binary classification you can instead pass a float giving the desired minority / majority ratio.


**Here we reach a 2 to 1 balance:** 
define undersample strategy

undersample = RandomUnderSampler(sampling_strategy='not minority')

sampling_strategy = {0: 10, 1: 15, 2: 20}

rus = RandomUnderSampler(sampling_strategy=sampling_strategy)

fit and apply the transform

X_under, y_under = undersample.fit_resample(X, y)

2) **Random Over Sampling:** 

ros = RandomOverSampler(random_state=0)

X_resampled, y_resampled = ros.fit_resample(X, y)

3) **Over Sampling Using SMOTE:** This method takes two minority-class points that are close to each other and creates a synthetic point between them, repeating until the desired balance is achieved.

**Here we reach a 2 to 1 balance:** 
oversample = SMOTE(sampling_strategy='not minority') 
X_over, y_over = oversample.fit_resample(X, y)


4) **Nearest Neighbor Under Sampling and SMOTE Over Sampling (SMOTEENN)**: This strategy combines the above approaches to achieve the desired ratio.

**Here we reach a 2 to 1 balance:** 
define sampling strategy
sample = SMOTEENN(sampling_strategy='not minority')
fit and apply the transform
X_over, y_over = sample.fit_resample(X, y)

5) **Bagging:** Bootstrap resampling with replacement from under-represented classes.


In [None]:
# Load the training data and discard the row identifier column in one step.
train = pd.read_csv(
    '../input/tabular-playground-series-may-2021/train.csv'
).drop(columns=['id'])

In [None]:
#dic_targ = {"target": {"Class_1": 0, "Class_2": 1,"Class_3": 2,"Class_4": 3}}
#train = train.replace(dic_targ)
# Histograms of the variables.
# DataFrame.hist() opens its own figure, so the size has to be handed to it
# directly; a preceding plt.figure(figsize=...) only creates an extra empty
# figure and leaves the histograms at the default size.
train.hist(figsize=(20, 20))
plt.show()

In [None]:
# Load the test data, keep the ids aside for the submission file and remove
# them from the feature matrix.
test = pd.read_csv('../input/tabular-playground-series-may-2021/test.csv')
test_id = test['id']
test = test.drop(columns=['id'])

In [None]:
train

In [None]:
# One-hot encode the target column; all other columns pass through unchanged.
cat_columns = ['target']
targ_dummy = pd.get_dummies(train, columns=cat_columns, prefix_sep="_")
targ_dummy

In [None]:
#np.ascontiguousarray()
# Select features and one-hot targets by column name instead of by hard-coded
# position, so the split does not depend on the exact number of features.
feature_cols = [col for col in train.columns if col != 'target']
target_cols = [col for col in targ_dummy.columns if col.startswith('target_')]
X = train[feature_cols]
# Explicit copy: y is modified later on, and a plain slice of targ_dummy
# would trigger pandas' chained-assignment warning.
y = targ_dummy[target_cols].copy()
y

In [None]:
# Class balance: number of samples per one-hot target column.
Total = y.sum(axis=0)
print(Total)

In [None]:
X.values

In [None]:
###Check for Blank Values
# Rows whose one-hot target columns are all zero have no class assigned.
# The row totals are kept in a separate Series so the label frame `y` is never
# modified: a temporary "sum" column would end up in the training targets if
# the cell were interrupted before the column was removed again.
row_totals = y.sum(axis=1)
print(y[row_totals == 0])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows, stratified on the one-hot target.
# No random_state is passed, so the split draws from NumPy's global generator,
# which is seeded here to make the split repeatable.
np.random.seed(15)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    stratify=y,
                                                    test_size=0.3)

##### Get Indexes for Training Set
# y_train.index holds index *labels*, so they are looked up with .loc;
# .iloc would only give the same rows while the frame keeps its default
# 0..n-1 index.
indexs_list2 = y_train.index.values.tolist()
y_class_df = train['target'].loc[indexs_list2]

In [None]:
# Class balance of the training split: samples per one-hot target column.
Total2 = y_train.sum()
print(Total2)

In [None]:

# define pipeline
#over = SMOTE(sampling_strategy=0.1)
####5943,40248,14994, 8815
# Desired number of samples per class after undersampling, keyed by class
# index. Only class 1 differs from the figures noted above (presumably the
# original per-class counts of the training split -- verify).
sampling_strategy_under = {0: 5943, 1: 25000, 2: 14994, 3: 8815}
#sampling_strategy_over = {0: 12500, 1: 25000, 2: 20000, 3:16000}

under = RandomUnderSampler(sampling_strategy=sampling_strategy_under)
#over = SMOTEENN(sampling_strategy=sampling_strategy_over)

# np.asarray accepts DataFrames as well as arrays, so this cell can be run
# again after X_train / y_train have already been replaced by the resampled
# arrays (calling .values on an ndarray would raise AttributeError).
X_train, y_train = under.fit_resample(np.asarray(X_train), np.asarray(y_train))
#X_train, y_train=over.fit_resample(X_train, y_train)

#X_train=pd.DataFrame(X_train)
#y_train=pd.DataFrame(y_train)

In [None]:
###Check Class Balance
# Sum over the rows (axis=0) to get one count per class column. If y_train is
# a NumPy array at this point (e.g. after resampling from .values arrays), a
# bare .sum() would collapse everything into a single number instead of
# per-class counts; axis=0 gives the per-class view for arrays and DataFrames.
Total2 = y_train.sum(axis=0)
print(Total2)

In [None]:
# Number of target classes, taken from the width of the one-hot label frame
# so the output layer always matches the labels it is trained on.
num_classes = y.shape[1]
num_classes

In [None]:
# Number of input features = size of the second axis of the training matrix.
num_columns = np.shape(X_train)[1]
num_columns

In [None]:
def create_model(num_columns):
    """Build and compile the fully connected tanh classifier.

    Layer stack: input -> BatchNorm -> Dropout(0.2) -> Dense(50, tanh)
    -> BatchNorm -> Dropout(0.2) -> Dense(25, tanh) -> BatchNorm
    -> Dropout(0.2) -> Dense(num_classes, softmax).

    Args:
        num_columns: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy as
        loss, categorical accuracy as metric, and an Adam optimizer wrapped
        in Lookahead (``sync_period=10``).

    Note:
        The width of the output layer is read from the module-level
        ``num_classes`` variable, which must be defined before the call.
    """
    model = Sequential()
    # The input shape is given as a tuple (batch dimension excluded); a bare
    # integer is not a valid shape in every Keras version.
    model.add(Input(shape=(num_columns,)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(units=50, kernel_initializer='glorot_uniform', activation='tanh'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(units=25, activation='tanh'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(units=num_classes, activation='softmax'))
    opt = tfa.optimizers.Lookahead(tf.optimizers.Adam(), sync_period=10)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=[tf.keras.metrics.CategoricalAccuracy()])
    return model
    
#Best drop out .05 , .1, .2, 38 epochs
#metrics=[tf.keras.metrics.AUC(name='auc')]
#tf.keras.metrics.AUC(name='auc')
#tf.keras.metrics.Recall(name='recall')
#tf.keras.metrics.Precision(name='precision')

In [None]:
# Class penalty weights, keyed by class index.
class_weights = {0: 1.3, 1: 1.0, 2: 1.0, 3: 1.2}
#class_weight=class_weights,

In [None]:
#reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-4, mode='min')
#model = create_model(num_columns)
#history = model.fit(X_train,
#                  y_train,
#                  validation_data=(X_test, y_test),
#                  epochs=60, batch_size=128,
#                  callbacks=[reduce_lr_loss], verbose=2
#                 )

In [None]:
# Plot training & validation accuracy values
#plt.plot(history.history['categorical_accuracy'])
#plt.plot(history.history['val_categorical_accuracy'])
#plt.title('Model accuracy')
#plt.ylabel('Categorical Accuracy')
#plt.xlabel('Epoch')
#plt.legend(['Train', 'Test'], loc='upper left')
#plt.show()

In [None]:
# Plot training & validation accuracy values
#plt.plot(history.history['loss'])
#plt.plot(history.history['val_loss'])
#plt.title('Model accuracy')
#plt.ylabel('Categorical Crossentropy')
#plt.xlabel('Epoch')
#plt.legend(['Train', 'Test'], loc='upper left')
#plt.show()

In [None]:
# Largest row position in the test set (number of test rows minus one).
l = test.shape[0] - 1
l

In [None]:
##Empty Predictions Set
# One row per test sample and one column per target class, filled with 0.0.
# Creating the frame directly, instead of slicing and zeroing a copy of `y`,
# gives the accumulator a float dtype from the start (it later holds summed
# probabilities) and does not require the training set to contain at least as
# many rows as the test set.
ss = pd.DataFrame(0.0, index=pd.RangeIndex(len(test)), columns=y.columns)
ss

In [None]:
##Empty Validation Set
# One row per training sample and one column per target class, filled with
# 0.0. A float frame is built directly rather than zeroing a copy of `y`, so
# the out-of-fold probabilities added later never meet an integer/bool dtype.
# The 0..n-1 index matches the positional indices produced by the CV splitter.
res = pd.DataFrame(0.0, index=pd.RangeIndex(len(y)), columns=y.columns)
res

In [None]:
#Class Penalty Weights
#from sklearn.utils import class_weight

#class_weights = {0: 3.,
#                1: 1.,
#                2: 1.3,
#                3: 2.}
#class_weight=class_weights,
#under = RandomUnderSampler(sampling_strategy='not minority')

In [None]:
tf.random.set_seed(42)
# Print, for the training part of each of 5 stratified folds, the ratio of the
# first class (in sorted label order) to the largest class, followed by the
# per-class counts.
for n, (tr, te) in enumerate(StratifiedKFold(n_splits=5, shuffle=True).split(train['target'], train['target'])):
    # `tr` holds row positions, so select with .iloc; count once per fold
    # instead of recomputing value_counts() three times.
    fold_counts = train['target'].iloc[tr].value_counts().sort_index()
    # .iloc[0] picks the first class by position; plain [0] on a
    # string-labelled Series relies on a deprecated positional fallback.
    print(fold_counts.iloc[0] / fold_counts.max())
    print(fold_counts)

In [None]:

N_STARTS = 7
N_FOLDS = 10
tf.random.set_seed(42)

# Convert to NumPy once; the same arrays are reused by every fold of every
# repeat (.values returns the underlying data as a NumPy array).
X_values = X.values
y_values = y.values
test_values = test.values
target_cols = list(y.columns)

# Local accumulators for the summed test predictions and the summed
# out-of-fold predictions. Keeping them separate from ss / res means that
# re-running this cell starts from zero instead of adding onto the results of
# a previous run.
test_pred_sum = np.zeros((len(test_values), len(target_cols)))
oof_pred_sum = np.zeros((len(X_values), len(target_cols)))

####This iterates through starts:
for seed in range(N_STARTS):
    #####This iterates through folds n, validation indexes te, and train indexes tr:
    skf = StratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)
    for n, (tr, te) in enumerate(skf.split(train['target'], train['target'])):
        print(f'Fold {n}')

        model = create_model(num_columns)
        #checkpoint_path = f'repeat:{seed}_Fold:{n}.hdf5'
        # `min_delta` is the current name of the argument formerly called
        # `epsilon`, which newer Keras releases deprecate or reject.
        reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, min_delta=1e-4, mode='min')
        #cb_checkpt = ModelCheckpoint(checkpoint_path, monitor = 'val_loss', verbose = 0, save_best_only = True,
          #                           save_weights_only = True, mode = 'min')

        ####This fits the model to the training rows of the fold and validates on the held-out rows:
        ######Try Adjustments to Class Balance
        #sampling_strategy_under = {0: 5943, 1: 25000, 2: 14994, 3:8815}
        #under = RandomUnderSampler(sampling_strategy=sampling_strategy_under)
        #X_train, y_train=under.fit_resample(X.values[tr], y.values[tr])
        model.fit(X_values[tr],
                  y_values[tr],
                  validation_data=(X_values[te], y_values[te]),
                  epochs=35, batch_size=64,
                  callbacks=[reduce_lr_loss], verbose=2
                 )

        #model.load_weights(checkpoint_path)
        ####Makes predictions for each fold & seed:
        test_predict = model.predict(test_values)
        val_predict = model.predict(X_values[te])
        ####Sum predictions over folds and seeds
        test_pred_sum += test_predict
        # Each training row is in the validation part of exactly one fold per
        # seed, so every row collects N_STARTS out-of-fold predictions.
        oof_pred_sum[te] += val_predict
        print('')

####After all summed: test predictions are averaged over folds x starts,
####out-of-fold predictions over starts only.
# The divisor uses N_FOLDS rather than the loop variable left over from the
# last iteration.
ss[target_cols] = test_pred_sum / (N_FOLDS * N_STARTS)
res[target_cols] = oof_pred_sum / N_STARTS

In [None]:
####Estimate Validation Loss of Averaged Results
def metric(y_true, y_pred):
    """Return the mean per-class binary log loss.

    Every target column is scored as an independent binary problem with
    ``log_loss`` (labels fixed to 0 and 1) and the column scores are averaged.

    Args:
        y_true: DataFrame of 0/1 indicators, one column per class.
        y_pred: DataFrame of predicted probabilities containing at least the
            columns of ``y_true``, row-aligned with it.

    Returns:
        The average of the per-column log losses.
    """
    # Iterate over the columns of the frame being scored instead of a
    # module-level variable, so the function does not depend on notebook state.
    column_losses = [
        log_loss(y_true.loc[:, _target], y_pred.loc[:, _target].astype(float), labels=[0, 1])
        for _target in y_true.columns
    ]
    return np.mean(column_losses)

In [None]:
# Score the averaged out-of-fold predictions against the true labels.
oof_score = metric(y, res)
print(f'OOF Metric: {oof_score}')

In [None]:
ss

In [None]:
# Wrap the test ids in a DataFrame so they can be merged with the predictions.
test_id=pd.DataFrame(test_id)
# Bare expressions for inspection; a notebook cell only renders the last one.
ss
test_id

In [None]:
# Attach the ids to the averaged predictions by matching on the row index.
merged = test_id.merge(ss, how='inner', left_index=True, right_index=True)
ss = pd.DataFrame(merged)
ss

In [None]:
# Rename the columns positionally: the id first, then one column per class.
ss.columns = ['id','Class_1', 'Class_2', 'Class_3', 'Class_4']

In [None]:
ss

In [None]:
# Write the submission file; index=False keeps the row index out of the CSV.
ss.to_csv('submission.csv', index=False)