In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sb
from collections import Counter
import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import LabelEncoder,normalize,MinMaxScaler
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix,roc_auc_score,roc_curve
import seaborn as sns


import tensorflow as tf

In [None]:
# Load the training and test tables straight from the zipped CSV files.
train_path = '../input/higgs-boson/training.zip'
test_path = '../input/higgs-boson/test.zip'

train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

# Quick sanity check on the number of rows and columns in each frame.
print(train.shape, test.shape)

In [None]:
# Display the freshly loaded training frame as the cell output.
train

In [None]:
# List the column names of both frames so they can be compared side by side.
train_columns = train.columns.values
test_columns = test.columns.values

print(train_columns, '\n')
print(test_columns)

In [None]:
# Remove the Weight column from the training frame.
# NOTE(review): presumably dropped because it is not available in the test
# frame -- confirm against the column listings printed above.
# Re-running this cell raises KeyError because the column is already gone.
train = train.drop(columns=['Weight'])

In [None]:
# Class balance: tabulate the labels once and reuse the result for both the
# printed table and the bar chart.
label_counts = train['Label'].value_counts()
print(label_counts)

# This changes the default figure size for every later plot in the notebook.
rcParams['figure.figsize'] = (10, 5)
sb.barplot(x=label_counts.index, y=label_counts.values)
plt.title('Label counts')
plt.show()

In [None]:
# Encode the Label column as integer class codes (label encoding, not one-hot
# dummy columns). The fitted encoder is kept in `enc`.
# Later cells translate 1 back to 's' and 0 back to 'b' for the submission.

enc = LabelEncoder()

encoded_labels = enc.fit_transform(train['Label'])
train['Label'] = encoded_labels
train.head()

In [None]:
# Target vector: the integer-encoded labels.
y = train["Label"]

# X and X_test are aliases of train and test (no copy is made), so in-place
# operations on them also affect the original frames.
X, X_test = train, test

In [None]:
# Use EventId as the row index of both feature frames. The change is made in
# place, so `train` and `test` (which X and X_test alias) are re-indexed too.
for frame in (X, X_test):
    frame.set_index(['EventId'], inplace=True)

# The target must not stay among the features; this rebinds X to a new frame
# and leaves `train` with its Label column.
X = X.drop(columns=['Label'])

X.head()

In [None]:
# Preview the first rows of the test features, now indexed by EventId.
X_test.head()

In [None]:
# Summary statistics of the training frame. At this point `train` has lost its
# Weight column, has been re-indexed by EventId in place (through the X alias)
# and carries the integer-encoded Label column.
train.describe()

In [None]:
# Normalizing
# `normalize` is already imported with the other sklearn preprocessing helpers
# at the top of the notebook. The keyword arguments below are the function's
# defaults, written out so the behaviour is visible: every row (event) is
# rescaled to unit L2 norm, independently of all other rows.
# NOTE(review): this is per-sample scaling, not per-feature scaling -- confirm
# that this is the intended preprocessing for these features.
# After this cell X and X_test are plain arrays; the EventId index is gone.

X = normalize(X, norm='l2', axis=1)
X_test = normalize(X_test, norm='l2', axis=1)

In [None]:
# K-fold split used as a hold-out split.
# No model is trained per fold: the loop only reports each fold's indices, and
# the arrays built after it come from the LAST fold produced by the splitter
# (4/5 of the rows for training, 1/5 for validation).

from sklearn.model_selection import KFold


kf = KFold(n_splits=5, random_state=2020, shuffle=True)

for train_index, val_index in kf.split(X):
    print("TRAIN:", train_index, "TEST:", val_index)

# Slice once, after the loop: train_index / val_index still hold the last fold.
X_train, X_val = X[train_index], X[val_index]
# KFold yields positional indices, so the Series must be indexed with .iloc;
# plain y[...] is a label lookup that only works while y's index is 0..N-1.
y_train, y_val = y.iloc[train_index], y.iloc[val_index]

In [None]:
# Report the shape of every piece of the train/validation split, one per line.
for part in (X_train, y_train, X_val, y_val):
    print(part.shape)

In [None]:
# Reshape for the RNN: features become (samples, 1 timestep, n_features) and
# targets become a (samples, 1) column.

# Take the feature width from the data instead of hard-coding it; shape[-1] is
# the feature axis both before and after the reshape, so re-running is safe.
n_features = X_train.shape[-1]

X_train = X_train.reshape(-1, 1, n_features)
X_val = X_val.reshape(-1, 1, n_features)

# np.asarray accepts a pandas Series as well as an ndarray, so this cell no
# longer fails on a second run (an ndarray has no `.values` attribute).
y_train = np.asarray(y_train).reshape(-1, 1)
y_val = np.asarray(y_val).reshape(-1, 1)

In [None]:
# Confirm the shape of the training tensor after the reshape for the RNN.
X_train.shape

In [None]:
from tensorflow.keras.layers import Conv2D,LSTM,LeakyReLU, MaxPooling2D,Concatenate,Input, Dropout, Flatten, Dense, GlobalAveragePooling2D,Activation, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.models import Model


# ---- create model ----------------------------------------------------------

# Input: one timestep per event. The (timesteps, features) shape is read from
# the already reshaped training tensor rather than hard-coded.
input_layer = Input(shape=X_train.shape[1:])
main_rnn_layer = LSTM(64, return_sequences=True, recurrent_dropout=0.2)(input_layer)

# Head: second LSTM collapses the sequence, then dense + dropout.
rnn = LSTM(32)(main_rnn_layer)
dense = Dense(128)(rnn)
dropout_c = Dropout(0.3)(dense)
# The target is a 0/1 class code and predictions are later thresholded at 0.5,
# so the output must be a probability: a single sigmoid unit.
classes = Dense(1, activation="sigmoid", name="class")(dropout_c)

model = Model(input_layer, classes)

# ---- compile model ---------------------------------------------------------
# All callbacks watch the validation loss: shrink the learning rate after 4
# stagnant epochs, stop after 20, and keep only the best model on disk.
callbacks = [ReduceLROnPlateau(monitor='val_loss', patience=4, verbose=1, factor=0.6),
             EarlyStopping(monitor='val_loss', patience=20),
             ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)]
# The model has exactly one output, so exactly one loss is passed (not a list
# of two). Binary cross-entropy matches the sigmoid output and 0/1 target.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer="adam",
              metrics=["accuracy"])


model.summary()
# ---- fit the model ---------------------------------------------------------
history = model.fit(X_train, y_train,
          epochs = 250,
          batch_size = 16,
          validation_data=(X_val,  y_val),
          callbacks=callbacks)


In [None]:
# Training vs. validation loss per epoch. The curves are drawn in the same
# order as the legend entries below.
for curve_key in ('loss', 'val_loss'):
    plt.plot(history.history[curve_key])

plt.title('Loss over epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Validation'], loc='best')
plt.show()

In [None]:
# Check the shape of the normalized test features before predicting.
X_test.shape

In [None]:
# Restore the weights stored by the ModelCheckpoint callback during training.
model.load_weights("best_model.h5")

# Add the single-timestep axis the model expects. Note that this rebinds the
# name `test` (previously the raw test frame) to the reshaped feature array.
test = X_test.reshape(-1, 1, 30)


predictions = model.predict(test)

In [None]:
# Show the shape of the prediction array first, then its contents.
print(predictions.shape, predictions, sep='\n')

In [None]:
# Load the random submission file shipped with the data; its EventId and
# RankOrder columns are reused below to build the submission frame.
sub = pd.read_csv('../input/higgs-boson/random_submission.zip')

In [None]:
# Display the loaded submission template as the cell output.
sub

In [None]:
# Inspect the container type returned by model.predict.
type(predictions)

In [None]:
# Turn the model scores into hard 0/1 class codes: strictly above 0.5 -> 1.
is_signal = predictions > 0.5
pred = np.where(is_signal, 1, 0)
pred

In [None]:
# Take the first column of the thresholded predictions and wrap it in a Series.
first_column = pred[:, 0]
test_predict = pd.Series(first_column)

In [None]:
# Display the Series of predicted class codes.
test_predict

In [None]:
# Assemble the submission frame: ids and rank placeholders come from the loaded
# submission file, the class codes from the model.
# NOTE(review): the three Series are aligned on their row index, so this
# assumes `sub` lists events in the same order as the test file -- confirm.
submission_columns = {
    "EventId": sub['EventId'],
    "RankOrder": sub['RankOrder'],
    "Class": test_predict,
}
test_predict = pd.DataFrame(submission_columns)
test_predict

In [None]:
# Translate the numeric class codes back to letter labels: 1 -> 's', 0 -> 'b'.
# Only the Class column is touched. A frame-wide replace(1, 's') would also
# rewrite any EventId or RankOrder cell that happens to equal 1 (or 0), turning
# those numeric columns into mixed-type columns.
test_predict['Class'] = test_predict['Class'].replace({1: 's', 0: 'b'})
test_predict

In [None]:
# Rank events by the model's continuous score (lowest score -> rank 1) instead
# of by the 'b'/'s' strings. Ranking the two-valued Class column leaves the
# order inside each class arbitrary; ranking the scores keeps every event
# thresholded to 1 above every event thresholded to 0 and orders events
# meaningfully within each class. The stable sort makes ties deterministic, and
# the result is always a permutation of 1..N.
scores = predictions[:, 0]
order = np.argsort(scores, kind="stable")
ranks = np.empty(len(scores), dtype=np.int64)
ranks[order] = np.arange(1, len(scores) + 1)  # +1 to start at 1
test_predict['RankOrder'] = ranks

In [None]:
# Display the submission frame with the recomputed RankOrder column.
test_predict

In [None]:
# Write the submission to disk; index=False keeps the row index out of the CSV.
test_predict.to_csv("submission.csv",index=False)