In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# Any results you write to the current directory are saved as output.

In [None]:
# Load the train and test CSVs from the `data-clean` input dataset.
train = pd.read_csv('../input/data-clean/clean_train.csv')
test = pd.read_csv('../input/data-clean/clean_test.csv')

In [None]:
# Show every column name of the training frame as a plain list.
print(train.columns.tolist())

In [None]:
# Keep the test TransactionIDs aside; they become the ID column of the submission frame.
tid=test['TransactionID']

### Removing highly correlated features

In [None]:
# Absolute pairwise correlations between the numeric/boolean columns only.
# Selecting the dtypes explicitly keeps this working on pandas >= 2.0, where
# DataFrame.corr() no longer silently skips object columns.
corr_matrix = train.select_dtypes(include=['number', 'bool']).corr().abs()

# Keep only the strict upper triangle so every column pair is inspected once.
# The builtin `bool` replaces `np.bool`, which was removed in NumPy 1.24.
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

# Columns whose absolute correlation with an earlier column is greater than 0.70
to_drop = [column for column in upper.columns if any(upper[column] > 0.70)]

In [None]:
# Number of columns flagged by the correlation filter.
len(to_drop)

In [None]:
# Identity columns that are discarded outright.
droplist=['id_23','id_27','id_33','id_30','id_34','id_16','id_31','id_28','id_29','id_15','id_35','id_36','id_37','id_38']

# Remove the same features from BOTH frames so train and test stay aligned
# (previously `droplist` was only removed from train). errors='ignore' keeps the
# test-side drop from failing if one of the listed columns is absent there.
# `columns=` already selects the axis, so the redundant `axis=1` is omitted.
train = train.drop(columns=droplist).drop(columns=to_drop)
test = test.drop(columns=droplist, errors='ignore').drop(columns=to_drop)

In [None]:
# Print the dtype / non-null summary of the remaining training columns.
train.info()

In [None]:
# Discard the 'Unnamed: 0' column from both frames
# (presumably an index column written out with the CSV -- TODO confirm).
train, test = (frame.drop(columns=['Unnamed: 0']) for frame in (train, test))

In [None]:
# list(train.select_dtypes(include=['float64']).columns)

### Label Encoding

In [None]:
# Dtypes treated as numeric features (8-bit and unsigned integer dtypes are not listed).
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

# select_dtypes() only ever returns columns of `train`, so the column labels
# can be taken directly.
numerical = train.select_dtypes(include=numerics).columns.tolist()
categorical = train.select_dtypes(include='object').columns.tolist()

In [None]:
# Display the names of the object-dtype (categorical) columns.
categorical

In [None]:
# NOTE: this binds a second name to the SAME DataFrame object; it is not an
# independent copy. It is only read later to fetch the target column.
train_copy = train

# Exclude the target from the numeric feature list. Rebuilding the list
# (instead of list.remove) keeps this cell safe to re-run: a second
# `numerical.remove('isFraud')` would raise ValueError.
numerical = [col for col in numerical if col != 'isFraud']

In [None]:
# Label-encode every object column of `train` and, with the SAME encoder, the
# matching column of `test`, so a category maps to the same integer code in
# both frames. Each encoder is fitted on the union of train and test values so
# that categories occurring only in the test set still receive a valid code
# (and stay within the vocabulary size recorded in `category_counts`).
category_counts = {}
from sklearn import preprocessing
for f in train.columns:
    if train[f].dtype == 'object':
        lbl = preprocessing.LabelEncoder()
        in_test = f in test.columns
        fit_values = pd.concat([train[f], test[f]]) if in_test else train[f]
        lbl.fit(list(fit_values.values))
        train[f] = lbl.transform(list(train[f].values))
        if in_test:
            test[f] = lbl.transform(list(test[f].values))
        # Number of classes plus one spare slot; used as the Embedding
        # vocabulary size in make_model().
        category_counts[f] = len(list(lbl.classes_)) + 1

# reset_index() adds an 'index' column to `train`; a later cell drops it again.
train= train.reset_index()

In [None]:
# Encode every object-dtype column still present in `test`, fitting a fresh
# LabelEncoder on that column's own values.
object_columns = [name for name in test.columns if test[name].dtype == 'object']
for name in object_columns:
    encoder = preprocessing.LabelEncoder()
    column_values = list(test[name].values)
    test[name] = encoder.fit(column_values).transform(column_values)

In [None]:
# Discard the 'index' column that reset_index() introduced.
train = train.drop(columns=['index'])

In [None]:
# train_ = train["isFraud"]
# train = train.drop(["isFraud"],axis=1)

In [None]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

for column in numerical:
    # Log-compress wide-ranged, non-negative features. Non-negativity is checked
    # on BOTH frames: log1p of a value below -1 is NaN (and -inf at -1), so
    # gating on train alone could silently corrupt the test column.
    is_wide = train[column].max() > 100
    is_non_negative = train[column].min() >= 0 and test[column].min() >= 0
    if is_wide and is_non_negative:
        train[column] = np.log1p(train[column])
        test[column] = np.log1p(test[column])

    # Standardise using statistics computed over train and test together.
    train_values = train[column].values.reshape(-1, 1)
    test_values = test[column].values.reshape(-1, 1)
    scaler = StandardScaler()
    scaler.fit(np.concatenate([train_values, test_values]))
    train[column] = scaler.transform(train_values)
    test[column] = scaler.transform(test_values)

In [None]:
# Name of the label column used when splitting off the training targets.
target = 'isFraud'

In [None]:
def get_input_features(df):
    """Convert a DataFrame into the dict of arrays fed to the model.

    The result has a 'numerical' entry holding the columns listed in the
    module-level `numerical` as a single array, plus one entry per name in
    the module-level `categorical` holding that column as an array. The keys
    match the names given to the Input layers in make_model().
    """
    inputs = {'numerical': np.array(df[numerical])}
    inputs.update((name, np.array(df[name])) for name in categorical)
    return inputs

### Model Implementation
* Different combinations of layers can be tried to improve the score.

In [None]:
from keras.layers import Concatenate, Input, Dense, Embedding, Flatten, Dropout, BatchNormalization, SpatialDropout1D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.models import Model
from keras.optimizers import  Adam
import keras.backend as k
from keras.optimizers import SGD
import graphviz

In [None]:
# Display the per-column vocabulary sizes recorded during label encoding.
category_counts

In [None]:
def make_model():
    """Build and compile the embedding + dense binary classifier.

    Every name in `categorical` gets a single-value Input and its own
    Embedding (vocabulary size from `category_counts`, width
    int(log1p(size) + 1)). The embeddings pass through SpatialDropout1D, are
    flattened and concatenated with the 'numerical' input (after a light
    Dropout). Five BatchNormalization -> Dense(128, relu) -> Dropout stages
    follow, ending in a single sigmoid unit.

    Returns:
        A Model compiled with SGD (lr=0.003) and binary cross-entropy loss.
    """
    k.clear_session()

    # Categorical branch: create all inputs first, then one embedding per input.
    categorical_inputs = [Input(shape=[1], name=cat) for cat in categorical]

    categorical_embeddings = []
    for cat, cat_input in zip(categorical, categorical_inputs):
        vocab_size = category_counts[cat]
        embed_width = int(np.log1p(vocab_size) + 1)
        embed_layer = Embedding(vocab_size, embed_width, name = cat + "_embed")
        categorical_embeddings.append(embed_layer(cat_input))

    merge_categoricals = Concatenate(name = "categorical_conc")
    categorical_logits = merge_categoricals(
        [Flatten()(SpatialDropout1D(.1)(cat_emb)) for cat_emb in categorical_embeddings]
    )

    # Numerical branch: one wide input with a small amount of dropout.
    numerical_inputs = Input(shape=[train[numerical].shape[1]], name = 'numerical')
    numerical_logits = Dropout(.1)(numerical_inputs)

    x = Concatenate()([categorical_logits, numerical_logits])

    # Dense trunk: the dropout rate after each Dense layer, in order.
    for drop_rate in (.2, .4, .4, .2, .2):
        x = BatchNormalization()(x)
        x = Dense(128, activation = 'relu')(x)
        x = Dropout(drop_rate)(x)
    out = Dense(1, activation = 'sigmoid')(x)

    model = Model(inputs=categorical_inputs + [numerical_inputs], outputs=out)
    model.compile(optimizer=SGD(lr = 0.003), loss = "binary_crossentropy")
    return model

In [None]:
# train_test_split is otherwise first imported in a later cell, so on a fresh
# top-to-bottom run this line would raise NameError without the import here.
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable.
X_train, X_val = train_test_split(train, test_size=0.2, random_state=42)

In [None]:
# Convert each split into the dict-of-arrays layout produced by get_input_features().
X_train, X_valid, X_test = (
    get_input_features(frame) for frame in (X_train, X_val, test)
)

In [None]:
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedKFold,KFold

# Split the target with the same test_size / random_state as the feature split,
# which is what keeps these labels aligned with X_train / X_valid.
y_train, y_valid = train_test_split(train_copy[target], test_size=0.2, random_state=42)

# The learning rate is multiplied by `factor` when val_loss plateaus. Keras
# rejects factor >= 1.0 with a ValueError, so the former factor=10 could not
# run; 0.1 cuts the rate tenfold instead.
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=1, mode='auto', verbose = 1)

In [None]:
# Train one epoch at a time with manual early stopping on validation ROC AUC:
# the weights are checkpointed whenever the score improves, and training stops
# after 4 consecutive epochs without improvement.
model = make_model()
best_score = 0
patience = 0
for i in range(100):
    hist = model.fit(X_train, y_train, validation_data = (X_valid,y_valid), batch_size = 512, epochs = 1, verbose = 1)
    valid_preds = model.predict(X_valid, batch_size = 512, verbose = True)
    score = roc_auc_score(y_valid, valid_preds)
    print(score)
    if score > best_score:
        model.save_weights("model.h5")
        best_score = score
        patience = 0
    else:
        patience += 1
        if patience >= 4:
            # Stop here instead of spinning through the remaining iterations as no-ops.
            break

# Restore the best checkpoint so `model` holds the best-scoring weights rather
# than those of the last (worse) epoch. Skipped if nothing was ever saved.
if best_score > 0:
    model.load_weights("model.h5")

In [None]:
# BatchNormalization is imported from the public `keras.layers` path; the
# `keras.layers.normalization` module path is not importable on recent Keras.
from keras.layers import BatchNormalization
from keras.callbacks import LearningRateScheduler
# Sequential was used below without ever being imported (NameError).
from keras.models import Sequential

# Creating the model.
# NOTE: this rebinds `model`, replacing the functional model trained above.
model = Sequential()

# First hidden layer, which also declares the input width.
# NOTE(review): input_dim=111 is hard-coded; it must equal the number of
# feature columns actually fed to fit()/predict() -- confirm against the data.
model.add(Dense(100,
                activation='relu',
                input_dim=111,
                kernel_initializer='uniform'))
# The first argument passed to each Dense layer (100 here, 128 below) is the
# number of hidden units of the layer. A hidden unit is a dimension in the
# representation space of the layer.

# Stacks of Dense layers with relu activations can solve a wide range of problems
# (including sentiment classification), and you'll likely use them frequently.

# Dropout layer to guard against overfitting
model.add(Dropout(0.50))
model.add(BatchNormalization())

# Second hidden layer
model.add(Dense(128,
                kernel_initializer='uniform',
                activation='relu'))

# Another Dropout layer
model.add(Dropout(0.50))
model.add(BatchNormalization())

# Third hidden layer
model.add(Dense(128,
                kernel_initializer='uniform',
                activation='relu'))
model.add(Dropout(0.50))
model.add(BatchNormalization())

# Output layer: a single sigmoid unit for the binary target [0,1]
model.add(Dense(1,
                kernel_initializer='uniform',
                activation='sigmoid'))
# With such a scalar sigmoid output on a binary classification problem, the loss
# function you should use is binary_crossentropy.

# Exponentially decaying learning-rate schedule: 1e-2 * 0.95 ** epoch.
# It is only defined here; it has no effect unless passed to fit() via callbacks.
annealer = LearningRateScheduler(lambda x: 1e-2 * 0.95 ** x)

# Visualizing the model
model.summary()

In [None]:
# A Stochastic Gradient Descent optimizer with momentum. It is only created
# here; compile() below uses Adam, not this object.
sgd = SGD(lr = 0.02, momentum = 0.9)

# Compiling our model
model.compile(optimizer = 'Adam',
                   loss = 'binary_crossentropy',
                   metrics = ['accuracy'])
# optimizers list
# optimizers['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']

# Fitting the ANN to the Training set.
# `epochs` replaces the legacy `nb_epoch` argument, which current Keras no longer accepts.
# NOTE(review): X_train is the dict built by get_input_features(), while this
# Sequential model declares a single flat input -- confirm the inputs are
# compatible (or flatten them) before relying on this cell.
model.fit(X_train, y_train, epochs=15, batch_size=64, validation_split=0.2, verbose = 2)

In [None]:
# Score the test inputs with whatever `model` currently refers to.
y_pred = model.predict(X_test)

In [None]:
# Flatten the prediction array to one dimension and report how many values it holds.
z = np.ravel(y_pred)
print(z.size)

In [None]:
# Assemble the submission frame: saved test IDs next to the flattened predictions.
sub = pd.DataFrame({'TransactionID': tid, 'isFraud': z})

### The implementation below is from [Mobius](https://www.kaggle.com/arashnic)

In [None]:
# Re-split the current training inputs and labels 80/20 with a fixed seed,
# rebinding X_train / X_val / y_train and creating y_val.
# NOTE(review): at this point X_train is the dict returned by
# get_input_features(), not a DataFrame or array -- train_test_split will
# likely reject it; confirm before relying on this cell.
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [None]:
# from keras.layers.normalization import BatchNormalization
# from scipy import optimize

# model = Sequential()

# model.add(Dense(100,input_dim=141,kernel_initializer='uniform',
#                 activation='relu'))

# model.add(Dropout(0.40))

# model.add(BatchNormalization())

# # model.add(Activation('relu'))

# model.add(Dense(100),activation='relu')

# model.add(Dropout(0.40))

# model.add(BatchNormalization())

# # model.add(Activation('relu'))

# model.add(Dense(1,kernel_initializer='uniform',activation='sigmoid'))

# model.compile(optimizer=Adam(lr=0.02), loss="binary_crossentropy", metrics=["accuracy"])

# annealer = LearningRateScheduler(lambda x: 1e-2 * 0.95 ** x)

In [None]:
# from sklearn.metrics import roc_auc_score
# from keras.callbacks import Callback

# class printAUC(Callback()):
    
#     def __init__(self, X_train, y_train):

#         super(printAUC, self).__init__()

#         self.bestAUC = 0

#         self.X_train = X_train[0]

#         self.y_train = y_train[0]


#     def on_epoch_end(self, epoch, logs={}):

#         pred = self.model.predict(np.array(self.X_train))

#         auc = roc_auc_score(self.y_train, pred)

#         print("Train AUC: " + str(auc))

#         pred = self.model.predict(self.validation_data[0])

#         auc = roc_auc_score(self.validation_data[1], pred)

#         print ("Validation AUC: " + str(auc))
#         if (self.bestAUC < auc) : 

#             self.bestAUC = auc

#             self.model.save("bestNet.h5", overwrite=True)

#         return

In [None]:
# model.fit(X_train,y_train, batchsize=32, epochs = 30, callbacks=[annealer, printAUC(X_train, y_train)], validationdata = (X_val,Y_val), verbose=2)

In [None]:
# scores = model.evaluate(X_train, y_train, batch_size=30)
# print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

In [None]:
# Write the two submission columns, in this order, to submission.csv without the index.
sub.loc[:, ['TransactionID','isFraud']].to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# # Fit the model
# history = model.fit(X_train, y_train, validation_split=0.20, 
#                     epochs=18, batch_size=10, verbose=0)

# # list all data in history
# print(history.history.keys())

In [None]:
# # summarizing historical accuracy
# plt.plot(history.history['acc'])
# plt.plot(history.history['val_acc'])
# plt.title('Model Accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['train', 'test'], loc='upper left')
# plt.show()

In [None]:
# # summarize history for loss
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('model loss')
# plt.ylabel('loss')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'], loc='upper left')
# plt.show()

### Some great kernels to learn from

* https://www.kaggle.com/mirichoi0218/ann-making-model-for-binary-classification
* https://www.kaggle.com/parthsuresh/binary-classifier-using-keras-97-98-accuracy
* https://www.kaggle.com/kabure/titanic-eda-keras-nn-pipelines
* https://www.kaggle.com/karthik7395/binary-classification-using-neural-networks/data
* https://www.kaggle.com/harnalashok/deep-learning-for-binary-classification
* https://www.kaggle.com/deepthiappam/keras-binary-classification-neural-networks
* https://www.kaggle.com/c/avito-demand-prediction/discussion/59917
* http://blog.kaggle.com/2018/01/18/an-intuitive-introduction-to-generative-adversarial-networks/
* https://www.kaggle.com/devm2024/keras-model-for-beginners-0-210-on-lb-eda-r-d
* http://blog.kaggle.com/2017/06/15/stacking-made-easy-an-introduction-to-stacknet-by-competitions-grandmaster-marios-michailidis-kazanova/