## Deep Neural Network Method

#### Importing Packages

In [22]:
import keras
import tensorflow

# Helper packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn 

# Modeling process
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder

### Data Processing

In [None]:
# Read the mushrooms CSV into a DataFrame and preview its first rows.
csv_path = '../data/mushrooms.csv'
data = pd.read_csv(csv_path)
data.head()

##### Since the dataset is categorical and we want to train a DNN model, we first need to encode its values as integers.

In [None]:
# Encode every column as integer labels.
# Work on a copy so the raw `data` frame keeps its original values: a plain
# assignment would only alias the same object, and the loop below would then
# overwrite `data` as well.
le = preprocessing.LabelEncoder()
data_en = data.copy()
for column in data_en.columns:
    # fit_transform re-fits `le` on each column in turn, so once the loop ends
    # `le` only remembers the classes of the last column.
    data_en[column] = le.fit_transform(data_en[column])
data_en.head()

### DNN Modeling

##### Now, we define `Y` to be the __class__ feature, which shows whether a certain mushroom is poisonous or edible based on the other features.
##### Notice that in the definition of the set `X`, the __class__ feature is dropped.

In [25]:

# Features: the columns at positions 1 through 22 (everything after the label column).
X = data_en.iloc[:, 1:23].to_numpy()
# Target: the column at position 0, i.e. the encoded class label.
Y = data_en.iloc[:, 0].to_numpy()

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

#### Next, we initialize our model and add two hidden layers with 8 and 5 units respectively, followed by a `Dropout`* layer and an output layer.

##### * `The Dropout layer randomly sets input units to 0 with a frequency of rate at each step during training time, which helps prevent overfitting. Inputs not set to 0 are scaled up by 1/(1 - rate) such that the sum over all inputs is unchanged.`

In [34]:
# Build the network as a plain stack of layers. No input shape is declared
# anywhere, so the model is built from the data on the first call to fit().
DNN = tensorflow.keras.models.Sequential()

# First hidden layer: 8 ReLU units.
DNN.add(tensorflow.keras.layers.Dense(units=8, activation="relu"))

# Second hidden layer: 5 ReLU units.
DNN.add(tensorflow.keras.layers.Dense(units=5, activation="relu"))

# Dropout layer: zeroes 20% of the previous layer's outputs during training.
# No input_shape is passed here: Sequential only honours that argument on its
# first layer, and this layer simply receives the 5 outputs of the layer above.
DNN.add(tensorflow.keras.layers.Dropout(0.2))

# Output layer: a single sigmoid unit for the binary class label.
DNN.add(tensorflow.keras.layers.Dense(units=1, activation="sigmoid"))



In [35]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as an additional metric.
DNN.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

#### After compiling the model and before fitting it, we set up an `EarlyStopping`* callback, which stops training once a monitored metric has stopped improving.

##### *Important arguments:
`1- monitor: Quantity to be monitored, such as 'loss' or 'accuracy'.`

`2- min_delta: Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.`

`3- patience: Number of epochs with no improvement after which training will be stopped.`

`4- verbose: Verbosity mode, 0 or 1. Mode 0 is silent, and mode 1 displays messages when the callback takes an action.`

In [None]:
# Early stopping: end training once the training loss has gone 2 consecutive
# epochs without improving.
my_c = tensorflow.keras.callbacks.EarlyStopping(monitor='loss', patience=2)

# Fit for at most 100 epochs. `callbacks` is documented as a list of callback
# instances, so the single callback is wrapped in a list.
DNN.fit(X_train, Y_train, batch_size=32, epochs=100, callbacks=[my_c], verbose=1)

#### Next, we assess how well our model performs by examining the confusion matrix, the classification report, the `ROC curve`*, and the area under the ROC curve.

##### * `An ROC curve (receiver operating characteristic curve) is a graph showing the performance of a classification model at all classification thresholds.`

In [None]:
# Threshold the model's sigmoid outputs at 0.5 to get hard class predictions.
y_pred = DNN.predict(X_test) > 0.5

print('*' * 20)
# With the model compiled on loss + accuracy, evaluate() yields those two values in that order.
score, acc = DNN.evaluate(X_test, Y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc)

# Tabulate actual vs. predicted classes on the test set.
cm = confusion_matrix(Y_test, y_pred)


In [None]:
# Draw the confusion matrix as an annotated heatmap, then title and label the
# Axes that seaborn returns.
p = sns.heatmap(pd.DataFrame(cm), annot=True, fmt='g', cmap="YlGnBu")
p.set_title('Confusion matrix', y=1.1)
p.set_ylabel('Actual label')
p.set_xlabel('Predicted label')

In [None]:
# Text summary of per-class metrics for the test-set predictions.
report = classification_report(Y_test, y_pred)
print(report)


In [None]:
# Raw sigmoid outputs for the test samples; roc_curve sweeps the decision
# threshold over these scores.
y_pred_proba = DNN.predict(X_test)
fpr, tpr, thresholds = roc_curve(Y_test, y_pred_proba)

# Dashed diagonal as a reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr, tpr, label='ANN')
plt.xlabel('fpr')
plt.ylabel('tpr')
plt.title('ROC curve')
# Without a legend, the label given to plot() above is never displayed.
plt.legend()
plt.show()

# Area under the ROC curve, shown as the cell's output.
roc_auc_score(Y_test, y_pred_proba)