# NOTEBOOK DESCRIPTION
In this notebook, we are going to classify mushrooms as poisonous or edible. We will create a neural network model using TensorFlow and optimize its structure by defining a HyperModel with the Keras Tuner. Finally, we will compare the optimized structure with two hand-made ones to check their performance.

The dataset that we are going to use was downloaded from Kaggle and can be found at the following link: https://www.kaggle.com/uciml/mushroom-classification. It is composed of 23 columns: one for the class (p = poisonous, e = edible) and 22 categorical features. For more information, check the link above.

# IMPORTS

In [None]:
import IPython

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers

# Comment out the next line if kerastuner is already installed in your environment
!pip install -q -U keras-tuner
import kerastuner as kt

# Comment out the next line if tensorflow_docs is already installed in your environment
!pip install -q git+https://github.com/tensorflow/docs
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots


print(tf.__version__)

# GET DATA

In [None]:
# Load the mushroom CSV file into a pandas DataFrame.
data = pd.read_csv('../input/mushroom-classification/mushrooms.csv')

In [None]:
# All columns are categorical and there are no NaN values.
# Print the (rows, columns) shape and preview the first row.
print(data.shape)
data.head(1)

# PREPROCESSING

#### Note:
Knowing the maximum number of categories is useful for selecting which kind of categorical feature column to use.

In [None]:
# Count the distinct values of every column. The count has to be taken
# column-wise: applying it with axis=1 would count the distinct values found
# in each *row* instead. dropna=False makes a NaN count as a category, which
# matches the semantics of len(column.unique()).
num_categories_per_column = data.nunique(dropna=False)
print('Max # categories = {}'.format(max(num_categories_per_column)))

### Train - Test split

In [None]:
# Hold out 30% of the rows as the test set, then split 20% of the remaining
# rows off as the validation set. The fixed random_state is passed to both
# calls so the splits use the same seed on every run.
train, test = train_test_split(data, test_size=0.3, random_state=42)
train, val = train_test_split(train, test_size=0.2, random_state=42)

# Report how many rows ended up in each subset.
print('# test = {}'.format(len(test)))
print('# train = {}'.format(len(train)))
print('# val = {}'.format(len(val)))

In [None]:
def df_to_dataset(dataframe, shuffle=False, batch_size=32):
    """Convert a mushroom DataFrame into a batched ``tf.data.Dataset``.

    Args:
        dataframe: DataFrame holding the feature columns plus a ``'class'``
            column whose values are ``'e'`` or ``'p'``. It is copied, so the
            caller's frame is left untouched.
        shuffle: When true, shuffle the examples using a buffer as large as
            the whole frame.
        batch_size: Number of examples per batch.

    Returns:
        A dataset yielding ``(features_dict, labels)`` batches, where the
        labels are 1 for ``'e'`` and 0 for ``'p'``.
    """
    dataframe = dataframe.copy()

    # For a correct binary classification, the string labels are replaced
    # with 0 and 1.
    labels = dataframe.pop('class').map({'e': 1, 'p': 0})
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))

    if shuffle:
        # Dataset transformations are not in-place: shuffle() returns a new
        # dataset, so the result must be reassigned or it is silently lost.
        ds = ds.shuffle(buffer_size=len(dataframe))

    return ds.batch(batch_size)

### Create Datasets

In [None]:
# Training-loop constants shared by the tuner and the hand-made models.
BATCH_SIZE = 32
N_TRAIN = len(train)
STEPS_PER_EPOCH = N_TRAIN // BATCH_SIZE
FIT_MAX_EPOCHS = 200

# Input pipelines: only the training dataset is asked to shuffle.
train_ds = df_to_dataset(train, batch_size=BATCH_SIZE, shuffle=True)
val_ds = df_to_dataset(val, batch_size=BATCH_SIZE, shuffle=False)

### Select columns to train the model


In [None]:
# Display the available column names.
data.columns

In [None]:
# This will be the columns used by the model
columns_selected = ['cap-shape', 'cap-surface', 'cap-color',  
                    'stalk-shape', 'stalk-surface-above-ring', 'stalk-surface-below-ring',
                    'stalk-color-above-ring', 'stalk-color-below-ring']

# BUILD HYPER MODEL 

### Optimizer

In [None]:
# Learning-rate schedule shared by every model: inverse-time decay starting
# at 0.001, with the decay horizon expressed as 100 epochs' worth of steps.
schedules = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=STEPS_PER_EPOCH * 100,
    decay_rate=1,
    staircase=False,
)


def get_optimizer():
    """Return a new Adam optimizer driven by the shared ``schedules`` object."""
    return tf.keras.optimizers.Adam(learning_rate=schedules)

### Callbacks

In [None]:
def get_callbacks():
    """Build a fresh list of the callbacks used for every training run.

    Returns:
        A list with an ``EpochDots`` progress reporter (reporting every 50
        epochs) and an ``EarlyStopping`` callback that monitors
        ``val_binary_crossentropy`` with a patience of 50 epochs.
    """
    progress_dots = tfdocs.modeling.EpochDots(report_every=50)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_binary_crossentropy',
        patience=50,
    )
    return [progress_dots, early_stopping]

### Feature layer

In [None]:
def get_feature_layer(columns):
    """Create a ``DenseFeatures`` layer with one indicator column per feature.

    Args:
        columns: Names of the categorical columns to feed to the model. The
            target column ``'class'`` must not be included. Each name is
            looked up in the module-level ``data`` frame to build its
            vocabulary from the column's unique values.

    Returns:
        A ``layers.DenseFeatures`` instance wrapping the indicator columns.
    """
    indicator_columns = [
        tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_vocabulary_list(
                name, data[name].unique()
            )
        )
        for name in columns
    ]
    return layers.DenseFeatures(indicator_columns)

### HyperModel

In [None]:
class MushroomHyperModel(kt.HyperModel):
    """Keras Tuner search space for a small dense binary classifier.

    Tunable hyperparameters:
      * ``num_layers``: number of hidden layers (1 or 2).
      * ``units_<i>``: neurons in hidden layer ``i`` (2, 6, 10 or 14).
    """

    def __init__(self, columns, *args, **kwargs):
        """Store the feature column names and initialise the base class.

        Args:
            columns: Names of the categorical columns used as model input.
            *args: Forwarded to ``kt.HyperModel.__init__``.
            **kwargs: Forwarded to ``kt.HyperModel.__init__``.
        """
        # The base initialiser must run so that the attributes it sets up
        # exist on this instance.
        super().__init__(*args, **kwargs)
        self.columns = columns

    def build(self, hp):
        """Build and compile one candidate model.

        Args:
            hp: Hyperparameter container supplied by the tuner.

        Returns:
            A compiled ``tf.keras.Sequential`` model.
        """
        model = tf.keras.Sequential()

        # Feature layer
        model.add(get_feature_layer(self.columns))

        # Hidden layers (1 or 2 layers with 2, 6, 10 or 14 neurons)
        for i in range(hp.Int('num_layers', 1, 2, step=1)):
            hp_units = hp.Int('units_' + str(i), min_value=2, max_value=14, step=4)
            model.add(layers.Dense(units=hp_units, activation='relu'))

        # Output layer: the sigmoid makes the model emit probabilities.
        model.add(layers.Dense(1, activation='sigmoid'))

        # Compile model. The output is already a probability, so the loss and
        # the metric must NOT treat it as logits (from_logits=False);
        # otherwise the sigmoid would be applied twice. The metric is named
        # explicitly because the tuner objective and the early-stopping
        # monitor refer to 'val_binary_crossentropy'.
        model.compile(optimizer=get_optimizer(),
                      loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                      metrics=[
                          tf.keras.losses.BinaryCrossentropy(
                              from_logits=False, name='binary_crossentropy'),
                          'accuracy'
                      ])

        return model

### Create the tuner

In [None]:
# Hypermodel restricted to the selected cap/stalk feature columns.
hypermodel = MushroomHyperModel(columns_selected)

# Hyperband tuner ranking trials by the validation binary cross-entropy.
tuner = kt.Hyperband(
    hypermodel,
    objective='val_binary_crossentropy',
    max_epochs=200,
    project_name='hyper_cap_stalk',
)

### Define a class to clear the output of the keras Tuner

In [None]:
class ClearTrainingOutput(tf.keras.callbacks.Callback):
    """Callback that clears the notebook cell output when a training run ends."""

    def on_train_end(self, *args, **kwargs):
        """Clear the cell output; any arguments passed by Keras are ignored."""
        # `self` is declared explicitly instead of being swallowed by *args.
        IPython.display.clear_output(wait=True)

### Search the best structure with the tuner

In [None]:
# Combine the output-clearing callback with the shared training callbacks.
# get_callbacks() returns a list, so extend() is used to keep `callbacks` a
# flat list of callback instances; append() would nest a list inside it.
callbacks = [ClearTrainingOutput()]
callbacks.extend(get_callbacks())
tuner.search(train_ds, validation_data=val_ds, callbacks=callbacks)

### Get optimal hyperparameters

In [None]:
# Ask the tuner for one set of best hyperparameters and take it from the list.
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]

In [None]:
# Report the tuned architecture: the layer count first, then one line per
# hidden layer with its number of units.
best_num_layers = best_hps.get('num_layers')
print('# Layers = {}'.format(best_num_layers))

for layer_index in range(best_num_layers):
    units_key = 'units_' + str(layer_index)
    print('# units = {}'.format(best_hps.get(units_key)))

### Train a model with the optimal hyper parameters

In [None]:
# Training histories of every model, keyed by model name.
history = {}

# Rebuild the model from the best hyperparameters and train it from scratch.
hyper_model = tuner.hypermodel.build(best_hps)
history['hyper'] = hyper_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=FIT_MAX_EPOCHS,
    callbacks=get_callbacks(),
    verbose=0,
)


# REGULAR MODELS (not HyperModels)

### Compile and fit

In [None]:
def compile_and_fit(model):
    """Compile ``model`` for binary classification and train it.

    The model is trained on the module-level ``train_ds`` and validated on
    ``val_ds`` for at most ``FIT_MAX_EPOCHS`` epochs, using the callbacks
    from ``get_callbacks()``.

    Args:
        model: An uncompiled Keras model whose output layer applies a
            sigmoid, i.e. it emits probabilities rather than logits.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # The models passed in end with a sigmoid layer, so the loss and the
    # metric must consume probabilities (from_logits=False); treating them
    # as logits would apply the sigmoid twice. The metric is named
    # explicitly because early stopping and the plots refer to
    # 'binary_crossentropy'.
    model.compile(optimizer=get_optimizer(),
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                  metrics=[
                      tf.keras.losses.BinaryCrossentropy(
                          from_logits=False, name='binary_crossentropy'),
                      'accuracy'
                  ])

    history = model.fit(train_ds,
                        validation_data=val_ds,
                        epochs=FIT_MAX_EPOCHS,
                        callbacks=get_callbacks(),
                        verbose=0)

    return history

### Small model

In [None]:
# Small hand-made baseline: a single hidden layer of 6 ReLU units followed
# by a sigmoid output unit.
small_model = tf.keras.models.Sequential()
small_model.add(get_feature_layer(columns_selected))
small_model.add(layers.Dense(6, activation='relu'))
small_model.add(layers.Dense(1, activation='sigmoid'))

history['small'] = compile_and_fit(small_model)

In [None]:
# Big hand-made baseline: three hidden layers of 18 ReLU units each,
# followed by a sigmoid output unit.
big_model = tf.keras.models.Sequential([
    get_feature_layer(columns_selected),
    layers.Dense(18, activation='relu'),
    layers.Dense(18, activation='relu'),
    layers.Dense(18, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Train the big model itself, so that history['big'] records its own
# training run rather than a second run of the small model.
history['big'] = compile_and_fit(big_model)

# SHOW PERFORMANCE

In [None]:
def plot_history(history):
    """Plot the binary cross-entropy curves of the given training histories.

    Args:
        history: Training histories to plot, passed as-is to
            ``tfdocs.plots.HistoryPlotter.plot``.
    """
    plt.figure(figsize=(12, 6))
    tfdocs.plots.HistoryPlotter(
        metric='binary_crossentropy',
        smoothing_std=10,
    ).plot(history)

In [None]:
# Plot the histories collected in the `history` dict on a single figure.
plot_history(history)

# CONCLUSION
Depending on the execution, the HyperModel may perform slightly better and, although it is not much better than the small one, it is still a good example of how to use Keras Tuner to find a good structure for our model. I hope you find this notebook useful!

# THANKS FOR READING! :)