# Tutorial 1 - KERAS DNN Classification

We will predict the price category, among 4 categories, of an AIRBNB listing (`price_category` column). This is a multi-class classification task.

**The unit of analysis is an AIRBNB LISTING**

# Setup

In [None]:
# Common imports
import numpy as np
import pandas as pd

# Seed NumPy's global random generator so that steps drawing from it repeat between runs
np.random.seed(42)


# Get the data

In [None]:
# Load the listings. The target in this tutorial is the "price_category" column;
# "price" and "price_gte_150" are dropped further below because they can't be used here.

airbnb = pd.read_csv("airbnb.csv")
airbnb.head()

# Split the data into train and test

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the listings for testing.
# random_state pins the split to this call, so re-running the cell (or running cells
# out of order) gives the same rows instead of depending on the state of NumPy's
# global random generator. The one-hot encoded width used later depends on which
# rows end up in the training split, so a stable split matters here.
train_set, test_set = train_test_split(airbnb, test_size=0.3, random_state=42)

### Be careful: we haven't separated the target column yet

# Data Prep

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

from sklearn.preprocessing import FunctionTransformer

## Drop the variables we can't use in this tutorial

In [None]:
# Remove the columns that can't be used in this tutorial from both splits
# (same column list for train and test so the two stay aligned)

train = train_set.drop(columns=['price', 'price_gte_150'])
test = test_set.drop(columns=['price', 'price_gte_150'])

## Separate the target variable (we don't want to transform it)

In [None]:
# Inputs: everything except the target column
train_inputs = train.drop(columns=['price_category'])
test_inputs = test.drop(columns=['price_category'])

# Target: kept as a one-column DataFrame (double brackets), not a Series
train_target = train[['price_category']]
test_target = test[['price_category']]

## Feature Engineering: Let's derive a new column

#### Remember, the "minimum_nights" column is highly skewed. Let's try to transform it. 

In [None]:
train_inputs['minimum_nights'].value_counts()

In [None]:
train_inputs['minimum_nights'].hist()

In [None]:
# Import power transformer from sklearn. It will help us create a "normal distribution"
from sklearn.preprocessing import PowerTransformer

# Stand-alone transformer used only for the exploration in the next cells;
# the preprocessing pipeline below builds its own PowerTransformer.
PT = PowerTransformer(method = 'yeo-johnson', standardize=True)

In [None]:
# Fit on the training column and transform it (double brackets keep the input 2-D)
transformed_min_nights = PT.fit_transform(train_inputs[['minimum_nights']])

In [None]:
pd.DataFrame(transformed_min_nights).hist()

NOTE1: We haven't added the transformed variable to the input variables yet. To do that, we will use the pipeline.

NOTE2: We don't need to create a function (like before). This transformer already has fit() and transform(). So, we can use this in the pipeline.

##  Identify the numerical and categorical columns

In [None]:
train_inputs.dtypes

**At this stage, you can manually identify numeric, binary, and categorical columns as follows:**

`numeric_columns = ['latitude', 'longitude', 'accommodates', 'bathrooms', 'bedrooms', 'beds', 'Number of amenities', 'guests_included', 'price_per_extra_person', 'minimum_nights', 'number_of_reviews', 'number_days_btw_first_last_review', 'review_scores_rating']`
 
 `binary_columns = ['host_is_superhost', 'host_identity_verified']`
 
 `categorical_columns = ['neighbourhood_cleansed', 'property_type', 'room_type', 'bed_type', 'cancellation_policy']`
 
<br>
 
**If you do not want to manually type these, you can do the below tricks:**

In [None]:
# Numerical columns: every column with a numeric dtype
numeric_columns = list(train_inputs.select_dtypes(include=np.number).columns)

# Categorical columns: every column with the object dtype
categorical_columns = list(train_inputs.select_dtypes(include='object').columns)

In [None]:
# Binary columns are listed by hand; they get their own pipeline branch
# instead of being scaled with the other numeric columns
binary_columns = [
    'host_is_superhost',
    'host_identity_verified',
]

In [None]:
# Be careful: numerical columns already includes the binary columns,
# So, we need to remove the binary columns from numerical columns.
#
# The list is rebuilt with a filter rather than calling list.remove() per column:
# remove() raises ValueError once a column is already gone, which made this cell
# fail when it was executed a second time. Filtering is safe to re-run.

numeric_columns = [col for col in numeric_columns if col not in binary_columns]

In [None]:
binary_columns

In [None]:
numeric_columns

In [None]:
categorical_columns

In [None]:
# Columns routed to the power-transform branch ('trans') of the preprocessor
transformed_columns = ['minimum_nights']

# Pipeline

In [None]:
# Numeric branch: fill missing values with the column median, then standardize
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

In [None]:
# Categorical branch: replace missing values with the literal 'unknown', then one-hot encode.
# handle_unknown='ignore' lets transform() accept categories that were not seen during fit.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='unknown')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

In [None]:
# Binary branch: only fill missing values with the most frequent value; no scaling or encoding
binary_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
])

In [None]:
# Derived-column branch: median imputation followed by a standardized Yeo-Johnson power transform
my_new_column = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('powertransformer', PowerTransformer(method='yeo-johnson', standardize=True)),
])

In [None]:
# Send each column group through its own branch and stack the results side by side.
# NOTE(review): 'minimum_nights' is listed under 'trans' and, being numeric, is presumably
# also in numeric_columns, so it would contribute two output columns -- confirm this is intended.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_columns),
        ('cat', categorical_transformer, categorical_columns),
        ('binary', binary_transformer, binary_columns),
        ('trans', my_new_column, transformed_columns),
    ],
    remainder='passthrough',
)

#passthrough is an optional step. You don't have to use it.

# Transform: fit_transform() for TRAIN

In [None]:
#Fit and transform the train data
# (the preprocessor learns its imputation values, scaling and categories from the training inputs only)
train_x = preprocessor.fit_transform(train_inputs)

train_x

In [None]:
train_x.shape

# Transform: transform() for TEST

In [None]:
# Transform the test data
# (transform only: the preprocessor fitted on the training inputs is reused, not refitted)
test_x = preprocessor.transform(test_inputs)

test_x

In [None]:
test_x.shape

# Keras needs Ordinal target values for classification

In [None]:
from sklearn.preprocessing import OrdinalEncoder

# Learn the category -> integer-code mapping from the training target, then encode it
ord_enc = OrdinalEncoder().fit(train_target)
train_y = ord_enc.transform(train_target)

train_y

In [None]:
# Encode the test target with the mapping learned on the training target (no refit)
test_y = ord_enc.transform(test_target)

test_y

# Baseline

In [None]:
from sklearn.dummy import DummyClassifier

# Baseline that always predicts the most frequent class of the training target
dummy_clf = DummyClassifier(strategy="most_frequent")

dummy_clf.fit(train_x, train_y)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
#Baseline Train Accuracy: share of training rows the dummy classifier labels correctly
dummy_train_pred = dummy_clf.predict(train_x)
baseline_train_acc = accuracy_score(train_y, dummy_train_pred)

print(f'Baseline Train Accuracy: {baseline_train_acc}')

In [None]:
#Baseline Test Accuracy: share of test rows the dummy classifier labels correctly
dummy_test_pred = dummy_clf.predict(test_x)
baseline_test_acc = accuracy_score(test_y, dummy_test_pred)

print(f'Baseline Test Accuracy: {baseline_test_acc}')

# Multiclass classification using Keras



In [None]:
import tensorflow as tf
from tensorflow import keras

# fix random seed for reproducibility
# (seeds NumPy's and TensorFlow's global random generators)
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
#What is your input shape?
#(meaning: how many neurons should be in the input layer?)

train_x.shape

## Single Layer (Shallow) Network

In [None]:
#Define the model: for multi-class

model = keras.models.Sequential()

# The input width is read from the preprocessed training matrix rather than hard-coded:
# the number of one-hot columns depends on the categories seen when the preprocessor
# was fitted. Keras documents `shape` as a tuple, so it is passed as one.
model.add(keras.layers.Input(shape=(train_x.shape[1],)))
model.add(keras.layers.Dense(50, activation='relu'))
model.add(keras.layers.Dense(4, activation='softmax'))

#final layer: there has to be 4 nodes with softmax (because we have 4 categories)


In [None]:
# Compile: Adam optimizer, cross-entropy loss for integer-coded labels, accuracy as the metric
adam = keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=adam,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

## Ordinal target (as in this example):

Final layer's activation = **softmax** <br>
loss = **sparse_categorical_crossentropy**

## Binary target 

Final layer has only 1 neuron <br>
Final layer's activation = **sigmoid** <br>
loss = **binary_crossentropy**

## One-hot target (rare cases)

Final layer's activation = **softmax** <br>
loss = **categorical_crossentropy**

## Regression task (target is continuous)

Final layer has only 1 neuron (keras.layers.Dense(1))<br>
Activation is None<br>
loss = **mean_squared_error**


In [None]:
# Train for 20 epochs in mini-batches of 100 rows.
# The test split is supplied as validation data, so its loss/accuracy is reported after every epoch.
history = model.fit(
    x=train_x,
    y=train_y,
    batch_size=100,
    epochs=20,
    validation_data=(test_x, test_y),
)

In [None]:
# evaluate the model on the test split (verbose=0: no progress output)

scores = model.evaluate(test_x, test_y, verbose=0)

scores

# In results, first is loss, second is accuracy

In [None]:
# extract the accuracy from model.evaluate
# scores[0] is the loss and scores[1] the accuracy (the only compiled metric).
# The labels are written out explicitly because model.metrics_names can report a
# generic entry such as 'compile_metrics' on newer Keras releases.

print(f"loss: {scores[0]:.2f}")
print(f"accuracy: {scores[1] * 100:.2f}%")


## Deep Network (Pipe Architecture)

In [None]:
#Define the model: for multi-class

# Input width comes from the preprocessed data (the one-hot width depends on the fitted
# preprocessor), and is passed as a shape tuple as Keras documents.
n_inputs = train_x.shape[1]

model = keras.models.Sequential()

model.add(keras.layers.Input(shape=(n_inputs,)))
model.add(keras.layers.Dense(67, activation='relu'))
model.add(keras.layers.Dense(67, activation='relu'))
model.add(keras.layers.Dense(67, activation='relu'))
model.add(keras.layers.Dense(4, activation='softmax'))

#final layer: there has to be 4 nodes with softmax (because we have 4 categories)

In [None]:
# Compile the deep network with a new Adam optimizer (learning rate 0.01)
adam = keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=adam,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the deep network: 20 epochs, batches of 100, test split reported as validation data
history = model.fit(
    x=train_x,
    y=train_y,
    batch_size=100,
    epochs=20,
    validation_data=(test_x, test_y),
)

In [None]:
# evaluate the model on the test split (verbose=0: no progress output)

scores = model.evaluate(test_x, test_y, verbose=0)

scores

# In results, first is loss, second is accuracy

In [None]:
# extract the accuracy from model.evaluate
# Explicit labels: scores[0] is the loss, scores[1] the accuracy. model.metrics_names
# is not used because newer Keras releases can list a generic 'compile_metrics' entry.

print(f"loss: {scores[0]:.2f}")
print(f"accuracy: {scores[1] * 100:.2f}%")


# Wide & Deep Network

## Let's send all inputs to the last layer

In [None]:
# Functional-API model: the raw inputs skip the hidden stack and are concatenated
# with the last hidden layer before the output layer.
# (The throw-away Sequential() that used to be created first was overwritten
# immediately and has been removed.)

# Input width is taken from the data and passed as a shape tuple
inputlayer = keras.layers.Input(shape=(train_x.shape[1],))

hidden1 = keras.layers.Dense(67, activation='relu')(inputlayer)
hidden2 = keras.layers.Dense(67, activation='relu')(hidden1)
hidden3 = keras.layers.Dense(67, activation='relu')(hidden2)

concat = keras.layers.Concatenate()([inputlayer, hidden3])

#final layer: there has to be 4 nodes with softmax (because we have 4 categories)
output = keras.layers.Dense(4, activation='softmax')(concat)

model = keras.Model(inputs=[inputlayer], outputs=output)


In [None]:
# Compile the wide & deep network with its own Adam optimizer (learning rate 0.01)
adam = keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=adam,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the wide & deep network: 20 epochs, batches of 100, test split as validation data
history = model.fit(
    x=train_x,
    y=train_y,
    batch_size=100,
    epochs=20,
    validation_data=(test_x, test_y),
)

In [None]:
# evaluate the model on the test split (verbose=0: no progress output)

scores = model.evaluate(test_x, test_y, verbose=0)

scores

# In results, first is loss, second is accuracy

In [None]:
# extract the accuracy from model.evaluate
# Labels are spelled out (loss first, accuracy second) instead of being read from
# model.metrics_names, which can hold a generic 'compile_metrics' entry on newer Keras.

print(f"loss: {scores[0]:.2f}")
print(f"accuracy: {scores[1] * 100:.2f}%")


## Let's send two inputs to the last layer

In [None]:
# Select the first two columns of the preprocessed training matrix.
# NOTE(review): these are presumably the scaled latitude and longitude, in whatever
# order numeric_columns lists them -- verify against numeric_columns.
#(WHY: because lat and lon are good and important predictors)

lon_lat = train_x[:,:2]

lon_lat

In [None]:
# Functional-API model with two inputs: input1 carries the two location columns and
# goes straight to the output layer; input2 carries all features through the hidden stack.
# (The throw-away Sequential() that used to be created first was overwritten
# immediately and has been removed.)

# Shapes are passed as tuples; the full-feature width is read from the data
input1 = keras.layers.Input(shape=(2,))
input2 = keras.layers.Input(shape=(train_x.shape[1],))

hidden1 = keras.layers.Dense(67, activation='relu')(input2)
hidden2 = keras.layers.Dense(67, activation='relu')(hidden1)
hidden3 = keras.layers.Dense(67, activation='relu')(hidden2)

concat = keras.layers.Concatenate()([input1, hidden3])

#final layer: there has to be 4 nodes with softmax (because we have 4 categories)
output = keras.layers.Dense(4, activation='softmax')(concat)

model = keras.Model(inputs=[input1, input2], outputs=output)

In [None]:
# Compile the two-input network with its own Adam optimizer (learning rate 0.01)
adam = keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=adam,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the model

# The two input arrays are passed as a list, in the same order as the model's
# `inputs` (two location columns first, then all features). A list of arrays is the
# documented form for multi-input models and matches the list used to define the model.
history = model.fit([lon_lat, train_x], train_y,
                    validation_data=([test_x[:,:2], test_x], test_y),
                    epochs=20, batch_size=100)

In [None]:
# evaluate the model

# Inputs are passed as a list in the same order as the model's `inputs`
# (two location columns first, then all features).
scores = model.evaluate([test_x[:,:2], test_x], test_y, verbose=0)

scores

# In results, first is loss, second is accuracy

In [None]:
# extract the accuracy from model.evaluate
# scores[0] is the loss, scores[1] the accuracy; the labels are explicit because
# model.metrics_names can carry a generic 'compile_metrics' entry on newer Keras.

print(f"loss: {scores[0]:.2f}")
print(f"accuracy: {scores[1] * 100:.2f}%")


# Optimizers, Learning rate, Dropout, Initialization & Activation functions

In [None]:
#Define the model: for multi-class


#Set the learning rate:
lr = 0.001


#Available optimizers:
# Only arguments accepted by current Keras optimizers are passed. The legacy
# `decay` / `schedule_decay` arguments are dropped (recent releases reject or ignore
# them) and `epsilon` is left at the library default instead of None.
adagrad = keras.optimizers.Adagrad(learning_rate=lr)
sgd = keras.optimizers.SGD(learning_rate=lr, momentum=0.0, nesterov=False)
rmsprop = keras.optimizers.RMSprop(learning_rate=lr, rho=0.9)
adam = keras.optimizers.Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999, amsgrad=False)
nesterov_adam = keras.optimizers.Nadam(learning_rate=lr, beta_1=0.9, beta_2=0.999)

#Initializations:
# Given by name so that every layer builds its own initializer. A single shared
# initializer instance can produce the same values for every layer that uses it.
xavier = 'glorot_normal'
he = 'he_normal'


# Activation functions. Uncomment only one
activation = 'elu'
#activation = 'relu'
#activation = 'tanh'
#activation = 'sigmoid'



#See the dropout layers below:
# Input width is read from the preprocessed data and passed as a shape tuple
input1 = keras.layers.Input(shape=(train_x.shape[1],))

hidden1 = keras.layers.Dense(67, activation=activation, kernel_initializer=xavier)(input1)
drop1   = keras.layers.Dropout(0.2)(hidden1)
hidden2 = keras.layers.Dense(67, activation=activation, kernel_initializer=xavier)(drop1)
drop2   = keras.layers.Dropout(0.2)(hidden2)
hidden3 = keras.layers.Dense(67, activation=activation, kernel_initializer=xavier)(drop2)

#final layer: there has to be 4 nodes with softmax (because we have 4 categories)
output = keras.layers.Dense(4, activation='softmax')(hidden3)

#Compile
model = keras.Model(inputs=input1, outputs=output)

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=nesterov_adam, metrics=['accuracy'])

In [None]:
# Train the dropout network: 20 epochs, batches of 100, test split as validation data
history = model.fit(
    x=train_x,
    y=train_y,
    batch_size=100,
    epochs=20,
    validation_data=(test_x, test_y),
)



In [None]:
# evaluate the model on the test split (verbose=0: no progress output)

scores = model.evaluate(test_x, test_y, verbose=0)

scores

# In results, first is loss, second is accuracy

In [None]:
# extract the accuracy from model.evaluate
# Loss is scores[0] and accuracy scores[1]; labels are explicit because
# model.metrics_names can hold a generic 'compile_metrics' entry on newer Keras.

print(f"loss: {scores[0]:.2f}")
print(f"accuracy: {scores[1] * 100:.2f}%")


# Early stopping based on validation results

To do this, you need to send the validation data sets to the fit() function and use a callback.

EarlyStopping Arguments:

**monitor:** quantity to be monitored.<br>
**min_delta:** minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.<br>
**patience:** number of epochs with no improvement after which training will be stopped.<br>
**verbose:** verbosity mode.<br>
**mode:** one of {auto, min, max}. In min mode, training will stop when the quantity monitored has stopped decreasing; in max mode it will stop when the quantity monitored has stopped increasing; in auto mode, the direction is automatically inferred from the name of the monitored quantity.<br>
**baseline:** Baseline value for the monitored quantity to reach. Training will stop if the model doesn't show improvement over the baseline.<br>
**restore_best_weights:** whether to restore model weights from the epoch with the best value of the monitored quantity. If False, the model weights obtained at the last step of training are used.

In [None]:
#Define the model: for multi-class

model = keras.models.Sequential()

# Input width is read from the preprocessed data (it depends on the fitted one-hot
# encoder) and passed as a shape tuple, which is the form Keras documents.
model.add(keras.layers.Input(shape=(train_x.shape[1],)))
for _ in range(3):
    # three identical hidden layers of 67 ReLU units
    model.add(keras.layers.Dense(67, activation='relu'))
model.add(keras.layers.Dense(4, activation='softmax'))

#final layer: there has to be 4 nodes with softmax (because we have 4 categories)

In [None]:
# Compile with a fresh Nadam optimizer. An optimizer instance keeps state tied to the
# variables of the model it has trained, so the `nesterov_adam` object that already
# trained the previous model is not reused here; recent Keras releases raise an error
# when a used optimizer is applied to a different model.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=keras.optimizers.Nadam(learning_rate=lr), metrics=['accuracy'])

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Watch the validation loss and stop after 5 epochs without improvement
earlystop = EarlyStopping(monitor='val_loss', mode='auto', patience=5, verbose=1)
callback = [earlystop]

# Up to 100 epochs; the callback decides when to stop early
model.fit(
    x=train_x,
    y=train_y,
    batch_size=100,
    epochs=100,
    validation_data=(test_x, test_y),
    callbacks=callback,
)

# Grid Search

In [None]:
def build_model(hidden_layer_sizes, dropout, meta=None):
    """Build an uncompiled feed-forward softmax classifier.

    Args:
        hidden_layer_sizes: iterable with the number of units of each hidden
            layer, in order. Each hidden layer uses ReLU and is followed by a
            Dropout layer.
        dropout: dropout rate applied after every hidden layer.
        meta: optional dict that scikeras passes to model-building functions
            that accept it; its "n_features_in_" entry gives the input width.
            When omitted, the width of the notebook-level `train_x` is used.

    Returns:
        A keras Sequential model ending in a 4-unit softmax layer.
    """
    # Avoid a hard-coded input width: take it from scikeras when available,
    # otherwise from the preprocessed training matrix.
    n_features = meta["n_features_in_"] if meta else train_x.shape[1]

    model = keras.models.Sequential()
    model.add(keras.layers.Input(shape=(n_features,)))
    for hidden_layer_size in hidden_layer_sizes:
        model.add(keras.layers.Dense(hidden_layer_size, activation="relu"))
        model.add(keras.layers.Dropout(dropout))
    model.add(keras.layers.Dense(4, activation='softmax'))
    return model

In [None]:
from scikeras.wrappers import KerasClassifier, KerasRegressor

# Wrap build_model as a scikit-learn style classifier.
# hidden_layer_sizes and dropout are the default arguments for build_model;
# loss, optimizer and learning rate are used when the wrapper compiles the model.
keras_clf = KerasClassifier(
    model=build_model,
    hidden_layer_sizes=(50,),
    dropout=0,
    optimizer="adam",
    optimizer__learning_rate=0.01,
    loss="sparse_categorical_crossentropy",
    batch_size=100,
    verbose=False,
)

In [None]:
# Early-stopping callback for the search below: monitors val_loss with a patience of 5 epochs
earlystop = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')

callback = [earlystop]

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Candidate values for the search. `model__` keys are meant for build_model's
# arguments and `optimizer__learning_rate` for the optimizer (scikeras routing prefixes).
params = {
    'optimizer__learning_rate': [0.05, 0.1],
    'model__hidden_layer_sizes': [(100, ), (50, 50, ), (67, 67, 67)],
    'model__dropout': [0, 0.1],
}

# Tries n_iter=5 of the 2 * 3 * 2 = 12 combinations, each scored by accuracy with 3-fold CV
rnd_search_cv = RandomizedSearchCV(keras_clf, params, scoring='accuracy', n_iter=5, cv=3)

# The extra keyword arguments are forwarded to every fit of the wrapped classifier.
# NOTE(review): with epochs=5 and patience=5 the early-stopping callback likely never
# fires, and the test split is used as validation data during the search -- confirm intended.
rnd_search_cv.fit(train_x, train_y, epochs=5, validation_data=(test_x, test_y), 
                  callbacks=callback, verbose=0)


In [None]:
rnd_search_cv.best_params_

In [None]:
# evaluate the model
# Predict with the best estimator found by the search, on both the train and test inputs

train_preds = rnd_search_cv.best_estimator_.predict(train_x)

test_preds = rnd_search_cv.best_estimator_.predict(test_x)

In [None]:
from sklearn.metrics import accuracy_score

print("TRAIN:")

# accuracy_score takes the true labels first and the predictions second,
# matching the order used for the baseline accuracy earlier in the notebook.
print(f"Accuracy: {accuracy_score(train_y, train_preds):.2f}")

In [None]:
print("TEST:")

# accuracy_score takes the true labels first and the predictions second,
# matching the order used for the baseline accuracy earlier in the notebook.
print(f"Accuracy: {accuracy_score(test_y, test_preds):.2f}")