## Deep Learning Keras Model

In [1]:
import time
import numpy as np
import pandas as pd

In [2]:
# Read 'instacart_pca.csv' into a DataFrame and preview its first five rows.
df = pd.read_csv('instacart_pca.csv')
df.head(n=5)

Unnamed: 0,order_id,product_id,aisle_id,orders,order_number,days_reorder_ratio,days_since_prior_order,n_orders,order_hour_of_day,reorder_interval,total_items,reordered
0,1,0.0,0.0,0.0,4,9.0,0.0,0.0,0.0,1.0,0.0,1.0
1,1,1.0,1.0,76.0,4,0.473684,0.0,10.0,0.0,19.0,11.0,1.0
2,1,2.0,2.0,4.0,4,0.552632,0.0,14.0,0.0,16.285714,31.0,0.0
3,1,3.0,2.0,6.0,4,0.75,0.0,12.0,0.0,12.0,0.0,0.0
4,1,4.0,3.0,22.0,4,0.529412,0.0,5.0,0.0,17.0,0.0,1.0


In [3]:
# (rows, columns) of the loaded frame
df.shape

(1384617, 12)

In [4]:
# Target: the 'reordered' column as an (n_rows, 1) array.
y = df['reordered'].to_numpy().reshape(-1, 1)

# Features: every remaining column.
# NOTE(review): order_id stays in X as a model input — confirm that feeding
# an identifier column to the network is intended.
X = df.drop(columns=['reordered'])

(X.shape, y.shape)

((1384617, 11), (1384617, 1))

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
(X_train.shape, y_train.shape)

((969231, 11), (969231, 1))

In [7]:
from kerastuner import HyperModel, RandomSearch
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import log_loss, accuracy_score, classification_report, confusion_matrix

## Keras Tuner Hypermodel

In [8]:
class DLModel(HyperModel):
    """Keras Tuner hypermodel for a dense network with two hidden stages.

    Each hidden stage is ``Dense(relu) -> Dropout -> BatchNormalization``.
    The output layer is a sigmoid ``Dense`` layer and the model is compiled
    with binary cross-entropy and the Adam optimizer.

    Tunable hyperparameters (names are part of the search space):
        dense_units_1, dense_units_2: hidden widths, 32..512 in steps of 32.
        dropout_1, dropout_2: dropout rates, 0.0..0.5 in steps of 0.05.
        learning_rate: Adam learning rate, 1e-4..1e-2, sampled on a log scale.

    Args:
        input_shape: Shape of a single input sample, e.g. ``(n_features,)``.
        num_classes: Number of units in the sigmoid output layer.
    """

    def __init__(self, input_shape, num_classes):
        # Initialise the HyperModel base class so the attributes it manages
        # exist on this instance; the original skipped this call.
        super().__init__()
        self.input_shape = input_shape
        self.num_classes = num_classes

    def _add_hidden_block(self, model, hp, index, default_units, **dense_kwargs):
        """Append one Dense -> Dropout -> BatchNormalization stage to ``model``.

        ``index`` is used as the suffix of the hyperparameter names and
        ``dense_kwargs`` are forwarded to the ``Dense`` layer.
        """
        model.add(
            Dense(
                units=hp.Int(
                    f"dense_units_{index}",
                    min_value=32,
                    max_value=512,
                    step=32,
                    default=default_units,
                ),
                activation="relu",
                **dense_kwargs,
            )
        )
        model.add(
            Dropout(
                rate=hp.Float(
                    f"dropout_{index}",
                    min_value=0.0,
                    max_value=0.5,
                    default=0.25,
                    step=0.05,
                )
            )
        )
        model.add(BatchNormalization())

    def build(self, hp):
        """Build and compile a model for the hyperparameters sampled in ``hp``.

        Args:
            hp: Keras Tuner ``HyperParameters`` object for the current trial.

        Returns:
            A compiled ``Sequential`` model reporting the ``accuracy`` metric.
        """
        model = Sequential()

        # Only the first layer needs to be told the input shape.
        self._add_hidden_block(
            model, hp, index=1, default_units=128, input_shape=self.input_shape
        )
        self._add_hidden_block(model, hp, index=2, default_units=64)

        model.add(Dense(self.num_classes, activation="sigmoid"))

        model.compile(
            optimizer=Adam(
                hp.Float(
                    "learning_rate",
                    min_value=1e-4,
                    max_value=1e-2,
                    # The range spans two orders of magnitude; log sampling
                    # gives each decade equal weight instead of drawing
                    # almost exclusively from [1e-3, 1e-2].
                    sampling="log",
                    default=1e-3,
                )
            ),
            loss="binary_crossentropy",
            metrics=["accuracy"],
        )

        return model

In [9]:
# --- Search configuration ---------------------------------------------------
SEED = 101

# Model input/output sizes are read off the training arrays.
INPUT_SHAPE = X_train.shape[1:]
NUM_CLASSES = y_train.shape[-1]

N_EPOCH_SEARCH = 10
MAX_TRIALS = 5
EXECUTION_PER_TRIAL = 2

# --- Tuner setup ------------------------------------------------------------
hypermodel = DLModel(num_classes=NUM_CLASSES, input_shape=INPUT_SHAPE)

tuner = RandomSearch(
    hypermodel,
    objective='val_accuracy',
    max_trials=MAX_TRIALS,
    executions_per_trial=EXECUTION_PER_TRIAL,
    seed=SEED,
    # NOTE(review): hard-coded absolute path — presumably the Colab working
    # directory; confirm before running in another environment.
    directory='/content/',
    project_name='dl_model',
)

# --- Timed hyperparameter search --------------------------------------------
# 20% of the training rows are held out for the val_accuracy objective.
search_start = time.time()
tuner.search(
    X_train,
    y_train,
    epochs=N_EPOCH_SEARCH,
    validation_split=0.2,
)
search_end = time.time()
elapsed_time = search_end - search_start

tuner.results_summary()

# --- Best model: inspect and score on the held-out test set ------------------
dlm = tuner.get_best_models(num_models=1)[0]
dlm.summary()

loss, accuracy = dlm.evaluate(X_test, y_test)

print(f"Elapsed time = {elapsed_time:10.4f} s, accuracy = {accuracy}, loss = {loss}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO:tensorflow:Oracle triggered exit


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 416)               4992      
_________________________________________________________________
dropout (Dropout)            (None, 416)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 416)               1664      
_________________________________________________________________
dense_1 (Dense)              (None, 384)               160128    
_________________________________________________________________
dropout_1 (Dropout)          (None, 384)               0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 384)               1536      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 3

In [10]:
# Turn the model's sigmoid outputs into hard 0/1 labels with a 0.5 cut-off.
prediction_dlm = (dlm.predict(X_test) > 0.5).astype("int32")

# Report test-set metrics for the thresholded predictions.
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_test, prediction_dlm),
)
print(
    "\nClassification Report:\n",
    classification_report(y_test, prediction_dlm),
)
print(
    "Accuracy Score: ",
    accuracy_score(y_test, prediction_dlm),
)

Confusion Matrix:
 [[ 27704 139121]
 [ 23315 225246]]

Classification Report:
               precision    recall  f1-score   support

         0.0       0.54      0.17      0.25    166825
         1.0       0.62      0.91      0.73    248561

    accuracy                           0.61    415386
   macro avg       0.58      0.54      0.49    415386
weighted avg       0.59      0.61      0.54    415386

Accuracy Score:  0.6089516738647908


In [12]:
# Persist the best tuned model to 'dlm_pca.h5' (path is relative to the
# current working directory).
dlm.save('dlm_pca.h5')