<a href="https://colab.research.google.com/github/maabel0712/low-code-ai/blob/main/chapter8/keras_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

!wget -q https://storage.googleapis.com/low-code-ai-book/car_prices_train.csv
!wget -q https://storage.googleapis.com/low-code-ai-book/car_prices_valid.csv
!wget -q https://storage.googleapis.com/low-code-ai-book/car_prices_test.csv

train_df = pd.read_csv('./car_prices_train.csv')
y_train = train_df['sellingprice']
X_train = train_df.drop('sellingprice', axis=1)

valid_df = pd.read_csv('./car_prices_valid.csv')
y_valid = valid_df['sellingprice']
X_valid = valid_df.drop('sellingprice', axis=1)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import StringLookup, HashedCrossing, Discretization, Concatenate

# Raw feature columns, grouped by how they are preprocessed further down.
cat_cols = ['make', 'model', 'trim', 'body',
            'transmission', 'state', 'color', 'interior']
num_cols = ['odometer', 'year', 'condition']

# Map every column to the dtype of its symbolic input: strings for the
# categorical columns, 64-bit integers for the numeric ones. Categorical
# columns come first so the resulting dict keeps the cat -> num order.
col_dtypes = {
    **dict.fromkeys(cat_cols, tf.string),
    **dict.fromkeys(num_cols, tf.int64),
}

# One single-value Keras Input per column, named after the column itself.
inputs = {
    col: tf.keras.Input(shape=(1,), name=col, dtype=col_dtype)
    for col, col_dtype in col_dtypes.items()
}


In [None]:
def _one_hot_lookup(col):
  """One-hot encode categorical column `col`.

  A fresh StringLookup layer is adapted on the training values of `col` and
  then applied to the matching symbolic input; the resulting tensor is returned.
  """
  lookup = StringLookup(output_mode='one_hot')
  lookup.adapt(X_train[col])
  return lookup(inputs[col])


# Preprocessed tensors keyed by feature name; later cells add more entries.
preproc_layers = {col: _one_hot_lookup(col) for col in cat_cols}

In [None]:
# Bucketize each numeric column into 10 bins (boundaries are learned from the
# training data by `adapt`) and one-hot encode the bin index.
for num_col in num_cols:
  bucketizer = Discretization(num_bins=10, output_mode='one_hot')
  bucketizer.adapt(X_train[num_col])
  preproc_layers[num_col] = bucketizer(inputs[num_col])


In [None]:
# Feature crosses: each pair of string inputs is hashed together into a fixed
# number of bins and one-hot encoded.
model_trim_cross = HashedCrossing(num_bins=1000, output_mode='one_hot')
model_trim = model_trim_cross((inputs['model'], inputs['trim']))

color_int_cross = HashedCrossing(num_bins=400, output_mode='one_hot')
color_int = color_int_cross((inputs['color'], inputs['interior']))

# Register the crosses next to the per-column encodings.
preproc_layers.update(model_trim=model_trim, color_int=color_int)

In [None]:
# Width of the concatenated one-hot feature vector. 1,430 of these columns are
# fixed by the layer settings in the cells above (3 numeric columns x 10 bins,
# plus 1000 + 400 hashed-cross bins). The remaining 2,473 presumably come from
# the StringLookup vocabularies learned from X_train, so this value has to be
# updated if the training data (and hence the vocabularies) change.
PREPARED_WIDTH = 3903

# Concatenate is documented to take a list of tensors, so materialize the dict
# view instead of relying on how the framework unpacks `dict_values`.
prepared_layer = Concatenate()(list(preproc_layers.values()))

# Force a 2-D (batch, PREPARED_WIDTH) tensor for the Dense layers that follow.
prepared_layer = tf.reshape(prepared_layer, [-1, PREPARED_WIDTH])

In [None]:
# Baseline network: two 16-unit ReLU layers on top of the preprocessed
# features, followed by a single output unit that predicts the selling price.
features = prepared_layer
for n_units in (16, 16):
  features = tf.keras.layers.Dense(n_units, activation='relu')(features)
output = tf.keras.layers.Dense(1)(features)

model = tf.keras.Model(inputs=inputs, outputs=output)
model.compile(optimizer='adam', loss='mae')

# Features are passed as a dict keyed by column name, matching the names of
# the Keras Inputs; the target series rides along as the second tuple element.
train_ds = tf.data.Dataset.from_tensor_slices((dict(X_train), y_train))
train_ds = train_ds.batch(100)

valid_ds = tf.data.Dataset.from_tensor_slices((dict(X_valid), y_valid))
valid_ds = valid_ds.batch(1000)

history = model.fit(x=train_ds, epochs=25, verbose=1, validation_data=valid_ds)


In [None]:
!pip install -q keras-tuner

In [None]:
import keras_tuner as kt
from functools import partial

def _build_model_fn(hp, prepared_layer):
  """Build and compile a three-hidden-layer regression model for one trial.

  Args:
    hp: Keras Tuner hyperparameter container; the three hidden-layer widths
      ('units_1', 'units_2', 'units_3') are sampled from it.
    prepared_layer: Tensor of preprocessed features that the dense stack is
      applied to.

  Returns:
    A compiled `tf.keras.Model` (Adam optimizer, MAE loss) that maps the
    notebook-level `inputs` to a single linear output.
  """
  # Declare every searchable width up front, before any layer is created.
  layer_widths = [
      hp.Int('units_1', min_value=8, max_value=64, step=4),
      hp.Int('units_2', min_value=4, max_value=64, step=4),
      hp.Int('units_3', min_value=4, max_value=32, step=2),
  ]

  # Stack the ReLU hidden layers in order, then the linear regression head.
  net = prepared_layer
  for width in layer_widths:
    net = tf.keras.layers.Dense(width, activation='relu')(net)
  output = tf.keras.layers.Dense(1, activation='linear')(net)

  model = tf.keras.Model(inputs=inputs, outputs=output)
  model.compile(optimizer='adam', loss='mae')
  return model


# Bind the shared preprocessed tensor so the resulting callable only needs `hp`.
build_model = partial(_build_model_fn, prepared_layer=prepared_layer)

In [None]:
# Bayesian search over the layer widths declared in `build_model`, looking for
# the lowest validation loss within at most 20 trials.
val_loss_objective = kt.Objective("val_loss", direction="min")
tuner = kt.BayesianOptimization(
    build_model,
    objective=val_loss_objective,
    max_trials=20,
)

# Every trial is trained with the same short 5-epoch budget on the same data.
search_fit_kwargs = dict(
    x=train_ds,
    epochs=5,
    verbose=1,
    validation_data=valid_ds,
)
tuner.search(**search_fit_kwargs)

In [None]:
# Take the single best hyperparameter set from the search and report the
# chosen width of each hidden layer.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
for hp_name in ('units_1', 'units_2', 'units_3'):
  print(f'{hp_name}:', best_hps.get(hp_name))

In [None]:
# Build a new model from the best hyperparameters found by the tuner; it is
# trained from scratch below with a much larger epoch budget than the search.
best_model = tuner.hypermodel.build(best_hps)

# Stop once val_loss has failed to improve for 5 consecutive epochs and roll
# the weights back to the best epoch. Without restore_best_weights the model
# would keep the weights of the last epoch (5 epochs past the best one), and
# those are the weights that would be evaluated on the test set.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True)

# epochs=1000 is only an upper bound; early stopping is expected to end training.
history = best_model.fit(
    x=train_ds,
    epochs=1000,
    verbose=1,
    callbacks=[early_stopping],
    validation_data=valid_ds
)

In [None]:
# Held-out test split: separate the target from the features exactly as was
# done for the training and validation data.
test_df = pd.read_csv('./car_prices_test.csv')
y_test = test_df['sellingprice']
X_test = test_df.drop(columns='sellingprice')

# Same dict-of-columns layout as the training dataset, in batches of 1000.
test_examples = (dict(X_test), y_test)
test_ds = tf.data.Dataset.from_tensor_slices(test_examples).batch(1000)

# Final expression of the cell: the test loss (MAE) of the retrained model.
best_model.evaluate(test_ds)