<a href="https://colab.research.google.com/github/shuchimishra/Tensorflow_projects/blob/main/Tensorflow_Code/Regression/Cal_Housing_Regression_w_KerasTuner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import os
import tarfile
import csv
from sklearn.model_selection import train_test_split
from scipy import stats

In [2]:
!wget https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz

--2024-04-09 22:39:27--  https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz
Resolving www.dcc.fc.up.pt (www.dcc.fc.up.pt)... 193.136.39.12
Connecting to www.dcc.fc.up.pt (www.dcc.fc.up.pt)|193.136.39.12|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 441963 (432K) [application/x-gzip]
Saving to: ‘cal_housing.tgz.1’


2024-04-09 22:39:29 (419 KB/s) - ‘cal_housing.tgz.1’ saved [441963/441963]



In [3]:
file = tarfile.open('./cal_housing.tgz', 'r')
file.extractall('')
file.close()

In [4]:
header_names=[]
for row in open('./CaliforniaHousing/cal_housing.domain','r'):
  header_names.append(row.split(':')[0])
print(header_names)

['longitude', 'latitude', 'housingMedianAge', 'totalRooms', 'totalBedrooms', 'population', 'households', 'medianIncome', 'medianHouseValue']


In [5]:
housing_data = pd.read_csv('./CaliforniaHousing/cal_housing.data', header=None, names=header_names)
housing_data.head()

Unnamed: 0,longitude,latitude,housingMedianAge,totalRooms,totalBedrooms,population,households,medianIncome,medianHouseValue
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0


In [6]:
housing_data.describe()

Unnamed: 0,longitude,latitude,housingMedianAge,totalRooms,totalBedrooms,population,households,medianIncome,medianHouseValue
count,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0
mean,-119.569704,35.631861,28.639486,2635.763081,537.898014,1425.476744,499.53968,3.870671,206855.816909
std,2.003532,2.135952,12.585558,2181.615252,421.247906,1132.462122,382.329753,1.899822,115395.615874
min,-124.35,32.54,1.0,2.0,1.0,3.0,1.0,0.4999,14999.0
25%,-121.8,33.93,18.0,1447.75,295.0,787.0,280.0,2.5634,119600.0
50%,-118.49,34.26,29.0,2127.0,435.0,1166.0,409.0,3.5348,179700.0
75%,-118.01,37.71,37.0,3148.0,647.0,1725.0,605.0,4.74325,264725.0
max,-114.31,41.95,52.0,39320.0,6445.0,35682.0,6082.0,15.0001,500001.0


In [7]:
#Create Features and labels
labels = housing_data['medianHouseValue']
features = housing_data.drop('medianHouseValue', axis=1)

#convert into numpy array
labels = np.array(labels)
features = np.array(features)

In [8]:
#Define the split size and training datasize
split_size = 0.8
training_data_size = int(len(labels) * split_size)

#Split training and testing data
features_train=features[:training_data_size]
label_train=labels[:training_data_size]

features_valid=features[training_data_size:]
label_valid=labels[training_data_size:]

# Alternative method:
# features_train, features_valid, label_train, label_valid = train_test_split(features, labels, test_size=0.2, random_state=121)

# **Keras Hyperparameter tuning**

In [9]:
#Install keras-tuner library; uncomment if necessary
!pip install keras-tuner -q

# **Important data preprocessing step to normalize the input**

In [10]:
normalizer = tf.keras.layers.experimental.preprocessing.Normalization()
normalizer.adapt(features_train)
# x = normalizer(features_train)

In [11]:
#Build the model

def build_model(hp):
  model_tune = keras.Sequential()

  inputs = tf.keras.Input(shape=[len(features_train[1])])

  x = normalizer(inputs)

  count = 0  ###---if count==0 than it will make first layer too---

  for i in range(hp.Int(name='layers_num',min_value=1,max_value=3)):

    if count==0:
      x = keras.layers.Dense(
            # Define the hyperparameter.
            units=hp.Int("units", min_value=8, max_value=256, step=8),
            activation="relu", name='units'+str(i))(x)

      # Tune whether to use dropout.
      if hp.Boolean("dropout"):
        # dropout_layer_name = 'dropout_layer_'+str(random.randint(0, 10000))
        x = keras.layers.Dropout(value=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
                                            name='dropout'+str(i))(x)

    else:
      x = keras.layers.Dense(
            # Define the hyperparameter.
            units=hp.Int("units", min_value=8, max_value=256, step=8),
            activation="relu", name='units'+str(i))(x)

      # Tune whether to use dropout.
      if hp.Boolean("dropout"):
        # dropout_layer_name = 'dropout_layer_'+str(random.randint(0, 10000))
        x = keras.layers.Dropout(value=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
                                            name='dropout'+str(i))(x)

    count += 1

  output = keras.layers.Dense(1)(x)
  model_tune = tf.keras.Model(inputs,output)

  # Select optimizer
  optimizer=hp.Choice('optimizer', values=['adam', 'RMSprop', 'SGD'])

  # Conditional for each optimizer
  if optimizer == 'adam':
    learning_rate = hp.Float('lrate', min_value=1e-8, max_value=1e-1, sampling='LOG')

  elif optimizer == 'RMSprop':
    learning_rate = hp.Float('lrate', min_value=1e-8, max_value=1e-1, sampling='LOG')

  elif optimizer == 'SGD':
    learning_rate = hp.Float('lrate', min_value=1e-8, max_value=1e-1, sampling='LOG')
    momentum = hp.Float('momentum', min_value=0.1, max_value=1.0, sampling='linear')

  #compile the model
  model_tune.compile(optimizer=optimizer,
                loss='mean_squared_error',
                metrics=['mse','mae'])

  #Print model summary
  model_tune.summary()

  return model_tune

In [12]:
import keras_tuner

build_model(keras_tuner.HyperParameters())

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 8)]               0         
                                                                 
 normalization (Normalizati  (None, 8)                 17        
 on)                                                             
                                                                 
 units0 (Dense)              (None, 8)                 72        
                                                                 
 dense (Dense)               (None, 1)                 9         
                                                                 
Total params: 98 (396.00 Byte)
Trainable params: 81 (324.00 Byte)
Non-trainable params: 17 (72.00 Byte)
_________________________________________________________________


<keras.src.engine.functional.Functional at 0x7e46a410a950>

# **Start the search**

In [13]:
tuner = keras_tuner.BayesianOptimization( #can be Hyperband, RandomSearch, or BayesianOptimization
    hypermodel=build_model,
    objective="val_mse",
    max_trials=3,
    executions_per_trial=2,
    overwrite=True,
    directory="./CaliHousing-Model-Tuner",
    project_name="KerasTuning",
)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 8)]               0         
                                                                 
 normalization (Normalizati  (None, 8)                 17        
 on)                                                             
                                                                 
 units0 (Dense)              (None, 8)                 72        
                                                                 
 dense_1 (Dense)             (None, 1)                 9         
                                                                 
Total params: 98 (396.00 Byte)
Trainable params: 81 (324.00 Byte)
Non-trainable params: 17 (72.00 Byte)
_________________________________________________________________


In [14]:
#Print summary of search space

tuner.search_space_summary()

Search space summary
Default search space size: 5
layers_num (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
units (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 256, 'step': 8, 'sampling': 'linear'}
dropout (Boolean)
{'default': False, 'conditions': []}
optimizer (Choice)
{'default': 'adam', 'conditions': [], 'values': ['adam', 'RMSprop', 'SGD'], 'ordered': False}
lrate (Float)
{'default': 1e-08, 'conditions': [], 'min_value': 1e-08, 'max_value': 0.1, 'step': None, 'sampling': 'log'}


In [16]:
#Train the model

num_epochs = 20

history = tuner.search(features_train, label_train,
          epochs=num_epochs, verbose=1,
          validation_data=(features_valid, label_valid),
                       callbacks=[keras.callbacks.TensorBoard("/tmp/tb_logs")])

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 8)]               0         
                                                                 
 normalization (Normalizati  (None, 8)                 17        
 on)                                                             
                                                                 
 units0 (Dense)              (None, 120)               1080      
                                                                 
 units1 (Dense)              (None, 120)               14520     
                                                                 
 dense (Dense)               (None, 1)                 121       
                                                                 
Total params: 15738 (61.48 KB)
Trainable params: 15721 (61.41 KB)
Non-trainable params: 17 (72.00 Byte)
_______________________



Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 8)]               0         
                                                                 
 normalization (Normalizati  (None, 8)                 17        
 on)                                                             
                                                                 
 units0 (Dense)              (None, 120)               1080      
                                                                 
 units1 (Dense)              (None, 120)               14520     
                                                                 
 dense (Dense)               (None, 1) 



Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  np.nanmin(values) if self.direction == "min" else np.nanmax(values)


RuntimeError: Number of consecutive failures exceeded the limit of 3.


# **Query the results**

In [None]:
# Get the top 2 models.
models = tuner.get_best_models(num_models=2)
best_model = models[0]
best_model.summary()

In [None]:
tuner.results_summary()

# **Retrain the model**
If you want to train the model with the entire dataset, you may retrieve the best hyperparameters and retrain the model by yourself.

In [None]:

# Get the top 2 hyperparameters.
best_hps = tuner.get_best_hyperparameters(5)

# Build the model with the best hp.
model = build_model(best_hps[0])

# Fit with the entire dataset.
history = model.fit(train_features, train_labels,
          epochs=150, verbose=1,
          validation_data=(test_features, test_labels))