In [None]:
!pip install keras-tuner




In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch
from tensorflow.keras.optimizers.schedules import ExponentialDecay

In [None]:
def dataPipeline(df):
    """Split, filter and scale the raw listings, keeping every intermediate.

    A 70/30 hold-out split (random_state=42) is taken first and only the 70%
    "live" part is processed further.  Its numeric columns are separated into
    the `price` target and the remaining features, the features are min-max
    scaled, and four nested column subsets of the scaled features are built.

    Args:
        df: DataFrame with a numeric `price` column and the feature columns
            named in `ordered_features` below.

    Returns:
        dict with keys `df_live`, `df_backup`, `df_nums`, `scaler` and
        `workableData`.  `workableData` is itself a dict holding `X`, `y`,
        `X_normalized`, `X_normalized_df` and `FeatureDividedData` (whose keys
        are `X2`, `X3`, `X5` and `X7`).
    """
    df_live, df_backup = train_test_split(df, test_size=0.3, random_state=42)

    # Keep numeric columns only and renumber the rows from zero.
    numeric_names = df_live.select_dtypes([np.number]).columns
    df_nums = df_live[numeric_names].reset_index(drop=True)

    # Every numeric column except the target is treated as a feature.
    is_feature = df_nums.columns != 'price'
    X = df_nums.loc[:, is_feature]
    y = df_nums['price'].values

    # Scale each feature to [0, 1] and restore the column labels.
    scaler = MinMaxScaler()
    X_normalized = scaler.fit_transform(X)
    X_normalized_df = pd.DataFrame(X_normalized, columns=X.columns)

    # The feature subsets are nested: each one is a prefix of this list.
    ordered_features = [
        'squareMeters',
        'longitude',
        'poiCount',
        'rooms',
        'centreDistance',
        'clinicDistance',
        'kindergartenDistance',
    ]
    FeatureDividedData = {
        f'X{count}': X_normalized_df[ordered_features[:count]]
        for count in (2, 3, 5, 7)
    }

    workableData = {
        'X': X,
        'y': y,
        'X_normalized': X_normalized,
        'X_normalized_df': X_normalized_df,
        'FeatureDividedData': FeatureDividedData,
    }

    return {
        'df_live': df_live,
        'df_backup': df_backup,
        'df_nums': df_nums,
        'scaler': scaler,
        'workableData': workableData,
    }

In [None]:
def getData():
    """Download the three monthly apartment CSVs and run the data pipeline.

    Each monthly file gets a `Month` column (0 = August, 1 = September,
    2 = October 2023) before the frames are stacked.

    Returns:
        The dict produced by `dataPipeline` for the combined frame.
    """
    base_url = 'https://raw.githubusercontent.com/WitoldSurdej/PFML/master/'
    monthly_files = [
        'apartments_pl_2023_08.csv',
        'apartments_pl_2023_09.csv',
        'apartments_pl_2023_10.csv',
    ]

    frames = []
    for month_index, file_name in enumerate(monthly_files):
        month_df = pd.read_csv(base_url + file_name)
        month_df['Month'] = month_index
        frames.append(month_df)

    # ignore_index=True gives the combined frame a fresh 0..n-1 index; without
    # it every file contributes its own 0-based labels and the index contains
    # duplicates.
    df = pd.concat(frames, ignore_index=True)

    return dataPipeline(df)

In [None]:
# Define your model building function
def build_model(hp):
    """Build and compile a regression network with two tunable hidden layers.

    Args:
        hp: KerasTuner hyperparameter container.  `units_0`, `units_1`
            (each 2, 12 or 22) and `learning_rate` (0.1, 0.01 or 0.001) are
            sampled from it.

    Returns:
        A compiled `keras.Sequential` model with MSE loss and an MAE metric.
    """
    model = keras.Sequential()

    for layer_index in range(2):
        # Each layer needs its own hyperparameter name; registering the same
        # name twice yields the same value, tying both layers to one width.
        units = hp.Int(f'units_{layer_index}', min_value=2, max_value=22, step=10)
        model.add(layers.Dense(units=units, activation='sigmoid'))

    model.add(layers.Dense(1))  # Output layer

    # Compile the model
    learning_rate = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='mse',
                  metrics=['mae'])
    return model




def build_model2(hp):
    """Build a dense regression network whose depth and widths are tunable.

    Hyperparameters read from `hp`, in this order:
      * `num_layers`    - number of hidden layers (1, 3 or 5)
      * `units_<i>`     - width of hidden layer i (2, 12 or 22)
      * `learning_rate` - Adam learning rate (0.1, 0.01 or 0.001)

    Returns:
        A compiled `keras.Sequential` model with a single linear output unit,
        MSE loss and MSE as the reported metric.
    """
    net = keras.Sequential()

    depth = hp.Int('num_layers', min_value=1, max_value=6, step=2)

    # Sample one width per hidden layer, then stack the sigmoid layers.
    widths = [
        hp.Int(f'units_{layer_idx}', min_value=2, max_value=22, step=10)
        for layer_idx in range(depth)
    ]
    for width in widths:
        net.add(keras.layers.Dense(units=width, activation='sigmoid'))

    # Single output neuron for the regression target.
    net.add(keras.layers.Dense(units=1))

    step_size = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3])
    adam = keras.optimizers.Adam(learning_rate=step_size)

    net.compile(optimizer=adam, loss='mse', metrics=['mse'])
    return net


In [None]:
# getData() returns the pipeline's dict of intermediates (not a DataFrame).
dataFrame = getData()

# Number of features to use; FeatureDividedData has keys X2, X3, X5 and X7.
i=3

# NOTE: despite its name, X2 holds the subset selected by `i` (here 'X3').
X2=dataFrame['workableData']['FeatureDividedData'][f'X{i}']
y=dataFrame['workableData']['y']

# First split: 80% train / 20% test.
X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size = 0.2, random_state=42)
# Second split of the training part: 70% for fitting / 30% for validation.
X1_train1, X1_validation, y1_train1, y1_validation = train_test_split(X_train, y_train, test_size = 0.3, random_state=42)



In [None]:
# Instantiate a tuner (e.g., RandomSearch tuner)
tuner = RandomSearch(
    build_model2,
    objective='val_loss',
    max_trials=5,  # Number of hyperparameter combinations to try
    executions_per_trial=1,
    directory='my_dir',
    project_name='allIn',
    # Start from a clean project. Without this an existing my_dir/allIn is
    # reloaded ("Reloading Tuner from ..."), so the reported trials can come
    # from an earlier run with a different build_model2.
    overwrite=True)

# Search for the best hyperparameter configuration
tuner.search(x=X1_train1, y=y1_train1, epochs=5, validation_data=(X1_validation, y1_validation))



Reloading Tuner from my_dir/allIn/tuner0.json


In [None]:
# Get the best hyperparameters
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Re-create the architecture from the best hyperparameters. This calls the
# model-building function again, so the result is a fresh, untrained model.
best_model = tuner.hypermodel.build(best_hyperparameters)

# Finalize the layer shapes. The first dimension is the batch size and must
# stay unspecified (None); passing X_train.shape would bake the number of
# training rows into the model as a fixed batch size.
best_model.build(input_shape=(None, X_train.shape[1]))

# Now, let's summarize the best built model
best_model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (29451, 2)                8         
                                                                 
 dense_3 (Dense)             (29451, 1)                3         
                                                                 
Total params: 11 (44.00 Byte)
Trainable params: 11 (44.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Print the tuner's summary of the recorded trials and their objective scores.
tuner.results_summary()

Results summary
Results in my_dir/allIn
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 0 summary
Hyperparameters:
num_layers: 1
units_0: 2
learning_rate: 0.1
Score: 620113494016.0

Trial 2 summary
Hyperparameters:
num_layers: 1
units_0: 22
learning_rate: 0.01
Score: 620400803840.0

Trial 1 summary
Hyperparameters:
num_layers: 1
units_0: 2
learning_rate: 0.01
Score: 621341179904.0

Trial 3 summary
Hyperparameters:
num_layers: 3
units_0: 2
learning_rate: 0.01
units_1: 2
units_2: 2
Score: 621342359552.0

Trial 4 summary
Hyperparameters:
num_layers: 1
units_0: 2
learning_rate: 0.001
units_1: 22
units_2: 22
Score: 621471531008.0


In [None]:
def expDecay_of_learningRate(initial_learning_rate=1.0, final_learning_rate=0.001, total_epochs=100, steps_per_epoch=1):
  """Return an Adam optimizer whose learning rate decays exponentially.

  The rate is multiplied by a constant factor once every `steps_per_epoch`
  optimizer steps (staircase decay).  The factor is chosen so that after
  `total_epochs` such decays the rate has gone from `initial_learning_rate`
  to `final_learning_rate`.

  Args:
    initial_learning_rate: Learning rate at step 0; must be positive.
    final_learning_rate: Learning rate reached after `total_epochs` decays;
      must be positive.
    total_epochs: Number of decays over which the final rate is reached;
      must be positive.
    steps_per_epoch: Optimizer steps between two decays.  Pass the number of
      batches per epoch (ceil(n_samples / batch_size)) to decay once per
      epoch.  The default of 1 decays on every optimizer step.

  Returns:
    A `keras.optimizers.Adam` instance driven by the schedule.

  Raises:
    ValueError: If any argument is not strictly positive.
  """
  if initial_learning_rate <= 0 or final_learning_rate <= 0:
    raise ValueError('learning rates must be positive')
  if total_epochs <= 0:
    raise ValueError('total_epochs must be positive')
  if steps_per_epoch <= 0:
    raise ValueError('steps_per_epoch must be positive')

  # Overall shrink factor between the first and the last learning rate
  decay_factor = final_learning_rate / initial_learning_rate

  # Per-decay multiplier: applying it total_epochs times gives decay_factor
  decay_rate = decay_factor ** (1 / total_epochs)

  # Create a learning rate schedule with exponential decay
  lr_schedule = ExponentialDecay(
      initial_learning_rate,
      decay_steps=steps_per_epoch,
      decay_rate=decay_rate,
      staircase=True  # For discrete steps (staircase decay)
  )

  # Create an optimizer and use the learning rate schedule
  optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)

  return optimizer


In [None]:
def build_model2(hp):
    """Build a tunable dense regression network with a decaying learning rate.

    This definition replaces the earlier `build_model2` in this notebook.
    Hyperparameters read from `hp`:
      * `num_layers` - number of hidden layers (1, 3, 5, 7, 9 or 11)
      * `units_<i>`  - width of hidden layer i (2, 12, 22, 32, 42 or 52)

    The learning rate is not tuned; the optimizer comes from
    `expDecay_of_learningRate`.

    Returns:
        A compiled `keras.Sequential` model with a single linear output unit,
        MSE loss and MSE as the reported metric.
    """
    model = keras.Sequential()

    # Define the hyperparameter search space for the number of layers
    num_layers = hp.Int('num_layers', min_value=1, max_value=12, step=2)

    # Add a variable number of Dense layers based on the sampled value
    for i in range(num_layers):
        # For each layer, define the number of units (neurons) in the layer
        units = hp.Int(f'units_{i}', min_value=2, max_value=52, step=10)
        model.add(keras.layers.Dense(units=units, activation='sigmoid'))

    # Output layer
    model.add(keras.layers.Dense(units=1))  # Assuming a regression task with one output neuron

    # The helper's three parameters have no defaults in its signature, so a
    # bare call raises TypeError; pass the schedule settings explicitly.
    # NOTE(review): the schedule counts optimizer steps; confirm the helper's
    # step granularity matches the training set size and batch size.
    optimizer = expDecay_of_learningRate(
        initial_learning_rate=1.0,
        final_learning_rate=0.001,
        total_epochs=100,
    )

    # Compile the model with the exponentially decaying learning rate
    model.compile(optimizer=optimizer,
                  loss='mse',
                  metrics=['mse'])
    return model

In [None]:
# Instantiate a tuner (e.g., RandomSearch tuner)
tuner = RandomSearch(
    build_model2,
    objective='val_loss',
    max_trials=5,  # Number of hyperparameter combinations to try
    executions_per_trial=1,
    directory='my_dir2',
    project_name='init',
    # Start from a clean project so trials saved in my_dir2/init by an
    # earlier run are not reloaded and mixed into this search.
    overwrite=True)

# Search for the best hyperparameter configuration
tuner.search(x=X1_train1, y=y1_train1, epochs=50, validation_data=(X1_validation, y1_validation))

Trial 5 Complete [00h 02m 05s]
val_loss: 343182573568.0

Best val_loss So Far: 145301176320.0
Total elapsed time: 00h 09m 39s


In [None]:
# Get the best hyperparameters
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Re-create the architecture from the best hyperparameters. This calls the
# model-building function again, so the result is a fresh, untrained model.
best_model = tuner.hypermodel.build(best_hyperparameters)

# Finalize the layer shapes. The first dimension is the batch size and must
# stay unspecified (None); passing X_train.shape would bake the number of
# training rows into the model as a fixed batch size.
best_model.build(input_shape=(None, X_train.shape[1]))

# Now, let's summarize the best built model
best_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (29451, 42)               168       
                                                                 
 dense_7 (Dense)             (29451, 1)                43        
                                                                 
Total params: 211 (844.00 Byte)
Trainable params: 211 (844.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Print the tuner's summary of the recorded trials and their objective scores.
tuner.results_summary()

Results summary
Results in my_dir2/init
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 2 summary
Hyperparameters:
num_layers: 1
units_0: 42
units_1: 12
units_2: 52
units_3: 42
units_4: 12
Score: 145301176320.0

Trial 01 summary
Hyperparameters:
num_layers: 1
units_0: 32
units_1: 52
units_2: 32
units_3: 12
units_4: 42
Score: 200791801856.0

Trial 4 summary
Hyperparameters:
num_layers: 5
units_0: 12
units_1: 32
units_2: 22
units_3: 32
units_4: 32
units_5: 32
units_6: 52
Score: 343182573568.0

Trial 3 summary
Hyperparameters:
num_layers: 7
units_0: 42
units_1: 12
units_2: 2
units_3: 42
units_4: 32
units_5: 2
units_6: 2
Score: 566641360896.0

Trial 00 summary
Hyperparameters:
num_layers: 5
units_0: 2
units_1: 2
units_2: 2
units_3: 2
units_4: 2
Score: 608905003008.0
