In [1]:
import tensorflow as tf
import pandas as pd

In [2]:
# Read the pre-processed train and test CSVs in a single pass over both paths.
training_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./dataset/processed_training_data.csv",
        "./dataset/processed_test_data.csv",
    )
)

#### Preprocessing

In [18]:
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
import numpy as np

# Partition the columns by dtype. Later cells drop "SalePrice" from the
# transformed frame, i.e. the target is treated as one of the numeric columns
# and is standardised together with the numeric features.
numerical_features = training_data.select_dtypes(include='number').columns.tolist()
categorical_features = training_data.select_dtypes(include='object').columns.tolist()

column_transformer = ColumnTransformer(
        transformers=[
            ('scaler', StandardScaler(), numerical_features),
            ('encoder', OneHotEncoder(handle_unknown="ignore"), categorical_features)
        ],
        remainder='passthrough',
        # Always return a dense ndarray. With the default threshold the output
        # is sparse only when its overall density is low enough, so calling
        # .toarray() unconditionally fails whenever a dense array comes back.
        sparse_threshold=0,
    )

processed_features = column_transformer.fit_transform(training_data)

# Get the list of all the features after transformation
# (scaled numeric columns first, then the one-hot columns, matching the
# order of the transformers above).
encoded_cat_columns = column_transformer.named_transformers_['encoder'] \
                                          .get_feature_names_out(input_features=categorical_features)
all_column_names = numerical_features + list(encoded_cat_columns)
processed_df = pd.DataFrame(processed_features, columns=all_column_names)

# Process the test data.
# The transformer was fitted on a frame that contains SalePrice, so the test
# frame needs a placeholder column of ones before it can be transformed; "Id"
# is removed before transforming. Built without mutating test_data so the cell
# can be re-run safely.
processed_test_data = test_data.drop(columns="Id").assign(SalePrice=1.0)

processed_test_features = column_transformer.transform(processed_test_data)
processed_test_df = pd.DataFrame(processed_test_features, columns=all_column_names)

#### Model Builder

In [10]:
from sklearn.model_selection import train_test_split
import keras_tuner as kt

def model_builder(hp):
    """Build and compile a dense regression network from tuner hyperparameters.

    Args:
        hp: Hyperparameter container supplied by the tuner; it is queried for
            'num_layers', one 'units_<i>' per hidden layer, and 'learning_rate'.

    Returns:
        A compiled ``tf.keras.Sequential`` model with a single linear output
        unit, trained and evaluated with mean absolute error.
    """
    # Every column of processed_df except one (the target) is a model input.
    n_inputs = processed_df.shape[1] - 1
    stack = [
        tf.keras.layers.Dense(n_inputs, activation='relu', input_shape=[n_inputs]),
    ]

    # Tunable depth: between 1 and 5 extra hidden layers.
    depth = hp.Int('num_layers', min_value=1, max_value=5, step=1)
    for layer in range(depth):
        # Each hidden layer has its own tunable width (32..512 in steps of 32).
        width = hp.Int(f'units_{layer}', min_value=32, max_value=512, step=32)
        stack.append(tf.keras.layers.Dense(units=width, activation='relu'))

    # Single unit without activation for the regression output.
    stack.append(tf.keras.layers.Dense(1))
    model = tf.keras.Sequential(stack)

    step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    adam = tf.keras.optimizers.Adam(learning_rate=step_size)
    model.compile(optimizer=adam, loss=tf.keras.losses.mae, metrics=['mae'])

    return model

#### Split Train and Test Data

In [5]:
# Separate the target column from the feature columns.
y = processed_df["SalePrice"]
X = processed_df.drop(columns="SalePrice")

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Model Tuning

In [11]:
# End a trial once validation loss has gone 20 epochs without improving.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    verbose=1,
)

# Hyperband search over the space defined in model_builder, ranked by
# validation loss; trial state is stored under tuner_dir/house_price_prediction.
tuner = kt.Hyperband(
    model_builder,
    objective='val_loss',
    max_epochs=100,
    factor=3,
    directory='tuner_dir',
    project_name='house_price_prediction',
)

# The held-out split from train_test_split serves as the validation set.
tuner.search(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=0,
)


INFO:tensorflow:Reloading Tuner from tuner_dir\house_price_prediction\tuner0.json
Epoch 34: early stopping
Epoch 88: early stopping
Epoch 73: early stopping
Epoch 58: early stopping
Epoch 68: early stopping
Epoch 24: early stopping
Epoch 30: early stopping
Epoch 30: early stopping
Epoch 73: early stopping
Epoch 55: early stopping
Epoch 75: early stopping
Epoch 38: early stopping
Epoch 34: early stopping
Epoch 38: early stopping
Epoch 25: early stopping
Epoch 53: early stopping
Epoch 69: early stopping
INFO:tensorflow:Oracle triggered exit


#### Get Best Hyperparameters

In [12]:
# Ask the tuner for its single best trial and show the chosen values.
top_hps = tuner.get_best_hyperparameters(num_trials=1)
best_hps = top_hps[0]
print(best_hps.values)

{'num_layers': 3, 'units_0': 64, 'learning_rate': 0.01, 'units_1': 288, 'units_2': 32, 'units_3': 448, 'units_4': 384, 'tuner/epochs': 34, 'tuner/initial_epoch': 12, 'tuner/bracket': 4, 'tuner/round': 3, 'tuner/trial_id': '0134'}


#### Train Model with Best Params

In [14]:
# No validation data is passed to this fit, so early stopping watches the
# training loss instead of val_loss.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=20,
    verbose=1,
)

# Rebuild the network from the best hyperparameters and fit it on the full
# X / y rather than only the training split.
model = tuner.hypermodel.build(best_hps)
model.fit(
    X,
    y,
    batch_size=32,
    epochs=100,
    callbacks=[early_stopping],
    verbose=0,
)

<keras.callbacks.History at 0x1c2b5adc5e0>

#### Predict Test Data

In [24]:
# Features for the submission set. SalePrice in processed_test_df is only the
# placeholder column that was added so the fitted transformer would accept the
# frame. A dedicated name is used so the validation split (x_test) from the
# train/validation cell is not overwritten.
x_submission = processed_test_df.drop("SalePrice", axis=1)

# The model was trained on the standardised target, so its outputs are in
# scaled units.
pred_scaled = model.predict(x_submission)

# Undo the standardisation for SalePrice only. test_data still holds the raw,
# unscaled feature values, so running the scaler's inverse_transform over all
# numeric columns would overwrite those features with meaningless numbers (and
# again on every re-run of this cell). StandardScaler works column by column,
# so x * scale_ + mean_ for the SalePrice column gives the same price values.
scaler = column_transformer.named_transformers_['scaler']
sale_price_idx = numerical_features.index("SalePrice")
test_data["SalePrice"] = (
    pred_scaled.reshape(-1).astype("float64") * scaler.scale_[sale_price_idx]
    + scaler.mean_[sale_price_idx]
)



#### Export To Submission CSV

In [25]:
# Keep only the two columns written to the submission file; copy so the edit
# below never touches test_data.
submission = test_data[["Id", "SalePrice"]].copy()

# Replace any missing predictions with the mean of the available ones.
# The result is assigned back instead of calling fillna(inplace=True) on a
# column selection, which is deprecated chained assignment and does not update
# the frame under pandas Copy-on-Write.
submission["SalePrice"] = submission["SalePrice"].fillna(submission["SalePrice"].mean())

submission.to_csv("./dataset/submission.csv", index=False)