In [2]:
!pip install keras-tuner --upgrade

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from keras_tuner.tuners import RandomSearch

In [7]:
# Read the training and test CSV files from the working directory
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [8]:
# Keep the test Ids aside; they are needed again when the submission is written
test_ids = test_df["Id"]

# Feature frames: everything except the identifier (and, for training, the target)
X = train_df.drop(columns=["Id", "SalePrice"])
X_test = test_df.drop(columns=["Id"])

# Target on the log1p scale to reduce skewness; predictions are mapped back with expm1
y = np.log1p(train_df["SalePrice"])

In [9]:
# Stack training and test features (training rows first) so both receive
# identical preprocessing.
# ignore_index=True gives the combined frame a fresh 0..n-1 row index. Without it
# the row labels of X and X_test are both carried over, and since each frame was
# read with the default index, the same labels would appear twice in all_data.
# Later steps only rely on row order, which is unchanged.
all_data = pd.concat([X, X_test], axis=0, ignore_index=True)

In [10]:
# Impute missing values one column at a time:
#   object columns    -> the literal placeholder "Missing"
#   all other columns -> that column's median over the combined (train + test) data
for name in all_data.columns:
    column = all_data[name]
    fill_value = "Missing" if column.dtype == "object" else column.median()
    all_data[name] = column.fillna(fill_value)

In [11]:
# Label-encode each object column: every distinct value becomes an integer code.
# A fresh encoder is fitted per column on the combined (train + test) values.
object_columns = all_data.select_dtypes(include="object").columns
for name in object_columns:
    le = LabelEncoder()
    le.fit(all_data[name])
    all_data[name] = le.transform(all_data[name])

In [12]:
# Standardize features: learn the scaling statistics on the combined matrix,
# then apply them to that same matrix
scaler = StandardScaler().fit(all_data)
all_data_scaled = scaler.transform(all_data)

In [13]:
# Undo the concatenation: the first len(train_df) rows are the training rows,
# everything after them belongs to the test set
n_train = len(train_df)
X, X_test = all_data_scaled[:n_train], all_data_scaled[n_train:]

In [16]:
# Model factory handed to KerasTuner
def build_model(hp):
    """Assemble and compile a feed-forward regressor from tuner-chosen hyperparameters.

    Hyperparameters requested from ``hp``:
        num_layers  -- number of hidden blocks, 2 to 4
        units_{i}   -- width of hidden block i, 64 to 256 in steps of 32
        dropout_{i} -- dropout rate of hidden block i, 0.2 to 0.5 in steps of 0.1
        lr          -- Adam learning rate, 1e-4 to 1e-2, sampled on a log scale

    The input width is read from the module-level feature matrix ``X``.

    Returns:
        The compiled Sequential model (loss: mean squared error).
    """
    net = Sequential()
    n_features = X.shape[1]
    net.add(Input(shape=(n_features,)))

    depth = hp.Int('num_layers', 2, 4)
    for idx in range(depth):
        # Hidden block: Dense -> LeakyReLU -> BatchNormalization -> Dropout
        width = hp.Int(f'units_{idx}', 64, 256, step=32)
        net.add(Dense(units=width))
        net.add(LeakyReLU(negative_slope=0.1))
        net.add(BatchNormalization())
        drop_rate = hp.Float(f'dropout_{idx}', 0.2, 0.5, step=0.1)
        net.add(Dropout(rate=drop_rate))

    # Single-unit output layer for the regression target
    net.add(Dense(1))

    learning_rate = hp.Float('lr', 1e-4, 1e-2, sampling='log')
    net.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return net


In [17]:
# Initialize the tuner with RandomSearch.
# seed fixes the tuner's random sampling of hyperparameter combinations so the
# same trials are proposed on every run; it does not seed weight initialisation
# or data shuffling inside Keras.
# NOTE(review): trial results are stored under directory/project_name — confirm
# whether a stale 'kt_dir' from an earlier run should be cleared before re-running.
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=1,
    seed=42,
    directory='kt_dir',
    project_name='house_price_tuning'
)

In [18]:
# Early stopping: end training once val_loss has gone 10 epochs without
# improving, and restore the weights from the best epoch
early_stop = EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor='val_loss',
)

In [19]:
# Run the hyperparameter search: each trial trains for up to 100 epochs with
# 10% of the training rows held out for validation and early stopping enabled
tuner.search(
    X,
    y,
    epochs=100,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stop],
)

Trial 10 Complete [00h 00m 12s]
val_loss: 0.22199398279190063

Best val_loss So Far: 0.024212408810853958
Total elapsed time: 00h 02m 09s


In [22]:
# Ask the tuner for its single best model and keep it
top_models = tuner.get_best_models(num_models=1)
best_model = top_models[0]

In [23]:
# The model predicts log1p(SalePrice); expm1 maps the predictions back to prices
log_preds = best_model.predict(X_test)
preds = np.expm1(log_preds)

46/46 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step


In [24]:
# Build the two-column submission table (Id, SalePrice) and write it to disk
sale_prices = preds.ravel()  # collapse the model output to one dimension
submission = pd.DataFrame({"Id": test_ids, "SalePrice": sale_prices})
submission.to_csv("submission_tuned.csv", index=False)

print("Submission saved: submission_tuned.csv")

Submission saved: submission_tuned.csv
