In [8]:
# !pip install scikeras[tensorflow]


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
# from scikeras.wrappers import KerasRegressor
from tensorflow.keras import regularizers

In [2]:
df = pd.read_csv('preprocessed_data.csv')
df.head()

Unnamed: 0,bedroom,bathroom,beds,guests,wifi,tv,cable_tv,ac,workspace,hot_water,...,lng,distance_to_coastline,room_name,booking_window,stay_duration_in_days,review_sentiment_score,rating,booking_earned,average_daily_rate,rating_rounded
0,1,1,1,2,1,0,0,1,1,1,...,115.113378,15.6035,Bingin Ombak Apartment - 1 Lantai 1,81,4,0.458603,4.0,5911197.97,1477799.0,4.0
1,1,1,1,2,1,0,0,1,1,0,...,115.113378,15.6035,Bingin Ombak Apartment - 2 Lantai 2,81,4,0.458603,4.0,5911197.97,1477799.0,4.0
2,1,2,1,2,1,0,0,0,1,0,...,115.113378,15.6035,Standard (PLEASE IGNORE),81,4,0.45545,4.0,5911197.97,1477799.0,4.0
3,1,2,1,2,1,0,0,0,1,0,...,115.113378,15.6035,Standard (PLEASE IGNORE),81,4,0.45545,4.0,5911197.97,1477799.0,4.0
4,1,3,1,2,1,0,0,0,1,0,...,115.113378,15.6035,Standard (PLEASE IGNORE),81,4,0.452298,4.0,5911197.97,1477799.0,4.0


## Prepare the data

In [3]:
numeric_columns = df.select_dtypes(include=['number'])
numeric_df = numeric_columns.drop(columns=['average_daily_rate'])

# Separate features and target variable
X = numeric_df.drop(columns=['booking_earned'])  # Features
y = numeric_df['booking_earned']  # Target variable

# Membagi data menjadi set pelatihan dan pengujian
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Melakukan standarisasi pada fitur
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


## Coba Model

In [4]:
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))

model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(1))

In [5]:
model.compile(optimizer='adam', loss='mean_squared_error')

In [6]:
history = model.fit(X_train, y_train, epochs=200, batch_size=64, validation_split=0.2, verbose=1)

Epoch 1/200
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 25ms/step - loss: 9931409326080.0000 - val_loss: 10125519618048.0000
Epoch 2/200
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - loss: 9954901622784.0000 - val_loss: 10125282639872.0000
Epoch 3/200
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 10196264943616.0000 - val_loss: 10124878938112.0000
Epoch 4/200
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 9902983479296.0000 - val_loss: 10124535005184.0000
Epoch 5/200
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 10253848543232.0000 - val_loss: 10124022251520.0000
Epoch 6/200
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 10567494402048.0000 - val_loss: 10123343822848.0000
Epoch 7/200
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 10195968196608.0000 - v

In [7]:
# Memprediksi pada set pengujian
y_pred = model.predict(X_test)

# Menghitung MSE
mse = mean_squared_error(y_test, y_pred)
print(f'MSE: {mse}')

# Memastikan MSE di bawah 100,000
if mse < 100000:
    print("MSE is below 100,000")
else:
    print("MSE is above 100,000")

[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
MSE: 9655721648135.76
MSE is above 100,000


## Model

In [5]:
# # Mengatur parameter grid
# param_grid = {
#     'batch_size': [10, 20, 30],
#     'epochs': [50, 100, 150],
#     'model__optimizer': ['adam', 'rmsprop'],
#     'model__neurons': [16, 32, 64],
#     'model__activation': ['relu', 'tanh']
# }

In [6]:
# def build_model(optimizer, neurons, activation):
#     model = Sequential()
#     model.add(Dense(neurons, input_dim=X_train_scaled.shape[1], activation=activation))
#     model.add(Dense(neurons, activation=activation))
#     model.add(Dense(1))
#     model.compile(optimizer=optimizer, loss='mean_squared_error')
#     return model

In [9]:
# # Membuat KerasRegressor
# model = KerasRegressor(build_fn=build_model, verbose=1)


In [14]:
# # Mengatur GridSearchCV
# grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=3, verbose=1)

# # Melakukan fit pada GridSearchCV
# grid_result = grid.fit(X_train_scaled, y_train)

# # Menampilkan hasil terbaik
# print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')
# best_model = grid_result.best_estimator_

# # Memprediksi pada set pengujian
# y_pred = best_model.predict(X_test_scaled)

# # Menghitung MSE
# mse = mean_squared_error(y_test, y_pred)
# print(f'MSE: {mse}')
