In [8]:
# !pip install scikeras[tensorflow]


In [10]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
# from scikeras.wrappers import KerasRegressor
from tensorflow.keras import regularizers

In [11]:
data_path = os.path.join(os.getcwd(), '..', '..', 'data', 'preprocessed_data.csv')

In [12]:
df = pd.read_csv(data_path)
df.head()

Unnamed: 0,bedroom,bathroom,beds,guests,wifi,tv,cable_tv,ac,workspace,hot_water,...,lng,distance_to_coastline,room_name,booking_window,stay_duration_in_days,review_sentiment_score,rating,booking_earned,average_daily_rate,rating_rounded
0,1,1,1,2,1,0,0,1,1,1,...,115.113378,15.6035,Bingin Ombak Apartment - 1 Lantai 1,81,4,0.458603,4.0,5911197.97,1477799.0,4.0
1,1,1,1,2,1,0,0,1,1,0,...,115.113378,15.6035,Bingin Ombak Apartment - 2 Lantai 2,81,4,0.458603,4.0,5911197.97,1477799.0,4.0
2,1,2,1,2,1,0,0,0,1,0,...,115.113378,15.6035,Standard (PLEASE IGNORE),81,4,0.45545,4.0,5911197.97,1477799.0,4.0
3,1,2,1,2,1,0,0,0,1,0,...,115.113378,15.6035,Standard (PLEASE IGNORE),81,4,0.45545,4.0,5911197.97,1477799.0,4.0
4,1,3,1,2,1,0,0,0,1,0,...,115.113378,15.6035,Standard (PLEASE IGNORE),81,4,0.452298,4.0,5911197.97,1477799.0,4.0


## Prepare the data

In [13]:
numeric_columns = df.select_dtypes(include=['number'])
numeric_df = numeric_columns.drop(columns=['average_daily_rate'])

# Separate features and target variable
X = numeric_df.drop(columns=['booking_earned'])  # Features
y = numeric_df['booking_earned']  # Target variable

# Melakukan standarisasi pada fitur
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Membagi data menjadi set pelatihan dan pengujian
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


## Coba Model

In [14]:
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))

model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(1))

In [15]:
model.compile(optimizer='adam', loss='mean_absolute_error')

In [16]:
history = model.fit(X_train, y_train, epochs=100, batch_size=64, validation_split=0.2, verbose=1)

Epoch 1/100
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 2052676.8750 - val_loss: 2065098.0000
Epoch 2/100
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 2061806.5000 - val_loss: 2065068.2500
Epoch 3/100
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 2054552.2500 - val_loss: 2065021.5000
Epoch 4/100
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 2057321.0000 - val_loss: 2064960.7500
Epoch 5/100
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 2076146.2500 - val_loss: 2064885.0000
Epoch 6/100
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 2078721.7500 - val_loss: 2064798.3750
Epoch 7/100
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 2093601.0000 - val_loss: 2064701.3750
Epoch 8/100
[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [17]:
# Memprediksi pada set pengujian
y_pred = model.predict(X_test)

# Menghitung MAE
mae = mean_absolute_error(y_test, y_pred)
print(f'MAE: {mae}')

# Memastikan MAE di bawah 100,000
if mae < 100000:
    print("MAE is below 100,000")
else:
    print("MAE is above 100,000")

[1m316/316[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
MAE: 2073906.2446440428
MAE is above 100,000


## Model

In [5]:
# # Mengatur parameter grid
# param_grid = {
#     'batch_size': [10, 20, 30],
#     'epochs': [50, 100, 150],
#     'model__optimizer': ['adam', 'rmsprop'],
#     'model__neurons': [16, 32, 64],
#     'model__activation': ['relu', 'tanh']
# }

In [6]:
# def build_model(optimizer, neurons, activation):
#     model = Sequential()
#     model.add(Dense(neurons, input_dim=X_train_scaled.shape[1], activation=activation))
#     model.add(Dense(neurons, activation=activation))
#     model.add(Dense(1))
#     model.compile(optimizer=optimizer, loss='mean_squared_error')
#     return model

In [9]:
# # Membuat KerasRegressor
# model = KerasRegressor(build_fn=build_model, verbose=1)


In [14]:
# # Mengatur GridSearchCV
# grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=3, verbose=1)

# # Melakukan fit pada GridSearchCV
# grid_result = grid.fit(X_train_scaled, y_train)

# # Menampilkan hasil terbaik
# print(f'Best: {grid_result.best_score_} using {grid_result.best_params_}')
# best_model = grid_result.best_estimator_

# # Memprediksi pada set pengujian
# y_pred = best_model.predict(X_test_scaled)

# # Menghitung MSE
# mse = mean_squared_error(y_test, y_pred)
# print(f'MSE: {mse}')
