In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Train and evaluate a Gradient Boosting regressor that predicts
# 'CO2 Emissions(g/km)' from the remaining columns of cleaned_train.csv.

# Load the data and discard every row that contains a missing value.
df_train = pd.read_csv('cleaned_train.csv')
df_train = df_train.dropna()

# Drop the 'Id' column so it is not used as a feature.
df_train = df_train.drop(['Id'], axis=1)

# Separate the features from the target.
X_combined = df_train.drop(['CO2 Emissions(g/km)'], axis=1)
y_combined = df_train['CO2 Emissions(g/km)']

# One-hot encode categorical variables. Encoding before the split keeps the
# train and test partitions on an identical set of dummy columns.
X_combined = pd.get_dummies(X_combined)

# Split BEFORE scaling: fitting the scaler on the full dataset would leak
# test-set statistics (mean / variance) into the preprocessing step.
X_train, X_test, y_train, y_test = train_test_split(
    X_combined, y_combined, test_size=0.2, random_state=42
)

# Standardize the data: fit on the training rows only, then apply the same
# transformation to the held-out rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Kept so that any later code referring to the fully scaled matrix still
# works; it is now produced by the train-fitted scaler.
X_combined_scaled = scaler.transform(X_combined)

# Gradient Boosting model.
gb_model = GradientBoostingRegressor(
    n_estimators=150, max_depth=10, random_state=42, learning_rate=0.1
)
gb_model.fit(X_train_scaled, y_train)
gb_pred = gb_model.predict(X_test_scaled)

# Evaluate RMSE. The square root is taken explicitly because the
# `squared=False` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6.
gb_rmse = mean_squared_error(y_test, gb_pred) ** 0.5
print(f"RMSE on the test set using Gradient Boosting: {gb_rmse}")
