In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# Load the inflation workbook, derive calendar features and create the
# train/test split used by every model below.
DATA_PATH = 'data_inflasi.xlsx'
DATE_COLUMN = 'tanggal'
TARGET_COLUMN = 'Data Inflasi (dalam persen)'
FEATURE_COLUMNS = ['year', 'month']
TEST_FRACTION = 0.2
RANDOM_SEED = 42

df = pd.read_excel(DATA_PATH)

# Fail fast with a readable message if the workbook does not have the columns
# the rest of the notebook relies on.
missing_columns = [col for col in (DATE_COLUMN, TARGET_COLUMN) if col not in df.columns]
if missing_columns:
    raise KeyError(f"{DATA_PATH} is missing required column(s): {missing_columns}")

# A bare `df.head()` used to sit here; in the middle of a cell its result is
# discarded (only a cell's last expression is displayed), so it was removed.
# Preview the frame from its own cell instead.

# Parse the date column, then expose year and month as numeric features.
df = df.assign(**{DATE_COLUMN: pd.to_datetime(df[DATE_COLUMN])})
df = df.assign(
    year=df[DATE_COLUMN].dt.year,
    month=df[DATE_COLUMN].dt.month,
)

# Features and target
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# NOTE(review): train_test_split shuffles rows by default, so held-out rows are
# interleaved in time with the training rows. If the goal is forecasting future
# values, a chronological split would be more appropriate — confirm intent.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=RANDOM_SEED
)

def _fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` on the training data and score it on the held-out data.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``; fitted in place.
    X_fit, y_fit : features and target passed to ``fit``.
    X_eval, y_eval : features and target used for scoring.

    Returns
    -------
    tuple
        ``(predictions, mae, mse, rmse, r2)``; ``rmse`` is ``np.sqrt(mse)``.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    mae = mean_absolute_error(y_eval, predictions)
    mse = mean_squared_error(y_eval, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_eval, predictions)
    return predictions, mae, mse, rmse, r2


def _print_scores(label, mae, mse, rmse, r2):
    """Print one line of evaluation metrics prefixed with the model ``label``."""
    print(f"{label} - MAE: {mae}, MSE: {mse}, RMSE: {rmse}, R²: {r2}")


# Each model is fitted on (X_train, y_train) and scored on (X_test, y_test).
# The per-model names (<x>_model, y_pred_<x>, mae_<x>, ...) are kept so later
# cells can still refer to them.

# Linear Regression
lr_model = LinearRegression()
y_pred_lr, mae_lr, mse_lr, rmse_lr, r2_lr = _fit_and_score(
    lr_model, X_train, y_train, X_test, y_test
)

# NOTE(review): KNN and the RBF-kernel SVR below both operate on distances
# between feature vectors, and 'year'/'month' are passed in unscaled. Consider
# wrapping them in a scaling pipeline and re-checking the scores — TODO confirm.

# K-Nearest Neighbors
knn_model = KNeighborsRegressor(n_neighbors=5)
y_pred_knn, mae_knn, mse_knn, rmse_knn, r2_knn = _fit_and_score(
    knn_model, X_train, y_train, X_test, y_test
)

# Decision Tree
dt_model = DecisionTreeRegressor(random_state=42)
y_pred_dt, mae_dt, mse_dt, rmse_dt, r2_dt = _fit_and_score(
    dt_model, X_train, y_train, X_test, y_test
)

# Random Forest
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
y_pred_rf, mae_rf, mse_rf, rmse_rf, r2_rf = _fit_and_score(
    rf_model, X_train, y_train, X_test, y_test
)

# Support Vector Machine
svm_model = SVR(kernel='rbf')
y_pred_svm, mae_svm, mse_svm, rmse_svm, r2_svm = _fit_and_score(
    svm_model, X_train, y_train, X_test, y_test
)

# Print evaluation metrics for all models
_print_scores("Linear Regression", mae_lr, mse_lr, rmse_lr, r2_lr)
_print_scores("KNN", mae_knn, mse_knn, rmse_knn, r2_knn)
_print_scores("Decision Tree", mae_dt, mse_dt, rmse_dt, r2_dt)
_print_scores("Random Forest", mae_rf, mse_rf, rmse_rf, r2_rf)
_print_scores("SVM", mae_svm, mse_svm, rmse_svm, r2_svm)


Linear Regression - MAE: 1.7824237286037348, MSE: 6.521937163342014, RMSE: 2.5538083646472014, R²: 0.380264728781014
KNN - MAE: 1.7170612244897958, MSE: 6.050636571428572, RMSE: 2.459804173390348, R²: 0.42504921425518494
Decision Tree - MAE: 1.1053061224489797, MSE: 7.484591836734694, RMSE: 2.7357982083360413, R²: 0.2887902112927885
Random Forest - MAE: 0.8468346938775498, MSE: 2.392893033265302, RMSE: 1.5468978742196597, R²: 0.7726196717588758
SVM - MAE: 2.287821746114424, MSE: 11.060089326166175, RMSE: 3.3256712594852447, R²: -0.050964964333613194


In [3]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Re-read the workbook so this cell does not depend on state from earlier cells.
df = pd.read_excel('data_inflasi.xlsx')

# Parse 'tanggal' as datetimes, then derive the numeric year/month features.
df = df.assign(tanggal=pd.to_datetime(df['tanggal']))
df = df.assign(
    year=df['tanggal'].dt.year,
    month=df['tanggal'].dt.month,
)

# Model inputs (calendar features) and the value to predict.
feature_columns = ['year', 'month']
X = df[feature_columns]
y = df['Data Inflasi (dalam persen)']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Tune a Random Forest with an exhaustive grid search: 3 x 3 x 3 = 27 parameter
# combinations, each scored with 5-fold cross-validation on the training set
# using negative mean squared error.
rf_params = dict(
    n_estimators=[50, 100, 200],
    max_depth=[10, 20, None],
    min_samples_split=[2, 5, 10],
)
rf_model = RandomForestRegressor(random_state=42)
rf_grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=rf_params,
    scoring='neg_mean_squared_error',
    cv=5,
).fit(X_train, y_train)

# Estimator built from the best-scoring parameter combination.
best_rf_model = rf_grid_search.best_estimator_

# Persist the tuned model to disk.
joblib.dump(best_rf_model, 'best_random_forest_model.pkl')

# Fit a Gradient Boosting regressor with fixed (untuned) hyperparameters on the
# training set.
gb_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

# Persist the fitted model to disk.
joblib.dump(gb_model, 'gradient_boosting_model.pkl')

def _score_predictions(y_true, y_pred):
    """Return ``(mae, mse, rmse, r2)`` for ``y_pred`` measured against ``y_true``."""
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    return mae, mse, np.sqrt(mse), r2_score(y_true, y_pred)


# Score the tuned Random Forest on the held-out test set.
y_pred_rf = best_rf_model.predict(X_test)
mae_rf, mse_rf, rmse_rf, r2_rf = _score_predictions(y_test, y_pred_rf)
print(f"Best Random Forest - MAE: {mae_rf}, MSE: {mse_rf}, RMSE: {rmse_rf}, R²: {r2_rf}")

# Score the Gradient Boosting model on the same test set.
y_pred_gb = gb_model.predict(X_test)
mae_gb, mse_gb, rmse_gb, r2_gb = _score_predictions(y_test, y_pred_gb)
print(f"Gradient Boosting - MAE: {mae_gb}, MSE: {mse_gb}, RMSE: {rmse_gb}, R²: {r2_gb}")


Best Random Forest - MAE: 0.8468346938775498, MSE: 2.392893033265302, RMSE: 1.5468978742196597, R²: 0.7726196717588758
Gradient Boosting - MAE: 1.2360674219881618, MSE: 4.1246434479496115, RMSE: 2.030921822215127, R²: 0.6080632238739927
