In [4]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [6]:
# Location of the dataset CSV under the mounted Drive folder.
path = "/content/drive/MyDrive/Real estate.csv"
# Read the CSV into a DataFrame using pandas' default parsing options.
df = pd.read_csv(path)

In [7]:
# Show the column names so the feature/target columns can be picked by name.
column_names = list(df.columns)
print("Kolom dalam dataset:", column_names)

Kolom dalam dataset: ['No', 'X1 transaction date', 'X2 house age', 'X3 distance to the nearest MRT station', 'X4 number of convenience stores', 'X5 latitude', 'X6 longitude', 'Y house price of unit area']


In [8]:
# Target: the unit-area house price column.
y = df["Y house price of unit area"]
# Predictors: every remaining column except the "No" column.
X = df.drop(["No", "Y house price of unit area"], axis=1)

# Standardize the predictors with a StandardScaler fitted on X.
# NOTE(review): the scaler is fitted on all rows before any train/test split,
# so statistics from rows that later become test data influence the scaled
# training features. Consider fitting the scaler on the training split only.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [9]:
def evaluate_model(model, X_test, y_test, kernel=None):
    """Score a fitted model on held-out data, print the metrics, and return them.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    X_test : array-like
        Features to predict on.
    y_test : array-like
        True target values matching ``X_test``.
    kernel : str, optional
        Label for the model. When falsy, the printed label is
        ``'Linear Regression'`` and the returned label is ``'Linear'``.

    Returns
    -------
    tuple
        ``(label, mae, rmse, r2)``.
    """
    predictions = model.predict(X_test)

    # Compute the three metrics in the same order they are reported.
    scores = (
        mean_absolute_error(y_test, predictions),
        np.sqrt(mean_squared_error(y_test, predictions)),  # RMSE = sqrt(MSE)
        r2_score(y_test, predictions),
    )
    mae, rmse, r2 = scores

    report = [
        f"Kernel: {kernel or 'Linear Regression'}",
        f"MAE: {mae:.3f}",
        f"RMSE: {rmse:.3f}",
        f"R²: {r2:.6f}\n",
    ]
    print("\n".join(report))

    return (kernel or 'Linear', *scores)

In [10]:
def run_experiments(X, y, split_ratio):
    """Train and evaluate Linear Regression and three SVR kernels on one split.

    Parameters
    ----------
    X : array-like
        Feature matrix, forwarded to ``train_test_split``.
    y : array-like
        Target values aligned with ``X``.
    split_ratio : float
        Fraction of the samples used for training (e.g. ``0.8``). The
        remainder, ``1 - split_ratio``, is passed as ``test_size``.

    Returns
    -------
    list
        One ``evaluate_model`` result per model, in the order
        Linear Regression, SVR linear, SVR rbf, SVR poly (degree 3).
    """
    # Round instead of truncating: (1 - 0.8) * 100 evaluates to
    # 19.999999999999996 in floating point, so int() printed "80% : 19%".
    # Deriving the test share as the complement keeps the two adding up to 100.
    train_pct = round(split_ratio * 100)
    test_pct = 100 - train_pct
    print(f"\n===== SPLIT: {train_pct}% : {test_pct}% =====")

    # NOTE(review): the visible callers pass features that were standardized
    # on the full dataset before this split, so test-row statistics leak into
    # the training features. Consider fitting the scaler on X_train only.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 - split_ratio, random_state=42)

    # (label passed to evaluate_model, unfitted estimator); None selects the
    # default "Linear Regression" label inside evaluate_model.
    candidates = [
        (None, LinearRegression()),
        ('linear', SVR(kernel='linear')),
        ('rbf', SVR(kernel='rbf')),
        ('poly', SVR(kernel='poly', degree=3)),
    ]

    results = []
    for kernel, model in candidates:
        model.fit(X_train, y_train)
        results.append(evaluate_model(model, X_test, y_test, kernel=kernel))

    return results

# Run the model comparison once per train/test ratio.
results_70 = run_experiments(X_scaled, y, 0.7)
results_80 = run_experiments(X_scaled, y, 0.8)

# Collect every (label, MAE, RMSE, R2) tuple into one summary table.
all_results = pd.DataFrame(results_70 + results_80, columns=["Kernel", "MAE", "RMSE", "R2"])
# Size the split labels from the actual result lists rather than a hard-coded
# model count, so the labels stay aligned if the number of models changes.
all_results["Split"] = ["70:30"] * len(results_70) + ["80:20"] * len(results_80)

print("\n=== RINGKASAN HASIL EKSPERIMEN ===")
print(all_results)


===== SPLIT: 70% : 30% =====
Kernel: Linear Regression
MAE: 6.185
RMSE: 8.577
R²: 0.560064

Kernel: linear
MAE: 5.986
RMSE: 8.559
R²: 0.561941

Kernel: rbf
MAE: 5.986
RMSE: 8.396
R²: 0.578437

Kernel: poly
MAE: 7.700
RMSE: 9.867
R²: 0.417859


===== SPLIT: 80% : 19% =====
Kernel: Linear Regression
MAE: 5.305
RMSE: 7.315
R²: 0.681058

Kernel: linear
MAE: 5.308
RMSE: 7.416
R²: 0.672187

Kernel: rbf
MAE: 5.487
RMSE: 7.598
R²: 0.655871

Kernel: poly
MAE: 7.054
RMSE: 8.732
R²: 0.545483


=== RINGKASAN HASIL EKSPERIMEN ===
   Kernel       MAE      RMSE        R2  Split
0  Linear  6.184836  8.577201  0.560064  70:30
1  linear  5.986379  8.558878  0.561941  70:30
2     rbf  5.986438  8.396188  0.578437  70:30
3    poly  7.699818  9.866543  0.417859  70:30
4  Linear  5.305356  7.314754  0.681058  80:20
5  linear  5.307568  7.415777  0.672187  80:20
6     rbf  5.486650  7.598092  0.655871  80:20
7    poly  7.054102  8.732101  0.545483  80:20
