# Import Libraries

In [6]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score



# Load Clean Dataset

In [2]:
# Read the cleaned train/test CSVs; the paths are relative to the current
# working directory, so run the notebook from its own folder.
train = pd.read_csv(filepath_or_buffer='../data/clean_train.csv')
test = pd.read_csv(filepath_or_buffer='../data/clean_test.csv')

# Split Training Data into Train/Validation Sets (80/20)

In [3]:
# Separate the target column from the predictors, then hold out 20% of the
# rows for validation. The fixed random_state keeps the split reproducible.
y = train['SalePrice']
X = train.drop(columns=['SalePrice'])
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scale Features

In [4]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to the training, validation and test features.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(test)

# Train and Evaluate Baseline Models

In [7]:
def evaluate_model(model, X_train, y_train, X_val, y_val):
    """Fit ``model`` on the training split and print its validation metrics.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
        Fitted in place by this call.
    X_train, y_train : training features and targets passed to ``fit``.
    X_val, y_val : held-out features and targets used for scoring.

    Returns
    -------
    The same ``model`` object, after fitting.

    Side effects
    ------------
    Prints the estimator's class name, R² and RMSE (both to four decimals),
    followed by a 30-character dashed separator.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_val)

    r_squared = r2_score(y_val, predictions)
    # RMSE is the square root of the mean squared error, so it is reported in
    # the same units as the target.
    root_mse = np.sqrt(mean_squared_error(y_val, predictions))

    report_lines = (
        f"Model: {type(model).__name__}",
        f"R²: {r_squared:.4f}",
        f"RMSE: {root_mse:.4f}",
        "-" * 30,
    )
    print("\n".join(report_lines))
    return model

# Baseline candidates keyed by the label printed during evaluation. Seeds are
# fixed for the two tree-based estimators so their results are repeatable.
# NOTE(review): the output saved with this notebook shows a hugely negative R²
# for Linear Regression and an R² near zero for SVM. Presumably this comes from
# ill-conditioned/collinear features and an unscaled target respectively;
# confirm before treating these two as meaningful baselines.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(random_state=42),
    "XGBoost": XGBRegressor(objective="reg:squarederror", random_state=42),
    "SVM": SVR(C=100, epsilon=0.1, gamma=0.1, kernel="rbf"),
}

# Fit every candidate on the scaled training data and keep the fitted
# estimator under the same label for later use.
trained_models = {}
for name, model in models.items():
    print(f"Evaluating {name}...")
    trained_models[name] = evaluate_model(
        model=model,
        X_train=X_train_scaled,
        y_train=y_train,
        X_val=X_val_scaled,
        y_val=y_val,
    )

Evaluating Linear Regression...
Model: LinearRegression
R²: -594859748011.1166
RMSE: 67548266556.4267
------------------------------
Evaluating Random Forest...
Model: RandomForestRegressor
R²: 0.8866
RMSE: 29492.5507
------------------------------
Evaluating XGBoost...
Model: XGBRegressor
R²: 0.8924
RMSE: 28725.8357
------------------------------
Evaluating SVM...
Model: SVR
R²: -0.0247
RMSE: 88657.4355
------------------------------
