In [None]:
# Data Loading
#
# Reads the processed training CSV and separates the target column
# ("median_house_value") from the remaining feature columns.

import pandas as pd

# Load processed training set (24 features)
TRAIN_PATH = "../data/train"
PROCESSED_TRAIN_FILE = f"{TRAIN_PATH}/housing_train_processed.csv"

housing = pd.read_csv(PROCESSED_TRAIN_FILE)

# Split features/labels
X = housing.drop(columns="median_house_value")
y = housing["median_house_value"]

# Only the final expression of a cell is rendered automatically, so a bare
# X.head() in the middle of the cell would be evaluated and silently dropped.
# display() (available as a built-in inside Jupyter/IPython kernels) renders
# the feature preview explicitly; the label preview stays the cell's output.
display(X.head())
y.head()

In [None]:
# Model Fitting
#
# Standardizes the features, fits an RBF-kernel SVR on the standardized
# matrix, and reports the in-sample RMSE.

from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np

# Learn the per-feature scaling from X, then apply it to get the SVR input
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Build the regressor and train it on the standardized features
svr_reg = SVR(kernel="rbf", C=100, gamma=0.1).fit(X_scaled, y)

# In-sample error: predict on the same rows the model was trained on
y_pred_train = svr_reg.predict(X_scaled)
train_mse = mean_squared_error(y, y_pred_train)
rmse_train = np.sqrt(train_mse)
print(f"Training RMSE: {rmse_train:.2f}")

In [None]:
# Cross-Validation
#
# Estimates out-of-sample RMSE for the SVR configuration held in svr_reg
# using 10-fold cross-validation.

from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# Scaling is part of the model, so it has to be re-fitted inside every fold.
# Cross-validating on the pre-scaled X_scaled would let each held-out fold
# contribute to the scaler's mean/variance (data leakage) and bias the scores.
# cross_val_score clones the pipeline for each split, so the already-fitted
# svr_reg only serves as a hyperparameter template and is left untouched.
svr_cv_pipeline = make_pipeline(StandardScaler(), svr_reg)

# 10-fold cross-validation on the raw (unscaled) features
scores = cross_val_score(svr_cv_pipeline, X, y,
                         scoring="neg_mean_squared_error", cv=10)
# The scorer returns negated MSE; flip the sign before taking the root
rmse_scores = np.sqrt(-scores)

print("Cross-validation RMSE scores:", rmse_scores)
print("Mean:", rmse_scores.mean())
print("Standard deviation:", rmse_scores.std())

In [None]:
# Hyperparameter Tuning
#
# Grid-searches SVR hyperparameters with 5-fold cross-validation, then
# reports the training RMSE of the refitted best model.

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Keep the scaler inside the searched estimator so it is re-fitted on each
# training fold; searching on the pre-scaled X_scaled would leak validation
# rows into the scaling statistics.
tuning_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svr", SVR()),
])

# Grid search for C, gamma, and kernel.
# gamma is ignored by the linear kernel, so it is only searched for "rbf";
# a single crossed grid would refit every linear/C pair once per gamma value.
param_grid = [
    {
        "svr__kernel": ["rbf"],
        "svr__C": [1, 10, 100],
        "svr__gamma": [0.01, 0.1, 0.5],
    },
    {
        "svr__kernel": ["linear"],
        "svr__C": [1, 10, 100],
    },
]

grid_search = GridSearchCV(tuning_pipeline, param_grid,
                           scoring="neg_mean_squared_error", cv=5)
grid_search.fit(X, y)

print("Best parameters:", grid_search.best_params_)
# With the default refit=True the winning pipeline is refitted on all of X:
# its scaler is fitted on X and its SVR on the standardized result. The SVR
# step is therefore a plain SVR that expects standardized inputs, like
# X_scaled, so best_model keeps the same type and contract as before.
best_model = grid_search.best_estimator_.named_steps["svr"]

# Evaluate best model
y_pred_best = best_model.predict(X_scaled)
rmse_best = np.sqrt(mean_squared_error(y, y_pred_best))
print(f"Training RMSE with best parameters: {rmse_best:.2f}")

In [None]:
# Model Saving
#
# Persists the tuned model, plus the fitted scaler it depends on, under
# MODELS_PATH.

import joblib
import os

# Save trained model
MODELS_PATH = "../models"
os.makedirs(MODELS_PATH, exist_ok=True)

model_file = os.path.join(MODELS_PATH, "svr_model.pkl")
joblib.dump(best_model, model_file)
print(f"Model saved to {model_file}")

# The model was trained on standardized features, so predictions on new data
# need the same fitted transform. Store the scaler next to the model so the
# two can be reloaded together.
# NOTE: joblib files are pickle-based; only load them from trusted sources.
scaler_file = os.path.join(MODELS_PATH, "svr_scaler.pkl")
joblib.dump(scaler, scaler_file)
print(f"Scaler saved to {scaler_file}")