In [26]:
# --- Import Libraries ---
import numpy as np
import pandas as pd

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error

In [27]:
data = load_diabetes()
df = pd.DataFrame(data = data.data, columns=data.feature_names)
df['target'] = data.target
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [28]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [29]:
X.shape, y.shape

((442, 10), (442,))

In [30]:
# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [31]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((353, 10), (89, 10), (353,), (89,))

In [32]:
# --- Pipeline (Scaling + SVR) ---
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svr', SVR())
])

In [33]:
# --- Hyperparameter Grid ---
param_grid = {
    "svr__C": [0.1, 1, 10],
    "svr__kernel": ["linear", "rbf", "poly"],
    "svr__gamma": ["scale", "auto"],
    "svr__epsilon": [0.1, 0.2, 0.5]   # margin of tolerance
}

In [34]:
grid = GridSearchCV(
    pipeline,
    param_grid=param_grid,
    cv=5,
    scoring="r2",   # R² for regression
    n_jobs=-1,
    verbose=1
)

grid.fit(X_train, y_train)

Fitting 5 folds for each of 54 candidates, totalling 270 fits


In [36]:
# --- Evaluation ---
print("Best Parameters:", grid.best_params_)
print("Best CV R² Score:", grid.best_score_)

y_pred = grid.predict(X_test)

print("Test R² Score:", r2_score(y_test, y_pred))
print("Test RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

Best Parameters: {'svr__C': 10, 'svr__epsilon': 0.5, 'svr__gamma': 'scale', 'svr__kernel': 'rbf'}
Best CV R² Score: 0.41355918991433765
Test R² Score: 0.4948626674222323
Test RMSE: 51.73291255900746


In [40]:
svr = SVR(C=10, epsilon= 0.5, gamma='scale', kernel='rbf')
svr.fit(X_train, y_train)
y_pred=svr.predict(X_test)

In [41]:
r2_score(y_test, y_pred)

0.4948626674222323

In [42]:
mean_squared_error(y_test, y_pred)

2676.294241837912