In [29]:
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import datasets

from sklearn.svm import SVR

In [30]:
# Fetch the diabetes dataset as pandas objects and keep the single frame
# that holds the feature columns together with the `target` column.
diabetes_bunch = datasets.load_diabetes(as_frame=True)
df = diabetes_bunch.frame

In [31]:
# A notebook cell only renders its final expression, so a bare `df.head()`
# followed by another expression is silently discarded. Display the preview
# explicitly and leave the shape as the cell's value.
display(df.head())
df.shape

(442, 11)

In [32]:
# Separate the feature matrix from the regression target.
# `columns=` already identifies the axis being dropped from, so the extra
# `axis=1` argument was redundant and has been removed.
X = df.drop(columns="target")
y = df["target"]

In [33]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [34]:
# Standardise the target; the scaler is fitted on the training split only
# and then re-used unchanged for the test split.
scaler = StandardScaler()

# The scaler works on 2-D input, so each 1-D target is reshaped into a
# single column, transformed, and flattened back to 1-D with ravel().
y_train_column = y_train.to_numpy().reshape(-1, 1)
y_test_column = y_test.to_numpy().reshape(-1, 1)

y_train_scaled = scaler.fit_transform(y_train_column).ravel()
y_test_scaled = scaler.transform(y_test_column).ravel()

In [35]:
# Baseline model: an SVR with scikit-learn's default settings, trained on
# the training features against the standardised target.
model = SVR()

# Kept as the cell's last expression so the fitted estimator is still shown.
model.fit(X=X_train, y=y_train_scaled)

In [36]:
# Predict on both splits; the outputs are in standardised target units
# because the model was trained on the scaled target.
y_train_pred_scaled, y_test_pred_scaled = (
    model.predict(features) for features in (X_train, X_test)
)

In [37]:
# Coefficient of determination on each split, computed against the
# standardised targets the model was trained on.
train_r2 = r2_score(y_true=y_train_scaled, y_pred=y_train_pred_scaled)
test_r2 = r2_score(y_true=y_test_scaled, y_pred=y_test_pred_scaled)

print("r2 score train : ", train_r2)
print("r2 score test : ", test_r2)

r2 score train :  0.6596361676267712
r2 score test :  0.48844443151651884


In [38]:
# Linear kernel: same workflow as the baseline, only the kernel changes.
# fit() returns the estimator itself, so construction and training chain.
model = SVR(kernel="linear").fit(X_train, y_train_scaled)

# Predictions for both splits, in standardised target units.
y_train_pred_scaled, y_test_pred_scaled = (
    model.predict(features) for features in (X_train, X_test)
)

train_r2 = r2_score(y_true=y_train_scaled, y_pred=y_train_pred_scaled)
test_r2 = r2_score(y_true=y_test_scaled, y_pred=y_test_pred_scaled)

print("r2 score train : ", train_r2)
print("r2 score test : ", test_r2)

r2 score train :  0.45191229982475245
r2 score test :  0.4433761323833776


In [40]:
# Polynomial kernel: same workflow as the baseline, only the kernel changes.
# fit() returns the estimator itself, so construction and training chain.
model = SVR(kernel="poly").fit(X_train, y_train_scaled)

# Predictions for both splits, in standardised target units.
y_train_pred_scaled, y_test_pred_scaled = (
    model.predict(features) for features in (X_train, X_test)
)

train_r2 = r2_score(y_true=y_train_scaled, y_pred=y_train_pred_scaled)
test_r2 = r2_score(y_true=y_test_scaled, y_pred=y_test_pred_scaled)

print("r2 score train : ", train_r2)
print("r2 score test : ", test_r2)

r2 score train :  0.5790920834310542
r2 score test :  0.24203771038107758


### Hyper-parameter tuning using `GridSearchCV`

In [41]:
from sklearn.model_selection import GridSearchCV

In [48]:
# Candidate values explored by the grid search. The order of the values
# inside each list is intentionally left as originally written.
param_grid = dict(
    C=[1, 2, 3, 4, 5, 10, 50, 70, 100],
    kernel=["rbf", "linear"],
    epsilon=[0.01, 0.097, 0.098, 0.1, 0.2, 0.3, 0.5, 0.05],
)

In [49]:
# Exhaustive search over `param_grid`: every combination is scored by R^2
# using 5-fold cross-validation on the training split.
svr = SVR()

grid_search = GridSearchCV(
    estimator=svr,
    param_grid=param_grid,
    scoring="r2",
    cv=5,
)

# Kept as the cell's last expression so the fitted search object is shown.
grid_search.fit(X_train, y_train_scaled)

In [50]:
# Report the hyper-parameter combination selected by the grid search.
best_params = grid_search.best_params_
print("best_parameters : ", best_params)

best_parameters :  {'C': 10, 'epsilon': 0.097, 'kernel': 'linear'}


In [53]:
# Take the winning estimator straight from the grid search instead of
# re-typing its hyper-parameters by hand: values copied from a printed
# output silently go stale whenever the grid, the split, or the CV setup
# changes. With GridSearchCV's default `refit=True`, `best_estimator_` has
# already been retrained on the whole training split using `best_params_`,
# so no additional fit is needed.
best_model = grid_search.best_estimator_

# Predictions for both splits, in standardised target units.
y_train_pred_scaled = best_model.predict(X_train)
y_test_pred_scaled = best_model.predict(X_test)

print("r2 score train : ", r2_score(y_train_scaled, y_train_pred_scaled))
print("r2 score test : ", r2_score(y_test_scaled, y_test_pred_scaled))

r2 score train :  0.5148773401534066
r2 score test :  0.4745176564636029


In [56]:
from sklearn.svm import LinearSVR

# Same C / epsilon as the tuned linear-kernel SVR, fitted with LinearSVR
# for comparison.
# LinearSVR's solver shuffles the data with a pseudo-random generator, so
# the seed is pinned (same value as the train/test split) to make the fit
# and the reported scores reproducible on Restart & Run All. Scores may
# differ in the last digits from a previously recorded unseeded run.
model = LinearSVR(C=10, epsilon=0.097, max_iter=5000, random_state=42)

model.fit(X_train, y_train_scaled)

# Predictions for both splits, in standardised target units.
y_train_pred_scaled = model.predict(X_train)
y_test_pred_scaled = model.predict(X_test)

print("r2 score train : ", r2_score(y_train_scaled, y_train_pred_scaled))
print("r2 score test : ", r2_score(y_test_scaled, y_test_pred_scaled))

r2 score train :  0.515155165001139
r2 score test :  0.47441723339800923
