**SUPPORT VECTOR REGRESSION**

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn import model_selection
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn import neighbors
from sklearn.svm import SVR

from warnings import filterwarnings
# Install a global "ignore" filter so that no Python warnings are shown by the
# cells below (presumably to keep the notebook output tidy).
# NOTE(review): this also hides deprecation and convergence warnings -- confirm
# that silencing everything is intended.
filterwarnings("ignore")



In [2]:
# Load the Hitters data set and discard every row that has a missing value.
df = pd.read_csv("/content/Hitters.csv").dropna()

# One-hot encode the three categorical columns.
categorical_cols = ["League", "Division", "NewLeague"]
dms = pd.get_dummies(df[categorical_cols])

# Target variable.
y = df["Salary"]

# Remaining predictors (target and raw categorical columns removed), as float64.
X_ = df.drop(columns=["Salary"] + categorical_cols).astype("float64")

# Append a single indicator column per categorical variable; the other
# generated dummy columns are left out of the design matrix.
X = pd.concat(
    [X_, dms[["League_N", "Division_W", "NewLeague_N"]]],
    axis=1,
)

# Hold out 25% of the rows as a test set; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=50,
)

In [3]:
# Model & prediction

# Kernel: "rbf" (radial basis function) -- a common choice when there is no
# prior knowledge about the structure of the data.
svr_model = SVR(kernel="rbf")
svr_model.fit(X_train, y_train)

# Preview the first five test-set predictions.
svr_model.predict(X_test)[:5]

array([385.26044779, 447.55101146, 391.70134691, 386.87174215,
       436.21431318])

In [4]:
# Display the intercept (constant term) of the fitted SVR model.
svr_model.intercept_

array([421.15330855])

In [5]:
# Test-set root-mean-squared error of the current SVR model.
y_pred = svr_model.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. MSE is symmetric,
# so the value is the same either way, but the documented order keeps this
# cell correct if the metric is ever swapped for an asymmetric one (e.g. R^2).
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
rmse

549.9065470662332

In [6]:
# Kernel: "linear" -- the prediction is a linear function of the features, so
# the fitted model exposes an intercept and a coefficient vector.
svr_model = SVR(kernel="linear").fit(X_train, y_train)

# Inspect the fitted linear model: intercept first, then the coefficients.
print(svr_model.intercept_)
print(svr_model.coef_)

[-16.25106095]
[[ -1.44276485   6.18811817  -2.16314929  -2.86255843   1.67027327
    4.56406824  10.44605183  -0.12547158  -0.04013086  -0.29079857
    1.82387496   0.2052123   -0.92138004   0.57055726   0.04759365
   -0.94760535   5.77156898 -17.40308583   6.77156898]]


In [7]:
# Test-set root-mean-squared error of the current SVR model.
y_pred = svr_model.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. MSE is symmetric,
# so the value is the same either way, but the documented order keeps this
# cell correct if the metric is ever swapped for an asymmetric one (e.g. R^2).
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [8]:
# Display the most recently computed test-set RMSE.
rmse

373.7244551677321

In [9]:
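# The linear kernel performs much better than the RBF kernel on this dataset:
# its test RMSE is about 374, versus about 550 for the RBF kernel.
# Caveat: the features were not standardized before fitting, which may put the
# RBF kernel at a particular disadvantage.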

**Model Tuning**

In [10]:
# Candidate values for the regularization parameter C.
svr_params = {"C": [0.1, 0.5, 1.3]}

# 5-fold cross-validated grid search over the candidates above.
#   verbose=2  -> print progress messages while the fits run
#   n_jobs=-1  -> run the fits in parallel on all available CPU cores
grid_search = GridSearchCV(
    estimator=svr_model,
    param_grid=svr_params,
    cv=5,
    verbose=2,
    n_jobs=-1,
)
svr_cv_model = grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


In [11]:
# Display the parameter setting that the grid search selected as best.
svr_cv_model.best_params_

{'C': 1.3}

In [12]:
# Refit a linear-kernel SVR using the hyper-parameters chosen by the grid
# search. Reading them from best_params_ (instead of hard-coding the value of
# C) keeps this cell correct if the grid or the data ever changes.
svr_tuned = SVR(kernel="linear", **svr_cv_model.best_params_).fit(X_train, y_train)

# Test-set RMSE of the tuned model; metric arguments follow the documented
# (y_true, y_pred) order.
y_pred = svr_tuned.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
rmse

372.0053309796307