# SVR: Destek Vektör Regresyonu

In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

In [2]:
# Relative path of the CSV file that is passed to pd.read_csv when loading the data.
data_file = "Hitters.csv"

In [3]:
# Read the CSV and keep only the rows that have no missing value in any column.
df = (
    pd.read_csv(data_file)
    .dropna(axis=0, how="any")
)
df.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N
5,594,169,4,74,51,35,11,4408,1133,19,501,336,194,A,W,282,421,25,750.0,A


In [4]:
# Target variable.
y = df["Salary"]

# Numeric predictors: every column except the target and the three
# categorical columns, cast to float64.
x64 = df.drop(columns=["Salary", "League", "Division", "NewLeague"]).astype("float64")

# One-hot encode the categorical columns and keep a single indicator
# column for each of them.
dummies = pd.get_dummies(df[["League", "Division", "NewLeague"]])
kept_indicators = dummies[["League_N", "Division_W", "NewLeague_N"]]

# Final feature matrix: numeric predictors followed by the indicators.
X = pd.concat([x64, kept_indicators], axis=1)
X.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,PutOuts,Assists,Errors,League_N,Division_W,NewLeague_N
1,315.0,81.0,7.0,24.0,38.0,39.0,14.0,3449.0,835.0,69.0,321.0,414.0,375.0,632.0,43.0,10.0,1,1,1
2,479.0,130.0,18.0,66.0,72.0,76.0,3.0,1624.0,457.0,63.0,224.0,266.0,263.0,880.0,82.0,14.0,0,1,0
3,496.0,141.0,20.0,65.0,78.0,37.0,11.0,5628.0,1575.0,225.0,828.0,838.0,354.0,200.0,11.0,3.0,1,0,1
4,321.0,87.0,10.0,39.0,42.0,30.0,2.0,396.0,101.0,12.0,48.0,46.0,33.0,805.0,40.0,4.0,1,0,1
5,594.0,169.0,4.0,74.0,51.0,35.0,11.0,4408.0,1133.0,19.0,501.0,336.0,194.0,282.0,421.0,25.0,0,1,0


In [5]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=3232
)

# Use "Hits" as the only predictor, kept as a one-column DataFrame.
X_train = X_train[["Hits"]]
X_test = X_test[["Hits"]]

# Fit a linear-kernel SVR on the training split and show its training predictions.
svr_model = SVR(kernel="linear")
svr_model.fit(X_train, y_train)
svr_model.predict(X_train)

array([6.67500e+02, 3.58100e+02, 7.86840e+02, 3.05060e+02, 5.21640e+02,
       4.95120e+02, 7.07280e+02, 4.99540e+02, 5.17220e+02, 2.43180e+02,
       1.98980e+02, 7.42640e+02, 7.16120e+02, 2.74120e+02, 1.76880e+02,
       4.02300e+02, 7.38220e+02, 4.77440e+02, 7.16120e+02, 6.58660e+02,
       6.01200e+02, 6.18880e+02, 3.31580e+02, 1.68040e+02, 2.78540e+02,
       4.50920e+02, 4.50920e+02, 4.11140e+02, 4.19980e+02, 7.64740e+02,
       6.18880e+02, 2.52020e+02, 7.78000e+02, 5.08380e+02, 2.96220e+02,
       5.70260e+02, 3.40420e+02, 5.61420e+02, 2.96220e+02, 4.50920e+02,
       3.49260e+02, 3.62520e+02, 1.98980e+02, 8.79660e+02, 3.53680e+02,
       7.42640e+02, 1.81300e+02, 6.18880e+02, 2.21080e+02, 5.26060e+02,
       4.06720e+02, 9.28280e+02, 3.53680e+02, 6.36560e+02, 7.51480e+02,
       6.98440e+02, 4.42080e+02, 5.52580e+02, 2.47600e+02, 6.76340e+02,
       4.73020e+02, 3.66940e+02, 6.23300e+02, 2.29920e+02, 3.93460e+02,
       3.62520e+02, 5.65840e+02, 6.67500e+02, 3.18320e+02, 3.625

# Tahmin

In [11]:
# Prediction equation of the fitted linear SVR: intercept plus coefficient times x.
intercept = svr_model.intercept_[0]
slope = svr_model.coef_[0][0]
print("y =", intercept, "+ x *", slope)

y = -4.340000000002393 + x * 4.420000000000073


In [14]:
# Root mean squared error of the base model on the held-out test split.
y_pred = svr_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
np.sqrt(test_mse)

401.17740900946495

# Model Tuning

In [16]:
# Candidate values for C: 0.1 up to (but excluding) 2 in steps of 0.1.
svr_params = {"C": np.arange(0.1, 2, 0.1)}

# 10-fold cross-validated grid search over C, fitted on the training split.
svr_cv_model = GridSearchCV(estimator=svr_model, param_grid=svr_params, cv=10)
svr_cv_model.fit(X_train, y_train)
svr_cv_model.best_params_

{'C': 1.8000000000000003}

In [17]:
# Refit a linear SVR with the C selected by the grid search. Reading it from
# best_params_ (rather than pasting the printed value) keeps this cell in sync
# if the data, the split, or the parameter grid ever changes.
best_c = svr_cv_model.best_params_["C"]
svr_tuned = SVR(kernel="linear", C=best_c).fit(X_train, y_train)

# Root mean squared error of the tuned model on the held-out test split.
y_pred = svr_tuned.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

401.17740900944176