In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler,scale
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.svm import SVR

In [2]:
# Read the raw table and discard every row that contains a missing value.
data = pd.read_csv('data.csv').dropna()

# Regression target.
y = data["Salary"]

# One-hot encode the three categorical columns; only one indicator per
# column is carried into the design matrix below.
dms = pd.get_dummies(data[['League', 'Division', 'NewLeague']])

# Numeric predictors: everything except the target and the raw categoricals.
X_ = data.drop(columns=['Salary', 'League', 'Division', 'NewLeague']).astype('float64')

# Design matrix = numeric predictors + the selected indicator columns.
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [3]:
# Preview the first five rows of the frame after the missing-value rows were dropped.
data.head(n=5)

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N
5,594,169,4,74,51,35,11,4408,1133,19,501,336,194,A,W,282,421,25,750.0,A


In [4]:
# Baseline: linear-kernel support vector regressor with otherwise default
# hyperparameters, fitted on the training split exactly as prepared above.
# NOTE(review): the features are not standardised in the cells shown, although
# StandardScaler is imported — confirm that this is intentional.
model = SVR(kernel="linear").fit(X=X_train, y=y_train)

In [5]:
# Display the fitted estimator (its repr lists the configured hyperparameters).
model

In [6]:
# First five in-sample predictions (training split).
model.predict(X_train)[:5]

array([219.32622627, 702.43039317, 623.20559641, 153.77538484,
       463.15191157])

In [7]:
# First five out-of-sample predictions (held-out test split).
model.predict(X_test)[:5]

array([679.14754919, 633.72883529, 925.68639938, 270.28464317,
       530.26659421])

In [8]:
model.intercept_  # intercept (constant term) of the fitted linear model

array([-80.15196063])

In [9]:
model.coef_  # coefficients of the independent variables

array([[ -1.2183904 ,   6.09602978,  -3.67574533,   0.14217072,
          0.51435925,   1.28388992,  12.55922527,  -0.08693754,
          0.46597185,   2.98259931,   0.52944513,  -0.79820793,
         -0.16015531,   0.30872795,   0.28842348,  -1.79560066,
          6.41868986, -10.74313785,   1.33374319]])

In [10]:
# Score the untuned model on the held-out split and report the RMSE
# (square root of the mean squared error).
y_pred = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
np.sqrt(test_mse)

370.0408415795005

# Model Tuning

In [11]:
# Candidate values for the SVR regularisation strength C.
modelPamas = {"C": [0.1, 0.5, 1, 3]}

# Exhaustive 10-fold cross-validated search over the grid. GridSearchCV works
# on clones of `model`, so the estimator fitted earlier is left untouched.
# n_jobs=-1 runs the fits in parallel on all available CPU cores.
modelTuning = GridSearchCV(model, modelPamas, cv=10, n_jobs=-1, verbose=2)
modelTuning.fit(X_train, y_train)

# Only a cell's last expression is displayed, so return both values together;
# written on separate lines, best_params_ is evaluated and silently discarded.
# With no `scoring` argument, best_score_ is the mean cross-validated value of
# the estimator's default score (R^2 for SVR), not an RMSE.
modelTuning.best_params_, modelTuning.best_score_

Fitting 10 folds for each of 4 candidates, totalling 40 fits


0.07574823014646363

In [12]:
# Refit on the full training split with the hyperparameters selected by the
# grid search rather than a hand-copied constant, so this cell stays in sync
# with the tuning step if the grid or the data change.
modelFinal = SVR(kernel="linear", **modelTuning.best_params_).fit(X_train, y_train)

# RMSE of the tuned model on the held-out test split.
y_pred = modelFinal.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

367.960797346887