In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler,scale
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor


In [2]:
data = pd.read_csv('data.csv')
data = data.dropna()
dms = pd.get_dummies(data[['League', 'Division', 'NewLeague']])
y = data["Salary"]
X_ = data.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.25, 
                                                    random_state=42)

In [5]:
data.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N
5,594,169,4,74,51,35,11,4408,1133,19,501,336,194,A,W,282,421,25,750.0,A


In [3]:
model = KNeighborsRegressor().fit(X_train, y_train)

In [6]:
model

In [7]:
model.n_neighbors

5

In [8]:
model.metric

'minkowski'

In [9]:
dir(model)

['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__sklearn_clone__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_build_request_for_signature',
 '_check_algorithm_metric',
 '_check_feature_names',
 '_check_n_features',
 '_doc_link_module',
 '_doc_link_template',
 '_doc_link_url_param_generator',
 '_estimator_type',
 '_fit',
 '_fit_X',
 '_fit_method',
 '_get_default_requests',
 '_get_doc_link',
 '_get_metadata_request',
 '_get_param_names',
 '_get_tags',
 '_kneighbors_reduce_func',
 '_more_tags',
 '_parameter_constraints',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_tree',
 '_validate_data',
 '_validate

In [10]:
model.predict(X_test)[0:5]

array([ 510.3334,  808.3334,  772.5   ,  125.5   , 1005.    ])

In [11]:
y_pred = model.predict(X_test)

In [12]:
np.sqrt(mean_squared_error(y_test, y_pred)) #test error

426.6570764525201

# MODEL TUNİNG

In [13]:
RMSE = []

for k in range(10):
    k = k+1
    model = KNeighborsRegressor(n_neighbors = k).fit(X_train, y_train)
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    RMSE.append(rmse)
    print("k =", k, "için RMSE değeri: ", rmse)

k = 1 için RMSE değeri:  455.03925390751965
k = 2 için RMSE değeri:  415.99629571490965
k = 3 için RMSE değeri:  420.6765370082348
k = 4 için RMSE değeri:  428.8564674588792
k = 5 için RMSE değeri:  426.6570764525201
k = 6 için RMSE değeri:  423.5071669008732
k = 7 için RMSE değeri:  414.9361222421057
k = 8 için RMSE değeri:  413.7094731463598
k = 9 için RMSE değeri:  417.84419990871265
k = 10 için RMSE değeri:  421.6252180741266


In [14]:
RMSE

[455.03925390751965,
 415.99629571490965,
 420.6765370082348,
 428.8564674588792,
 426.6570764525201,
 423.5071669008732,
 414.9361222421057,
 413.7094731463598,
 417.84419990871265,
 421.6252180741266]

In [15]:
# GridSearchCV

knn_params = {"n_neighbors": np.arange(1,30,1)}
modelTuning =GridSearchCV(model, knn_params, cv=10).fit(X_train, y_train)

In [16]:
modelTuning.best_params_


{'n_neighbors': 8}

In [17]:
modelFinal = KNeighborsRegressor(n_neighbors = modelTuning.best_params_["n_neighbors"]).fit(X_train, y_train)
y_pred = modelFinal.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred)) #test error

413.7094731463598