In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import mean_squared_error,r2_score

# Install a blanket "ignore" filter so no warnings are shown by later cells
# (presumably to keep the cell outputs readable).
from warnings import filterwarnings

filterwarnings(action="ignore")


In [2]:
# Load the Hitters data and discard every row that has a missing value.
df = pd.read_csv("Hitters.csv").dropna()

# Regression target.
y = df['Salary']

# One-hot encode the three categorical columns.
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])

# Predictors: every remaining column cast to float64, plus one indicator
# column per categorical variable.
X_ = df.drop(columns=['Salary', 'League', 'Division', 'NewLeague']).astype('float64')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)

# Hold out 25% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [3]:
%pip install catboost

Collecting catboost
  Downloading catboost-1.1.1-cp39-none-win_amd64.whl (74.0 MB)
Collecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
Installing collected packages: graphviz, catboost
Successfully installed catboost-1.1.1 graphviz-0.20.1
Note: you may need to restart the kernel to use updated packages.


In [4]:
from catboost import CatBoostRegressor

In [5]:
# Baseline: CatBoost regressor with default hyper-parameters, trained on the
# training split. The fit result is re-assigned so the cell shows no Out value.
catb_model = CatBoostRegressor()
catb_model = catb_model.fit(X_train, y_train)

Learning rate set to 0.031674
0:	learn: 437.6430699	total: 146ms	remaining: 2m 25s
1:	learn: 431.3923642	total: 154ms	remaining: 1m 16s
2:	learn: 424.8820360	total: 161ms	remaining: 53.4s
3:	learn: 418.2514904	total: 220ms	remaining: 54.8s
4:	learn: 412.6394021	total: 227ms	remaining: 45.2s
5:	learn: 406.6247020	total: 234ms	remaining: 38.8s
6:	learn: 400.5321206	total: 240ms	remaining: 34s
7:	learn: 394.6683437	total: 246ms	remaining: 30.5s
8:	learn: 388.2496484	total: 315ms	remaining: 34.6s
9:	learn: 382.9448842	total: 322ms	remaining: 31.8s
10:	learn: 377.2600080	total: 329ms	remaining: 29.5s
11:	learn: 372.4829606	total: 335ms	remaining: 27.6s
12:	learn: 366.6823437	total: 342ms	remaining: 26s
13:	learn: 362.6076230	total: 404ms	remaining: 28.4s
14:	learn: 358.0107745	total: 409ms	remaining: 26.9s
15:	learn: 353.2802665	total: 414ms	remaining: 25.5s
16:	learn: 348.5646265	total: 422ms	remaining: 24.4s
17:	learn: 343.6407912	total: 428ms	remaining: 23.4s
18:	learn: 339.2363847	total

In [6]:
# Predict on the held-out split with the baseline model and display the
# root mean squared error.
y_pred = catb_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
np.sqrt(test_mse)

351.194631344607

### Model Tuning

In [7]:
# Hyper-parameter grid for the search: 3 x 2 x 3 = 18 combinations.
catb_params = dict(
    iterations=[200, 500, 1000],
    learning_rate=[0.01, 0.1],
    depth=[3, 6, 8],
)

In [8]:
# Fresh, unfitted estimator to use as the grid-search base model.
# NOTE: this rebinds `catb_model`, replacing the fitted baseline model.
catb_model = CatBoostRegressor()

In [9]:
# Exhaustive 5-fold cross-validated search over `catb_params`, using all
# available cores (n_jobs=-1) and per-fit progress messages (verbose=2).
catb_cv_model = GridSearchCV(
    estimator=catb_model,
    param_grid=catb_params,
    cv=5,
    n_jobs=-1,
    verbose=2,
)
catb_cv_model = catb_cv_model.fit(X_train, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
0:	learn: 425.7900818	total: 982us	remaining: 195ms
1:	learn: 404.8723520	total: 1.98ms	remaining: 196ms
2:	learn: 387.4057666	total: 4.13ms	remaining: 271ms
3:	learn: 372.2801584	total: 5.64ms	remaining: 276ms
4:	learn: 358.9204229	total: 7.18ms	remaining: 280ms
5:	learn: 347.0083933	total: 8.45ms	remaining: 273ms
6:	learn: 336.0130818	total: 11.9ms	remaining: 327ms
7:	learn: 324.3923300	total: 13.3ms	remaining: 319ms
8:	learn: 314.8690957	total: 14.7ms	remaining: 312ms
9:	learn: 308.5075563	total: 15.9ms	remaining: 302ms
10:	learn: 298.8587285	total: 17.9ms	remaining: 307ms
11:	learn: 294.7655438	total: 19.7ms	remaining: 309ms
12:	learn: 288.0697862	total: 21.2ms	remaining: 305ms
13:	learn: 282.6697154	total: 22.9ms	remaining: 304ms
14:	learn: 277.6121667	total: 25.2ms	remaining: 310ms
15:	learn: 273.4383979	total: 26.9ms	remaining: 309ms
16:	learn: 269.1556201	total: 28.7ms	remaining: 309ms
17:	learn: 264.8098704	total: 30

### Note: this took longer to run than the other algorithms.

In [10]:
# Display the hyper-parameter combination the grid search selected.
catb_cv_model.best_params_

{'depth': 3, 'iterations': 200, 'learning_rate': 0.1}

In [12]:
# Train the final model on the training split with the hyper-parameters the
# grid search selected. They are read from `best_params_` rather than copied
# by hand, so this cell stays in sync if the grid or the data change.
catb_tuned = CatBoostRegressor(**catb_cv_model.best_params_).fit(X_train, y_train)

0:	learn: 425.7900818	total: 1.61ms	remaining: 321ms
1:	learn: 404.8723520	total: 3.4ms	remaining: 336ms
2:	learn: 387.4057666	total: 5.04ms	remaining: 331ms
3:	learn: 372.2801584	total: 6.6ms	remaining: 323ms
4:	learn: 358.9204229	total: 8.81ms	remaining: 344ms
5:	learn: 347.0083933	total: 10.2ms	remaining: 331ms
6:	learn: 336.0130818	total: 11.5ms	remaining: 318ms
7:	learn: 324.3923300	total: 12.5ms	remaining: 300ms
8:	learn: 314.8690957	total: 13.5ms	remaining: 287ms
9:	learn: 308.5075563	total: 14.5ms	remaining: 275ms
10:	learn: 298.8587285	total: 15.4ms	remaining: 265ms
11:	learn: 294.7655438	total: 16.4ms	remaining: 257ms
12:	learn: 288.0697862	total: 17.3ms	remaining: 249ms
13:	learn: 282.6697154	total: 18.3ms	remaining: 243ms
14:	learn: 277.6121667	total: 19.2ms	remaining: 237ms
15:	learn: 273.4383979	total: 21.4ms	remaining: 246ms
16:	learn: 269.1556201	total: 23.2ms	remaining: 250ms
17:	learn: 264.8098704	total: 24.9ms	remaining: 252ms
18:	learn: 261.6700768	total: 26.3ms	rem

In [13]:
# Predict on the held-out split with the tuned model and display the
# root mean squared error.
y_pred = catb_tuned.predict(X_test)
np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))

344.3125832615482