# Makine Öğrenmesi Algoritmalarının Otomatikleştirilmesi

In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from warnings import filterwarnings
filterwarnings("ignore")

In [2]:
# Load the Hitters data and keep only the rows without any missing value.
df = pd.read_csv("hitters.csv").dropna()
df.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N
5,594,169,4,74,51,35,11,4408,1133,19,501,336,194,A,W,282,421,25,750.0,A


In [3]:
# One-hot encode the three categorical columns; every level gets its own
# indicator column (e.g. League_A / League_N).
dms = pd.get_dummies(df.loc[:, ["League", "Division", "NewLeague"]])
dms.head()

Unnamed: 0,League_A,League_N,Division_E,Division_W,NewLeague_A,NewLeague_N
1,0,1,0,1,0,1
2,1,0,0,1,1,0
3,0,1,1,0,0,1
4,0,1,1,0,0,1
5,1,0,0,1,1,0


In [4]:
def compML(df, y, alg, cat_cols=("League", "Division", "NewLeague")):
    """Fit a default-configured regressor on a hold-out split and print its test metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the target column ``y``, the categorical columns
        named in ``cat_cols`` and the remaining feature columns, which are
        cast to float64.
    y : str
        Name of the target column in ``df``.
    alg : type
        Estimator class (not an instance). It is instantiated with no
        arguments and must expose ``fit`` and ``predict``.
    cat_cols : sequence of str, optional
        Categorical columns to dummy-encode. The first level of each column
        is dropped; for the two-level League / Division / NewLeague columns
        this leaves League_N, Division_W and NewLeague_N.

    Returns
    -------
    None
        The model name, test RMSE and test R^2 are printed, not returned.
    """
    cat_cols = list(cat_cols)
    target = df[y]

    # Drop the column named by `y` rather than a hard-coded "Salary", so the
    # target never leaks into the features when another column is predicted.
    X = df.drop(columns=[y] + cat_cols).astype("float64")

    if cat_cols:
        # Encode from the frame that was passed in instead of relying on a
        # notebook-global dummy table, so the function works on any frame
        # and the rows are guaranteed to line up.
        dummies = pd.get_dummies(df[cat_cols], drop_first=True)
        X = pd.concat([X, dummies], axis=1)

    # Fixed seed keeps the 75/25 split identical across models.
    X_train, X_test, y_train, y_test = train_test_split(
        X, target, test_size=0.25, random_state=42
    )

    model = alg().fit(X_train, y_train)
    y_pred = model.predict(X_test)

    RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    model_name = alg.__name__
    print("Model:", model_name, "\n"
          "RMSE:", RMSE, "\n"
          "r2_score:", r2)

In [5]:
# Smoke-test the helper on a single estimator class.
compML(df=df, y="Salary", alg=LGBMRegressor)

Model: LGBMRegressor 
RMSE: 363.8712087611089 
r2_score: 0.3883739925283991


In [6]:
# Regressor classes to benchmark; compML instantiates each one with its
# default hyperparameters.
models = [
    LGBMRegressor,
    XGBRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
    DecisionTreeRegressor,
    MLPRegressor,
    KNeighborsRegressor,
    SVR,
]

for model_cls in models:
    # compML prints its own report and returns None, so call it as a
    # statement; wrapping it in print() emitted a stray "None" per model.
    compML(df, "Salary", model_cls)
    print()  # blank line between reports

Model: LGBMRegressor 
RMSE: 363.8712087611089 
r2_score: 0.3883739925283991
None 

Model: XGBRegressor 
RMSE: 355.46515176059927 
r2_score: 0.41630682013515363
None 

Model: GradientBoostingRegressor 
RMSE: 355.1812813528464 
r2_score: 0.4172387092938751
None 

Model: RandomForestRegressor 
RMSE: 340.1653499679345 
r2_score: 0.4654716995495518
None 

Model: DecisionTreeRegressor 
RMSE: 446.9487289298405 
r2_score: 0.07720343286724818
None 

Model: MLPRegressor 
RMSE: 456.3651774276728 
r2_score: 0.037910324247586336
None 

Model: KNeighborsRegressor 
RMSE: 426.6570764525201 
r2_score: 0.1590920400477076
None 

Model: SVR 
RMSE: 460.0032657244849 
r2_score: 0.02250985629101887
None 

