# Yapay Sinir Ağları

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from warnings import filterwarnings
filterwarnings("ignore")

In [2]:
# Read the Hitters data from CSV and discard every row containing a missing value.
df = pd.read_csv("hitters.csv").dropna()
# Preview the first rows of the cleaned frame.
df.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N
5,594,169,4,74,51,35,11,4408,1133,19,501,336,194,A,W,282,421,25,750.0,A


In [3]:
# One-hot encode the three categorical columns: every category level becomes
# its own indicator column (e.g. League -> League_A, League_N).
categorical_cols = ["League", "Division", "NewLeague"]
dms = pd.get_dummies(df.loc[:, categorical_cols])
dms.head()

Unnamed: 0,League_A,League_N,Division_E,Division_W,NewLeague_A,NewLeague_N
1,0,1,0,1,0,1
2,1,0,0,1,1,0
3,0,1,1,0,0,1
4,0,1,1,0,0,1
5,1,0,0,1,1,0


In [4]:
# Target: the Salary column.
y = df["Salary"]

# Features: every remaining non-categorical column cast to float64, plus one
# indicator per categorical variable (the complementary indicator is left out).
dropped_cols = ["League", "Division", "NewLeague", "Salary"]
numeric_part = df.drop(columns=dropped_cols).astype("float64")
dummy_part = dms[["League_N", "Division_W", "NewLeague_N"]]
X = pd.concat([numeric_part, dummy_part], axis=1)

# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

## Model & Tahmin

* Yapay Sinir Ağları homojen veri setlerinde daha iyi çalışan bir algoritmadır.
* Tahmin edilen değerler ile gerçek değerler arasında ciddi farklara yol açan aykırı değerler, modelin öğrenmesini olumsuz etkileyebilir.
* Bu yüzden Yapay Sinir Ağları'nı standartlaştırma işlemi sonrasında kullanmak daha sağlıklıdır.

In [5]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transformation to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled, X_test_scaled = (scaler.transform(part) for part in (X_train, X_test))

In [6]:
# Baseline network with default hyperparameters.
# MLPRegressor initialises its weights (and shuffles mini-batches) randomly, so
# the seed is pinned; without it the RMSE below changes on every re-run.
# NOTE(review): warnings are silenced globally in the import cell, so a
# ConvergenceWarning from the default iteration limit would not be visible
# here — confirm the model actually converges.
model = MLPRegressor(random_state = 42).fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
# Test-set RMSE: square root of the mean squared error.
np.sqrt(mean_squared_error(y_test, y_pred))

658.3441606328224

## Model Tuning

* **hidden_layer_sizes:** Gizli katmanların sayısını ve her katmandaki nöron sayısını belirler.
* Biz birbirinden farklı katman sayılarını denemek istiyoruz.
* Girilen eleman sayısı kadar gizli katman, her elemanın değeri kadar da o katmanda nöron oluşur. **(örn. (10,20): 2 gizli katman; ilkinde 10, ikincisinde 20 nöron)**

In [7]:
# Search grid: L2 regularisation strength (alpha) and hidden-layer layout
# (one tuple entry per hidden layer, the value being that layer's neuron count).
params = {"alpha" : [0.1, 0.01, 0.02, 0.001, 0.0001],
          "hidden_layer_sizes" : [(10,20),(50,50),(100,100)]}

# Seed the estimator: GridSearchCV clones it for every candidate/fold, so each
# fit starts from the same random state and the selected parameters are
# reproducible across kernel restarts.
mlp = MLPRegressor(random_state = 42)
# 10-fold CV over all 15 combinations, run in parallel on every available core.
# No `scoring` is passed, so candidates are ranked by the estimator's default
# score (R^2 for regressors), not by RMSE.
model_cv = GridSearchCV(mlp, params, cv = 10, verbose = 2, n_jobs = -1).fit(X_train_scaled, y_train)
model_cv.best_params_

Fitting 10 folds for each of 15 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:    8.6s finished


{'alpha': 0.1, 'hidden_layer_sizes': (100, 100)}

In [8]:
# Refit on the training split with the hyperparameters the grid search picked.
# They are read from `model_cv.best_params_` instead of being copied by hand,
# so this cell cannot drift out of sync when the search result changes.
# The seed keeps the reported RMSE reproducible.
model_tuned = MLPRegressor(random_state = 42, **model_cv.best_params_).fit(X_train_scaled, y_train)
y_pred = model_tuned.predict(X_test_scaled)
# Test-set RMSE of the tuned model.
np.sqrt(mean_squared_error(y_test, y_pred))

359.872813658499