In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import Normalizer

In [None]:
# Read the Hitters dataset from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="/content/Hitters.csv")
df.head(5)

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
0,293,66,1,30,29,14,1,293,66,1,30,29,14,A,E,446,33,20,,A
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N


In [None]:
# Drop incomplete rows once, up front, so the feature matrix and the target
# are built from exactly the same rows (and y never contains NaN).
hitters_complete = df.dropna()
y = hitters_complete["Salary"]

# One-hot encode the categorical columns. Salary is removed from the features
# because it is the target: leaving it in would leak the answer into X.
dummies = pd.get_dummies(
    hitters_complete.drop(columns=["Salary"]),
    columns=["League", "Division", "NewLeague"],
)

# Normalizer rescales each row (sample) to unit norm; it does not scale columns.
X = Normalizer().fit_transform(dummies)
y

1       475.0
2       480.0
3       500.0
4        91.5
5       750.0
        ...  
317     700.0
318     875.0
319     385.0
320     960.0
321    1000.0
Name: Salary, Length: 263, dtype: float64

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Scaling and Normalizing

**StandardScaler:** Her özelliği (sütunu) ortalaması 0, standart sapması 1 olacak şekilde dönüştürür; kısaca veriyi standartlaştırır. Standardizasyon, negatif değerlere sahip veriler için faydalıdır. Verinin dağılım şeklini değiştirmez; yalnızca ortalamasını ve ölçeğini standart normal dağılımınkilerle (0 ve 1) aynı hale getirir. Regresyondan ziyade sınıflandırmada daha kullanışlıdır.

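**Normalizer:** Her örneği (satırı) birim norma (varsayılan olarak L2 normu) sahip olacak şekilde yeniden ölçekler; yani normalleştirme yapar. Sütun bazında 0 ile 1 arasına sıkıştırma işlemini ise `MinMaxScaler` yapar. Değer aralığının ve büyüklüklerin küçülmesi sayesinde eğitim sürecinde gradyanlar patlamaz ve aşırı yüksek kayıp değerleri oluşmaz. Regresyonda sınıflandırmadan daha faydalıdır.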

In [None]:
# Baseline elastic-net regression with a fixed penalty strength of 0.1.
model = ElasticNet(alpha=0.1)
model = model.fit(X_train, y_train)

In [None]:
# Display the coefficients learned by the fitted model.
model.coef_

array([-4.60299812e+02, -1.06442808e+02, -7.68529948e+00, -5.20387165e+01,
       -4.57911304e+01, -3.98483532e+01, -4.16736898e+00,  1.07777113e+02,
        9.69776823e+01,  2.53615998e+01,  6.24239211e+01,  8.49424052e+01,
        3.28344952e+01, -1.74271749e+02, -1.24084516e+02, -1.22624993e+01,
        3.86125964e+02, -7.53885078e-03, -7.13246639e-03, -0.00000000e+00,
       -1.73557812e-01, -5.80386448e-02, -0.00000000e+00])

In [None]:
# Root-mean-squared error of the baseline model on the training data
# (in-sample error, not an estimate of generalization performance).
np.sqrt(
    mean_squared_error(
        y_true=y_train,
        y_pred=model.predict(X_train),
    )
)

380.4117342227405

In [None]:
# Tuning the alpha parameter: refit the model for every penalty in a
# log-spaced grid (5e9 down to 5e-3) and keep each resulting coefficient vector.

alpha_set = 0.5 * 10 ** np.linspace(10, -2, 100)
coefs = [
    model.set_params(alpha=alpha).fit(X_train, y_train).coef_
    for alpha in alpha_set
]

In [None]:
# Inspect the grid of candidate alpha values built for the tuning loop.
alpha_set

array([5.00000000e+09, 3.78231664e+09, 2.86118383e+09, 2.16438064e+09,
       1.63727458e+09, 1.23853818e+09, 9.36908711e+08, 7.08737081e+08,
       5.36133611e+08, 4.05565415e+08, 3.06795364e+08, 2.32079442e+08,
       1.75559587e+08, 1.32804389e+08, 1.00461650e+08, 7.59955541e+07,
       5.74878498e+07, 4.34874501e+07, 3.28966612e+07, 2.48851178e+07,
       1.88246790e+07, 1.42401793e+07, 1.07721735e+07, 8.14875417e+06,
       6.16423370e+06, 4.66301673e+06, 3.52740116e+06, 2.66834962e+06,
       2.01850863e+06, 1.52692775e+06, 1.15506485e+06, 8.73764200e+05,
       6.60970574e+05, 5.00000000e+05, 3.78231664e+05, 2.86118383e+05,
       2.16438064e+05, 1.63727458e+05, 1.23853818e+05, 9.36908711e+04,
       7.08737081e+04, 5.36133611e+04, 4.05565415e+04, 3.06795364e+04,
       2.32079442e+04, 1.75559587e+04, 1.32804389e+04, 1.00461650e+04,
       7.59955541e+03, 5.74878498e+03, 4.34874501e+03, 3.28966612e+03,
       2.48851178e+03, 1.88246790e+03, 1.42401793e+03, 1.07721735e+03,
      

In [None]:
# Using CV to pick the best alpha parameter.
# ElasticNetCV scores every candidate in alpha_set with 10-fold
# cross-validation and then refits on the full training set with the winner.
e_net_cv = ElasticNetCV(alphas=alpha_set, cv=10, max_iter=100000).fit(X_train, y_train)

# One fit is sufficient: alphas_ is just the sorted candidate grid, so fitting
# again with it would repeat the identical (and expensive) search.
# `model` stays bound to the fitted CV estimator so that model.alphas and
# model.alpha_ remain available.
model = e_net_cv

In [None]:
# Candidate alphas the cross-validated estimator was configured with.
model.alphas

array([5.00000000e+09, 3.78231664e+09, 2.86118383e+09, 2.16438064e+09,
       1.63727458e+09, 1.23853818e+09, 9.36908711e+08, 7.08737081e+08,
       5.36133611e+08, 4.05565415e+08, 3.06795364e+08, 2.32079442e+08,
       1.75559587e+08, 1.32804389e+08, 1.00461650e+08, 7.59955541e+07,
       5.74878498e+07, 4.34874501e+07, 3.28966612e+07, 2.48851178e+07,
       1.88246790e+07, 1.42401793e+07, 1.07721735e+07, 8.14875417e+06,
       6.16423370e+06, 4.66301673e+06, 3.52740116e+06, 2.66834962e+06,
       2.01850863e+06, 1.52692775e+06, 1.15506485e+06, 8.73764200e+05,
       6.60970574e+05, 5.00000000e+05, 3.78231664e+05, 2.86118383e+05,
       2.16438064e+05, 1.63727458e+05, 1.23853818e+05, 9.36908711e+04,
       7.08737081e+04, 5.36133611e+04, 4.05565415e+04, 3.06795364e+04,
       2.32079442e+04, 1.75559587e+04, 1.32804389e+04, 1.00461650e+04,
       7.59955541e+03, 5.74878498e+03, 4.34874501e+03, 3.28966612e+03,
       2.48851178e+03, 1.88246790e+03, 1.42401793e+03, 1.07721735e+03,
      

In [None]:
# Penalty strength selected by cross-validation.
model.alpha_

0.005

In [None]:
# RMSE of the cross-validated model on the training data (in-sample error).
y_pred = model.predict(X_train)
# Keyword arguments make the roles of the two arrays explicit.
RMSE = np.sqrt(mean_squared_error(y_true=y_train, y_pred=y_pred))
RMSE

289.7205491110078