# **Grid Search**

## *Data Import*

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the dataset from the Excel workbook located next to the notebook.
DATA_FILE = "diabetes.xlsx"
df = pd.read_excel(DATA_FILE)

In [3]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y
0,59,2,32.1,101.0,157,93.2,38.0,4.0,4.8598,87,151
1,48,1,21.6,87.0,183,103.2,70.0,3.0,3.8918,69,75
2,72,2,30.5,93.0,156,93.6,41.0,4.0,4.6728,85,141
3,24,1,25.3,84.0,198,131.4,40.0,5.0,4.8903,89,206
4,50,1,23.0,101.0,192,125.4,52.0,4.0,4.2905,80,135


In [4]:
# Summary statistics, transposed so each column of `df` becomes one row.
summary_stats = df.describe()
summary_stats.transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
AGE,442.0,48.5181,13.109028,19.0,38.25,50.0,59.0,79.0
SEX,442.0,1.468326,0.499561,1.0,1.0,1.0,2.0,2.0
BMI,442.0,26.375792,4.418122,18.0,23.2,25.7,29.275,42.2
BP,442.0,94.647014,13.831283,62.0,84.0,93.0,105.0,133.0
S1,442.0,189.140271,34.608052,97.0,164.25,186.0,209.75,301.0
S2,442.0,115.43914,30.413081,41.6,96.05,113.0,134.5,242.4
S3,442.0,49.788462,12.934202,22.0,40.25,48.0,57.75,99.0
S4,442.0,4.070249,1.29045,2.0,3.0,4.0,5.0,9.09
S5,442.0,4.641411,0.522391,3.2581,4.2767,4.62005,4.9972,6.107
S6,442.0,91.260181,11.496335,58.0,83.25,91.0,98.0,124.0


In [5]:
# Remove the "Y" column so it is not among the features; S6 is used as the
# target further down. Reassigning (instead of inplace=True) together with
# errors="ignore" keeps this cell idempotent: re-running it after "Y" is
# already gone no longer raises a KeyError.
df = df.drop(columns="Y", errors="ignore")

## *Train Test Split*

In [6]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split

# Separate the target column (S6) from the remaining predictor columns.
target_column = "S6"
y = df[target_column]
X = df.drop(columns=target_column)

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=9,
)

### **Standard Scaler**

In [7]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits so the test data never influences it.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

### **Grid Search**

In [8]:
from sklearn.linear_model import ElasticNet

In [9]:
# Base ElasticNet estimator, constructed with no arguments (library defaults).
# It is handed to GridSearchCV below, which tries the alpha / l1_ratio values
# listed in `params`.
elasticnet = ElasticNet()

In [10]:
from sklearn.model_selection import GridSearchCV

In [17]:
# Hyperparameter grid for the search: 8 alpha values x 7 l1_ratio values,
# i.e. 56 candidate combinations.
alpha_values = [.01, .1, 1, 5, 10, 50, 100, 1000]
l1_ratio_values = [.01, .1, .5, .7, .9, .95, .99]
params = dict(alpha=alpha_values, l1_ratio=l1_ratio_values)

In [32]:
# 5-fold cross-validated search over every combination in `params`.
# Candidates are ranked by negative mean squared error (a "higher is better"
# score, so the lowest-error combination ranks first). verbose=2 logs one line
# per individual fit.
# NOTE(review): X_train was standardized on all training rows before this
# search, so each validation fold contributed to the scaling statistics.
# Wrapping the scaler and the model in a Pipeline would avoid that.
grid_model = GridSearchCV(
    elasticnet,
    params,
    scoring="neg_mean_squared_error",
    cv=5,
    verbose=2,
)

In [33]:
# Run the search on the scaled training split.
grid_model.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 56 candidates, totalling 280 fits
[CV] END ..........................alpha=0.01, l1_ratio=0.01; total time=   0.0s
[CV] END ..........................alpha=0.01, l1_ratio=0.01; total time=   0.0s
[CV] END ..........................alpha=0.01, l1_ratio=0.01; total time=   0.0s
[CV] END ..........................alpha=0.01, l1_ratio=0.01; total time=   0.0s
[CV] END ..........................alpha=0.01, l1_ratio=0.01; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.1; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.1; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.1; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.1; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.1; total time=   0.0s
[CV] END ...........................alpha=0.01, l1_ratio=0.5; total time=   0.0s
[CV] END ...........................alpha=0.01,

In [34]:
# Display the estimator chosen by the grid search. `refit` is not overridden
# when grid_model is built, so per the scikit-learn default this should be the
# best candidate refit on the full training split.
grid_model.best_estimator_

In [35]:
# Tabulate the cross-validation results: one row per candidate, with timings,
# per-fold test scores, mean/std test score and rank.
cv_results = grid_model.cv_results_
grid_result = pd.DataFrame(data=cv_results)

In [37]:
# Peek at the first five candidates, in the order the grid was evaluated.
grid_result.head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001506,0.000347,0.000242,5.8e-05,0.01,0.01,"{'alpha': 0.01, 'l1_ratio': 0.01}",-63.957924,-78.12855,-121.123616,-89.13051,-123.515159,-95.171152,23.571671,5
1,0.001429,0.000399,0.000327,0.000128,0.01,0.1,"{'alpha': 0.01, 'l1_ratio': 0.1}",-63.974029,-78.161528,-121.092306,-89.091881,-123.576824,-95.179314,23.572583,6
2,0.000965,0.000143,0.000241,0.000171,0.01,0.5,"{'alpha': 0.01, 'l1_ratio': 0.5}",-64.057475,-78.365711,-120.894844,-88.84248,-123.951623,-95.222426,23.58197,7
3,0.000855,3.2e-05,0.000129,3e-06,0.01,0.7,"{'alpha': 0.01, 'l1_ratio': 0.7}",-64.119881,-78.527451,-120.741011,-88.643087,-124.243161,-95.254918,23.591414,8
4,0.000913,4.3e-05,0.000128,1e-05,0.01,0.9,"{'alpha': 0.01, 'l1_ratio': 0.9}",-64.225981,-78.767361,-120.523437,-88.355092,-124.671983,-95.30877,23.605463,11


In [38]:
# Predict on the held-out (scaled) test split using the fitted search object.
y_pred = grid_model.predict(X=X_test)

In [39]:
from sklearn.metrics import mean_squared_error

In [40]:
# Root-mean-squared error on the test split: the square root of the MSE, so it
# is expressed in the same units as the target.
test_mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(test_mse)

print("RMSE :  " + str(round(rmse, 2)))

RMSE :  9.44
