<a href="https://www.kaggle.com/code/samithsachidanandan/grid-search-vs-random-search-vs-pipeline-search?scriptVersionId=273501937" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
# Load the mobile-price training CSV from the Kaggle input directory.
df = pd.read_csv('/kaggle/input/mobile-price-dataset/mobile_price_train.csv')

In [3]:
# Preview the first rows to check the feature columns and the price_range target.
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,21,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [4]:
  
# Feature matrix: every column except the target, as a NumPy array.
# NOTE(review): the rendered preview shows an "Unnamed: 0" header; if that is a
# real column (a saved index) rather than a display artefact, it is being used
# as a feature here - confirm against the CSV.
X = df.drop(columns=["price_range"]).values

# Target vector: the price_range column as a NumPy array.
y = df["price_range"].values
    
    

### Grid Search

In [5]:
# Exhaustive grid search over forest size, tree depth and split criterion.
# random_state pins the forest's bootstrap and feature sampling so the CV scores
# (and therefore the selected parameters) are reproducible across
# Restart & Run All; without it every run can report a different winner.
classifier = ensemble.RandomForestClassifier(n_jobs=-1, random_state=42)

# 6 x 6 x 2 = 72 candidate combinations.
param_grid = {
    "n_estimators": [100, 200, 250, 300, 400, 500],
    "max_depth": [1, 2, 5, 7, 11, 15],
    "criterion": ["gini", "entropy"],
}

# Each candidate is scored by 5-fold cross-validated accuracy. The search runs
# candidates one at a time (n_jobs=1); the forest itself is configured with
# n_jobs=-1 above.
model = model_selection.GridSearchCV(
    estimator=classifier,
    param_grid=param_grid,
    scoring="accuracy",
    verbose=10,
    n_jobs=1,
    cv=5,
)

model.fit(X, y)
print(f"Best score: {model.best_score_}")

print("Best parameters set:")
# best_params_ holds exactly the tuned hyper-parameters of the winning candidate.
best_parameters = model.best_params_

for param_name in sorted(param_grid.keys()):
    print(f"\t{param_name}: {best_parameters[param_name]}")

Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV 1/5; 1/72] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 1/5; 1/72] END criterion=gini, max_depth=1, n_estimators=100;, score=0.562 total time=   0.4s
[CV 2/5; 1/72] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 2/5; 1/72] END criterion=gini, max_depth=1, n_estimators=100;, score=0.588 total time=   0.3s
[CV 3/5; 1/72] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 3/5; 1/72] END criterion=gini, max_depth=1, n_estimators=100;, score=0.608 total time=   0.3s
[CV 4/5; 1/72] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 4/5; 1/72] END criterion=gini, max_depth=1, n_estimators=100;, score=0.595 total time=   0.3s
[CV 5/5; 1/72] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 5/5; 1/72] END criterion=gini, max_depth=1, n_estimators=100;, score=0.570 total time=   0.3s
[CV 1/5; 2/72] START criterion=gini, max_de

### Random Search 

In [6]:
# Fresh forest for the randomized search. random_state fixes the forest's own
# randomness so each candidate's CV score is repeatable between runs.
classifier = ensemble.RandomForestClassifier(n_jobs=-1, random_state=42)

In [7]:
# Search space for the randomized search:
#   n_estimators: 100, 200, ..., 1400 (14 values)
#   max_depth:    1 .. 30            (30 values)
#   criterion:    gini or entropy    (2 values)
# i.e. 840 possible combinations in total.
param_grid = dict(
    n_estimators=np.arange(100, 1500, 100),
    max_depth=np.arange(1, 31),
    criterion=["gini", "entropy"],
)

In [8]:
# Randomized search: evaluate n_iter=20 parameter combinations drawn from
# param_grid, each scored by 5-fold cross-validated accuracy.
# random_state fixes which combinations are drawn, so a re-run evaluates the
# same 20 candidates instead of a new random subset each time.
model = model_selection.RandomizedSearchCV(
    estimator=classifier,
    param_distributions=param_grid,
    n_iter=20,
    scoring="accuracy",
    verbose=10,
    n_jobs=1,
    cv=5,
    random_state=42,
)


In [9]:

# Run the search on the full feature matrix and target.
model.fit(X, y)

# Report the best mean CV score, then the winning value of every searched
# hyper-parameter in alphabetical order.
print(f"Best score: {model.best_score_}")
print("Best parameters set:")

best_parameters = model.best_estimator_.get_params()
for param_name in sorted(param_grid):
    print(f"\t{param_name}: {best_parameters[param_name]}")

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5; 1/20] START criterion=gini, max_depth=23, n_estimators=300.............
[CV 1/5; 1/20] END criterion=gini, max_depth=23, n_estimators=300;, score=0.890 total time=   1.3s
[CV 2/5; 1/20] START criterion=gini, max_depth=23, n_estimators=300.............
[CV 2/5; 1/20] END criterion=gini, max_depth=23, n_estimators=300;, score=0.873 total time=   1.3s
[CV 3/5; 1/20] START criterion=gini, max_depth=23, n_estimators=300.............
[CV 3/5; 1/20] END criterion=gini, max_depth=23, n_estimators=300;, score=0.895 total time=   1.3s
[CV 4/5; 1/20] START criterion=gini, max_depth=23, n_estimators=300.............
[CV 4/5; 1/20] END criterion=gini, max_depth=23, n_estimators=300;, score=0.882 total time=   1.3s
[CV 5/5; 1/20] START criterion=gini, max_depth=23, n_estimators=300.............
[CV 5/5; 1/20] END criterion=gini, max_depth=23, n_estimators=300;, score=0.875 total time=   1.3s
[CV 1/5; 2/20] START criterion=entropy

### Pipeline Search

In [10]:
def quadratic_weighted_kappa(y_true, y_pred):
    """Return Cohen's kappa between two label sequences with quadratic weights.

    Thin wrapper around ``metrics.cohen_kappa_score`` that fixes
    ``weights="quadratic"``.

    Parameters
    ----------
    y_true : labels passed as the first rater to ``cohen_kappa_score``.
    y_pred : labels passed as the second rater to ``cohen_kappa_score``.

    Returns
    -------
    The value returned by ``metrics.cohen_kappa_score``.
    """
    # NOTE(review): no cell shown in this notebook calls this helper - confirm
    # whether it was meant to be used as a scorer for one of the searches.
    kappa = metrics.cohen_kappa_score(y_true, y_pred, weights="quadratic")
    return kappa

In [11]:
# Load the training and test CSVs from the Kaggle input directory.
# The training file is the same path read into `df` earlier in the notebook.
train = pd.read_csv('/kaggle/input/mobile-price-dataset/mobile_price_train.csv')
# NOTE(review): `test` is not used by any cell shown here - confirm it is needed.
test = pd.read_csv('/kaggle/input/mobile-price-dataset/mobile_price_test.csv')

In [12]:
# Preview the first rows of the training frame.
train.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,21,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [13]:
# Features (all columns but the target) and target, kept as pandas objects.
X = train.drop(columns=["price_range"])
y = train["price_range"]

# Scale the features, then fit an SVM. Because the scaler lives inside the
# pipeline, the grid search fits it together with the SVM on each training split.
pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svm", SVC()),
    ]
)

# Search space: the "<step>__<param>" keys route each value to the "svm" step.
# 3 values of C x 3 values of gamma x 1 kernel = 9 candidates.
param_grid = dict(
    svm__C=[0.1, 1, 10],
    svm__gamma=["scale", 0.01, 0.1],
    svm__kernel=["rbf"],
)

# 5-fold cross-validated accuracy for every candidate, run on all cores.
grid = GridSearchCV(
    pipe,
    param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    verbose=2,
)

grid.fit(X, y)

print("Best Score:", grid.best_score_)
print("Best Params:", grid.best_params_)

# Keep the winning pipeline (scaler + SVM) for later use.
best_model = grid.best_estimator_

Fitting 5 folds for each of 9 candidates, totalling 45 fits
Best Score: 0.9260000000000002
Best Params: {'svm__C': 1, 'svm__gamma': 0.01, 'svm__kernel': 'rbf'}
