### 1. Grid Search (RF)

In [23]:
import pandas as pd
import numpy as np

from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection

# Fixed seed: the forest draws bootstrap samples and feature subsets at random,
# so without it the CV scores (and the winning candidate) change on every run.
SEED = 42

# 1. Load the training data
df = pd.read_csv('mobile_clsfn_train.csv')

# 2. Input features (X): every column except the target, as a NumPy array
X = df.drop(columns=['price_range']).values

# 3. Target variable (y)
y = df['price_range'].values

# 4. Random Forest model, seeded so repeated runs give the same result
classifier = ensemble.RandomForestClassifier(random_state=SEED)

# 5. Parameters to tune: 4 x 4 x 2 = 32 candidate combinations
param_grid = {
    'n_estimators': [100, 200, 300, 400],
    'max_depth': [1, 3, 5, 7],
    'criterion': ['gini', 'entropy']
}

# 6. Exhaustive grid search, scoring each candidate by 5-fold CV accuracy
model = model_selection.GridSearchCV(
    estimator=classifier,
    param_grid=param_grid,
    scoring='accuracy',
    verbose=10,   # log the start/end of every single fit
    cv=5
)

# 7. Run the search (32 candidates x 5 folds = 160 fits)
model.fit(X, y)

# 8. Report the best mean CV accuracy and the full parameter set of the
#    best estimator (tuned values together with all untouched defaults)
print("\nAccuracy:", model.best_score_)
print("\nBest parameters(RF):", model.best_estimator_.get_params())

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV 1/5; 1/32] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 1/5; 1/32] END criterion=gini, max_depth=1, n_estimators=100;, score=0.542 total time=   0.0s
[CV 2/5; 1/32] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 2/5; 1/32] END criterion=gini, max_depth=1, n_estimators=100;, score=0.560 total time=   0.1s
[CV 3/5; 1/32] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 3/5; 1/32] END criterion=gini, max_depth=1, n_estimators=100;, score=0.623 total time=   0.0s
[CV 4/5; 1/32] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 4/5; 1/32] END criterion=gini, max_depth=1, n_estimators=100;, score=0.615 total time=   0.1s
[CV 5/5; 1/32] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 5/5; 1/32] END criterion=gini, max_depth=1, n_estimators=100;, score=0.562 total time=   0.0s
[CV 1/5; 2/32] START criterion=gini, max_de

[CV 1/5; 10/32] END criterion=gini, max_depth=5, n_estimators=200;, score=0.845 total time=   0.4s
[CV 2/5; 10/32] START criterion=gini, max_depth=5, n_estimators=200.............
[CV 2/5; 10/32] END criterion=gini, max_depth=5, n_estimators=200;, score=0.843 total time=   0.4s
[CV 3/5; 10/32] START criterion=gini, max_depth=5, n_estimators=200.............
[CV 3/5; 10/32] END criterion=gini, max_depth=5, n_estimators=200;, score=0.863 total time=   0.4s
[CV 4/5; 10/32] START criterion=gini, max_depth=5, n_estimators=200.............
[CV 4/5; 10/32] END criterion=gini, max_depth=5, n_estimators=200;, score=0.833 total time=   0.4s
[CV 5/5; 10/32] START criterion=gini, max_depth=5, n_estimators=200.............
[CV 5/5; 10/32] END criterion=gini, max_depth=5, n_estimators=200;, score=0.812 total time=   0.4s
[CV 1/5; 11/32] START criterion=gini, max_depth=5, n_estimators=300.............
[CV 1/5; 11/32] END criterion=gini, max_depth=5, n_estimators=300;, score=0.828 total time=   0.6s
[

[CV 2/5; 19/32] END criterion=entropy, max_depth=1, n_estimators=300;, score=0.542 total time=   0.4s
[CV 3/5; 19/32] START criterion=entropy, max_depth=1, n_estimators=300..........
[CV 3/5; 19/32] END criterion=entropy, max_depth=1, n_estimators=300;, score=0.537 total time=   0.4s
[CV 4/5; 19/32] START criterion=entropy, max_depth=1, n_estimators=300..........
[CV 4/5; 19/32] END criterion=entropy, max_depth=1, n_estimators=300;, score=0.525 total time=   0.4s
[CV 5/5; 19/32] START criterion=entropy, max_depth=1, n_estimators=300..........
[CV 5/5; 19/32] END criterion=entropy, max_depth=1, n_estimators=300;, score=0.555 total time=   0.4s
[CV 1/5; 20/32] START criterion=entropy, max_depth=1, n_estimators=400..........
[CV 1/5; 20/32] END criterion=entropy, max_depth=1, n_estimators=400;, score=0.583 total time=   0.5s
[CV 2/5; 20/32] START criterion=entropy, max_depth=1, n_estimators=400..........
[CV 2/5; 20/32] END criterion=entropy, max_depth=1, n_estimators=400;, score=0.550 to

[CV 2/5; 28/32] END criterion=entropy, max_depth=5, n_estimators=400;, score=0.848 total time=   1.6s
[CV 3/5; 28/32] START criterion=entropy, max_depth=5, n_estimators=400..........
[CV 3/5; 28/32] END criterion=entropy, max_depth=5, n_estimators=400;, score=0.873 total time=   1.4s
[CV 4/5; 28/32] START criterion=entropy, max_depth=5, n_estimators=400..........
[CV 4/5; 28/32] END criterion=entropy, max_depth=5, n_estimators=400;, score=0.830 total time=   1.3s
[CV 5/5; 28/32] START criterion=entropy, max_depth=5, n_estimators=400..........
[CV 5/5; 28/32] END criterion=entropy, max_depth=5, n_estimators=400;, score=0.843 total time=   1.2s
[CV 1/5; 29/32] START criterion=entropy, max_depth=7, n_estimators=100..........
[CV 1/5; 29/32] END criterion=entropy, max_depth=7, n_estimators=100;, score=0.855 total time=   0.3s
[CV 2/5; 29/32] START criterion=entropy, max_depth=7, n_estimators=100..........
[CV 2/5; 29/32] END criterion=entropy, max_depth=7, n_estimators=100;, score=0.853 to

### 2. Random Search (RF)

In [24]:
import pandas as pd
import numpy as np

from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection

# Fixed seed shared by the forest and the search: without it both the sampled
# candidates and their CV scores change on every run.
SEED = 42

# 1. Load the training data
df = pd.read_csv('mobile_clsfn_train.csv')

# 2. Input features (X): every column except the target, as a NumPy array
X = df.drop(columns=['price_range']).values

# 3. Target variable (y)
y = df['price_range'].values

# 4. Random Forest model, seeded so repeated runs give the same result
classifier = ensemble.RandomForestClassifier(random_state=SEED)

# 5. Search space to sample from (far larger than the 10 draws taken below)
param_grid = {
    'n_estimators': np.arange(100, 1500, 100),   # 100, 200, ..., 1400
    'max_depth': np.arange(1, 20),               # 1, 2, ..., 19
    'criterion': ['gini', 'entropy']
}

# 6. Randomized search: evaluate n_iter random candidates by 5-fold CV accuracy
model = model_selection.RandomizedSearchCV(
    estimator=classifier,
    param_distributions=param_grid,
    n_iter=10,
    scoring='accuracy',
    verbose=10,          # log the start/end of every single fit
    cv=5,
    random_state=SEED    # makes the sampled candidates repeatable
)

# 7. Run the search (10 candidates x 5 folds = 50 fits)
model.fit(X, y)

# 8. Report the best mean CV accuracy and the full parameter set of the
#    best estimator (tuned values together with all untouched defaults)
print("\nAccuracy:", model.best_score_)
print("\nBest parameters(RF):", model.best_estimator_.get_params())

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5; 1/10] START criterion=entropy, max_depth=7, n_estimators=400...........
[CV 1/5; 1/10] END criterion=entropy, max_depth=7, n_estimators=400;, score=0.853 total time=   1.3s
[CV 2/5; 1/10] START criterion=entropy, max_depth=7, n_estimators=400...........
[CV 2/5; 1/10] END criterion=entropy, max_depth=7, n_estimators=400;, score=0.860 total time=   1.3s
[CV 3/5; 1/10] START criterion=entropy, max_depth=7, n_estimators=400...........
[CV 3/5; 1/10] END criterion=entropy, max_depth=7, n_estimators=400;, score=0.895 total time=   1.3s
[CV 4/5; 1/10] START criterion=entropy, max_depth=7, n_estimators=400...........
[CV 4/5; 1/10] END criterion=entropy, max_depth=7, n_estimators=400;, score=0.860 total time=   1.3s
[CV 5/5; 1/10] START criterion=entropy, max_depth=7, n_estimators=400...........
[CV 5/5; 1/10] END criterion=entropy, max_depth=7, n_estimators=400;, score=0.848 total time=   1.3s
[CV 1/5; 2/10] START criterio

[CV 1/5; 10/10] END criterion=gini, max_depth=9, n_estimators=400;, score=0.877 total time=   1.8s
[CV 2/5; 10/10] START criterion=gini, max_depth=9, n_estimators=400.............
[CV 2/5; 10/10] END criterion=gini, max_depth=9, n_estimators=400;, score=0.875 total time=   1.6s
[CV 3/5; 10/10] START criterion=gini, max_depth=9, n_estimators=400.............
[CV 3/5; 10/10] END criterion=gini, max_depth=9, n_estimators=400;, score=0.892 total time=   1.6s
[CV 4/5; 10/10] START criterion=gini, max_depth=9, n_estimators=400.............
[CV 4/5; 10/10] END criterion=gini, max_depth=9, n_estimators=400;, score=0.868 total time=   1.5s
[CV 5/5; 10/10] START criterion=gini, max_depth=9, n_estimators=400.............
[CV 5/5; 10/10] END criterion=gini, max_depth=9, n_estimators=400;, score=0.858 total time=   1.5s

Accuracy: 0.8865000000000001

Best parameters(RF): {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'entropy', 'max_depth': 17, 'max_features': 'sqrt', 'max

### 3. Grid/Random Search with Pipelines

In [25]:
import pandas as pd
import numpy as np

from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection
from sklearn import decomposition
from sklearn import preprocessing
from sklearn import pipeline

# Fixed seed shared by the forest step and the search: without it both the
# sampled candidates and their CV scores change on every run.
SEED = 42

# Load the training data and split it into features (X) and target (y)
df = pd.read_csv('mobile_clsfn_train.csv')

X = df.drop(columns=['price_range']).values
y = df['price_range'].values

# Pipeline steps, applied in order inside every CV fold
scl = preprocessing.StandardScaler()                      # Standardization
pca = decomposition.PCA()                                 # PCA
rf = ensemble.RandomForestClassifier(random_state=SEED)   # Random Forest

classifier = pipeline.Pipeline([('scaling', scl), ('pca', pca), ('rf', rf)])

# Search space; the "<step>__<param>" keys route each value to the pipeline
# step of that name
param_grid = {
    'pca__n_components': np.arange(5, 10),            # 5, 6, ..., 9
    'rf__n_estimators': np.arange(100, 1500, 100),    # 100, 200, ..., 1400
    'rf__max_depth': np.arange(1, 20),                # 1, 2, ..., 19
    'rf__criterion': ['gini', 'entropy']
}

# Randomized search over the whole pipeline: n_iter random candidates, each
# scored by 5-fold CV accuracy
model = model_selection.RandomizedSearchCV(
    estimator=classifier,
    param_distributions=param_grid,
    n_iter=10,
    scoring='accuracy',
    verbose=10,          # log the start/end of every single fit
    cv=5,
    random_state=SEED    # makes the sampled candidates repeatable
)

# Run the search (10 candidates x 5 folds = 50 fits)
model.fit(X, y)

# Report the best mean CV accuracy and the full parameter set of the best
# pipeline (get_params() on a Pipeline covers every step, not only the forest)
print("\nAccuracy:", model.best_score_)
print("\nBest parameters(RF):", model.best_estimator_.get_params())

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5; 1/10] START pca__n_components=5, rf__criterion=entropy, rf__max_depth=9, rf__n_estimators=300
[CV 1/5; 1/10] END pca__n_components=5, rf__criterion=entropy, rf__max_depth=9, rf__n_estimators=300;, score=0.360 total time=   1.8s
[CV 2/5; 1/10] START pca__n_components=5, rf__criterion=entropy, rf__max_depth=9, rf__n_estimators=300
[CV 2/5; 1/10] END pca__n_components=5, rf__criterion=entropy, rf__max_depth=9, rf__n_estimators=300;, score=0.407 total time=   1.7s
[CV 3/5; 1/10] START pca__n_components=5, rf__criterion=entropy, rf__max_depth=9, rf__n_estimators=300
[CV 3/5; 1/10] END pca__n_components=5, rf__criterion=entropy, rf__max_depth=9, rf__n_estimators=300;, score=0.275 total time=   1.7s
[CV 4/5; 1/10] START pca__n_components=5, rf__criterion=entropy, rf__max_depth=9, rf__n_estimators=300
[CV 4/5; 1/10] END pca__n_components=5, rf__criterion=entropy, rf__max_depth=9, rf__n_estimators=300;, score=0.390 total time

[CV 1/5; 8/10] END pca__n_components=7, rf__criterion=gini, rf__max_depth=18, rf__n_estimators=600;, score=0.390 total time=   3.7s
[CV 2/5; 8/10] START pca__n_components=7, rf__criterion=gini, rf__max_depth=18, rf__n_estimators=600
[CV 2/5; 8/10] END pca__n_components=7, rf__criterion=gini, rf__max_depth=18, rf__n_estimators=600;, score=0.410 total time=   3.7s
[CV 3/5; 8/10] START pca__n_components=7, rf__criterion=gini, rf__max_depth=18, rf__n_estimators=600
[CV 3/5; 8/10] END pca__n_components=7, rf__criterion=gini, rf__max_depth=18, rf__n_estimators=600;, score=0.350 total time=   3.6s
[CV 4/5; 8/10] START pca__n_components=7, rf__criterion=gini, rf__max_depth=18, rf__n_estimators=600
[CV 4/5; 8/10] END pca__n_components=7, rf__criterion=gini, rf__max_depth=18, rf__n_estimators=600;, score=0.427 total time=   4.1s
[CV 5/5; 8/10] START pca__n_components=7, rf__criterion=gini, rf__max_depth=18, rf__n_estimators=600
[CV 5/5; 8/10] END pca__n_components=7, rf__criterion=gini, rf__max_

### 4. Bayesian Optimization with Gaussian Process