## Section 2.2

### Again using Iris Dataset

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [2]:
# Load the Iris dataset, then pull the feature matrix and the class labels
# out of the returned bunch.
iris = load_iris()
X = iris.data
y = iris.target

In [3]:
# Hold out 20% of the samples for testing. random_state pins the shuffle so
# the same split (and therefore the same scores) is obtained on every re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

#### 1. Scratch Implementation

In [4]:
import random
import numpy as np

def loguniform(low=0, high=1, size=100, base=10, random_state=None):
    """Draw random values that are uniformly distributed on a log scale.

    Exponents are sampled uniformly from ``[low, high)`` and ``base`` is
    raised to them, so the results lie between ``base**low`` and
    ``base**high``.

    Args:
        low: Lower bound of the exponent.
        high: Upper bound of the exponent.
        size: Number (or shape) of values to draw.
        base: Base that is raised to the sampled exponents.
        random_state: Optional seed or ``numpy.random.Generator`` used for the
            draw. When ``None`` (the default) the global ``np.random`` state is
            used, exactly as before, so ``np.random.seed`` still applies.

    Returns:
        numpy.ndarray: The sampled values.
    """
    # Fall back to the legacy global state when no seed is given so existing
    # callers see unchanged behaviour.
    if random_state is None:
        sampler = np.random
    else:
        sampler = np.random.default_rng(random_state)
    exponents = sampler.uniform(low, high, size)
    return np.power(base, exponents)

# Candidate pools for the search: each hyperparameter gets an array of
# log-uniformly sampled values (gamma in 10**-10..10**4, C in 10**-3..10**11).
param_grid = dict(
    gamma=loguniform(low=-10, high=4, base=10),
    C=loguniform(low=-3, high=11, base=10),
)

In [5]:
def get_random_hp_set(grid):
    """Draw one hyperparameter combination from ``grid``.

    ``grid`` maps each hyperparameter name to a collection of candidate
    values; one value per name is picked with ``np.random.choice``.

    Returns:
        dict: Hyperparameter name -> the randomly chosen value.
    """
    return {name: np.random.choice(candidates) for name, candidates in grid.items()}

In [6]:
def random_search(clf, grid, n_iterations, X_train, y_train, X_test, y_test):
    """Run a random hyperparameter search and record every trial.

    On each iteration one hyperparameter set is drawn with
    ``get_random_hp_set(grid)``, ``clf(**hp_set)`` is built and fitted on the
    training data, and the fitted model is scored on both the training and
    the test data.

    Args:
        clf: Callable (e.g. an estimator class) that accepts the sampled
            hyperparameters as keyword arguments and returns an object with
            ``fit(X, y)`` and ``score(X, y)`` methods.
        grid: Mapping passed to ``get_random_hp_set`` to draw each candidate.
        n_iterations: Number of candidates to evaluate.
        X_train, y_train: Data each candidate is fitted on.
        X_test, y_test: Data whose score is used to rank the candidates.

    Returns:
        tuple: ``(logs, best_hp_set)``. ``logs`` is a list with one dict per
        iteration (keys ``"hp"``, ``"train_score"``, ``"test_score"``).
        ``best_hp_set`` holds ``"best_test_score"`` and, as soon as at least
        one candidate has been evaluated, ``"hp_set"``.

    Note:
        The winner is picked by its score on ``X_test``/``y_test``, so
        ``best_test_score`` was itself used for selection.
    """
    logs = []
    best_hp_set = {"best_test_score": 0.0}

    for _ in range(n_iterations):
        # Draw a fresh candidate and evaluate it.
        hp_set = get_random_hp_set(grid)
        model = clf(**hp_set)
        model.fit(X_train, y_train)
        train_score = model.score(X_train, y_train)
        test_score = model.score(X_test, y_test)

        logs.append(
            {"hp": hp_set, "train_score": train_score, "test_score": test_score}
        )

        # Always record the first evaluated candidate: with only the strict
        # comparison against the 0.0 placeholder, a run in which no candidate
        # scores above 0.0 would return a result without an "hp_set" entry.
        if "hp_set" not in best_hp_set or best_hp_set["best_test_score"] < test_score:
            best_hp_set["best_test_score"] = test_score
            best_hp_set["hp_set"] = hp_set

    return logs, best_hp_set

In [7]:
from sklearn.svm import SVC

# Evaluate 20 randomly drawn candidates from param_grid with the scratch search.
logs, best = random_search(
    clf=SVC,
    grid=param_grid,
    n_iterations=20,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

In [8]:
# Display the best test score found by random_search and the hyperparameters
# that produced it.
best

{'best_test_score': 1.0,
 'hp_set': {'gamma': 0.00797171003068375, 'C': 7.540309393263641}}

#### 2. Using RandomizedSearchCV from Scikit-Learn

In [9]:
from sklearn.model_selection import RandomizedSearchCV

In [10]:
# Try 20 candidates drawn from param_grid, each scored with 3-fold
# cross-validation on the training data. random_state pins which candidates
# are drawn so the search gives the same result on every re-run.
clf = RandomizedSearchCV(SVC(), param_grid, n_iter=20, cv=3, random_state=42)
clf.fit(X_train, y_train)

RandomizedSearchCV(cv=3, estimator=SVC(), n_iter=20,
                   param_distributions={'C': array([1.75434095e+02, 8.42174049e+00, 8.73379196e+04, 4.71531577e-02,
       5.80297770e-01, 2.26900446e-01, 1.40126999e-03, 7.16724080e+04,
       7.36827187e+03, 8.17567567e+09, 2.27758251e+09, 6.96439337e+03,
       3.83990168e+01, 1.69372702e-02, 2.18588215e+06, 3.22067266e+10,
       3.27397167e-03, 8.53399071e+0...
       2.04313456e+03, 1.89691854e+01, 1.28770150e-03, 1.47144366e-07,
       8.12001785e-08, 1.20600739e-02, 1.39540862e-05, 9.01774886e-04,
       2.83940416e+02, 1.54729492e-07, 7.22696175e+03, 5.87213192e-08,
       6.78005902e-09, 4.52941091e+00, 1.18327933e-03, 6.80673036e+01,
       4.63163521e+01, 8.78363147e-02, 7.22486627e-09, 9.55041001e-09,
       4.95616407e-07, 6.33498536e-08, 1.90574768e+02, 2.09632354e-05])})

In [11]:
# Display the estimator that RandomizedSearchCV selected as best.
clf.best_estimator_

SVC(C=1418247.5308956155, gamma=9.855717172593676e-07)

In [12]:
# print(f'Test Score: {clf.score(X_test, y_test)}')
# print(f'Train Score: {clf.score(X_train, y_train)}')