In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dataLoad import PulsarData
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
from sklearn.model_selection import cross_val_score

In [8]:
# For pretty plotting.
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names, so apply whichever name this
# installation provides (newer name first). If neither is present the
# default style is left in place rather than failing the cell.
for _style_name in ("seaborn-v0_8-paper", "seaborn-paper"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
plt.rcParams["font.family"] = "serif"

In [9]:
# Build the HTRU_2 dataset wrapper once and read both attributes from the same
# object, so features and targets are guaranteed to come from a single load
# (the previous version constructed PulsarData twice).
pulsar_data = PulsarData('HTRU_2')
raw_features = pulsar_data.features
raw_targets = pulsar_data.targets

In [10]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split
# reproducible across kernel restarts.
(
    train_features_data,
    test_features_data,
    train_targets_data,
    test_targets_data,
) = train_test_split(
    raw_features,
    raw_targets,
    test_size=0.25,
    random_state=42,
)

In [17]:
def GP_CrossValidation(max_iter_predict, n_restarts_optimizer, data, targets,
                       cv=5, scoring='accuracy'):
    """Cross-validate a Gaussian Process classifier and return the mean score.

    Builds a ``GaussianProcessClassifier`` (seeded with ``random_state=42``)
    using the given hyperparameters, scores it with ``cross_val_score`` on
    ``data``/``targets`` and returns the mean of the per-fold scores. The goal
    is to find the combination of ``max_iter_predict`` and
    ``n_restarts_optimizer`` that maximizes that mean.

    Parameters
    ----------
    max_iter_predict : int
        Forwarded to ``GaussianProcessClassifier(max_iter_predict=...)``.
    n_restarts_optimizer : int
        Forwarded to ``GaussianProcessClassifier(n_restarts_optimizer=...)``.
    data : array-like
        Feature matrix handed to ``cross_val_score``.
    targets : array-like
        Class labels handed to ``cross_val_score``.
    cv : optional
        Cross-validation setting forwarded to ``cross_val_score``; defaults
        to 5, the value that used to be hard-coded.
    scoring : optional
        Scoring setting forwarded to ``cross_val_score``; defaults to
        ``'accuracy'``, the value that used to be hard-coded.

    Returns
    -------
    The result of calling ``.mean()`` on the scores returned by
    ``cross_val_score``.
    """

    estimator = GaussianProcessClassifier(random_state=42,
                                          max_iter_predict=max_iter_predict,
                                          n_restarts_optimizer=n_restarts_optimizer)

    fold_scores = cross_val_score(estimator, data, targets, scoring=scoring, cv=cv)

    return fold_scores.mean()

In [18]:
def optimize_GP(data, targets, pars, n_iter=5, init_points=4):
    """Apply Bayesian Optimization to Gaussian Process classifier parameters.

    Parameters
    ----------
    data, targets :
        Training features and labels; forwarded unchanged to
        ``GP_CrossValidation`` on every evaluation.
    pars : dict
        Search bounds passed to ``BayesianOptimization`` as ``pbounds``. Its
        keys must be ``max_iter_predict`` and ``n_restarts_optimizer`` because
        the optimizer calls the objective with those keyword names.
    n_iter : int, optional
        Forwarded to ``optimizer.maximize(n_iter=...)``.
    init_points : int, optional
        Forwarded to ``optimizer.maximize(init_points=...)``; defaults to 4,
        the value that used to be hard-coded.

    Returns
    -------
    The ``BayesianOptimization`` object after ``maximize`` has run.
    """

    def crossval_wrapper(max_iter_predict, n_restarts_optimizer):
        """Objective for the optimizer: mean CV score of the GP classifier.

        The optimizer proposes floats, so both hyperparameters are cast to
        integers before they are passed along. ``max_iter_predict`` is also
        clamped to at least 1: ``int()`` truncates, so any proposal below 1.0
        would otherwise become 0, which is not a valid iteration count.
        """
        return GP_CrossValidation(max_iter_predict=max(1, int(max_iter_predict)),
                                  n_restarts_optimizer=int(n_restarts_optimizer),
                                  data=data,
                                  targets=targets)

    optimizer = BayesianOptimization(f=crossval_wrapper,
                                     pbounds=pars,
                                     random_state=42,
                                     verbose=2)
    optimizer.maximize(init_points=init_points, n_iter=n_iter)

    return optimizer


In [19]:
# Search bounds for the Bayesian optimizer. optimize_GP truncates each proposed
# value with int(), so the lower bound for max_iter_predict starts at 1: a
# bound of 0 would let proposals below 1.0 become max_iter_predict=0.
# NOTE(review): GP_CrossValidation builds the classifier with its default
# kernel; per the scikit-learn docs that kernel's hyperparameters are fixed,
# so n_restarts_optimizer may have no effect on the score -- confirm.
parameters_BayesianOptimization = {"max_iter_predict": (1, 10),
                                   "n_restarts_optimizer": (50, 150),
                                  }

# Keep the result under its own name. Binding it to `BayesianOptimization`
# would shadow the class imported from bayes_opt, and optimize_GP would then
# fail with "object is not callable" the next time this cell is run.
gp_optimizer = optimize_GP(train_features_data,
                           train_targets_data,
                           parameters_BayesianOptimization,
                           n_iter=5)
print(gp_optimizer.max)

|   iter    |  target   | max_it... | n_rest... |
-------------------------------------------------


In [11]:
# Baseline: Gaussian Process classifier with library-default hyperparameters
# (only the seed is fixed), trained on the training split.
gpc = GaussianProcessClassifier(random_state=42)
gpc = gpc.fit(train_features_data, train_targets_data)

In [12]:
# Score the baseline classifier on the held-out test split.
gpc.score(X=test_features_data, y=test_targets_data)

0.9635754189944135