In [None]:
import json
import random
import time

import requests
from sklearn.datasets import load_diabetes, load_digits, load_breast_cancer
from sklearn.model_selection import train_test_split
# Toy datasets used by the examples below.
diabetes = load_diabetes()
# Was load_diabetes(): `digits` must hold the digits dataset, otherwise the
# one-hot encoding section below operates on the diabetes target.
digits = load_digits()
breast_cancer = load_breast_cancer()

from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from xgboost import XGBClassifier

In [None]:

class bayes_optimization_example:
    """Client-side driver for the Bayesian-optimisation service at localhost:8000.

    The object keeps a history of evaluated hyperparameter points and their
    scores, builds the request (URL + JSON body) that carries this history to
    the service, and evaluates the points the service proposes.

    Supported ``model`` names are ``'ElasticNet'``, ``'SVR'`` (both scored with
    R² on a held-out split) and ``'XGBoost'`` (scored with mean
    cross-validation score).  Scores are stored as strings, exactly as they
    are sent to the service.
    """

    # Task type implied by each supported model name.
    _TARGET_TYPES = {
        'ElasticNet': 'regression',
        'SVR': 'regression',
        'XGBoost': 'classification',
    }

    def __init__(self, gbr_batch_size, n_processors, model):
        """Store the service settings and the name of the model to tune."""
        self.n_procs = n_processors
        self.gbr_batch_size = gbr_batch_size # how many points to evaluate when optimizing gaussian process
        self.model = model

    def metric(self, x, y):
        """Return True when ``x`` is better than ``y``.

        For regression sessions lower is better, so callers must pass a loss
        (e.g. ``1 - R²``), not a raw R² value.  For classification sessions
        higher is better.
        """
        if self.target_type == 'regression':
            return bool(x < y)
        return bool(x > y)

    def _build_estimator(self, params):
        """Create an unfitted estimator for ``self.model`` from numeric ``params``."""
        if self.model == 'ElasticNet':
            return ElasticNet(alpha=params[0], l1_ratio=params[1])
        if self.model == 'SVR':
            return make_pipeline(StandardScaler(), SVR(C=params[0], epsilon=params[1]))
        if self.model == 'XGBoost':
            return XGBClassifier(gamma=params[0], reg_lambda=params[1], colsample_bytree=params[2],
                                 max_depth=params[3], min_child_weight=params[4], learning_rate=params[5])
        raise ValueError("unknown model {!r}; expected one of {}".format(
            self.model, sorted(self._TARGET_TYPES)))

    def _evaluate(self, params):
        """Fit the estimator for ``params`` and return its score as a string."""
        regr = self._build_estimator(params)
        regr.fit(self.X_train, self.y_train)
        if self.target_type == 'regression':
            score = r2_score(self.y_test, regr.predict(self.X_test))
        else:
            score = cross_val_score(regr, self.data, self.target).mean()
        return str(score)

    def _parse_point(self, point_str):
        """Convert a ``'v1,v2,...'`` string into numbers according to ``hp_types``."""
        # int(float(v)) accepts both '3' and '3.0' for integer hyperparameters.
        return [float(v) if kind == 'float' else int(float(v))
                for kind, v in zip(self.hp_types, point_str.split(','))]

    def initialize(self, toy_data, hp_types, hp_ranges):
        """Split the data and seed the history with 10 random evaluations.

        Parameters
        ----------
        toy_data : object with ``data`` and ``target`` attributes.
        hp_types : list of str, ``'float'`` or anything else (treated as int).
        hp_ranges : list of ``[low, high]`` pairs, one per hyperparameter.

        Raises
        ------
        ValueError
            If ``self.model`` is not one of the supported model names.
        """
        if self.model not in self._TARGET_TYPES:
            raise ValueError("unknown model {!r}; expected one of {}".format(
                self.model, sorted(self._TARGET_TYPES)))
        self.target_type = self._TARGET_TYPES[self.model]

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            toy_data.data, toy_data.target, test_size=0.5, random_state=42)
        self.data = toy_data.data
        self.target = toy_data.target
        self.hp_types = hp_types
        self.historical_scores = []
        self.historical_points = []
        self.hp_ranges = hp_ranges
        for _ in range(10):
            r = [random.uniform(hp_ranges[x][0], hp_ranges[x][1])
                 if hp_types[x] == 'float' else random.randint(hp_ranges[x][0], hp_ranges[x][1])
                 for x in range(len(hp_ranges))]
            self.historical_scores.append(self._evaluate(r))
            self.historical_points.append(','.join([str(x) for x in r]))

    def test_points(self, next_points):
        """Evaluate the points proposed by the service and append them to the history.

        ``next_points['next_points']`` is a string of points separated by
        ``';'``, each point being ``','``-separated values.
        """
        for raw_point in next_points['next_points'].split(';'):
            # Values arrive as strings; cast them before handing them to the model.
            score = self._evaluate(self._parse_point(raw_point))
            # Append score and point together so the two lists cannot get out
            # of step if a later evaluation raises.
            self.historical_scores.append(score)
            self.historical_points.append(raw_point)

    def get_best_point(self):
        """Return the best point evaluated so far, or ``'failed'`` if there is none.

        Regression scores are R² (higher is better), so they are converted to
        the loss ``1 - R²`` before being compared with ``metric``.
        """
        is_regression = self.target_type == 'regression'
        best = float('inf') if is_regression else float('-inf')
        best_point = 'failed'
        for s, pt in zip(self.historical_scores, self.historical_points):
            value = 1 - float(s) if is_regression else float(s)
            if self.metric(value, best):
                best = value
                best_point = pt
        return best_point

    def create_url (self):
        """Build the request for the service from the full history.

        Returns
        -------
        (url, data) : the query URL and the JSON body with ``scores`` and
        ``points``.
        """
        if self.target_type == 'regression':
            # R² is higher-is-better, so it is sent as the loss 1 - R².
            data = json.dumps({'scores': ','.join([str(1-float(s)) for s in self.historical_scores]), 'points': ';'.join(self.historical_points)})
        else:
            # NOTE(review): classification scores are sent unchanged while
            # regression sends a loss -- confirm which direction the service optimises.
            data = json.dumps({'scores': ','.join(self.historical_scores), 'points': ';'.join(self.historical_points)})

        y_best = 10  # passed through as-is; meaning is defined by the service -- TODO confirm
        hp_ranges_str = ';'.join([','.join([str(x) for x in s]) for s in self.hp_ranges])
        hp_types_str = ','.join(self.hp_types)
        stem = "http://localhost:8000/bayes_opt?hp_types="
        url = stem + "{}&g_batch_size={}&hp_ranges={}&y_best={}&n_gpus={}&use_qc={}".format(hp_types_str, self.gbr_batch_size,
                                                                                               hp_ranges_str, y_best, self.n_procs, 'False')
        return url, data

## Elastic Net

In [None]:
# The model name must be 'ElasticNet': the class dispatches on the exact
# strings 'ElasticNet', 'SVR' and 'XGBoost', so 'EN' matched no branch.
qc_bo = bayes_optimization_example(30, 4, 'ElasticNet')
# Search space: alpha and l1_ratio, both floats in [0.0001, 0.99999].
hp_types = ['float', 'float']
hp_ranges =  [[0.0001,.99999],[0.0001,.99999]]
qc_bo.initialize(diabetes, hp_types, hp_ranges)
historical_qei = []
best_points = []

In [None]:
# 50 optimisation rounds: send the history, evaluate the proposed points,
# and record the best point after each round.
for a in range(50):
    url, data = qc_bo.create_url() # using historical data
    response = requests.post(url, data=data)
    # Surface HTTP errors as such, not as a JSON/KeyError failure below.
    response.raise_for_status()
    next_points = json.loads(response.text)
    historical_qei.append(next_points['best_ccdf'])
    qc_bo.test_points(next_points)
    best_points.append(qc_bo.get_best_point())

In [None]:
# The stored scores are R² values: higher is better and they can be zero or
# negative, so log(R²) is undefined for poor fits and its running minimum
# would follow the worst run.  Plot the loss 1 - R² (the quantity that
# create_url() sends for regression) on a log scale instead.
h = [np.log(1 - float(q)) for q in qc_bo.historical_scores]
best = float('inf')
best_so_far = []
for value in h:
    if value < best:
        best = value
    best_so_far.append(best)
plt.plot(h, label='log(1 - R2)')
plt.plot(best_so_far, label='best so far')
p = plt.legend()

In [None]:
# Trace of the best-known hyperparameters after each round; a step in a
# curve marks the round where a better point was found.
for column, param_label in enumerate(['alpha', 'l1_ratio']):
    plt.plot([float(b.split(',')[column]) for b in best_points], label=param_label)
p = plt.legend()
qc_bo.get_best_point()

In [None]:
# The 'best_ccdf' value returned by the service on each optimisation round.
plt.plot(historical_qei)

## Support Vector Regression
Hyperparameters tuned: `C` and `epsilon`.

In [None]:
# Search space for the SVR pipeline: C in [0.1, 10], epsilon in [0.001, 0.999].
hp_types = ['float'] * 2
hp_ranges = [[0.1, 10], [0.001, .999]]

# Fresh session on the diabetes data, seeded with random evaluations.
qc_bo = bayes_optimization_example(30, 4, 'SVR')
qc_bo.initialize(diabetes, hp_types, hp_ranges)

# Per-round records filled in by the optimisation loop.
historical_qei, best_points = [], []

In [None]:
# 50 optimisation rounds: send the history, evaluate the proposed points,
# and record the best point after each round.
for a in range(50):
    url, data = qc_bo.create_url() # using historical data
    response = requests.post(url, data=data)
    # Surface HTTP errors as such, not as a JSON/KeyError failure below.
    response.raise_for_status()
    next_points = json.loads(response.text)
    historical_qei.append(next_points['best_ccdf'])
    qc_bo.test_points(next_points)
    best_points.append(qc_bo.get_best_point())

In [None]:
# Trace of the best-known hyperparameters after each round; a step in a
# curve marks the round where a better point was found.
for column, param_label in enumerate(['C', 'epsilon']):
    plt.plot([float(b.split(',')[column]) for b in best_points], label=param_label)
p = plt.legend()
qc_bo.get_best_point()

In [None]:
# The stored scores are R² values: higher is better and they can be zero or
# negative, so log(R²) is undefined for poor fits and its running minimum
# would follow the worst run.  Plot the loss 1 - R² (the quantity that
# create_url() sends for regression) on a log scale instead.
h = [np.log(1 - float(q)) for q in qc_bo.historical_scores]
best = float('inf')
best_so_far = []
for value in h:
    if value < best:
        best = value
    best_so_far.append(best)
plt.plot(h, label='log(1 - R2)')
plt.plot(best_so_far, label='best so far')
p = plt.legend()

## XGBoost

### One hot encoding for digits dataset

In [None]:
from sklearn.preprocessing import OneHotEncoder

# The encoder expects a 2-D input, so the target is reshaped to one column.
target_column = digits.target.reshape(-1, 1)
enc = OneHotEncoder(handle_unknown='ignore').fit(target_column)
enc.categories_
ohe_target = enc.transform(target_column)
# enc.inverse_transform(...) maps one-hot rows back to the original labels.

In [None]:
class bunch:
    """Minimal container exposing ``data`` and ``target`` attributes."""

    def __init__(self, d):
        # Copy the two required entries of the mapping onto the instance;
        # a missing key raises KeyError.
        for field in ('data', 'target'):
            setattr(self, field, d[field])
        
# Digits features paired with the one-hot encoded target.
digits_bunch = bunch(dict(target=ohe_target, data=digits.data))

### Breast Cancer

In [None]:
qc_bo = bayes_optimization_example(300, 4, 'XGBoost')

# Display names, in the same order as hp_types / hp_ranges.  The third entry
# was misspelled 'colsample_by_tree'; the XGBClassifier argument is
# 'colsample_bytree'.
parameter_names = ['gamma', 'reg_lambda', 'colsample_bytree',
                   'max_depth', 'min_child_weight', 'learning_rate']

hp_types = ['float', 'float', 'float',  'int', 'float', 'float']
hp_ranges =  [[0.01, .999],[0.001,.999], [0.001,.999],
              [2, 5],[0.001,.999] ,[0.001,.999]]

qc_bo.initialize(breast_cancer, hp_types, hp_ranges)
historical_qei = []
best_points = []

In [None]:
# 100 optimisation rounds, timing how long the service takes to propose
# the next batch of points.
for a in range(100):
    url, data = qc_bo.create_url() # using historical data
    start = time.time()
    response = requests.post(url, data=data)
    print("{}: Spent {} seconds getting next points".format(a, round(time.time()-start,3)))
    # Surface HTTP errors as such, not as a JSON/KeyError failure below.
    response.raise_for_status()
    next_points = json.loads(response.text)
    historical_qei.append(next_points['best_ccdf'])
    qc_bo.test_points(next_points)
    best_points.append(qc_bo.get_best_point())

In [None]:
class use_log:
    """Optional log transform: applies ``np.log`` only when enabled."""

    def __init__(self, log_or_not):
        self.log_or_not = log_or_not

    def log(self, x):
        """Return ``np.log(x)`` when the transform is enabled, otherwise ``x``."""
        return np.log(x) if self.log_or_not else (x)
def show_results(session, log):
    """Print summary statistics for a session and plot its score history.

    Parameters
    ----------
    session : object with ``historical_scores``, ``target_type`` and
        ``metric(x, y)``, e.g. a ``bayes_optimization_example``.
    log : bool, plot (and track the best of) the log of the values.

    The mean and standard deviation are computed on the raw stored scores.
    For regression sessions the plotted values are the loss ``1 - score``
    (the stored score is R², which is higher-is-better, while
    ``session.metric`` treats lower as better for regression); for
    classification sessions the raw scores are plotted.
    """
    u = use_log(log)
    raw_scores = [float(q) for q in session.historical_scores]
    print("average performance (during bo)               {}".format(np.mean(raw_scores)))
    print("standard deviation of performance (during bo) {}".format(np.std(raw_scores)))
    if session.target_type == 'regression':
        values = [1 - s for s in raw_scores]
        # Infinite sentinels guarantee the first value becomes the initial
        # best, whatever its scale (finite sentinels could mask every value).
        best = float('inf')
    else:
        values = raw_scores
        best = float('-inf')

    h = [u.log(v) for v in values]
    best_so_far = []
    for value in h:
        if session.metric(value, best):
            best = value
        best_so_far.append(best)
    plt.plot(h, label='historical')
    plt.plot(best_so_far, label='best_so_far')
    p = plt.legend()
    print("Best after BO {}".format(best))
# Summarise the current session's scores without the log transform.
show_results(qc_bo, False)

In [None]:
# Pair each hyperparameter name with its value at the best point found.
best_values = qc_bo.get_best_point().split(',')
['{}: {}'.format(name, value) for name, value in zip(parameter_names, best_values)]

In [None]:
import time