In [1]:
import numpy as np
import pandas as pd
import GPyOpt
from GPyOpt.methods import BayesianOptimization
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate, cross_val_predict
from sklearn.metrics import recall_score
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
data=pd.read_csv('Creditcard.csv')
X,Y=data.iloc[:,:-1],data.iloc[:,-1]

In [19]:
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [20]:
data.shape

(284807, 31)

In [21]:
np.linspace(start=0.0001,stop=0.1,num=3)

array([0.0001 , 0.05005, 0.1    ])

In [3]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2,random_state=0)

In [4]:
learning_rate=[x for x in np.linspace(start=0.0001,stop=0.1,num=3)]
n_estimators=[int(x) for x in np.linspace(start=100,stop=2048,num=3)]
params={'learning_rate':learning_rate,'max_depth':range(1,4),'n_estimators':n_estimators}

In [5]:
bds = [{'name': 'learning_rate', 'type': 'continuous', 'domain': (0.0001, 0.1)},
        {'name': 'max_depth', 'type': 'discrete', 'domain': (1, 2, 3)},
        {'name': 'n_estimators', 'type': 'discrete', 'domain': (100, 200, 400, 600, 800, 1000, 1500, 2000, 2048)}]

In [6]:
cv=StratifiedKFold(n_splits=2, shuffle=True, random_state=1)
scoring=['accuracy','precision_macro','recall_macro']

In [7]:
def obj_score(parameters):
  parameters=parameters[0]
  model=GradientBoostingClassifier(learning_rate=parameters[0],max_depth=parameters[1],n_estimators=round(parameters[2]))
  scores=cross_validate(model,X,Y,scoring=scoring,cv=cv,n_jobs=-1)
  score=np.mean(scores['test_recall_macro'])
  return score

In [8]:
import time

since = time.time()
time.sleep(2)
print(time.time() - since)

2.0153920650482178


In [9]:
since = time.time()

optimizer=BayesianOptimization(f=obj_score, 
                                 domain=bds,
                                 model_type='GP',
                                 acquisition_type ='EI',
                                 acquisition_jitter = 0.05,
                                 exact_feval=True, 
                                 maximize=True)

print(time.time() - since)

7293.1122262477875


In [11]:
since = time.time()

optimizer=BayesianOptimization(f=obj_score, 
                                 domain=bds,
                                 model_type='GP',
                                 acquisition_type ='EI',
                                 acquisition_jitter = 0.05,
                                 exact_feval=True, 
                                 maximize=True)

optimizer.run_optimization(max_iter=10,report_file='report',evaluations_file='evaluations',models_file='models')

print(time.time() - since)

6702.196932077408


In [24]:
bds = [{'name': 'learning_rate', 'type': 'discrete', 'domain': (0.0001, 0.05005, 0.1)},
        {'name': 'max_depth', 'type': 'discrete', 'domain': (1, 2, 3)},
        {'name': 'n_estimators', 'type': 'discrete', 'domain': (100, 200, 400, 600, 800, 1000, 1500, 2000, 2048)}]

In [25]:
since = time.time()

optimizer=BayesianOptimization(f=obj_score, 
                                 domain=bds,
                                 model_type='GP',
                                 acquisition_type ='EI',
                                 acquisition_jitter = 0.05,
                                 exact_feval=True, 
                                 maximize=True)

optimizer.run_optimization(max_iter=10,report_file='report',evaluations_file='evaluations',models_file='models')

print(time.time() - since)

20867.29037809372


In [18]:
print(13995 // 60, 13995 % 60)

233 15


In [26]:
print(20867 // 60, 20867 % 60)

347 47


In [28]:
347 // 60, 347 % 60

(5, 47)

In [29]:
5 * 3600 + 47 * 60 + 47

20867

In [None]:
header_params = []
for param in bds:
  header_params.append(param['name'])

In [13]:
# 1.08003289e-02 3.00000000e+00 2.00000000e+02
model = GradientBoostingClassifier(learning_rate=0.01, max_depth= 3, n_estimators=200)
model.fit(X_train, Y_train)
model.score(X_test, Y_test)

0.999420666409185

In [16]:
from sklearn.metrics import recall_score

y_pred = model.predict(X_test)
recall_score(Y_test, y_pred)

0.7623762376237624

# JANGAN DI APA APAIN

In [None]:
df_results=pd.DataFrame(data=optimizer.X, columns=header_params)

In [None]:
df_results

Unnamed: 0,learning_rate,max_depth,n_estimators
0,0.073397,1.0,100.0
1,0.095667,4.0,2048.0
2,0.003572,1.0,2048.0
3,0.061565,1.0,2048.0
4,0.088855,1.0,100.0
5,0.027488,4.0,2048.0
6,0.067943,1.0,2048.0
7,0.001551,1.0,2048.0
8,0.003647,1.0,100.0
9,0.032498,1.0,100.0


In [32]:
152313 // 60, 152313 % 60

(2538, 33)

In [31]:
2538 // 60, 2538 % 60

(42, 18)