In [1]:
import pandas as pd

import xgboost as xgb
from sklearn.metrics import fbeta_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
from simulated_annealing import *

In [3]:
# Load the raw dataset straight from the zipped CSV archive.
raw_df = pd.read_csv(
    'data/creditcardfraud.zip',
    compression='zip',
)

## Split Dataset:

In [4]:
# Hold out 20% of the rows as the final test set.
# A fixed random_state makes the split reproducible across kernel restarts, and
# stratifying on the 'Class' label keeps the class ratio equal in both partitions.
x_tr, test = train_test_split(
    raw_df,
    test_size=0.2,
    shuffle=True,
    random_state=42,
    stratify=raw_df['Class'],
)

In [5]:
# Carve a validation set out of the non-test rows: 25% of x_tr, i.e. 20% of the
# full dataset when x_tr is the 80% remainder of the previous split.
# A fixed random_state makes the split reproducible across kernel restarts, and
# stratifying on the 'Class' label keeps the class ratio equal in both partitions.
train, valid = train_test_split(
    x_tr,
    test_size=0.25,
    shuffle=True,
    random_state=42,
    stratify=x_tr['Class'],
)

In [6]:
# Separate the 'Class' label column from the feature columns of each partition.
ytrain = train['Class']
xtrain = train.drop(columns='Class')

yvalid = valid['Class']
xvalid = valid.drop(columns='Class')

ytest = test['Class']
xtest = test.drop(columns='Class')

## XGBoost DMatrix Inputs:

In [7]:
# Wrap each partition (features together with its labels) in an XGBoost DMatrix.
dtrain, dvalid, dtest = (
    xgb.DMatrix(features, label=target)
    for features, target in ((xtrain, ytrain), (xvalid, yvalid), (xtest, ytest))
)

## Annealing Functions:

In [8]:
# Hyperparameters held fixed for the whole tuning run.
# NOTE(review): newer XGBoost releases replace 'silent' with 'verbosity' —
# confirm against the installed version before changing the key.
param = dict(
    silent=1,
    min_child_weight=1,  # minimum sum of weights of all observations required in a child
    objective='binary:logistic',
    eval_metric='auc',
    seed=42,
)

In [9]:
# Search space: candidate values for every hyperparameter that is tuned.
# Insertion order of the keys is preserved by the OrderedDict.
tune_dic = OrderedDict([
    ('max_depth', [5, 10, 15, 20, 25]),                          # maximum tree depth
    ('subsample', [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),               # proportion of training instances used in trees
    ('colsample_bytree', [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),        # subsample ratio of columns
    ('eta', [0.01, 0.05, 0.10, 0.20, 0.30, 0.40]),               # learning rate
    ('gamma', [0.00, 0.05, 0.10, 0.15, 0.20]),                   # minimum loss function reduction required for a split
    ('scale_pos_weight', [30, 40, 50, 300, 400, 500, 600, 700]), # relative weight of positive/negative instances
])

In [10]:
# Custom metric calculation function
def f2_score(y_pred, y_true):
    """
    F2 score of predicted scores after thresholding them at 0.5
    y_pred - Array of predicted scores; values >= 0.5 are counted as class 1
    y_true - True labels
    """
    hard_labels = (y_pred >= 0.5).astype(int)
    return fbeta_score(y_true, hard_labels, beta=2)


# Function to train model
def train_model(curr_params, param, Xtrain, Xvalid, Ytrain=None, Yvalid=None, metric=f2_score, num_rounds=20):
    """
    Train the model with given set of hyperparameters
    curr_params - Dict of hyperparameters and chosen values
    param - Dict of hyperparameters that are kept constant (left unmodified)
    Xtrain - DMatrix of training data
    Xvalid - DMatrix of validation data
    Ytrain - Training labels (unused; kept for interface compatibility)
    Yvalid - Validation labels (unused; labels are read from Xvalid.get_label())
    metric - Metric to compute model performance on, called as metric(preds, labels)
    num_rounds - Number of boosting rounds

    Returns a tuple (model, metric_val): the trained booster and the metric
    evaluated on the validation DMatrix.
    """
    # Merge into a copy so the caller's constant `param` dict is not mutated;
    # values in curr_params override same-named constant entries.
    full_params = dict(param)
    full_params.update(curr_params)

    model = xgb.train(full_params, Xtrain, num_boost_round=num_rounds)
    preds = model.predict(Xvalid)
    labels = Xvalid.get_label()
    metric_val = metric(preds, labels)

    return model, metric_val

In [12]:
# Run the annealing search over tune_dic for 10 iterations.
# train_dict carries num_rounds=50 — presumably forwarded to train_model as
# keyword arguments; confirm in simulated_annealing.
res = simulate_annealing(
    train_model,
    tune_dic,
    param,
    dtrain,
    dvalid,
    maxiters=10,
    train_dict={'num_rounds':50},
)

Starting Iteration 0
Local Improvement in metric from  -1.0000 to   0.7739 - parameters accepted
Global improvement in metric from  -1.0000 to   0.7739 - best parameters updated
Starting Iteration 1
Local Improvement in metric from   0.7739 to   0.7841 - parameters accepted
Global improvement in metric from   0.7739 to   0.7841 - best parameters updated
Starting Iteration 2
Combination revisited
No Improvement but parameters accepted. Metric change:   0.0000 
                threshold: 1.0000 random number: 0.5847
                
Starting Iteration 3
No Improvement but parameters accepted. Metric change:  -0.0086 
                threshold: 0.9676 random number: 0.1444
                
Starting Iteration 4
No Improvement but parameters accepted. Metric change:   0.0000 
                threshold: 1.0000 random number: 0.2147
                
Starting Iteration 5
Local Improvement in metric from   0.7755 to   0.7857 - parameters accepted
Global improvement in metric from   0.7841 to   