In [None]:
import cupy

In [None]:
?cupy.random.normal

# <center>Scaling XGBoost Hyper-Parameter Optimization</center>

<img src="images/swarm.png" width="400"/>

# Motivation:

To reach the highest performance in classification tasks (i.e., supervised learning), it is best practice to build an ensemble of champion models.

Each member of the ensemble is a winner of a search over many models of its kind with altered hyper-parameters.

In this notebook, we build a harness for running such a [hyper-parameter] search to demonstrate the accuracy benefits while exploring performance as we scale within and across GPU nodes.

# Library Imports:

In [12]:
import ipyvolume as ipv
import matplotlib.pyplot as plt

import numpy as np; import pandas as pd; import cudf
import cuml; import xgboost; from xgboost import plot_tree
import warnings; warnings.filterwarnings('ignore')

import time; import copy 

import data_utils
import swarm
import visualization as viz

# reload library modules/code without a kernel restart
import importlib; importlib.reload( swarm ); importlib.reload( data_utils ); importlib.reload( viz);

# Dataset:

<center> In this notebook you can try different hyper-parameter search methods using synthetic or real data. </center>


<img src="images/datasets.png" width="800"/>

# [ Optional ] Synthetic Dataset Demo

In [None]:
# Optional demo: set the flag to False to skip the synthetic-data visualizations.
exploreSyntheticDataFlag = True
if exploreSyntheticDataFlag:
    # one gallery per coil type; each type uses its own per-axis noise scale
    for coilType, noiseScale in ( ('whirl', .3), ('helix', .1) ):
        viz.visualize_synthetic_data_variants( coilType,
                                               nSamples = 10000,
                                               sdevScales = [ noiseScale ] * 3,
                                               nCoils = [2, 4, 6, 12] )

# Import / Generate Data

In [None]:
?data_utils.Dataset

In [13]:
# Choose the dataset for this run: keep exactly one Dataset(...) line active.
# The commented-out lines are the alternative choices ( fashion-mnist, synthetic coils ).
# dataset = data_utils.Dataset('fashion-mnist')
dataset = data_utils.Dataset('airline', nSamples = 1000000)
#dataset = data_utils.Dataset('synthetic', coilType = 'whirl', coilDensity=20, nCoordinates = 1000, nSamples=1000000)

reading AIRLINE from local copy [ via pandas reader ]
rescaling data
rescaling data


# ETL [ split & normalize ]

# Visualize

In [None]:
# shapes of the train and test splits exposed by the dataset object
dataset.trainData.shape, dataset.testData.shape

In [None]:
# plot the dataset's `data` / `labels` attributes
# NOTE(review): presumably the full ( unsplit ) data — confirm in data_utils
viz.plot_data( dataset.data, dataset.labels, dataset.datasetName )

In [None]:
# plot the test split only
viz.plot_data( dataset.testData, dataset.testLabels, dataset.datasetName )

# Define Compute Cluster

In [None]:
import dask
from dask import delayed
from dask_cuda import LocalCUDACluster

from dask.distributed import Client
from dask.distributed import as_completed
from dask.distributed import worker

In [None]:
# Start a local Dask CUDA cluster with 4 workers.
# NOTE(review): n_workers is hard-coded — presumably one worker per GPU; confirm the host has at least 4 GPUs.
cluster = LocalCUDACluster( ip = '', n_workers = 4)

In [None]:
# Connect a Dask client to the `cluster` object.
# NOTE(review): asynchronous = True is meant for use inside async/await code — confirm the swarm module expects an asynchronous client.
client = Client( cluster, asynchronous = True)

In [None]:
# display the client object as the cell output
client

# Define HPO XGBoost Search Ranges

In [None]:
# HPO search space, keyed by parameter index ( 0, 1, 2 ).
# Each entry reads [ xgboost parameter name, low, high, value type ]
# NOTE(review): low / high presumed to be the search bounds — confirm against the swarm module.
paramRanges = dict( enumerate( [ ['max_depth', 3, 20, 'int'],
                                 ['learning_rate', .001, 1, 'float'],
                                 ['gamma', 0, 2, 'float'] ] ) )

# Search Strategies

&nbsp;

| method name | &nbsp;&nbsp;&nbsp; performance | &nbsp;&nbsp;&nbsp; search duration  |
|-----------------------|-----------------|------------------|
| random-search         | &nbsp;&nbsp;&nbsp; worst | &nbsp;&nbsp;&nbsp; slow    |
| particle-search [1]      | &nbsp;&nbsp;&nbsp; good  | &nbsp;&nbsp;&nbsp; fast    |
| async-particle-search | &nbsp;&nbsp;&nbsp; best  | &nbsp;&nbsp;&nbsp; fastest |

<center>[1] https://en.wikipedia.org/wiki/Particle_swarm_optimization#Algorithm</center>

# Key Params [ nParticles & nEpochs ]

# Sync vs Async [ Dask Task Stream ]

<img src='images/sync_vs_async.png' width='1000px'>

# Monitoring Perf [ <- ]
You'll need to have launched the container with the port open.
Connect via [ the http:// is important]

# Single Run

In [None]:
# fixed parameters
# fixed ( non-searched ) training configuration for the baseline run
nTrees = 100
paramsGPU = dict( tree_method = 'gpu_hist',
                  max_depth = 6,
                  objective = 'binary:hinge' )

# the timer covers DMatrix construction, training, prediction and evaluation
startTime = time.time()

# fit on the training split
trainDMatrix = xgboost.DMatrix( label = dataset.trainLabels, data = dataset.trainData )
trainedModelGPU = xgboost.train( params = paramsGPU, dtrain = trainDMatrix, num_boost_round = nTrees )

# predict on the test split; outputs are cast to int
testDMatrix = xgboost.DMatrix( label = dataset.testLabels, data = dataset.testData )
predictionsGPU = trainedModelGPU.predict( testDMatrix ).astype(int)

# eval() returns text; the value after the ':' is treated as an error rate, so accuracy = 1 - error
trainEvalText = trainedModelGPU.eval( trainDMatrix )
trainAccuracy = 1 - float( trainEvalText.split(':')[1] )
testEvalText = trainedModelGPU.eval( testDMatrix )
testAccuracy = 1 - float( testEvalText.split(':')[1] )

elapsedTime = time.time() - startTime

In [None]:
# report the baseline configuration, then its train / test accuracy and the elapsed wall-clock time
print(f'{paramsGPU}, {nTrees}')
print(f'train accuracy: {trainAccuracy} \ntest accuracy: {testAccuracy} \ntrained in {elapsedTime:0.2f} seconds')

# Run HPO - <font color = '#ffb500'> Synchronous Swarm </font>

In [None]:
# configure the synchronous swarm search: 16 particles, 10 epochs, searching paramRanges via the Dask client
syncSwarm = swarm.SyncSwarm( client, dataset, paramRanges, nParticles = 16, nEpochs = 10 )

In [None]:
# run the synchronous swarm search
syncSwarm.run_search()

## Visualize <font color='#ffb500'> Synchronous Swarm </font>

In [None]:
# plot the particle evaluations recorded by the synchronous swarm
viz.plot_particle_evals( syncSwarm )

In [None]:
# visualize particle trails ( topN = 5 — presumably the five best particles; confirm in visualization module )
viz.viz_particle_trails( syncSwarm, topN = 5 )

In [None]:
# visualize the synchronous swarm using its own parameter ranges
viz.viz_swarm( syncSwarm, syncSwarm.paramRanges)

# Run HPO - <font color='#7400ff'> Asynchronous Swarm </font>

In [None]:
# configure the asynchronous swarm search on the same client, dataset and paramRanges
# NOTE(review): nParticles = 1 / nEpochs = 1 here versus 16 / 10 for the synchronous swarm — confirm this
# is intentional, since the two swarms' best accuracies are compared later in the notebook
asyncSwarm = swarm.AsyncSwarm( client, dataset, paramRanges, nParticles = 1, nEpochs = 1 )

In [None]:
# run the asynchronous swarm search
asyncSwarm.run_search()

## Visualize <font color='#7400ff'> Asynchronous Swarm </font>

In [None]:
# plot the particle evaluations recorded by the asynchronous swarm
viz.plot_particle_evals( asyncSwarm )

In [None]:
# visualize particle trails ( topN = 5 — presumably the five best particles; confirm in visualization module )
viz.viz_particle_trails( asyncSwarm, topN = 5 )

In [None]:
# visualize the asynchronous swarm using its own parameter ranges
viz.viz_swarm( asyncSwarm, asyncSwarm.paramRanges )

# Run HPO - <font color='#666666'> Random Search </font>

In [None]:
# configure the random-search baseline: 5 epochs over the same paramRanges ( no nParticles is passed here )
randomAsyncSwarm = swarm.RandomSearchAsync ( client, dataset, paramRanges, nEpochs = 5 )

In [None]:
# run the random search
randomAsyncSwarm.run_search()

## Visualize <font color='#666666'> Random Swarm </font>

In [None]:
# plot the evaluations recorded by the random search
viz.plot_particle_evals( randomAsyncSwarm )

In [None]:
# NOTE(review): topN = 0 here ( the swarm runs use 5 ) — presumably no trails are highlighted for random search; confirm
viz.viz_particle_trails( randomAsyncSwarm, topN = 0 )

In [None]:
# visualize the random search using its own parameter ranges
viz.viz_swarm( randomAsyncSwarm, randomAsyncSwarm.paramRanges )

# Determine best swarm

In [None]:
if syncSwarm.globalBest['accuracy'] > asyncSwarm.globalBest['accuracy']:
    swarm = syncSwarm
else:
    swarm = asyncSwarm

# Train XGBoost model with best params

In [None]:
bestParams = {
    'tree_method': 'gpu_hist',
    'random_state': 0, 
    'max_depth': int(swarm.globalBest['params'][0]),
    'learning_rate': swarm.globalBest['params'][1],
    'gamma': swarm.globalBest['params'][2]
}
    
bestParams['objective'] = dataset.trainObjective[0]
if dataset.trainObjective[1] is not None: 
    bestParams['num_class'] = dataset.trainObjective[1]

In [None]:
%%time
trainDMatrix = xgboost.DMatrix( data = dataset.trainData, label = dataset.trainLabels )
testDMatrix = xgboost.DMatrix( data = dataset.testData, label = dataset.testLabels )
trainedModelGPU = xgboost.train( dtrain = trainDMatrix, evals = [(testDMatrix, 'test')], params = bestParams,
                                 num_boost_round = swarm.globalBest['nTrees'], verbose_eval=False)

In [None]:
# save best model
# write the trained booster to 'xgb.model.hpo' ( relative path ) so it can be re-loaded from disk
trainedModelGPU.save_model('xgb.model.hpo')

In [None]:
from cuml import ForestInference

In [None]:
?ForestInference.load

In [None]:
# inspect the ( objective, number-of-classes ) pair that the dataset supplies for training
dataset.trainObjective

In [None]:
# load the saved model into cuML Forest Inference with default options
# ( `fm` is re-assigned in the next cell with explicit options )
fm = ForestInference.load( filename='xgb.model.hpo')

In [None]:
# re-load the saved model with explicit inference options; this replaces the previous `fm`
# NOTE(review): threshold=0 together with output_class=True — confirm this cut-off matches the training objective
fm = ForestInference.load( filename='xgb.model.hpo',
                           algo='BATCH_TREE_REORG',
                           output_class=True,
                           threshold=0,
                           model_type='xgboost' )

In [None]:
# check which container type holds the test data
type( dataset.testData )

In [None]:
# convert the test data to a GPU matrix, requesting C ( row-major ) order
# NOTE(review): `test` is not referenced by any other visible cell — confirm whether this conversion is still needed
test = dataset.testData.as_gpu_matrix(order='C')

In [None]:
# cast every test-data column to float32, writing each converted column back onto dataset.testData
for columnName in dataset.testData.columns:
    float32Column = dataset.testData[columnName].astype('float32')
    dataset.testData[columnName] = float32Column

In [None]:
# show the per-column dtypes of the test data
dataset.testData.dtypes

In [None]:
%%time
# time prediction with the native XGBoost booster on the test DMatrix
trainedModelGPU.predict( testDMatrix )

In [None]:
%%time
filPredictions = fm.predict ( dataset.testData, 
                              algo='BATCH_TREE_REORG', 
                              output_class=True, 
                              threshold=0, 
                              model_type='xgboost' )

In [None]:
# wrap the Forest Inference predictions in a cudf Series for display
cudf.Series(filPredictions)

In [None]:
%%time
# perform prediction on the model loaded from path
fil_preds = fm.predict()

# Inference with trained model on test data

In [None]:
%%time
# evaluate the trained booster on the test DMatrix; the result is text that the next cell parses after the ':'
predictions = trainedModelGPU.eval(testDMatrix)

In [None]:
# the value after the ':' in the eval text is treated as an error rate; accuracy = 1 - error
evalMetricText = predictions.split(':')[1]
testDataPerf = 1 - float( evalMetricText )
print(f'accuracy: {testDataPerf}')

# Summary

# Scale Up Results [ DGX-2 ]

<img src='images/synthetic_async.png'></img>

Async Scaling > Sync Scaling > Random Search

# References

In [None]:
# https://www.osgeo.cn/matplotlib/gallery/lines_bars_and_markers/timeline.html

# Future Work / Extensions

larger than single GPU memory datasets - dask_cudf + [ dask_xgboost or xgboost.dask ]

1. [ Generate a classification dataset on GPU ](#data-load) (e.g., double helix, unwinding helix/whirl )

2. [ ETL - process/prepare data for model training ](#ETL) (e.g., scale, split, augment )   
    
3. [ Define HPO Strategy ](#define-hpo)

4. [ Create Compute Cluster ](#compute-cluster)
   > LocalCUDACluster or KubeCluster
      
5. [ Define Search ](#define-search)

6. [ Run ASYNC Particle Swarm ](#run-async-PSO)

7. [ Run Classic Particle Swarm ](#run-classic-PSO)

8. [ Run Random Search Baseline ](#run-random-search)

9. [ Summary ](#summary)

# User Choices:

The user is able to make several key choices in running this notebook. They are as follows:

1. [ Dataset ]()
2. [ Compute Scaling Strategy - Scale-Up, Scale-Out ]()
3. [ XGBoost Parameter Search Range ]()

4. [ Particle Swarm Type ]()
   * Synchronous
   * Asynchronous
   * Random Search