# Benchmark and Bounds Tests

The purpose of this notebook is to benchmark all of the single-GPU cuML algorithms against their scikit-learn counterparts, while also providing the ability to find and verify upper bounds.

Each benchmark returns a pandas DataFrame with the results, which can then be analyzed, manipulated, and stored to disk.

## Notebook Credits
**Authorship**
Original Author: Corey Nolet <br />
Last Edit: Taurean Dyer, 9/25/2019<br />

Last Edit: Corey Nolet, 10/04/2019
    
### Test System Specs
Test System Hardware: DGX-1<br />
Test System Software: Ubuntu 16.04<br />
RAPIDS Version: 0.10.0pre - Conda Install<br />
Driver: 410.48<br />
CUDA: 10.0

### Known Working Systems
RAPIDS Versions: 0.10+

In [None]:
# The top-level package is imported so that the benchmark cells below can
# reach the runner through its fully-qualified path (cuml.benchmark.runners).
import cuml

# Runner class and the by-name algorithm lookup used by the benchmark cells.
from cuml.benchmark.runners import SpeedupComparisonRunner
from cuml.benchmark.algorithms import algorithm_by_name


# Print the installed cuML version so the benchmark output can be tied to it.
print(cuml.__version__)

## Neighbors

### Nearest Neighbors

In [None]:
# Benchmark NearestNeighbors on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("NearestNeighbors"),
    verbose=True,
)

## Clustering

### DBSCAN

In [None]:
# Benchmark DBSCAN on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("DBSCAN"),
    verbose=True,
)

### K-means Clustering

In [None]:
# Benchmark KMeans on the "blobs" dataset (numpy input). This cell uses a
# narrower row sweep than the others: 2**12 through 2**21.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(12, 22)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("KMeans"),
    verbose=True,
)

## Manifold Learning

### UMAP

In [None]:
# Benchmark UMAP on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("UMAP"),
    verbose=True,
)

### T-SNE

In [None]:
# Benchmark TSNE on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("TSNE"),
    verbose=True,
)

## Linear Models

### Linear Regression

In [None]:
# Benchmark LinearRegression on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("LinearRegression"),
    verbose=True,
)

### Logistic Regression

In [None]:
# Benchmark LogisticRegression on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("LogisticRegression"),
    verbose=True,
)

### Ridge Regression

In [None]:
# Benchmark Ridge on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("Ridge"),
    verbose=True,
)

### Lasso Regression

In [None]:
# Benchmark Lasso on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("Lasso"),
    verbose=True,
)

### ElasticNet Regression

In [None]:
# Benchmark ElasticNet on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("ElasticNet"),
    verbose=True,
)

### Mini-batch SGD Classifier

In [None]:
# Benchmark MBSGDClassifier on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = cuml.benchmark.runners.SpeedupComparisonRunner(
    bench_rows=[2**x for x in range(11, 24)],
    bench_dims=[64, 128, 256],
    dataset_name="blobs",
    input_type="numpy")

# verbose=True is passed here so this cell matches every other benchmark
# cell in the notebook; it was the only run() call that omitted the flag.
results = runner.run(algorithm_by_name("MBSGDClassifier"), verbose=True)

## Decomposition

### PCA

In [None]:
# Benchmark PCA on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("PCA"),
    verbose=True,
)

### TSVD

In [None]:
# Benchmark TSVD on the "blobs" dataset (numpy input), sweeping
# power-of-two row counts from 2**11 through 2**23 and dims of 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("TSVD"),
    verbose=True,
)

## Ensemble

### Random Forest Classifier

In [None]:
# Benchmark RandomForestClassifier on the "blobs" dataset (numpy input),
# sweeping power-of-two row counts from 2**11 through 2**23 and dims of
# 64/128/256.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[64, 128, 256],
    bench_rows=[1 << exponent for exponent in range(11, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("RandomForestClassifier"),
    verbose=True,
)

### Random Forest Regressor

In [None]:
# Benchmark the random forest *regressor* on the "blobs" dataset (numpy
# input), sweeping power-of-two row counts from 2**11 through 2**23 and dims
# of 64/128/256.
runner = cuml.benchmark.runners.SpeedupComparisonRunner(
    bench_rows=[2**x for x in range(11, 24)],
    bench_dims=[64, 128, 256],
    dataset_name="blobs",
    input_type="numpy")

# This cell previously looked up "RandomForestClassifier" (copied from the
# cell above), so the regressor was never actually benchmarked.
# NOTE(review): confirm "RandomForestRegressor" is a registered name in
# cuml.benchmark.algorithms for the cuML version in use.
results = runner.run(algorithm_by_name("RandomForestRegressor"), verbose=True)

## Random Projection

### Gaussian Random Projection

In [None]:
# Benchmark GaussianRandomProjection on the "blobs" dataset (numpy input).
# This cell uses larger inputs than the others: row counts from 2**17
# through 2**23 and dims of 100/500/1000/10000.
runner = SpeedupComparisonRunner(
    input_type="numpy",
    dataset_name="blobs",
    bench_dims=[100, 500, 1000, 10000],
    bench_rows=[1 << exponent for exponent in range(17, 24)],
)

# Look the algorithm up by name and keep the run output in `results`.
results = runner.run(
    algorithm_by_name("GaussianRandomProjection"),
    verbose=True,
)