In [1]:
import numpy as np

In [2]:
from prep_terrain_data import makeTerrainData

In [3]:
from knn_tester import *
from parameter_plots import *

In [9]:
# Configure Bokeh to render its plots inline in the notebook output cells
# rather than writing them to a standalone HTML file.
from bokeh.io import output_notebook
output_notebook()

Default Parameters
===========================

In [13]:
# Baseline configuration shared by the parameter sweeps below: each sweep
# varies one setting and holds the rest at the values defined here.

# Experiment-level settings.
n_jobs = 1
sample_size = 1000
data_generator = makeTerrainData

# Classifier-level defaults passed to loop_knn.
default_k = 8
default_weights = "uniform"
default_algorithm = "auto"

Sample Size
==============

In [7]:
# Sample sizes to sweep: 1k steps up to 30k, 5k steps up to 60k, 10k steps up
# to 100k, then two large sizes.
# Each range is wrapped in list() before concatenation: on Python 3 a range
# object does not support "+", so the bare `range(...) + range(...)` form only
# works on Python 2. list(range(...)) behaves identically on both.
sample_sizes = (list(range(1000, 30000 + 1, 1000)) +
                list(range(35000, 60000 + 1, 5000)) +
                list(range(70000, 100000 + 1, 10000)) +
                [150000, 200000])

# Sweep the sample size only; every other setting stays at its default.
sample_size_results = loop_knn(data_generator,
                               sample_size=sample_sizes,
                               k=default_k,
                               weights=default_weights,
                               algorithm=default_algorithm,
                               n_jobs=n_jobs,
                               )


---------------------------------------- 
Best Results 
----------------------------------------
sample_size: 4000 
Out of sample accuracy: 0.963 
----------------------------------------


In [11]:
# Express the swept sample sizes in thousands so the x-axis ticks stay short.
scaled_sample_sizes = np.array(sample_sizes) / 1000.0

# Plot the sample-size sweep results (accuracy and training time titles).
parameter_plots(
    scaled_sample_sizes,
    results_dict=sample_size_results,
    x_label="Sample Size (in thousands)",
    title_accuracy="Sample Size vs Accuracy",
    title_time="Sample Size vs Training Time",
    legend_pos="bottom_right",
)

k (Number of Neighbors)
=======================

In [14]:
# Values of k to sweep: every value from 1 to 49, then steps of 5 up to 100,
# then steps of 20 from 120 up to 300.
# Each range is wrapped in list() before concatenation: on Python 3 a range
# object does not support "+", so the bare `range(...) + range(...)` form only
# works on Python 2. list(range(...)) behaves identically on both.
ks = (list(range(1, 50)) +
      list(range(55, 100 + 1, 5)) +
      list(range(120, 300 + 1, 20)))

# Sweep k only; every other setting stays at its default.
k_results = loop_knn(data_generator,
                     sample_size=sample_size,
                     k=ks,
                     weights=default_weights,
                     algorithm=default_algorithm,
                     n_jobs=n_jobs,
                     )


---------------------------------------- 
Best Results 
----------------------------------------
k: 8 
Out of sample accuracy: 0.944 
----------------------------------------


In [16]:
# Plot the k sweep results against the swept k values.
parameter_plots(
    ks,
    results_dict=k_results,
    x_label="# of Neighbors",
    title_accuracy="# of Neighbors vs Accuracy",
    title_time="# of Neighbors vs Training Time",
    legend_pos="top_right",
)

weights
----------

In [17]:
# The weighting methods to compare.
weights = ["uniform", "distance"]

# Sweep the weighting method only; every other setting stays at its default.
weights_results = loop_knn(
    data_generator,
    sample_size=sample_size,
    k=default_k,
    weights=weights,
    algorithm=default_algorithm,
    n_jobs=n_jobs,
)


---------------------------------------- 
Best Results 
----------------------------------------
weights: uniform 
Out of sample accuracy: 0.944 
----------------------------------------


In [24]:
# The weighting methods are categorical, so they are plotted against integer
# positions; the x-axis label spells out which position is which method.
weights_labels = [0, 1]

parameter_plots(
    weights_labels,
    results_dict=weights_results,
    x_label="Weighting Method (0=uniform, 1=distance)",
    title_accuracy="Weighting Method vs Accuracy",
    title_time="Weighting Method vs Training Time",
    legend_pos="right_center",
)

algorithm
----------

In [25]:
# The algorithm options to compare.
algorithms = ["auto", "ball_tree", "kd_tree", "brute"]

# Sweep the algorithm only; every other setting stays at its default.
algorithms_results = loop_knn(
    data_generator,
    sample_size=sample_size,
    k=default_k,
    weights=default_weights,
    algorithm=algorithms,
    n_jobs=n_jobs,
)


---------------------------------------- 
Best Results 
----------------------------------------
algorithm: auto 
Out of sample accuracy: 0.944 
----------------------------------------


In [29]:
# The algorithms are categorical, so they are plotted against their integer
# positions in `algorithms`. list() materialises the positions so the plot
# receives a plain list on Python 3 as well as Python 2.
algorithms_labels = list(range(len(algorithms)))

# The x-axis label maps each position back to its algorithm name. The original
# label had a closing parenthesis with no opening one; it now follows the same
# "Name (0=..., 1=...)" format as the weighting-method plot.
parameter_plots(algorithms_labels, results_dict=algorithms_results,
                x_label="Algorithm (0=auto, 1=ball_tree, "
                        "2=kd_tree, 3=brute)",
                title_accuracy="Algorithm vs Accuracy",
                title_time="Algorithm vs Training Time",
                legend_pos="right_center")