## Optimizing Bandwidth Testing

This document is to test how the optimizing bandwitch works in isolation. To see details about the function and how it is implemented into the `kde_function`, see `Defining KDE.ipynb`.

In [27]:
#Importing the required modules
import numpy as np
import random
from scipy.stats import iqr

In [28]:
#Defining a function to optimize bandwidth based on Scott's rule of thumb
def Scott_bw(X):
    """
    Takes X (NxM matrix) to output the optimal bandwidth based on Scott's rule of thumb.
    
    Args:
        X (ndarray): NxM matrix, N = # of objects, M = # of parameters.
    Returns:
        bw (float): The optimized bandwidth as a float number.
    
    """
    shape_string = str(X.shape)
    objects, parameters = shape_string.split(', ')
    N_string = objects[1:]
    N = int(N_string)
    IQR = iqr(X)
    A = min(np.std(X), IQR/1.34)
    bw = 1.059 * A * N ** (-1/5.)
    return bw

In [29]:
#Testing on mock data
mock_data3 = [] # tuples in 6 dimensions
N = 1000
for i in range(N):
    select_random = np.linspace(1.0, 10.0, 100) # 1.0 and 10.0 are arbitrary
    x4 = random.choice(select_random)
    x5 = random.choice(select_random)
    x6 = random.choice(select_random)
    x1 = 1
    x2 = 3 + x4 + 2*x5 + 3*x6 
    x3 = -3 - 2*x2 - 3*x2 - 4*x2
    mock_data3.append([x1, x2, x3, x4, x5, x6])
print(mock_data3)

[[1, 32.727272727272734, -297.5454545454546, 6.909090909090909, 1.4545454545454546, 6.636363636363637], [1, 46.36363636363636, -420.27272727272725, 10.0, 3.7272727272727275, 8.636363636363637], [1, 29.363636363636363, -267.27272727272725, 4.7272727272727275, 4.818181818181818, 4.0], [1, 39.27272727272728, -356.4545454545455, 7.545454545454546, 3.1818181818181817, 7.454545454545455], [1, 27.636363636363633, -251.72727272727272, 5.181818181818182, 4.0, 3.8181818181818183], [1, 22.090909090909093, -201.81818181818184, 1.0909090909090908, 1.9090909090909092, 4.7272727272727275], [1, 51.45454545454545, -466.09090909090907, 4.818181818181818, 7.090909090909091, 9.818181818181818], [1, 28.454545454545453, -259.09090909090907, 8.09090909090909, 6.090909090909091, 1.7272727272727273], [1, 26.90909090909091, -245.1818181818182, 9.727272727272727, 1.9090909090909092, 3.4545454545454546], [1, 18.0, -165.0, 3.0, 4.363636363636363, 1.0909090909090908], [1, 46.90909090909091, -425.18181818181813, 5.2

In [30]:
#Turning the list into an Nx6 matrix in an array format
X = np.array(mock_data3)

#Optimizing bw for given N and X
Scott_bw(X)

1.47983170922272