# Explore cost functions
Here we explore how different cost functions can help find the optimal set of parameters for a microburst detector.

In [41]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
# Load the detector parameter sweep; names=True makes genfromtxt return a
# structured array whose fields are addressed by name (e.g. d['TPR']).
fPath = 'burst_params.csv'
d = np.genfromtxt(fPath, names=True, delimiter=',')

`d` is a NumPy structured array (very similar to a dictionary, since columns are accessed by field name, but its rows look like tuples, which are immutable).

In [10]:
# Preview the first five records of the table.
d[:5]

array([( 0.5,  0.1,  0.2,  1029.,  60178.,  77.,  5771.),
       ( 0.5,  0.1,  0.3,  1029.,  64591.,  90.,  6187.),
       ( 0.5,  0.1,  0.4,  1029.,  60231.,  97.,  5757.),
       ( 0.5,  0.1,  0.5,  1029.,  60231.,  97.,  5757.),
       ( 0.5,  0.1,  0.6,  1029.,  57993.,  97.,  5539.)],
      dtype=[('thresh', '<f8'), ('n', '<f8'), ('a', '<f8'), ('validNum', '<f8'), ('detNum', '<f8'), ('TPR', '<f8'), ('FPR', '<f8')])

In [11]:
def dana_cost(alpha, beta, d):
    """
    Evaluate Dana's cost function for each record in ``d``.

        Cost = FP + alpha*missed + beta*|detNum - validNum|

    where
        FP     = validNum * FPR/100        (false positives)
        missed = validNum * (1 - TPR/100)  (N - TP: valid events that were
                                            not detected; earlier notes
                                            called this "TN")

    One false positive costs 1 and one missed event costs alpha, so a
    false positive is (1/alpha) times as costly as a missed event. The
    beta term additionally penalizes any difference between the number of
    detections and the number of valid events.

    Parameters
    ----------
    alpha : float
        Weight applied to the missed-event count.
    beta : float
        Weight applied to |detNum - validNum|.
    d : mapping or structured array
        Must provide the fields 'validNum', 'detNum', 'TPR' and 'FPR'.
        Assumes TPR and FPR are percentages of validNum -- TODO confirm
        against the code that wrote the table.

    Returns
    -------
    The cost, with the same shape as the fields of ``d``.
    """
    n_valid = d['validNum']

    # The rates are divided by 100 to turn them into counts of events.
    false_pos = n_valid*d['FPR']/100
    missed = alpha*n_valid*(1 - d['TPR']/100)

    # Penalize detecting a different number of events than actually exist.
    count_mismatch = beta*np.abs(d['detNum'] - n_valid)

    return false_pos + missed + count_mismatch

In [62]:
# Score every parameter combination: a missed event costs a tenth of a
# false positive (alpha), and the detection-count mismatch is doubled (beta).
cost = dana_cost(alpha=1/10, beta=2, d=d)

In [63]:
# Smallest cost and the row index where it occurs.
cost.min(), cost.argmin()

(546.19499999999994, 546)

In [64]:
# Parameter record that achieves the minimum cost.
d[cost.argmin()]

( 2.5,  0.3,  1.9,  1029.,  990.,  55.,  41.)

In [66]:
# argmin reports only the first minimum; list every record that ties for it.
idx = np.nonzero(cost == cost.min())[0]
for i in idx:
    print(d[i])

( 2.5,  0.3,  1.9,  1029.,  990.,  55.,  41.)
