### Different knn weighting schemes
So far, all of the k nearest neighbors have received the same consideration in determining the prediction. It makes sense, however, that points that are 'nearer' may be more important and should carry more weight. Below are figures showing the unweighted approach we have been using, a weighting scheme based on inverse distance (1/distance), and an exponential weighting scheme for 4-ethylphenol.<br>
<img src='ethylphenol.png'>

---
### <center>**knn weighting plots**</center>
---
<center>k = 5 with neighboring 5 molecular structures<br><img src='knn_equal_weighting_molecule.png'></center><br>

---
<center>k = 10<br>
<img src='knn_inverse_weighting.png'><img src='knn_exponential_weighting.png'></center>

<font color='green'>Repeat the above, replacing `predict_knn` first with the `predict_knn_weighted` function (1/distance weighting) and then with the `predict_knn_weighted_exp` function (exponential weighting).</font>

In [144]:
def predict_knn(row, train, test, k=5, pr=0):
    """Predict the target of one test row as the mean over its k nearest neighbors.

    The feature values of row number `row` of `test` are handed to `closest`
    together with `train`; the prediction is the plain (unweighted) average of
    the `target[0]` column of the table that `closest` returns.

    `target` and `features` are read from the enclosing (notebook) scope.
    A truthy `pr` prints a one-line description of the prediction being made.
    """
    if pr:
        print(f'Predicting target value, {target[0]}, for row = {row} using k={k} with features: {features}')
    example = test.select(features).row(row)
    neighbors = closest(train, example, k, target, features)
    neighbor_targets = neighbors.column(target[0])
    return np.average(neighbor_targets)

In [None]:
def closest(training, test, k, target, features):
    """Return the k rows with the smallest 'Distance' for the given example.

    `training`, `test`, `target` and `features` are forwarded unchanged to
    `distances`; the table it returns is sorted by its 'Distance' column and
    the first k rows are kept.
    """
    by_distance = distances(training, test, target, features).sort('Distance')
    first_k = np.arange(k)
    return by_distance.take(first_k)

In [198]:
def predict_knn_weighted(row, train, test, k, pr=0):
    """Predict pKa for one test row as an inverse-distance weighted mean.

    Each of the k nearest training neighbors contributes its 'pKa' value with
    weight 1/Distance, and the weights are normalised so they sum to 1.

    Parameters:
        row:   index of the row in `test` to predict.
        train: training table searched for neighbors.
        test:  table holding the example; only its `features` columns are used.
        k:     number of neighbors to average over.
        pr:    if truthy, print a one-line description of the prediction.

    Returns:
        The weighted mean of the neighbors' 'pKa' values. If one or more
        neighbors lie at distance 0, the mean 'pKa' of those exact matches is
        returned, because 1/0 would otherwise turn the result into NaN.

    `target` and `features` are read from the enclosing (notebook) scope.
    """
    if pr:
        print(f'Predicting target value, {target[0]}, for row = {row} using k={k} with features: {features}')

    # Compare only this row's feature values against the training set (as
    # predict_knn does) rather than passing the whole test table.
    example = test.select(features).row(row)
    neighbors = closest(train, example, k, target, features)

    dists = np.asarray(neighbors.column('Distance'), dtype=float)
    values = np.asarray(neighbors.column('pKa'), dtype=float)

    exact = dists == 0
    if exact.any():
        return np.mean(values[exact])

    weights = 1 / dists
    return np.sum(values * weights) / np.sum(weights)

In [199]:
def predict_knn_weighted_exp(example, k):
    """Predict pKa for `example` as an exponentially weighted mean of neighbors.

    Each of the k nearest training neighbors contributes its 'pKa' value with
    weight exp(-Distance), and the weights are normalised so they sum to 1.

    Parameters:
        example: the feature values of the molecule to predict, in the form
                 `closest` expects (e.g. one row of the test table with the
                 non-feature columns dropped).
        k:       number of neighbors to average over.

    Returns:
        The weighted mean of the neighbors' 'pKa' values.

    `train`, `target` and `features` are read from the enclosing (notebook)
    scope.
    """
    # Search neighbors of the example that was passed in, not of the global
    # test table.
    neighbors = closest(train, example, k, target, features)

    dists = np.asarray(neighbors.column('Distance'), dtype=float)
    values = np.asarray(neighbors.column('pKa'), dtype=float)

    # Shifting by the smallest distance leaves the normalised weights
    # unchanged but keeps exp() from underflowing to all zeros (0/0) when
    # every neighbor is far away.
    weights = np.exp(-(dists - dists.min()))
    return np.sum(values * weights) / np.sum(weights)

In [200]:
# For each k: predict every test row, plot a histogram of the errors
# (experimental - predicted pKa) annotated with the RMSE, and save the figure.
for knn in [...]:
    # Experimental values as plain Python scalars, one per test row.
    exp_pKa = test.column("pKa").tolist()
    predict_pKa = [
        predict_knn_weighted(i, train, test, knn, 0)
        for i in np.arange(test.num_rows)
    ]

    # Compute the residuals once; both the histogram and the RMSE use them.
    residuals = np.array(exp_pKa) - np.array(predict_pKa)

    plt.hist(residuals, bins=25, edgecolor="black", linewidth=1.2)
    plt.xlim(-8,8)
    rmse_nn = np.mean(residuals ** 2) ** 0.5
    t=plt.text(3,9,f'Root mean square error: {rmse_nn:.2f}',fontsize=9)
    t.set_bbox(dict(facecolor='green',alpha=0.7, edgecolor='blue'))
    plt.title("k = " + str(knn))
    plt.savefig('k_'+str(knn)+'-plots.png')
    plt.show()


UFuncTypeError: ufunc 'subtract' did not contain a loop with signature matching types (dtype('<U10'), dtype('float64')) -> None

In [None]:
# For each k: predict every test row, scatter predicted vs. experimental pKa,
# overlay the least-squares line (its equation goes in the legend), and save.

# These do not depend on k or on the row, so build them once.
feature_rows = test.drop('Name','pKa','smiles')
exp_pKa = np.asarray(test.column("pKa"), dtype=float)

for k in [...]:
    predict_pKA = make_array()
    for i in np.arange(test.num_rows):
        example_nn_row = feature_rows.row(i)
        predict_pKA = np.append(predict_pKA, ...(example_nn_row, k))  # PLACE TO PUT NEW FUNCTION
    plt.scatter(exp_pKa, predict_pKA)
    z = np.polyfit(exp_pKa, predict_pKA, 1)
    p = np.poly1d(z)
    # Colour is given only by keyword; the former positional "blue" format
    # string conflicted with color='teal' and was overridden by it.
    plt.plot(
        exp_pKa, p(exp_pKa), label="{}".format(p), color='teal',alpha=0.7)  # Equation of line placed in legend from label
    plt.xlabel("Experimental pKa")
    plt.ylabel("Predicted pKa")
    plt.title("k = " + str(k))
    plt.legend(fontsize="small")
    # One file per k, so later iterations do not overwrite earlier plots.
    plt.savefig('k_' + str(k) + '-scatter.png')
    plt.show()


### <font color='blue'>Which weighting scheme works best?</font>

...