In [2]:
import pandas as pd
import numpy as np
from data_analysis import get_heart_bounds, correlation_coef, graph_3d
from graph import narrow,corrplot3axes,trend,nearest,plot_exploration, graph_dist_over_axis, graph_cc_distribution
from BayesOptLib.bayes_opt.bayesian_optimization import BayesianOptimization
from RandomSampler import RandomSampler
import matplotlib
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.metrics.pairwise import euclidean_distances
matplotlib.use('Qt5Agg')  # or can use 'TkAgg', whatever you have/prefer
from prettytable import PrettyTable
%matplotlib qt 


def get_index(label):
    """
    Find the row position of a coordinate inside the notebook-level ``labels`` array
    :param label: coordinate to look for; compared against each row with np.array_equal
    :return: position of the first matching row, or len(labels) when no row matches
    """
    for position, coord in enumerate(labels):
        if np.array_equal(label, coord):
            return position
    # Nothing matched: fall through to a value one past the last valid index
    return len(labels)


def black_box(x, y, z):
    """
    Objective function for the optimizer: score an XYZ coordinate by the CC between
    the ECG stored for that coordinate and the notebook-level ``target_ecg``
    :param x: first coordinate of the queried point
    :param y: second coordinate of the queried point
    :param z: third coordinate of the queried point
    :return: value of correlation_coef(target_ecg, ECG at the queried point)
    """
    query = np.array([x, y, z])
    # Row of ``ecgs`` that belongs to the queried coordinate
    row = get_index(query)
    sample_ecg = ecgs[row]
    return correlation_coef(target_ecg, sample_ecg)


def plot_exploration(visited, color_gradient):
    """
    Plot the visited points (in order) on top of all remaining candidate points in 3D.

    Reads the notebook-level ``labels`` (XYZ rows of every candidate point) and
    ``target`` (XYZ of the target point).
    :param visited: sequence of XYZ points in the order they were visited
    :param color_gradient: colour values, expected to hold one entry per row of
        ``labels`` (it is filtered with the same row mask as ``labels``)
    :return: None
    """
    path = np.array(visited, dtype=float).reshape(-1, 3)
    color_gradient = np.array(color_gradient)
    all_points = np.asarray(labels)

    # Match whole XYZ rows. np.isin works element by element, so it would flag a
    # row as visited whenever any single coordinate value occurs anywhere in the path.
    visited_mask = (all_points[:, None, :] == path[None, :, :]).all(axis=2).any(axis=1)
    rest = all_points[~visited_mask]
    rest_colors = color_gradient[~visited_mask]

    # add_subplot(projection='3d') is used because newer matplotlib releases no
    # longer accept keyword arguments in Figure.gca()
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111, projection='3d')

    # Unvisited points coloured by the gradient, then the visited path in blue
    ax.scatter(xs=rest[:, 0], ys=rest[:, 1], zs=rest[:, 2], zdir='z', alpha=0.75, c=rest_colors, cmap=plt.cm.Reds)
    ax.scatter(xs=path[:, 0], ys=path[:, 1], zs=path[:, 2], zdir='z', color='blue')
    ax.plot(path[:, 0], path[:, 1], path[:, 2], color='blue')

    # Label every visited point with its position in the visiting order
    for step, point in enumerate(path):
        ax.text(point[0], point[1], point[2], '%s' % (str(step)), size=10, zorder=1, color='k')
    ax.scatter(xs=target[0], ys=target[1], zs=target[2], color='black', s=100)
    plt.show()



In [4]:
# Load the simulated data set; both paths are relative to the working directory.
# Read in ECGs and Coordinates
# ecgs: the saved output of this cell reports shape (2000, 7212)
ecgs = pd.read_csv("simu-data/Heart3_SimuData.csv", header=None).to_numpy()
# labels: XYZ coordinates, divided by 1000 (presumably a unit conversion - TODO confirm)
labels = pd.read_csv("simu-data/Heart3_XYZsub.csv", header=None).to_numpy() / 1000
print(ecgs.shape)
# Get bounds of the heart mesh (get_heart_bounds comes from the local data_analysis module)
bounds = get_heart_bounds(labels)

(2000, 7212)


In [5]:
def optimize_point(labels, init_points=10, n_iter=35, acq="ucb", kappa=2, random_state=None):
    """
    Build a BayesianOptimization instance over the notebook-level ``bounds`` with
    ``black_box`` as the objective, run ``maximize`` on it and return it.

    :param labels: passed to the optimizer as ``real_set`` (presumably the set of
        real candidate coordinates the search is restricted to - verify against BayesOptLib)
    :param init_points: ``init_points`` argument forwarded to ``maximize``
    :param n_iter: ``n_iter`` argument forwarded to ``maximize``
    :param acq: ``acq`` argument forwarded to ``maximize``
    :param kappa: ``kappa`` argument forwarded to ``maximize``
    :param random_state: ``random_state`` forwarded to the optimizer constructor;
        pass a fixed value to make runs repeatable (assuming the library honours it)
    :return: the optimizer after ``maximize`` has run
    """
    # Build the optimizer with the heart bounds
    optimizer = BayesianOptimization(
        f=black_box,
        pbounds=bounds,
        random_state=random_state,
        real_set=labels,
    )

    # Defaults reproduce the previously hard-coded search budget and acquisition settings
    optimizer.maximize(init_points=init_points, n_iter=n_iter, acq=acq, kappa=kappa)
    return optimizer


In [26]:
# Draw a random row to act as the target (no seed is set, so the pick differs per run)
tidx = np.random.randint(len(labels))
#tidx = 1
target = labels[tidx]
target_ecg = ecgs[tidx]
print("Target: ", target)

# The target is currently left inside labels; the removal step is disabled
#labels = np.delete(labels, np.where(np.isin(labels, target)), axis=0)

# Disabled: plots of the target CC distribution
# graph_cc_distribution(target_ecg)
# graph_dist_over_axis(target_ecg)

# Run the optimizer against the chosen target
optimizer = optimize_point(labels)

Target:  [ 19.91477148 -68.44228906 -63.47517188]
|   iter    |  target   |     x     |     y     |     z     |
-------------------------------------------------------------
| [0m 1       [0m | [0m 0.2325  [0m | [0m 61.52   [0m | [0m-48.58   [0m | [0m-73.47   [0m |
| [95m 2       [0m | [95m 0.5453  [0m | [95m-22.6    [0m | [95m-67.85   [0m | [95m-65.45   [0m |
| [0m 3       [0m | [0m-0.2353  [0m | [0m 70.31   [0m | [0m-29.36   [0m | [0m-12.29   [0m |
| [95m 4       [0m | [95m 0.6665  [0m | [95m 30.75   [0m | [95m-97.58   [0m | [95m-77.78   [0m |
| [0m 5       [0m | [0m 0.5976  [0m | [0m-13.4    [0m | [0m-98.29   [0m | [0m-49.23   [0m |
| [0m 6       [0m | [0m-0.08513 [0m | [0m 31.65   [0m | [0m-54.17   [0m | [0m-8.234   [0m |
| [0m 7       [0m | [0m 0.2432  [0m | [0m 52.83   [0m | [0m-86.6    [0m | [0m-64.09   [0m |
| [95m 8       [0m | [95m 0.8752  [0m | [95m-11.55   [0m | [95m-71.92   [0m | [95m-69.06   [

In [5]:
init = 10
# Points recorded by the optimizer as predicted / visited
actual = np.asarray(optimizer.predicted)
approx = np.asarray(optimizer.visited)
# Keep only the trailing rows of the visited points, as many as there are predictions
approx = approx[len(approx) - len(actual):, :]


In [7]:
# np.linalg.norm(target[1,:]-optimizer.visited[1,:])
# optimizer.visited.shape

In [10]:
# 0.5 quantile (median) of the values in row 1 of ecgs
np.quantile(ecgs[1],0.5)

0.09855815

In [11]:
# Histogram of the values in row 1 of ecgs
plt.hist(ecgs[1])
plt.show()

In [17]:
def all_mean(ecgs):
    """
    Compute the mean and the variance of every ECG row.

    :param ecgs: iterable of ECG signals (e.g. a 2D array with one signal per row)
    :return: tuple ``(mean_array, var_array)`` of 1D float arrays with one entry
        per row; both are empty arrays when ``ecgs`` is empty
    """
    # Build each result in one pass instead of re-allocating with np.append per row
    mean_array = np.array([np.mean(row) for row in ecgs], dtype=float)
    var_array = np.array([np.var(row) for row in ecgs], dtype=float)
    return mean_array, var_array
        
        

In [18]:
# Per-row mean and variance of every ECG
mean, var = all_mean(ecgs)

In [23]:
# Histogram of the per-row ECG variances computed by all_mean
plt.hist(var)
plt.show()

In [46]:
def nearest(tidx,labels,ecgs,dis_limit):
    """
    Tabulate the target point and every point closer than ``dis_limit`` to it.

    Each row of the result holds: the point's coordinates, its Euclidean distance
    to the target, the Pearson correlation (np.corrcoef) of its ECG with the target
    ECG, and the 0.25 and 0.75 quantiles of its ECG. All values are rounded to
    2 decimals.

    The first row always describes the target itself (distance 0, correlation 1).
    The target is also part of ``labels``, so it shows up again among the
    neighbours whenever ``dis_limit`` is positive.

    :param tidx: row index of the target in ``labels`` / ``ecgs``
    :param labels: 2D array of coordinates, one row per point
    :param ecgs: ECG signals, row-aligned with ``labels``
    :param dis_limit: distance threshold; points with distance strictly below it are kept
    :return: 2D float array with one row per listed point
    """
    labels = np.asarray(labels, dtype=float)
    target_loc = labels[tidx]
    target_ecg = ecgs[tidx]

    # Distances of every point to the target in one vectorised step
    distances = np.sqrt(np.sum((labels - target_loc) ** 2, axis=1))
    near_idx = np.flatnonzero(distances < dis_limit)

    def _summary_row(loc, dist, corr, ecg):
        # coordinates + distance + correlation + lower/upper quartile of the ECG
        return np.concatenate((loc, [dist, corr, np.quantile(ecg, 0.25), np.quantile(ecg, 0.75)]))

    rows = [_summary_row(target_loc, 0.0, 1.0, target_ecg)]
    rows.extend(
        _summary_row(labels[i], distances[i], np.corrcoef(target_ecg, ecgs[i])[0, 1], ecgs[i])
        for i in near_idx
    )
    return np.around(np.vstack(rows), 2)
# Neighbours of the target within a distance of 15
table=nearest(tidx,labels,ecgs,15)
# plt.scatter(table[:,3],table[:,4])
# plt.xlabel("distance")
# plt.ylabel("correlation")
# plt.show
# Render the table as text. NOTE(review): the last two columns are labelled
# 'mean' and 'var' but nearest() fills them with the 0.25 and 0.75 quantiles.
x= PrettyTable()
x.field_names = ['x', 'y','z','Distance','Corr','mean','var']
for row in table:
    x.add_row(row)
print(x)

+-------+--------+--------+----------+------+-------+------+
|   x   |   y    |   z    | Distance | Corr |  mean | var  |
+-------+--------+--------+----------+------+-------+------+
| 19.91 | -68.44 | -63.48 |   0.0    | 1.0  | -0.05 | 0.52 |
| 25.51 | -74.43 | -67.23 |   9.02   | 0.95 | -0.06 | 0.53 |
| 29.41 | -74.42 | -72.8  |  14.59   | 0.96 | -0.05 | 0.49 |
| 29.41 | -74.42 | -72.8  |  14.59   | 0.96 | -0.05 | 0.49 |
| 29.41 | -74.42 | -72.8  |  14.59   | 0.96 | -0.05 | 0.49 |
| 26.84 | -75.54 | -69.17 |  11.44   | 0.93 | -0.06 | 0.52 |
| 12.75 | -58.67 | -63.35 |  12.12   | 0.93 | -0.04 | 0.53 |
| 13.78 | -61.18 | -62.32 |   9.58   | 0.96 | -0.04 | 0.54 |
|  27.3 | -68.35 | -73.0  |  12.06   | 0.95 | -0.04 | 0.43 |
| 12.75 | -58.67 | -63.35 |  12.12   | 0.93 | -0.04 | 0.53 |
| 21.68 | -67.0  | -61.5  |   3.01   | 0.94 | -0.04 | 0.46 |
| 19.21 | -61.13 | -68.38 |   8.84   | 0.88 | -0.04 | 0.44 |
| 18.17 | -67.79 | -62.15 |   2.29   | 1.0  | -0.05 | 0.52 |
| 19.25 | -55.42 | -70.4

In [41]:
# Same neighbour table with the distance limit widened to 20
nearest(tidx,labels,ecgs,20)

array([[ 19.91, -68.44, -63.48,   0.  ,   1.  ,   0.26,   0.94],
       [ 25.51, -74.43, -67.23,   9.02,   0.95,   0.26,   0.95],
       [ 29.41, -74.42, -72.8 ,  14.59,   0.96,   0.26,   0.91],
       [ 31.11, -71.94, -75.67,  16.92,   0.96,   0.25,   0.84],
       [ 29.41, -74.42, -72.8 ,  14.59,   0.96,   0.26,   0.91],
       [ 29.41, -74.42, -72.8 ,  14.59,   0.96,   0.26,   0.91],
       [ 26.84, -75.54, -69.17,  11.44,   0.93,   0.26,   0.97],
       [ 31.93, -70.24, -77.3 ,  18.4 ,   0.89,   0.25,   0.87],
       [ 31.17, -74.46, -75.11,  17.27,   0.96,   0.25,   0.87],
       [ 29.57, -79.71, -69.55,  16.03,   0.89,   0.26,   0.99],
       [ 20.42, -54.83, -49.61,  19.44,   0.1 ,   0.26,   0.94],
       [ 12.75, -58.67, -63.35,  12.12,   0.93,   0.26,   0.92],
       [ 13.78, -61.18, -62.32,   9.58,   0.96,   0.26,   0.93],
       [ 18.29, -56.59, -51.32,  17.05,   0.16,   0.26,   0.9 ],
       [  9.74, -57.59, -60.41,  15.19,   0.9 ,   0.26,   0.92],
       [ 27.3 , -68.35, -

In [24]:
def nearest(tidx,labels,ecgs,dis_limit):
    """
    Tabulate the target point and every point closer than ``dis_limit`` to it,
    and scatter-plot correlation against distance on figure 2.

    Each row of the result holds: the point's coordinates, its Euclidean distance
    to the target and the Pearson correlation (np.corrcoef) of its ECG with the
    target ECG, all rounded to 2 decimals. The first row always describes the
    target itself (distance 0, correlation 1).

    NOTE(review): this redefines ``nearest`` with a 5-column result; the name is
    also imported from ``graph`` and defined in another cell with 7 columns.

    :param tidx: row index of the target in ``labels`` / ``ecgs``
    :param labels: 2D array of coordinates, one row per point
    :param ecgs: ECG signals, row-aligned with ``labels``
    :param dis_limit: distance threshold; points with distance strictly below it are kept
    :return: 2D float array with one row per listed point
    """
    labels = np.asarray(labels, dtype=float)
    target_loc = labels[tidx]
    target_ecg = ecgs[tidx]

    # Distances of every point to the target in one vectorised step
    distances = np.sqrt(np.sum((labels - target_loc) ** 2, axis=1))
    near_idx = np.flatnonzero(distances < dis_limit)

    rows = [np.concatenate((target_loc, [0.0, 1.0]))]
    rows.extend(
        np.concatenate((labels[i], [distances[i], np.corrcoef(target_ecg, ecgs[i])[0, 1]]))
        for i in near_idx
    )
    table = np.around(np.vstack(rows), 2)

    plt.figure(2)
    plt.scatter(table[:, 3], table[:, 4])
    plt.xlabel("distance")
    plt.ylabel("correlation")
    # plt.show must be called; the bare attribute reference did nothing
    plt.show()
    return table

In [44]:
# 0.5 quantile (median) of the values in row 1 of ecgs (same expression as an earlier cell)
np.quantile(ecgs[1],0.5)

0.09855815

In [66]:
# corrplot3axes is imported from the local graph module; its four return values are
# unpacked here. NOTE(review): this rebinds x, which another cell uses for a PrettyTable.
x,y,z,nn_cc = corrplot3axes(tidx,labels,ecgs,15)

In [58]:
def plot_exploration(visited, color_gradient):
    """
    Plot the visited points (in order) on top of all remaining candidate points,
    drawing on figure number 0.

    Reads the notebook-level ``labels`` (XYZ rows of every candidate point) and
    ``target`` (XYZ of the target point).
    :param visited: sequence of XYZ points in the order they were visited
    :param color_gradient: colour values, expected to hold one entry per row of
        ``labels`` (it is filtered with the same row mask as ``labels``)
    :return: None
    """
    path = np.array(visited, dtype=float).reshape(-1, 3)
    color_gradient = np.array(color_gradient)
    all_points = np.asarray(labels)

    # Match whole XYZ rows. np.isin works element by element, so it would flag a
    # row as visited whenever any single coordinate value occurs anywhere in the path.
    visited_mask = (all_points[:, None, :] == path[None, :, :]).all(axis=2).any(axis=1)
    rest = all_points[~visited_mask]
    rest_colors = color_gradient[~visited_mask]

    # Reuse the 3D axes already on figure 0 if there is one, otherwise create it.
    # Newer matplotlib releases no longer accept keyword arguments in Figure.gca().
    fig = plt.figure(0)
    axes_3d = [candidate for candidate in fig.axes if getattr(candidate, "name", None) == "3d"]
    ax = axes_3d[-1] if axes_3d else fig.add_subplot(111, projection='3d')

    # Unvisited points coloured by the gradient, then the visited path in blue
    ax.scatter(xs=rest[:, 0], ys=rest[:, 1], zs=rest[:, 2], zdir='z', alpha=0.75, c=rest_colors, cmap=plt.cm.Reds)
    ax.scatter(xs=path[:, 0], ys=path[:, 1], zs=path[:, 2], zdir='z', color='blue')
    ax.plot(path[:, 0], path[:, 1], path[:, 2], color='blue')

    # Label every visited point with its position in the visiting order
    for step, point in enumerate(path):
        ax.text(point[0], point[1], point[2], '%s' % (str(step)), size=10, zorder=1, color='k')
    ax.scatter(xs=target[0], ys=target[1], zs=target[2], color='black', s=100)
    plt.show()


In [6]:
# One colour value per candidate point: 1 for the target's own ECG,
# otherwise the CC between the target ECG and that point's ECG
color_gradient = []
for ecg, coord in zip(ecgs, labels):
    if not np.array_equal(target_ecg, ecg):
        cc = correlation_coef(target_ecg, ecg)
        color_gradient.append(cc)
    else:
        # Remember the coordinate whose ECG is identical to the target ECG
        true = coord
        color_gradient.append(1)
# %matplotlib notebook
plot_exploration(optimizer.visited, color_gradient)



In [None]:
def graph_cc_distribution(target_loc,target,limit,g_labels,g_ecgs):
    """
    Plot in 3D the points closer than ``limit`` to ``target_loc``, coloured by the
    correlation of their ECG with ``target``, then rotate the view through 360 degrees.

    NOTE(review): this shadows the ``graph_cc_distribution`` imported from ``graph``.

    :param target_loc: XYZ coordinate at the centre of the neighbourhood
    :param target: ECG signal every neighbouring ECG is compared against
    :param limit: distance threshold; points with distance strictly below it are plotted
    :param g_labels: 2D array of XYZ coordinates, one row per point
    :param g_ecgs: 2D array of ECG signals, row-aligned with ``g_labels`` (any signal length)
    :return: None
    """
    g_labels = np.asarray(g_labels, dtype=float)
    g_ecgs = np.asarray(g_ecgs, dtype=float)

    # Select the neighbourhood with one vectorised mask; this also removes the
    # dependency on a fixed signal length
    distances = np.sqrt(np.sum((g_labels - target_loc) ** 2, axis=1))
    nearby = distances < limit
    labels = g_labels[nearby]
    ecgs = g_ecgs[nearby]

    true = None
    color_gradient = []
    for ecg, coord in zip(ecgs, labels):
        if np.array_equal(target, ecg):
            # The point whose ECG is identical to the target gets the maximum value
            true = coord
            color_gradient.append(1)
        else:
            color_gradient.append(correlation_coef(target, ecg))

    # No ECG in the neighbourhood equals the target: mark the requested location
    # instead of failing on ``None``
    if true is None:
        true = np.asarray(target_loc, dtype=float)

    # The target marker is drawn beneath the cloud and again on top as a large X
    fig = plt.figure(55)
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(true[0], true[1], true[2], color='black')
    ax.scatter(xs=labels[:, 0], ys=labels[:, 1], zs=labels[:, 2], c=color_gradient, cmap=plt.cm.autumn)
    ax.scatter(true[0], true[1], true[2], color='black', marker="X", s=100)
    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    ax.set_zlabel("Z")

    # Spin the camera once around the vertical axis
    for angle in range(0, 360):
        ax.view_init(30, angle)
        plt.draw()
        plt.pause(.001)
    fig.show()

In [None]:
# Plot the CC distribution of all points within a distance of 50 of the target
graph_cc_distribution(target,target_ecg,50,labels,ecgs)

In [97]:
def test(target,limit,g_labels,g_ecgs):
    true, blue, green, yellow, red = None, [], [], [], []
    color_gradient = []
    ecgs = np.empty((0, 7212))
    labels = np.empty((0, 3))
    for i in range(len(g_labels)):
        d = np.sqrt(np.sum((target - g_labels[i])**2))
        if d < limit:
            labels = np.append(labels,g_labels[i].reshape(1,3),axis=0)
            ecgs = np.append(ecgs,g_ecgs[i].reshape(1,7212),axis=0)
    return labels,ecgs

In [98]:
# WARNING: overwrites the notebook-level labels and ecgs with the subset within a
# distance of 10 of the target. Re-running this cell filters the already filtered
# arrays, and tidx no longer indexes the same rows afterwards; reload the data to undo.
labels,ecgs=test(target,10,labels,ecgs)

In [108]:
# Display the ECG of the currently selected target
target_ecg

array([5.88000000e-15, 1.01528000e-05, 7.95080000e-05, ...,
       2.31613333e-01, 2.20140000e-01, 2.08898667e-01])