In [1]:
from auxiliary import loadData_staticTargetAddrMatch, prepare_data_loaders
from algorithms import RssPosAlgo_NearestNeighbour
from algorithms import RssPosAlgo_NearestNeighbour_Interpolation 
from algorithms import RssPosAlgo_NearestNeighbour_GetKmeansDb
import numpy as np

### Building a database from the recorded experiment data with k-means

In [2]:
# Path to the recorded experiment data (JSON file) that the samples are loaded from.
datajsonpath = "../experiments/exp004_20241022_sna_kadirburakerdem/data-tshark/data.json"

# Load the RSS readings and ground-truth locations for three target addresses.
# NOTE(review): the exact meaning of `second_hold`, `shuffle` and `snap250ms` is
# defined in `auxiliary.loadData_staticTargetAddrMatch` — confirm before tuning them.
inp_rss_vals, gt_locations = loadData_staticTargetAddrMatch(
    datajsonpath,
    second_hold=5,
    shuffle=False,
    target_addresses=[
        "d8:47:32:eb:6c:38",
        "50:c7:bf:19:e6:4d",
        "4c:77:6d:5f:dc:20",
    ],
    snap250ms=False,
)

In [3]:
# Split the loaded samples into train and test parts. The first two return values
# are discarded here (presumably the data loaders themselves — confirm in `auxiliary`).
_, _, x_train, y_train, x_test, y_test = prepare_data_loaders(
    inp_rss_vals,
    gt_locations,
    batch_size=1,
    train_test_split=0.5,
)

# Report the shapes so the split can be sanity-checked at a glance.
print("Shape of total RSS array:", inp_rss_vals.shape)
print("Shape of total loc array:", gt_locations.shape)
print("Shape of train and test RSS arrays, respectively:", x_train.shape, x_test.shape)
print("Shape of train and test loc arrays, respectively:", y_train.shape, y_test.shape)

Shape of total RSS array: (6173, 3)
Shape of total loc array: (6173, 2)
Shape of train and test RSS arrays, respectively: torch.Size([3086, 3]) torch.Size([3087, 3])
Shape of train and test loc arrays, respectively: torch.Size([3086, 2]) torch.Size([3087, 2])


In [4]:
# Build the k-means database from the training split. Cluster IDs are assigned
# according to the location values; the cluster count matches the manual db (=13).
num_clusters = 13
db_kmeans = RssPosAlgo_NearestNeighbour_GetKmeansDb(
    x_train,
    y_train,
    num_clusters,
    verbose=True,  # print the cluster assignment and the cluster centers
)

Cluster IDs of each train set element after Kmeans:

[11 11 11 ...  6  6  6]
--------------------
Cluster centers (location) of each cluster center (virtual point), {x,y}:

[[3.44000602 3.80600789]
 [0.364704   3.10145701]
 [2.81847518 1.89211323]
 [1.57156013 4.34114093]
 [1.45085843 1.85701362]
 [0.64687478 1.96292415]
 [3.56844808 2.0086854 ]
 [1.84405937 3.4601633 ]
 [0.4803877  4.15587593]
 [1.08309868 3.00542174]
 [2.14432484 2.58689764]
 [1.95239633 1.5387324 ]
 [2.63455492 3.88538891]]
--------------------


### Evaluate manual vs. k-means data dictionaries on the test set

In [5]:
def geterror(xt, yt, db):
    """Return the per-coordinate mean squared error of nearest-neighbour predictions.

    Every sample of ``xt`` is passed to ``RssPosAlgo_NearestNeighbour`` together
    with ``db``. The element-wise squared difference between the matching entry
    of ``yt`` and the predicted location is accumulated and finally divided by
    the number of samples.

    The result is a vector of mean squared errors (one entry per location
    coordinate), not a Euclidean distance; callers that need a scalar have to
    reduce it themselves.

    Parameters
    ----------
    xt : iterable
        Test inputs, iterated in order; each item is forwarded unchanged to
        ``RssPosAlgo_NearestNeighbour``.
    yt : indexable
        Ground-truth locations; ``yt[i]`` belongs to the i-th sample of ``xt``
        and must expose ``.numpy()``.
    db : object
        Database forwarded unchanged to ``RssPosAlgo_NearestNeighbour``.

    Returns
    -------
    The accumulated squared error divided by the number of samples.

    Raises
    ------
    ValueError
        If ``xt`` yields no samples (a mean over zero samples is undefined).
    """
    squared_error_sum = 0
    n_samples = 0
    for test_idx, x_test_sample in enumerate(xt):
        loc_pred = RssPosAlgo_NearestNeighbour(x_test_sample, db)
        squared_error_sum += (yt[test_idx].numpy() - loc_pred) ** 2
        n_samples += 1

    # Count samples explicitly instead of relying on the loop variable, which
    # is undefined when the loop body never runs.
    if n_samples == 0:
        raise ValueError("geterror() needs at least one test sample")
    return squared_error_sum / n_samples

In [6]:
# Score the k-means database on the held-out test split.
# NOTE(review): the printed value is the norm of the per-coordinate
# mean-squared-error vector returned by geterror; it is neither an RMSE nor a
# mean Euclidean distance, so the "L2 err" label should be read with care.
print(
    "Kmeans L2 err:",
    np.linalg.norm(geterror(xt=x_test, yt=y_test, db=db_kmeans)),
)

Kmeans L2 err: 4.9694863992964295


### Test for different split sizes

In [7]:
# Same number of clusters as the manual db (=13).
num_clusters = 13

# Values swept for the `train_test_split` argument of prepare_data_loaders.
splitsizes = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

for ss in splitsizes:
    # Each iteration overwrites the notebook-level split variables and db_kmeans,
    # so after the loop they hold the values of the last split size.
    _, _, x_train, y_train, x_test, y_test = prepare_data_loaders(
        inp_rss_vals,
        gt_locations,
        batch_size=1,
        train_test_split=ss,
    )
    db_kmeans = RssPosAlgo_NearestNeighbour_GetKmeansDb(x_train, y_train, num_clusters)

    print("Split size:", ss)
    print(
        "Kmeans L2 err:",
        np.linalg.norm(geterror(xt=x_test, yt=y_test, db=db_kmeans)),
    )
    print("")

Split size: 0.1
Kmeans L2 err: 3.5895831726768024

Split size: 0.2
Kmeans L2 err: 3.0472752232524853

Split size: 0.3
Kmeans L2 err: 4.401943938820229

Split size: 0.4
Kmeans L2 err: 4.459008230471633

Split size: 0.5
Kmeans L2 err: 4.9694863992964295

Split size: 0.6
Kmeans L2 err: 5.4210847259999495

Split size: 0.7
Kmeans L2 err: 5.44328839638313

Split size: 0.8
Kmeans L2 err: 4.386181916638634

Split size: 0.9
Kmeans L2 err: 4.850984205130677



### Evaluate the k-means database: plain nearest neighbour vs. interpolated nearest neighbour

In [8]:
# Recreate the 0.5 split and its k-means database: the split-size sweep leaves
# these variables set to the values of its last iteration.
_, _, x_train, y_train, x_test, y_test = prepare_data_loaders(
    inp_rss_vals,
    gt_locations,
    batch_size=1,
    train_test_split=0.5,
)
db_kmeans = RssPosAlgo_NearestNeighbour_GetKmeansDb(x_train, y_train, num_clusters)

# Running sums of the element-wise squared errors of both predictors.
meanerror_nene = 0
meanerror_nene_interp = 0
for test_idx, x_test_sample in enumerate(x_test):
    # Predict the location of the same sample with both algorithms.
    loc_pred_nene = RssPosAlgo_NearestNeighbour(x_test_sample, db_kmeans)
    loc_pred_nene_interp = RssPosAlgo_NearestNeighbour_Interpolation(x_test_sample, db_kmeans)

    meanerror_nene += (y_test[test_idx].numpy() - loc_pred_nene) ** 2
    meanerror_nene_interp += (y_test[test_idx].numpy() - loc_pred_nene_interp) ** 2

# After the loop test_idx is the last index, so test_idx + 1 is the sample count.
# The printed values are norms of the per-coordinate mean-squared-error vectors.
print("NeNe       :", np.linalg.norm(meanerror_nene / (test_idx + 1)))
print("NeNe+Interp:", np.linalg.norm(meanerror_nene_interp / (test_idx + 1)))

NeNe       : 4.9694863992964295
NeNe+Interp: 4.308347922244778
