In [1]:
from auxiliary import loadData_staticTargetAddrMatch, prepare_data_loaders
from algorithms import RssPosAlgo_NearestNeighbour
from algorithms import RssPosAlgo_NearestNeighbour_Interpolation 
from algorithms import RssPosAlgo_NearestNeighbour_GetKmeansDb
import numpy as np

### Building a database from the recorded experiment data with k-means

In [2]:
# Load the RSS readings and ground-truth locations of the recorded experiment,
# restricted to three target addresses.
# NOTE(review): shuffle=False presumably keeps the recording order — confirm in auxiliary.
datajsonpath = "../experiments/exp002_20240907_sna_kadirburak/data-tshark/data.json"
inp_rss_vals, gt_locations = loadData_staticTargetAddrMatch(
    datajsonpath,
    second_hold=5,
    shuffle=False,
    target_addresses=[
        "4c:77:6d:92:b3:60",
        "4c:77:6d:5f:ea:e0",
        "4c:77:6d:05:af:20",
    ],
)

In [3]:
# Split the data with train_test_split=0.5; the first two return values
# (presumably the data loaders — TODO confirm) are not needed here.
_, _, x_train, y_train, x_test, y_test = prepare_data_loaders(
    inp_rss_vals,
    gt_locations,
    batch_size=1,
    train_test_split=0.5,
)

# Sanity check: print the shapes of the full arrays and of both splits.
for label, shapes in (
    ("Shape of total RSS array:", (inp_rss_vals.shape,)),
    ("Shape of total loc array:", (gt_locations.shape,)),
    ("Shape of train and test RSS arrays, respectively:", (x_train.shape, x_test.shape)),
    ("Shape of train and test loc arrays, respectively:", (y_train.shape, y_test.shape)),
):
    print(label, *shapes)

Shape of total RSS array: (348, 3)
Shape of total loc array: (348, 2)
Shape of train and test RSS arrays, respectively: torch.Size([174, 3]) torch.Size([174, 3])
Shape of train and test loc arrays, respectively: torch.Size([174, 2]) torch.Size([174, 2])


In [4]:
# Build the k-means database from the training split. The number of clusters
# is chosen to match the manual database (13 points). verbose=True makes the
# helper print the cluster IDs and cluster centers shown below.
num_clusters = 13
db_kmeans = RssPosAlgo_NearestNeighbour_GetKmeansDb(
    x_train,
    y_train,
    num_clusters,
    verbose=True,
)

Cluster IDs of each train set element after Kmeans:

[ 0  0  0  0  0  0  0  0  7  7  7  7  4  4  4  4 12 12 12 12 12 12 12 11
 11 11 11 11  8  8  8  8  8  8  3  3  3  3  3  3  3  3  3  3  3  3  3  3
  3  8  8  8 10 10 10 10 10 10  1  1  1  1  1  1  1  1  1  1  1  1  1  1
 11 11 11 11 11  4  4  4  4  4  4  4  7  7  7  7  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  6  6  6  6  6  6  6  6  6  6  6  9  9  9  9  9
  9  9  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  5  5
  5  5  5  5  5  5  5  5 12  4  4  4  4  4  4  7  7  7  7  7  7  7  7  7
  0  0  0  0  0  0]
--------------------
Cluster centers (location) of each cluster center (virtual point), {x,y}:

[[0.92683114 2.27431192]
 [2.67426438 4.70542829]
 [2.48437504 1.25151475]
 [0.74757434 4.58426011]
 [1.95969467 3.11256065]
 [2.99351363 2.29740818]
 [0.44654803 1.17553726]
 [1.22597812 2.9328029 ]
 [1.31993957 4.28943846]
 [1.25467168 0.79017064]
 [1.96105778 4.65815822]
 [2.0147192  4.06863041]
 [2.61812547 3.44652

### Evaluate the k-means database w.r.t. the test set (the manual database is not evaluated in this notebook)

In [5]:
def geterror(xt, yt, db):
    """Return the mean element-wise squared error of nearest-neighbour predictions.

    Each sample in ``xt`` is passed to ``RssPosAlgo_NearestNeighbour`` together
    with ``db``; the squared difference between the matching entry of ``yt`` and
    the prediction is accumulated and finally divided by the number of samples.

    Args:
        xt: Iterable of test inputs (one RSS sample per element).
        yt: Indexable collection of ground-truth locations, aligned with ``xt``.
            Elements may expose ``.numpy()`` (as the tensors used in this
            notebook do) or be anything ``np.asarray`` accepts.
        db: Database handed unchanged to ``RssPosAlgo_NearestNeighbour``.

    Returns:
        The element-wise mean of the squared errors. Its shape follows the
        prediction/target shape (presumably one entry per location coordinate
        — confirm against ``RssPosAlgo_NearestNeighbour``).

    Raises:
        ValueError: If ``xt`` contains no samples, since a mean is undefined.
    """
    squared_error_sum = 0
    num_samples = 0
    for test_idx, x_test_sample in enumerate(xt):
        loc_pred = RssPosAlgo_NearestNeighbour(x_test_sample, db)
        target = yt[test_idx]
        # Accept tensors (via .numpy()) as well as plain arrays/sequences.
        target = target.numpy() if hasattr(target, "numpy") else np.asarray(target)
        squared_error_sum += (target - loc_pred) ** 2
        num_samples += 1
    if num_samples == 0:
        # Previously this case failed with an unbound loop variable.
        raise ValueError("geterror: the test set is empty, cannot compute a mean error")
    return squared_error_sum / num_samples

In [6]:
# Report the Euclidean norm of the per-element mean squared error vector
# returned by geterror for the k-means database.
kmeans_err = geterror(x_test, y_test, db_kmeans)
print("Kmeans L2 err:", np.linalg.norm(kmeans_err))

Kmeans L2 err: 2.1764064118072284


Test the k-means database for different train/test split sizes.

In [7]:
# Values passed as train_test_split, one evaluation per value.
splitsizes = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Same number of clusters as the manual db (=13).
num_clusters = 13

# NOTE: every iteration rebinds x_train/y_train/x_test/y_test and db_kmeans,
# so after the loop they hold the values of the last split size (0.9).
for ss in splitsizes:
    _, _, x_train, y_train, x_test, y_test = prepare_data_loaders(
        inp_rss_vals,
        gt_locations,
        batch_size=1,
        train_test_split=ss,
    )
    db_kmeans = RssPosAlgo_NearestNeighbour_GetKmeansDb(x_train, y_train, num_clusters)
    split_err = geterror(x_test, y_test, db_kmeans)
    print("Split size:", ss)
    print("Kmeans L2 err:", np.linalg.norm(split_err))
    print()

Split size: 0.1
Kmeans L2 err: 3.217439555433861

Split size: 0.2
Kmeans L2 err: 4.890586111787305

Split size: 0.3
Kmeans L2 err: 2.54735927156641

Split size: 0.4
Kmeans L2 err: 2.266598240192207

Split size: 0.5
Kmeans L2 err: 2.1764064118072284

Split size: 0.6
Kmeans L2 err: 2.096012300449852

Split size: 0.7
Kmeans L2 err: 1.5405137220716507

Split size: 0.8
Kmeans L2 err: 1.2272734384250967

Split size: 0.9
Kmeans L2 err: 1.4504681992403539



### Evaluate the k-means database: plain nearest neighbour vs. interpolated nearest neighbour

In [8]:
# Redo the train_test_split=0.5 split (the sweep above left the variables at
# the last split size) and rebuild the k-means database.
# num_clusters is reused from the earlier cell.
_, _, x_train, y_train, x_test, y_test = prepare_data_loaders(
    inp_rss_vals,
    gt_locations,
    batch_size=1,
    train_test_split=0.5,
)
db_kmeans = RssPosAlgo_NearestNeighbour_GetKmeansDb(x_train, y_train, num_clusters)

# Running sums of the element-wise squared errors of both predictors.
meanerror_nene = 0
meanerror_nene_interp = 0
for test_idx, x_test_sample in enumerate(x_test):
    y_true = y_test[test_idx].numpy()
    loc_pred_nene = RssPosAlgo_NearestNeighbour(x_test_sample, db_kmeans)
    loc_pred_nene_interp = RssPosAlgo_NearestNeighbour_Interpolation(x_test_sample, db_kmeans)
    meanerror_nene += (y_true - loc_pred_nene) ** 2
    meanerror_nene_interp += (y_true - loc_pred_nene_interp) ** 2

# test_idx is the last index visited, so the sample count is test_idx + 1.
num_test = test_idx + 1
print("NeNe       :", np.linalg.norm(meanerror_nene / num_test))
print("NeNe+Interp:", np.linalg.norm(meanerror_nene_interp / num_test))

NeNe       : 2.1764064118072284
NeNe+Interp: 1.744515057793197


Compare with the ss=0.5 result above: interpolation lowers the error from about 2.18 (plain nearest neighbour) to about 1.74.