In [13]:
import sys
sys.path

['/Users/mili/Downloads/open-indoor-localization-master/python',
 '/Users/mili/.vscode/extensions/ms-toolsai.jupyter-2020.11.358541065/pythonFiles',
 '/Users/mili/.vscode/extensions/ms-toolsai.jupyter-2020.11.358541065/pythonFiles/lib/python',
 '/Users/mili/opt/anaconda3/lib/python38.zip',
 '/Users/mili/opt/anaconda3/lib/python3.8',
 '/Users/mili/opt/anaconda3/lib/python3.8/lib-dynload',
 '',
 '/Users/mili/opt/anaconda3/lib/python3.8/site-packages',
 '/Users/mili/opt/anaconda3/lib/python3.8/site-packages/aeosa',
 '/Users/mili/opt/anaconda3/lib/python3.8/site-packages/IPython/extensions',
 '/Users/mili/.ipython']

In [14]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors


class Fingerprint:
    def __init__(self):
        self.use_type = None
        self.test = None
        self.train = None
        self.sorted_mac = None
        self.filter_mac = None

    def __process(self, file_name):
        file = open(file_name, "r")
        line = file.readline()
        data_dict = {'grid_id': [], 'x': [], 'y': [], 'mac': [], 'type': [], 'rssi': []}
        grid_id = 0
        while True:
            if line.strip() is not '':
                grid_id += 1
                grid_title = line.split(" ")
                grid_pos = [float(grid_title[0]), float(grid_title[1])]
                for i in range(int(grid_title[2])):
                    line = file.readline().rstrip("\n")
                    record = line.split("|")
                    ap = record[0].split(" ")
                    data_dict['grid_id'].append(grid_id)
                    data_dict['x'].append(grid_pos[0])
                    data_dict['y'].append(grid_pos[1])
                    data_dict['mac'].append(ap[0][0:17])
                    data_dict['type'].append(ap[2])
                    rssi_list = record[1].strip().split(' ') if record[1].strip() != '' else [ap[1]]
                    data_dict['rssi'].append(float(np.array(rssi_list).astype(np.float).mean()))
            line = file.readline()
            if not line:
                break
        file.close()

        data_frame = pd.DataFrame.from_dict(data_dict)
        # using grid_id for grouping to avoid group same positions's data together if use position for grouping
        grouped = data_frame.groupby(by='grid_id')

        grid_sum = len(grouped.groups)
        if not self.sorted_mac:
            mac = set(data_frame[data_frame['type'] == self.use_type]['mac'].unique())
            self.sorted_mac = sorted(mac & self.filter_mac if self.filter_mac else mac)
        sigma = np.full(shape=(grid_sum, len(self.sorted_mac)), fill_value=-100.)
        theta = np.full(shape=(grid_sum, len(self.sorted_mac)), fill_value=-2.)
        prior = np.full(grid_sum, 1. / grid_sum)

        grid_index = 0
        grid_position = np.full(shape=(grid_sum, 2), fill_value=0)
        for name, group in grouped:
            for index, row in group.iterrows():
                if row['mac'] in self.sorted_mac:
                    ap_index = self.sorted_mac.index(row['mac'])
                    theta[grid_index, ap_index] = row['rssi']
            grid_position[grid_index] = [group["x"].mean(), group['y'].mean()]
            grid_index += 1

        return {"prior": prior, "theta": theta, "sigma": sigma, "position": grid_position}

    def set_filter_mac(self, mac_list):
        self.filter_mac = set(mac_list) if mac_list else None

    def prepare_data(self, train_path, test_path, use='w'):
        self.use_type = use
        self.train = self.__process(train_path)
        self.test = self.__process(test_path)


def sklearn_knn(train_dict, test_dict, k=4):
    neigh = NearestNeighbors(n_neighbors=k)
    neigh.fit(train_dict['theta'])
    predict_neighbor = neigh.kneighbors(test_dict['theta'])

    predict_position = []
    for i in range(len(test_dict['theta'])):
        k_distances = predict_neighbor[0][i]
        k_indexs = predict_neighbor[1][i]

        predict_position.append(
            np.average(train_dict['position'][k_indexs], axis=0))

    error = np.linalg.norm(np.array(predict_position) - test_dict['position'], axis=1)
    error = np.transpose([error])
    return np.append(np.array(predict_position), error, axis=1)

In [15]:
if __name__ == '__main__':
    fingerprint = Fingerprint()
    # set filter list here.
    fingerprint.set_filter_mac(None)
    # prepare train and test data, default use wifi data, 'b' if want beacon used
    fingerprint.prepare_data('train11_16.txt', 'test11_16.txt', 'w')
    # knn localize and output result.
    result = sklearn_knn(fingerprint.train, fingerprint.test, k=4)
    rmse = sum(result[:, 2]) / float(len(result[:, 2]))
    print(" test points:{:5} , rmse:{:.5}".format(fingerprint.test["position"].shape[0], rmse))


 test points:    5 , rmse:1.8383


In [16]:
fingerprint.train

{'prior': array([0.05555556, 0.05555556, 0.05555556, 0.05555556, 0.05555556,
        0.05555556, 0.05555556, 0.05555556, 0.05555556, 0.05555556,
        0.05555556, 0.05555556, 0.05555556, 0.05555556, 0.05555556,
        0.05555556, 0.05555556, 0.05555556]),
 'theta': array([[-76., -76., -80.,  -2., -87.,  -2., -81., -81., -79., -88., -51.,
         -51., -49., -45., -46., -45.,  -2.,  -2.,  -2., -87., -88., -87.,
          -2., -88.,  -2.,  -2.],
        [-76., -76., -80.,  -2., -87.,  -2., -81., -81., -79., -88., -51.,
         -51., -49., -45., -46., -45.,  -2.,  -2.,  -2., -87., -88., -87.,
          -2., -88.,  -2.,  -2.],
        [-76., -76., -80.,  -2., -87.,  -2., -81., -81., -79., -88., -51.,
         -51., -49., -45., -46., -45.,  -2.,  -2.,  -2., -87., -88., -87.,
          -2., -88.,  -2.,  -2.],
        [-76., -76., -80.,  -2., -87.,  -2., -81., -81., -79., -88., -51.,
         -51., -49., -45., -46., -45.,  -2.,  -2.,  -2., -87., -88., -87.,
          -2., -88.,  -2.,  -2

In [22]:
file_name = "train11_16.txt"
file = open(file_name, "r")
line = file.readline()
data_dict = {'grid_id': [], 'x': [], 'y': [], 'mac': [], 'type': [], 'rssi': []}
grid_id = 0
while True:
            if line.strip() is not '':
                grid_id += 1
                grid_title = line.split(" ")
                print(grid_title)
                grid_pos = [float(grid_title[0]), float(grid_title[1])]
                for i in range(int(grid_title[2])):
                    line = file.readline().rstrip("\n")
                    record = line.split("|")
                    ap = record[0].split(" ")
                    data_dict['grid_id'].append(grid_id)
                    data_dict['x'].append(grid_pos[0])
                    data_dict['y'].append(grid_pos[1])
                    data_dict['mac'].append(ap[0][0:17])
                    data_dict['type'].append(ap[2])
                    rssi_list = record[1].strip().split(' ') if record[1].strip() != '' else [ap[1]]
                    data_dict['rssi'].append(float(np.array(rssi_list).astype(np.float).mean()))
            line = file.readline()
            if not line:
                break
file.close()

data_frame = pd.DataFrame.from_dict(data_dict)

['8.00', '1.00', '18', '2020-11-16-00:56:44\n']
['8.00', '1.00', '18', '2020-11-16-00:57:20\n']
['8.00', '2.00', '18', '2020-11-16-00:57:35\n']
['7.00', '2.00', '18', '2020-11-16-00:57:49\n']
['6.00', '2.00', '18', '2020-11-16-00:57:59\n']
['4.00', '2.00', '18', '2020-11-16-00:58:17\n']
['5.00', '1.00', '18', '2020-11-16-00:58:29\n']
['5.00', '1.00', '19', '2020-11-16-00:58:46\n']
['5.00', '3.00', '19', '2020-11-16-00:59:11\n']
['4.00', '5.00', '19', '2020-11-16-00:59:26\n']
['6.00', '5.00', '19', '2020-11-16-00:59:41\n']
['6.00', '5.00', '19', '2020-11-16-01:00:00\n']
['6.00', '4.00', '19', '2020-11-16-01:00:09\n']
['2.00', '5.00', '19', '2020-11-16-01:00:29\n']
['2.00', '3.00', '19', '2020-11-16-01:00:41\n']
['3.00', '4.00', '16', '2020-11-16-01:01:41\n']
['2.00', '1.00', '16', '2020-11-16-01:01:59\n']
['3.00', '1.00', '16', '2020-11-16-01:02:15\n']


In [38]:
data_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 326 entries, 0 to 325
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   grid_id  326 non-null    int64  
 1   x        326 non-null    float64
 2   y        326 non-null    float64
 3   mac      326 non-null    object 
 4   type     326 non-null    object 
 5   rssi     326 non-null    float64
dtypes: float64(3), int64(1), object(2)
memory usage: 15.4+ KB


In [24]:
data_frame

Unnamed: 0,grid_id,x,y,mac,type,rssi
0,1,8.0,1.0,70:8b:cd:5e:4f:f0,w,-88.0
1,1,8.0,1.0,04:bd:88:76:f9:f1,w,-46.0
2,1,8.0,1.0,04:bd:88:76:f9:e0,w,-51.0
3,1,8.0,1.0,04:bd:88:76:f9:f0,w,-45.0
4,1,8.0,1.0,04:bd:88:76:f9:e2,w,-49.0
...,...,...,...,...,...,...
321,18,3.0,1.0,04:bd:88:76:f4:21,w,-86.0
322,18,3.0,1.0,04:bd:88:76:f4:20,w,-87.0
323,18,3.0,1.0,04:bd:88:77:07:22,w,-90.0
324,18,3.0,1.0,04:bd:88:76:f5:81,w,-88.0


In [32]:
# using grid_id for grouping to avoid group same positions's data together if use position for grouping
grouped_df = data_frame.groupby(by=['x','y'])
for key,item in grouped_df:
  a_group = grouped_df.get_group(key)
  print(a_group, "\n")

     grid_id    x    y                mac type  rssi
294       17  2.0  1.0  04:bd:88:77:08:20    w -87.0
295       17  2.0  1.0  04:bd:88:77:08:21    w -88.0
296       17  2.0  1.0  62:45:b6:ce:ac:21    w -66.0
297       17  2.0  1.0  04:bd:88:76:f9:e0    w -57.0
298       17  2.0  1.0  04:bd:88:76:f9:f1    w -67.0
299       17  2.0  1.0  04:bd:88:76:f9:f0    w -67.0
300       17  2.0  1.0  04:bd:88:76:f9:e2    w -55.0
301       17  2.0  1.0  04:bd:88:76:f9:e1    w -57.0
302       17  2.0  1.0  04:bd:88:76:f9:f2    w -67.0
303       17  2.0  1.0  84:d4:7e:4a:4b:e2    w -91.0
304       17  2.0  1.0  04:bd:88:76:f4:22    w -85.0
305       17  2.0  1.0  04:bd:88:76:f4:21    w -86.0
306       17  2.0  1.0  04:bd:88:76:f4:20    w -87.0
307       17  2.0  1.0  04:bd:88:77:07:22    w -90.0
308       17  2.0  1.0  04:bd:88:76:f5:81    w -88.0
309       17  2.0  1.0  04:bd:88:76:f5:80    w -86.0 

     grid_id    x    y                mac type  rssi
259       15  2.0  3.0  62:45:b6:ce:ac:21   

In [36]:
sorted_mac = None
filter_mac = None
use_type = 'w'
grid_sum = len(grouped.groups)
if not sorted_mac:
            mac = set(data_frame[data_frame['type'] == use_type]['mac'].unique())
            sorted_mac = sorted(mac & filter_mac if filter_mac else mac)
sorted_mac


['04:bd:88:76:f4:20',
 '04:bd:88:76:f4:21',
 '04:bd:88:76:f4:22',
 '04:bd:88:76:f4:30',
 '04:bd:88:76:f4:31',
 '04:bd:88:76:f4:32',
 '04:bd:88:76:f5:80',
 '04:bd:88:76:f5:81',
 '04:bd:88:76:f5:82',
 '04:bd:88:76:f6:92',
 '04:bd:88:76:f9:e0',
 '04:bd:88:76:f9:e1',
 '04:bd:88:76:f9:e2',
 '04:bd:88:76:f9:f0',
 '04:bd:88:76:f9:f1',
 '04:bd:88:76:f9:f2',
 '04:bd:88:77:07:22',
 '04:bd:88:77:08:20',
 '04:bd:88:77:08:21',
 '18:64:72:29:09:c0',
 '18:64:72:29:09:c1',
 '18:64:72:29:09:c2',
 '62:45:b6:ce:ac:21',
 '70:8b:cd:5e:4f:f0',
 '84:d4:7e:4a:4b:e2',
 '84:d4:7e:4a:68:02']

In [None]:
sigma = np.full(shape=(grid_sum, len( sorted_mac)), fill_value=-100.)
theta = np.full(shape=(grid_sum, len( sorted_mac)), fill_value=-2.)
prior = np.full(grid_sum, 1. / grid_sum)

grid_index = 0
grid_position = np.full(shape=(grid_sum, 2), fill_value=0)
for name, group in grouped:
            for index, row in group.iterrows():
                if row['mac'] in sorted_mac:
                    ap_index = sorted_mac.index(row['mac'])
                    theta[grid_index, ap_index] = row['rssi']
grid_position[grid_index] = [group["x"].mean(), group['y'].mean()]
grid_index += 1