## Overview

In this notebook, I try to generate more waypoints ('WAY_POINTS') using the host's GitHub code.<br>
The motivation is to use the [snap to grid](https://www.kaggle.com/robikscube/indoor-navigation-snap-to-grid-post-processing) method more effectively.<br>
I think we can assume that there are more possible positions in the buildings than just the waypoints in the training set. <br>
If we can find those other positions, we may get better results with [snap to grid](https://www.kaggle.com/robikscube/indoor-navigation-snap-to-grid-post-processing).<br>
<br>
There is a function named 'calibrate_magnetic_wifi_ibeacon_to_position' in 'main.py' of the host's repository.<br>
It can infer additional waypoints from the path files.<br>
<br>
Actually, for my models it did not improve the result. <br>
But since I felt it has potential, I decided to publish it. <br>
<br>
If a predicted position is not on a pathway, it can be corrected with this. <br>
As you can see at the end of this notebook, it generates a great many waypoints. <br>
So if the original prediction is not accurate enough, [snap to grid](https://www.kaggle.com/robikscube/indoor-navigation-snap-to-grid-post-processing) won't work well with these waypoints. <br>
<br>
The result of this notebook, the calibrated waypoints, is available [here](https://www.kaggle.com/kokitanisaka/calibrated-waypoints). <br>
<br>
If you have any thoughts, please leave a comment. Thank you! 

In [None]:
import multiprocessing
import numpy as np
import pandas as pd
import scipy.interpolate
import scipy.sparse
from tqdm import tqdm

import glob
import multiprocessing
from multiprocessing import Pool

### Option
Generating the waypoint files takes some time, so you can skip it by setting the flag to False. <br>
In that case, the waypoints are read from the dataset. 

In [None]:
# When True, the per-building waypoint CSVs are regenerated from the training
# paths; when False, precomputed CSVs are read from ../input/calibrated-waypoints.
MAKE_WAYPOINTS = False

In [None]:
# Shallow-clone the host's repository into a directory whose name is a valid
# Python package name, then delete its data/ directory.
!git clone --depth 1 https://github.com/location-competition/indoor-location-competition-20 indoor_location_competition_20
!rm -rf indoor_location_competition_20/data

In [None]:
from indoor_location_competition_20.io_f import read_data_file
import indoor_location_competition_20.compute_f as compute_f

In [None]:
# this code is from host's github

def _attach_to_nearest_step(mwi_datas, step_positions, sensor_datas, sensor_key):
    """Append each timestamp group of `sensor_datas` to its nearest step position.

    The readings are split by the unique values of their first column (used
    as the timestamp). For every group, the row of `step_positions` whose
    first column is closest to the group's timestamp is selected, and columns
    1:3 of that row become the `(x, y)` key in `mwi_datas`. The group is stored
    under `sensor_key` ('magnetic', 'wifi' or 'ibeacon'); `mwi_datas` is
    updated in place.
    """
    sep_tss = np.unique(sensor_datas[:, 0].astype(float))
    for sensor_ds in compute_f.split_ts_seq(sensor_datas, sep_tss):
        time_gap = np.abs(step_positions[:, 0] - float(sensor_ds[0, 0]))
        nearest = np.argmin(time_gap)
        target_xy_key = tuple(step_positions[nearest, 1:3])

        entry = mwi_datas.get(target_xy_key)
        if entry is None:
            # First reading at this position: start from empty placeholders
            # (same widths as the original code) and drop the group in.
            entry = {
                'magnetic': np.zeros((0, 4)),
                'wifi': np.zeros((0, 5)),
                'ibeacon': np.zeros((0, 3)),
            }
            entry[sensor_key] = sensor_ds
            mwi_datas[target_xy_key] = entry
        else:
            entry[sensor_key] = np.append(entry[sensor_key], sensor_ds, axis=0)


def calibrate_magnetic_wifi_ibeacon_to_position(path_file_list, waypoints=None):
    """Assign magnetic, wifi and ibeacon readings to computed step positions.

    Parameters
    ----------
    path_file_list : iterable of str
        Path files to read with `read_data_file`.
    waypoints : optional
        Positions passed to `compute_f.compute_step_positions` instead of the
        waypoints stored in each path file. When None, each file's own
        `waypoint` data is used.

    Returns
    -------
    dict
        Maps an `(x, y)` tuple (taken from the step positions) to a dict with
        the keys 'magnetic', 'wifi' and 'ibeacon', each holding the readings
        assigned to that position.
    """
    mwi_datas = {}
    for path_filename in path_file_list:
        print(f'Processing {path_filename}...')

        path_datas = read_data_file(path_filename)
        posi_datas = path_datas.waypoint if waypoints is None else waypoints

        step_positions = compute_f.compute_step_positions(
            path_datas.acce, path_datas.ahrs, posi_datas)

        # wifi and ibeacon are skipped when empty; magnetic data is always
        # processed, exactly as in the host's code.
        if path_datas.wifi.size != 0:
            _attach_to_nearest_step(mwi_datas, step_positions, path_datas.wifi, 'wifi')

        if path_datas.ibeacon.size != 0:
            _attach_to_nearest_step(mwi_datas, step_positions, path_datas.ibeacon, 'ibeacon')

        _attach_to_nearest_step(mwi_datas, step_positions, path_datas.magn, 'magnetic')

    return mwi_datas

In [None]:
# this code is from host's github

def extract_magnetic_strength(mwi_datas):
    """Return the mean magnetic field magnitude for every position.

    For each position key, columns 1:4 of its 'magnetic' array are treated as
    a vector per row; the Euclidean norm of each row is averaged.

    Returns a dict `{position_key: mean_magnitude}`.
    """
    strength_by_position = {}
    for position_key, sensors in mwi_datas.items():
        field_xyz = sensors['magnetic'][:, 1:4]
        magnitudes = np.sqrt(np.sum(field_xyz ** 2, axis=1))
        strength_by_position[position_key] = np.mean(magnitudes)
    return strength_by_position


def extract_wifi_rssi(mwi_datas):
    """Average the wifi RSSI per BSSID and position.

    Each row of a position's 'wifi' array contributes one sample: element 2 is
    used as the BSSID and element 3 is parsed as an integer RSSI.

    Returns
    -------
    dict
        `{bssid: {position_key: np.array([mean_rssi, sample_count])}}`.
        The array is float so the running mean keeps its fractional part.
    """
    wifi_rssi = {}
    for position_key, sensors in mwi_datas.items():
        for wifi_d in sensors['wifi']:
            bssid = wifi_d[2]
            rssi = int(wifi_d[3])

            position_rssi = wifi_rssi.setdefault(bssid, {})
            stats = position_rssi.get(position_key)
            if stats is None:
                # dtype=float is essential: with an integer array every
                # running-mean update would be truncated on assignment.
                position_rssi[position_key] = np.array([rssi, 1], dtype=float)
            else:
                old_count = stats[1]
                stats[0] = (stats[0] * old_count + rssi) / (old_count + 1)
                stats[1] = old_count + 1

    return wifi_rssi


def extract_ibeacon_rssi(mwi_datas):
    """Average the ibeacon RSSI per beacon id and position.

    Each row of a position's 'ibeacon' array contributes one sample: element 1
    is used as the beacon id and element 2 is parsed as an integer RSSI.

    Returns
    -------
    dict
        `{beacon_id: {position_key: np.array([mean_rssi, sample_count])}}`.
        The array is float so the running mean keeps its fractional part.
    """
    ibeacon_rssi = {}
    for position_key, sensors in mwi_datas.items():
        for ibeacon_d in sensors['ibeacon']:
            ummid = ibeacon_d[1]
            rssi = int(ibeacon_d[2])

            position_rssi = ibeacon_rssi.setdefault(ummid, {})
            stats = position_rssi.get(position_key)
            if stats is None:
                # dtype=float is essential: with an integer array every
                # running-mean update would be truncated on assignment.
                position_rssi[position_key] = np.array([rssi, 1], dtype=float)
            else:
                old_count = stats[1]
                stats[0] = (stats[0] * old_count + rssi) / (old_count + 1)
                stats[1] = old_count + 1

    return ibeacon_rssi


def extract_wifi_count(mwi_datas):
    """Count the distinct values in column 2 of each position's 'wifi' array.

    Returns a dict `{position_key: number_of_distinct_values}`; a position
    with an empty 'wifi' array gets 0.
    """
    return {
        position_key: len(np.unique(sensors['wifi'][:, 2]))
        for position_key, sensors in mwi_datas.items()
    }

In [None]:
# Floor directory name -> integer floor value. Both the "F<n>" and the "<n>F"
# spellings map to n - 1; basements map to negative values.
floor_map = {
    # basements
    "B2": -2, "B1": -1,
    # "F<n>" spelling
    "F1": 0, "F2": 1, "F3": 2, "F4": 3, "F5": 4,
    "F6": 5, "F7": 6, "F8": 7, "F9": 8,
    # "<n>F" spelling
    "1F": 0, "2F": 1, "3F": 2, "4F": 3, "5F": 4,
    "6F": 5, "7F": 6, "8F": 7, "9F": 8,
}

In [None]:
# target only buildings in test set
def generate_target_buildings(
        sample_submission_path='/kaggle/input/indoor-location-navigation/sample_submission.csv'):
    """Return the sorted list of site ids that occur in the sample submission.

    Parameters
    ----------
    sample_submission_path : str, optional
        CSV file with a 'site_path_timestamp' column. Defaults to the
        competition's sample submission.

    Returns
    -------
    list of str
        Unique first '_'-separated tokens of 'site_path_timestamp', sorted.
    """
    ssubm = pd.read_csv(sample_submission_path, usecols=['site_path_timestamp'])
    # Vectorised split; only the first token (the site id) is needed, so there
    # is no reason to build one pd.Series per row.
    sites = ssubm['site_path_timestamp'].str.split('_', n=1).str[0]
    return sorted(sites.dropna().unique().tolist())

# Site ids taken from the sample submission; process_waypoints is mapped over
# this list when MAKE_WAYPOINTS is enabled.
target_buildings = generate_target_buildings()

In [None]:
# This function generates the waypoints file for one building.

def process_waypoints(target_building):
    """Write '<target_building>_waypoints.csv' with the calibrated positions.

    Every '*.txt' path file under each floor directory of the building's
    training data is passed through
    `calibrate_magnetic_wifi_ibeacon_to_position`; each resulting position
    becomes one 'TYPE_WAYPOINT' row with timestamp 0.

    Parameters
    ----------
    target_building : str
        Site id, used as the directory name under the training data and as
        the prefix of the output file.

    Returns
    -------
    None
        The result is written to the current working directory. When no
        position could be produced, a warning is printed and no file is
        written.

    Raises
    ------
    KeyError
        If a floor directory name is not a key of `floor_map`.
    """
    import traceback

    output_columns = ['type_name', 'x', 'y', 'timestamp', 'site', 'floorNo', 'floor', 'path']
    frames = []

    for floor in glob.glob(f'../input/indoor-location-navigation/train/{target_building}/*'):
        floor_name = floor.split('/')[-1]
        floor_val = floor_map[floor_name]

        for path in glob.glob(f'{floor}/*.txt'):
            path_name = path.split('/')[-1]
            calibration = calibrate_magnetic_wifi_ibeacon_to_position([path])

            try:
                # Only the keys (the calibrated (x, y) positions) end up in
                # the output file.
                positions = list(extract_wifi_count(calibration))
                frames.append(pd.DataFrame({
                    'type_name': 'TYPE_WAYPOINT',
                    'x': [xy[0] for xy in positions],
                    'y': [xy[1] for xy in positions],
                    'timestamp': 0,
                    'site': target_building,
                    'floorNo': floor_name,
                    'floor': floor_val,
                    'path': path_name.replace('.txt', ''),
                }))
            except Exception:
                # Best effort: report the failing path and keep going.
                # `Exception` (not a bare except) so KeyboardInterrupt and
                # SystemExit still stop the worker.
                print(f' --- ERROR --- Building:{target_building} Floor:{floor_name} Path:{path_name}')
                traceback.print_exc()

    if not frames:
        # Previously this case crashed with a TypeError on None.
        print(f' --- WARNING --- Building:{target_building} produced no waypoints, no file written')
        return

    # Single concat instead of growing a DataFrame inside the loop.
    pd.concat(frames)[output_columns].to_csv(f'{target_building}_waypoints.csv', index=False)

In [None]:
# One worker process per CPU reported by the OS.
num_cores = multiprocessing.cpu_count()

if MAKE_WAYPOINTS:
    # Each call writes its own CSV; the values returned by map() are not used.
    with Pool(processes=num_cores) as pool:
        pool.map(process_waypoints, list(target_buildings))

In [None]:
import pandas as pd
import numpy as np

import json
import matplotlib.pylab as plt

def split_col(df):
    """Prepend 'site', 'path' and 'timestamp' columns to `df`.

    The three columns are the '_'-separated parts of 'site_path_timestamp'.
    A new DataFrame is returned; `df` itself is left unchanged.
    """
    parts = df['site_path_timestamp'].str.split('_', expand=True)
    parts = parts.rename(columns={0: 'site', 1: 'path', 2: 'timestamp'})
    combined = pd.concat([parts, df], axis=1)
    return combined.copy()

# Floor name -> integer floor value (same mapping as the floor_map defined in
# the earlier cell). Both the "F<n>" and the "<n>F" spellings map to n - 1.
floor_map = {
    # basements
    "B2": -2, "B1": -1,
    # "F<n>" spelling
    "F1": 0, "F2": 1, "F3": 2, "F4": 3, "F5": 4,
    "F6": 5, "F7": 6, "F8": 7, "F9": 8,
    # "<n>F" spelling
    "1F": 0, "2F": 1, "3F": 2, "4F": 3, "5F": 4,
    "6F": 5, "7F": 6, "8F": 7, "9F": 8,
}


def plot_preds(
    site,
    floorNo,
    sub=None,
    true_locs=None,
    base="../input/indoor-location-navigation",
    show_train=True,
    show_preds=True,
    fix_labels=True,
    map_floor=None
):
    """
    Plots predictions on floorplan map.

    Parameters
    ----------
    site : str
        Site id; selects the metadata directory and filters the rows.
    floorNo : str
        Floor name used to filter `sub` and shown in the plot title.
    sub : DataFrame, optional
        Predictions with 'site', 'floorNo', 'path', 'x', 'y' columns.
        Skipped when None.
    true_locs : DataFrame, optional
        Waypoints with 'site', 'floorNo', 'path', 'x', 'y' columns.
        Skipped when None.
    base : str
        Root directory that contains 'metadata/<site>/<floor>/'.
    show_train, show_preds : bool
        Toggle drawing of `true_locs` / `sub`.
    fix_labels : bool
        Collapse repeated legend labels into one entry each.
    map_floor : use a different floor's map

    Returns
    -------
    (fig, ax)
    """
    if map_floor is None:
        map_floor = floorNo

    # Prepare width_meter & height_meter (taken from the .json file)
    metadata_dir = f"{base}/metadata/{site}/{map_floor}"
    with open(f"{metadata_dir}/floor_info.json") as json_file:
        json_data = json.load(json_file)

    width_meter = json_data["map_info"]["width"]
    height_meter = json_data["map_info"]["height"]

    floor_img = plt.imread(f"{metadata_dir}/floor_image.png")
    # Image arrays are (rows, columns, ...): rows span the map height and
    # columns span the map width.
    img_height_px, img_width_px = floor_img.shape[:2]

    def to_pixels(locs):
        """Return a copy of `locs` with pixel coordinates 'x_' and 'y_'."""
        locs = locs.copy()
        # x runs along the image width. The previous code scaled x with the
        # image height and y with the image width, which is only right when
        # the image has the same pixels-per-metre on both axes.
        locs["x_"] = locs["x"] * img_width_px / width_meter
        # Image rows grow downwards while y grows upwards, hence the flip.
        locs["y_"] = img_height_px - locs["y"] * img_height_px / height_meter
        return locs

    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(floor_img)

    if show_train and true_locs is not None:
        train_px = to_pixels(
            true_locs.query("site == @site and floorNo == @map_floor")
        )
        train_px.groupby("path").plot(
            x="x_",
            y="y_",
            style="+",
            ax=ax,
            label="train waypoint location",
            color="grey",
            alpha=0.5,
        )

    if show_preds and sub is not None:
        preds_px = to_pixels(sub.query("site == @site and floorNo == @floorNo"))
        for path, path_data in preds_px.groupby("path"):
            path_data.plot(
                x="x_",
                y="y_",
                style=".-",
                ax=ax,
                title=f"{site} - floor - {floorNo}",
                alpha=1,
                label=path,
            )

    if fix_labels:
        # A dict keyed by label keeps one handle per distinct label.
        handles, labels = ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        plt.legend(
            by_label.values(), by_label.keys(), loc="center left", bbox_to_anchor=(1, 0.5)
        )
    return fig, ax

def sub_process(sub, train_waypoints):
    """Attach floor names and a waypoint-match flag to a submission frame.

    Parameters
    ----------
    sub : DataFrame
        Needs the columns 'site_path_timestamp', 'floor', 'x', 'y'.
    train_waypoints : DataFrame
        Needs the columns 'site', 'floorNo', 'floor', 'x', 'y'. It is not
        modified.

    Returns
    -------
    DataFrame
        `sub` split into 'site' / 'path' / 'timestamp', left-joined with the
        waypoint floor names ('floorNo'), plus a boolean 'isTrainWaypoint'
        that is True where (site, floor, x, y) equals a row of
        `train_waypoints`.
    """
    floor_names = train_waypoints[['site', 'floorNo', 'floor']].drop_duplicates()
    # The flag is added to a derived frame only, so the caller's
    # train_waypoints does not silently gain a column.
    known_points = (
        train_waypoints[['x', 'y', 'site', 'floor']]
        .drop_duplicates()
        .assign(isTrainWaypoint=True)
    )

    sub = split_col(sub[['site_path_timestamp', 'floor', 'x', 'y']]).copy()
    sub = sub.merge(floor_names, how='left')
    sub = sub.merge(known_points, how='left', on=['site', 'x', 'y', 'floor'])
    # After the left join the flag is either True or missing, so notna()
    # yields the boolean column without fillna's object-dtype downcasting.
    sub['isTrainWaypoint'] = sub['isTrainWaypoint'].notna()
    return sub.copy()

from scipy.spatial.distance import cdist

def add_xy(df):
    """Add an 'xy' column of (x, y) tuples to `df` in place and return `df`."""
    df['xy'] = list(zip(df['x'], df['y']))
    return df

def closest_point(point, points):
    """Return the element of `points` with the smallest distance to `point`."""
    distances = cdist([point], points)
    nearest_index = distances.argmin()
    return points[nearest_index]

def snap_to_grid(sub, threshold):
    """
    Replace predictions by their closest grid point when it is near enough.

    Expected columns of `sub`:
      x, y   : the predicted points
      x_, y_ : the closest grid points
      dist   : distance between the two

    The columns _x_, _y_ (the post-processed predictions) are added to `sub`:
    they equal x_, y_ on rows where dist < threshold and x, y elsewhere.
    A copy of the updated frame is returned.
    """
    within = sub['dist'] < threshold
    sub['_x_'] = sub['x']
    sub['_y_'] = sub['y']
    for target, source in (('_x_', 'x_'), ('_y_', 'y_')):
        sub.loc[within, target] = sub.loc[within, source]
    return sub.copy()

def snap(sub, train_waypoints, threshold=0):
    """Snap predictions to the nearest waypoint and plot the example floor.

    Parameters
    ----------
    sub : DataFrame
        Submission with 'site_path_timestamp', 'floor', 'x', 'y'.
    train_waypoints : DataFrame
        Candidate grid points with 'site', 'floorNo', 'floor', 'x', 'y',
        'path'. The caller's frame is left untouched.
    threshold : float
        A prediction is replaced by its nearest grid point only when their
        distance is strictly below this value. With the default of 0 no
        prediction is moved, because distances are never negative.

    Returns
    -------
    DataFrame
        Columns 'site_path_timestamp', 'floor', 'x', 'y', 'site', 'path',
        'floorNo'. (site, floor) groups without any grid point are dropped
        after a 'Skipping ...' message.

    Raises
    ------
    ValueError
        If no (site, floor) group of `sub` has any grid point.

    Notes
    -----
    The plot uses the module-level names `example_site` and
    `example_floorNo`, which must be defined before calling.
    """
    # Copy so the helper columns added below never leak into the caller's frame.
    train_waypoints = train_waypoints.copy()

    sub = sub_process(sub, train_waypoints)

    sub = add_xy(sub)
    train_waypoints = add_xy(train_waypoints)

    ds = []
    for (site, myfloor), d in sub.groupby(['site', 'floor']):
        on_floor = (train_waypoints['floor'] == myfloor) & (train_waypoints['site'] == site)
        # Built once per group; previously this list was rebuilt for every
        # single predicted point.
        grid_points = train_waypoints.loc[on_floor, 'xy'].tolist()
        if len(grid_points) == 0:
            print(f'Skipping {site} {myfloor}')
            continue
        # Work on a copy of the group instead of writing into the slice.
        d = d.copy()
        d['matched_point'] = [closest_point(x, grid_points) for x in d['xy']]
        d['x_'] = d['matched_point'].apply(lambda x: x[0])
        d['y_'] = d['matched_point'].apply(lambda x: x[1])
        ds.append(d)

    if not ds:
        raise ValueError('No (site, floor) group of the submission has any waypoint to snap to')

    sub = pd.concat(ds)

    # Calculate the distances
    sub['dist'] = np.sqrt((sub.x - sub.x_) ** 2 + (sub.y - sub.y_) ** 2)

    sub_pp = snap_to_grid(sub, threshold=threshold)

    sub_pp = sub_pp[['site_path_timestamp', 'floor', '_x_', '_y_', 'site', 'path', 'floorNo']] \
        .rename(columns={'_x_': 'x', '_y_': 'y'})

    plot_preds(example_site, example_floorNo, sub_pp,
               train_waypoints, show_preds=True)
    plt.show()

    return sub_pp

In [None]:
# Calibrated waypoints: the files generated by this run when MAKE_WAYPOINTS is
# set, otherwise the published dataset.
if MAKE_WAYPOINTS:
    waypoint_files = glob.glob('*_waypoints.csv')
else:
    waypoint_files = glob.glob('../input/calibrated-waypoints/*_waypoints.csv')

original_wp = pd.read_csv('../input/indoor-location-train-waypoints/train_waypoints.csv')

# Read everything first and concatenate once (calibrated files first, original
# waypoints last) instead of growing the frame file by file.
train_waypoints = pd.concat(
    [pd.read_csv(waypoint_file) for waypoint_file in waypoint_files] + [original_wp]
)

In [None]:
# The sample submission is used below as the set of predictions passed to snap().
sample_submission = pd.read_csv('../input/indoor-location-navigation/sample_submission.csv')

In [None]:
# Site and floor drawn by the plot inside snap(), which reads these two names
# as globals.
example_site = '5dbc1d84c1eb61796cf7c010'
example_floorNo = 'F3'

### The result 
Let's take a look at the generated waypoints. <br>
We can clearly see that we got many additional waypoints. <br>
Can they be used, or not? <br>

In [None]:
# Reference: original training waypoints only. threshold keeps its default of
# 0, so no prediction is moved and the call mainly serves to draw the plot.
snap(sample_submission, original_wp)

In [None]:
# Same call with the calibrated waypoints plus the original ones (threshold
# again left at its default of 0).
snap(sample_submission, train_waypoints)