### What is this?
This notebook takes your raw submission (i.e., without any post-processing yet) as input and shows the following for each site and floor.

- Your raw submission on the floor map on the left.
- The Cost Minimization post-processing result on the floor map in the middle.
- The result of Cost Minimization followed by Snap to Grid post-processing on the floor map on the right.


By having them in the same place I hope you'll be able to see how much improvement you would potentially have!
I appreciate your feedback :)

### References
- [indoor - Post-processing by Cost Minimization](https://www.kaggle.com/saitodevel01/indoor-post-processing-by-cost-minimization) by @saitodevel01.
- [Indoor Navigation - "Snap to Grid" Post Processing](https://www.kaggle.com/robikscube/indoor-navigation-snap-to-grid-post-processing) by @robikscube.
- [LSTM by Keras with Unified Wi-Fi Feats](https://www.kaggle.com/kokitanisaka/lstm-by-keras-with-unified-wi-fi-feats) by @kokitanisaka.


In [None]:
# Helper Functions
import pandas as pd
import numpy as np

import json
import matplotlib.pylab as plt

def split_col(df):
    """Return a copy of ``df`` with the pieces of ``site_path_timestamp`` prepended.

    The key column is split on ``'_'``; pieces 0, 1 and 2 are named ``site``,
    ``path`` and ``timestamp`` and placed in front of the original columns.
    The input frame is left untouched.

    NOTE: a later cell defines ``split_col`` again, which replaces this one
    once that cell has run.
    """
    key_parts = df['site_path_timestamp'].str.split('_', expand=True)
    key_parts = key_parts.rename(columns={0: 'site', 1: 'path', 2: 'timestamp'})
    combined = pd.concat([key_parts, df], axis=1)
    return combined.copy()

# Lookup from floor-name strings to integer floor indices. Both the "F<n>"
# and "<n>F" spellings are accepted ("F1"/"1F" -> 0, "F2"/"2F" -> 1, ...),
# and the basement names "B1"/"B2" map to -1/-2.
floor_map = {"B2":-2, "B1":-1, "F1":0, "F2": 1, "F3":2,
             "F4":3, "F5":4, "F6":5, "F7":6,"F8":7,"F9":8,
             "1F":0, "2F":1, "3F":2, "4F":3, "5F":4, "6F":5,
             "7F":6, "8F": 7, "9F":8}


def plot_preds(
    ax,
    context_text,
    site,
    floorNo,
    sub=None,
    true_locs=None,
    base="../input/indoor-location-navigation",
    show_train=True,
    show_preds=True,
    fix_labels=True,
    map_floor=None
):
    """
    Plot training waypoints and predicted paths on a floor-plan image.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    context_text : prefix used in the panel title.
    site : site id; selects the metadata directory and filters the rows.
    floorNo : floor name; filters ``sub`` and appears in the title.
    sub : predictions with ``site``, ``floorNo``, ``path``, ``x`` and ``y``
        columns. Predictions are skipped when this is None.
    true_locs : training waypoints with the same columns. Training
        waypoints are skipped when this is None.
    base : root directory containing ``metadata/<site>/<floor>/``.
    show_train : draw the training waypoints (grey ``+`` markers).
    show_preds : draw the predictions, one line per path.
    fix_labels : collapse duplicate legend entries and put the legend to
        the right of the axes.
    map_floor : use a different floor's map (defaults to ``floorNo``).
    """
    if map_floor is None:
        map_floor = floorNo

    # Map size in metres comes from the floor's .json file.
    metadata_dir = f"{base}/metadata/{site}/{map_floor}"
    with open(f"{metadata_dir}/floor_info.json") as json_file:
        json_data = json.load(json_file)
    width_meter = json_data["map_info"]["width"]
    height_meter = json_data["map_info"]["height"]

    floor_img = plt.imread(f"{metadata_dir}/floor_image.png")
    img_height_px, img_width_px = floor_img.shape[:2]
    ax.imshow(floor_img)

    def _with_pixel_coords(locs):
        # Metres -> image pixels. x runs along the image width (shape[1]) and
        # y along the image height (shape[0]); y is flipped because image rows
        # grow downwards while map y grows upwards.
        locs = locs.copy()
        locs["x_"] = locs["x"] * img_width_px / width_meter
        locs["y_"] = img_height_px - locs["y"] * img_height_px / height_meter
        return locs

    if show_train and true_locs is not None:
        floor_locs = _with_pixel_coords(
            true_locs.query("site == @site and floorNo == @map_floor")
        )
        for _, path_locs in floor_locs.groupby("path"):
            path_locs.plot(
                x="x_",
                y="y_",
                style="+",
                ax=ax,
                label="train waypoint location",
                color="grey",
                alpha=0.5,
            )

    if show_preds:
        # Set the title up front so panels without any prediction on this
        # floor are still identifiable.
        ax.set_title(f"{context_text} - {site} - floor - {floorNo}")
        if sub is not None:
            floor_preds = _with_pixel_coords(
                sub.query("site == @site and floorNo == @floorNo")
            )
            for path, path_data in floor_preds.groupby("path"):
                path_data.plot(
                    x="x_",
                    y="y_",
                    style=".-",
                    ax=ax,
                    alpha=1,
                    label=path,
                )

    if fix_labels:
        # Every training path is plotted with the same label; keep one entry.
        handles, labels = ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        ax.legend(
            by_label.values(), by_label.keys(), loc="center left", bbox_to_anchor=(1, 0.5)
        )
    return

In [None]:
# Shallow-clone the competition's helper repository into a directory whose
# name is a valid Python package name (compute_f and io_f are imported from
# it below), then delete the repository's data/ directory.
!git clone --depth 1 https://github.com/location-competition/indoor-location-competition-20 indoor_location_competition_20
!rm -rf indoor_location_competition_20/data

In [None]:
import multiprocessing
from typing import Any, List, Tuple, Union

import numpy as np
import pandas as pd
import scipy.interpolate
import scipy.sparse
from tqdm import tqdm
from scipy.spatial.distance import cdist
import indoor_location_competition_20.compute_f as compute_f
from indoor_location_competition_20.io_f import read_data_file


# Credit @robikscube: https://www.kaggle.com/robikscube/indoor-navigation-snap-to-grid-post-processing
def post_process_snap_to_grid(sub_df: pd.DataFrame, train_waypoints, threshold: int) -> pd.DataFrame:
    """
    Snap each prediction to the nearest training waypoint if within a threshold.

    x, y are the predicted points.
    x_, y_ are the closest grid points (training waypoints on the same
    site and floor).
    _x_, _y_ are the new predictions after post processing.

    Parameters
    ----------
    sub_df : submission with ``site_path_timestamp``, ``floor``, ``x``, ``y``.
    train_waypoints : training waypoints with ``site``, ``floor``,
        ``floorNo``, ``x`` and ``y`` columns. It is not modified.
    threshold : a prediction is replaced by its closest grid point only when
        their Euclidean distance is strictly below this value.

    Returns
    -------
    DataFrame with ``site_path_timestamp``, ``floor``, ``x``, ``y``,
    ``site``, ``path`` and ``floorNo``, ordered by (site, floor) group.
    Predictions on a site/floor with no training waypoints are kept
    unchanged rather than being dropped from the result.
    """
    # sub_process derives site/path/timestamp itself and attaches floorNo.
    sub_df = sub_process(sub_df, train_waypoints)

    snapped_groups = []
    for (site, myfloor), d in sub_df.groupby(['site', 'floor']):
        d = d.copy()  # groupby hands out slices; do not write into them
        on_floor = (train_waypoints['floor'] == myfloor) & (train_waypoints['site'] == site)
        grid = train_waypoints.loc[on_floor, ['x', 'y']].to_numpy(dtype=float)
        if len(grid) == 0:
            print(f'Skipping {site} {myfloor}')
            # No grid to snap to: NaN makes the distance NaN, so the
            # threshold test below leaves these predictions as they are.
            d['x_'] = np.nan
            d['y_'] = np.nan
        else:
            # One distance matrix per group instead of one cdist call per row.
            preds = d[['x', 'y']].to_numpy(dtype=float)
            nearest = cdist(preds, grid).argmin(axis=1)
            d['x_'] = grid[nearest, 0]
            d['y_'] = grid[nearest, 1]
        snapped_groups.append(d)

    sub_df = pd.concat(snapped_groups)  # type: ignore

    # Calculate the distances
    sub_df['dist'] = np.sqrt((sub_df['x'] - sub_df['x_']) ** 2
                             + (sub_df['y'] - sub_df['y_']) ** 2)

    within = sub_df['dist'] < threshold
    sub_df['_x_'] = sub_df['x'].where(~within, sub_df['x_'])
    sub_df['_y_'] = sub_df['y'].where(~within, sub_df['y_'])

    sub_df = sub_df[['site_path_timestamp', 'floor', '_x_', '_y_',
                     'site', 'path', 'floorNo']].rename(columns={'_x_': 'x', '_y_': 'y'})
    return sub_df


def split_col(df: pd.DataFrame) -> pd.DataFrame:
    """Prepend the ``'_'``-separated pieces of ``site_path_timestamp`` to ``df``.

    Pieces 0, 1 and 2 become the ``site``, ``path`` and ``timestamp`` columns
    and are placed in front of the original columns in the returned frame.
    """
    piece_names = {0: 'site', 1: 'path', 2: 'timestamp'}
    pieces = df['site_path_timestamp'].str.split('_', expand=True)
    pieces = pieces.rename(columns=piece_names)
    return pd.concat([pieces, df], axis=1)  # type: ignore

def add_xy(df: pd.DataFrame) -> pd.DataFrame:
    """Add an ``xy`` column of ``(x, y)`` tuples to ``df`` in place and return ``df``."""
    coordinate_pairs = list(zip(df['x'], df['y']))
    df['xy'] = coordinate_pairs
    return df

def closest_point(point, points):
    """Return the element of ``points`` with the smallest distance to ``point``.

    Distances come from ``cdist`` with its default metric; on a tie the
    earliest element of ``points`` wins.
    """
    distances = cdist([point], points)
    nearest_idx = distances.argmin()
    return points[nearest_idx]

def sub_process(sub, train_waypoints):
    """Rebuild the key columns of a submission and attach ``floorNo``.

    Only ``site_path_timestamp``, ``floor``, ``x`` and ``y`` are taken from
    ``sub``; ``site``, ``path`` and ``timestamp`` are re-derived from the key.
    The distinct ``site``/``floorNo``/``floor`` rows of ``train_waypoints``
    are then left-merged on whatever columns the two frames share.
    """
    submission_cols = sub[['site_path_timestamp', 'floor', 'x', 'y']]
    expanded = split_col(submission_cols).copy()
    floor_lookup = train_waypoints[['site', 'floorNo', 'floor']].drop_duplicates()
    return expanded.merge(floor_lookup, how='left')

# credit @saitodevel01: https://www.kaggle.com/saitodevel01/indoor-post-processing-by-cost-minimization
def post_process_correct_path(sub_df: pd.DataFrame) -> pd.DataFrame:
    """Run ``correct_path`` on every path of a submission, in parallel.

    Parameters
    ----------
    sub_df : submission with ``site_path_timestamp``, ``floor``, ``x``, ``y``.
        The caller's frame is not modified.

    Returns
    -------
    The concatenated per-path results, sorted by ``site_path_timestamp``.
    """
    # Work on a copy so the helper columns do not leak into the caller's frame.
    sub_df = sub_df.copy()
    key_parts = sub_df['site_path_timestamp'].str.split('_', expand=True)
    sub_df['site'] = key_parts[0]
    sub_df['path'] = key_parts[1]
    sub_df['timestamp'] = key_parts[2].astype(float)

    path_groups = sub_df.groupby('path')
    processes = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=processes) as pool:
        # Paths are independent, so completion order does not matter; the
        # final sort restores a deterministic row order.
        dfs = pool.imap_unordered(correct_path, path_groups)
        # Results must be consumed while the pool is still alive.
        dfs = list(tqdm(dfs, total=path_groups.ngroups))
    processed_df = pd.concat(dfs).sort_values('site_path_timestamp')
    return processed_df  # type: ignore

def compute_rel_positions(acce_datas, ahrs_datas):
    """Chain the ``compute_f`` helpers to turn sensor data into relative positions.

    Steps are detected from ``acce_datas`` and headings computed from
    ``ahrs_datas``; stride lengths and per-step headings are then derived and
    handed to ``compute_f.compute_rel_positions``, whose result is returned.
    """
    step_times, _step_indices, step_acce_extrema = compute_f.compute_steps(acce_datas)
    all_headings = compute_f.compute_headings(ahrs_datas)
    strides = compute_f.compute_stride_length(step_acce_extrema)
    headings_per_step = compute_f.compute_step_heading(step_times, all_headings)
    return compute_f.compute_rel_positions(strides, headings_per_step)

def correct_path(args):
    """Refine one path's predicted waypoints using sensor-derived displacements.

    ``args`` is a ``(path, path_df)`` pair as produced by iterating
    ``groupby('path')``; a single argument is used so the function can be
    mapped over a multiprocessing pool. ``path_df`` must provide the columns
    ``timestamp``, ``x``, ``y``, ``floor`` and ``site_path_timestamp``.

    Returns a DataFrame with ``site_path_timestamp``, ``floor`` and the
    corrected ``x`` / ``y``.
    """
    path, path_df = args

    # Waypoint timestamps and the predicted positions at those timestamps.
    # NOTE(review): the code below uses T_ref[-1] as the latest timestamp and
    # np.diff(T_ref) as time gaps, so rows are presumably time-ordered - confirm.
    T_ref  = path_df['timestamp'].values
    xy_hat = path_df[['x', 'y']].values

    # Sensor trace of this path, read from the test set.
    example = read_data_file(f'/kaggle/input/indoor-location-navigation/test/{path}.txt')
    rel_positions = compute_rel_positions(example.acce, example.ahrs)
    # Column 0 is used as time, columns 1:3 as per-step displacement. Pad with
    # a zero-displacement row at t=0 and, when the last waypoint lies after
    # the last step, another at T_ref[-1], so that T_ref falls inside the
    # interpolation range.
    if T_ref[-1] > rel_positions[-1, 0]:
        rel_positions = [np.array([[0, 0, 0]]), rel_positions, np.array([[T_ref[-1], 0, 0]])]
    else:
        rel_positions = [np.array([[0, 0, 0]]), rel_positions]
    rel_positions = np.concatenate(rel_positions)

    T_rel = rel_positions[:, 0]
    # Cumulative displacement, interpolated at the waypoint timestamps and
    # differenced: the displacement between consecutive waypoints.
    delta_xy_hat = np.diff(scipy.interpolate.interp1d(T_rel, np.cumsum(rel_positions[:, 1:3], axis=0), axis=0)(T_ref), axis=0)

    N = xy_hat.shape[0]
    delta_t = np.diff(T_ref)
    # Inverse-square weights: alpha for the absolute-position term, beta for
    # the displacement term (its base value grows with the time gap).
    # NOTE(review): 8.1 and 0.3 look like standard deviations in metres and
    # 1e-3 a ms -> s conversion - confirm against the credited notebook.
    alpha = (8.1)**(-2) * np.ones(N)
    beta  = (0.3 + 0.3 * 1e-3 * delta_t)**(-2)
    A = scipy.sparse.spdiags(alpha, [0], N, N)
    B = scipy.sparse.spdiags( beta, [0], N-1, N-1)
    # D is the (N-1) x N forward-difference operator: (D @ x)[i] = x[i+1] - x[i].
    D = scipy.sparse.spdiags(np.stack([-np.ones(N), np.ones(N)]), [0, 1], N-1, N)

    # Normal equations of the weighted least-squares problem
    #   sum_i alpha_i * |x_i - xy_hat_i|^2
    #   + sum_i beta_i * |(x_{i+1} - x_i) - delta_xy_hat_i|^2,
    # i.e. (A + D^T B D) x = A xy_hat + D^T B delta_xy_hat.
    Q = A + (D.T @ B @ D)
    c = (A @ xy_hat) + (D.T @ (B @ delta_xy_hat))
    # NOTE(review): only scipy.sparse is imported explicitly in this cell;
    # confirm scipy.sparse.linalg is reachable on the SciPy version in use.
    xy_star = scipy.sparse.linalg.spsolve(Q, c)

    return pd.DataFrame({
        'site_path_timestamp' : path_df['site_path_timestamp'],
        'floor' : path_df['floor'],
        'x' : xy_star[:, 0],
        'y' : xy_star[:, 1],
    })

In [None]:
# Inputs: the training waypoints and the raw submission to post-process.
train_waypoints = pd.read_csv('/kaggle/input/indoor-location-train-waypoints/train_waypoints.csv')
sub_df = pd.read_csv('/kaggle/input/lstm-by-keras-with-unified-wifi-feats/submission.csv')
# Stage 1: cost-minimization correction of each path.
processed_sub_df1 = post_process_correct_path(sub_df)
# Stage 2: snap the stage-1 output to training waypoints closer than the
# threshold (same units as x / y).
processed_sub_df2 = post_process_snap_to_grid(processed_sub_df1, train_waypoints, threshold=8)

In [None]:
# Give all three frames the same layout (site, path, timestamp, floorNo, ...);
# plot_preds filters on site / floorNo and groups by path.
sub_df = sub_process(sub_df, train_waypoints)
processed_sub_df1 = sub_process(processed_sub_df1, train_waypoints)
processed_sub_df2 = sub_process(processed_sub_df2, train_waypoints)

In [None]:
from pathlib import Path
def generate_target_sites(sub_df):
    """Return the distinct values of ``sub_df['site']`` as a sorted list."""
    distinct_sites = list(sub_df['site'].unique())
    distinct_sites.sort()
    return distinct_sites

def generate_site_floors_dict(sub_df, train_dir='/kaggle/input/indoor-location-navigation/train'):
    """Map every site in the submission to the floor names found on disk.

    Parameters
    ----------
    sub_df : frame with a ``site`` column.
    train_dir : directory containing one sub-directory per site; the names
        of the entries inside ``<train_dir>/<site>`` are taken as floors.

    Returns
    -------
    dict mapping site -> sorted list of floor names. A site without a
    directory under ``train_dir`` maps to an empty list.
    """
    train_root = Path(train_dir)
    return {
        # glob yields entries in arbitrary order; sort for a stable plot order.
        site: sorted(entry.name for entry in (train_root / site).glob('*'))
        for site in generate_target_sites(sub_df)
    }

# Sites present in the submission and, for each, the floors found on disk.
all_sites = generate_target_sites(sub_df)
site_floors_dict = generate_site_floors_dict(sub_df)
# Bare expression: the notebook displays the dict as the cell output.
site_floors_dict

In [None]:
# Split the sites into three batches (first 8, next 8, the rest); each batch
# is plotted by its own plot_sites call below.
sites1 = all_sites[:8]
sites2 = all_sites[8:16]
sites3 = all_sites[16:]

In [None]:
def plot_sites(sites, sub1_df, sub2_df, sub3_df):
    """Draw one row of three floor maps for every floor of every given site.

    Column 0 shows ``sub1_df`` ("raw submission"), column 1 ``sub2_df``
    ("correct_path") and column 2 ``sub3_df`` ("snap_to_grid"). Floors are
    taken from the module-level ``site_floors_dict`` and training waypoints
    from the module-level ``train_waypoints``.

    Nothing is drawn when the given sites have no floors at all.
    """
    num_floors = sum(len(site_floors_dict[site]) for site in sites)
    if num_floors == 0:
        # plt.subplots cannot build a grid with zero rows.
        return

    # squeeze=False keeps `ax` two-dimensional even for a single row, so
    # ax[idx][col] is valid for any number of floors.
    fig, ax = plt.subplots(num_floors, 3, figsize=(36, 12 * num_floors), squeeze=False)

    panels = (
        ("raw submission", sub1_df),
        ("correct_path", sub2_df),
        ("snap_to_grid", sub3_df),
    )

    idx = 0
    for site in sites:
        for floor in site_floors_dict[site]:
            for col, (context_text, panel_df) in enumerate(panels):
                plot_preds(ax[idx][col], context_text, site, floor, panel_df,
                           train_waypoints, show_preds=True)
            idx += 1
    plt.show()

In [None]:
# First batch of sites: raw vs. cost-minimized vs. snapped-to-grid.
plot_sites(sites1, sub_df, processed_sub_df1, processed_sub_df2)

In [None]:
# Second batch of sites: raw vs. cost-minimized vs. snapped-to-grid.
plot_sites(sites2, sub_df, processed_sub_df1, processed_sub_df2)

In [None]:
# Remaining sites: raw vs. cost-minimized vs. snapped-to-grid.
plot_sites(sites3, sub_df, processed_sub_df1, processed_sub_df2)