## Overview
This notebook fixes predicted paths that look obviously wrong because they cross a wall, such as the one shown on the left of the image below.

<img src= "https://i.imgur.com/a8B34Hm.png" alt ="the structure of the model" style='width: 500px;'>

To make this work, I converted the floor maps into character arrays [in this notebook](https://www.kaggle.com/kokitanisaka/create-arrayed-map).

In this notebook, we first apply push-to-corridor, snap-to-grid and device-based leak post-processing, and then fix the waypoints that were snapped to the wrong place.

In [None]:
import pandas as pd
import numpy as np
import glob
from scipy.spatial.distance import cdist
import json
import matplotlib.pylab as plt
import pickle

from shapely.geometry import Point, LineString
from tqdm import tqdm
import warnings
from shapely.geometry import Polygon
from shapely.ops import nearest_points
import multiprocessing
import os
import math
from collections import deque

import multiprocessing
from multiprocessing import Pool

num_cores = multiprocessing.cpu_count()

warnings.simplefilter('ignore')

import re
import types
def imports():
    """Yield ``(name, value)`` for every module and every callable in this module's globals.

    Only modules and callables are yielded; plain data globals are left out.
    The result is used by ``noglobal`` to build a restricted globals dict.
    """
    for name, val in globals().items():
        # module imports
        if isinstance(val, types.ModuleType):
            yield name, val
        # functions / callables
        if hasattr(val, '__call__'):
            yield name, val


np.seterr(divide='ignore', invalid='ignore')


def noglobal(fn):
    """Decorator: rebuild ``fn`` so it can only see imported modules and callables.

    The returned function shares ``fn``'s code object but its globals are a
    snapshot of ``imports()`` taken at decoration time, so reading a data
    global from inside ``fn`` raises ``NameError``.

    Unlike a bare ``types.FunctionType(code, globals)`` call, the positional
    defaults, keyword-only defaults, closure and docstring of ``fn`` are
    carried over, so decorated functions with default arguments keep working.
    """
    isolated = types.FunctionType(
        fn.__code__,
        dict(imports()),
        fn.__name__,
        fn.__defaults__,
        fn.__closure__,
    )
    isolated.__kwdefaults__ = fn.__kwdefaults__
    isolated.__doc__ = fn.__doc__
    return isolated

In [None]:
# Largest straight-line distance between a point's original prediction and a
# train waypoint for that waypoint to count as a replacement candidate.
SNAP_THRESHOLD = 20
# The wall-aware grid search stops once it has walked this many steps.
FIND_DIST_THRESHOLD = 20
# A step is only repaired when its wall overlap (overlap area divided by step
# length) is larger than this value.
THRESHOLD_AREA = 3
# Run the device-based leak post-processing right after snap-to-grid.
APPLY_LEAK_FIRST = True
# Predictions that this notebook post-processes.
submission_path = '../input/self-attentintive-lstm-by-keras/submission.csv'

# Site / floor plotted by snap(); snap() skips plotting when example_site is None.
example_site='5a0546857ecc773753327266'
example_floorNo='B1'

In [None]:
submission = pd.read_csv(submission_path)

# 'site_path_timestamp' is '<site>_<path>_<timestamp>': split every key once
# with the vectorised string accessor instead of four row-wise Python passes.
_key_parts = submission['site_path_timestamp'].str.split('_')
submission['site'] = _key_parts.str[0]
submission['path'] = _key_parts.str[1]
submission['timestamp'] = _key_parts.str[2]

# Sites present in the submission, in order of first appearance.
target_sites = submission['site'].unique()

In [None]:
# Metadata directories, narrowed down to the sites found in the submission.
metadata = pd.DataFrame(glob.glob('../input/indoor-location-navigation/metadata/*'))
metadata.columns = ['path']
# The last path component is the site id.
metadata['site'] = metadata['path'].map(lambda p: p.split('/')[-1])
_is_target_site = metadata['site'].isin(target_sites)
metadata = metadata.loc[_is_target_site, 'path'].values

## Device Based Leak PP

In [None]:
def device_based_leak_pp(sub):
    """Overwrite the first and last point of each path with leaked waypoints.

    Parameters
    ----------
    sub : DataFrame with at least the columns ``path``, ``x`` and ``y``.

    Returns
    -------
    A copy of ``sub`` in which, for every path that has a row in the leak
    table, the rows with the smallest / largest index label take the leaked
    start / end coordinates whenever the leaked x value is not missing.
    Paths without a leak row are left untouched instead of raising.
    """
    # NOTE: the leak table is a pickle; only load it from a trusted dataset.
    df_leak = pd.read_pickle('../input/indoor-support-data/df_leak.pkl')
    df_leak = df_leak.rename({'path_id':'path'}, axis=1)
    # Keep the first leak row per path (what `.iloc[0]` used to pick) and index
    # it by path so each lookup is a single `.loc`.
    leak_by_path = df_leak.drop_duplicates(subset='path').set_index('path')

    df_sub = sub.copy()
    rows_by_path = df_sub.groupby('path', sort=False).groups
    for path, row_labels in tqdm(rows_by_path.items()):
        if path not in leak_by_path.index:
            continue
        leak = leak_by_path.loc[path]
        start_idx = row_labels.min()
        end_idx = row_labels.max()
        if not pd.isna(leak["start_waypoint_x"]):
            df_sub.at[start_idx, "x"] = leak["start_waypoint_x"]
            df_sub.at[start_idx, "y"] = leak["start_waypoint_y"]
        if not pd.isna(leak["end_waypoint_x"]):
            df_sub.at[end_idx, "x"] = leak["end_waypoint_x"]
            df_sub.at[end_idx, "y"] = leak["end_waypoint_y"]
    return df_sub

## Snap to Grid

In [None]:
# Floor directory name -> numeric floor used in the submission.
# Basements are negative; the ground floor ("F1" / "1F") is 0.
floor_map = {"B2": -2, "B1": -1}
floor_map.update({f"F{level}": level - 1 for level in range(1, 10)})
floor_map.update({f"{level}F": level - 1 for level in range(1, 10)})

In [None]:
train_waypoints = pd.read_csv('../input/indoor-location-train-waypoints/train_waypoints.csv')
# Integer grid cell of every waypoint (vectorised floor instead of a
# per-row math.floor call).
train_waypoints['x_coord'] = np.floor(train_waypoints['x']).astype('int64')
train_waypoints['y_coord'] = np.floor(train_waypoints['y']).astype('int64')
# -1 marks "not reached yet" for the wall-aware grid search.
train_waypoints['wall_dist'] = -1

train_waypoints = train_waypoints.drop_duplicates(subset=['path', 'x', 'y'])

In [None]:
# snap to grid        
def split_col(df):
    """Return a copy of ``df`` with ``site``, ``path`` and ``timestamp`` prepended.

    The three columns come from splitting ``site_path_timestamp`` on ``'_'``.
    """
    key_parts = df['site_path_timestamp'].str.split('_', expand=True)
    key_parts = key_parts.rename(columns={0: 'site', 1: 'path', 2: 'timestamp'})
    return pd.concat([key_parts, df], axis=1).copy()

def plot_preds(
    site,
    floorNo,
    sub=None,
    true_locs=None,
    base="../input/indoor-location-navigation",
    show_train=True,
    show_preds=True,
    fix_labels=True,
    map_floor=None
):
    """
    Plots predictions on floorplan map.

    site, floorNo : site id and floor name whose predictions are drawn
    sub : predictions with columns site, floorNo, path, x, y
    true_locs : train waypoints with columns site, floorNo, path, x, y
    base : root directory of the competition data
    show_train / show_preds : toggle the two layers; a layer is skipped
        when its data frame is None instead of raising
    fix_labels : collapse duplicate legend entries
    map_floor : use a different floor's map

    Returns the matplotlib (fig, ax) pair.
    """
    if map_floor is None:
        map_floor = floorNo
    # Prepare width_meter & height_meter (taken from the .json file)
    floor_dir = f"{base}/metadata/{site}/{map_floor}"
    with open(f"{floor_dir}/floor_info.json") as json_file:
        json_data = json.load(json_file)

    width_meter = json_data["map_info"]["width"]
    height_meter = json_data["map_info"]["height"]

    floor_img = plt.imread(f"{floor_dir}/floor_image.png")
    img_height, img_width = floor_img.shape[0], floor_img.shape[1]

    fig, ax = plt.subplots(figsize=(12, 12))
    plt.imshow(floor_img)

    def to_pixels(locs):
        """Add pixel columns x_/y_ to a copy of `locs` (x, y are in meters)."""
        locs = locs.copy()
        # x runs along the image width and y along its height. The image
        # origin is the top-left corner, so y is flipped. Identical to the
        # previous formula whenever the image has one uniform pixels-per-meter
        # scale, and correct when it has not.
        locs["x_"] = locs["x"] * img_width / width_meter
        locs["y_"] = img_height - locs["y"] * img_height / height_meter
        return locs

    if show_train and true_locs is not None:
        train_px = to_pixels(true_locs.query('site == @site and floorNo == @map_floor'))
        train_px.groupby("path").plot(
            x="x_",
            y="y_",
            style="+",
            ax=ax,
            label="train waypoint location",
            color="grey",
            alpha=0.5,
        )

    if show_preds and sub is not None:
        preds_px = to_pixels(sub.query('site == @site and floorNo == @floorNo'))
        for path, path_data in preds_px.groupby("path"):
            path_data.plot(
                x="x_",
                y="y_",
                style=".-",
                ax=ax,
                title=f"{site} - floor - {floorNo}",
                alpha=1,
                label=path,
            )
    if fix_labels:
        # Every train path is plotted with the same label; keep one legend
        # entry per distinct label.
        handles, labels = ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        plt.legend(
            by_label.values(), by_label.keys(), loc="center left", bbox_to_anchor=(1, 0.5)
        )
    return fig, ax

def sub_process(sub, train_waypoints):
    """Prepare a submission for snapping.

    Splits ``site_path_timestamp`` into ``site`` / ``path`` / ``timestamp``,
    attaches the ``floorNo`` name of each (site, floor) pair and flags the
    rows whose (site, floor, x, y) coincide exactly with a train waypoint in
    the boolean-like column ``isTrainWaypoint``.

    ``train_waypoints`` is not modified: the marker column is added to a
    private copy rather than to the caller's frame.
    """
    tagged_waypoints = train_waypoints.assign(isTrainWaypoint=True)
    sub = split_col(sub[['site_path_timestamp','floor','x','y']]).copy()
    # No `on=`: merges on the columns both frames share (site and floor).
    sub = sub.merge(tagged_waypoints[['site','floorNo','floor']].drop_duplicates(), how='left')
    sub = sub.merge(
        tagged_waypoints[['x','y','site','floor','isTrainWaypoint']].drop_duplicates(),
        how='left',
        on=['site','x','y','floor']
             )
    # Rows without a matching waypoint come back as NaN from the left merge.
    sub['isTrainWaypoint'] = sub['isTrainWaypoint'].fillna(False)
    return sub.copy()

def add_xy(df):
    """Add an ``xy`` column of ``(x, y)`` tuples to ``df`` in place and return ``df``."""
    df['xy'] = list(zip(df['x'], df['y']))
    return df

def closest_point(point, points):
    """Return the element of ``points`` with the smallest distance to ``point``."""
    distances = cdist([point], points)
    nearest_position = distances.argmin()
    return points[nearest_position]

def snap_to_grid(sub, threshold, more_than = 0):
    """
    Replace a prediction by its closest grid point when ``more_than <= dist < threshold``.

    Column naming:
      x, y     - predicted points
      x_, y_   - closest grid points
      _x_, _y_ - predictions after post processing

    The ``_x_`` / ``_y_`` columns are added to ``sub`` itself; a copy of the
    resulting frame is returned.
    """
    in_band = (sub['dist'] >= more_than) & (sub['dist'] < threshold)
    for predicted, grid, result in (('x', 'x_', '_x_'), ('y', 'y_', '_y_')):
        sub[result] = sub[predicted]
        sub.loc[in_band, result] = sub.loc[in_band, grid]
    return sub.copy()

def snap(sub, train_waypoints, threshold=0, more_than=0):
    """Snap predictions to the closest train waypoint of the same site and floor.

    Parameters
    ----------
    sub : submission with columns site_path_timestamp, floor, x, y.
    train_waypoints : train waypoints (site, floor, floorNo, path, x, y).
    threshold, more_than : a point is snapped when
        ``more_than <= dist < threshold``; with the default ``threshold=0``
        nothing is moved and the call only re-formats (and plots) ``sub``.

    Returns
    -------
    DataFrame with columns site_path_timestamp, floor, x, y, site, path,
    floorNo. Rows of a (site, floor) pair without any train waypoint are
    dropped (a 'Skipping ...' line is printed for each such pair).

    Raises
    ------
    ValueError if no (site, floor) pair of ``sub`` has train waypoints.

    Side effect: when the module-level ``example_site`` is not None, the
    result is plotted for ``example_site`` / ``example_floorNo``.
    """
    sub = sub_process(sub, train_waypoints)

    sub = add_xy(sub)
    # add_xy writes in place: work on a copy so the caller's frame is untouched.
    train_waypoints = add_xy(train_waypoints.copy())

    ds = []
    for (site, myfloor), d in sub.groupby(['site','floor']):
        true_floor_locs = train_waypoints.loc[(train_waypoints['floor'] == myfloor) &
                                              (train_waypoints['site'] == site)] \
            .reset_index(drop=True)
        if len(true_floor_locs) == 0:
            print(f'Skipping {site} {myfloor}')
            continue
        # Own copy of the group, so the column writes below do not target a
        # slice of `sub`.
        d = d.copy()
        # Build the candidate list once per floor, not once per point.
        floor_grid = list(true_floor_locs['xy'])
        d['matched_point'] = [closest_point(p, floor_grid) for p in d['xy']]
        d['x_'] = d['matched_point'].apply(lambda x: x[0])
        d['y_'] = d['matched_point'].apply(lambda x: x[1])
        ds.append(d)

    if not ds:
        raise ValueError('snap: no (site, floor) group of the submission has train waypoints')
    sub = pd.concat(ds)

    # Calculate the distances
    sub['dist'] = np.sqrt( (sub.x-sub.x_)**2 + (sub.y-sub.y_)**2 )

    sub_pp = snap_to_grid(sub, threshold=threshold, more_than=more_than)

    sub_pp = sub_pp[['site_path_timestamp','floor','_x_','_y_','site','path','floorNo']] \
        .rename(columns={'_x_':'x', '_y_':'y'})

    if example_site is not None:
        plot_preds(example_site, example_floorNo, sub_pp,
                   train_waypoints, show_preds=True)
        plt.show()

    return sub_pp

## Push to Corridor

In [None]:
def fix_prediction(args):
    """Move the predictions of one (site, floor) group into the corridor.

    Parameters
    ----------
    args : ``((site, floor), df)`` as produced by iterating
        ``DataFrame.groupby(['site', 'floor'])``; ``floor`` is the numeric
        floor and ``df`` holds the columns ``x`` and ``y``.

    Returns
    -------
    A copy of ``df`` in which every point that is not within the corridor
    geometry is replaced by the nearest point of that geometry. When no
    geometry directory matches the floor, ``df`` is returned unchanged and
    a 'Skipping ...' line is printed.
    """
    # Unpack
    (site, floor), df = args

    # Find the file path
    geojson_root = '../input/indoor-location-navigation-scaled-geojson/scaled_geojson/'
    geometry_path = None
    for name in os.listdir(geojson_root + site):
        # .get(): a directory name missing from floor_map must not abort the search.
        if floor_map.get(name) == floor:
            geometry_path = geojson_root + site + '/' + name + '/shapely_geometry.pkl'
            break
    if geometry_path is None:
        print(f'Skipping {site} {floor}: no corridor geometry found')
        return df

    # Open the corridor
    # NOTE: unpickling executes code; only use geometry files from a trusted dataset.
    with open(geometry_path, 'rb') as f:
        corridor = pickle.load(f)

    # Find the outside-corridor points and force them into the corridor
    df = df.copy()
    out_corridor_idx = []
    corridor_nearest_points = []
    for idx, x, y in zip(df.index, df['x'].values, df['y'].values):
        p = Point(x, y)
        if p.within(corridor):
            continue
        nearest_p, _ = nearest_points(corridor, p)
        out_corridor_idx.append(idx)
        corridor_nearest_points.append([nearest_p.x, nearest_p.y])

    if len(corridor_nearest_points) != 0:
        df.loc[out_corridor_idx, ['x', 'y']] = np.array(corridor_nearest_points)

    return df

def push_to_corridor(ss):
    """Push every prediction of ``ss`` into the corridor of its floor.

    The (site, floor) groups are processed in parallel by ``fix_prediction``.
    ``ss`` itself is not modified.

    Returns a frame with the columns site_path_timestamp, floor, x, y,
    sorted by site_path_timestamp.
    """
    # Work on a copy so the helper columns are not written into the caller's frame.
    ss = ss.copy()
    ss[['site', 'path', 'timestamp']] = np.array([i.split('_') for i in ss.site_path_timestamp])

    groups = ss.groupby(['site', 'floor'])
    processes = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes = processes) as pool:
        dfs = pool.imap_unordered(fix_prediction, groups)
        # imap_unordered has no length; pass the group count so tqdm can show progress.
        dfs = tqdm(dfs, total=groups.ngroups)
        dfs = list(dfs)
    return pd.concat(dfs).sort_values('site_path_timestamp')[['site_path_timestamp', 'floor', 'x', 'y']]

In [None]:
# Push to corridor -> snap to grid -> (optionally) device-based leak.
snapped_submission = push_to_corridor(submission)
snapped_submission = snap(snapped_submission, train_waypoints, 5)
if APPLY_LEAK_FIRST:
    snapped_submission = device_based_leak_pp(snapped_submission)

# Keep the raw model output; the wall fix measures distances against it.
submission['x_org'] = submission['x']
submission['y_org'] = submission['y']

# Copy the post-processed coordinates back by key, not by index label:
# snap() renumbers (and may drop) rows, so index labels only line up when the
# CSV was already sorted by site_path_timestamp and nothing was skipped.
snapped_xy = snapped_submission.drop_duplicates(subset='site_path_timestamp') \
    .set_index('site_path_timestamp')[['x', 'y']]
for axis_col in ('x', 'y'):
    # Rows that snap() dropped fall back to the raw prediction instead of NaN.
    submission[axis_col] = submission['site_path_timestamp'].map(snapped_xy[axis_col]) \
        .fillna(submission[f'{axis_col}_org'])

## Fix the Snapped Waypoints

In [None]:
class Node:
    """One cell of the character map, used as a vertex of the grid search.

    row, col : position of the cell
    nears    : the four neighbouring [row, col] pairs (col+1, col-1, row+1, row-1)
    step     : number of steps from the search start, -1 while unvisited
    aa       : the map character of the cell
    """

    def __init__(self, row, col, aa):
        self.col = col
        self.row = row
        self.nears = [[row + d_row, col + d_col]
                      for d_row, d_col in ((0, 1), (0, -1), (1, 0), (-1, 0))]
        self.step = -1
        self.aa = aa

    def __repr__(self):
        return f'Node row:{self.row} Node col:{self.col} nears:{self.nears} step:{self.step}'

    def has_visited(self):
        """True once the search has assigned a step count to this cell."""
        return self.step != -1

    def show(self):
        """Symbol to print: 0 for the start cell, the map character for
        waypoint ('*') or unvisited cells, else the first digit of the step."""
        if self.step == 0:
            return 0
        if self.aa == '*' or not self.has_visited():
            return self.aa
        return str(self.step)[0]

    def reset(self):
        """Mark the cell as unvisited again."""
        self.step = -1

In [None]:
def create_nodes(array_map):
    """Build a grid of ``Node`` objects mirroring ``array_map``.

    ``nodes[i][j]`` wraps ``array_map[i][j]``; the column count is taken
    from the first row.
    """
    return [
        [Node(i, j, array_map[i][j]) for j in range(len(array_map[0]))]
        for i in range(len(array_map))
    ]

def snap_dist(x, path_row):
    """Euclidean distance between waypoint ``x`` (keys x, y) and the original
    prediction stored in ``path_row`` (keys x_org, y_org)."""
    delta_x = path_row['x_org'] - x['x']
    delta_y = path_row['y_org'] - x['y']
    return math.sqrt(delta_x ** 2 + delta_y ** 2)

In [None]:
def get_xy(x, y, nodes):
    """Clamp the cell index ``(x, y)`` to the bounds of the grid ``nodes``.

    ``x`` is limited to ``0 .. len(nodes) - 1`` and ``y`` to
    ``0 .. len(nodes[0]) - 1``.
    """
    last_x = len(nodes) - 1
    last_y = len(nodes[0]) - 1
    # Lower bound first, then upper bound.
    return min(max(0, x), last_x), min(max(0, y), last_y)

@noglobal
def calc_distance_to_waypoints_considering_wall(
        start_x, start_y,  
        array_map, nodes_template, waypoints_template, path_row, SNAP_THRESHOLD, FIND_DIST_THRESHOLD):
    """Breadth-first search over the character map for reachable train waypoints.

    Starting at cell ``(start_x, start_y)`` the search only walks through
    '.' (free) and '*' (waypoint) cells, so the step count to a waypoint is
    a walking distance that does not cross walls.

    Parameters
    ----------
    start_x, start_y : start cell (already inside the grid).
    array_map : 2-D character map indexed ``[x][y]``.
    nodes_template : grid of ``Node`` objects matching ``array_map``; the
        nodes are shared with the template and are reset at the start of
        each call.
    waypoints_template : waypoints of the floor with the columns x, y,
        x_coord, y_coord.
    path_row : row holding the original prediction in x_org / y_org.
    SNAP_THRESHOLD : candidates are the waypoints whose straight-line
        distance to the original prediction is at most this value.
    FIND_DIST_THRESHOLD : the search stops once it is this many steps deep.

    Returns
    -------
    ``(best, nodes, waypoints)``: ``best`` holds at most one row, the reached
    candidate with the smallest ``snap_dist`` (ties broken by ``wall_dist``);
    ``nodes`` is the searched grid; ``waypoints`` are all candidates with
    ``wall_dist`` set to the step count, or -1 when not reached.
    """
    # Shallow copy: the Node objects are shared, so clear earlier results.
    nodes = nodes_template.copy()
    for node_row in nodes:
        for cell in node_row:
            cell.reset()

    waypoints = waypoints_template.copy()

    # Vectorised straight-line distance to the original prediction.
    waypoints['snap_dist'] = np.sqrt(
        (path_row['x_org'] - waypoints['x']) ** 2 + (path_row['y_org'] - waypoints['y']) ** 2)
    # .copy(): the frame is written to below, so it must not be a slice.
    waypoints = waypoints[waypoints['snap_dist']<=SNAP_THRESHOLD].copy()
    waypoints['wall_dist'] = -1

    queue = deque()
    nodes[start_x][start_y].step = 0
    queue.append(nodes[start_x][start_y])

    num_found_waypoints = 0
    farthest_step = 0
    all_found = False
    while queue:
        node = queue.popleft()
        step = node.step + 1
        farthest_step = max(farthest_step, step)

        for near in node.nears:
            # Neighbours outside the map are clamped onto the border cell.
            near_x, near_y = get_xy(near[0], near[1], nodes)
            ext_node = nodes[near_x][near_y]
            if ext_node.has_visited():
                continue

            map_point = array_map[near_x][near_y]
            if map_point != '.' and map_point != '*':
                continue  # not walkable

            ext_node.step = step
            queue.append(ext_node)
            if map_point != '*':
                continue

            # Count only the candidate rows actually reached. A '*' cell that
            # belongs to no candidate must not count towards the early stop,
            # otherwise the search can end before any candidate is reached.
            reached = (waypoints['x_coord']==near_x) & (waypoints['y_coord']==near_y)
            waypoints.loc[reached, 'wall_dist'] = step
            num_found_waypoints += int(reached.sum())

            if num_found_waypoints >= len(waypoints):
                all_found = True
                break

        if farthest_step >= FIND_DIST_THRESHOLD:
            break
        if all_found:
            break

    return waypoints[waypoints['wall_dist']!=-1].sort_values(['snap_dist', 'wall_dist'], ascending=True).head(1), nodes, waypoints

In [None]:
def get_line(x):
    """Return the segment from the row's current point (x, y) to its previous point (x_lag, y_lag)."""
    current_point = (x['x'], x['y'])
    previous_point = (x['x_lag'], x['y_lag'])
    return LineString([current_point, previous_point])

def intersection(x, corridor):
    """Return the part of ``corridor`` covered by the row's step line widened by 2.

    Returns None for rows without a previous point (``x_lag`` is NaN).
    """
    if math.isnan(x['x_lag']):
        return None

    widened_step = x['line_lag'].buffer(2)
    return corridor.intersection(widened_step)

def intersection_area(x, corridor):
    """Return the row's overlap area divided by the length of its step line.

    Returns None for rows without a previous point (``x_lag`` is NaN) and 0
    for a zero-length step. ``corridor`` is accepted but not used.
    """
    if math.isnan(x['x_lag']):
        return None

    step_length = x['line_lag'].length
    if step_length == 0:
        return 0

    return x['intersection'].area / step_length

In [None]:
# Work on a time-ordered copy of the submission and give every point the
# coordinates of its predecessor (lag) and successor (lead) on the same path.
data = submission.sort_values('site_path_timestamp')

_xy_by_path = data.groupby('path')[['x', 'y']]
_previous_xy = _xy_by_path.shift()
_next_xy = _xy_by_path.shift(-1)

data['x_lag'] = _previous_xy['x']
data['y_lag'] = _previous_xy['y']
data['x_lead'] = _next_xy['x']
data['y_lead'] = _next_xy['y']


def floor_type(x, array_map):
    """Return the map character of the grid cell that contains the row's (x, y).

    The cell index is clamped to the map, so a point on or beyond the map
    edge neither raises IndexError nor wraps around through a negative index.
    """
    _x, _y = get_xy(math.floor(x['x']), math.floor(x['y']), array_map)
    return array_map[_x][_y]


def _refresh_wall_overlap(path_group, all_area):
    """(Re)compute line_lag, intersection and intersection_area of a path in place."""
    path_group['line_lag'] = path_group.apply(lambda x: get_line(x), axis=1)
    path_group['intersection'] = path_group.apply(lambda x: intersection(x, all_area), axis=1)
    path_group['intersection_area'] = path_group.apply(lambda x: intersection_area(x, all_area) , axis=1)
    return path_group


for site in tqdm(target_sites):

    metadata_floor_paths = glob.glob(f'../input/indoor-location-navigation/metadata/{site}/*')

    for metadata_floor_path in metadata_floor_paths:
        floor = metadata_floor_path.split('/')[-1]
        # Floor names missing from floor_map have no numeric floor to match
        # against the submission, so skip them instead of raising KeyError.
        floor_val = floor_map.get(floor)
        if floor_val is None:
            continue

        floor_data = data[(data['site']==site) & (data['floor']==floor_val)]
        if floor_data.empty:
            # Nothing predicted on this floor: do not load its geometry at all.
            continue

        corridor_path = f'../input/indoor-location-navigation-scaled-geojson/scaled_geojson/{site}/{floor}/shapely_geometry.pkl'
        with open(corridor_path, 'rb') as f:
            corridor = pickle.load(f)
        minx, miny, maxx, maxy = corridor.bounds
        # Everything inside the bounding rectangle that is not corridor.
        all_area = Polygon([(0, 0), (maxx, 0), (maxx, maxy), (0, maxy)])
        all_area = all_area.difference(corridor)

        with open(f'../input/indoor-array-map/{site}_{floor}_array_map.pkl' ,'rb') as f:
            array_map = np.array(pickle.load(f)).T

        nodes_template = create_nodes(array_map)

        # .copy(): a 'dist' column is written to this frame further down.
        waypoints = train_waypoints[(train_waypoints['site']==site) & (train_waypoints['floor']==floor_val)].copy()

        for path, path_group in floor_data.groupby('path'):

            # Own copy of the group, so the column writes do not target a slice of `data`.
            path_group = path_group.copy()
            path_group['x_upd'] = 0
            path_group['y_upd'] = 0
            _refresh_wall_overlap(path_group, all_area)

            row_labels = list(path_group.index)
            for pos, i in enumerate(row_labels):

                path_row = path_group.loc[i, :]

                # skip the first step
                if math.isnan(path_row['x_lag']): continue

                # check if the line crosses the wall
                line_from_to = LineString([Point(path_row['x_lag'], path_row['y_lag']), 
                                           Point(path_row['x'], path_row['y'])])
                if not corridor.crosses(line_from_to): continue
                if path_row['intersection_area'] <= THRESHOLD_AREA: continue

                # update the current point to a valid one
                start_x, start_y = get_xy(math.floor(path_row['x_lag']), math.floor(path_row['y_lag']), nodes_template)

                nearest_wp, res_nodes, possible_wps = calc_distance_to_waypoints_considering_wall(start_x, start_y, array_map, nodes_template, waypoints, path_row,
                                                                                                  SNAP_THRESHOLD, FIND_DIST_THRESHOLD)
                if nearest_wp.shape[0] == 0: continue

                rep_x = nearest_wp['x'].values[0]
                rep_y = nearest_wp['y'].values[0]
                path_group.loc[i, 'x_upd'] = rep_x
                path_group.loc[i, 'y_upd'] = rep_y

                # The next point of the path now starts from the replacement.
                # It is addressed by position, not as label i + 1, because
                # index labels are only consecutive when the submission was
                # already sorted by site_path_timestamp.
                if pos + 1 < len(row_labels):
                    next_label = row_labels[pos + 1]
                    path_group.loc[next_label, 'x_lag'] = rep_x
                    path_group.loc[next_label, 'y_lag'] = rep_y
                    _refresh_wall_overlap(path_group, all_area)

                data.loc[i, 'x'] = rep_x
                data.loc[i, 'y'] = rep_y

            # fix the points outside the map
            # NOTE(review): path_group['x'/'y'] still hold the coordinates from
            # before the wall fix above (only `data` was updated) - confirm
            # that this is intended.
            path_group['floor_type'] = path_group.apply(lambda x: floor_type(x, array_map), axis=1)
            walls = path_group[path_group['floor_type']=='#']

            for i, wall in walls.iterrows():
                # Vectorised distance of every floor waypoint to the original prediction.
                waypoints['dist'] = np.sqrt((waypoints['x'] - wall['x_org']) ** 2 + (waypoints['y'] - wall['y_org']) ** 2)
                nearest = waypoints[waypoints['dist']<=6].sort_values('dist')
                if nearest.shape[0] != 0:
                    nearest = nearest.head(1)
                    data.loc[i, 'x'] = nearest['x'].values[0]
                    data.loc[i, 'y'] = nearest['y'].values[0]
                    continue

                _x = data.loc[i, 'x_org']
                _y = data.loc[i, 'y_org']

                org_x, org_y = get_xy(math.floor(_x), math.floor(_y), array_map)
                if array_map[org_x][org_y] != '#':
                    # The original prediction is not in a wall: go back to it.
                    data.loc[i, 'x'] = _x
                    data.loc[i, 'y'] = _y
                    continue

                # Original prediction is in a wall too: take the closest
                # waypoint, however far away (if the floor has any).
                nearest = waypoints.sort_values('dist').head(1)
                if nearest.shape[0] != 0:
                    data.loc[i, 'x'] = nearest['x'].values[0]
                    data.loc[i, 'y'] = nearest['y'].values[0]


## Display Utils

In [None]:
def display_nodes_map(nodes, possible_wps):
    """Print the searched grid, marking the cells of ``possible_wps`` with '+'.

    nodes : grid (list of lists) of objects with a ``show()`` method.
    possible_wps : DataFrame with the columns x_coord and y_coord.

    The grid is transposed and printed bottom row first.
    """
    ns = np.array(nodes)

    for _, r in possible_wps.iterrows():
        # iterrows() may upcast the coordinates to float; indexing needs ints.
        ns[int(r['x_coord'])][int(r['y_coord'])] = '+'

    for line in reversed(ns.T):
        print()
        for cell in line:
            # Cells overwritten with '+' are plain strings without show().
            print(cell.show() if hasattr(cell, 'show') else cell, end='')
            
def display_aa_map(array_map, nodes, deg_point=None):
    """Print the character map with the search result drawn on top.

    Visited cells show the first digit of their step count; ``deg_point``
    (an ``(x, y)`` pair), when given, is marked with '$'. ``array_map``
    itself is not modified. The map is transposed and printed bottom row
    first.
    """
    ar_map = array_map.copy()

    visited_cells = (cell for node_row in nodes for cell in node_row if cell.has_visited())
    for cell in visited_cells:
        ar_map[cell.row][cell.col] = str(cell.step)[0]

    if deg_point is not None:
        # show the specified point
        mark_x = math.floor(deg_point[0])
        mark_y = math.floor(deg_point[1])
        ar_map[mark_x][mark_y] = '$'

    ar_map = np.array(ar_map.T)

    for line in ar_map[::-1]:
        print()
        print(*line, sep='', end='')

In [None]:
# Debug view of the last wall-aware search run by the loop above:
# res_nodes / possible_wps only exist if at least one search was performed.
display_nodes_map(res_nodes, possible_wps)

In [None]:
# Write the fixed predictions in submission format, ordered by key.
data = data.sort_values('site_path_timestamp')
_submission_columns = ['site_path_timestamp', 'floor', 'x', 'y']
data[_submission_columns].to_csv('fsw.csv', index=False)


In [None]:
# `data` is sorted by key while `submission` keeps the CSV order; align the
# snapped coordinates to `data`'s index before comparing, and count a point as
# fixed when either coordinate moved.
_snapped_x = submission['x'].reindex(data.index)
_snapped_y = submission['y'].reindex(data.index)
_moved = (data['x'] != _snapped_x) | (data['y'] != _snapped_y)
print(f"fixed points : {int(_moved.sum())}")


In [None]:
# Switch the site / floor that the snap() calls below plot.
example_site='5d27097f03f801723c320d97'
example_floorNo='F5'


In [None]:
# Before the fix: raw model predictions, snap-to-grid not applied.
# (threshold defaults to 0, so this call moves nothing and only plots.)
snap(pd.read_csv(submission_path), train_waypoints)

In [None]:
# Before the fix: predictions after the earlier post-processing (snap-to-grid applied).
snap(snapped_submission, train_waypoints)

In [None]:
# After the fix: predictions with the wall-crossing waypoints repaired.
snap(data, train_waypoints)