In this notebook, I use masks I created of the floor hallway areas to push my predictions into hallway areas. I had this idea in my head for a while, but I am a novice coder and was not sure how to accomplish it. 

When I saw Rob Mulla's https://www.kaggle.com/robikscube/indoor-navigation-snap-to-grid-post-processing, I thought I would try to adapt his code to accomplish my task. This notebook is just a copy of his notebook that I modified.
I mostly wanted to see if I could do it and also for practice. Please excuse my amateur/messy code and I appreciate any feedback.

Originally, I used predictions from my copy of https://www.kaggle.com/mehrankazeminia/part-a-indoor-navigation-comparative-method which has a lb score 6.742. After my push to hallway, it scores 6.678.

In this version (16), I use the submission from the public notebook https://www.kaggle.com/museas/order-to-use-post-processing/output?scriptVersionId=57406736&select=sub_cost_snap.csv which has a lb score of 4.909. After my push to hallway, it scores 4.889.

One thing to note: My "hallway masks" include the area surrounding the building, which I would like to fix, but I am still working on it. I create the hallway masks in https://www.kaggle.com/therocket290/make-hallway-masks.


In [None]:
import joblib
import os
from PIL import Image,ImageOps
import cv2

In [None]:
# Helper Functions
import pandas as pd
import numpy as np

import json
import matplotlib.pylab as plt

def split_col(df):
    """
    Return a copy of ``df`` with ``site``, ``path`` and ``timestamp`` columns
    prepended.

    The three columns are obtained by splitting ``df['site_path_timestamp']``
    on ``'_'``; all original columns of ``df`` follow them unchanged.
    """
    id_parts = df['site_path_timestamp'].str.split('_', expand=True)
    id_parts = id_parts.rename(columns={0: 'site', 1: 'path', 2: 'timestamp'})
    combined = pd.concat([id_parts, df], axis=1)
    return combined.copy()

# Lookup from floor-name strings to integer floor numbers.
# Both spellings ("F1" and "1F") map to the same integer; "F1"/"1F" is 0 and
# basement floors ("B1", "B2") are negative.
floor_map = {"B2":-2, "B1":-1, "F1":0, "F2": 1, "F3":2,
             "F4":3, "F5":4, "F6":5, "F7":6,"F8":7,"F9":8,
             "1F":0, "2F":1, "3F":2, "4F":3, "5F":4, "6F":5,
             "7F":6, "8F": 7, "9F":8}


def plot_preds(
    site,
    floorNo,
    sub=None,
    true_locs=None,
    base="../input/indoor-location-navigation",
    show_train=True,
    show_preds=True,
    fix_labels=True,
    map_floor=None
):
    """
    Plot the hallway mask of one floor with training waypoints and/or
    predictions drawn on top.

    Parameters
    ----------
    site : str
        Site id; used to build the metadata, threshold-image and
        hallway-mask file paths.
    floorNo : str
        Floor name. Selects the threshold image, the hallway-mask CSV and
        the rows of ``sub`` that are plotted.
    sub : DataFrame, optional
        Predictions with columns ``site``, ``floorNo``, ``path``, ``x``,
        ``y``. Must be given when ``show_preds`` is True.
    true_locs : DataFrame, optional
        Training waypoints with columns ``site``, ``floorNo``, ``path``,
        ``x``, ``y``. Must be given when ``show_train`` is True.
    base : str
        Root folder used for the floor metadata (``floor_info.json``).
    show_train : bool
        Draw the training waypoints.
    show_preds : bool
        Draw one line per predicted path.
    fix_labels : bool
        Collapse duplicate legend entries and place the legend to the right
        of the axes.
    map_floor : str, optional
        Use a different floor's map. Defaults to ``floorNo``.
        NOTE(review): ``map_floor`` only affects the JSON metadata and the
        training-waypoint filter; the threshold image and the mask CSV are
        always looked up with ``floorNo`` -- confirm this is intended.

    Returns
    -------
    (fig, ax) : the matplotlib figure and axes that were drawn on.
    """
    if map_floor is None:
        map_floor = floorNo
    # Prepare width_meter & height_meter (taken from the .json file)
    floor_plan_filename = f"{base}/metadata/{site}/{map_floor}/floor_image.png"
    json_plan_filename = f"{base}/metadata/{site}/{map_floor}/floor_info.json"
    with open(json_plan_filename) as json_file:
        json_data = json.load(json_file)

    width_meter = json_data["map_info"]["width"]
    height_meter = json_data["map_info"]["height"]
    # The original floor image is read here but not used afterwards.
    floor_img_0 = plt.imread(f"{base}/metadata/{site}/{map_floor}/floor_image.png")
    
    
    # These locations are hard-coded and do not follow the `base` argument.
    thresh_path = '../input/threshold-images/'
    mask_path = '../input/hallwaymask/'
    meta_path = '../input/indoor-location-navigation/metadata/'
    # One pickle per site holding a sequence of per-floor arrays.
    floor_array = joblib.load(thresh_path+'thresh_list_'+site+'.pkl')
    # The floor's position in the pickled sequence is taken from the
    # directory listing.
    # NOTE(review): assumes the pickle was written in the same order that
    # os.listdir returns here -- TODO confirm.
    floors = os.listdir(meta_path+site)
    floor_array = floor_array[floors.index(floorNo)]
    # Keep a single channel (index 1) of the 3-D array.
    floor_array = floor_array[:,:,1]
    floor_img = Image.fromarray(floor_array)
    
    # |pixel - 255|: inverts the image (presumably values are in 0..255).
    adj = np.zeros(floor_array.shape) + 255
    floor_inv = np.abs(floor_array - adj)
    floor_img_inv = Image.fromarray(floor_inv)
    

    fig, ax = plt.subplots(figsize=(12, 12))
    plt.imshow(floor_img_inv)
    
    # Hallway-mask points; the CSV's 'y' goes on the horizontal axis and 'x'
    # on the vertical axis.
    true_floor_locs = pd.read_csv('../input/hallwaymask/white_area_'+site+'_'+floorNo+'.csv')
    plt.scatter(true_floor_locs['y'], true_floor_locs['x'])

    if show_train:
        true_locs = true_locs.query('site == @site and floorNo == @map_floor').copy()
        # Scale the coordinates to pixels and flip y, since image rows grow
        # downwards.
        true_locs["x_"] = true_locs["x"] * floor_array.shape[0] / height_meter
        true_locs["y_"] = (
            true_locs["y"] * -1 * floor_array.shape[1] / width_meter
        ) + floor_array.shape[0]
        true_locs.query("site == @site and floorNo == @map_floor").groupby("path").plot(
            x="x_",
            y="y_",
            style="+",
            ax=ax,
            label="train waypoint location",
            color="grey",
            alpha=0.5,
        )

    if show_preds:
        sub = sub.query('site == @site and floorNo == @floorNo').copy()
        # Same pixel conversion as for the training waypoints.
        sub["x_"] = sub["x"] * floor_array.shape[0] / height_meter
        sub["y_"] = (
            sub["y"] * -1 * floor_array.shape[1] / width_meter
        ) + floor_array.shape[0]
        # One line per predicted path, labelled with the path id.
        for path, path_data in sub.query(
            "site == @site and floorNo == @floorNo"
        ).groupby("path"):
            path_data.plot(
                x="x_",
                y="y_",
                style=".-",
                ax=ax,
                title=f"{site} - floor - {floorNo}",
                alpha=1,
                label=path,
            )
    if fix_labels:
        # Building a dict keyed by label keeps one handle per distinct label.
        handles, labels = ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        plt.legend(
            by_label.values(), by_label.keys(), loc="center left", bbox_to_anchor=(1, 0.5)
        )
    return fig, ax

def sub_process(sub, train_waypoints):
    """
    Prepare a submission frame for post-processing.

    Splits ``site_path_timestamp`` into ``site`` / ``path`` / ``timestamp``,
    attaches the ``floorNo`` label of every (site, floor) pair found in
    ``train_waypoints`` and flags predictions whose (site, floor, x, y)
    match a training waypoint exactly.

    Parameters
    ----------
    sub : DataFrame
        Submission with columns ``site_path_timestamp``, ``floor``, ``x``,
        ``y``.
    train_waypoints : DataFrame
        Training waypoints with columns ``site``, ``floorNo``, ``floor``,
        ``x``, ``y``. It is not modified.

    Returns
    -------
    DataFrame
        A new frame with the added columns ``site``, ``path``,
        ``timestamp``, ``floorNo`` and a boolean ``isTrainWaypoint``.
    """
    # Add the flag on a copy so the caller's frame is left untouched.
    waypoints = train_waypoints.assign(isTrainWaypoint=True)

    sub = split_col(sub[['site_path_timestamp', 'floor', 'x', 'y']])

    # Look up the floor name for each (site, floor) pair.
    floor_names = waypoints[['site', 'floorNo', 'floor']].drop_duplicates()
    sub = sub.merge(floor_names, how='left', on=['site', 'floor'])

    # Mark predictions that land exactly on a known training waypoint.
    known_points = waypoints[
        ['x', 'y', 'site', 'floor', 'isTrainWaypoint']
    ].drop_duplicates()
    sub = sub.merge(known_points, how='left', on=['site', 'x', 'y', 'floor'])

    # Unmatched rows hold NaN after the left merge; comparing with True
    # turns them into False and yields a plain boolean column.
    sub['isTrainWaypoint'] = sub['isTrainWaypoint'].eq(True)
    return sub.copy()

## Plot an example
I'll plot the hallway mask, together with the training waypoints on the map for a given floor.

In [None]:

# Load the training waypoints from CSV.
train_waypoints = pd.read_csv('../input/indoor-location-train-waypoints/train_waypoints.csv')

# Load the base submission and add the site / path / floorNo columns.
sub = sub_process(pd.read_csv('../input/sub-cost-snap-from-kaggle/sub_cost_snap.csv'),
                 train_waypoints)

# Plot the training Data For an example Floor
example_site = '5d27096c03f801723c31e5e0'

example_floorNo = 'F2'


# show_preds=False: only the hallway mask and the training waypoints are drawn.
plot_preds(example_site, example_floorNo, sub,
           train_waypoints, show_preds=False)
plt.show()

## Find the closest "Hallway" point for each prediction.

In [None]:
# Root folder used to build the floor metadata (floor_info.json) paths.
base="../input/indoor-location-navigation"
# Folder holding the per-site 'thresh_list_<site>.pkl' files.
thresh_path = '../input/threshold-images/'
# Per-site metadata folders; each site's folder is listed to find the floors.
meta_path = '../input/indoor-location-navigation/metadata/'

from scipy.spatial.distance import cdist

def add_xy(df):
    """Add an ``xy`` column of ``(x, y)`` tuples to ``df`` in place and return it."""
    pairs = list(zip(df['x'], df['y']))
    df['xy'] = pairs
    return df

def add_xy2(df):
    """Add an ``xy2`` column of ``(x2, y2)`` tuples to ``df`` in place and return it."""
    pairs = list(zip(df['x2'], df['y2']))
    df['xy2'] = pairs
    return df

def add_xy2_(df):
    """
    Add an ``xy2_`` column of ``(y2_, x2_)`` tuples to ``df`` in place and
    return it.

    The order is deliberately swapped relative to the column names. This is
    the only definition kept: an earlier duplicate that built
    ``(x2_, y2_)`` was always overridden by this one.
    NOTE(review): the swap presumably exists because ``x2_`` is derived from
    the mask's vertical axis and ``y2_`` from its horizontal axis, so
    ``(y2_, x2_)`` lines up with the ``(x, y)`` order of the predictions --
    confirm against the code that fills these columns.
    """
    df['xy2_'] = list(zip(df['y2_'], df['x2_']))
    return df

def closest_point(point, points):
    """Return the element of ``points`` that has the smallest distance to ``point``."""
    # cdist gives a 1 x len(points) matrix; argmin over it is the index of
    # the nearest candidate.
    distances = cdist([point], points)
    nearest_index = distances.argmin()
    return points[nearest_index]

# Cache (x, y) tuples on both frames; the loop below reads sub['xy'].
sub = add_xy(sub)

train_waypoints = add_xy(train_waypoints)

ds = []
# To debug on a single site, iterate over
# sub[sub['site']==example_site].groupby(['site','floorNo']) instead.
for (site, myfloor), d in sub.groupby(['site','floorNo']):
    # groupby hands out slices of `sub`; copy before adding columns so the
    # assignments below are not made on a slice of the original frame.
    d = d.copy()

    # Physical floor size comes from the floor's metadata.
    json_plan_filename = f"{base}/metadata/{site}/{myfloor}/floor_info.json"
    with open(json_plan_filename) as json_file:
        json_data = json.load(json_file)

    width_meter = json_data["map_info"]["width"]
    height_meter = json_data["map_info"]["height"]

    # The thresholded floor image is only needed for its pixel dimensions.
    # NOTE(review): the floor's position in the pickled list is taken from
    # os.listdir order -- assumes the pickle was written in that same order.
    floor_array = joblib.load(thresh_path+'thresh_list_'+site+'.pkl')
    floors = os.listdir(meta_path+site)
    floor_array = floor_array[floors.index(myfloor)]
    floor_array = floor_array[:,:,1]

    true_floor_locs = pd.read_csv('../input/hallwaymask/white_area_'+site+'_'+myfloor+'.csv')

    if len(true_floor_locs) == 0:
        # No hallway pixels for this floor: keep the predictions as they are
        # (matched point == prediction) instead of dropping the rows, so the
        # final submission still contains them.
        print(f'Skipping {site} {myfloor}')
        d['matched_point'] = d['xy']
        d['x_'] = d['x']
        d['y_'] = d['y']
        ds.append(d)
        continue

    # Convert to meters
    true_floor_locs['x2_'] = true_floor_locs['x'] * height_meter / floor_array.shape[0]
    true_floor_locs['y2_'] = true_floor_locs['y'] * width_meter / floor_array.shape[1]
    # Flip the axis derived from 'x' so it is measured from the opposite edge.
    true_floor_locs['x2_'] = height_meter - true_floor_locs['x2_']

    true_floor_locs = add_xy2_(true_floor_locs)

    # Build the candidate list once per floor rather than once per prediction.
    hallway_points = list(true_floor_locs['xy2_'])
    d['matched_point'] = [closest_point(p, hallway_points) for p in d['xy']]

    d['x_'] = d['matched_point'].apply(lambda x: x[0])
    d['y_'] = d['matched_point'].apply(lambda x: x[1])
    ds.append(d)

sub = pd.concat(ds)

In [None]:
# Example of raw predictions
# plot_preds draws the submission's x / y columns, which still hold the
# unmodified predictions at this point.
plot_preds(example_site, example_floorNo, sub,
           train_waypoints, show_preds=True)
plt.show()

## Push predictions to hallways.

I wanted to push every point for now, so I set a large threshold.

In [None]:
def snap_to_grid(sub, threshold):
    """
    Snap to grid if within a threshold.

    x, y are the predicted points.
    x_, y_ are the closest grid points.
    _x_, _y_ are the new predictions after post processing.

    Parameters
    ----------
    sub : DataFrame
        Must contain the columns ``x``, ``y``, ``x_``, ``y_`` and ``dist``.
        It is not modified.
    threshold : float
        Rows whose ``dist`` is strictly below this value take ``x_`` /
        ``y_``; all other rows (including rows with a missing ``dist``)
        keep ``x`` / ``y``.

    Returns
    -------
    DataFrame
        A copy of ``sub`` with the added columns ``_x_`` and ``_y_``.
    """
    snapped = sub.copy()
    within = snapped['dist'] < threshold
    # Series.where keeps the matched coordinate where `within` is True and
    # falls back to the original prediction elsewhere.
    snapped['_x_'] = snapped['x_'].where(within, snapped['x'])
    snapped['_y_'] = snapped['y_'].where(within, snapped['y'])
    return snapped

# Calculate the distances
# Euclidean distance between each prediction (x, y) and its matched
# hallway point (x_, y_), in the same units as x / y.
sub['dist'] = np.sqrt( (sub.x-sub.x_)**2 + (sub.y-sub.y_)**2 )

# Save the intermediate frame (matched points and distances) for inspection.
sub.to_csv('sub_with_dist.csv')

# Every row with dist below 10000 is moved to its matched hallway point.
sub_pp = snap_to_grid(sub, threshold=10000)

# Keep the submission columns plus site / path / floorNo (used for plotting)
# and expose the post-processed coordinates under the names x / y.
sub_pp = sub_pp[['site_path_timestamp','floor','_x_','_y_','site','path','floorNo']] \
    .rename(columns={'_x_':'x', '_y_':'y'})

Let's take a look at the predictions after post processing.

In [None]:
# Plot example after post processing
# sub_pp's x / y columns now hold the coordinates returned by snap_to_grid.
plot_preds(example_site, example_floorNo, sub_pp,
           train_waypoints, show_preds=True)
plt.show()

Not ideal, but at least the paths are in the hallways now.

Save Post Processed Submission.

In [None]:
# Write only the four submission columns, without the DataFrame index.
sub_pp[['site_path_timestamp','floor','x','y']] \
    .to_csv('submission_push_to_hallway_ensemble.csv', index=False)