<div>
    <h1 align="center"> Snap to Grid - Part(3)</h1>
    <h2 align="center">Identify the position of a smartphone in a shopping mall</h2>
    <h3 align="center">By: Somayyeh Gholami & Mehran Kazeminia</h3>
</div>

<div class="alert alert-success">  
</div>

# Description:

### - In this notebook, we want to improve the score of our previous notebook (No. 2). We chose "generated6", which has a score of "5.265". The address of our previous notebook is as follows:

https://www.kaggle.com/mehrankazeminia/2-3-indoor-navigation-comparative-method

### - This notebook reuses code from the following notebook. Thanks again for sharing this great notebook.

https://www.kaggle.com/robikscube/indoor-navigation-snap-to-grid-post-processing

### - "Data Visualization" is of particular importance in this challenge, because the location of the corridors is important :)


## >>> Good Luck <<<



<div class="alert alert-success">  
</div>

# If you find this work useful, please don't forget to upvote :)

<div class="alert alert-success">  
</div>

# Import 

In [None]:
import json
import numpy as np
import pandas as pd 
import matplotlib.pylab as plt

from scipy.spatial.distance import cdist

%matplotlib inline

<div class="alert alert-success">  
</div>

# Helper Functions

In [None]:
def split_col(df):
    """Prepend `site`, `path` and `timestamp` columns parsed from the key.

    The `site_path_timestamp` column is split on every underscore; the first
    three pieces are named site / path / timestamp and placed in front of the
    original columns of `df`. A new frame is returned; `df` is not modified.
    """
    key_parts = df['site_path_timestamp'].str.split('_', expand=True)
    key_parts = key_parts.rename(columns={0: 'site', 1: 'path', 2: 'timestamp'})
    combined = pd.concat([key_parts, df], axis=1)
    return combined.copy()

# Floor label -> integer floor index. Basements are negative, and both
# spellings of an above-ground floor ("F1" and "1F") map to the same index,
# starting at 0 for the first floor.
floor_map = {"B2": -2, "B1": -1}
floor_map.update({f"F{level}": level - 1 for level in range(1, 10)})
floor_map.update({f"{level}F": level - 1 for level in range(1, 10)})


def plot_preds(
    site,
    floorNo,
    sub=None,
    true_locs=None,
    base="../input/indoor-location-navigation",
    show_train=True,
    show_preds=True,
    fix_labels=True,
    map_floor=None
):
    """
    Plots predictions on floorplan map.

    Parameters
    ----------
    site : site identifier; used both to locate the metadata folder and to
        filter `sub` / `true_locs`.
    floorNo : floor label used to filter `sub` and shown in the title.
    sub : DataFrame of predictions with `site`, `floorNo`, `path`, `x`, `y`
        columns. The prediction layer is skipped when this is None.
    true_locs : DataFrame of training waypoints with the same columns. The
        training layer is skipped when this is None.
    base : root folder of the competition data.
    show_train : draw the training waypoints (grey "+").
    show_preds : draw the predicted paths, one line per path.
    fix_labels : collapse duplicate legend entries and place the legend to
        the right of the axes.
    map_floor : use a different floor's map (and that floor's training
        waypoints); defaults to `floorNo`.

    Returns
    -------
    (fig, ax) : the matplotlib figure and axes that were drawn on.

    Neither `sub` nor `true_locs` is modified.
    """
    if map_floor is None:
        map_floor = floorNo
    # Prepare width_meter & height_meter (taken from the .json file)
    floor_plan_filename = f"{base}/metadata/{site}/{map_floor}/floor_image.png"
    json_plan_filename = f"{base}/metadata/{site}/{map_floor}/floor_info.json"
    with open(json_plan_filename) as json_file:
        json_data = json.load(json_file)

    width_meter = json_data["map_info"]["width"]
    height_meter = json_data["map_info"]["height"]

    floor_img = plt.imread(floor_plan_filename)
    # Image arrays are (rows, columns, ...), i.e. (height, width, ...).
    img_height_px, img_width_px = floor_img.shape[0], floor_img.shape[1]

    def _to_pixels(points):
        """Return a copy of `points` with pixel coordinates in `x_` / `y_`."""
        points = points.copy()
        # x runs along the image width, y along the image height; each axis
        # uses its own pixels-per-metre factor so non-uniform images still
        # line up with the floor plan.
        points["x_"] = points["x"] * img_width_px / width_meter
        # Image rows grow downwards while map y grows upwards, hence the flip.
        points["y_"] = img_height_px - points["y"] * img_height_px / height_meter
        return points

    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(floor_img)
    ax.set_title(f"{site} - floor - {floorNo}")

    if show_train and true_locs is not None:
        train_px = _to_pixels(
            true_locs.query("site == @site and floorNo == @map_floor")
        )
        for _, path_locs in train_px.groupby("path"):
            path_locs.plot(
                x="x_",
                y="y_",
                style="+",
                ax=ax,
                label="train waypoint location",
                color="grey",
                alpha=0.5,
            )

    if show_preds and sub is not None:
        preds_px = _to_pixels(sub.query("site == @site and floorNo == @floorNo"))
        for path, path_data in preds_px.groupby("path"):
            path_data.plot(
                x="x_",
                y="y_",
                style=".-",
                ax=ax,
                alpha=1,
                label=path,
            )

    if fix_labels:
        # Every training path adds the same legend label; keep one of each.
        handles, labels = ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        ax.legend(
            by_label.values(), by_label.keys(), loc="center left", bbox_to_anchor=(1, 0.5)
        )
    return fig, ax

def sub_process(sub, train_waypoints):
    """Enrich a raw submission with key parts, floor names and a grid flag.

    Steps:
      1. Split `site_path_timestamp` into `site`, `path`, `timestamp`.
      2. Attach `floorNo` by looking up each (`site`, `floor`) pair in
         `train_waypoints`.
      3. Add boolean `isTrainWaypoint`: True when the predicted (`x`, `y`)
         exactly equals a training waypoint on the same site and floor.

    `train_waypoints` is only read, never modified, so the function can be
    re-run safely on the same frame. A new DataFrame is returned.
    """
    sub = split_col(sub[['site_path_timestamp', 'floor', 'x', 'y']])

    floor_lookup = train_waypoints[['site', 'floorNo', 'floor']].drop_duplicates()
    sub = sub.merge(floor_lookup, how='left', on=['site', 'floor'])

    # Build the flag on a derived frame instead of writing a column onto the
    # caller's train_waypoints.
    known_points = (
        train_waypoints[['x', 'y', 'site', 'floor']]
        .drop_duplicates()
        .assign(isTrainWaypoint=True)
    )
    sub = sub.merge(known_points, how='left', on=['site', 'x', 'y', 'floor'])

    # Unmatched rows come back as NaN; notna() yields a clean bool column
    # without relying on fillna's object-dtype downcasting.
    sub['isTrainWaypoint'] = sub['isTrainWaypoint'].notna()
    return sub


<div class="alert alert-success">  
</div>

# Data Set

In [None]:
# Training waypoints act as the "grid" that predictions are snapped onto.
train_waypoints = pd.read_csv(
    '../input/indoor-location-train-waypoints/train_waypoints.csv'
)

# Starting submission: read it, then enrich it with site/path/floorNo columns.
sub = pd.read_csv('../input/2-3-indoor-navigation-comparative-method/generated6.csv')
sub = sub_process(sub, train_waypoints)


<div class="alert alert-success">  
</div>

# Find the closest "grid" point for each prediction.

In [None]:
def add_xy(df):
    """Add an `xy` column holding each row's (x, y) pair as a tuple.

    The column is written onto `df` itself, and that same object is returned.
    """
    df['xy'] = list(zip(df['x'], df['y']))
    return df

def closest_point(point, points):
    """Return the entry of `points` nearest to `point` (Euclidean distance).

    When several entries are equally near, the first one in `points` wins.
    """
    distances = cdist([point], points)
    nearest_idx = distances.argmin()
    return points[nearest_idx]

# Give both frames an `xy` tuple column.
sub = add_xy(sub)
train_waypoints = add_xy(train_waypoints)

ds = []
for (site, myfloor), d in sub.groupby(['site','floor']):
    # Work on a copy: assigning columns to a groupby slice raises
    # SettingWithCopyWarning and may not write through.
    d = d.copy()
    true_floor_locs = train_waypoints.loc[(train_waypoints['floor'] == myfloor) &
                                          (train_waypoints['site'] == site)] \
        .reset_index(drop=True)
    if len(true_floor_locs) == 0:
        # No grid for this site/floor. Keep the rows (a submission must
        # contain every prediction) with NaN grid coordinates, so the distance
        # is NaN and the threshold test later leaves them unsnapped.
        print(f'Skipping {site} {myfloor}')
        d['matched_point'] = None
        d['x_'] = np.nan
        d['y_'] = np.nan
        ds.append(d)
        continue
    # One distance matrix per group (predictions x grid points) instead of
    # rebuilding the grid list and calling cdist for every single prediction.
    grid_points = list(true_floor_locs['xy'])
    nearest_idx = cdist(
        d[['x', 'y']].to_numpy(), true_floor_locs[['x', 'y']].to_numpy()
    ).argmin(axis=1)
    d['matched_point'] = [grid_points[i] for i in nearest_idx]
    d['x_'] = true_floor_locs['x'].to_numpy()[nearest_idx]
    d['y_'] = true_floor_locs['y'].to_numpy()[nearest_idx]
    ds.append(d)

# Concatenating per-group frames reorders rows by (site, floor); sorting on
# the index restores the row order `sub` had before the loop.
sub = pd.concat(ds).sort_index()


<div class="alert alert-success">  
</div>

# Apply a Threshold and "Snap to Grid"

In [None]:
def snap_to_grid(sub, threshold):
    """
    Replace a prediction with its nearest grid point when they are close enough.

    Columns read:    x, y (predictions), x_, y_ (closest grid points), dist.
    Columns written: _x_, _y_ (post-processed predictions), added to `sub`.

    A row is snapped only when `dist` is strictly below `threshold`; all other
    rows keep their original x / y. Returns a copy of the updated frame.
    """
    within = sub['dist'] < threshold
    sub['_x_'] = sub['x'].mask(within, sub['x_'])
    sub['_y_'] = sub['y'].mask(within, sub['y_'])
    return sub.copy()

# Euclidean distance between each prediction (x, y) and its closest grid
# point (x_, y_).
sub['dist'] = np.sqrt((sub['x'] - sub['x_']) ** 2 + (sub['y'] - sub['y_']) ** 2)

# Thresholds tried so far, with the value noted next to each run
# (presumably the resulting leaderboard score):
#   threshold   score
#   7           4.809
#   7.3         4.794
#   7.4         4.796
#   7.5         4.784
#   7.52        4.782
#   7.55        4.782
#   7.6         4.788
#   8           4.792
#   9           4.824
sub_pp = snap_to_grid(sub, threshold=7.57)

# Keep the identifying columns and expose the snapped coordinates as x / y.
sub_pp = sub_pp[
    ['site_path_timestamp', 'floor', '_x_', '_y_', 'site', 'path', 'floorNo']
].rename({'_x_': 'x', '_y_': 'y'}, axis='columns')

# Save Post Processed Submission.

<div class="alert alert-success">  
</div>

In [None]:
# Write only the four submission columns, without the index.
sub_pp.to_csv(
    'submission_snap_to_grid.csv',
    columns=['site_path_timestamp', 'floor', 'x', 'y'],
    index=False,
)

<div class="alert alert-success">  
</div>

# Data Visualization

In [None]:
def split_col(df):
    """
    Return a new frame with `site`, `path` and `timestamp` columns in front.

    The three columns come from splitting `site_path_timestamp` on "_";
    the original columns of `df` follow unchanged and `df` is not modified.
    """
    part_names = {0: "site", 1: "path", 2: "timestamp"}
    pieces = df["site_path_timestamp"].str.split("_", expand=True)
    return pd.concat([pieces.rename(columns=part_names), df], axis="columns").copy()


def plot_preds(
    site,
    floorNo,
    sub=None,
    true_locs=None,
    base="../input/indoor-location-navigation",
    show_train=True,
    show_preds=True,
):
    """
    Plots predictions on floorplan map.

    Parameters
    ----------
    site : site identifier; selects the metadata folder and filters the frames.
    floorNo : floor label; selects the floor plan and filters the frames.
    sub : DataFrame of predictions with `site`, `floorNo`, `path`, `x`, `y`.
        The prediction layer is skipped when this is None.
    true_locs : DataFrame of training waypoints with the same columns. The
        training layer is skipped when this is None.
    base : root folder of the competition data.
    show_train : draw the training waypoints (grey "+").
    show_preds : draw the predicted paths, one line per path.

    Returns
    -------
    (fig, ax) : the matplotlib figure and axes that were drawn on.

    Neither `sub` nor `true_locs` is modified; pixel coordinates are computed
    on filtered copies.
    """
    # Prepare width_meter & height_meter (taken from the .json file)
    floor_plan_filename = f"{base}/metadata/{site}/{floorNo}/floor_image.png"
    json_plan_filename = f"{base}/metadata/{site}/{floorNo}/floor_info.json"
    with open(json_plan_filename) as json_file:
        json_data = json.load(json_file)

    width_meter = json_data["map_info"]["width"]
    height_meter = json_data["map_info"]["height"]

    floor_img = plt.imread(floor_plan_filename)
    # Image arrays are (rows, columns, ...), i.e. (height, width, ...).
    img_height_px, img_width_px = floor_img.shape[0], floor_img.shape[1]

    def _to_pixels(points):
        """Return a copy of `points` with pixel coordinates in `x_` / `y_`."""
        points = points.copy()
        # x follows the image width and y the image height, each with its own
        # pixels-per-metre factor.
        points["x_"] = points["x"] * img_width_px / width_meter
        # Image rows grow downwards while map y grows upwards, hence the flip.
        points["y_"] = img_height_px - points["y"] * img_height_px / height_meter
        return points

    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(floor_img)
    ax.set_title(f"{site} - floor - {floorNo}")

    if show_train and true_locs is not None:
        train_px = _to_pixels(
            true_locs.query("site == @site and floorNo == @floorNo")
        )
        for _, path_locs in train_px.groupby("path"):
            path_locs.plot(
                x="x_",
                y="y_",
                style="+",
                ax=ax,
                label="train waypoint location",
                color="grey",
                alpha=0.5,
            )

    if show_preds and sub is not None:
        preds_px = _to_pixels(sub.query("site == @site and floorNo == @floorNo"))
        for path, path_data in preds_px.groupby("path"):
            path_data.plot(
                x="x_",
                y="y_",
                style=".-",
                ax=ax,
                alpha=1,
                label=path,
            )
    return fig, ax


In [None]:
# Rebuild site/path/timestamp columns from the post-processed submission.
sub = split_col(sub_pp[['site_path_timestamp','floor','x','y']])

true_locs = pd.read_csv("../input/indoor-location-train-waypoints/train_waypoints.csv")

# Add floor No to sub file
sub = sub.merge(true_locs[["site", "floor", "floorNo"]].drop_duplicates())


# One figure per (site, floor) that appears in the submission.
for (site, floorNo), d in sub.groupby(["site", "floorNo"]):
    fig, ax = plot_preds(site, floorNo, sub, true_locs)
    # Remove duplicate labels
    handles, labels = ax.get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    plt.legend(
        by_label.values(), by_label.keys(), loc="center left", bbox_to_anchor=(1, 0.5)
    )
    plt.show()
    # Release the figure once shown so the loop does not accumulate open
    # figures across all sites and floors.
    plt.close(fig)
    

<div class="alert alert-success">  
</div>